#!/usr/bin/perl -w
#
# HTML clean-up filter.
#
# Reads an HTML document from STDIN (or from the files named on the command
# line), looks up the charset declared in its Content-Type META tag, and
# strips a set of presentational attributes and tags from the markup.
#
# Requires the Text::Iconv module.

use strict;
use Text::Iconv;

# Encoding handed to Text::Iconv as the conversion target.
my $cp = "ISO-8859-2";

# die on bad encoding!
Text::Iconv->raise_error(1);

# Slurp every input line. join() yields an empty string (not undef) when
# there is no input, so the substitutions below do not warn under -w.
my $html = join '', <>;

# Create a converter from the document's declared charset to $cp.
# $c stays undef when no charset declaration is found.
# NOTE(review): $c is not used in the part of the script visible here;
# presumably the conversion happens further down -- confirm that code copes
# with an undefined $c.
my $c;
if ($html =~ m!CONTENT="text/html; charset=([^"]+)"!is) {
    $c = Text::Iconv->new($1, $cp);
}

# Drop LANG="..." attributes.
$html =~ s/\s+LANG="[^"]+"//gsi;

# Drop opening and closing FONT tags (their content is kept).
$html =~ s/<\/*FONT[^>]*>//gsi;

# Decode numeric character references (&#NNN;) into characters. The leading
# "&#" is required so that ordinary digits followed by ';' (for example
# "widows: 2;" inside a STYLE attribute) are left intact for the rules below.
$html =~ s/&#(\d+);/chr($1)/gsie;

# Drop STYLE attributes that carry only these exact values.
$html =~ s/\s+STYLE="margin-bottom: 0in"//gsi;
$html =~ s/\s+STYLE="line-height: 100%"//gsi;

# Drop empty <SDFIELD ...></SDFIELD> pairs.
$html =~ s/<(SDFIELD)[^>]*><\/\1>//gsi;

# Remove individual properties from inside STYLE attributes, keeping
# whatever precedes them in the attribute value ($1).
$html =~ s/(STYLE="[^"]*)text-indent:\s+\d+cm(;\s+)*/$1/gsi;
$html =~ s/(STYLE="[^"]*)line-height:\s+\d+%;*/$1/gsi;
$html =~ s/(STYLE="[^"]*)widows:\s+\d+;*/$1/gsi;
$html =~ s/(STYLE="[^"]*)orphans:\s+\d+;*/$1/gsi;

# Drop STYLE attributes left empty by the removals above.
$html =~ s/STYLE="\s*"\s*//gsi;

# remove excessive empty lines
# (the substitution started here continues on the following lines)
$html =~ s,
]*>(?:\s*
\s*)*