#!/usr/bin/perl -w #---------------------------------# # PROGRAM: scholia_formatter.pl # #---------------------------------# # Open the file supplied by the first argument open (INFILE, $ARGV[0]) || die "Error opening input file"; @contents = ; $outputFile = "new_".$ARGV[0]; open (OUTFILE, "> $outputFile"); $print_begin = 0; $dashed_list_found = 0; $footnote = 0; $inside_a_list = 0; $header_two_lines = 0; $new_footnote_paragraph = 0; foreach $line (@contents) { # break at end of content if ($line =~ //) { last; } # TITLE if ($line =~ /class="art_head/) { $line =~ s{

}{}; $line =~ s{

}{}; print OUTFILE $line; next; } # BODY if ($line =~ //) { # start printing each line only once the print actually starts $print_begin = 1; next; } if ($print_begin == 1) { # $line =~ s/^\s+//; # trim whitespace at beginning of line # Headers # replaces headers contained in a single line $line =~ s{

(.+)

}{

$2

}; $line =~ s{

(.+)

}{

$2

}; $line =~ s{

(.+)

}{

$2

}; $line =~ s{

(.+)

}{

$2

}; if ($header_two_lines ge 1 && $line =~ '

') { # replaces the closing header tag on a later line $line =~ s#

##; $header_two_lines = 0; } if ($line =~ 'p class="art_sub1') { # replaces headers spanning multiple lines $line =~ s{

}{

}; $header_two_lines = 2; } if ($line =~ 'p class="art_sub2') { $line =~ s{

}{

}; $header_two_lines = 3; } if ($line =~ 'p class="art_sub3') { $line =~ s{

}{

}; $header_two_lines = 4; } if ($line =~ 'p class="art_sub4') { $line =~ s{

}{

}; $header_two_lines = 4; } # Content $line =~ s{

}{

}; $line =~ s{

}{

}; $line =~ s{}{}; # center $line =~ s{}{}; # right $line =~ s{font face="Symbol"}{span class="greek"}g; # this line shouldn't be necessary, really... $line =~ s{}{}; # Symbol font $line =~ s{}{}; $line =~ s{

?

(.+)

?
}{
$1
}; ### I haven't seen this occur yet... $line =~ s{centered" class="art_sub2">Thesis (..?)}{centered">Thesis $1}; # Thesis statements $line =~ s{Thesis (..?)}{Thesis $1}; $line =~ s{centered" class="art_sub2">(.*)}{centered">$1}; #

$1}g; $line =~ s{^