718 lines
16 KiB
Perl
Executable File
718 lines
16 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
#
|
|
# Creates 7 bit ASCII text from Latin-1 FAQ
|
|
#
|
|
# (c) 1996 Volker Götz <volker@oops.franken.de>
|
|
# modified 1996/1997 by
|
|
# Bernhard Hailer <dl4mhk@lrz.uni-muenchen.de>
|
|
# Matthias Heßler <hessler@wi-inf.uni-essen.de>
|
|
#
|
|
# $Id$
|
|
|
|
|
|
#
|
|
# Variables
|
|
#
|
|
|
|
# Unbuffered print's
|
|
$| = 1;
|
|
|
|
$margin = 6;
|
|
$bar = "| ";
|
|
$barlen = length ($bar);
|
|
# maxlen gibt an, wieviele Zeichen max. pro Zeile ausgegeben werden
|
|
# (margin geht extra...)
|
|
$maxlen = 79 - $margin;
|
|
|
|
# Für die Datenausgabe benötigt...
|
|
$actline = "";
|
|
$commentline = "";
|
|
$lastnumber = "";
|
|
|
|
# Für Änderungsstrich (default: Änderungsstrich!)
|
|
$old = 0;
|
|
|
|
# Fragenzaehler
|
|
$maincount = 0;
|
|
$subcount = 0;
|
|
$qcount = 0;
|
|
|
|
# Statusvariable
|
|
$script = "FALSE";
|
|
$state = "begin";
|
|
$buffer = "-";
|
|
|
|
|
|
#
|
|
# Get file names from prompt
|
|
#
|
|
|
|
$in = $ARGV[0];
|
|
die "Usage: 7bit.pl infile [outfile] [headtempfile] [tailtempfile]\n" unless defined ($in);
|
|
|
|
$out = $ARGV[1];
|
|
$out = "$in.asc" unless defined ($out);
|
|
|
|
|
|
#
|
|
# Open files
|
|
#
|
|
|
|
open(IN,"<$in") || die "Can not open $in for input!\n";
|
|
|
|
open(OUT,">$out") || die "Can not open $out for output!\n";
|
|
|
|
|
|
#
|
|
# Create FAQ - TOC
|
|
#
|
|
|
|
print ("Creating TOC:\n");
|
|
|
|
while($line = <IN>) {
|
|
|
|
chop($line); # kill \n character (newline)
|
|
|
|
if ($line =~ s/^!1//g) { # main headline
|
|
if ($state eq "begin") {
|
|
$line = &latin2asc ($line);
|
|
output_newline ("\n\n".$line);
|
|
output_newline ("#" x length ($line));
|
|
output_newline ("");
|
|
$state = "!1";
|
|
}
|
|
else {
|
|
print "ERROR: too many !1 or !1 improper located:\n";
|
|
print "!1$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "begin") { # Vorspann...
|
|
$line = &latin2asc ($line);
|
|
output_newline ($line);
|
|
}
|
|
|
|
elsif ($line =~ s/^!2//g) { # bigger headlines
|
|
$maincount++;
|
|
$subcount = 0;
|
|
$qcount = 0;
|
|
flush_output ();
|
|
$line = &latin2asc ($line);
|
|
output_newline ("\n\n$maincount $line");
|
|
output_newline ("=" x (length($line)+length($maincount)+1));
|
|
output_newline ("");
|
|
$state = "!2";
|
|
}
|
|
|
|
elsif ($line =~ s/^!3//g) { # smaller headlines
|
|
$subcount++;
|
|
$qcount = 0;
|
|
flush_output ();
|
|
$line = &latin2asc ($line);
|
|
output_newline ("\n$maincount.$subcount $line");
|
|
output_newline ("-" x (length($line)+length($maincount)+length($subcount)+2));
|
|
output_newline ("");
|
|
$state = "!3";
|
|
}
|
|
|
|
elsif ($line =~ s/^!old//g) { # old mode on
|
|
if ($old eq 0) {
|
|
flush_output();
|
|
$old = 1;
|
|
}
|
|
else {
|
|
print "ERROR: more than 1 !old ($state $maincount.$subcount.$qcount):\n";
|
|
print "!old$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^Q: //g) { # questions
|
|
|
|
$qcount++;
|
|
|
|
# für jede Frage einen Punkt...
|
|
print ".";
|
|
|
|
# index part
|
|
if ($state ne "question") { # first line
|
|
flush_output ();
|
|
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
nice_output ( $line, "$qcount. ");
|
|
}
|
|
else {
|
|
print "ERROR: \"Q: \" is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "Q: $line\n";
|
|
}
|
|
$state = "question";
|
|
}
|
|
|
|
elsif ($line =~ s/^A: //g) { # answers
|
|
if ($state eq "question") {
|
|
flush_output();
|
|
$old = 0; # Kann wieder zurückschalten, da keine Ausgabe mehr...
|
|
$state = "answer";
|
|
}
|
|
else {
|
|
print "ERROR: \"A: \" is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "A: $line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!verbon//g) { # script mode on
|
|
if ($state ne "script") {
|
|
flush_output ();
|
|
$buffer = $state;
|
|
$state = "script";
|
|
}
|
|
else {
|
|
print "ERROR: !verbon is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "!verbon$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!verboff//g) { # script mode off
|
|
if ($state eq "script") {
|
|
flush_output ();
|
|
$state = $buffer;
|
|
}
|
|
else {
|
|
print "ERROR: missing !verbon ($state $maincount.$subcount.$qcount):\n";
|
|
print "!verboff$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "script") { # script mode ("!verbon")
|
|
if ($buffer eq "question") {
|
|
# leave spaces/margin alone => keine Formattierung
|
|
ugly_output ($line);
|
|
}
|
|
elsif ($buffer eq "answer") {
|
|
# keine Ausgabe in der TOC
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "question") { # do while in question mode
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
if (length ($line) > 0) {
|
|
nice_output ( $line);
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
flush_output();
|
|
|
|
#
|
|
# Reopen Inputfile
|
|
#
|
|
|
|
close (IN);
|
|
print "\n";
|
|
open(IN,"<$in") || die "Can not reopen $in for input!\n";
|
|
|
|
|
|
#
|
|
# Create main part of FAQ
|
|
#
|
|
|
|
print ("Writing questions:\n");
|
|
|
|
$maincount= 0;
|
|
$subcount = 0;
|
|
$qcount = 0;
|
|
|
|
$state = "begin";
|
|
$buffer = "-";
|
|
|
|
while($line = <IN>) {
|
|
|
|
chop($line); # kill \n character (newline)
|
|
|
|
if ($line =~ s/^!1//g) { # main headline
|
|
if ($state eq "begin") {
|
|
$line = &latin2asc ($line);
|
|
output_newline ("");
|
|
output_newline ("\n\n$line");
|
|
output_newline ("#" x length ($line));
|
|
output_newline ("");
|
|
$state = "!1";
|
|
}
|
|
else {
|
|
print "ERROR: too many !1 or !1 improper located:\n";
|
|
print "!1$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "begin") { # Vorspann...
|
|
# wurde schon ausgegeben => überspringen!
|
|
}
|
|
|
|
elsif ($line =~ s/^!2//g) { # bigger headlines
|
|
$maincount++;
|
|
$subcount = 0;
|
|
$qcount = 0;
|
|
flush_output ();
|
|
$line = &latin2asc ($line);
|
|
output_newline ("");
|
|
output_newline ("\n\n$maincount $line");
|
|
output_newline ("=" x (length($line) + length($maincount) + 1));
|
|
output_newline ("");
|
|
$state = "!2";
|
|
}
|
|
|
|
elsif ($line =~ s/^!3//g) { # smaller headlines
|
|
$subcount++;
|
|
$qcount = 0;
|
|
flush_output ();
|
|
$line = &latin2asc ($line);
|
|
output_newline ("");
|
|
output_newline ("\n\n$maincount.$subcount $line");
|
|
output_newline ("-" x (length($line) + length($maincount) + length($subcount) + 2));
|
|
output_newline ("");
|
|
$state = "!3";
|
|
}
|
|
|
|
elsif ($line =~ s/^!old//g) { # old mode on
|
|
if ($old eq 0) {
|
|
flush_output();
|
|
$old = 1;
|
|
}
|
|
else {
|
|
print "ERROR: more than 1 !old ($state $maincount.$subcount.$qcount):\n";
|
|
print "!old$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^Q: //g) { # questions
|
|
|
|
$qcount++;
|
|
|
|
# für jede Frage einen Punkt...
|
|
print ".";
|
|
|
|
# index part
|
|
if ($state ne "question") { # first line
|
|
flush_output ();
|
|
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
if ($qcount != 1) {
|
|
output_newline("");
|
|
}
|
|
nice_output ( $line, "$qcount. ");
|
|
}
|
|
else {
|
|
print "ERROR: \"Q: \" is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "Q: $line\n";
|
|
}
|
|
$state = "question";
|
|
}
|
|
|
|
elsif ($line =~ s/^A: //g) { # answers
|
|
if ($state eq "question") {
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
ugly_output("");
|
|
nice_output ( $line);
|
|
$state = "answer";
|
|
}
|
|
else {
|
|
print "ERROR: \"A: \" is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "A: $line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!verbon//g) { # script mode on
|
|
if ($state ne "script") {
|
|
flush_output ();
|
|
$buffer = $state;
|
|
$state = "script";
|
|
}
|
|
else {
|
|
print "ERROR: !verbon is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "!verbon$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!verboff//g) { # script mode off
|
|
if ($state eq "script") {
|
|
flush_output ();
|
|
$state = $buffer;
|
|
}
|
|
else {
|
|
print "ERROR: missing !verbon ($state $maincount.$subcount.$qcount):\n";
|
|
print "!verboff$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!commenton//g) { # comment mode on
|
|
if ($state ne "comment") {
|
|
flush_output ();
|
|
output_newline ("");
|
|
$buffer = $state;
|
|
$state = "comment";
|
|
}
|
|
else {
|
|
print "ERROR: !commenton is already active ($state $maincount.$subcount.$qcount):\n";
|
|
print "!commenton$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($line =~ s/^!commentoff//g) { # comment mode off
|
|
if ($state eq "comment") {
|
|
flush_output ();
|
|
$state = $buffer;
|
|
}
|
|
else {
|
|
print "ERROR: missing !commenton ($state $maincount.$subcount.$qcount):\n";
|
|
print "!commentoff$line\n";
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "question") { # do while in question mode
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
if (length ($line) > 0) {
|
|
nice_output ( $line);
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "answer") { # do while in answer mode
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
if (length ($line) > 0) {
|
|
nice_output ( $line);
|
|
}
|
|
else {
|
|
flush_output ();
|
|
output_newline ("");
|
|
$old = 0;
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "script") { # script mode ("!verbon")
|
|
if ($buffer eq "question") {
|
|
# leave spaces/margin alone => keine Formattierung
|
|
ugly_output ($line);
|
|
}
|
|
elsif ($buffer eq "answer") {
|
|
# leave spaces/margin alone => keine Formattierung
|
|
ugly_output ($line);
|
|
}
|
|
}
|
|
|
|
elsif ($state eq "comment") {
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
comment_output ($line);
|
|
}
|
|
|
|
elsif ($line =~ s/^<.*>//g) { # do if mark
|
|
# leave spaces/margin alone
|
|
# - and whatever the purpose of that part is after the change in
|
|
# syntax... MatHes
|
|
print "Skipped mark $&\n";
|
|
}
|
|
|
|
else {
|
|
# delete leading/trailing whitespace
|
|
$line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g;
|
|
if (length($line) > 0) {
|
|
comment_output ($line);
|
|
}
|
|
else {
|
|
flush_output();
|
|
}
|
|
}
|
|
}
|
|
|
|
flush_output();
|
|
ugly_output ("-" x 75);
|
|
ugly_output ("(c) 1995,1996 Das FAQ-Team");
|
|
|
|
print "\n";
|
|
|
|
|
|
#
|
|
# Close files
|
|
#
|
|
|
|
close (IN);
|
|
close (OUT);
|
|
|
|
|
|
sub ugly_output
|
|
{ # Kein Wrapping, kein Rand (außer bei Änderungsstrich)
|
|
# Input Parameter: Textzeile, evtl. Nummerierung (_als String_)
|
|
my ($line ) = @_;
|
|
|
|
# Umlaute umwandeln
|
|
$line = &latin2asc ($line);
|
|
|
|
# flush
|
|
flush_output ();
|
|
|
|
# output...
|
|
output_simple_line ($line);
|
|
}
|
|
|
|
|
|
sub comment_output
|
|
{ # Wrapping, kein Rand, kein Änderungsstrich
|
|
# Input Parameter: Textzeile, evtl. Numerierung (_als String_)
|
|
my ($line) = @_;
|
|
my $pos = 0;
|
|
|
|
# Umlaute umwandeln _vor_ Wrapping
|
|
$line = &latin2asc ($line);
|
|
|
|
# Zeile anhängen mit korrektem Zwischenraum
|
|
if (length ($line) > 0) {
|
|
if (length ($commentline) > 0) {
|
|
if (substr ($commentline, length ($commentline)) ne "-") {
|
|
# Leerzeichen zwischen Zeilen einfügen, falls kein Trennstrich
|
|
$commentline .= " ";
|
|
}
|
|
else {
|
|
# Trennstrich entfernen
|
|
$commentline = substr ($commentline, 0, length ($commentline) -1);
|
|
}
|
|
}
|
|
# Zeile anhängen
|
|
$commentline .= $line;
|
|
}
|
|
|
|
# Zeilenweise ausgeben
|
|
while( length( $commentline) > $maxlen+$margin || $commentline =~ /\n/) {
|
|
$pos = wrap_pos ($commentline, $maxlen+$margin);
|
|
if ($pos > 0) {
|
|
output_newline (substr ($commentline, 0, $pos));
|
|
# +1 für Trennzeichen: Space bzw. "\n"
|
|
$commentline = substr ($commentline, $pos +1);
|
|
}
|
|
else {
|
|
# kein wrapping möglich, da line zu lange => einfach abschneiden!
|
|
# (zu kurze line muß aufgrund der while-Bedingung ja mindestens
|
|
# ein "\n" als Trennzeichen haben => schon oben behandelt)
|
|
output_newline (substr ($commentline, 0, $maxlen));
|
|
$commentline = substr ($commentline, $maxlen);
|
|
}
|
|
}
|
|
|
|
# in commentline steht der Rest der Zeile...
|
|
}
|
|
|
|
|
|
sub nice_output
|
|
{ # Wrapping und Rand
|
|
# Input Parameter: Textzeile, evtl. Numerierung (_als String_)
|
|
my ($line, $number ) = @_;
|
|
$number = "" unless defined ($number);
|
|
my $pos = 0;
|
|
|
|
# Umlaute umwandeln _vor_ Wrapping
|
|
$line = &latin2asc ($line);
|
|
|
|
# flush actline, when new number comes in
|
|
if (length ($number) > 0) {
|
|
flush_output ();
|
|
# Neue Nummer merken
|
|
$lastnumber = $number;
|
|
}
|
|
|
|
# Zeile anhängen mit korrektem Zwischenraum
|
|
if (length ($line) > 0) {
|
|
if (length ($actline) >0) {
|
|
if (substr ($actline, length ($actline)) ne "-") {
|
|
# Leerzeichen zwischen Zeilen einfügen, falls kein Trennstrich
|
|
$actline .= " ";
|
|
}
|
|
else {
|
|
# Trennstrich entfernen
|
|
$actline= substr ($actline, 0, length ($actline) -1);
|
|
}
|
|
}
|
|
# Zeile anhängen
|
|
$actline .= $line;
|
|
}
|
|
|
|
# Zeilenweise ausgeben
|
|
while( length( $actline) > $maxlen || $actline =~ /\n/) {
|
|
$pos = wrap_pos ($actline, $maxlen);
|
|
if ($pos > 0) {
|
|
print OUT format_margin( $lastnumber);
|
|
print OUT substr ($actline, 0, $pos);
|
|
print OUT "\n";
|
|
# +1 für Trennzeichen: Space bzw. "\n"
|
|
$actline = substr ($actline, $pos +1);
|
|
}
|
|
else {
|
|
# kein wrapping möglich, da line zu lange => einfach abschneiden!
|
|
# (zu kurze line muß aufgrund der while-Bedingung ja mindestens
|
|
# ein "\n" als Trennzeichen haben => schon oben behandelt)
|
|
print OUT format_margin( $lastnumber);
|
|
print OUT substr ($actline, 0, $maxlen);
|
|
print OUT "\n";
|
|
$actline = substr ($actline, $maxlen);
|
|
}
|
|
$lastnumber = "";
|
|
}
|
|
|
|
# in $actline steht der Rest der Zeile...
|
|
}
|
|
|
|
|
|
sub flush_output
|
|
{
|
|
# flush actline
|
|
if (length ($actline) > 0) {
|
|
print OUT format_margin( $lastnumber);
|
|
print OUT $actline;
|
|
print OUT "\n";
|
|
$actline = "";
|
|
$lastnumber = "";
|
|
}
|
|
|
|
# flush commentline
|
|
if (length ($commentline) > 0) {
|
|
print OUT $commentline;
|
|
print OUT "\n";
|
|
$commentline = "";
|
|
}
|
|
}
|
|
|
|
|
|
sub output_newline
|
|
{ # Druckt eine Zeile (_ohne_ Änderungsstrich)
|
|
# Achtung: die Umwandlung der Umlaute (latin2asc) muß schon erledigt sein!
|
|
# Input Parameter: Textzeile
|
|
my ($line) = @_;
|
|
|
|
print OUT $line;
|
|
print OUT "\n";
|
|
}
|
|
|
|
|
|
sub output_simple_line
|
|
{ # Gibt eine Textzeile aus, ggf. mit Änderungsstrich
|
|
# Input Parameter: Textzeile
|
|
my ($line) = @_;
|
|
|
|
flush_output();
|
|
|
|
if ($old == 0) {
|
|
print OUT $bar;
|
|
if ($line =~ /^ /) {
|
|
print OUT substr ($line, $barlen);
|
|
print OUT "\n";
|
|
}
|
|
else {
|
|
print OUT $line;
|
|
print OUT "\n";
|
|
}
|
|
}
|
|
else {
|
|
print OUT $line;
|
|
print OUT "\n";
|
|
}
|
|
}
|
|
|
|
|
|
sub wrap_pos
|
|
{
|
|
# Input Parameter: Textzeile
|
|
my ($line, $maxlen ) = @_;
|
|
my $pos1 = 0;
|
|
my $pos2 = 0;
|
|
|
|
$pos1 = index( $line, "\n");
|
|
if ($pos1 <= $maxlen && $pos1 >= 0) { # !br/Zeilenumbrüche berücksichtigen
|
|
return $pos1;
|
|
}
|
|
else {
|
|
$pos2 = rindex( $line, " ", $maxlen); # wo word wrapping?
|
|
if ($pos2 <= $maxlen && $pos2 >= 0) {
|
|
return $pos2;
|
|
}
|
|
else { # Kein wrapping möglich
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
sub format_margin
|
|
{
|
|
my( $number) = @_; # Input Parameter: evtl. Nummerierung _als String_
|
|
my $out = "";
|
|
|
|
if (length ($number) > 0) {
|
|
if ($old == 0) {
|
|
$out = $bar;
|
|
$out .= " " x ($margin - length ($number) -$barlen); # Margin
|
|
$out .= $number;
|
|
}
|
|
else {
|
|
$out = " " x ($margin - length ($number)); # Margin
|
|
$out .= $number;
|
|
}
|
|
}
|
|
else {
|
|
if ($old == 0) {
|
|
$out = $bar;
|
|
$out .= " " x ($margin -$barlen); # Margin
|
|
}
|
|
else {
|
|
$out .= " " x $margin; # Margin
|
|
}
|
|
}
|
|
return $out;
|
|
}
|
|
|
|
|
|
sub latin2asc
|
|
{
|
|
my ($line) = @_;
|
|
|
|
# native codes
|
|
$line =~ s/ä/ae/g;
|
|
$line =~ s/ö/oe/g;
|
|
$line =~ s/ü/ue/g;
|
|
$line =~ s/Ä/Ae/g;
|
|
$line =~ s/Ö/Oe/g;
|
|
$line =~ s/Ü/Ue/g;
|
|
$line =~ s/ß/ss/g;
|
|
|
|
# !br als Umbruch verwenden
|
|
$line =~ s|!br|\n|g;
|
|
while ($line =~ /!link /) {
|
|
my $pos1= 0;
|
|
my $pos2= 0;
|
|
my $url= "";
|
|
|
|
$pos1= index($line, "!link ");
|
|
$pos2= index($line, ";", $pos1);
|
|
if ($pos2 == -1) {
|
|
$pos2= length($line);
|
|
}
|
|
$url= substr($line, $pos1+length("!link "),
|
|
$pos2-$pos1-length("!link "));
|
|
$line = substr($line, 0, $pos1)
|
|
. " $url "
|
|
. substr($line, $pos2+1);
|
|
}
|
|
|
|
# Hervorhebungen
|
|
$line =~ s|!b1|*|g;
|
|
$line =~ s|!b0|*|g;
|
|
$line =~ s|!i1|/|g;
|
|
$line =~ s|!i0|/|g;
|
|
$line =~ s|!u1|_|g;
|
|
$line =~ s|!u0|_|g;
|
|
|
|
return $line;
|
|
}
|