#!/usr/bin/perl # # Creates 7 bit ASCII text from Latin-1 FAQ # # (c) 1996 Volker Götz # modified 1996/1997 by # Bernhard Hailer # Matthias Heßler # # $Id$ # # Variables # # Unbuffered print's $| = 1; $margin = 6; $bar = "| "; $barlen = length ($bar); # maxlen gibt an, wieviele Zeichen max. pro Zeile ausgegeben werden # (margin geht extra...) $maxlen = 79 - $margin; # Für die Datenausgabe benötigt... $actline = ""; $commentline = ""; $lastnumber = ""; # Für Änderungsstrich (default: Änderungsstrich!) $old = 0; # Fragenzaehler $maincount = 0; $subcount = 0; $qcount = 0; # Statusvariable $script = "FALSE"; $state = "begin"; $buffer = "-"; # # Get file names from prompt # $in = $ARGV[0]; die "Usage: 7bit.pl infile [outfile] [headtempfile] [tailtempfile]\n" unless defined ($in); $out = $ARGV[1]; $out = "$in.asc" unless defined ($out); # # Open files # open(IN,"<$in") || die "Can not open $in for input!\n"; open(OUT,">$out") || die "Can not open $out for output!\n"; # # Create FAQ - TOC # print ("Creating TOC:\n"); while($line = ) { chop($line); # kill \n character (newline) if ($line =~ s/^!1//g) { # main headline if ($state eq "begin") { $line = &latin2asc ($line); output_newline ("\n\n".$line); output_newline ("#" x length ($line)); output_newline (""); $state = "!1"; } else { print "ERROR: too many !1 or !1 improper located:\n"; print "!1$line\n"; } } elsif ($state eq "begin") { # Vorspann... $line = &latin2asc ($line); output_newline ($line); } elsif ($line =~ s/^!2//g) { # bigger headlines $maincount++; $subcount = 0; $qcount = 0; flush_output (); $line = &latin2asc ($line); output_newline ("\n\n$maincount $line"); output_newline ("=" x (length($line)+length($maincount)+1)); output_newline (""); $state = "!2"; } elsif ($line =~ s/^!3//g) { # smaller headlines $subcount++; $qcount = 0; flush_output (); $line = &latin2asc ($line); output_newline ("\n$maincount.$subcount $line"); output_newline ("-" x (length($line)+length($maincount)+length($subcount)+2)); output_newline (""); $state = "!3"; } elsif ($line =~ s/^!old//g) { # old mode on if ($old eq 0) { flush_output(); $old = 1; } else { print "ERROR: more than 1 !old ($state $maincount.$subcount.$qcount):\n"; print "!old$line\n"; } } elsif ($line =~ s/^Q: //g) { # questions $qcount++; # für jede Frage einen Punkt... print "."; # index part if ($state ne "question") { # first line flush_output (); # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; nice_output ( $line, "$qcount. "); } else { print "ERROR: \"Q: \" is already active ($state $maincount.$subcount.$qcount):\n"; print "Q: $line\n"; } $state = "question"; } elsif ($line =~ s/^A: //g) { # answers if ($state eq "question") { flush_output(); $old = 0; # Kann wieder zurückschalten, da keine Ausgabe mehr... $state = "answer"; } else { print "ERROR: \"A: \" is already active ($state $maincount.$subcount.$qcount):\n"; print "A: $line\n"; } } elsif ($line =~ s/^!verbon//g) { # script mode on if ($state ne "script") { flush_output (); $buffer = $state; $state = "script"; } else { print "ERROR: !verbon is already active ($state $maincount.$subcount.$qcount):\n"; print "!verbon$line\n"; } } elsif ($line =~ s/^!verboff//g) { # script mode off if ($state eq "script") { flush_output (); $state = $buffer; } else { print "ERROR: missing !verbon ($state $maincount.$subcount.$qcount):\n"; print "!verboff$line\n"; } } elsif ($state eq "script") { # script mode ("!verbon") if ($buffer eq "question") { # leave spaces/margin alone => keine Formattierung ugly_output ($line); } elsif ($buffer eq "answer") { # keine Ausgabe in der TOC } } elsif ($state eq "question") { # do while in question mode # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; if (length ($line) > 0) { nice_output ( $line); } } } flush_output(); # # Reopen Inputfile # close (IN); print "\n"; open(IN,"<$in") || die "Can not reopen $in for input!\n"; # # Create main part of FAQ # print ("Writing questions:\n"); $maincount= 0; $subcount = 0; $qcount = 0; $state = "begin"; $buffer = "-"; while($line = ) { chop($line); # kill \n character (newline) if ($line =~ s/^!1//g) { # main headline if ($state eq "begin") { $line = &latin2asc ($line); output_newline (""); output_newline ("\n\n$line"); output_newline ("#" x length ($line)); output_newline (""); $state = "!1"; } else { print "ERROR: too many !1 or !1 improper located:\n"; print "!1$line\n"; } } elsif ($state eq "begin") { # Vorspann... # wurde schon ausgegeben => überspringen! } elsif ($line =~ s/^!2//g) { # bigger headlines $maincount++; $subcount = 0; $qcount = 0; flush_output (); $line = &latin2asc ($line); output_newline (""); output_newline ("\n\n$maincount $line"); output_newline ("=" x (length($line) + length($maincount) + 1)); output_newline (""); $state = "!2"; } elsif ($line =~ s/^!3//g) { # smaller headlines $subcount++; $qcount = 0; flush_output (); $line = &latin2asc ($line); output_newline (""); output_newline ("\n\n$maincount.$subcount $line"); output_newline ("-" x (length($line) + length($maincount) + length($subcount) + 2)); output_newline (""); $state = "!3"; } elsif ($line =~ s/^!old//g) { # old mode on if ($old eq 0) { flush_output(); $old = 1; } else { print "ERROR: more than 1 !old ($state $maincount.$subcount.$qcount):\n"; print "!old$line\n"; } } elsif ($line =~ s/^Q: //g) { # questions $qcount++; # für jede Frage einen Punkt... print "."; # index part if ($state ne "question") { # first line flush_output (); # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; if ($qcount != 1) { output_newline(""); } nice_output ( $line, "$qcount. "); } else { print "ERROR: \"Q: \" is already active ($state $maincount.$subcount.$qcount):\n"; print "Q: $line\n"; } $state = "question"; } elsif ($line =~ s/^A: //g) { # answers if ($state eq "question") { # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; ugly_output(""); nice_output ( $line); $state = "answer"; } else { print "ERROR: \"A: \" is already active ($state $maincount.$subcount.$qcount):\n"; print "A: $line\n"; } } elsif ($line =~ s/^!verbon//g) { # script mode on if ($state ne "script") { flush_output (); $buffer = $state; $state = "script"; } else { print "ERROR: !verbon is already active ($state $maincount.$subcount.$qcount):\n"; print "!verbon$line\n"; } } elsif ($line =~ s/^!verboff//g) { # script mode off if ($state eq "script") { flush_output (); $state = $buffer; } else { print "ERROR: missing !verbon ($state $maincount.$subcount.$qcount):\n"; print "!verboff$line\n"; } } elsif ($line =~ s/^!commenton//g) { # comment mode on if ($state ne "comment") { flush_output (); output_newline (""); $buffer = $state; $state = "comment"; } else { print "ERROR: !commenton is already active ($state $maincount.$subcount.$qcount):\n"; print "!commenton$line\n"; } } elsif ($line =~ s/^!commentoff//g) { # comment mode off if ($state eq "comment") { flush_output (); $state = $buffer; } else { print "ERROR: missing !commenton ($state $maincount.$subcount.$qcount):\n"; print "!commentoff$line\n"; } } elsif ($state eq "question") { # do while in question mode # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; if (length ($line) > 0) { nice_output ( $line); } } elsif ($state eq "answer") { # do while in answer mode # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; if (length ($line) > 0) { nice_output ( $line); } else { flush_output (); output_newline (""); $old = 0; } } elsif ($state eq "script") { # script mode ("!verbon") if ($buffer eq "question") { # leave spaces/margin alone => keine Formattierung ugly_output ($line); } elsif ($buffer eq "answer") { # leave spaces/margin alone => keine Formattierung ugly_output ($line); } } elsif ($state eq "comment") { # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; comment_output ($line); } elsif ($line =~ s/^<.*>//g) { # do if mark # leave spaces/margin alone # - and whatever the purpose of that part is after the change in # syntax... MatHes print "Skipped mark $&\n"; } else { # delete leading/trailing whitespace $line =~ s/^[ \t]*(.*?)[ \t]*$/$1/g; if (length($line) > 0) { comment_output ($line); } else { flush_output(); } } } flush_output(); ugly_output ("-" x 75); ugly_output ("(c) 1995,1996 Das FAQ-Team"); print "\n"; # # Close files # close (IN); close (OUT); sub ugly_output { # Kein Wrapping, kein Rand (außer bei Änderungsstrich) # Input Parameter: Textzeile, evtl. Nummerierung (_als String_) my ($line ) = @_; # Umlaute umwandeln $line = &latin2asc ($line); # flush flush_output (); # output... output_simple_line ($line); } sub comment_output { # Wrapping, kein Rand, kein Änderungsstrich # Input Parameter: Textzeile, evtl. Numerierung (_als String_) my ($line) = @_; my $pos = 0; # Umlaute umwandeln _vor_ Wrapping $line = &latin2asc ($line); # Zeile anhängen mit korrektem Zwischenraum if (length ($line) > 0) { if (length ($commentline) > 0) { if (substr ($commentline, length ($commentline)) ne "-") { # Leerzeichen zwischen Zeilen einfügen, falls kein Trennstrich $commentline .= " "; } else { # Trennstrich entfernen $commentline = substr ($commentline, 0, length ($commentline) -1); } } # Zeile anhängen $commentline .= $line; } # Zeilenweise ausgeben while( length( $commentline) > $maxlen+$margin || $commentline =~ /\n/) { $pos = wrap_pos ($commentline, $maxlen+$margin); if ($pos > 0) { output_newline (substr ($commentline, 0, $pos)); # +1 für Trennzeichen: Space bzw. "\n" $commentline = substr ($commentline, $pos +1); } else { # kein wrapping möglich, da line zu lange => einfach abschneiden! # (zu kurze line muß aufgrund der while-Bedingung ja mindestens # ein "\n" als Trennzeichen haben => schon oben behandelt) output_newline (substr ($commentline, 0, $maxlen)); $commentline = substr ($commentline, $maxlen); } } # in commentline steht der Rest der Zeile... } sub nice_output { # Wrapping und Rand # Input Parameter: Textzeile, evtl. Numerierung (_als String_) my ($line, $number ) = @_; $number = "" unless defined ($number); my $pos = 0; # Umlaute umwandeln _vor_ Wrapping $line = &latin2asc ($line); # flush actline, when new number comes in if (length ($number) > 0) { flush_output (); # Neue Nummer merken $lastnumber = $number; } # Zeile anhängen mit korrektem Zwischenraum if (length ($line) > 0) { if (length ($actline) >0) { if (substr ($actline, length ($actline)) ne "-") { # Leerzeichen zwischen Zeilen einfügen, falls kein Trennstrich $actline .= " "; } else { # Trennstrich entfernen $actline= substr ($actline, 0, length ($actline) -1); } } # Zeile anhängen $actline .= $line; } # Zeilenweise ausgeben while( length( $actline) > $maxlen || $actline =~ /\n/) { $pos = wrap_pos ($actline, $maxlen); if ($pos > 0) { print OUT format_margin( $lastnumber); print OUT substr ($actline, 0, $pos); print OUT "\n"; # +1 für Trennzeichen: Space bzw. "\n" $actline = substr ($actline, $pos +1); } else { # kein wrapping möglich, da line zu lange => einfach abschneiden! # (zu kurze line muß aufgrund der while-Bedingung ja mindestens # ein "\n" als Trennzeichen haben => schon oben behandelt) print OUT format_margin( $lastnumber); print OUT substr ($actline, 0, $maxlen); print OUT "\n"; $actline = substr ($actline, $maxlen); } $lastnumber = ""; } # in $actline steht der Rest der Zeile... } sub flush_output { # flush actline if (length ($actline) > 0) { print OUT format_margin( $lastnumber); print OUT $actline; print OUT "\n"; $actline = ""; $lastnumber = ""; } # flush commentline if (length ($commentline) > 0) { print OUT $commentline; print OUT "\n"; $commentline = ""; } } sub output_newline { # Druckt eine Zeile (_ohne_ Änderungsstrich) # Achtung: die Umwandlung der Umlaute (latin2asc) muß schon erledigt sein! # Input Parameter: Textzeile my ($line) = @_; print OUT $line; print OUT "\n"; } sub output_simple_line { # Gibt eine Textzeile aus, ggf. mit Änderungsstrich # Input Parameter: Textzeile my ($line) = @_; flush_output(); if ($old == 0) { print OUT $bar; if ($line =~ /^ /) { print OUT substr ($line, $barlen); print OUT "\n"; } else { print OUT $line; print OUT "\n"; } } else { print OUT $line; print OUT "\n"; } } sub wrap_pos { # Input Parameter: Textzeile my ($line, $maxlen ) = @_; my $pos1 = 0; my $pos2 = 0; $pos1 = index( $line, "\n"); if ($pos1 <= $maxlen && $pos1 >= 0) { # !br/Zeilenumbrüche berücksichtigen return $pos1; } else { $pos2 = rindex( $line, " ", $maxlen); # wo word wrapping? if ($pos2 <= $maxlen && $pos2 >= 0) { return $pos2; } else { # Kein wrapping möglich return 0; } } } sub format_margin { my( $number) = @_; # Input Parameter: evtl. Nummerierung _als String_ my $out = ""; if (length ($number) > 0) { if ($old == 0) { $out = $bar; $out .= " " x ($margin - length ($number) -$barlen); # Margin $out .= $number; } else { $out = " " x ($margin - length ($number)); # Margin $out .= $number; } } else { if ($old == 0) { $out = $bar; $out .= " " x ($margin -$barlen); # Margin } else { $out .= " " x $margin; # Margin } } return $out; } sub latin2asc { my ($line) = @_; # native codes $line =~ s/ä/ae/g; $line =~ s/ö/oe/g; $line =~ s/ü/ue/g; $line =~ s/Ä/Ae/g; $line =~ s/Ö/Oe/g; $line =~ s/Ü/Ue/g; $line =~ s/ß/ss/g; # !br als Umbruch verwenden $line =~ s|!br|\n|g; while ($line =~ /!link /) { my $pos1= 0; my $pos2= 0; my $url= ""; $pos1= index($line, "!link "); $pos2= index($line, ";", $pos1); if ($pos2 == -1) { $pos2= length($line); } $url= substr($line, $pos1+length("!link "), $pos2-$pos1-length("!link ")); $line = substr($line, 0, $pos1) . " $url " . substr($line, $pos2+1); } # Hervorhebungen $line =~ s|!b1|*|g; $line =~ s|!b0|*|g; $line =~ s|!i1|/|g; $line =~ s|!i0|/|g; $line =~ s|!u1|_|g; $line =~ s|!u0|_|g; return $line; }