docbook: Port make-wsluarm to Python3

Port the script that creates docbook/wsluarm_src/*.adoc to Python3.
Ping #18152.
This commit is contained in:
Gerald Combs 2022-06-29 18:33:36 -07:00
parent 832687cc74
commit a1c83a901b
4 changed files with 474 additions and 721 deletions

View File

@ -29,10 +29,10 @@ Wireshark uses C-macros liberally, both for the usual reasons as well as for
the binding generator and documentation generator scripts. The macros are
described within this document.
The API documentation is auto-generated from a Perl script called 'make-
wsluarm.pl', which searches C-files for the known macros and generates
appropriate HTML documentation from them. This includes using the C-comments
after the macros for the API document info.
The API documentation is auto-generated from a Python script called 'make-
wsluarm.py', which searches C-files for the known macros and generates
appropriate AsciiDoc documentation from them. This includes using the C
comments after the macros for the API document info.
Likewise, another script called 'make-reg.py' generates the C-files
'register_wslua.c' and 'declare_wslua.h', based on the C-macros it searches
@ -81,7 +81,7 @@ followed.
Documenting things for the API docs:
As explained previously, the API documentation is auto-generated from a
Perl script called 'make-wsluarm.pl', which searches C-files for the known
Python script called 'make-wsluarm.py', which searches C-files for the known
macros and generates appropriate AsciiDoc documentation from them. This includes
using the C-comments after the macros for the API document info. The comments
are extremely important, because the API documentation is what most Lua script
@ -92,10 +92,10 @@ Please make sure to at least use the '@since' version notification markup
in your comments, to let users know when the new class/function/etc. you
created became available.
Because documentation is so important, the make-wsluarm.pl script supports
Because documentation is so important, the make-wsluarm.py script supports
specific markup syntax in comments, and converts them to XML and ultimately
into the various documentation formats. The markup syntax is documented in
the top comments in make-wsluarm.pl, but are repeated here as well:
the top comments in make-wsluarm.py, but are repeated here as well:
- two (or more) line breaks in comments result in separate paragraphs
- all '&' are converted into their entity names, except inside urls
- all '<', and '>' are converted into their entity names everywhere
@ -292,7 +292,7 @@ table named 'Gui' (in fact there isn't). It's just for documentation.
If you look at the documentation, you'll see there is 'ProgDlg', 'TextWindow',
etc. in that 'GUI support' section. That's because both ProgDlg and
TextWindow are defined in that same wslua_gui.c file using the
'WSLUA_CLASS_DEFINE' macro. (see description of that later) make-wsluarm.pl
'WSLUA_CLASS_DEFINE' macro. (see description of that later) make-wsluarm.py
created those in the same documentation section because they're in the same c
file as that WSLUA_MODULE comment. You'll also note the documentation
includes a sub-section for 'Non Method Functions', which it auto-generated
@ -442,7 +442,7 @@ WSLUA_ARG_ - the prefix used in a #define statement, for a required
function/method argument (ie, one without a default value). It is defined to
an integer representing the index slot number of the Lua stack it will be at,
when calling the appropriate lua_check/lua_opt routine to get it from the
stack. The make_wsluarm.pl Perl script will generate API documentation with
stack. The make-wsluarm.py Python script will generate API documentation with
this argument name for the function/method, removing the 'WSLUA_ARG_' prefix.
The name following the 'WSLUA_ARG_' prefix must be the same name as the
function it's an argument for, followed by an underscore and then an ALLCAPS
@ -458,7 +458,7 @@ WSLUA_OPTARG_ - the prefix used in a #define statement, for an optional
function/method argument (ie, one with a default value). It is defined to an
integer representing the index slot number of the Lua stack it will be at,
when calling the appropriate lua_check/lua_opt routine to get it from the
stack. The make_wsluarm.pl Perl script will generate API documentation with
stack. The make-wsluarm.py Python script will generate API documentation with
this argument name for the function/method, removing the 'WSLUA_OPTARG_'
prefix. The rules for the name of the argument after the prefix are the same
as for 'WSLUA_ARG_' above.

View File

@ -315,13 +315,14 @@ ADD_CUSTOM_COMMAND(
OUTPUT
wsluarm
COMMAND ${CMAKE_COMMAND} -E make_directory wsluarm_src
COMMAND ${PERL_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.pl
COMMAND ${PYTHON_EXECUTABLE}
${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.py
--output-directory wsluarm_src
${WSLUA_MODULES}
COMMAND ${CMAKE_COMMAND} -E touch
wsluarm
DEPENDS
${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.pl
${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.py
${WSLUA_MODULES}
)

View File

@ -1,708 +0,0 @@
#!/usr/bin/perl
#
# make-wsluarm.pl
# WSLUA's Reference Manual Generator
#
# (c) 2006, Luis E. Garcia Ontanon <luis@ontanon.org>
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
#
# (-: I don't even think writing this in Lua :-)
# ...well I wished you had!
#
# changed by Hadriel Kaplan to do the following:
# - generates pretty XML output, to make debugging it easier
# - allows modules (i.e., WSLUA_MODULE) to have detailed descriptions
# - two (or more) line breaks in comments result in separate paragraphs
# - all '&' are converted into their entity names, except inside urls
# - all '<', and '>' are converted into their entity names everywhere
# - any word(s) wrapped in one star, e.g., *foo bar*, become italics
# - any word(s) wrapped in two stars, e.g., **foo bar**, become commands (is there a 'bold'?)
# - any word(s) wrapped in backticks, e.g., `foo bar`, become commands (is there something better?)
# - any word(s) wrapped in two backticks, e.g., ``foo bar``, become one backtick
# - any "[[url]]" becomes an XML ulink with the url as both the url and text
# - any "[[url|text]]" becomes an XML ulink with the url as the url and text as text
# - any indent with a single leading star '*' followed by space is a bulleted list item
# reducing indent or having an extra linebreak stops the list
# - any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item
# reducing indent or having an extra linebreak stops the list
# - supports meta-tagged info inside comment descriptions as follows:
# * a line starting with "@note" or "Note:" becomes an XML note line
# * a line starting with "@warning" or "Warning:" becomes an XML warning line
# * a line starting with "@version" or "@since" becomes a "Since:" line
# * a line starting with "@code" and ending with "@endcode" becomes an
# XML programlisting block, with no indenting/parsing within the block
# The above '@' commands are based on Doxygen commands
#
# Changed by Gerald Combs to generate AsciiDoc.
# - We might want to convert the epan/wslua/*.c markup to AsciiDoc
# - ...or we might want to generate Doxygen output instead.
use strict;
#use V2P;
# Debug hook. With the warn line commented out this sub does nothing; callers
# pass it a preformatted string describing the regex captures they just saw.
sub deb {
# warn $_[0];
}
sub gorolla {
    # Trim helper: hands back its argument with every leading and trailing
    # whitespace character (newlines included) removed.
    # No markup conversion is done here any more; comment markup is expected
    # to be Asciidoctor-compatible already.
    my ($text) = @_;
    $text =~ s/\A\s*//;    # drop whitespace at the very start
    $text =~ s/\s*$//;     # drop whitespace at the very end
    return $text;
}
# Break up a description based on newlines and keywords.
#
# Arguments:
#   $_[0] - array ref of entries to start with; its contents are copied, the
#           caller's array is not modified
#   $_[1] - array ref of input lines to parse (callers run gorolla first)
# Returns a reference to a new array holding the starting entries followed by
# the generated output entries. Each entry is a chunk of AsciiDoc text ending
# in a newline; paragraph breaks are entries that contain only newlines.
#
# Handling per input line (after leading whitespace is removed):
#   - "[source...]" : the line, the delimiter line after it, and everything
#                     up to the closing delimiter are emitted as one entry
#   - blank line    : emitted as a paragraph break
#   - "@version " / "@since " : prefix replaced by "Since: "
#   - "* " or "1. " : start of a list; following lines are added until a blank
#                     line, a less-indented line, or the end of input
#   - anything else : "[[...]]" is rewritten to "$$...$$" and the line emitted
sub parse_desc_common {
my @r; # a temp array we fill, then copy into @ret below
my @ret = @{ $_[0] };
my @lines = @{ $_[1] };
# the following will unfortunately create empty paragraphs too
# (ie, <para> followed by </para>), so we do this stuff to a temp @r
# array and then copy the non-empty ones into the passed-in array @ret
if ($#lines >= 0) {
# for each double newline, break into separate para's
for (my $idx=0; $idx <= $#lines; $idx++) {
$lines[$idx] =~ s/^(\s*)//; # remove leading whitespace
# save number of spaces in case we need to know later
my $indent = length($1);
# if we find [source,...] then treat it as a blob
if ($lines[$idx] =~ /^\[source.*\]/) {
my $line = $lines[$idx] . "\n";
# the next line *should* be a delimiter...
my $block_delim = $lines[++$idx];
$block_delim =~ s/^\s+|\s+$//g;
$line .= $block_delim . "\n";
my $block_line = $lines[++$idx];
# NOTE(review): $block_delim is interpolated into the pattern unescaped, so
# regex metacharacters in the delimiter are interpreted as regex syntax.
while (!($block_line =~ qr/^\s*$block_delim\s*$/) && $idx <= $#lines) {
# keep eating lines until the closing delimiter.
# XXX Strip $indent spaces?
$line .= $block_line . "\n";
$block_line = $lines[++$idx];
}
$line .= $block_delim;
$r[++$#r] = $line . "\n";
} elsif ($lines[$idx] =~ /^\s*$/) {
# line is either empty or just whitespace, and we're not in a @code block
# so it's the end of a previous paragraph, beginning of new one
$r[++$#r] = "\n";
} else {
# We have a regular line, not in a @code block.
# Add it as-is.
my $line = $lines[$idx];
# if line starts with "@version" or "@since", make it a "Since:"
if ($line =~ /^\@version |^\@since /) {
$line =~ s/^\@version\s+|^\@since\s+/Since: /;
$r[++$#r] = $line . "\n";
# if line starts with single "*" and space, leave it mostly intact.
} elsif ($line =~ /^\*\s/) {
$r[++$#r] = "\n";
$r[++$#r] = "" . $line . "\n";
# keep eating until we find a blank line or end
while (!($lines[++$idx] =~ /^\s*$/) && $idx <= $#lines) {
$lines[$idx] =~ s/^(\s*)//; # count and remove leading whitespace
# if this is less indented than before, break out
# NOTE(review): the line that triggers this exit is not emitted, and the
# outer for-loop's $idx++ then moves past it - it looks like it is dropped.
last if length($1) < $indent;
$r[++$#r] = "" . $lines[$idx] . "\n";
}
$r[++$#r] = "\n\n";
# if line starts with "1." and space, leave it mostly intact.
} elsif ($line =~ /^1\.\s/) {
$r[++$#r] = "\n";
$r[++$#r] = "" . $line . "\n";
# keep eating until we find a blank line or end
while (!($lines[++$idx] =~ /^\s*$/) && $idx <= $#lines) {
$lines[$idx] =~ s/^(\s*)//; # count and remove leading whitespace
# if this is less indented than before, break out
# (same exit behaviour as the bulleted-list loop above)
last if length($1) < $indent;
$r[++$#r] = "" . $lines[$idx] . "\n";
}
$r[++$#r] = "\n\n";
# just a normal line, add it to array
} else {
# Nested Lua arrays
$line =~ s/\[\[(.*)\]\]/\$\$$1\$\$/g;
$r[++$#r] = "" . $line . "\n";
}
}
}
# always finish the description with a paragraph break
$r[++$#r] = "\n\n";
# Now go through @r, and copy into @ret but skip empty lines.
# This isn't strictly necessary but makes the AsciiDoc output prettier.
for (my $idx=0; $idx <= $#r; $idx++) {
# an entry is skipped (together with the one after it) only when it and the
# next two entries are all whitespace-only
if ($r[$idx] =~ /^\s*$/ && $r[$idx+1] =~ /^\s*$/ && $r[$idx+2] =~ /^\s*$/) {
$idx++; # for-loop will increment $idx and skip the other one
} else {
$ret[++$#ret] = $r[$idx];
}
}
}
return \@ret;
}
# Parse an ordinary description (class, function, etc. - not a module and
# not a function argument). The text is trimmed, split on newlines and run
# through parse_desc_common starting from an empty result; the array ref
# produced by parse_desc_common is returned.
sub parse_desc {
    my $text  = gorolla(shift);
    my @lines = split(/\n/, $text);
    return parse_desc_common([], \@lines);
}
# Parse a module description. The first line of the trimmed text is the
# module title and is emitted as an AsciiDoc "=== " heading; any remaining
# lines are the optional description and go through parse_desc_common.
sub parse_module_desc {
    my $text = gorolla(shift);
    my ($title, @body) = split(/\n/, $text);
    my @head = ("=== $title\n");
    return parse_desc_common(\@head, \@body);
}
# Parse the description of one function argument. The result starts with a
# lone newline entry, followed by whatever parse_desc_common produces for
# the trimmed text. Returns a reference to a fresh array.
sub parse_function_arg_desc {
    my $text   = gorolla(shift);
    my @lines  = split(/\n/, $text);
    my @result = @{ parse_desc_common([ "\n" ], \@lines) };
    return \@result;
}
# Parse an attribute description.
#   first arg  - the raw description text
#   second arg - the access mode string (RO, WO, RW or WR); required
# The mode is spelled out and emitted as a leading "Mode: ..." entry, then the
# description is parsed by parse_desc_common. Dies when no mode was given.
sub parse_attrib_desc {
    my $text = gorolla(shift);
    my $mode = shift;

    die "Attribute does not have a RO/WO/RW mode: '$text'\n" unless $mode;

    # expand the mode abbreviation into readable text
    $mode =~ s/RO/Retrieve only./;
    $mode =~ s/WO/Assign only./;
    $mode =~ s/RW|WR/Retrieve or assign./;

    my @head  = ("Mode: $mode\n\n");
    my @lines = split(/\n/, $text);
    return parse_desc_common(\@head, \@lines);
}
# Write a parsed description to the output filehandle D.
#   first arg  - array ref as returned by the parse_* subs
#   second arg - indent level; defaults to 2 and is not used for the output
# Every entry is written verbatim, followed by one extra newline at the end.
sub print_desc {
    my ($desc_ref, $indent) = @_;
    $indent = 2 unless $indent;
    printf D "%s", $_ for @{$desc_ref};
    printf D "\n";
}
# Parser state shared between the @control callbacks and the main loop.
# %module  - name and parsed description of the module being read
# %modules - module name => output file name, filled as modules are written
# $class / %classes      - class currently being filled / all classes by name
# $function / @functions - function currently being filled / standalone functions
my %module = ();
my %modules = ();
my $class;
my %classes;
my $function;
my @functions;
# printf format strings for each piece of generated AsciiDoc.
# The commented-out entries are leftovers from the earlier XML output.
# Entries set to "" produce no output.
my $asciidoc_template = {
module_header => "[#lua_module_%s]\n\n",
# module_desc => "\t<title>%s</title>\n",
class_header => "[#lua_class_%s]\n\n" .
"==== %s\n\n",
#class_desc => "\t\t<para>%s</para>\n",
class_attr_header => "[#lua_class_attrib_%s]\n\n" .
"===== %s\n\n",
#class_attr_descr => "\t\t\t<para>%s%s</para>\n",
class_attr_footer => "// End %s\n\n",
function_header => "[#lua_fn_%s]\n\n" .
"===== %s\n\n",
#function_descr => "\t\t\t<para>%s</para>\n",
function_args_header => "[float]\n" .
"===== Arguments\n\n",
function_arg_header => "%s::\n",
#function_arg_descr => "\t\t\t\t\t\t<listitem>\n" .
# "\t\t\t\t\t\t\t<para>%s</para>\n" .
# "\t\t\t\t\t\t</listitem>\n",
function_arg_footer => "// function_arg_footer: %s\n",
function_args_footer => "// end of function_args\n\n",
function_argerror_header => "", #"\t\t\t\t\t<section><title>Errors</title>\n\t\t\t\t\t\t<itemizedlist>\n",
function_argerror => "", #"\t\t\t\t\t\t\t<listitem><para>%s</para></listitem>\n",
function_argerror_footer => "", #"\t\t\t\t\t\t</itemizedlist></section> <!-- function_argerror_footer: %s -->\n",
function_returns_header => "[float]\n" .
"===== Returns\n\n",
function_returns => "%s\n\n",
function_returns_footer => "// function_returns_footer: %s\n",
function_errors_header => "[float]\n" .
"===== Errors\n\n",
function_errors => "* %s\n\n",
function_errors_footer => "// function_errors_footer: %s\n",
function_footer => "// function_footer: %s\n\n",
class_footer => "// class_footer: %s\n",
global_functions_header => "\n[#global_functions_%s]\n\n" .
"==== Global Functions\n\n",
global_functions_footer => "// Global function\n",
module_footer => "// end of module\n",
};
# class_constructors_header => "\t\t<section id='lua_class_constructors_%s'>\n\t\t\t<title>%s Constructors</title>\n",
# class_constructors_footer => "\t\t</section> <!-- class_constructors_footer -->\n",
# class_methods_header => "\t\t<section id='lua_class_methods_%s'>\n\t\t\t<title>%s Methods</title>\n",
# class_methods_footer => "\t\t</section> <!-- class_methods_footer: %s -->\n",
# the template set in use, and the extension given to generated files
my $template_ref = $asciidoc_template;
my $out_extension = "adoc";
# It's said that only perl can parse perl... my editor isn't perl...
# if unencoded this causes my editor's autoindent to bail out so I encoded in octal
# XXX: support \" within ""
# Decoded, the octal escapes below spell: "([^"]*)"
# i.e. a double-quoted string whose contents are captured in one group.
my $QUOTED_RE = "\042\050\133^\042\135*\051\042";
# group 1: whole trailing comment (possibly empty), e.g. " /* foo */"
# group 2: any leading whitespace. XXX why is this not removed using (?:...)
# group 3: actual comment text, e.g. " foo ".
# (group numbers are relative to where this fragment is appended)
my $TRAILING_COMMENT_RE = '((\s*|[\n\r]*)/\*(.*?)\*/)?';
# optional whitespace, then optionally the text up to the closing "*/";
# the inner group holds that text
my $IN_COMMENT_RE = '[\s\r\n]*((.*?)\*/)?';
# Dispatch table: each entry is [ regex, callback ]. The main loop finds the
# next piece of input matching any of the regexes, then runs the callback of
# the first entry whose regex matches it; the callback reads the capture
# variables ($1, $2, ...) of that match.
# Octal escapes used in the patterns: \050 '('  \051 ')'  \052 '*'
# \057 '/'  \173 '{'.
# Where $TRAILING_COMMENT_RE is appended it contributes three capture groups,
# the last of which is the comment text.
my @control =
(
# This will be scanned in order trying to match the re if it matches
# the body will be executed immediately after.
# module name plus its title/description taken from the rest of the comment
[ 'WSLUA_MODULE\s*([A-Z][a-zA-Z0-9]+)' . $IN_COMMENT_RE,
sub {
$module{name} = $1;
$module{descr} = parse_module_desc($3);
} ],
# class definition: starts a new class record and makes it the current class
[ 'WSLUA_CLASS_DEFINE(?:_BASE)?\050\s*([A-Z][a-zA-Z0-9]+).*?\051;' . $TRAILING_COMMENT_RE,
sub {
deb ">c=$1=$2=$3=$4=$5=$6=$7=\n";
$class = {
name => $1,
descr=> parse_desc($4),
constructors => [],
methods => [],
attributes => []
};
$classes{$1} = $class;
} ],
# standalone function: recorded in @functions
[ 'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\173]*\173' . $TRAILING_COMMENT_RE,
sub {
deb ">f=$1=$2=$3=$4=$5=$6=$7=\n";
$function = {
returns => [],
arglist => [],
args => {},
name => $1,
descr => parse_desc($4),
type => 'standalone'
};
push @functions, $function;
} ],
# constructor: named "Class.name" and attached to the current class
[ 'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\173' . $TRAILING_COMMENT_RE,
sub {
deb ">cc=$1=$2=$3=$4=$5=$6=$7=\n";
$function = {
returns => [],
arglist => [],
args => {},
name => "$1.$2",
descr => parse_desc($5),
type => 'constructor'
};
push @{${$class}{constructors}}, $function;
} ],
# constructor declared through a _WSLUA_CONSTRUCTOR_ marker that runs up to a
# closing "*/"
[ '_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\052\057',
sub {
deb ">cc=$1=$2=$3=$4=$5=$6=$7=\n";
$function = {
returns => [],
arglist => [],
args => {},
name => "$1.$2",
descr => parse_desc($3),
type => 'constructor'
};
push @{${$class}{constructors}}, $function;
} ],
# method: named "class:name" with the class part lowercased
[ 'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\173]*\173' . $TRAILING_COMMENT_RE,
sub {
deb ">cm=$1=$2=$3=$4=$5=$6=$7=\n";
my $name = "$1";
$name =~ tr/A-Z/a-z/;
$name .= ":$2";
$function = {
returns => [],
arglist => [],
args => {},
name => $name,
descr => parse_desc($5),
type => 'method'
};
push @{${$class}{methods}}, $function;
} ],
# metamethod (name part starts with "__"): stored alongside the methods
[ 'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\173]*\173' . $TRAILING_COMMENT_RE,
sub {
deb ">cm=$1=$2=$3=$4=$5=$6=$7=\n";
my $name = "$1";
$name =~ tr/A-Z/a-z/;
$name .= ":$2";
# NOTE(review): $c and $d are assigned but not used in this callback
my ($c,$d) = ($1,$5);
$function = {
returns => [],
arglist => [],
args => {},
name => $name,
descr => parse_desc($5),
type => 'metamethod'
};
push @{${$class}{methods}}, $function;
} ],
# Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into
# "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6)
# Handles functions like "loadfile(filename)" too.
[ '#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' . $TRAILING_COMMENT_RE,
sub {
deb ">a=$1=$2=$3=$4=$5=$6=\n";
# optional arguments are stored with their name wrapped in []
my $name = $1 eq 'OPT' ? "[$3]" : $3;
push @{${$function}{arglist}} , $name;
${${$function}{args}}{$name} = {descr=>parse_function_arg_desc($6),}
} ],
# same as above, except that there is no macro but a (multi-line) comment.
[ '\057\052\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\052\057',
sub {
deb ">a=$1=$2=$3=$4\n";
my $name = $1 eq 'OPT' ? "[$3]" : $3;
push @{${$function}{arglist}} , $name;
${${$function}{args}}{$name} = {descr=>parse_function_arg_desc($4),}
} ],
# attribute comment: "class.name" (class lowercased), mode in $3, text in $4
[ '/\052\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\052/',
sub {
deb ">at=$1=$2=$3=$4=$5=$6=$7=\n";
my $name = "$1";
$name =~ tr/A-Z/a-z/;
$name .= ".$2";
push @{${$class}{attributes}}, { name => $name, descr => parse_attrib_desc($4, $3) };
} ],
# variable argument list: recorded as an argument named "..."
[ '/\052\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\052/',
sub {
deb ">ma=$1=$2=$3=$4=$5=$6=$7=\n";
push @{${$function}{arglist}} , "...";
${${$function}{args}}{"..."} = {descr=>parse_function_arg_desc($2)}
} ],
# return statement: a non-empty trailing comment becomes a "returns" entry
[ 'WSLUA_(FINAL_)?RETURN\050\s*.*?\s*\051\s*;' . $TRAILING_COMMENT_RE,
sub {
deb ">fr=$1=$2=$3=$4=$5=$6=$7=\n";
push @{${$function}{returns}} , gorolla($4) if $4 ne '';
} ],
# return value documented in a standalone _WSLUA_RETURNS_ comment
[ '\057\052\s*_WSLUA_RETURNS_\s*(.*?)\052\057',
sub {
deb ">fr2=$1=$2=$3=$4=$5=$6=$7=\n";
push @{${$function}{returns}} , gorolla($1) if $1 ne '';
} ],
# function-level error: the quoted message ($4) is added to the function's
# error list, which is created on first use
[ 'WSLUA_ERROR\s*\050\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' . $QUOTED_RE ,
sub {
deb ">e=$1=$2=$3=$4=$5=$6=$7=\n";
my $errors;
unless (exists ${$function}{errors}) {
$errors = ${$function}{errors} = [];
} else {
$errors = ${$function}{errors};
}
push @{$errors}, gorolla($4);
} ],
# argument-level error: the quoted message ($6) is stored under the argument
# named by $5
[ 'WSLUA_(OPT)?ARG_ERROR\s*\050\s*(([A-Z][A-Za-z0-9]+)_)?([a-z_]+)\s*,\s*([A-Z0-9]+)\s*,\s*' . $QUOTED_RE,
sub {
deb ">ae=$1=$2=$3=$4=$5=$6=$7=\n";
my $errors;
unless (exists ${${${$function}{args}}{$5}}{errors}) {
$errors = ${${${$function}{args}}{$5}}{errors} = [];
} else {
$errors = ${${${$function}{args}}{$5}}{errors};
}
push @{$errors}, gorolla($6);
} ],
);
# One big alternation of every pattern above, wrapped in a single capture
# group, used to locate the next item of interest in the input.
my $anymatch = '(^ThIsWiLlNeVeRmAtCh$';
for (@control) {
$anymatch .= "|${$_}[0]";
}
$anymatch .= ')';
# Main driver.
# Each command-line argument is a C source file. A module may span several
# consecutive files: a following file that contains WSLUA_CONTINUE_MODULE for
# the same module name is scanned into the same state before anything is
# written. The output file name comes from the first file of the module, with
# the directory stripped and ".c" replaced by the output extension, and is
# created under wsluarm_src/.
# for each file given in the command line args
my $file = shift;
my $docfile = 0;
while ( $file ) {
# continue to next loop if the file is not plain text
# NOTE(review): $file is not advanced before this 'next', so if it is reached
# with a non-plain file the loop condition is re-tested with the same value.
next unless -f $file;
if (!$docfile) {
$docfile = $file;
$docfile =~ s#.*/##;
$docfile =~ s/\.c$/.$out_extension/;
}
open C, "< $file" or die "Can't open input file $file: $!";
open D, "> wsluarm_src/$docfile" or die "Can't open output file wsluarm_src/$docfile: $!";
# slurp the whole input file into $b
my $b = '';
$b .= $_ while (<C>);
close C;
# Repeatedly find the next match of any control pattern, run the callback of
# the first control entry matching it, then cut the input up to and including
# that match so the search continues after it.
while ($b =~ /$anymatch/ms ) {
my $match = $1;
# print "\n-----\n$match\n-----\n";
for (@control) {
my ($re,$f) = @{$_};
if ( $match =~ /$re/ms) {
&{$f}();
$b =~ s/.*?$re//ms;
last;
}
}
}
# peek at next file to see if it's continuing this module
$file = shift;
# make sure we get the next plain text file
while ($file and !(-f $file)) {
$file = shift;
}
if ($file) {
# we have another file - check it out
open C, "< $file" or die "Can't open input file $file: $!";
my $peek_for_continue = '';
$peek_for_continue .= $_ while (<C>);
close C;
if ($peek_for_continue =~ /WSLUA_CONTINUE_MODULE\s*([A-Z][a-zA-Z0-9]+)/) {
if ($module{name} ne $1) {
die "Input file $file continues a different module: $1 (previous module is $module{name})!";
}
# ok, we're continuing the same module
# ($docfile and all collected state are kept for the next iteration)
next;
}
}
# if we got here, we're not continuing the module
$modules{$module{name}} = $docfile;
print "Generating source AsciiDoc for: $module{name}\n";
printf D ${$template_ref}{module_header}, $module{name}, $module{name};
# NOTE(review): an array in numeric context is its element count, so the
# ">= 0" part is always true; only the definedness check can fail here.
if ($module{descr} && @{$module{descr}} >= 0) {
print_desc($module{descr}, 1);
} else {
die "did NOT print $module{name} description\n";
}
# classes are written in name order; within a class: description,
# constructors, methods, then attributes
for my $cname (sort keys %classes) {
my $cl = $classes{$cname};
printf D ${$template_ref}{class_header}, $cname, $cname;
if (${$cl}{descr} && @{${$cl}{descr}} >= 0) {
print_desc(${$cl}{descr}, 2);
} else {
die "did NOT print $cname description\n";
}
if ( $#{${$cl}{constructors}} >= 0) {
for my $c (@{${$cl}{constructors}}) {
function_descr($c,3);
}
}
if ( $#{${$cl}{methods}} >= 0) {
for my $m (@{${$cl}{methods}}) {
function_descr($m, 3);
}
}
if ( $#{${$cl}{attributes}} >= 0) {
for my $a (@{${$cl}{attributes}}) {
# anchor id: attribute name with every non-alphanumeric replaced by '_'
my $a_id = ${$a}{name};
$a_id =~ s/[^a-zA-Z0-9]/_/g;
printf D ${$template_ref}{class_attr_header}, $a_id, ${$a}{name};
if (${$a}{descr} && @{${$a}{descr}} >= 0) {
print_desc(${$a}{descr}, 3);
} else {
die "did not print $a_id description\n";
}
printf D ${$template_ref}{class_attr_footer}, ${$a}{name}, ${$a}{name};
}
}
if (exists ${$template_ref}{class_footer}) {
printf D ${$template_ref}{class_footer}, $cname, $cname;
}
}
# standalone functions go in a "Global Functions" section after the classes
if ($#functions >= 0) {
printf D ${$template_ref}{global_functions_header}, $module{name};
for my $f (@functions) {
function_descr($f, 3);
}
print D ${$template_ref}{global_functions_footer};
}
printf D ${$template_ref}{module_footer}, $module{name};
close D;
# reset all per-module state before starting the next module
%module = ();
%classes = ();
$class = undef;
$function = undef;
@functions = ();
$docfile = 0;
}
# Write the AsciiDoc for one function/constructor/method record to filehandle D.
#   $_[0] - function hash ref (name, descr, arglist, args, returns, errors)
#   $_[1] - indent level, passed on to print_desc
# Output order: header, description, arguments (each with its description and
# any argument errors), return values, errors, footer.
sub function_descr {
my $f = $_[0];
my $indent = $_[1];
my $section_name = 'UNKNOWN';
# build the lowercased, comma-separated argument list shown in the heading
my $arglist = '';
for (@{ ${$f}{arglist} }) {
my $a = $_;
$a =~ tr/A-Z/a-z/;
$arglist .= "$a, ";
}
$arglist =~ s/, $//;
# anchor id: "name(args)" with every non-alphanumeric replaced by '_'
$section_name = "${$f}{name}($arglist)";
$section_name =~ s/[^a-zA-Z0-9]/_/g;
printf D ${$template_ref}{function_header}, $section_name , "${$f}{name}($arglist)";
# NOTE(review): {descr} holds an array ref, so @desc is a one-element list
# containing that ref and the $#desc test below is always true; print_desc
# then receives the ref as its first argument.
my @desc = ${$f}{descr};
if ($#desc >= 0) {
print_desc(@desc, $indent);
}
print D ${$template_ref}{function_args_header} if $#{${$f}{arglist}} >= 0;
for my $argname (@{${$f}{arglist}}) {
# look up the argument record before $argname is rewritten for display
my $arg = ${${$f}{args}}{$argname};
$argname =~ tr/A-Z/a-z/;
$argname =~ s/\[(.*)\]/$1 (optional)/;
printf D ${$template_ref}{function_arg_header}, $argname, $argname;
my @desc = ${$arg}{descr};
if ($#desc >= 0) {
print_desc(@desc, $indent+2);
}
if ( $#{${$arg}{errors}} >= 0) {
printf D ${$template_ref}{function_argerror_header}, $argname, $argname;
printf D ${$template_ref}{function_argerror}, $_, $_ for @{${$arg}{errors}};
printf D ${$template_ref}{function_argerror_footer}, $argname, $argname;
}
printf D ${$template_ref}{function_arg_footer}, $argname, $argname;
}
print D ${$template_ref}{function_args_footer} if $#{${$f}{arglist}} >= 0;
if ( $#{${$f}{returns}} >= 0) {
printf D ${$template_ref}{function_returns_header}, ${$f}{name};
printf D ${$template_ref}{function_returns}, $_ for @{${$f}{returns}};
printf D ${$template_ref}{function_returns_footer}, ${$f}{name};
}
if ( $#{${$f}{errors}} >= 0) {
# NOTE(review): nothing visible in this script sets {section_name} on a
# function record, so this presumably always falls back to {name}.
my $sname = exists ${$f}{section_name} ? ${$f}{section_name} : ${$f}{name};
printf D ${$template_ref}{function_errors_header}, $sname;
printf D ${$template_ref}{function_errors}, $_ for @{${$f}{errors}};
printf D ${$template_ref}{function_errors_footer}, ${$f}{name};
}
printf D ${$template_ref}{function_footer}, $section_name;
}

460
docbook/make-wsluarm.py Executable file
View File

@ -0,0 +1,460 @@
#!/usr/bin/env python3
#
# make-wsluarm.py
#
# By Gerald Combs <gerald@wireshark.org>
# Based on make-wsluarm.pl by Luis E. Garcia Ontanon <luis.ontanon@gmail.com> and Hadriel Kaplan
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
'''\
WSLUA's Reference Manual Generator
This reads Doxygen-style comments in C code and generates wslua API documentation
formatted as AsciiDoc.
Behavior as documented by Hadriel:
- Allows modules (i.e., WSLUA_MODULE) to have detailed descriptions
- Two (or more) line breaks in comments result in separate paragraphs
- Any indent with a single leading star '*' followed by space is a bulleted list item
reducing indent or having an extra linebreak stops the list
- Any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item
reducing indent or having an extra linebreak stops the list
'''
import argparse
import logging
import os
import re
import sys
from enum import Enum
from string import Template
def parse_desc(description):
    '''\
    Convert a raw comment description into AsciiDoc text.

    The description is stripped and split into lines, and every line has its
    leading whitespace removed. Each line is then handled as follows:

    - A line containing "[source...]" starts a code block. The next line is
      taken as the block delimiter and everything up to the matching closing
      delimiter is copied through untouched. A block that is still open at
      the end of the description is closed automatically.
    - A blank line is a paragraph break.
    - A leading "@version" or "@since" is replaced with "Since: ".
    - A line starting with "* " or "1. " starts a list. Following lines are
      added to it until a blank line, a less-indented line, or the end of
      the description.
    - In any other line "[[...]]" is rewritten to "$$...$$".

    Consecutive empty lines in the result are collapsed into one.

    Returns the AsciiDoc text as a single string ('' for an empty description).
    '''

    c_lines = description.strip().splitlines()
    if not c_lines:
        return ''

    adoc_lines = []
    cli = iter(c_lines)
    for line in cli:
        raw_len = len(line)
        line = line.lstrip()
        indent = raw_len - len(line)

        # If we find "[source,...]" then treat it as a block
        if re.search(r'\[source.*\]', line):
            # The next line *should* be a delimiter...
            block_delim = next(cli, None)
            if block_delim is None:
                # ...but the description ended right after "[source...]".
                # Emit the line on its own instead of raising StopIteration.
                adoc_lines.append(line)
                continue
            block_delim = block_delim.strip()
            block = [line, block_delim]
            # Keep eating lines until the closing delimiter. Running out of
            # input simply ends the loop; the block is closed below either way.
            # XXX Strip indent spaces?
            for block_line in cli:
                if block_line.strip() == block_delim:
                    break
                block.append(block_line)
            block.append(block_delim)
            adoc_lines.append('\n'.join(block) + '\n')

        elif not line:
            # Line was empty or just whitespace, so it's the end of a
            # previous paragraph, beginning of a new one.
            adoc_lines.append('')

        # If line starts with "@version" or "@since", make it a "Since:"
        elif re.match(r'^@(version|since)\s+', line):
            adoc_lines.append(re.sub(r'^@(version|since)\s+', 'Since: ', line))

        # If line starts with a single "*" or with "1." followed by
        # whitespace it begins a list; leave it mostly intact.
        elif re.match(r'^(\*|1\.)\s', line):
            adoc_lines += ['', line]
            # Keep eating until we find a blank line or the end of input.
            # Iterating with "for" means an exhausted iterator just ends the
            # list rather than raising StopIteration out of this function.
            for item in cli:
                if re.match(r'^\s*$', item):
                    break
                stripped = item.lstrip()
                # If this is less indented than the first item, break out.
                # NOTE: as before, the line that ends the list this way is
                # consumed and not emitted.
                if len(item) - len(stripped) < indent:
                    break
                adoc_lines.append(stripped)
            adoc_lines.append('')

        # Just a normal line, add it to the output
        else:
            # Nested Lua arrays
            adoc_lines.append(re.sub(r'\[\[(.*)\]\]', r'$$\1$$', line))

    # Strip out consecutive empty lines.
    # This isn't strictly necessary but makes the AsciiDoc output prettier.
    adoc_lines = '\n'.join(adoc_lines).splitlines()
    adoc_lines = [val for idx, val in enumerate(adoc_lines) if idx == 0 or not (val == '' and val == adoc_lines[idx - 1])]

    return '\n'.join(adoc_lines)
class LuaFunction:
def __init__(self, c_file, id, start, name, raw_description):
self.c_file = c_file
self.id = id
self.start = start
self.name = name
if not raw_description:
raw_description = ''
self.description = parse_desc(raw_description)
self.arguments = [] # (name, description, optional)
self.returns = [] # description
self.errors = [] # description
logging.info(f'Created function {id} ({name}) at {start}')
def add_argument(self, id, raw_name, raw_description, raw_optional):
if id != self.id:
logging.critical(f'Invalid argument ID {id} in function {self.id}')
sys.exit(1)
if not raw_description:
raw_description = ''
optional = False
if raw_optional == 'OPT':
optional = True
self.arguments.append((raw_name.lower(), parse_desc(raw_description), optional))
def extract_buf(self, buf):
"Extract arguments, errors, and return values from a function's buffer."
# Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into
# "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6)
# Handles functions like "loadfile(filename)" too.
for m in re.finditer(r'#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
self.add_argument(m.group(2), m.group(3), m.group(6), m.group(1))
logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
# Same as above, except that there is no macro but a (multi-line) comment.
for m in re.finditer(r'/\*\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
self.add_argument(m.group(2), m.group(3), m.group(4), m.group(1))
logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
for m in re.finditer(r'/\*\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
self.add_argument(m.group(1), '...', m.group(2), False)
logging.info(f'Created morearg for {self.id}')
for m in re.finditer(r'WSLUA_(FINAL_)?RETURN\(\s*.*?\s*\)\s*;' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
if m.group(4) and len(m.group(4)) > 0:
self.returns.append(m.group(4).strip())
logging.info(f'Created return for {self.id} at {m.start()}')
for m in re.finditer(r'/\*\s*_WSLUA_RETURNS_\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
if m.group(1) and len(m.group(1)) > 0:
self.returns.append(m.group(1).strip())
logging.info(f'Created return for {self.id} at {m.start()}')
for m in re.finditer(r'WSLUA_ERROR\s*\(\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' + QUOTED_RE, buf, re.MULTILINE|re.DOTALL):
self.errors.append(m.group(4).strip())
logging.info(f'Created error {m.group(4)[:10]} for {self.id} at {m.start()}')
def to_adoc(self):
    """Render this function as an AsciiDoc section and return it as a string.

    The output starts with an anchored heading and the description, is
    followed by "Arguments", "Returns" and "Errors" subsections for each of
    those lists that is non-empty, and ends with a footer comment.
    """
    # The Perl script wrapped optional args in '[]', joined them with ', ', and
    # converted non-alphabetic characters to underscores.
    anchor_parts = []
    heading_parts = []
    for arg_name, _, is_optional in self.arguments:
        anchor_parts.append(f'_{arg_name}_' if is_optional else arg_name)
        heading_parts.append(f'[{arg_name}]' if is_optional else arg_name)
    section_name = re.sub('[^A-Za-z0-9]', '_', f'{self.name}_{"__".join(anchor_parts)}_')
    signature = ', '.join(heading_parts)

    pieces = [f'''
[#lua_fn_{section_name}]
===== {self.name}({signature})
{self.description}
''']

    if self.arguments:
        pieces.append('''
[float]
===== Arguments
''')
        for arg_name, description, is_optional in self.arguments:
            label = f'{arg_name} (optional)' if is_optional else arg_name
            pieces.append(f'\n{label}::\n')
            if description:
                pieces.append(f'\n{description}\n')
            pieces.append(f'\n// function_arg_footer: {label}')
        pieces.append('\n// end of function_args\n')

    if self.returns:
        pieces.append('''
[float]
===== Returns
''')
        pieces.extend(f'\n{description}\n' for description in self.returns)
        pieces.append(f'\n// function_returns_footer: {self.name}')

    if self.errors:
        pieces.append('''
[float]
===== Errors
''')
        pieces.extend(f'\n* {description}\n' for description in self.errors)
        pieces.append(f'\n// function_errors_footer: {self.name}')

    pieces.append(f'\n// function_footer: {section_name}\n')
    return ''.join(pieces)
# Optional C comment trailing a matched construct. The group numbers below are
# relative to this pattern; callers prepend their own groups, so the absolute
# numbers they pass to m.group() are shifted accordingly. All three groups are
# None when no comment follows.
# group 1: whole trailing comment (possibly empty), e.g. " /* foo */"
# group 2: any leading whitespace. XXX why is this not removed using (?:...)
# group 3: actual comment text, e.g. " foo ".
TRAILING_COMMENT_RE = r'((\s*|[\n\r]*)/\*(.*?)\*/)?'
# Skips whitespace, then optionally captures text up to a closing "*/". The
# opening "/*" is not matched here; presumably the marker this is appended to
# already sits inside the comment.
# group 1: the captured text plus the closing "*/".
# group 2: the captured text without its trailing whitespace.
IN_COMMENT_RE = r'[\s\r\n]*((.*?)\s*\*/)?'
# A double-quoted string; group 1 is the text between the quotes. Embedded
# (escaped) double quotes are not handled.
QUOTED_RE = r'"([^"]*)"'
# XXX We might want to create a "LuaClass" class similar to LuaFunction
# and move these there.
def extract_class_definitions(c_file, c_buf, module, classes, functions):
    """Register every class declared with WSLUA_CLASS_DEFINE[_BASE] in c_buf.

    Each class found is stored in ``classes`` under its name, with its
    parsed description and empty constructor, method, and attribute lists.
    ``c_file``, ``module`` and ``functions`` are not used here. Always
    returns 0.
    """
    class_re = r'WSLUA_CLASS_DEFINE(?:_BASE)?\(\s*([A-Z][a-zA-Z0-9]+).*?\);' + TRAILING_COMMENT_RE
    for match in re.finditer(class_re, c_buf, re.MULTILINE|re.DOTALL):
        name = match.group(1)
        # Group 4 is the trailing comment text; None when no comment follows.
        comment = match.group(4)
        classes[name] = {
            'description': parse_desc('' if comment is None else comment),
            'constructors': [],
            'methods': [],
            'attributes': [],
        }
        logging.info(f'Created class {name}')
    return 0
def extract_function_definitions(c_file, c_buf, module, classes, functions):
    """Create a LuaFunction for every "WSLUA_FUNCTION wslua_<name>" in c_buf.

    The new object is stored in ``functions`` under <name>, which is used
    as both its id and its display name. The trailing comment, if any,
    is passed on as the raw description.
    """
    function_re = r'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for match in re.finditer(function_re, c_buf, re.MULTILINE|re.DOTALL):
        func_id = match.group(1)
        raw_description = match.group(4)
        functions[func_id] = LuaFunction(c_file, func_id, match.start(), func_id, raw_description)
def extract_constructor_definitions(c_file, c_buf, module, classes, functions):
    """Create a LuaFunction for every "WSLUA_CONSTRUCTOR Class_name" in c_buf.

    The function is stored in ``functions`` as "Class_name", displayed as
    "Class.name", and its id is appended to the class's 'constructors'
    list. The class must already be present in ``classes``.
    """
    constructor_re = r'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\{' + TRAILING_COMMENT_RE
    for match in re.finditer(constructor_re, c_buf, re.MULTILINE|re.DOTALL):
        class_name, suffix = match.group(1, 2)
        func_id = f'{class_name}_{suffix}'
        display_name = f'{class_name}.{suffix}'
        # Group 5 is the text of the trailing comment.
        functions[func_id] = LuaFunction(c_file, func_id, match.start(), display_name, match.group(5))
        classes[class_name]['constructors'].append(func_id)
def extract_constructor_markups(c_file, c_buf, module, classes, functions):
    """Create a LuaFunction for every "_WSLUA_CONSTRUCTOR_ Class_name ... */" markup.

    Here the description is the text between the constructor name and the
    closing "*/". The function is stored in ``functions`` as "Class_name",
    displayed as "Class.name", and added to the class's 'constructors'
    list. The class must already be present in ``classes``.
    """
    markup_re = r'_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\*/'
    for match in re.finditer(markup_re, c_buf, re.MULTILINE|re.DOTALL):
        class_name, suffix, raw_description = match.groups()
        func_id = f'{class_name}_{suffix}'
        display_name = f'{class_name}.{suffix}'
        functions[func_id] = LuaFunction(c_file, func_id, match.start(), display_name, raw_description)
        classes[class_name]['constructors'].append(func_id)
def extract_method_definitions(c_file, c_buf, module, classes, functions):
    """Create a LuaFunction for every "WSLUA_METHOD Class_name" in c_buf.

    The function is stored in ``functions`` as "Class_name", displayed as
    "class:name" (class name lower-cased), and its id is appended to the
    class's 'methods' list. The class must already be present in ``classes``.
    """
    method_re = r'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for match in re.finditer(method_re, c_buf, re.MULTILINE|re.DOTALL):
        class_name, suffix = match.group(1, 2)
        func_id = f'{class_name}_{suffix}'
        display_name = f'{class_name.lower()}:{suffix}'
        # Group 5 is the text of the trailing comment.
        functions[func_id] = LuaFunction(c_file, func_id, match.start(), display_name, match.group(5))
        classes[class_name]['methods'].append(func_id)
def extract_metamethod_definitions(c_file, c_buf, module, classes, functions):
    """Create a LuaFunction for every "WSLUA_METAMETHOD Class__name" in c_buf.

    The captured metamethod name keeps its leading double underscore. The
    function is stored in ``functions`` as "Class__name", displayed as
    "class:__name" (class name lower-cased), and listed among the class's
    'methods'. The class must already be present in ``classes``.
    """
    metamethod_re = r'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for match in re.finditer(metamethod_re, c_buf, re.MULTILINE|re.DOTALL):
        class_name, dunder_name = match.group(1, 2)
        func_id = f'{class_name}{dunder_name}'
        display_name = f'{class_name.lower()}:{dunder_name}'
        # Group 5 is the text of the trailing comment.
        functions[func_id] = LuaFunction(c_file, func_id, match.start(), display_name, match.group(5))
        classes[class_name]['methods'].append(func_id)
def extract_attribute_markups(c_file, c_buf, module, classes, functions):
    """Record every "/* WSLUA_ATTRIBUTE Class_name MODE text */" markup in c_buf.

    Each attribute is appended to the 'attributes' list of its class as a
    dict with a "class.name" display name and a parsed description that
    begins with a "Mode: ..." line. Writes a message to stderr and exits
    with status 1 when the mode contains none of RO, WO, RW or WR.
    """
    attribute_re = r'/\*\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\*/'
    # Tried in this order; the first entry with a tag found in the mode wins.
    mode_texts = (
        (('RO',), 'Retrieve only.\n'),
        (('WO',), 'Assign only.\n'),
        (('RW', 'WR'), 'Retrieve or assign.\n'),
    )
    for match in re.finditer(attribute_re, c_buf, re.MULTILINE|re.DOTALL):
        class_name, suffix, mode, raw_description = match.groups()
        name = f'{class_name.lower()}.{suffix}'

        for tags, text in mode_texts:
            if any(tag in mode for tag in tags):
                mode_desc = 'Mode: ' + text
                break
        else:
            sys.stderr.write(f'Attribute does not have a RO/WO/RW mode {mode}\n')
            sys.exit(1)

        attribute = {
            'name': name,
            'description': parse_desc(f'{mode_desc}\n{raw_description}'),
        }
        classes[class_name]['attributes'].append(attribute)
        logging.info(f'Created attribute {name} for class {class_name}')
def _collect_modules(c_files):
    """Read the C files and group them by the WSLUA module they declare.

    Returns a dict keyed by module name. Each value holds 'c', a list of
    (basename, contents) pairs, and 'file_base', the basename (without
    extension) of the first file seen for that module. Files without a
    module declaration are reported with a warning and skipped.
    """
    modules = {}
    for c_file in c_files:
        with open(c_file, encoding='utf-8') as c_f:
            c_buf = c_f.read()

        # Peek for modules vs continuations.
        m = re.search(r'WSLUA_(|CONTINUE_)MODULE\s*(\w+)', c_buf)
        if not m:
            logging.warning(f'No module found in {c_file}')
            continue

        module_name = m.group(2)
        c_pair = (os.path.basename(c_file), c_buf)
        entry = modules.get(module_name)
        if entry is None:
            modules[module_name] = {
                'c': [c_pair],
                'file_base': os.path.splitext(c_pair[0])[0],
            }
        elif m.group(1) == 'CONTINUE_':
            entry['c'].append(c_pair)
        else:
            # The WSLUA_MODULE file goes first: the module description is
            # read from the first file in the list.
            entry['c'].insert(0, c_pair)
    return modules


def _extract_function_details(c_pairs, functions):
    """Give each function the slice of its C file that ends where the next one starts."""
    for (c_file, c_buf) in c_pairs:
        func_ids = sorted(
            (k for k in functions if functions[k].c_file == c_file),
            key=lambda k: functions[k].start)
        # A file that defines no functions yields empty lists here, so the
        # loop below simply does nothing instead of raising IndexError.
        starts = [functions[k].start for k in func_ids]
        ends = starts[1:] + [len(c_buf)]
        for func_id, start, end in zip(func_ids, starts, ends):
            functions[func_id].extract_buf(c_buf[start:end])


def _write_module_adoc(adoc_path, module_name, description, classes, functions):
    """Write one module's AsciiDoc file to adoc_path.

    Constructors and methods are deleted from ``functions`` as they are
    written, so whatever remains afterwards is emitted under
    "Global Functions".
    """
    def by_start(func_id):
        return functions[func_id].start

    with open(adoc_path, 'w', encoding='utf-8') as adoc_f:
        adoc_f.write(f'''\
[#lua_module_{module_name}]
=== {description}
''')
        for class_name in sorted(classes):
            lua_class = classes[class_name]
            adoc_f.write(f'''
[#lua_class_{class_name}]
==== {class_name}
''')
            if lua_class['description'] != '':
                adoc_f.write(f'\n{lua_class["description"]}\n')

            for constructor_id in sorted(lua_class['constructors'], key=by_start):
                adoc_f.write(functions[constructor_id].to_adoc())
                del functions[constructor_id]

            for method_id in sorted(lua_class['methods'], key=by_start):
                adoc_f.write(functions[method_id].to_adoc())
                del functions[method_id]

            for attribute in lua_class['attributes']:
                attribute_id = re.sub('[^A-Za-z0-9]', '_', f'{attribute["name"]}')
                adoc_f.write(f'''
[#lua_class_attrib_{attribute_id}]
===== {attribute["name"]}
{attribute["description"]}
// End {attribute["name"]}
''')
            adoc_f.write(f'\n// class_footer: {class_name}\n')

        if functions:
            adoc_f.write(f'''\
[#global_functions_{module_name}]
==== Global Functions
''')
            for global_id in sorted(functions, key=by_start):
                adoc_f.write(functions[global_id].to_adoc())
            adoc_f.write('// Global function\n')

        adoc_f.write('// end of module\n')


def main():
    """Generate one AsciiDoc file per WSLUA module from the given C files.

    Command line: one or more C files, ``--output-directory`` (defaults to
    the current directory) and ``--verbose``. A module whose first file has
    no WSLUA_MODULE declaration is reported and skipped; the remaining
    modules are still written.
    """
    parser = argparse.ArgumentParser(description="WSLUA's Reference Manual Generator")
    parser.add_argument("c_files", nargs='+', metavar='C file', help="C file")
    # Without a default, omitting the option made os.path.join() fail on None.
    parser.add_argument('--output-directory', default=os.curdir, help='Output directory')
    parser.add_argument('--verbose', action='store_true', help='Show more output')
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG if args.verbose else logging.WARNING)

    modules = _collect_modules(args.c_files)

    extractors = [
        extract_class_definitions,
        extract_function_definitions,
        extract_constructor_definitions,
        extract_constructor_markups,
        extract_method_definitions,
        extract_metamethod_definitions,
        extract_attribute_markups,
    ]

    for module_name in sorted(modules):
        module = modules[module_name]
        adoc_file = f'{module["file_base"]}.adoc'
        logging.info(f'Writing module {module_name} to {adoc_file} from {len(module["c"])} input(s)')
        functions = {}
        classes = {}

        # Extract our module's description.
        m = re.search(r'WSLUA_MODULE\s*[A-Z][a-zA-Z0-9]+' + IN_COMMENT_RE, module['c'][0][1], re.MULTILINE|re.DOTALL)
        if not m:
            # Skip only this module rather than silently dropping every
            # module that sorts after it.
            logging.error(f'No WSLUA_MODULE declaration found for module {module_name}; skipping {adoc_file}')
            continue
        # Group 2 is None when no closing "*/" follows the declaration.
        module['description'] = parse_desc(m.group(2) or '')

        # Extract module-level information from each file.
        for (c_file, c_buf) in module['c']:
            for extractor in extractors:
                extractor(c_file, c_buf, module, classes, functions)

        # Extract function-level information from each file.
        _extract_function_details(module['c'], functions)

        _write_module_adoc(
            os.path.join(args.output_directory, adoc_file),
            module_name, module['description'], classes, functions)
# Run the generator only when this file is executed as a script.
if __name__ == '__main__':
    main()