From a1c83a901bad955928b6ee7d72362489d05e09b3 Mon Sep 17 00:00:00 2001 From: Gerald Combs Date: Wed, 29 Jun 2022 18:33:36 -0700 Subject: [PATCH] docbook: Port make-wsluarm to Python3 Port the script that creates docbook/wsluarm_src/*.adoc to Python3. Ping #18152. --- doc/README.wslua | 20 +- docbook/CMakeLists.txt | 7 +- docbook/make-wsluarm.pl | 708 ---------------------------------------- docbook/make-wsluarm.py | 460 ++++++++++++++++++++++++++ 4 files changed, 474 insertions(+), 721 deletions(-) delete mode 100755 docbook/make-wsluarm.pl create mode 100755 docbook/make-wsluarm.py diff --git a/doc/README.wslua b/doc/README.wslua index c96a4f4be4..4ecb15c654 100644 --- a/doc/README.wslua +++ b/doc/README.wslua @@ -29,10 +29,10 @@ Wireshark uses C-macros liberally, both for the usual reasons as well as for the binding generator and documentation generator scripts. The macros are described within this document. -The API documentation is auto-generated from a Perl script called 'make- -wsluarm.pl', which searches C-files for the known macros and generates -appropriate HTML documentation from them. This includes using the C-comments -after the macros for the API document info. +The API documentation is auto-generated from a Python script called 'make- +wsluarm.py', which searches C-files for the known macros and generates +appropriate AsciiDoc documentation from them. This includes using the C +comments after the macros for the API document info. Likewise, another script called 'make-reg.py' generates the C-files 'register_wslua.c' and 'declare_wslua.h', based on the C-macros it searches @@ -81,7 +81,7 @@ followed. Documenting things for the API docs: As explained previously, the API documentation is auto-generated from a -Perl script called 'make-wsluarm.pl', which searches C-files for the known +Python script called 'make-wsluarm.py', which searches C-files for the known macros and generates appropriate HTML documentation from them. 
This includes using the C-comments after the macros for the API document info. The comments are extremely important, because the API documentation is what most Lua script @@ -92,10 +92,10 @@ Please make sure to at least use the '@since' version notification markup in your comments, to let users know when the new class/function/etc. you created became available. -Because documentation is so important, the make-wsluarm.pl script supports +Because documentation is so important, the make-wsluarm.py script supports specific markup syntax in comments, and converts them to XML and ultimately into the various documentation formats. The markup syntax is documented in -the top comments in make-wsluarm.pl, but are repeated here as well: +the top comments in make-wsluarm.py, but are repeated here as well: - two (or more) line breaks in comments result in separate paragraphs - all '&' are converted into their entity names, except inside urls - all '<', and '>' are converted into their entity names everywhere @@ -292,7 +292,7 @@ table named 'Gui' (in fact there isn't). It's just for documentation. If you look at the documentation, you'll see there is 'ProgDlg', 'TextWindow', etc. in that 'GUI support' section. That's because both ProgDlg and TextWindow are defined in that same wslua_gui.c file using the -'WSLUA_CLASS_DEFINE' macro. (see description of that later) make-wsluarm.pl +'WSLUA_CLASS_DEFINE' macro. (see description of that later) make-wsluarm.py created those in the same documentation section because they're in the same c file as that WSLUA_MODULE comment. You'll also note the documentation includes a sub-section for 'Non Method Functions', which it auto-generated @@ -442,7 +442,7 @@ WSLUA_ARG_ - the prefix used in a #define statement, for a required function/method argument (ie, one without a default value). 
It is defined to an integer representing the index slot number of the Lua stack it will be at, when calling the appropriate lua_check/lua_opt routine to get it from the -stack. The make_wsluarm.pl Perl script will generate API documentation with +stack. The make-wsluarm.py Python script will generate API documentation with this argument name for the function/method, removing the 'WSLUA_ARG_' prefix. The name following the 'WSLUA_ARG_' prefix must be the same name as the function it's an argument for, followed by an underscore and then an ALLCAPS @@ -458,7 +458,7 @@ WSLUA_OPTARG_ - the prefix used in a #define statement, for an optional function/method argument (ie, one with a default value). It is defined to an integer representing the index slot number of the Lua stack it will be at, when calling the appropriate lua_check/lua_opt routine to get it from the -stack. The make_wsluarm.pl Perl script will generate API documentation with +stack. The make-wsluarm.py Python script will generate API documentation with this argument name for the function/method, removing the 'WSLUA_OPTARG_' prefix. The rules for the name of the argument after the prefix are the same as for 'WSLUA_ARG_' above. 
diff --git a/docbook/CMakeLists.txt b/docbook/CMakeLists.txt index a063716d3e..b068a87c0b 100644 --- a/docbook/CMakeLists.txt +++ b/docbook/CMakeLists.txt @@ -315,13 +315,14 @@ ADD_CUSTOM_COMMAND( OUTPUT wsluarm COMMAND ${CMAKE_COMMAND} -E make_directory wsluarm_src - COMMAND ${PERL_EXECUTABLE} - ${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.pl + COMMAND ${PYTHON_EXECUTABLE} + ${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.py + --output-directory wsluarm_src ${WSLUA_MODULES} COMMAND ${CMAKE_COMMAND} -E touch wsluarm DEPENDS - ${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.pl + ${CMAKE_CURRENT_SOURCE_DIR}/make-wsluarm.py ${WSLUA_MODULES} ) diff --git a/docbook/make-wsluarm.pl b/docbook/make-wsluarm.pl deleted file mode 100755 index 81d6388bac..0000000000 --- a/docbook/make-wsluarm.pl +++ /dev/null @@ -1,708 +0,0 @@ -#!/usr/bin/perl -# -# make-wsluarm.pl -# WSLUA's Reference Manual Generator -# -# (c) 2006, Luis E. Garcia Onatnon -# -# Wireshark - Network traffic analyzer -# By Gerald Combs -# Copyright 1998 Gerald Combs -# -# SPDX-License-Identifier: GPL-2.0-or-later -# -# (-: I don't even think writing this in Lua :-) -# ...well I wished you had! -# -# changed by Hadriel Kaplan to do the following: -# - generates pretty XML output, to make debugging it easier -# - allows modules (i.e., WSLUA_MODULE) to have detailed descriptions -# - two (or more) line breaks in comments result in separate paragraphs -# - all '&' are converted into their entity names, except inside urls -# - all '<', and '>' are converted into their entity names everywhere -# - any word(s) wrapped in one star, e.g., *foo bar*, become italics -# - any word(s) wrapped in two stars, e.g., **foo bar**, become commands (is there a 'bold'?) -# - any word(s) wrapped in backticks, e.g., `foo bar`, become commands (is there something better?) 
-# - any word(s) wrapped in two backticks, e.g., ``foo bar``, become one backtick -# - any "[[url]]" becomes an XML ulink with the url as both the url and text -# - any "[[url|text]]" becomes an XML ulink with the url as the url and text as text -# - any indent with a single leading star '*' followed by space is a bulleted list item -# reducing indent or having an extra linebreak stops the list -# - any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item -# reducing indent or having an extra linebreak stops the list -# - supports meta-tagged info inside comment descriptions as follows: -# * a line starting with "@note" or "Note:" becomes an XML note line -# * a line starting with "@warning" or "Warning:" becomes an XML warning line -# * a line starting with "@version" or "@since" becomes a "Since:" line -# * a line starting with "@code" and ending with "@endcode" becomes an -# XML programlisting block, with no indenting/parsing within the block -# The above '@' commands are based on Doxygen commands -# -# Changed by Gerald Combs to generate AsciiDoc. -# - We might want to convert the epan/wslua/*.c markup to AsciiDoc -# - ...or we might want to generate Doxygen output instead. - -use strict; -#use V2P; - -sub deb { -# warn $_[0]; -} - -sub gorolla { -# a gorilla stays to a chimp like gorolla stays to chomp -# but this one returns the shrugged string. - my $s = shift; - # remove leading newlines and spaces at beginning - $s =~ s/^([\n]|\s)*//ms; - # remove trailing newlines and spaces at end - $s =~ s/([\n]|\s)*$//s; - - # Prior versions converted a custom markup syntax to DocBook. - # Markup must now be compatible with Asciidoctor. 
- - $s; -} - -# break up descriptions based on newlines and keywords -# builds an array of paragraphs and returns the array ref -# each entry in the array is a single line for doc source, but not a -# whole paragraph - there are ""/"" entries in the -# array to make them paragraphs - this way the doc source itself is -# also pretty, while the resulting output is of course valid -# first arg is the array to build into; second arg is an array -# of lines to parse - this way it can be called from multiple -# other functions with slightly different needs -# this function assumes gorolla was called previously -sub parse_desc_common { - my @r; # a temp array we fill, then copy into @ret below - my @ret = @{ $_[0] }; - my @lines = @{ $_[1] }; - - # the following will unfortunately create empty paragraphs too - # (ie, followed by ), so we do this stuff to a temp @r - # array and then copy the non-empty ones into the passed-in array @ret - if ($#lines >= 0) { - # for each double newline, break into separate para's - for (my $idx=0; $idx <= $#lines; $idx++) { - - $lines[$idx] =~ s/^(\s*)//; # remove leading whitespace - # save number of spaces in case we need to know later - my $indent = length($1); - - # if we find [source,...] then treat it as a blob - if ($lines[$idx] =~ /^\[source.*\]/) { - my $line = $lines[$idx] . "\n"; - # the next line *should* be a delimiter... - my $block_delim = $lines[++$idx]; - $block_delim =~ s/^\s+|\s+$//g; - $line .= $block_delim . "\n"; - my $block_line = $lines[++$idx]; - while (!($block_line =~ qr/^\s*$block_delim\s*$/) && $idx <= $#lines) { - # keep eating lines until the closing delimiter. - # XXX Strip $indent spaces? - $line .= $block_line . "\n"; - $block_line = $lines[++$idx]; - } - $line .= $block_delim; - - $r[++$#r] = $line . 
"\n"; - } elsif ($lines[$idx] =~ /^\s*$/) { - # line is either empty or just whitespace, and we're not in a @code block - # so it's the end of a previous paragraph, beginning of new one - $r[++$#r] = "\n"; - } else { - # We have a regular line, not in a @code block. - # Add it as-is. - my $line = $lines[$idx]; - - # if line starts with "@version" or "@since", make it a "Since:" - if ($line =~ /^\@version |^\@since /) { - $line =~ s/^\@version\s+|^\@since\s+/Since: /; - $r[++$#r] = $line . "\n"; - - # if line starts with single "*" and space, leave it mostly intact. - } elsif ($line =~ /^\*\s/) { - $r[++$#r] = "\n"; - $r[++$#r] = "" . $line . "\n"; - # keep eating until we find a blank line or end - while (!($lines[++$idx] =~ /^\s*$/) && $idx <= $#lines) { - $lines[$idx] =~ s/^(\s*)//; # count and remove leading whitespace - # if this is less indented than before, break out - last if length($1) < $indent; - $r[++$#r] = "" . $lines[$idx] . "\n"; - } - $r[++$#r] = "\n\n"; - - # if line starts with "1." and space, leave it mostly intact. - } elsif ($line =~ /^1\.\s/) { - $r[++$#r] = "\n"; - $r[++$#r] = "" . $line . "\n"; - # keep eating until we find a blank line or end - while (!($lines[++$idx] =~ /^\s*$/) && $idx <= $#lines) { - $lines[$idx] =~ s/^(\s*)//; # count and remove leading whitespace - # if this is less indented than before, break out - last if length($1) < $indent; - $r[++$#r] = "" . $lines[$idx] . "\n"; - } - $r[++$#r] = "\n\n"; - - # just a normal line, add it to array - } else { - # Nested Lua arrays - $line =~ s/\[\[(.*)\]\]/\$\$$1\$\$/g; - $r[++$#r] = "" . $line . "\n"; - } - } - } - $r[++$#r] = "\n\n"; - - # Now go through @r, and copy into @ret but skip empty lines. - # This isn't strictly necessary but makes the AsciiDoc output prettier. 
- for (my $idx=0; $idx <= $#r; $idx++) { - if ($r[$idx] =~ /^\s*$/ && $r[$idx+1] =~ /^\s*$/ && $r[$idx+2] =~ /^\s*$/) { - $idx++; # for-loop will increment $idx and skip the other one - } else { - $ret[++$#ret] = $r[$idx]; - } - } - } - - return \@ret; -} - -# for "normal" description cases - class, function, etc. -# but not for modules nor function arguments -sub parse_desc { - my $s = gorolla(shift); - # break description into separate sections - my @r = (); # the array we return - - # split each line into an array - my @lines = split(/\n/, $s); - - return parse_desc_common(\@r, \@lines); -} - -# modules have a "title" and an optional description -sub parse_module_desc { - my $s = gorolla(shift); - # break description into separate sections - my @r = (); # the array we return - - my @lines = split(/\n/, $s); - my $line = shift @lines; - - $r[++$#r] = "=== $line\n"; - - return parse_desc_common(\@r, \@lines); -} - -# function argument descriptions are in a -sub parse_function_arg_desc { - my $s = gorolla(shift); - # break description into separate sections - my @r = ( "\n" ); # the array we return - - my @lines = split(/\n/, $s); - @r = @{ parse_desc_common(\@r, \@lines) }; - - #$r[++$#r] = "\n"; - - return \@r; -} - -# attributes have a "mode" and an optional description -sub parse_attrib_desc { - my $s = gorolla(shift); - # break description into separate sections - my @r = (); # the array we return - - my $mode = shift; - if ($mode) { - $mode =~ s/RO/Retrieve only./; - $mode =~ s/WO/Assign only./; - $mode =~ s/RW|WR/Retrieve or assign./; - $r[++$#r] = "Mode: $mode\n\n"; - } else { - die "Attribute does not have a RO/WO/RW mode: '$s'\n"; - } - - # split each line into an array - my @lines = split(/\n/, $s); - - return parse_desc_common(\@r, \@lines); -} - -# prints the parse_* arrays into the doc source file with pretty indenting -# first arg is the description array, second is indent level -sub print_desc { - my $desc_ref = $_[0]; - - my $indent = $_[1]; - if 
(!$indent) { - $indent = 2; - } - #my $tabs = "\t" x $indent; - - for my $line ( @{ $desc_ref } ) { - printf D "%s", $line; - } - printf D "\n"; -} - -my %module = (); -my %modules = (); -my $class; -my %classes; -my $function; -my @functions; - -my $asciidoc_template = { - module_header => "[#lua_module_%s]\n\n", - # module_desc => "\t%s\n", - class_header => "[#lua_class_%s]\n\n" . - "==== %s\n\n", - #class_desc => "\t\t%s\n", - class_attr_header => "[#lua_class_attrib_%s]\n\n" . - "===== %s\n\n", - #class_attr_descr => "\t\t\t%s%s\n", - class_attr_footer => "// End %s\n\n", - function_header => "[#lua_fn_%s]\n\n" . - "===== %s\n\n", - #function_descr => "\t\t\t%s\n", - function_args_header => "[float]\n" . - "===== Arguments\n\n", - function_arg_header => "%s::\n", - #function_arg_descr => "\t\t\t\t\t\t\n" . - # "\t\t\t\t\t\t\t%s\n" . - # "\t\t\t\t\t\t\n", - function_arg_footer => "// function_arg_footer: %s\n", - function_args_footer => "// end of function_args\n\n", - function_argerror_header => "", #"\t\t\t\t\t
Errors\n\t\t\t\t\t\t\n", - function_argerror => "", #"\t\t\t\t\t\t\t%s\n", - function_argerror_footer => "", #"\t\t\t\t\t\t
\n", - function_returns_header => "[float]\n" . - "===== Returns\n\n", - function_returns => "%s\n\n", - function_returns_footer => "// function_returns_footer: %s\n", - function_errors_header => "[float]\n" . - "===== Errors\n\n", - function_errors => "* %s\n\n", - function_errors_footer => "// function_errors_footer: %s\n", - function_footer => "// function_footer: %s\n\n", - class_footer => "// class_footer: %s\n", - global_functions_header => "\n[#global_functions_%s]\n\n" . - "==== Global Functions\n\n", - global_functions_footer => "// Global function\n", - module_footer => "// end of module\n", -}; - -# class_constructors_header => "\t\t
\n\t\t\t%s Constructors\n", -# class_constructors_footer => "\t\t
\n", -# class_methods_header => "\t\t
\n\t\t\t%s Methods\n", -# class_methods_footer => "\t\t
\n", - - -my $template_ref = $asciidoc_template; -my $out_extension = "adoc"; - -# It's said that only perl can parse perl... my editor isn't perl... -# if unencoded this causes my editor's autoindent to bail out so I encoded in octal -# XXX: support \" within "" -my $QUOTED_RE = "\042\050\133^\042\135*\051\042"; - -# group 1: whole trailing comment (possibly empty), e.g. " /* foo */" -# group 2: any leading whitespace. XXX why is this not removed using (?:...) -# group 3: actual comment text, e.g. " foo ". -my $TRAILING_COMMENT_RE = '((\s*|[\n\r]*)/\*(.*?)\*/)?'; -my $IN_COMMENT_RE = '[\s\r\n]*((.*?)\*/)?'; - -my @control = -( -# This will be scanned in order trying to match the re if it matches -# the body will be executed immediately after. -[ 'WSLUA_MODULE\s*([A-Z][a-zA-Z0-9]+)' . $IN_COMMENT_RE, -sub { - $module{name} = $1; - $module{descr} = parse_module_desc($3); -} ], - -[ 'WSLUA_CLASS_DEFINE(?:_BASE)?\050\s*([A-Z][a-zA-Z0-9]+).*?\051;' . $TRAILING_COMMENT_RE, -sub { - deb ">c=$1=$2=$3=$4=$5=$6=$7=\n"; - $class = { - name => $1, - descr=> parse_desc($4), - constructors => [], - methods => [], - attributes => [] - }; - $classes{$1} = $class; -} ], - -[ 'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\173]*\173' . $TRAILING_COMMENT_RE, -sub { - deb ">f=$1=$2=$3=$4=$5=$6=$7=\n"; - $function = { - returns => [], - arglist => [], - args => {}, - name => $1, - descr => parse_desc($4), - type => 'standalone' - }; - push @functions, $function; -} ], - -[ 'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\173' . 
$TRAILING_COMMENT_RE, -sub { - deb ">cc=$1=$2=$3=$4=$5=$6=$7=\n"; - $function = { - returns => [], - arglist => [], - args => {}, - name => "$1.$2", - descr => parse_desc($5), - type => 'constructor' - }; - push @{${$class}{constructors}}, $function; -} ], - -[ '_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\052\057', -sub { - deb ">cc=$1=$2=$3=$4=$5=$6=$7=\n"; - $function = { - returns => [], - arglist => [], - args => {}, - name => "$1.$2", - descr => parse_desc($3), - type => 'constructor' - }; - push @{${$class}{constructors}}, $function; -} ], - -[ 'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\173]*\173' . $TRAILING_COMMENT_RE, -sub { - deb ">cm=$1=$2=$3=$4=$5=$6=$7=\n"; - my $name = "$1"; - $name =~ tr/A-Z/a-z/; - $name .= ":$2"; - $function = { - returns => [], - arglist => [], - args => {}, - name => $name, - descr => parse_desc($5), - type => 'method' - }; - push @{${$class}{methods}}, $function; -} ], - -[ 'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\173]*\173' . $TRAILING_COMMENT_RE, -sub { - deb ">cm=$1=$2=$3=$4=$5=$6=$7=\n"; - my $name = "$1"; - $name =~ tr/A-Z/a-z/; - $name .= ":$2"; - my ($c,$d) = ($1,$5); - $function = { - returns => [], - arglist => [], - args => {}, - name => $name, - descr => parse_desc($5), - type => 'metamethod' - }; - push @{${$class}{methods}}, $function; -} ], - -# Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into -# "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6) -# Handles functions like "loadfile(filename)" too. -[ '#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' . $TRAILING_COMMENT_RE, -sub { - deb ">a=$1=$2=$3=$4=$5=$6=\n"; - my $name = $1 eq 'OPT' ? "[$3]" : $3; - push @{${$function}{arglist}} , $name; - ${${$function}{args}}{$name} = {descr=>parse_function_arg_desc($6),} -} ], - -# same as above, except that there is no macro but a (multi-line) comment. 
-[ '\057\052\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\052\057', -sub { - deb ">a=$1=$2=$3=$4\n"; - my $name = $1 eq 'OPT' ? "[$3]" : $3; - push @{${$function}{arglist}} , $name; - ${${$function}{args}}{$name} = {descr=>parse_function_arg_desc($4),} -} ], - -[ '/\052\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\052/', -sub { - deb ">at=$1=$2=$3=$4=$5=$6=$7=\n"; - my $name = "$1"; - $name =~ tr/A-Z/a-z/; - $name .= ".$2"; - push @{${$class}{attributes}}, { name => $name, descr => parse_attrib_desc($4, $3) }; -} ], - -[ '/\052\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\052/', -sub { - deb ">ma=$1=$2=$3=$4=$5=$6=$7=\n"; - push @{${$function}{arglist}} , "..."; - ${${$function}{args}}{"..."} = {descr=>parse_function_arg_desc($2)} -} ], - -[ 'WSLUA_(FINAL_)?RETURN\050\s*.*?\s*\051\s*;' . $TRAILING_COMMENT_RE, -sub { - deb ">fr=$1=$2=$3=$4=$5=$6=$7=\n"; - push @{${$function}{returns}} , gorolla($4) if $4 ne ''; -} ], - -[ '\057\052\s*_WSLUA_RETURNS_\s*(.*?)\052\057', -sub { - deb ">fr2=$1=$2=$3=$4=$5=$6=$7=\n"; - push @{${$function}{returns}} , gorolla($1) if $1 ne ''; -} ], - -[ 'WSLUA_ERROR\s*\050\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' . $QUOTED_RE , -sub { - deb ">e=$1=$2=$3=$4=$5=$6=$7=\n"; - my $errors; - unless (exists ${$function}{errors}) { - $errors = ${$function}{errors} = []; - } else { - $errors = ${$function}{errors}; - } - push @{$errors}, gorolla($4); -} ], - -[ 'WSLUA_(OPT)?ARG_ERROR\s*\050\s*(([A-Z][A-Za-z0-9]+)_)?([a-z_]+)\s*,\s*([A-Z0-9]+)\s*,\s*' . 
$QUOTED_RE, -sub { - deb ">ae=$1=$2=$3=$4=$5=$6=$7=\n"; - my $errors; - unless (exists ${${${$function}{args}}{$5}}{errors}) { - $errors = ${${${$function}{args}}{$5}}{errors} = []; - } else { - $errors = ${${${$function}{args}}{$5}}{errors}; - } - push @{$errors}, gorolla($6); -} ], - -); - -my $anymatch = '(^ThIsWiLlNeVeRmAtCh$'; -for (@control) { - $anymatch .= "|${$_}[0]"; -} -$anymatch .= ')'; - -# for each file given in the command line args -my $file = shift; -my $docfile = 0; - -while ( $file ) { - - # continue to next loop if the file is not plain text - next unless -f $file; - - if (!$docfile) { - $docfile = $file; - $docfile =~ s#.*/##; - $docfile =~ s/\.c$/.$out_extension/; - } - - open C, "< $file" or die "Can't open input file $file: $!"; - open D, "> wsluarm_src/$docfile" or die "Can't open output file wsluarm_src/$docfile: $!"; - - my $b = ''; - $b .= $_ while (); - - close C; - - while ($b =~ /$anymatch/ms ) { - my $match = $1; -# print "\n-----\n$match\n-----\n"; - for (@control) { - my ($re,$f) = @{$_}; - if ( $match =~ /$re/ms) { - &{$f}(); - $b =~ s/.*?$re//ms; - last; - } - } - } - - # peek at next file to see if it's continuing this module - $file = shift; - # make sure we get the next plain text file - while ($file and !(-f $file)) { - $file = shift; - } - - if ($file) { - # we have another file - check it out - - open C, "< $file" or die "Can't open input file $file: $!"; - - my $peek_for_continue = ''; - $peek_for_continue .= $_ while (); - - close C; - - if ($peek_for_continue =~ /WSLUA_CONTINUE_MODULE\s*([A-Z][a-zA-Z0-9]+)/) { - if ($module{name} ne $1) { - die "Input file $file continues a different module: $1 (previous module is $module{name})!"; - } - # ok, we're continuing the same module - next; - } - } - - # if we got here, we're not continuing the module - - $modules{$module{name}} = $docfile; - - print "Generating source AsciiDoc for: $module{name}\n"; - - printf D ${$template_ref}{module_header}, $module{name}, $module{name}; - 
- if ($module{descr} && @{$module{descr}} >= 0) { - print_desc($module{descr}, 1); - } else { - die "did NOT print $module{name} description\n"; - } - - for my $cname (sort keys %classes) { - my $cl = $classes{$cname}; - printf D ${$template_ref}{class_header}, $cname, $cname; - - if (${$cl}{descr} && @{${$cl}{descr}} >= 0) { - print_desc(${$cl}{descr}, 2); - } else { - die "did NOT print $cname description\n"; - } - - if ( $#{${$cl}{constructors}} >= 0) { - for my $c (@{${$cl}{constructors}}) { - function_descr($c,3); - } - } - - if ( $#{${$cl}{methods}} >= 0) { - for my $m (@{${$cl}{methods}}) { - function_descr($m, 3); - } - } - - if ( $#{${$cl}{attributes}} >= 0) { - for my $a (@{${$cl}{attributes}}) { - my $a_id = ${$a}{name}; - $a_id =~ s/[^a-zA-Z0-9]/_/g; - printf D ${$template_ref}{class_attr_header}, $a_id, ${$a}{name}; - if (${$a}{descr} && @{${$a}{descr}} >= 0) { - print_desc(${$a}{descr}, 3); - } else { - die "did not print $a_id description\n"; - } - printf D ${$template_ref}{class_attr_footer}, ${$a}{name}, ${$a}{name}; - - } - } - - if (exists ${$template_ref}{class_footer}) { - printf D ${$template_ref}{class_footer}, $cname, $cname; - } - - } - - if ($#functions >= 0) { - printf D ${$template_ref}{global_functions_header}, $module{name}; - - for my $f (@functions) { - function_descr($f, 3); - } - - print D ${$template_ref}{global_functions_footer}; - } - - printf D ${$template_ref}{module_footer}, $module{name}; - - close D; - - %module = (); - %classes = (); - $class = undef; - $function = undef; - @functions = (); - $docfile = 0; - -} - -sub function_descr { - my $f = $_[0]; - my $indent = $_[1]; - my $section_name = 'UNKNOWN'; - - my $arglist = ''; - - for (@{ ${$f}{arglist} }) { - my $a = $_; - $a =~ tr/A-Z/a-z/; - $arglist .= "$a, "; - } - - $arglist =~ s/, $//; - $section_name = "${$f}{name}($arglist)"; - $section_name =~ s/[^a-zA-Z0-9]/_/g; - - printf D ${$template_ref}{function_header}, $section_name , "${$f}{name}($arglist)"; - - my @desc 
= ${$f}{descr}; - if ($#desc >= 0) { - print_desc(@desc, $indent); - } - - print D ${$template_ref}{function_args_header} if $#{${$f}{arglist}} >= 0; - - for my $argname (@{${$f}{arglist}}) { - my $arg = ${${$f}{args}}{$argname}; - $argname =~ tr/A-Z/a-z/; - $argname =~ s/\[(.*)\]/$1 (optional)/; - - printf D ${$template_ref}{function_arg_header}, $argname, $argname; - my @desc = ${$arg}{descr}; - if ($#desc >= 0) { - print_desc(@desc, $indent+2); - } - - if ( $#{${$arg}{errors}} >= 0) { - printf D ${$template_ref}{function_argerror_header}, $argname, $argname; - printf D ${$template_ref}{function_argerror}, $_, $_ for @{${$arg}{errors}}; - printf D ${$template_ref}{function_argerror_footer}, $argname, $argname; - } - - printf D ${$template_ref}{function_arg_footer}, $argname, $argname; - - } - - print D ${$template_ref}{function_args_footer} if $#{${$f}{arglist}} >= 0; - - if ( $#{${$f}{returns}} >= 0) { - printf D ${$template_ref}{function_returns_header}, ${$f}{name}; - printf D ${$template_ref}{function_returns}, $_ for @{${$f}{returns}}; - printf D ${$template_ref}{function_returns_footer}, ${$f}{name}; - } - - if ( $#{${$f}{errors}} >= 0) { - my $sname = exists ${$f}{section_name} ? ${$f}{section_name} : ${$f}{name}; - - printf D ${$template_ref}{function_errors_header}, $sname; - printf D ${$template_ref}{function_errors}, $_ for @{${$f}{errors}}; - printf D ${$template_ref}{function_errors_footer}, ${$f}{name}; - } - - printf D ${$template_ref}{function_footer}, $section_name; - -} diff --git a/docbook/make-wsluarm.py b/docbook/make-wsluarm.py new file mode 100755 index 0000000000..bdd1a0948c --- /dev/null +++ b/docbook/make-wsluarm.py @@ -0,0 +1,460 @@ +#!/usr/bin/env python3 +# +# make-wsluarm.py +# +# By Gerald Combs +# Based on make-wsluarm.pl by Luis E. 
Garcia Onatnon and Hadriel Kaplan +# +# Wireshark - Network traffic analyzer +# By Gerald Combs +# Copyright 1998 Gerald Combs +# +# SPDX-License-Identifier: GPL-2.0-or-later +'''\ +WSLUA's Reference Manual Generator + +This reads Doxygen-style comments in C code and generates wslua API documentation +formatted as AsciiDoc. + +Behavior as documented by Hadriel: +- Allows modules (i.e., WSLUA_MODULE) to have detailed descriptions +- Two (or more) line breaks in comments result in separate paragraphs +- Any indent with a single leading star '*' followed by space is a bulleted list item + reducing indent or having an extra linebreak stops the list +- Any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item + reducing indent or having an extra linebreak stops the list +''' + +import argparse +import logging +import os +import re +import sys + +from enum import Enum +from string import Template + +def parse_desc(description): + '''\ +Break up descriptions based on newlines and keywords. Some processing +is done for code blocks and lists, but the output is otherwise left +intact. Assumes the input has been stripped. +''' + + c_lines = description.strip().splitlines() + + if len(c_lines) < 1: + return '' + + adoc_lines = [] + cli = iter(c_lines) + for line in cli: + raw_len = len(line) + line = line.lstrip() + indent = raw_len - len(line) + + # If we find "[source,...]" then treat it as a block + if re.search(r'\[source.*\]', line): + # The next line *should* be a delimiter... + block_delim = next(cli).strip() + line += f'\n{block_delim}\n' + block_line = next(cli) + # XXX try except StopIteration + while block_line.strip() != block_delim: + # Keep eating lines until the closing delimiter. + # XXX Strip indent spaces? 
+ line += block_line + '\n' + block_line = next(cli) + line += block_delim + '\n' + + adoc_lines.append(line) + elif re.match(r'^\s*$', line): + # line is either empty or just whitespace, and we're not in a @code block + # so it's the end of a previous paragraph, beginning of new one + adoc_lines.append('') + else: + # We have a regular line, not in a @code block. + # Add it as-is. + + # if line starts with "@version" or "@since", make it a "Since:" + if re.match(r'^@(version|since)\s+', line): + line = re.sub(r'^@(version|since)\s+', 'Since: ', line) + adoc_lines.append(line) + + # If line starts with single "*" and space, leave it mostly intact. + elif re.match(r'^\*\s', line): + adoc_lines += ['', line] + # keep eating until we find a blank line or end + line = next(cli) + try: + while not re.match(r'^\s*$', line): + raw_len = len(line) + line = line.lstrip() + # if this is less indented than before, break out + if raw_len - len(line) < indent: + break + adoc_lines += [line] + line = next(cli) + except StopIteration: + pass + adoc_lines.append('') + + # if line starts with "1." and space, leave it mostly intact. + elif re.match(r'^1\.\s', line): + adoc_lines += ['', line] + # keep eating until we find a blank line or end + line = next(cli) + try: + while not re.match(r'^\s*$', line): + raw_len = len(line) + line = line.lstrip() + # if this is less indented than before, break out + if raw_len - len(line) < indent: + break + adoc_lines += [line] + line = next(cli) + except StopIteration: + pass + adoc_lines.append('') + + # Just a normal line, add it to array + else: + # Nested Lua arrays + line = re.sub(r'\[\[(.*)\]\]', r'$$\1$$', line) + adoc_lines += [line] + + # Strip out consecutive empty lines. + # This isn't strictly necessary but makes the AsciiDoc output prettier. 
class LuaFunction:
    """One documented Lua function, constructor, or method found in a C file.

    An instance keeps the parsed description together with the arguments,
    return values, and errors scraped from the function's source text, and
    can render all of that as an AsciiDoc section.
    """

    def __init__(self, c_file, id, start, name, raw_description):
        """Store the function's identity and parse its description.

        The positional values are stored as given. A missing (falsy)
        raw_description is treated as the empty string before parsing.
        """
        self.c_file = c_file
        self.id = id
        self.start = start
        self.name = name
        self.description = parse_desc(raw_description or '')
        self.arguments = []  # (name, description, optional)
        self.returns = []  # description
        self.errors = []  # description
        logging.info(f'Created function {id} ({name}) at {start}')

    def add_argument(self, id, raw_name, raw_description, raw_optional):
        """Append one argument; exit with status 1 if `id` is not this function's ID.

        The name is lower-cased, the description is parsed (a falsy one
        becomes ''), and the argument is optional only when raw_optional
        is exactly 'OPT'.
        """
        if id != self.id:
            logging.critical(f'Invalid argument ID {id} in function {self.id}')
            sys.exit(1)
        is_optional = raw_optional == 'OPT'
        entry = (raw_name.lower(), parse_desc(raw_description or ''), is_optional)
        self.arguments.append(entry)

    def extract_buf(self, buf):
        """Extract arguments, errors, and return values from a function's buffer."""
        flags = re.MULTILINE | re.DOTALL

        # Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into
        # "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6)
        # Handles functions like "loadfile(filename)" too.
        define_arg_re = r'#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' + TRAILING_COMMENT_RE
        for m in re.finditer(define_arg_re, buf, flags):
            opt, func_id, arg_name, text = m.group(1), m.group(2), m.group(3), m.group(6)
            self.add_argument(func_id, arg_name, text, opt)
            logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')

        # Same as above, except that there is no macro but a (multi-line) comment.
        comment_arg_re = r'/\*\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\*/'
        for m in re.finditer(comment_arg_re, buf, flags):
            opt, func_id, arg_name, text = m.group(1), m.group(2), m.group(3), m.group(4)
            self.add_argument(func_id, arg_name, text, opt)
            logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')

        # A variadic tail is recorded as a required argument named '...'.
        for m in re.finditer(r'/\*\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\*/', buf, flags):
            self.add_argument(m.group(1), '...', m.group(2), False)
            logging.info(f'Created morearg for {self.id}')

        # Return statements contribute their trailing comment, when it is non-empty.
        return_re = r'WSLUA_(FINAL_)?RETURN\(\s*.*?\s*\)\s*;' + TRAILING_COMMENT_RE
        for m in re.finditer(return_re, buf, flags):
            text = m.group(4)
            if text:
                self.returns.append(text.strip())
                logging.info(f'Created return for {self.id} at {m.start()}')

        # Return values may also be documented by a stand-alone comment.
        for m in re.finditer(r'/\*\s*_WSLUA_RETURNS_\s*(.*?)\*/', buf, flags):
            text = m.group(1)
            if text:
                self.returns.append(text.strip())
                logging.info(f'Created return for {self.id} at {m.start()}')

        # Errors are documented by the quoted message passed to WSLUA_ERROR.
        error_re = r'WSLUA_ERROR\s*\(\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' + QUOTED_RE
        for m in re.finditer(error_re, buf, flags):
            message = m.group(4)
            self.errors.append(message.strip())
            logging.info(f'Created error {m.group(4)[:10]} for {self.id} at {m.start()}')

    def to_adoc(self):
        """Return this function rendered as an AsciiDoc section."""
        # The Perl script wrapped optional args in '[]', joined them with ', ', and
        # converted non-alphabetic characters to underscores.
        anchor_parts = []
        signature_parts = []
        for arg_name, _, is_optional in self.arguments:
            anchor_parts.append(f'_{arg_name}_' if is_optional else arg_name)
            signature_parts.append(f'[{arg_name}]' if is_optional else arg_name)
        section_name = re.sub('[^A-Za-z0-9]', '_', f'{self.name}_{"__".join(anchor_parts)}_')
        signature = ', '.join(signature_parts)

        chunks = [
            f'\n[#lua_fn_{section_name}]\n',
            f'\n===== {self.name}({signature})\n',
            f'\n{self.description}\n',
        ]

        if self.arguments:
            chunks.append('\n[float]\n===== Arguments\n')
            for arg_name, arg_description, is_optional in self.arguments:
                # The footer comment deliberately carries the same label as the term.
                label = f'{arg_name} (optional)' if is_optional else arg_name
                chunks.append(f'\n{label}::\n')
                if arg_description:
                    chunks.append(f'\n{arg_description}\n')
                chunks.append(f'\n// function_arg_footer: {label}')
            chunks.append('\n// end of function_args\n')

        if self.returns:
            chunks.append('\n[float]\n===== Returns\n')
            chunks.extend(f'\n{text}\n' for text in self.returns)
            chunks.append(f'\n// function_returns_footer: {self.name}')

        if self.errors:
            chunks.append('\n[float]\n===== Errors\n')
            chunks.extend(f'\n* {text}\n' for text in self.errors)
            chunks.append(f'\n// function_errors_footer: {self.name}')

        chunks.append(f'\n// function_footer: {section_name}\n')
        return ''.join(chunks)


# group 1: whole trailing comment (possibly empty), e.g. " /* foo */"
# group 2: any leading whitespace. XXX why is this not removed using (?:...)
# group 3: actual comment text, e.g. " foo ".
TRAILING_COMMENT_RE = r'((\s*|[\n\r]*)/\*(.*?)\*/)?'
IN_COMMENT_RE = r'[\s\r\n]*((.*?)\s*\*/)?'
QUOTED_RE = r'"([^"]*)"'

# XXX We might want to create a "LuaClass" class similar to LuaFunction
# and move these there.
def extract_class_definitions(c_file, c_buf, module, classes, functions):
    """Register every class declared with WSLUA_CLASS_DEFINE[_BASE] in c_buf.

    Each class is stored in `classes` under its name, with its parsed
    trailing-comment description and empty 'constructors', 'methods', and
    'attributes' lists. c_file, module, and functions are unused; they are
    accepted so that every extractor shares one call signature.

    Returns 0.
    """
    pattern = r'WSLUA_CLASS_DEFINE(?:_BASE)?\(\s*([A-Z][a-zA-Z0-9]+).*?\);' + TRAILING_COMMENT_RE
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        name = m.group(1)
        # group 4 is the trailing comment text; it is None when there is no comment.
        classes[name] = {
            'description': parse_desc(m.group(4) or ''),
            'constructors': [],
            'methods': [],
            'attributes': [],
        }
        logging.info(f'Created class {name}')
    return 0


def extract_function_definitions(c_file, c_buf, module, classes, functions):
    """Register every global function defined with WSLUA_FUNCTION in c_buf.

    The text after the 'wslua_' prefix is used both as the key in
    `functions` and as the displayed name.
    """
    pattern = r'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        func_id = m.group(1)
        functions[func_id] = LuaFunction(c_file, func_id, m.start(), func_id, m.group(4))


def extract_constructor_definitions(c_file, c_buf, module, classes, functions):
    """Register every constructor defined with WSLUA_CONSTRUCTOR in c_buf.

    The constructor is stored in `functions` as 'Class_name', displayed as
    'Class.name', and appended to its class's 'constructors' list. The class
    must already be present in `classes`.
    """
    pattern = r'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\{' + TRAILING_COMMENT_RE
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        class_name = m.group(1)
        func_id = f'{class_name}_{m.group(2)}'
        name = f'{class_name}.{m.group(2)}'
        functions[func_id] = LuaFunction(c_file, func_id, m.start(), name, m.group(5))
        classes[class_name]['constructors'].append(func_id)


def extract_constructor_markups(c_file, c_buf, module, classes, functions):
    """Register constructors documented only by a _WSLUA_CONSTRUCTOR_ comment.

    Works like extract_constructor_definitions, except that the description
    is the remainder of the comment that carries the markup.
    """
    pattern = r'_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\*/'
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        class_name = m.group(1)
        func_id = f'{class_name}_{m.group(2)}'
        name = f'{class_name}.{m.group(2)}'
        functions[func_id] = LuaFunction(c_file, func_id, m.start(), name, m.group(3))
        classes[class_name]['constructors'].append(func_id)


def extract_method_definitions(c_file, c_buf, module, classes, functions):
    """Register every method defined with WSLUA_METHOD in c_buf.

    The method is stored in `functions` as 'Class_name', displayed as
    'class:name' (class name lower-cased), and appended to its class's
    'methods' list. The class must already be present in `classes`.
    """
    pattern = r'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        class_name = m.group(1)
        func_id = f'{class_name}_{m.group(2)}'
        name = f'{class_name.lower()}:{m.group(2)}'
        functions[func_id] = LuaFunction(c_file, func_id, m.start(), name, m.group(5))
        classes[class_name]['methods'].append(func_id)


def extract_metamethod_definitions(c_file, c_buf, module, classes, functions):
    """Register every metamethod defined with WSLUA_METAMETHOD in c_buf.

    Metamethods keep their leading double underscore, e.g. 'Class__tostring'
    is displayed as 'class:__tostring', and are listed with the class's
    ordinary methods.
    """
    pattern = r'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        class_name = m.group(1)
        func_id = f'{class_name}{m.group(2)}'
        name = f'{class_name.lower()}:{m.group(2)}'
        functions[func_id] = LuaFunction(c_file, func_id, m.start(), name, m.group(5))
        classes[class_name]['methods'].append(func_id)


def extract_attribute_markups(c_file, c_buf, module, classes, functions):
    """Register every attribute documented by a WSLUA_ATTRIBUTE comment.

    Each attribute is appended to its class's 'attributes' list as a dict
    with a 'name' ('class.attr', class name lower-cased) and a parsed
    'description' that starts with the access mode. Exits with status 1
    when the markup carries none of the RO/WO/RW/WR modes.
    """
    pattern = r'/\*\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\*/'
    for m in re.finditer(pattern, c_buf, re.MULTILINE | re.DOTALL):
        class_name = m.group(1)
        name = f'{m.group(1).lower()}.{m.group(2)}'
        mode = m.group(3)
        mode_desc = 'Mode: '
        if 'RO' in mode:
            mode_desc += 'Retrieve only.\n'
        elif 'WO' in mode:
            mode_desc += 'Assign only.\n'
        elif 'RW' in mode or 'WR' in mode:
            mode_desc += 'Retrieve or assign.\n'
        else:
            # Reported through logging and naming the attribute, so the
            # offending markup can be found in the C source.
            logging.critical(f'Attribute {name} does not have a RO/WO/RW mode {mode}')
            sys.exit(1)

        attribute = {
            'name': name,
            'description': parse_desc(f'{mode_desc}\n{m.group(4)}'),
        }
        classes[class_name]['attributes'].append(attribute)
        logging.info(f'Created attribute {name} for class {class_name}')


def _group_sources_by_module(c_files):
    """Read each C file and group (basename, contents) pairs by module name.

    A file holding WSLUA_MODULE is placed first in its module's list and
    WSLUA_CONTINUE_MODULE files are appended. The output file base is taken
    from the first file seen for a module. Files without either marker are
    reported with a warning and skipped.
    """
    modules = {}
    for c_file in c_files:
        with open(c_file, encoding='utf-8') as c_f:
            c_buf = c_f.read()

        # Peek for modules vs continuations.
        m = re.search(r'WSLUA_(|CONTINUE_)MODULE\s*(\w+)', c_buf)
        if not m:
            logging.warning(f'No module found in {c_file}')
            continue

        module_name = m.group(2)
        c_pair = (os.path.basename(c_file), c_buf)
        if module_name not in modules:
            modules[module_name] = {
                'c': [c_pair],
                'file_base': os.path.splitext(c_pair[0])[0],
            }
        elif m.group(1) == 'CONTINUE_':
            modules[module_name]['c'].append(c_pair)
        else:
            modules[module_name]['c'].insert(0, c_pair)
    return modules


def _extract_function_details(sources, functions):
    """Give each function the slice of its file that runs up to the next function."""
    for c_file, c_buf in sources:
        func_ids = sorted(
            (k for k in functions if functions[k].c_file == c_file),
            key=lambda k: functions[k].start,
        )
        # A file may define no functions at all (only classes or attributes);
        # in that case the loop below simply does nothing.
        for cur_id, next_id in zip(func_ids, func_ids[1:] + [None]):
            end = None if next_id is None else functions[next_id].start
            functions[cur_id].extract_buf(c_buf[functions[cur_id].start:end])


def _write_module_adoc(adoc_f, module_name, description, classes, functions):
    """Write one module's AsciiDoc: classes first, then global functions.

    Constructors and methods are removed from `functions` as they are
    written, so whatever remains afterwards is emitted as global functions.
    """
    adoc_f.write(f'''\
[#lua_module_{module_name}]

=== {description}
''')
    for class_name in sorted(classes):
        lua_class = classes[class_name]
        adoc_f.write(f'''
[#lua_class_{class_name}]

==== {class_name}
''')

        if lua_class['description'] != '':
            adoc_f.write(f'\n{lua_class["description"]}\n')

        for member_kind in ('constructors', 'methods'):
            for func_id in sorted(lua_class[member_kind], key=lambda fid: functions[fid].start):
                adoc_f.write(functions[func_id].to_adoc())
                del functions[func_id]

        for attribute in lua_class['attributes']:
            attribute_id = re.sub('[^A-Za-z0-9]', '_', f'{attribute["name"]}')
            adoc_f.write(f'''
[#lua_class_attrib_{attribute_id}]

===== {attribute["name"]}

{attribute["description"]}

// End {attribute["name"]}
''')

        adoc_f.write(f'\n// class_footer: {class_name}\n')

    if functions:
        adoc_f.write(f'''\
[#global_functions_{module_name}]

==== Global Functions
''')
        for global_id in sorted(functions, key=lambda fid: functions[fid].start):
            adoc_f.write(functions[global_id].to_adoc())
        adoc_f.write('// Global function\n')

    adoc_f.write('// end of module\n')


def main():
    """Generate one AsciiDoc file per WSLUA module from the given C files.

    Output goes to --output-directory, which defaults to the current
    directory. A module whose description cannot be found is reported and
    skipped; the remaining modules are still written.
    """
    parser = argparse.ArgumentParser(description="WSLUA's Reference Manual Generator")
    parser.add_argument("c_files", nargs='+', metavar='C file', help="C file")
    parser.add_argument('--output-directory', default='.', help='Output directory (default: current directory)')
    parser.add_argument('--verbose', action='store_true', help='Show more output')
    args = parser.parse_args()

    logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG if args.verbose else logging.WARNING)

    modules = _group_sources_by_module(args.c_files)

    # Class definitions must run first: the later extractors attach
    # constructors, methods, and attributes to classes that already exist.
    extractors = [
        extract_class_definitions,
        extract_function_definitions,
        extract_constructor_definitions,
        extract_constructor_markups,
        extract_method_definitions,
        extract_metamethod_definitions,
        extract_attribute_markups,
    ]

    for module_name in sorted(modules):
        module = modules[module_name]
        sources = module['c']
        adoc_file = f'{module["file_base"]}.adoc'
        logging.info(f'Writing module {module_name} to {adoc_file} from {len(sources)} input(s)')
        functions = {}
        classes = {}

        # Extract our module's description.
        m = re.search(r'WSLUA_MODULE\s*[A-Z][a-zA-Z0-9]+' + IN_COMMENT_RE, sources[0][1], re.MULTILINE | re.DOTALL)
        if not m:
            logging.error(f'No WSLUA_MODULE description found for module {module_name} in {sources[0][0]}, skipping it')
            continue
        # group 2 is None when no comment terminator follows the module name.
        module['description'] = parse_desc(m.group(2) or '')

        # Extract module-level information from each file.
        for (c_file, c_buf) in sources:
            for extractor in extractors:
                extractor(c_file, c_buf, module, classes, functions)

        # Extract function-level information from each file.
        _extract_function_details(sources, functions)

        with open(os.path.join(args.output_directory, adoc_file), 'w', encoding='utf-8') as adoc_f:
            _write_module_adoc(adoc_f, module_name, module['description'], classes, functions)


if __name__ == '__main__':
    main()