wireshark/tools/tpg/tpg.pl

595 lines
16 KiB
Perl
Executable File

#!/usr/bin/perl
#
# TPG TVB Parser Generator
#
# Given a bnf like grammar generate a parser for text based tvbs
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
use TPG;
use V2P;
use strict;
my $DEBUG = 0;
my $b = '';
while(<>) {
$b .= $_;
}
my @T = @{tokenizer()};
my $linenum = 1;
my %CODE = ();
my $codenum = 0;
$b =~ s/\%\{(.*?)\%\}/add_code($1)/egms;
$b =~ s/#.*?\n/\n/gms;
my $parser = new TPG();
my $last_token = '';
$parser->YYData->{DATA}=\$linenum;
my $parser_info = $parser->YYParse(yylex => \&next_token, yyerror => \&error_sub);#,yydebug => 0x1f);
die "failed parsing" unless defined $parser_info;
if ($DEBUG > 3) {
warn "\n=========================== parser_info ===========================\n";
warn V2P::var2perl( $parser_info );
warn "\n=========================== ======== ===========================\n" ;
}
my $proto_name = ${$parser_info}{proto_name};
my $upper_name = $proto_name;
$upper_name =~ tr/a-z/A-Z/;
my $global_struct = "$proto_name\_tpg_data";
warn "parser_data_type: ${$parser_info}{pdata}\n" if $DEBUG;
my %exports = %{${$parser_info}{export}};
my $field_num = 0;
my $tt_type = ${$parser_info}{pdata};
$tt_type =~ s/\n#line.*?\n//ms;
$tt_type =~ s@\n/\*eocode\*/\n@@ms;
my $init_function_hfs = "\n/* initialize hfids */\n";
my $init_function_etts = "\n/* initialize etts */\n";
my $init_function_wanted_decl = "\n/* declare private wanted elements */\n";
my $init_function_wanted = "\n/* initialize wanted elements */\n";
my $callback_definitions = "\n/* callback definitions */\n";
my $datastruct_ett = "\n/* etts */\n";
my $datastruct_hf = "\n/* hfis */\n";
my $datastruct_wanted = "\n/* wanted elems */\n";
my $hfarr = "/* field array */\n#define HF_$upper_name\_PARSER \\\n";
my $ett_arr = "#define ETT_$upper_name\_PARSER \\\n";
for my $fieldname (keys %{${$parser_info}{fields}}) {
my $f = ${${$parser_info}{fields}}{$fieldname};
my $vs = defined ${$f}{vs} ? 'VALS(' . ${$f}{vs}. ')' : "NULL" ;
${$f}{vname} = "$global_struct.hf_${$f}{name}" unless defined ${$f}{vname};
${$f}{base} = 'BASE_NONE' unless defined ${$f}{base};
${$f}{desc} = '""' unless defined ${$f}{desc};
$datastruct_hf .= "\tint hf_${$f}{name};\n";
$init_function_hfs .= "\t${$f}{vname} = -1;\n";
$hfarr .= "{ &${$f}{vname}, { ${$f}{pname}, ${$f}{abbr}, ${$f}{type}, ${$f}{base}, $vs, 0x0, ${$f}{desc}, HFILL }},\\\n";
# warn "\nFIELD:$fieldname " . V2P::var2perl($f);
}
$hfarr =~ s/,\\\n$/\n/msi;
for my $rulename ( keys %{${$parser_info}{rules}} ) {
my $r = ${${$parser_info}{rules}}{$rulename};
# warn "\nRULE BEFORE:$rulename " . V2P::var2perl($r);
make_rule($r,0);
# warn "\nRULE AFTER:$rulename " . V2P::var2perl($r);
}
$ett_arr =~ s/,\\\n$//ms;
for my $rulename (sort keys %{${$parser_info}{rules}} ) {
my $r = ${${$parser_info}{rules}}{$rulename};
$callback_definitions .= "\n\n/* callback definitions for rule $rulename */\n";
$callback_definitions .= ${$r}{before_cb_code} . "\n";
$callback_definitions .= ${$r}{after_cb_def} . "\n";
$init_function_wanted .= ${$r}{definition_code} . "\n\n";
}
my $h_file = <<"__H_HEAD";
/*
$proto_name-parser.h
automagically generated by $0 from $ARGV
DO NOT MODIFY.
*/
#ifndef _H_$upper_name\_PARSER
#define _H_$upper_name\_PARSER
#include <epan/tpg.h>
/* begin %header_head */
${$parser_info}{header_head}
/* end %header_head */
extern void tpg_${proto_name}_init(void);
struct _${proto_name}_tpg_data_t {
$datastruct_ett
$datastruct_hf
$datastruct_wanted
};
extern struct _${global_struct}_t $global_struct;
$hfarr
$ett_arr
#endif
__H_HEAD
my $c_file = <<"__C_FILE";
/*
$proto_name-parser.c
automagically generated by $0 from $ARGV
DO NOT MODIFY.
*/
#include "config.h"
#include "$proto_name-parser.h"
/* begin %head */
${$parser_info}{head}
/* end %head */
/* hfids container */
struct _${proto_name}_tpg_data_t $global_struct;
$callback_definitions
/* end callback definitions */
void tpg_$proto_name\_init(void) {
$init_function_wanted_decl
$init_function_hfs
$init_function_etts
$init_function_wanted
}
/* begin %tail */
${$parser_info}{tail}
/* end %tail */
__C_FILE
my $c_buf = '';
my $c_line = 3;
while($c_file =~ s/^([^\n]*)\n//ms) {
my $line = $1;
$c_line += 2 if $line =~ s@/\*eocode\*/@\n#line $c_line \"$proto_name-parser.c\"\n@;
$c_buf .= $line . "\n";
$c_line++;
}
my $h_buf = '';
my $h_line = 3;
while($h_file =~ s/^([^\n]*)\n//ms) {
my $line = $1;
$h_line += 2 if $line =~ s@/\*eocode\*/@\n#line $h_line \"$proto_name-parser.h\"\n@;
$h_buf .= $line . "\n";
$h_line++;
}
open C, "> $proto_name-parser.c";
open H, "> $proto_name-parser.h";
print C $c_buf;
print H $h_buf;
close C;
close H;
exit;
sub make_rule {
my $r = shift;
my $dd = shift;
my $rule_id = "0";
my $code = \${$r}{definition_code};
my $indent;
for (0..$dd) {
$indent .= "\t";
}
my $indent_more = $indent . "\t";
my $min;
my $max;
if (exists ${$r}{min}) {
$min = ${$r}{min};
} else {
$min = ${$r}{min} = 1;
}
if (exists ${$r}{max}) {
$max = ${$r}{max};
} else {
$max = ${$r}{max} = 1;
}
if ($dd == 0) {
my %VARS = ();
if ( exists $exports{${$r}{name}}) {
${$code} = "\t$global_struct.";
$datastruct_wanted .= "\ttvbparse_wanted_t* wanted_$proto_name\_${$r}{name};\n"
} else {
${$code} = "\t";
$init_function_wanted_decl .= "\tstatic tvbparse_wanted_t* wanted_$proto_name\_${$r}{name};\n"
}
${$code} .= "wanted_$proto_name\_${$r}{name} = ";
$VARS{"TT_DATA"} = "TPG_DATA(tpg,$tt_type)" if defined $tt_type;
make_vars(\%VARS,$r,"elem");
# warn "VARS::${$r}{name} " . V2P::var2perl(\%VARS);
my $tree_code_head = "";
my $tree_code_body = "";
my $tree_code_after = "";
make_tree_code($r,\$tree_code_head,\$tree_code_body,\$tree_code_after,"elem");
if (length $tree_code_body ) {
my $cb_name = ${$r}{before_cb_name} = "${$r}{name}\_before_cb";
${$r}{before_cb_code} = "static void $cb_name(void* tpg _U_, const void* wd _U_, struct _tvbparse_elem_t* elem _U_) {\n\tproto_item* pi;\n$tree_code_head\n$tree_code_body\n}";
${$r}{code} .= $tree_code_after;
}
my $tree_code = \${$r}{tree_code};
if (${$r}{code}) {
my $after = ${$r}{code};
${$r}{after_cb_name} = "${$r}{name}_after\_cb";
${$r}{after_cb_def} = "static void ${$r}{after_cb_name}(void* tpg _U_, const void* wd _U_, struct _tvbparse_elem_t* elem _U_) {\n";
for (keys %VARS) {
$after =~ s/($_)([A-Z]?)/$VARS{$1}$2/msg;
}
${$r}{after_cb_def} .= $after . "\n}\n";
}
}
my $after_fn = ${$r}{after_cb_name} ? ${$r}{after_cb_name} : "NULL";
my $before_fn = ${$r}{before_cb_name} ? ${$r}{before_cb_name} : "NULL";
my $wd_data = "NULL";
if (exists ${$r}{field}) {
my $field = ${${$parser_info}{fields}}{${$r}{field}};
die "field ${$r}{field} does not exists\n" . V2P::var2perl(${$parser_info}{fields}) unless defined $field;
my $ett = exists ${$r}{ett} ? ${$r}{ett} : "NULL";
my $wd_data = 'tpg_wd(${$field}{vname},$ett,NULL)';
}
my $control = ${$r}{control};
if (${$r}{type} eq 'chars' || ${$r}{type} eq 'not_chars') {
if (! ($min == 1 && $max == 1) ) {
${$code} .= $indent . "tvbparse_${$r}{type}($rule_id,$min,$max,$control,$wd_data,$before_fn,$after_fn)"
} else {
my $rn = ${$r}{type};
$rn =~ s/.$//;
${$code} .= $indent . "tvbparse_$rn($rule_id,$control,$wd_data,$before_fn,$after_fn)"
}
} else {
if (! ($min == 1 && $max == 1)) {
${$code} .= $indent . "tvbparse_some(0,$min,$max,NULL,NULL,NULL,\n";
}
if (${$r}{type} eq 'string') {
${$code} .= $indent . "tvbparse_string($rule_id,$control,$wd_data,$before_fn,$after_fn)";
} elsif (${$r}{type} eq 'caseless') {
${$code} .= $indent . "tvbparse_casestring($rule_id,$control,$wd_data,$before_fn,$after_fn)";
} elsif (${$r}{type} eq 'named') {
if(exists $exports{$control}) {
${$code} .= $indent . "tvbparse_handle(&$global_struct.wanted_$proto_name\_$control)";
} else {
${$code} .= $indent . "tvbparse_handle(&wanted_$proto_name\_$control)";
}
} elsif (${$r}{type} eq 'seq') {
${$code} .= $indent . "tvbparse_set_seq($rule_id,$wd_data,$before_fn,$after_fn,\n";
for ( @{${$r}{subrules}}) {
$dd++;
${$code} .= $indent_more . make_rule($_,$dd) . ",\n";
$dd--;
}
${$code} .= $indent . " NULL)"
} elsif (${$r}{type} eq 'choice') {
${$code} .= $indent . "tvbparse_set_oneof($rule_id,$wd_data,$before_fn,$after_fn,\n";
for (@{${$r}{subrules}}) {
$dd++;
${$code} .= $indent_more . make_rule($_,$dd) . ",\n";
$dd--;
}
${$code} .= $indent . " NULL)"
} elsif (${$r}{type} eq 'until') {
${$r}{inc_mode} = 'TP_UNTIL_SPEND' unless defined ${$r}{inc_mode};
${$code} .= $indent ."tvbparse_until(0,$wd_data,$before_fn,$after_fn,\n";
$dd++;
${$code} .= $indent_more . make_rule(${$r}{subrule},$dd) . ", ${$r}{inc_mode})";
$dd--;
}
if (! ($min == 1 && $max == 1) ) {
${$code} .= ")";
}
}
if ($dd == 0) {
${$code} .= ";\n";
# warn "RULE::${$r}{name} " . V2P::var2perl($r);
}
${$code};
}
sub make_vars {
my $v = shift;
my $r = shift;
my $base = shift;
if (exists ${$r}{var}) {
${$v}{${$r}{var}} = $base;
}
if (! ( ${$r}{type} =~ /chars$/ ) && ! (${$r}{min} == 1 && ${$r}{max} == 1) ) {
$base .= "->sub";
}
if (exists ${$r}{subrule} ) {
make_vars($v,${$r}{subrule},"$base->sub");
}
if (exists ${$r}{subrules} ) {
my $sub_base = "$base->sub";
for my $rule (@{${$r}{subrules}}) {
make_vars($v,$rule,$sub_base);
$sub_base .= "->next";
}
}
}
sub make_tree_code {
my $r = shift;
my $head = shift;
my $body = shift;
my $after = shift;
my $elem = shift;
if (exists ${$r}{field}) {
my $fieldname = ${$r}{field};
my $f = ${${$parser_info}{fields}}{$fieldname};
my $root_var = '';
if (exists ${$r}{tree}) {
$root_var = "root_$fieldname";
${$head} .= "\tproto_item* $root_var;\n\n";
${$body} .= "\t$root_var = ";
$ett_arr .= "\t&$global_struct.ett_$fieldname,\\\n";
$datastruct_ett .= "\tguint ett_$fieldname; \n";
$init_function_etts .= "\t$global_struct.ett_$fieldname = -1;\n";
${$r}{ett} = "$global_struct.ett_$fieldname";
} else {
${$body} .= "\t";
}
if (${$f}{type} eq 'FT_STRING') {
${$body} .= "\tpi = TPG_ADD_STRING(tpg,${$f}{vname},$elem);\n";
} elsif (${$f}{type} =~ /^FT_UINT/) {
my $fieldvar = "tpg_uint_$fieldname";
${$head} .= "\tguint $fieldvar = TPG_UINT($elem);\n";
${$body} .= "\tpi = TPG_ADD_UINT(tpg,${$f}{vname},$elem,$fieldvar);\n";
} elsif (${$f}{type} =~ /^FT_INT/) {
my $fieldvar = "tpg_int_$fieldname";
${$head} .= "\tgint $fieldvar = TPG_INT($elem);\n";
${$body} .= "\tpi = TPG_ADD_INT(tpg,${$f}{vname},$elem,$fieldvar);\n";
} elsif (${$f}{type} eq 'FT_IPV4') {
my $fieldvar = "tpg_ipv4_$fieldname";
${$head} .= "\tguint32 $fieldvar = TPG_IPV4($elem);\n";
${$body} .= "\tpi = TPG_ADD_IPV4(tpg,${$f}{vname},$elem,$fieldvar);\n";
} elsif (${$f}{type} eq 'FT_IPV6') {
my $fieldvar = "tpg_ipv6_$fieldname";
${$head} .= "\tguint8* $fieldvar = TPG_IPV6($elem);\n";
${$body} .= "\tpi = TPG_ADD_IPV6(tpg,${$f}{vname},$elem,$fieldvar);\n";
} else {
${$body} .= "\tpi = TPG_ADD_TEXT(tpg,$elem);\n";
}
if (exists ${$r}{plain_text}) {
${$body} .= "\tTPG_SET_TEXT(pi,$elem);\n"
}
if (exists ${$r}{tree}) {
${$body} .= "\tTPG_PUSH(tpg,$root_var,${$r}{ett});\n";
}
}
if (! ( ${$r}{type} =~ /chars$/ ) && ! (${$r}{min} == 1 && ${$r}{max} == 1) ) {
$elem .= "->sub";
}
if (exists ${$r}{subrule} ) {
make_tree_code(${$r}{subrule},$head,$body,$after,"$elem->sub");
}
if (exists ${$r}{subrules} ) {
my $sub_base = "$elem->sub";
for my $rule (@{${$r}{subrules}}) {
make_tree_code($rule,$head,$body,$after,$sub_base);
$sub_base .= "->next";
}
}
if (exists ${$r}{field}) {
if (exists ${$r}{tree}) {
${$after} .= "\n\t/* tree after code */\n\tTPG_POP(tpg);\n";
}
}
}
sub tokenizer {
[
[ '(FT_(UINT(8|16|24|32)|STRING|INT(8|16|24|32)|IPV[46]|ETH|BOOLEAN|DOUBLE|FLOAT|(ABSOLUTE|RELATIVE)_TIME|BYTES))' , sub { [ 'FT', $_[0] ] } ],
[ '(BASE_(NONE|DEC|HEX))', sub { [ 'BASE', $_[0] ] }],
[ '([a-z]+\\.[a-z0-9_\\.]*[a-z])', sub { [ 'DOTEDNAME', $_[0] ] }],
[ '([a-z][a-z0-9_]*)', sub { [ 'LOWERCASE', $_[0] ] }],
[ '([A-Z][A-Z0-9_]*)', sub { [ 'UPPERCASE', $_[0] ] }],
[ '([0-9]+|0x[0-9a-fA-F]+)', sub { [ 'NUMBER', $_[0] ] }],
[ '(\%\%[0-9]+\%\%)', \&c_code ],
[ "'((\\\\'|[^'])*)'", sub { [ 'SQUOTED', $_[0] ] }],
[ '\[\^((\\\\\\]|[^\\]])*)\]', sub { [ 'NOTCHARS', $_[0] ] }],
[ '\[((\\\\\\]|[^\\]])*)\]', sub { [ 'CHARS', $_[0] ] }],
[ '"((\\\\"|[^"])*)"', sub { [ 'DQUOTED', $_[0] ] }],
[ '(\%[a-z_]+|\%[A-Z][A-Z-]*|\&|\=|\.\.\.|\.|\:|\;|\(|\)|\{|\}|\+|\*|\?|\<|\>|\|)', sub { [ $_[0], $_[0] ] }],
]
}
sub next_token {
if ($b =~ s/^([\r\n\s]+)// ) {
my $l = $1;
while ( $l =~ s/\n//ms ) {
$linenum++;
}
}
return (undef,'') unless length $b;
for (@T) {
my ($re,$ac) = @{$_};
if( $b =~ s/^$re//ms) {
$a = &{$ac}($1);
$last_token = ${$a}[1];
#warn "=($linenum)=> ${$a}[0] ${$a}[1]\n";
return (${$a}[0],${$a}[1]);
}
}
die "unrecognized token at line $linenum after '$last_token'";
}
sub error_sub {
my @a = $_[0]->YYExpect;
my $t = $_[0]->YYCurtok;
die "error at $linenum after '$last_token' expecting (@a)";
}
sub add_code {
my $k = "%%$codenum%%";
$CODE{$k} = $_[0];
$codenum++;
return $k;
}
sub c_code {
my $k = $_[0];
my $t = $CODE{$k};
my $start = $linenum;
$linenum++ while ( $t =~ s/\n// );
return [ 'CODE', "\n#line $start \"$ARGV\"\n$CODE{$k}\n/*eocode*/\n"];
}
__END__
do {
($type,$value) = @{next_token()};
last if not defined $type;
} while(1);