wireshark/tools/checkhf.pl

490 lines
16 KiB
Perl
Raw Normal View History

#!/usr/bin/perl
#
# Copyright 2013, William Meier (See AUTHORS file)
#
# Validate hf_... usage for a dissector file;
#
# Usage: checkhf.pl [--debug=?] <file or files>
#
# $Id$
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
## Note: This program is a re-implementation of the
## original checkhf.pl written and (C) by Joerg Mayer.
## The overall objective of the new implementation was to reduce
## the number of false positives which occurred with the
## original checkhf.pl
##
## This program can be used to scan original .c source files or source
## files which have been passed through a C pre-processor.
## Operating on pre-processed source files is optimal; there should be
## minimal false positives.
## If the .c input is an original source file there may very well be
## false positives/negatives due to the fact that the hf_... variables & etc
## may have been created via macros.
##
## ----- (The following is extracted from the original checkhf.pl with thanks to Joerg) -------
## Example:
## ~/work/wireshark/trunk/epan/dissectors> ../../tools/checkhf.pl packet-afs.c
## Unused entry: packet-afs.c, hf_afs_ubik_voteend
## Unused entry: packet-afs.c, hf_afs_ubik_errcode
## Unused entry: packet-afs.c, hf_afs_ubik_votetype
## ERROR: NO ARRAY: packet-afs.c, hf_afs_fs_ipaddr
##
## or checkhf.pl packet-*.c, which will check all the dissector files.
##
## NOTE: This tool currently generates false positives!
##
## The "NO ARRAY" message - if accurate - points to an error that will
## cause (t|wire)shark to terminate with an assertion when a packet containing
## this particular element is being dissected.
##
## The "Unused entry" message indicates the opposite: We define an entry but
## never use it in a proto_...add... function.
## ------------------------------------------------------------------------------------
# ------------------------------------------------------------------------------------
# Main
#
# Logic:
# 1. Clean the input: remove blank lines, comments, quoted strings and code under '#if 0'.
# 2. hfDefs:
# Find (and remove from input) list of static hf_ variable
# definitions ('static int hf_... ;')
# 3. hfArrayEntries:
# Find (and remove from input) list of hf_... variables
# referenced in the hf[] entries;
# 4. hfUsage:
# From the remaining input, extract list of all strings of form hf_...
# (which may include strings which are not actually valid
# hf_... variable references).
# 5. Checks:
# If entries in hfDefs not in hfUsage then "unused";
# If entries in hfDefs not in hfArrayEntries then "ERROR: NO ARRAY";
use strict;
use warnings;
use Getopt::Long;
# Command-line state.  $debug is a file-level lexical which the subs
# defined further down read directly.
my $helpFlag = '';
my $debug = 0; # default: off; 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfusage (See code)

my $sts = GetOptions(
    'debug=i' => \$debug,
    'help|?'  => \$helpFlag
);

# Show the usage text on bad options, on --help, or when no file was named.
# The argument *count* is tested (not the truth of $ARGV[0]) so that a file
# literally named "0" is still accepted.
if (!$sts || $helpFlag || !@ARGV) {
    usage();
}

my $error = 0; # number of files for which "ERROR: NO ARRAY" was reported

# 'defined' (rather than plain truth) keeps the loop going for a file named "0".
while (defined(my $fileName = shift @ARGV)) {
    my ($fileContents);
    my ($hfDefHRef, $hfArrayEntryHRef, $hfUsageHRef);
    my ($unUsedHRef, $noArrayHRef);

    # read_file may rewrite $fileName (it strips a leading './').
    read_file(\$fileName, \$fileContents);

    # Clean the input; each step edits $fileContents in place.
    remove_blank_lines(\$fileContents, $fileName);
    remove_comments(\$fileContents, $fileName);
    remove_quoted_strings(\$fileContents, $fileName);
    remove_if0_code(\$fileContents, $fileName);

    # The two find_remove_... steps also delete what they find, so that
    # find_hf_usage only sees the remaining text.
    $hfDefHRef        = find_remove_hf_defs(\$fileContents, $fileName);
    $hfArrayEntryHRef = find_remove_hf_array_entries(\$fileContents, $fileName);
    # Pass the file name as well: find_hf_usage uses it for its --debug=5 title.
    $hfUsageHRef      = find_hf_usage(\$fileContents, $fileName);

    # Tests (See above)
    # 1. Are all the hfDef entries in hfUsage ?
    #    if not: "Unused entry:"
    $unUsedHRef = diff_hash($hfDefHRef, $hfUsageHRef);
    print_list("Unused entry: $fileName, ", $unUsedHRef);

    # 2. Are all the hfDef entries in hfArrayEntry ?
    #    (Note: if hfDef is "unused", don't check for same in hfArrayEntry)
    #    if not: "ERROR: NO ARRAY"
    $noArrayHRef = diff_hash($hfDefHRef, $hfArrayEntryHRef);
    $noArrayHRef = diff_hash($noArrayHRef, $unUsedHRef); # Remove "unused" hf_... from noArray list
    print_list("ERROR: NO ARRAY: $fileName, ", $noArrayHRef);

    if (keys(%$noArrayHRef) != 0) {
        $error += 1;
    }
}

exit (($error == 0) ? 0 : 1); # exit 1 if ERROR
# ---------------------------------------------------------------------
# action: print a one-line usage summary to STDOUT and terminate
# arg: none
# returns: does not return (the process exits with status 1)
sub usage {
    my $summary = "Usage: $0 [--debug=n] Filename [...] #debug: 1=cmt; 2=#if0; 3=hfDefs; 4=hfArrayEntry; 5=hfUsage\n";

    print $summary;
    exit(1);
}
# ---------------------------------------------------------------------
# action: read contents of a file to specified string
# arg: fileNameRef, fileContentsRef
#      fileNameRef:     ref to the file name; a leading './' is stripped
#                       from the referenced string as a side effect
#      fileContentsRef: ref to the scalar which receives the file contents
# returns: fileContentsRef (containing the contents of the file)
# dies: if the file does not exist or cannot be opened
sub read_file {
    my ($fileNameRef, $fileContentsRef) = @_;

    die "No such file: \"$$fileNameRef\"\n" if (! -e $$fileNameRef);

    # delete leading './'
    $$fileNameRef =~ s{ \A \. / }{}x;

    # A lexical handle (instead of a bareword global) is closed automatically
    # on scope exit and cannot clash with a handle used elsewhere.
    # $! is included so the failure reason is reported.
    open(my $fh, "<", $$fileNameRef)
        or die("Couldn't open $$fileNameRef: $!");

    # Read in the whole file at once: with $/ undefined, <$fh> returns
    # everything up to end-of-file.
    my $contents = do { local $/; <$fh> };
    close($fh);

    $$fileContentsRef = defined($contents) ? $contents : '';

    return $fileContentsRef;
}
# ---------------------------------------------------------------------
# action: Create a hash containing entries in 'a' that are not in 'b'
# arg: aHRef, bHref
#      Only the keys of the two hashes are compared; values are ignored.
# returns: pointer to hash (every key present has the value 'undef')
sub diff_hash {
    my ($aHRef, $bHRef) = @_;

    my %diffs;

    # Assigning the empty list to a hash slice creates each key with the
    # value undef.  (Assigning '{}' instead would store a hash ref in the
    # first key only, leaving the values inconsistent.)
    @diffs{ grep { !exists $bHRef->{$_} } keys %$aHRef } = ();

    return \%diffs;
}
# ---------------------------------------------------------------------
# action: print the keys of a hash, sorted, one per line, each
#         prefixed with a header string
# arg: hdr, listHRef
# returns: nothing of interest to callers
sub print_list {
    my ($hdr, $listHRef) = @_;

    my @lines;
    for my $entry (sort keys %$listHRef) {
        push @lines, "$hdr$entry\n";
    }

    # An empty @lines prints nothing at all.
    print @lines;
}
# ------------
# action: remove blank lines from input string
#         (a blank line is one that is empty or holds only whitespace)
# arg: codeRef, fileName
#      fileName is accepted for symmetry with the other remove_... subs
#      but is not used here.
# returns: codeRef (the referenced string is edited in place)
sub remove_blank_lines {
    my ($codeRef, $fileName) = @_;

    # /m makes '^' anchor at the start of every line; without it the
    # pattern could only match when the *whole* input was whitespace.
    # [^\S\n] is "whitespace other than newline", so a match never runs
    # past the end of the blank line itself.  The \z alternative covers a
    # final whitespace-only line that has no trailing newline.
    $$codeRef =~ s{ ^ [^\S\n]* (?: \n | \z ) }{}xmg;

    return $codeRef;
}
# ------------
# action: delete every C-style (slash-star ... star-slash) comment
#         from the input string
# arg: codeRef, fileName
# returns: codeRef (the referenced string is edited in place)
sub remove_comments {
    my ($codeRef, $fileName) = @_;

    # The below Regexp is based on one from:
    # http://aspn.activestate.com/ASPN/Cookbook/Rx/Recipe/59811
    # It is in the public domain.
    # It matches the comment opener, then any run of text and stars which
    # does not complete the closing sequence, then the closing slash.
    my $CCommentRegEx = qr{ / [*] [^*]* [*]+ (?: [^/*] [^*]* [*]+ )* / }x;

    $$codeRef =~ s{$CCommentRegEx}{}g;

    # --debug=1: dump the text as it stands after this step
    if ($debug == 1) {
        print "==> After Remove Comments: code: [$fileName]\n$$codeRef\n===<\n";
    }

    return $codeRef;
}
# ------------
# action: delete every double-quoted and single-quoted literal
#         (quotes included) from the input string
# arg: codeRef, fileName
# returns: codeRef (the referenced string is edited in place)
sub remove_quoted_strings {
    my ($codeRef, $fileName) = @_;

    # Double-quoted string.  The (?s: ...) group lets '\\.' match a
    # backslash followed by a new-line, i.e. a 'line continuation'
    # inside the string.
    my $DoubleQuotedStr = qr{ (?: ["] (?s: \\. | [^\"\\])* ["]) }x;

    # Single-quoted string.
    my $SingleQuotedStr = qr{ (?: \' (?: \\. | [^\'\\])* [']) }x;

    # Either kind of literal.
    my $AnyQuotedStr = qr{ $DoubleQuotedStr | $SingleQuotedStr }x;

    $$codeRef =~ s{$AnyQuotedStr}{}g;

    # --debug=1: dump the text as it stands after this step
    if ($debug == 1) {
        print "==> After Remove quoted strings: code: [$fileName]\n$$codeRef\n===<\n";
    }

    return $codeRef;
}
# -------------
# action: remove '#if 0'd code from the input string
# args: codeRef, fileName
# returns: codeRef (the referenced string is edited in place)
#
# Essentially: Use s//patsub/meg to pass each line to patsub.
# patsub monitors #if/#if 0/etc and determines
# if a particular code line should be removed.
# Lines on which one of the recognized directives (#if..., #else, #endif)
# was matched are always kept; any other line seen while inside a tracked
# '#if 0' region is replaced by the empty string.
# XXX: This is probably pretty inefficient;
# I could imagine using another approach such as converting
# the input string to an array of lines and then making
# a pass through the array deleting lines as needed.
{ # block begin
# State shared between remove_if0_code and patsub:
#   $if_lvl:  current nesting depth of the '#if...' directives seen so far
#   $if0_lvl: value of $if_lvl at which the tracked '#if 0' was opened;
#             0 when no '#if 0' is being tracked
#   $if0:     1 while ordinary lines are to be removed, else 0
my ($if_lvl, $if0_lvl, $if0); # shared vars
sub remove_if0_code {
my ($codeRef, $fileName) = @_;
# $1 is the whole matched text; $2 is the directive keyword and is
# defined only when the optional '# <directive>' prefix matched.
# The 'if \s 0' alternative requires exactly one whitespace character
# between 'if' and '0'.  The bare 'if' alternative is followed by '.*',
# so it also matches directives which merely begin with 'if'
# (for example '#ifdef').
# NOTE(review): '#elif' matches none of the alternatives, so such a line
# is handled like an ordinary code line - confirm this is intended.
my ($preprocRegEx) = qr {
( # $1 [complete line)
^
(?: # non-capturing
\s* \# \s*
(if \s 0| if | else | endif) # $2 (only if #...)
) ?
.*
$
)
}xom;
# Reset the shared state so that each call (each file) starts afresh.
($if_lvl, $if0_lvl, $if0) = (0,0,0);
# /e: the replacement text for each match is whatever patsub returns.
$$codeRef =~ s{ $preprocRegEx }{patsub($1,$2)}xegm;
# --debug=2: dump the text as it stands after this step
($debug == 2) && print "==> After Remove if0: code: [$fileName]\n$$codeRef\n===<\n";
return $codeRef;
}
# patsub: replacement callback used by remove_if0_code.
# args: $_[0] = the complete matched text
#       $_[1] = directive keyword ('if', 'if 0', 'else' or 'endif'),
#               or undef when no directive was matched
# returns: $_[0] unchanged, or '' when the text is to be removed
# dies: when an 'endif' takes the nesting depth below zero
sub patsub {
# --debug=99: trace every match handed to this sub
if ($debug == 99) {
print "-->$_[0]\n";
(defined $_[1]) && print " >$_[1]<\n";
}
# #if/#if 0/#else/#endif processing
if (defined $_[1]) {
my ($if) = $_[1];
if ($if eq 'if') {
$if_lvl += 1;
} elsif ($if eq 'if 0') {
$if_lvl += 1;
# Only start tracking when no '#if 0' is tracked already; an '#if 0'
# met while $if0_lvl is non-zero just adds to the nesting depth.
if ($if0_lvl == 0) {
$if0_lvl = $if_lvl;
$if0 = 1; # inside #if 0
}
} elsif ($if eq 'else') {
# An '#else' at the depth of the tracked '#if 0' ends the removal;
# an '#else' at any other depth is ignored.
if ($if0_lvl == $if_lvl) {
$if0 = 0;
}
} elsif ($if eq 'endif') {
# The '#endif' at the depth of the tracked '#if 0' stops the tracking.
if ($if0_lvl == $if_lvl) {
$if0 = 0;
$if0_lvl = 0;
}
$if_lvl -= 1;
if ($if_lvl < 0) {
die "patsub: #if/#endif mismatch"
}
}
return $_[0]; # don't remove preprocessor lines themselves
}
# not preprocessor line: See if under #if 0: If so, remove
if ($if0 == 1) {
return ''; # remove
}
return $_[0];
}
} # block end
# ---------------------------------------------------------------------
# action: Collect the name of every 'static g?int hf_...' definition
#         (including array names) found in the input string, and
#         delete each matched definition from the input string.
# args: codeRef, fileName
# returns: ref to a hash holding one entry (hf_... => 1) per definition
sub find_remove_hf_defs {
    my ($codeRef, $fileName) = @_;

    # The combined pattern matches any of the following
    #   static g?int hf_foo = -1;
    #   static g?int hf_foo = HF_EMPTY;
    #   static g?int hf_foo[xxx];
    #   static g?int hf_foo[xxx] = {

    # Leading part: 'static g?int hf_foo' (captures the variable name)
    my $p1RegEx = qr {
        ^
        \s*
        static
        \s+
        g?int
        \s+
        (hf_[a-zA-Z0-9_]+) # hf_..
    }xom;

    # Scalar trailer: ' = -1;' or ' = HF_EMPTY;'
    my $p2aRegEx = qr {
        \s* = \s*
        (?:
        - \s* 1 | HF_EMPTY
        )
        \s* ;
    }xom;

    # Array trailer: '[xxx];' or '[xxx] = {'
    my $p2bRegEx = qr !
        \s* \[ [^]]+ \] \s*
        (?:
        = \s* \{ | ;
        )
    !xom;

    my $hfDefRegEx = qr { $p1RegEx (?: $p2aRegEx | $p2bRegEx ) }xom;

    # A single substitution pass both records the captured name and
    # deletes the matched definition.
    my %hfDefs;
    $$codeRef =~ s{ $hfDefRegEx }{ $hfDefs{$1} = 1; '' }xegm;

    # --debug=3: list the names found, then dump the remaining text
    if ($debug == 3) {
        print_hash("VD: $fileName", \%hfDefs); # VariableDefinition
        print "==> After remove hfDefs: code: [$fileName]\n$$codeRef\n===<\n";
    }

    return \%hfDefs;
}
# ---------------------------------------------------------------------
# action: Collect, for each hf[] entry found in the input string, the
#         identifier which follows the '&' at the start of the entry,
#         and delete each matched entry from the input string.
# args: codeRef, fileName
# returns: ref to the hfArrayEntry hash (identifier => 1)
sub find_remove_hf_array_entries {
    my ($codeRef, $fileName) = @_;

    # hf[] entry regex (to extract an hf_index_name and associated field type)
    my $hfArrayEntryRegEx = qr /
        \{
        \s*
        & \s* ( [a-zA-Z0-9_]+ ) # &hf
        (?:
        \s* \[ [^]]+ \] # optional array ref
        ) ?
        \s* , \s*
        \{
        [^}]+
        , \s*
        (FT_[a-zA-Z0-9_]+) # field type
        \s* ,
        [^}]+
        , \s*
        (?:
        HFILL | HF_REF_TYPE_NONE
        )
        [^}]*
        }
        [\s,]*
        }
    /xos;

    # A single substitution pass both records the captured identifier
    # (keyed by the hf_... string) and deletes the matched entry.
    my %hfArrayEntry;
    $$codeRef =~ s{ $hfArrayEntryRegEx }{
        ($debug == 98) && print "+++ $1 $2\n";
        $hfArrayEntry{$1} = 1;
        '';
    }xegs;

    # --debug=4: list the entries found, then dump the remaining text
    if ($debug == 4) {
        print_hash("AE: $fileName", \%hfArrayEntry); # ArrayEntry
        print "==> After remove hfArrayEntry: code: [$fileName]\n$$codeRef\n===<\n";
    }

    return \%hfArrayEntry;
}
# ---------------------------------------------------------------------
# action: create hash of all hf_... strings remaining in input string.
# args: codeRef, fileName
# return: ref to hfUsage hash (hf_... string => number of occurrences)
#
# Every string of the form hf_... in the input is recorded, whether
# or not it is actually a variable.  That is harmless: the caller
# only looks up known valid vars in this hash.
sub find_hf_usage {
    my ($codeRef, $fileName) = @_;

    my $hfUsageRegEx = qr {
        \b ( hf_[a-zA-Z0-9_]+ ) # hf_...
    }ox;

    # In list context a /g match returns every captured hf_... string.
    my %hfUsage;
    for my $hfName ($$codeRef =~ m{ $hfUsageRegEx }xg) {
        $hfUsage{$hfName}++;
    }

    # --debug=5: list the strings found
    print_hash("VU: $fileName", \%hfUsage) if ($debug == 5); # VariableUsage

    return \%hfUsage;
}
# ---------------------------------------------------------------------
# action: print the keys of a hash, sorted, one per line; each line
#         starts with the title, padded or cut to exactly 40 columns
# args: title, HRef
# returns: nothing of interest to callers
sub print_hash {
    my ($title, $HRef) = @_;

    my @sortedKeys = sort keys %$HRef;
    foreach my $key (@sortedKeys) {
        printf("%-40.40s %s\n", $title, $key);
    }
}