wireshark/tools/check_tfs.py
2023-09-24 21:20:58 +00:00

596 lines
21 KiB
Python
Executable file

#!/usr/bin/env python3
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
import os
import re
import subprocess
import argparse
import signal
# This utility scans for tfs items, and works out if standard ones
# could have been used instead (from epan/tfs.c)
# Can also check for value_string where common tfs could be used instead.
# TODO:
# - check how many of the definitions in epan/tfs.c are used in other dissectors
# - although even if unused, might be in external dissectors?
# - consider merging Item class with check_typed_item_calls.py ?
# Try to exit soon after Ctrl-C is pressed.
should_exit = False


def signal_handler(sig, frame):
    """SIGINT handler: announce the interrupt and raise the exit flag.

    The handler only sets the module-level ``should_exit`` flag; the loops
    elsewhere in this script poll that flag and stop at their next check.
    ``sig`` and ``frame`` are the standard handler arguments and are unused.
    """
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')


# Route Ctrl-C through the handler above.
signal.signal(signal.SIGINT, signal_handler)
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if *filename* announces itself as automatically generated.

    Only the first 11 lines are inspected, because the "generated" notice is
    expected near the top of the file. A file that does not exist (e.g.
    deleted in a recent commit) is reported as not generated.
    """
    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # Any one of these substrings marks the file as generated.
    markers = (
        'Generated automatically',
        'Generated Automatically',
        'Autogenerated from',
        'is autogenerated',
        'automatically generated by Pidl',
        'Created by: The Qt Meta Object Compiler',
        'This file was generated',
        'This filter was automatically generated',
        'This file is auto generated, do not edit!',
        'This file is auto generated',
    )

    # The context manager closes the file on every exit path. Undecodable
    # bytes are replaced rather than raising: only ASCII markers are searched
    # for, so lossy decoding cannot change the answer.
    with open(filename, 'r', encoding='utf8', errors='replace') as f_read:
        for lines_tested, line in enumerate(f_read):
            # The comment to say that it's generated is near the top, so give
            # up once we get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in markers):
                return True

    # OK, looks like a hand-written file!
    return False
# Keep track of custom entries that might appear in multiple dissectors,
# so we can consider adding them to tfs.c
custom_tfs_entries = {}


def AddCustomEntry(val1, val2, file):
    """Note that *file* defines a custom TFS with the string pair (val1, val2).

    ``custom_tfs_entries`` maps each (val1, val2) pair to the list of files
    in which it was seen, in the order they were added.
    """
    custom_tfs_entries.setdefault((val1, val2), []).append(file)
class TFS:
    """A true_false_string definition found in a source file.

    ``val1`` is the first string of the initialiser and ``val2`` the second,
    e.g. "Yes" and "No" for ``{ "Yes", "No" }``. The value_string check in
    this script compares ``val1`` with the entry for value 1 and ``val2``
    with the entry for value 0, i.e. ``val1`` is the 'true' string.

    Constructing an instance runs sanity checks on the strings, printing any
    findings and incrementing the module-level ``warnings_found`` counter
    for each 'Warning:' line ('Note:' lines are not counted).
    """

    def __init__(self, file, name, val1, val2):
        global warnings_found

        self.file = file
        self.name = name
        self.val1 = val1
        self.val2 = val2

        # Should not be empty
        if not val1 or not val2:
            print('Warning:', file, name, 'has an empty field', self)
            warnings_found += 1

        # Leading or trailing space should not be needed.
        # val1 is the true string and val2 the false string, so label them
        # that way round in the notes.
        if val1.startswith(' ') or val1.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - true val begins or ends with space \"' + self.val1 + '\"')
        if val2.startswith(' ') or val2.endswith(' '):
            print('Note: ' + self.file + ' ' + self.name + ' - false val begins or ends with space \"' + self.val2 + '\"')

        # Should really not be identical...
        if val1.lower() == val2.lower():
            print('Warning:', file, name, 'true and false strings are the same', self)
            warnings_found += 1

        # Shouldn't both be negation (with exception..)
        if (file != os.path.join('epan', 'dissectors', 'packet-smb.c')
                and 'not ' in val1.lower() and 'not ' in val2.lower()):
            print('Warning:', file, name, self, 'both strings contain not')
            warnings_found += 1

        # Not expecting full-stops inside strings..
        if '.' in val1 or '.' in val2:
            print('Warning:', file, name, 'Period found in string', self)
            warnings_found += 1

    def __str__(self):
        """Render the pair like a C initialiser: {"val1", "val2"}."""
        return '{' + '"' + self.val1 + '", "' + self.val2 + '"}'
class ValueString:
    """A value_string array that may really be a two-valued (TFS-like) table.

    ``raw_vals`` holds the text between the outer braces of the array.
    ``looks_like_tfs`` is True only when that text has exactly three '{'
    (e.g. two entries plus a ``{ 0, NULL }`` terminator) and yields one
    entry for value 0 and one for value 1. In that case ``parsed_vals``
    maps ``False`` and ``True`` to the respective strings.
    """

    def __init__(self, file, name, vals):
        self.file = file
        self.name = name
        self.raw_vals = vals
        self.parsed_vals = {}
        self.looks_like_tfs = True

        # Anything other than exactly three '{' cannot be a 2-entry table.
        if self.raw_vals.count('{') != 3:
            self.looks_like_tfs = False
            return

        # Now parse out each entry in the value_string
        for m in re.finditer(r'\{([\"a-zA-Z\s\d\,]*)\}', self.raw_vals):
            entry = m[1]
            # Check each entry looks like part of a TFS entry.
            match = re.match(r'\s*([01])\,\s*\"([a-zA-Z\d\s]*\s*)\"', entry)
            if not match:
                self.looks_like_tfs = False
                break

            # Key by truth value: '1' -> True, '0' -> False.
            self.parsed_vals[match[1] == '1'] = match[2]

            # Now have both entries
            if len(self.parsed_vals) == 2:
                break

        # Users of looks_like_tfs index parsed_vals[True] and [False], so
        # never claim a match unless both were actually found (e.g. three
        # entries that all use value 0 would otherwise slip through).
        if len(self.parsed_vals) != 2:
            self.looks_like_tfs = False

    def __str__(self):
        return '{' + '"' + self.raw_vals + '"}'
# Width in bits for each field type, keyed by the FT_* type name as it
# appears in an hf item definition. Item.get_field_width_in_bits() looks up
# non-boolean types here; FT_BOOLEAN is handled separately there, so its
# entry in this table is not used by that method.
field_widths = {
'FT_BOOLEAN' : 64, # TODO: Width depends upon 'display' field
'FT_CHAR' : 8,
'FT_UINT8' : 8,
'FT_INT8' : 8,
'FT_UINT16' : 16,
'FT_INT16' : 16,
'FT_UINT24' : 24,
'FT_INT24' : 24,
'FT_UINT32' : 32,
'FT_INT32' : 32,
'FT_UINT40' : 40,
'FT_INT40' : 40,
'FT_UINT48' : 48,
'FT_INT48' : 48,
'FT_UINT56' : 56,
'FT_INT56' : 56,
'FT_UINT64' : 64,
'FT_INT64' : 64
}
# Simplified version of class that is in check_typed_item_calls.py
class Item:
    """One hf item definition, reduced to what the TFS checks need.

    After construction:
      - ``mask`` is the mask text (replaced by the macro's value when the
        text was a key in *macros*),
      - ``mask_read`` says whether the mask could be turned into a number,
      - ``mask_value`` is that number (0 when it could not be read),
      - ``bits_set`` is the number of mask bits set within the field width.
    """

    previousItem = None

    def __init__(self, filename, hf, filter, label, item_type, type_modifier, strings, macros, mask=None,
                 check_mask=False):
        # check_mask is accepted for signature compatibility; it is not used.
        self.filename = filename
        self.hf = hf
        self.filter = filter
        self.label = label
        self.strings = strings
        self.mask = mask
        self.item_type = item_type
        self.type_modifier = type_modifier

        # Resolve the mask text (possibly via macros) to a number.
        self.set_mask_value(macros)

        # Count the mask bits that fall inside the field's width.
        self.bits_set = sum(1 for n in range(self.get_field_width_in_bits())
                            if self.check_bit(self.mask_value, n))

    def __str__(self):
        return 'Item ({0} "{1}" {2} type={3}:{4} strings={5} mask={6})'.format(self.filename, self.label, self.filter,
                                                                                 self.item_type, self.type_modifier, self.strings, self.mask)

    def set_mask_value(self, macros):
        """Set ``mask_value``/``mask_read`` from ``self.mask``.

        A mask that is a key of *macros* is replaced by the macro's value.
        Anything that cannot be parsed as a C-style integer literal leaves
        ``mask_read`` False and ``mask_value`` 0.
        """
        self.mask_read = True
        try:
            # Substitute mask if found as a macro..
            if self.mask in macros:
                self.mask = macros[self.mask]
            elif any(c not in '0123456789abcdefABCDEFxX' for c in self.mask):
                # Some other expression/identifier that cannot be evaluated.
                self.mask_read = False
                self.mask_value = 0
                return

            # Read according to the appropriate base (C literal rules).
            # Accept an upper-case '0X' prefix as well as '0x'.
            if self.mask.lower().startswith('0x'):
                self.mask_value = int(self.mask, 16)
            elif self.mask.startswith('0'):
                self.mask_value = int(self.mask, 8)
            else:
                self.mask_value = int(self.mask, 10)
        except (TypeError, ValueError, AttributeError):
            # e.g. mask is None, or the text is not a valid number.
            self.mask_read = False
            self.mask_value = 0

    # Return true if bit position n is set in value.
    def check_bit(self, value, n):
        return (value & (0x1 << n)) != 0

    def get_field_width_in_bits(self):
        """Return the field width in bits, or 0 when it cannot be determined."""
        if self.item_type == 'FT_BOOLEAN':
            if self.type_modifier in ('NULL', 'BASE_NONE'):
                return 8  # i.e. 1 byte
            if self.type_modifier == 'SEP_DOT':  # from proto.h, only meant for FT_BYTES
                return 64
            try:
                # For FT_BOOLEAN, modifier is just numerical number of bits.
                # Round up to next nibble, using integer arithmetic.
                return (int(self.type_modifier) + 3) // 4 * 4
            except (TypeError, ValueError):
                return 0

        # Lookup fixed width for this type (0 if the type is not listed).
        return field_widths.get(self.item_type, 0)
def removeComments(code_string):
    """Return *code_string* with C comments and '#if 0' regions stripped.

    Three passes are applied in order: /* ... */ block comments, then
    // line comments (together with their terminating newline), then
    everything from '#if 0' up to the next '#endif'. The passes are purely
    textual, so comment markers inside string literals are not recognised
    as being part of a string.
    """
    passes = (
        (r"/\*.*?\*/", re.DOTALL),       # C-style comment
        (r"//.*?\n", 0),                 # C++-style comment
        (r"#if 0.*?#endif", re.DOTALL),  # Ignored region
    )
    for pattern, flags in passes:
        code_string = re.sub(pattern, "", code_string, flags=flags)
    return code_string
# Look for true_false_string items in a dissector file.
def findTFS(filename):
    """Return a dict mapping name -> TFS for each true_false_string in *filename*.

    Example of a definition that is recognised:
        const true_false_string tfs_yes_no = { "Yes", "No" };
    The file is read as UTF-8 and comments are removed before matching.
    If a name occurs more than once, the last definition wins.
    """
    with open(filename, 'r', encoding="utf8") as f:
        # Remove comments so as not to trip up RE.
        source = removeComments(f.read())

    definition = r'\sconst\s*true_false_string\s*([a-zA-Z0-9_]*)\s*=\s*{\s*\"([a-zA-Z_0-9/:! ]*)\"\s*,\s*\"([a-zA-Z_0-9/:! ]*)\"'

    tfs_found = {}
    for m in re.finditer(definition, source):
        name, first, second = m.groups()
        # Store this entry.
        tfs_found[name] = TFS(filename, name, first, second)
    return tfs_found
# Look for value_string entries in a dissector file.
def findValueStrings(filename):
    """Return a dict mapping name -> ValueString for arrays found in *filename*.

    Example of a definition that is recognised:
        static const value_string radio_type_vals[] =
        {
            { 0, "FDD"},
            { 1, "TDD"},
            { 0, NULL }
        };
    The file is read as UTF-8 and comments are removed before matching.
    """
    with open(filename, 'r', encoding="utf8") as f:
        # Remove comments so as not to trip up RE.
        source = removeComments(f.read())

    definition = r'.*const value_string\s*([a-zA-Z0-9_]*)\s*\[\s*\]\s*\=\s*\{([\{\}\d\,a-zA-Z0-9\s\"]*)\};'

    # group(1) is the array name, group(2) the text between the outer braces.
    return {m.group(1): ValueString(filename, m.group(1), m.group(2))
            for m in re.finditer(definition, source)}
# Look for hf items (i.e. full item to be registered) in a dissector file.
def find_items(filename, macros, check_mask=False, mask_exact_width=False, check_label=False, check_consecutive=False):
    """Return a dict mapping hf variable name -> Item for each hf item in *filename*.

    *macros* (macro name -> value text) is passed to each Item so that a
    mask given as a macro can be resolved. ``check_mask`` is forwarded to
    Item; ``mask_exact_width``, ``check_label`` and ``check_consecutive``
    are accepted but not used here.
    The file is read as UTF-8 and comments are removed before matching.
    """
    items = {}
    with open(filename, 'r', encoding="utf8") as f:
        # Remove comments so as not to trip up RE.
        contents = removeComments(f.read())

    # N.B. re extends all the way to HFILL to avoid greedy matching
    matches = re.finditer(r'.*\{\s*\&(hf_[a-z_A-Z0-9]*)\s*,\s*{\s*\"(.*?)\"\s*,\s*\"(.*?)\"\s*,\s*(.*?)\s*,\s*([0-9A-Z_\|\s]*?)\s*,\s*(.*?)\s*,\s*(.*?)\s*,\s*([a-zA-Z0-9\W\s_\u00f6\u00e4]*?)\s*,\s*HFILL', contents)
    for m in matches:
        # Store this item. Group 2 is the label, group 3 the filter name.
        hf = m.group(1)
        items[hf] = Item(filename, hf, filter=m.group(3), label=m.group(2), item_type=m.group(4),
                         type_modifier=m.group(5),
                         strings=m.group(6),
                         macros=macros,
                         mask=m.group(7),
                         check_mask=check_mask)
    return items
def find_macros(filename):
    """Return a dict mapping macro name -> value text for simple #defines.

    Only upper-case/digit/underscore names whose value consists solely of
    hex-digit/'x' characters (possibly none) and is followed directly by a
    newline are picked up. A later definition of the same name overrides an
    earlier one. The file is read as UTF-8 with comments removed first.
    """
    with open(filename, 'r', encoding="utf8") as f:
        # Remove comments so as not to trip up RE.
        source = removeComments(f.read())

    define = r'#define\s*([A-Z0-9_]*)\s*([0-9xa-fA-F]*)\n'
    return {m.group(1): m.group(2) for m in re.finditer(define, source)}
def is_dissector_file(filename):
    """Return a match object if *filename* names a dissector source, else None.

    A dissector source is any path containing 'packet-' and ending in '.c'.
    The whole name must match, so look-alikes such as 'packet-foo.cnf',
    'packet-foo.cpp' or 'packet-foo.c.orig' are rejected.
    """
    return re.fullmatch(r'.*packet-.*\.c', filename)
def findDissectorFilesInFolder(folder):
    """Return the dissector files in *folder* as a list of joined paths.

    Files are visited in sorted order, to give some idea of how far through
    a run is. If Ctrl-C has been pressed (``should_exit`` set), the scan
    stops and the files collected so far are returned. The result is always
    a list, never None, so callers can iterate over it unconditionally.
    """
    files = []
    for f in sorted(os.listdir(folder)):
        if should_exit:
            return files
        if is_dissector_file(f):
            files.append(os.path.join(folder, f))
    return files
# Running totals for the whole run. warnings_found is incremented by the TFS
# sanity checks and by checkFile(); errors_found is incremented by checkFile()
# for exact duplicates of common entries. Both are reported in the summary.
warnings_found = 0
errors_found = 0
# NOTE(review): tfs_found is not read anywhere in the visible code - confirm
# whether it is still needed.
tfs_found = 0
# Check the given dissector file.
def _is_plugin_path(filename):
    """Return True if the first path component of *filename* is the plugin(s) folder."""
    # Compare whole path components; a character-wise prefix test would treat
    # any path that merely starts with 'p' as a plugin.
    first_component = os.path.normpath(filename).split(os.sep)[0]
    return first_component in ('plugin', 'plugins')


def _matching_common_tfs(true_val, false_val, common_tfs):
    """Yield (name, exact_case) for each common TFS whose two strings match.

    Entries are visited in the order of *common_tfs*. ``exact_case`` is True
    for a byte-for-byte match and False for a match that differs only in
    capitalisation.
    """
    for name, common in common_tfs.items():
        if common.val1 == true_val and common.val2 == false_val:
            yield name, True
        elif common.val1.upper() == true_val.upper() and common.val2.upper() == false_val.upper():
            yield name, False


# Check the given dissector file.
def checkFile(filename, common_tfs, look_for_common=False, check_value_strings=False):
    """Report TFS definitions in *filename* that duplicate entries of *common_tfs*.

    filename            -- path of the source file to check.
    common_tfs          -- dict of name -> TFS for the shared entries (tfs.c).
    look_for_common     -- if True, record TFSs with no shared equivalent via
                           AddCustomEntry().
    check_value_strings -- if True, also report 2-entry value_strings that
                           are used by single-bit items and match a shared TFS.

    Findings are printed. An exact duplicate TFS counts as an error, a
    capitalisation-only duplicate as a warning, and an exact value_string
    match as a warning (module-level ``errors_found``/``warnings_found``).
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    # Do not compare against the common entries for plugins; plugins cannot
    # import data values from libwireshark (functions, yes; data values, no).
    is_plugin = _is_plugin_path(filename)

    # Find items.
    file_tfs = findTFS(filename)

    # See if any of these items already existed in tfs.c
    for f, tfs in file_tfs.items():
        # Only the first matching common entry is reported.
        match = None
        if not is_plugin:
            match = next(_matching_common_tfs(tfs.val1, tfs.val2, common_tfs), None)

        if match is not None:
            c, exact_case = match
            print("Error:" if exact_case else "Warn: ", filename, f, "- could have used", c, 'from tfs.c instead: ', common_tfs[c],
                  '' if exact_case else ' (capitalisation differs)')
            if exact_case:
                errors_found += 1
            else:
                warnings_found += 1
        elif look_for_common:
            AddCustomEntry(tfs.val1, tfs.val2, filename)

    if check_value_strings and not is_plugin:
        # Get macros
        macros = find_macros(filename)

        # Get value_string entries.
        vs = findValueStrings(filename)

        # Also get hf items
        items = find_items(filename, macros, check_mask=True)

        for v, value_string in vs.items():
            if not value_string.looks_like_tfs:
                continue
            parsed = value_string.parsed_vals
            # Need both the value-1 and value-0 strings to compare.
            if True not in parsed or False not in parsed:
                continue

            # Look for items that:
            # - have VALS(v) AND
            # - have a mask width of 1 bit (no good if field can have values > 1...)
            uses_vals = re.compile(r'VALS\(\s*' + re.escape(v) + r'\s*\)')
            single_bit_users = [i for i, item in items.items()
                                if uses_vals.match(item.strings) and item.bits_set == 1]

            # Every matching common entry is reported, for every such item.
            for c, exact_case in _matching_common_tfs(parsed[True], parsed[False], common_tfs):
                for i in single_bit_users:
                    print("Warn:" if exact_case else "Note:", filename, 'value_string', "'"+v+"'",
                          "- could have used", c, 'from tfs.c instead: ', common_tfs[c], 'for', i,
                          '' if exact_case else ' (capitalisation differs)')
                    if exact_case:
                        warnings_found += 1
#################################################################
# Main logic.
# command-line args. Controls which dissector files should be checked.
# If no args given, will just scan epan/dissectors folder.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--file', action='append',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')
parser.add_argument('--check-value-strings', action='store_true',
                    help='check whether value_strings could have been tfs?')
parser.add_argument('--common', action='store_true',
                    help='check for potential new entries for tfs.c')

args = parser.parse_args()


def _git_dissector_files(*diff_args):
    """Return the dissector files named by 'git diff --name-only <diff_args>'."""
    command = ['git', 'diff', '--name-only'] + list(diff_args)
    names = [line.decode('utf-8')
             for line in subprocess.check_output(command).splitlines()]
    # Will examine dissector files only
    return [name for name in names if is_dissector_file(name)]


# Get files from wherever command-line args indicate.
files = []
if args.file:
    # Add specified file(s)
    for f in args.file:
        if not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            # SystemExit does not rely on the site-provided exit() helper.
            raise SystemExit(1)
        files.append(f)
elif args.commits:
    # Get files affected by specified number of commits.
    files = _git_dissector_files('HEAD~' + args.commits)
elif args.open:
    # Unstaged changes.
    files = _git_dissector_files()
    # Staged changes, skipping any file already listed.
    for f in _git_dissector_files('--staged'):
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder. Guard against a None result
    # (an interrupted scan) so the loop below can always iterate.
    files = findDissectorFilesInFolder(os.path.join('epan', 'dissectors')) or []

# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(files), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissector modules\n')

# Get standard/ shared ones.
tfs_entries = findTFS(os.path.join('epan', 'tfs.c'))

# Now check the files to see if they could have used shared ones instead.
for f in files:
    if should_exit:
        raise SystemExit(1)
    if not isGeneratedFile(f):
        checkFile(f, tfs_entries, look_for_common=args.common, check_value_strings=args.check_value_strings)

# Report on commonly-defined values.
if args.common:
    # Looking for items that could potentially be moved to tfs.c
    for c, defining_files in custom_tfs_entries.items():
        # Only want to see items that have 3 or more occurrences.
        # Even then, probably only want to consider ones that sound generic.
        if len(defining_files) > 2:
            print(c, 'appears', len(defining_files), 'times, in: ', defining_files)

# Show summary.
print(warnings_found, 'warnings found')
if errors_found:
    print(errors_found, 'errors found')
    raise SystemExit(1)