wireshark/tools/check_val_to_str.py

244 lines
8.5 KiB
Python
Executable File

#!/usr/bin/env python3
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
# Scan dissectors for calls to val_to_str() and friends,
# checking for appropriate format specifier strings in
# 'unknown' arg.
# TODO:
# - more detailed format specifier checking (check letter, that there is only 1)
# - scan conformance (.cnf) files for ASN1 dissectors?
import os
import re
import subprocess
import argparse
import signal
# Try to exit soon after Ctrl-C is pressed.
# This flag starts out False and is switched to True by the SIGINT handler.
should_exit = False


def signal_handler(sig, frame):
    """Handle SIGINT by recording that the user asked to stop.

    Sets the module-level should_exit flag and prints a short notice.
    Neither argument (signal number, current stack frame) is used.
    """
    global should_exit
    should_exit = True
    print('You pressed Ctrl+C - exiting')


# Route Ctrl-C (SIGINT) to the handler above.
signal.signal(signal.SIGINT, signal_handler)
# Test for whether the given file was automatically generated.
def isGeneratedFile(filename):
    """Return True if filename looks like an automatically generated file.

    Only the first 11 lines are inspected, since the comment saying that a
    file is generated is expected near the top.  A file that does not exist
    (e.g. it may have been deleted in a recent commit) is reported as not
    generated.
    """
    # Phrases whose presence in one of the leading lines marks the file as
    # generated.
    generated_markers = (
        'Generated automatically',
        'Generated Automatically',
        'Autogenerated from',
        'is autogenerated',
        'automatically generated by Pidl',
        'Created by: The Qt Meta Object Compiler',
        'This file was generated',
        'This filter was automatically generated',
        'This file is auto generated, do not edit!',
        'This file is auto generated',
    )

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        return False

    # The context manager closes the file on every path out of the block,
    # including when reading or decoding a line raises.
    with open(filename, 'r', encoding="utf8") as f_read:
        for lines_tested, line in enumerate(f_read):
            # The comment to say that it's generated is near the top, so give
            # up once we get a few lines down.
            if lines_tested > 10:
                return False
            if any(marker in line for marker in generated_markers):
                return True

    # OK, looks like a hand-written file!
    return False
def removeComments(code_string):
    """Return code_string with C-style and C++-style comments stripped.

    A C++-style comment is removed together with the newline that ends it;
    one that is not followed by a newline is left in place.
    """
    # C-style comment: DOTALL lets a single comment span several lines.
    without_block_comments = re.sub(r"/\*.*?\*/", "", code_string,
                                    flags=re.DOTALL)
    # C++-style comment
    return re.sub(r"//.*?\n", "", without_block_comments)
def is_dissector_file(filename):
    """Return a match object if filename is a dissector C file, else None.

    A dissector file has 'packet-' somewhere in its path and ends in '.c'.
    The result is meant to be used for its truthiness (e.g. with filter()).
    """
    # The '$' anchor matters: match() only anchors at the start, so without
    # it any name that merely contains 'packet-....c' (packet-foo.cnf,
    # packet-foo.cpp, packet-foo.c.orig) would be accepted too.
    return re.match(r'.*packet-.*\.c$', filename)
def findDissectorFilesInFolder(folder, recursive=False):
    """Return the list of dissector files found in folder.

    folder    -- directory to scan
    recursive -- when True, walk the whole tree below folder; otherwise only
                 the folder's own entries are considered, in sorted order

    Each returned path is prefixed with the directory it was found in, and
    only paths accepted by is_dissector_file() are kept.

    If should_exit has been set (Ctrl-C) while scanning, an empty list is
    returned.  A list is always returned so that callers can safely iterate
    over or extend the result.
    """
    candidates = []

    if recursive:
        for root, _subfolders, files in os.walk(folder):
            for f in files:
                if should_exit:
                    return []
                candidates.append(os.path.join(root, f))
    else:
        for f in sorted(os.listdir(folder)):
            if should_exit:
                return []
            candidates.append(os.path.join(folder, f))

    return [path for path in candidates if is_dissector_file(path)]
# Running totals, updated by checkFile().
warnings_found = 0
errors_found = 0


# Check the given dissector file.
def checkFile(filename, generated):
    """Check the 'unknown' format string of val_to_str()-style calls in a file.

    filename  -- path of the dissector source file to scan
    generated -- whether the file was identified as generated; only used to
                 tag the reported messages with '(GENERATED)'

    Problems are printed, and the module-level warnings_found / errors_found
    counters are incremented once per reported warning / error.  Nothing is
    returned.  A file that does not exist is reported and skipped.
    """
    global warnings_found
    global errors_found

    # Check file exists - e.g. may have been deleted in a recent commit.
    if not os.path.exists(filename):
        print(filename, 'does not exist!')
        return

    with open(filename, 'r', encoding="utf8") as f:
        contents = f.read()

    # Remove comments so as not to trip up RE.
    contents = removeComments(contents)

    generated_note = '(GENERATED)' if generated else ''

    # group(1) is the function name, group(2) the quoted 'unknown' string
    # passed as the last argument.
    matches = re.finditer(r'(?<!try_)(?<!char_)(?<!bytes)(r?val_to_str(?:_ext|)(?:_const|))\(.*?,.*?,\s*(".*?\")\s*\)', contents)
    for m in matches:
        function = m.group(1)
        format_string = m.group(2)

        # Ignore what appears to be a macro.
        if '#' in format_string:
            continue

        if function.endswith('_const'):
            # These ones shouldn't have a specifier - it's an error if they do.
            # TODO: I suppose it could be escaped, but haven't seen this...
            if '%' in format_string:
                # This is an error as format specifier would show in app
                print('Error:', filename, " ", m.group(0),
                      ' - should not have specifiers in unknown string',
                      generated_note)
                errors_found += 1
            continue

        # These ones need to have a specifier, and it should be suitable for
        # an int.  An escaped '%%' is a literal percent sign rather than a
        # specifier, so drop those before counting.
        specifiers = format_string.replace('%%', '')
        count = specifiers.count('%')
        if count == 0:
            print('Warning:', filename, " ", m.group(0),
                  ' - should have suitable format specifier in unknown string (or use _const()?)',
                  generated_note)
            warnings_found += 1
        elif count > 1:
            print('Warning:', filename, " ", m.group(0),
                  ' - has more than one specifier?',
                  generated_note)
            # Count it, so the summary total agrees with the warnings printed.
            warnings_found += 1

        # TODO: check allowed specifiers (d, u, x, ?) and modifiers (0-9*) in re ?
        if '%s' in specifiers:
            # This is an error as this likely causes a crash
            print('Error:', filename, " ", m.group(0),
                  ' - inappropriate format specifier in unknown string',
                  generated_note)
            errors_found += 1
#################################################################
# Main logic.

def _changed_dissector_files(git_command):
    """Run a git command that lists file names and return the dissector files among them."""
    output = subprocess.check_output(git_command)
    names = [line.decode('utf-8') for line in output.splitlines()]
    # Will examine dissector files only
    return [name for name in names if is_dissector_file(name)]


# command-line args.  Controls which dissector files should be checked.
# If no args given, will scan all dissectors.
parser = argparse.ArgumentParser(description='Check calls in dissectors')
parser.add_argument('--file', action='append',
                    help='specify individual dissector file to test')
parser.add_argument('--commits', action='store',
                    help='last N commits to check')
parser.add_argument('--open', action='store_true',
                    help='check open files')
parser.add_argument('--generated', action='store_true',
                    help='check generated files')

args = parser.parse_args()


# Get files from wherever command-line args indicate.
files = []
if args.file:
    # Add specified file(s)
    for f in args.file:
        if not f.startswith('epan'):
            f = os.path.join('epan', 'dissectors', f)
        if not os.path.isfile(f):
            print('Chosen file', f, 'does not exist.')
            raise SystemExit(1)
        files.append(f)
elif args.commits:
    # Get files affected by specified number of commits.
    files = _changed_dissector_files(['git', 'diff', '--name-only',
                                      'HEAD~' + args.commits])
elif args.open:
    # Unstaged changes.
    files = _changed_dissector_files(['git', 'diff', '--name-only'])
    # Staged changes (skipping any file already listed as unstaged).
    for f in _changed_dissector_files(['git', 'diff', '--staged', '--name-only']):
        if f not in files:
            files.append(f)
else:
    # Find all dissector files from folder.
    scan_targets = (
        (os.path.join('epan', 'dissectors'), False),
        (os.path.join('plugins', 'epan'), True),
        (os.path.join('epan', 'dissectors', 'asn1'), True),
    )
    for folder, recursive in scan_targets:
        found = findDissectorFilesInFolder(folder, recursive=recursive)
        # A scan interrupted by Ctrl-C may not hand back a list; guard so
        # that this does not turn into a TypeError.
        if found:
            files.extend(found)
    if should_exit:
        raise SystemExit(1)


# If scanning a subset of files, list them here.
print('Examining:')
if args.file or args.commits or args.open:
    if files:
        print(' '.join(files), '\n')
    else:
        print('No files to check.\n')
else:
    print('All dissectors\n')


# Now check the chosen files
for f in files:
    if should_exit:
        raise SystemExit(1)
    generated = isGeneratedFile(f)
    # Generated files are only checked when explicitly requested.
    if args.generated or not generated:
        checkFile(f, generated)


# Show summary.
print(warnings_found, 'warnings found')
if errors_found:
    print(errors_found, 'errors found')
    raise SystemExit(1)