
380 lines
13 KiB
Raw Normal View History

#!/usr/bin/env python3
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 1998 Gerald Combs
# SPDX-License-Identifier: GPL-2.0-or-later
'''Generate Sysdig event dissector sections from the sysdig sources.

Reads driver/event_table.c and driver/ppm_events_public.h and generates
corresponding dissection code in packet-sysdig-event.c. Updates are
performed in-place in the dissector code.

Requires an Internet connection. Assets are loaded from GitHub over HTTPS.
'''
import logging
import os
import os.path
import re
import urllib.request, urllib.error, urllib.parse
import sys
# Base URL that the fetched sysdig source paths are appended to. The path
# embeds "0.26.1", so assets come from that fixed revision of draios/sysdig.
sysdig_repo_pfx = 'https://raw.githubusercontent.com/draios/sysdig/0.26.1/'
def exit_msg(msg=None, status=1):
    '''Print an optional message and the module usage text, then exit.

    msg: text written to stderr ahead of the usage text; skipped when None.
    status: process exit status handed to sys.exit().

    Raises SystemExit; this function does not return.
    '''
    if msg is not None:
        sys.stderr.write(msg + '\n\n')
    # __doc__ is None when the module has no docstring; skip it rather than
    # fail with a TypeError on None + str.
    if __doc__:
        sys.stderr.write(__doc__ + '\n')
    sys.exit(status)
def get_url_lines(url):
    '''Open a URL.

    url: address to fetch; the request carries a Wireshark User-Agent header.

    Returns the URL body, decoded and split with str.splitlines(), as a
    list of lines. Any failure is reported through exit_msg(), which is
    expected to end the process.
    '''
    req_headers = { 'User-Agent': 'Wireshark generate-sysdig-event' }
    try:
        req = urllib.request.Request(url, headers=req_headers)
        # The context manager closes the response even if read() fails.
        with urllib.request.urlopen(req) as response:
            return response.read().decode().splitlines()
    # Most specific first: HTTPError derives from URLError, which derives
    # from OSError.
    except urllib.error.HTTPError as err:
        exit_msg("HTTP error fetching {0}: {1}".format(url, err.reason))
    except urllib.error.URLError as err:
        exit_msg("URL error fetching {0}: {1}".format(url, err.reason))
    except OSError as err:
        exit_msg("OS error fetching {0}: {1}".format(url, err.strerror))
    except Exception as err:
        # Keep the detail in the message; the second positional argument of
        # exit_msg() is the exit status, not extra text.
        exit_msg("Unexpected error fetching {0}: {1!r}".format(url, err))
# Fetched when the module is loaded; get_event_defines() scans these lines.
ppm_ev_pub_lines = get_url_lines(sysdig_repo_pfx + 'driver/ppm_events_public.h')
# Matches one indented "PPME_<NAME>_<E|X> = <number>," enumerator line.
# Group 1 is the name without the PPME_ prefix, group 2 the numeric value.
# Raw string: '\s' in a plain literal is an invalid escape sequence.
ppme_re = re.compile(r'^\s+PPME_([A-Z0-9_]+_[EX])\s*=\s*([0-9]+)\s*,')

# Kept for compatibility; nothing in the visible code reads or fills it.
event_info_d = {}
def get_event_defines():
    '''Map event numbers to event names found in ppm_ev_pub_lines.

    Each line matching ppme_re contributes one entry.

    Returns a dict keyed by the integer event value; each value is the
    enumerator name with the PPME_ prefix removed.
    '''
    event_d = {}
    for line in ppm_ev_pub_lines:
        m = ppme_re.match(line)
        if m:
            event_d[int(m.group(2))] = m.group(1)
    return event_d
# Fetched when the module is loaded; get_event_names() and
# get_event_params() scan these lines.
ppm_ev_table_lines = get_url_lines(sysdig_repo_pfx + 'driver/event_table.c')
# Module-level placeholder; main() assigns its own local hf_d.
hf_d = {}

# Raw strings below: '\s', '\*' and '\/' in plain literals are invalid
# escape sequences. The compiled patterns are unchanged.

# Matches an event table row that starts with a "/* PPME_... */" comment.
# Group 1 is the quoted event name, group 2 the declared parameter count.
event_info_re = re.compile(r'^\s+/\*\s*PPME_.*\*\/\s*{\s*"([A-Za-z0-9_]+)"\s*,[^,]+,[^,]+,\s*([0-9]+)\s*[,{}]')
# Matches one {"name", PT_<type>, PF_<format>} parameter entry.
# Groups: parameter name, PT_ suffix, PF_ suffix.
event_param_re = re.compile(r'{\s*"([A-Za-z0-9_]+)"\s*,\s*PT_([A-Z0-9_]+)\s*,\s*PF_([A-Z0-9_]+)\s*[,}]')
def get_event_names():
    '''Return a contiguous list of event names.

    One name is collected for every line of ppm_ev_table_lines that
    matches event_info_re, so list positions follow the order of the
    table rows. Names are taken verbatim from the quoted string in each
    row.

    NOTE(review): the previous docstring said "Names are lower case";
    nothing here lower-cases them, so that presumably describes the
    upstream table contents - confirm.
    '''
    return [
        ei.group(1)
        for ei in map(event_info_re.match, ppm_ev_table_lines)
        if ei
    ]
# PT_xxx to FT_xxx
# NOTE(review): only the entries visible in this copy are kept; confirm
# against the upstream script that no mappings are missing.
pt_to_ft = {
    'FD': 'INT64',
}

# FT_xxx to BASE_xxx
# Keys are regular expressions matched against the FT type name with
# re.match(); values are the format forced for matching types.
force_param_formats = {
    'INT.*': 'DEC',
}
def get_event_params():
    '''Return a list of dictionaries containing event names and parameter info.

    Scans ppm_ev_table_lines. Every parameter entry found on an event row
    yields one dict with the keys 'event_name', 'event_num', 'param_name',
    'param_type' and 'param_format'.

    Raises NameError when a row lists fewer parameters than its declared
    count. Surplus parameters are logged as a warning and discarded.
    '''
    event_param_l = []
    event_num = 0
    # Parameters with these names are always treated as strings.
    force_string_l = ['args', 'env']

    for line in ppm_ev_table_lines:
        ei = event_info_re.match(line)
        ep = event_param_re.findall(line)
        if ei and ep:
            event_name = ei.group(1)
            src_param_count = int(ei.group(2))
            if len(ep) != src_param_count:
                err_msg = '{}: found {} parameters. Expected {}. Params: {}'.format(
                    ei.group(1), len(ep), src_param_count, repr(ep))
                if len(ep) > src_param_count:
                    # Too many matches: keep the declared number and carry on.
                    logging.warning(err_msg)
                    del ep[src_param_count:]
                else:
                    raise NameError(err_msg)

            for p in ep:
                # p is a (name, PT_ suffix, PF_ suffix) tuple.
                if p[0] in force_string_l:
                    param_type = 'STRING'
                elif p[1] in pt_to_ft:
                    param_type = pt_to_ft[p[1]]
                elif p[0] == 'flags' and p[1].startswith('INT') and 'HEX' in p[2]:
                    param_type = 'U' + p[1]
                elif 'INT' in p[1]:
                    # Ints
                    param_type = p[1]
                else:
                    # Fall back to bytes
                    param_type = 'BYTES'

                if p[2] == 'NA':
                    if 'INT' in param_type:
                        param_format = 'DEC'
                    else:
                        param_format = 'NONE'
                elif param_type == 'BYTES':
                    param_format = 'NONE'
                else:
                    param_format = p[2]

                # Override the format for types that must use a fixed one.
                for pt_pat, force_pf in force_param_formats.items():
                    if re.match(pt_pat, param_type) and param_format != force_pf:
                        err_msg = 'Forcing {} {} format to {}. Params: {}'.format(
                            event_name, param_type, force_pf, repr(ep))
                        logging.warning(err_msg)
                        param_format = force_pf

                param_d = {
                    'event_name': event_name,
                    'event_num': event_num,
                    'param_name': p[0],
                    'param_type': param_type,
                    'param_format': param_format,
                }
                event_param_l.append(param_d)

        # Count every event row, with or without parameters, so event_num
        # stays aligned with the row's position in the table.
        if ei:
            event_num += 1

    return event_param_l
def param_to_hf_name(param):
    '''Return the header-field variable name for a parameter dict.

    param: dict with 'param_name' and 'param_type' keys.

    The result has the form hf_param_<name>_<type>, with the type
    lower-cased.
    '''
    return 'hf_param_{}_{}'.format(param['param_name'], param['param_type'].lower())
def param_to_value_string_name(param):
    '''Return the value_string array name for a parameter dict.

    param: dict with 'param_name' and 'param_type' keys.

    The result has the form <name>_<type>_vals, with the type lower-cased.
    '''
    return '{}_{}_vals'.format(param['param_name'], param['param_type'].lower())
def get_param_desc(param):
    '''Return a human-friendly description for an event parameter.

    param: dict with 'event_name' and 'param_name' keys.

    Lookup order: a description specific to "<event>.<param>", then a
    per-event description joined with the parameter name, and finally the
    bare parameter name.
    '''
    # Try to coerce event names and parameters into human-friendly
    # strings.
    # XXX This could use some work.

    # Specific descriptions. Event name + parameter name.
    param_descs = {
        'accept.queuepct': 'Accept queue per connection',
        'execve.args': 'Program arguments',
        'execve.comm': 'Command',
        'execve.cwd': 'Current working directory',
    }

    # General descriptions. Event name only.
    event_descs = {
        'ioctl': 'I/O control',
    }

    event_name = param['event_name']
    param_id = '{}.{}'.format(event_name, param['param_name'])

    if param_id in param_descs:
        param_desc = param_descs[param_id]
    elif event_name in event_descs:
        param_desc = '{}: {}'.format(event_descs[event_name], param['param_name'])
    else:
        param_desc = param['param_name']

    return param_desc
# Entry point. Collects event definitions, names and parameters, builds
# the text for each generated section, then reopens
# packet-sysdig-event.c for writing and emits a section after each
# recognised marker comment.
#
# NOTE(review): this copy of the function has lost its indentation and
# appears to be missing lines (else branches, call arguments, some loop
# bodies). The comments below describe only what is visible; the gaps
# need to be restored from the upstream script before this can run.
def main():
logging.basicConfig(format='%(levelname)s: %(message)s')
# Event list
event_d = get_event_defines()
event_nums = list(event_d.keys())
event_name_l = get_event_names()
event_param_l = get_event_params()
# Local map of header-field variable name -> parameter dict. Parameters
# that produce the same name overwrite each other, leaving one entry per
# distinct name.
hf_d = {}
for param in event_param_l:
hf_name = param_to_hf_name(param)
hf_d[hf_name] = param
# Maps a parameter signature string to the event variable that first used
# it. The empty signature is pre-seeded with the name 'no'.
idx_id_to_name = { '': 'no' }
parameter_index_l = []
# NOTE(review): indexes event_d by 0..len(event_nums)-1, which presumably
# assumes event numbers are contiguous from zero - confirm.
for en in range (0, len(event_nums)):
param_id = ''
param_l = []
event_var = event_d[en].lower()
for param in event_param_l:
if param['event_num'] == en:
hf_name = param_to_hf_name(param)
# NOTE(review): nothing visible adds hf_name to param_l, yet param_l is
# joined below - a line looks to be missing here.
param_id += ':' + param['param_name'] + '_' + param['param_type']
ei_str = ''
if param_id not in idx_id_to_name:
idx_id_to_name[param_id] = event_var
# NOTE(review): the format string has two placeholders but only one
# argument is visible and the call is not closed.
ei_str = 'static int * const {}_indexes[] = {{ &{}, NULL }};'.format(
', &'.join(param_l)
# NOTE(review): presumably the else branch of the test above (reuse an
# existing index array through a #define) - confirm. Nothing visible
# appends ei_str to parameter_index_l either.
ei_str = '#define {}_indexes {}_indexes'.format(event_var, idx_id_to_name[param_id])
# The dissector source is located relative to this script's directory.
dissector_path = os.path.join(os.path.dirname(__file__),
'..', 'epan', 'dissectors', 'packet-sysdig-event.c')
# The whole file is read into memory first, then the same path is
# reopened in 'w+' mode, which truncates it. No close() call is visible
# for either handle.
dissector_f = open(dissector_path, 'r')
dissector_lines = list(dissector_f)
dissector_f = open(dissector_path, 'w+')
# Strip out old content
# NOTE(review): only one strip pattern is visible in this copy.
strip_re_l = []
strip_re_l.append(re.compile('^\s*{\s*&hf_param_.*},')) # Must all be on one line
for strip_re in strip_re_l:
dissector_lines = [l for l in dissector_lines if not strip_re.search(l)]
# Find our value strings
value_string_re = re.compile('static\s+const\s+value_string\s+([A-Za-z0-9_]+_vals)')
value_string_l = []
for line in dissector_lines:
vs = value_string_re.match(line)
# NOTE(review): no body is visible for this test; value_string_l is read
# later, so the matched name is presumably collected here - confirm.
if vs:
# Add in new content after comments.
# Each section below pairs a marker text (*_c), a case-insensitive regex
# for a "/* <marker>" comment (*_re) and the generated lines (*_l).
header_fields_c = 'Header fields'
header_fields_re = re.compile('/\*\s+' + header_fields_c, flags = re.IGNORECASE)
header_fields_l = []
for hf_name in sorted(hf_d.keys()):
header_fields_l.append('static int {} = -1;'.format(hf_name))
event_names_c = 'Event names'
event_names_re = re.compile('/\*\s+' + event_names_c, flags = re.IGNORECASE)
event_names_l = []
# set() removes duplicate names; the resulting order is not sorted here.
event_str_l = list(set(event_name_l))
for evt_str in event_str_l:
event_names_l.append('#define EVT_STR_{0:24s} "{1:s}"'.format(evt_str.upper(), evt_str))
event_definitions_c = 'Event definitions'
event_definitions_re = re.compile('/\*\s+' + event_definitions_c, flags = re.IGNORECASE)
event_definitions_l = []
for evt in event_nums:
event_definitions_l.append('#define EVT_{0:24s} {1:3d}'.format(event_d[evt], evt))
value_strings_c = 'Value strings'
value_strings_re = re.compile('/\*\s+' + value_strings_c, flags = re.IGNORECASE)
value_strings_l = []
for evt in event_nums:
evt_num = 'EVT_{},'.format(event_d[evt])
# The event number is used as an index into the list of event names.
evt_str = 'EVT_STR_' + event_name_l[evt].upper()
value_strings_l.append(' {{ {0:<32s} {1:s} }},'.format(evt_num, evt_str))
parameter_index_c = 'Parameter indexes'
parameter_index_re = re.compile('/\*\s+' + parameter_index_c, flags = re.IGNORECASE)
# parameter_index_l defined above.
event_tree_c = 'Event tree'
event_tree_re = re.compile('/\*\s+' + event_tree_c, flags = re.IGNORECASE)
event_tree_l = []
for evt in event_nums:
evt_num = 'EVT_{}'.format(event_d[evt])
evt_idx = '{}_indexes'.format(event_d[evt].lower())
event_tree_l.append(' {{ {}, {} }},'.format(evt_num, evt_idx))
header_field_reg_c = 'Header field registration'
header_field_reg_re = re.compile('/\*\s+' + header_field_reg_c, flags = re.IGNORECASE)
header_field_reg_l = []
for hf_name in sorted(hf_d.keys()):
param = hf_d[hf_name]
event_name = param['event_name']
param_desc = get_param_desc(param)
param_name = param['param_name']
param_type = param['param_type']
param_format = param['param_format']
fieldconvert = 'NULL'
vs_name = param_to_value_string_name(param)
# Attach a VALS() lookup only for integer fields whose value_string array
# was found in the dissector source.
if vs_name in value_string_l and 'INT' in param_type:
fieldconvert = 'VALS({})'.format(vs_name)
# NOTE(review): the arguments and closing parentheses of this format()
# call are not visible.
header_field_reg_l.append(' {{ &{}, {{ "{}", "sysdig.param.{}.{}", FT_{}, BASE_{}, {}, 0, NULL, HFILL }} }},'.format(
# Walk the stripped dissector lines and pick the section, if any, whose
# marker comment matches the current line.
for line in dissector_lines:
fill_comment = None
fill_l = []
if header_fields_re.match(line):
fill_comment = header_fields_c
fill_l = header_fields_l
elif event_names_re.match(line):
fill_comment = event_names_c
fill_l = event_names_l
elif event_definitions_re.match(line):
fill_comment = event_definitions_c
fill_l = event_definitions_l
elif value_strings_re.match(line):
fill_comment = value_strings_c
fill_l = value_strings_l
elif parameter_index_re.match(line):
fill_comment = parameter_index_c
fill_l = parameter_index_l
elif event_tree_re.match(line):
fill_comment = event_tree_c
fill_l = event_tree_l
elif header_field_reg_re.match(line):
fill_comment = header_field_reg_c
fill_l = header_field_reg_l
if fill_comment is not None:
# Write our comment followed by the content
print(('Generating {}, {:d} lines'.format(fill_comment, len(fill_l))))
# NOTE(review): the format() arguments, and the statement that writes
# each line inside the loop below, are not visible.
dissector_f.write('/* {}. Automatically generated by tools/{} */\n'.format(
for line in fill_l:
# Fill each section only once
# Emptying the list in place also empties the section list it aliases, so
# a second matching marker would produce no lines.
del fill_l[:]
# Existing content
# On with the show
if __name__ == "__main__":