Speed up handling of global enterprises.tsv

This commit is contained in:
Martin Mathieson 2023-07-05 12:09:54 +00:00
parent 5fb94c8a1a
commit 44258d5bed
6 changed files with 60868 additions and 13 deletions

View File

@ -5,7 +5,7 @@
# The format used here is: <NUMERICAL_ID><SPACE><NAME>
# Where SPACE can be any sequence of spaces and tabs.
#
# (last updated 2023-07-01)
# (last updated 2023-07-03)
0 Reserved
1 NxNetworks
@ -56102,7 +56102,6 @@
56169 Open Products, Networks & Software (OPNS)
56170 Secure Nordic Payments
56171 Samm Technology Communications Industry and Trade Inc.
56172 Stadt Lauf
56173 A.J. Steenkist
56174 Geminare Inc.
56175 AirPro Technology India Pvt Ltd
@ -60612,3 +60611,6 @@
60691 Cabalier Intelligent Software
60692 Health Plan Services, Inc
60693 Levi Keehn
60694 MEBAY
60695 iPresso S.A.
60696 SlashID

Can't render this file because it is too large.

View File

@ -118,6 +118,7 @@ set(LIBWIRESHARK_PUBLIC_HEADERS
params.h
pci-ids.h
plugin_if.h
global_enterprise_entries.h
ppptypes.h
print.h
print_stream.h
@ -271,7 +272,7 @@ set(LIBWIRESHARK_NONGENERATED_FILES
${CMAKE_CURRENT_BINARY_DIR}/ps.c
)
set(LIBWIRESHARK_FILES ${LIBWIRESHARK_NONGENERATED_FILES})
set(LIBWIRESHARK_FILES ${LIBWIRESHARK_NONGENERATED_FILES} global_enterprise_entries.c)
add_lex_files(LEX_FILES LIBWIRESHARK_FILES
diam_dict.l

View File

@ -23,6 +23,8 @@
#include <wsutil/strtoi.h>
#include <wsutil/ws_assert.h>
#include "global_enterprise_entries.h"
/*
* Win32 doesn't have SIGALRM (and it's the OS where name lookup calls
* are most likely to take a long time, given the way address-to-name
@ -223,6 +225,8 @@ static wmem_map_t *wka_hashtable = NULL;
static wmem_map_t *eth_hashtable = NULL;
// Maps guint -> serv_port_t*
static wmem_map_t *serv_port_hashtable = NULL;
// Maps enterprise-id -> enterprise-desc (only used for user additions)
static GHashTable *enterprises_hashtable = NULL;
static subnet_length_entry_t subnet_length_entries[SUBNETLENGTHSIZE]; /* Ordered array of entries */
@ -303,7 +307,6 @@ gchar *g_services_path = NULL; /* global services file */
gchar *g_pservices_path = NULL; /* personal services file */
gchar *g_pvlan_path = NULL; /* personal vlans file */
gchar *g_ss7pcs_path = NULL; /* personal ss7pcs file */
gchar *g_enterprises_path = NULL; /* global enterprises file */
gchar *g_penterprises_path = NULL; /* personal enterprises file */
/* first resolving call */
@ -942,11 +945,7 @@ initialize_enterprises(void)
ws_assert(enterprises_hashtable == NULL);
enterprises_hashtable = g_hash_table_new_full(NULL, NULL, NULL, g_free);
if (g_enterprises_path == NULL) {
g_enterprises_path = get_datafile_path(ENAME_ENTERPRISES);
}
parse_enterprises_file(g_enterprises_path);
/* Populate entries from profile or personal */
if (g_penterprises_path == NULL) {
/* Check profile directory before personal configuration */
g_penterprises_path = get_persconffile_path(ENAME_ENTERPRISES, TRUE);
@ -955,13 +954,21 @@ initialize_enterprises(void)
g_penterprises_path = get_persconffile_path(ENAME_ENTERPRISES, FALSE);
}
}
/* Parse personal file (if present) */
parse_enterprises_file(g_penterprises_path);
}
/*
 * Look up the name for an enterprise number.
 *
 * Entries held in enterprises_hashtable (the user additions) are consulted
 * first, so such an entry overrides the one compiled into the global table.
 * When the hash table has no entry for value, the result of
 * global_enterprises_lookup() is returned instead.
 */
const gchar *
try_enterprises_lookup(guint32 value)
{
    const gchar *name = (const gchar *)g_hash_table_lookup(enterprises_hashtable, GUINT_TO_POINTER(value));

    if (name != NULL) {
        return name;
    }

    /* No user-supplied entry: fall back to the generated global table. */
    return global_enterprises_lookup(value);
}
const gchar *
@ -993,9 +1000,6 @@ enterprises_cleanup(void)
ws_assert(enterprises_hashtable);
g_hash_table_destroy(enterprises_hashtable);
enterprises_hashtable = NULL;
ws_assert(g_enterprises_path);
g_free(g_enterprises_path);
g_enterprises_path = NULL;
g_free(g_penterprises_path);
g_penterprises_path = NULL;
g_free(g_pservices_path);

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,12 @@
/* global_enterprise_entries.h
 *
 * Declares the lookup function for the global enterprises table that is
 * generated into global_enterprise_entries.c.
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#ifndef GLOBAL_ENTERPRISE_ENTRIES_H
#define GLOBAL_ENTERPRISE_ENTRIES_H

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * Look up an enterprise number in the generated global table.
 *
 * @param value Enterprise number to look up.
 * @return The name stored for value, or NULL when value is larger than the
 *         table's highest index or the table has no entry for it.
 */
const char* global_enterprises_lookup(uint32_t value);

#ifdef __cplusplus
}
#endif

#endif /* GLOBAL_ENTERPRISE_ENTRIES_H */

View File

@ -12,6 +12,7 @@
#
# SPDX-License-Identifier: GPL-2.0-or-later
import os
import argparse
import re
import urllib.request
@ -63,6 +64,85 @@ def generate_enterprise_files(file_content):
output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
return output
class CFile:
    """Writer for the generated C file holding the global enterprises table.

    The file header and #include lines are written as soon as the object is
    created.  Mappings are then collected with addMapping(), and the table
    plus the global_enterprises_lookup() function are emitted when the
    object is finalized: by an explicit close(), by leaving a ``with``
    block, or (as a fallback) when the object is garbage collected.
    """

    def __init__(self, filename):
        """Open filename for writing and emit the fixed file preamble."""
        self.filename = filename
        self.f = open(filename, 'w')
        self.mappings = {}
        self.highest_num = 0

        # Write file header
        self.f.write('/* ' + os.path.basename(self.filename) + '\n')
        self.f.write(' *\n')
        self.f.write(' * Wireshark - Network traffic analyzer\n')
        self.f.write(' * By Gerald Combs <gerald@wireshark.org>\n')
        self.f.write(' * Copyright 1998 Gerald Combs\n')
        self.f.write(' *\n')
        self.f.write(' * Do not edit - this file is automatically generated\n')
        self.f.write(' * SPDX-License-Identifier: GPL-2.0-or-later\n')
        self.f.write(' */\n\n')

        # Include header files
        self.f.write('#include "config.h"\n\n')
        self.f.write('#include <stddef.h>\n')
        self.f.write('#include <stdint.h>\n\n')
        self.f.write('#include "global_enterprise_entries.h"\n')
        self.f.write('\n\n')

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()

    def __del__(self):
        # Fallback for callers that never call close(); close() is a no-op
        # when the file has already been finalized.
        self.close()

    def close(self):
        """Write the table and lookup function, then close the file.

        Safe to call more than once, and safe when __init__ failed before
        the output file could be opened.
        """
        out = getattr(self, 'f', None)
        if out is None or out.closed:
            return
        try:
            self._write_table(out)
            self._write_lookup_function(out)
        finally:
            out.close()
        print('Re-generated', self.filename)

    def _write_table(self, out):
        """Emit the table type and the static table indexed by number."""
        out.write('typedef struct\n')
        out.write('{\n')
        out.write(' uint32_t max_idx;\n')
        out.write(' const char* values[' + str(self.highest_num+1) + '];\n')
        out.write('} global_enterprises_table_t;\n\n')

        # Write static table (const: it is never modified at runtime)
        out.write('static const global_enterprises_table_t table =\n')
        out.write('{\n')
        # Largest index
        out.write(' ' + str(self.highest_num) + ',\n')
        out.write(' {\n')
        # Entries (read from dict)
        for n in range(0, self.highest_num+1):
            if n not in self.mappings:
                # There are some gaps, write a NULL entry so can lookup by index
                line = ' NULL'
            else:
                line = ' "' + self.mappings[n] + '"'
            # Add comma (except after the last entry).
            if n < self.highest_num:
                line += ','
            # Add number as aligned comment.
            out.write(line.ljust(90) + '// ' + str(n) + '\n')
        # End of array
        out.write(' }\n')
        # End of struct, followed by a blank line before the function
        out.write('};\n\n')

    def _write_lookup_function(self, out):
        """Emit global_enterprises_lookup(), a bounds-checked table index."""
        out.write('const char* global_enterprises_lookup(uint32_t value)\n')
        out.write('{\n')
        out.write(' if (value > table.max_idx) {\n')
        out.write(' return NULL;\n')
        out.write(' }\n')
        out.write(' else return table.values[value];\n')
        out.write('}\n')

    def addMapping(self, num, name):
        """Record that enterprise number num maps to name.

        name is stored already escaped for use inside a C string literal.
        A later mapping for the same num replaces the earlier one.
        """
        # Escape backslashes first so the backslashes added for quotes
        # are not themselves doubled.
        name = name.replace('\\', '\\\\')
        # A C string literal needs \" for an embedded quote; "" would end
        # the literal and start a new one, silently dropping the quote.
        name = name.replace('"', '\\"')
        # Record.
        self.mappings[num] = name
        self.highest_num = max(self.highest_num, num)
def main():
parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
@ -70,6 +150,7 @@ def main():
parser.add_argument('outfile', nargs=1)
parsed_args = parser.parse_args()
# Read data from file or webpage
if parsed_args.infile:
with open(parsed_args.infile, encoding='utf-8') as fh:
data = fh.read()
@ -79,10 +160,31 @@ def main():
raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + f.status)
data = f.read().decode('utf-8')
# Find bits we need and write them to file
enterprises_content = generate_enterprise_files(data)
with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
fh.write(enterprises_content)
# Now write to a C file the contents (which is faster than parsing the global file at runtime).
with open(parsed_args.outfile[0], 'r') as tsv_f:
c_file = CFile(os.path.join('epan', 'global_enterprise_entries.c'))
# Find all mappings from .tsv file
lines = tsv_f.read().splitlines()
mapping_re = re.compile(r'^(\d+)\s+(.*)$')
for line in lines:
match = mapping_re.match(line)
if match:
num, name = match.group(1), match.group(2)
# Strip any comments and/or trailing whitespace
idx = name.find('#')
if idx != -1:
name = name[0:idx]
name = name.rstrip()
# Add
c_file.addMapping(int(num), name)
if __name__ == "__main__":
main()