tools: Port make-sminmpec.pl to make-sminmpec.py

Port make-sminmpec.pl to Python.
Now uses an explicit destination path,
instead of a hardcoded path relative to
the script's location on disk.
Ping #18152
This commit is contained in:
Moshe Kaplan 2022-06-28 16:35:54 -04:00 committed by Gerald Combs
parent a2272362ce
commit cdb83a370f
2 changed files with 88 additions and 94 deletions

View File

@ -1,94 +0,0 @@
#!/usr/bin/perl -w
# create the enterprises file from
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
use strict;
use File::Spec;
# Work from the source tree root (the parent of this script's directory):
# the output file name below is relative, and so is any relative input
# path given on the command line.
my ($vol, $script_dir) = File::Spec->splitpath( __FILE__ );
my $root_dir = File::Spec->catpath($vol, $script_dir, "..");
chdir($root_dir) || die("Can't find $root_dir");

# Optional first argument: a local copy of the registry, or an http(s) URL.
my $in = shift;
$in = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers" unless(defined $in);

my @in_lines;
my $revision = '2014-04-27';

if ($in =~ m/^https?:/i) {
    # LWP is only needed when fetching over the network, so load it lazily.
    # Block eval avoids compiling a string at run time.
    eval { require LWP::UserAgent; 1 }
        or die "LWP isn't installed. It is part of the standard Perl module libwww.";

    my $agent = LWP::UserAgent->new;
    $agent->env_proxy;
    $agent->agent("Wireshark make-sminmpec.pl/$revision");

    warn "starting to fetch $in ...\n";

    my $request = HTTP::Request->new(GET => $in);
    my $result = $agent->request($request);

    # The status code is a number; compare it numerically.
    if ($result->code == 200) {
        warn "done fetching $in\n";
        @in_lines = split /\n/, $result->content;
    } else {
        die "request for $in failed with result code:" . $result->code;
    }
} else {
    # Three-argument open with a lexical handle: the file name can never be
    # mistaken for a mode, and a missing file is reported immediately.
    open(my $in_fh, '<', $in) or die "Can't open $in for reading: $!";
    @in_lines = <$in_fh>;
    close($in_fh);
}

my $body = '';
my $code;
my $last_updated = "(last updated ???)";
my $end_of_document = 0;

for my $line (@in_lines) {
    chomp $line;

    if ($line =~ /^(\d+)/) {
        # A line starting with digits opens a new entry; "%d" normalizes it.
        $code = sprintf("%d", $1);
    } elsif ($line =~ /^ {1,3}(\S.*)/) { # up to three spaces because of formatting errors in the source
        my $name = $1;
        next if ($line =~ /^\s*\(?\s*unassigned/i);
        # Ignore indented text that appears before the first number.
        next unless defined $code;
        $name =~ s/\s+$//;
        # Turn a trailing "(formerly ...)" note into a comment column.
        $name =~ s/ \((formerly .*)\)/\t# $1/;
        $body .= "\n$code\t$name";
    } elsif ($line =~ /\(last updated/i) {
        $last_updated = $line;
    } elsif ($line =~ /^ *End of Document/) {
        $end_of_document = 1;
    }
}

die "\"End of Document\" not found. Truncated source file?" unless ($end_of_document);

open(my $out_fh, '>', 'enterprises.tsv') or die "Can't open enterprises.tsv for writing: $!";

print {$out_fh} <<"_SMINMPEC";
#
# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# run "tools/make-sminmpec.pl [infile]" to regenerate
#
# The format used here is: <NUMERICAL_ID><SPACE><NAME>
# Where SPACE can be any sequence of spaces and tabs.
#
# $last_updated
$body
_SMINMPEC

# Buffered write errors only surface at close time, so check it.
close($out_fh) or die "Can't close enterprises.tsv: $!";

88
tools/make-sminmpec.py Executable file
View File

@ -0,0 +1,88 @@
#!/usr/bin/env python3
# create the enterprises.tsv file from
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# or an offline copy
#
# Copyright 2022 by Moshe Kaplan
# Based on make-sminmpec.pl by Gerald Combs
#
# Wireshark - Network traffic analyzer
# By Gerald Combs <gerald@wireshark.org>
# Copyright 2004 Gerald Combs
#
# SPDX-License-Identifier: GPL-2.0-or-later
import argparse
import re
import urllib.request
# Location of the IANA Private Enterprise Numbers registry.
ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"

# Comment banner placed at the top of the generated file.
ENTERPRISES_HEADER = """\
#
# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# run "tools/make-sminmpec.py [infile] outfile" to regenerate
#
# The format used here is: <NUMERICAL_ID><SPACE><NAME>
# Where SPACE can be any sequence of spaces and tabs.
#
"""

# An entry starts with its enterprise number at the beginning of a line.
DECIMAL_PATTERN = r"^(\d+)"
# up to three spaces because of formatting errors in the source
# At least one leading space is required: unindented lines such as
# "(last updated ...)" or "End of Document" must not be taken for names.
ORGANIZATION_PATTERN = r"^ {1,3}(\S.*)"
# A "(formerly ...)" note following an organization name.
FORMERLY_PATTERN = r" \((formerly .*)\)"
def generate_enterprise_files(file_content):
    """Build the text of enterprises.tsv from the IANA registry listing.

    Only the "Decimal" and "Organization" fields of each entry are kept;
    the contact and email lines are ignored.

    Args:
        file_content: the complete registry listing as a single string.

    Returns:
        The generated file as a string: the fixed header, a comment holding
        the registry's "last updated" line, then one
        ``<decimal><TAB><organization>`` line per entry.

    Raises:
        Exception: if no "End of Document" line is present, which suggests
            that the input was truncated.
    """
    org_lines = []
    last_updated = ""
    end_seen = False
    # Number of the entry currently being read; None until the first one.
    decimal = None

    for line in file_content.splitlines():
        decimal_match = re.match(DECIMAL_PATTERN, line)
        if decimal_match:
            decimal = decimal_match.group(1)
        elif re.match(ORGANIZATION_PATTERN, line):
            organization = line.strip()
            if organization.lower() == "unassigned":
                continue
            if decimal is None:
                # Indented text ahead of the first number is not an entry;
                # skip it instead of failing on an unbound variable.
                continue
            # Move a "(formerly ...)" note into a trailing comment column.
            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
            org_lines.append(decimal + "\t" + organization)
        elif "last updated" in line.lower():
            last_updated = line
        elif "end of document" in line.lower():
            end_seen = True

    if not end_seen:
        raise Exception('"End of Document" not found. Truncated source file?')

    last_updated_line = "# " + last_updated + "\n\n"
    output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
    return output
def main():
    """Command-line entry point: read the registry and write enterprises.tsv.

    Usage: make-sminmpec.py [infile] outfile

    When ``infile`` is given, the registry is read from that local file;
    otherwise it is downloaded from ENTERPRISE_NUMBERS_URL. The generated
    content is written to ``outfile``.

    Raises:
        Exception: if the download does not answer with HTTP status 200.
    """
    parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
    parser.add_argument('infile', nargs='?')
    parser.add_argument('outfile', nargs=1)
    parsed_args = parser.parse_args()

    if parsed_args.infile:
        with open(parsed_args.infile, encoding='utf-8') as fh:
            data = fh.read()
    else:
        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
            if f.status != 200:
                # f.status is an int; convert it before concatenating so
                # that this reports the failure instead of a TypeError.
                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
            data = f.read().decode('utf-8')

    enterprises_content = generate_enterprise_files(data)

    # nargs=1 makes argparse store outfile as a one-element list.
    with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
        fh.write(enterprises_content)


if __name__ == "__main__":
    main()