tools: Port make-sminmpec.pl to make-sminmpec.py
Port make-sminmpec.pl to Python. Now uses an explicit destination path, instead of a hardcoded path relative to the script's location on disk. Ping #18152
This commit is contained in:
parent
a2272362ce
commit
cdb83a370f
|
@ -1,94 +0,0 @@
|
|||
#!/usr/bin/perl -w
|
||||
# create the enterprises file from
|
||||
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
|
||||
#
|
||||
# Wireshark - Network traffic analyzer
|
||||
# By Gerald Combs <gerald@wireshark.org>
|
||||
# Copyright 2004 Gerald Combs
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
use strict;
use warnings;
use File::Spec;

# Work from the parent of the directory holding this script, so that the
# relative output path used at the end ("enterprises.tsv") is created there.
my ($vol, $script_dir) = File::Spec->splitpath( __FILE__ );
my $root_dir = File::Spec->catpath($vol, $script_dir, "..");
chdir($root_dir) || die("Can't find $root_dir");

# Optional first argument: a URL or a local file to read instead of the
# default IANA location.
my $in = shift;

$in = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers" unless(defined $in);

my @in_lines;
my $revision = '2014-04-27';

if ($in =~ m/^https?:/i) {
    # LWP is loaded lazily so that reading a local file works without it.
    # Block eval (with a trailing true value) instead of string eval.
    eval { require LWP::UserAgent; 1 }
        or die "LWP isn't installed. It is part of the standard Perl module libwww.";

    my $agent = LWP::UserAgent->new;
    $agent->env_proxy;
    $agent->agent("Wireshark make-sminmpec.pl/$revision");

    warn "starting to fetch $in ...\n";

    my $request = HTTP::Request->new(GET => $in);
    my $result = $agent->request($request);

    # The status code is a number; compare it numerically.
    if ($result->code == 200) {
        warn "done fetching $in\n";
        @in_lines = split /\n/, $result->content;
    } else {
        die "request for $in failed with result code:" . $result->code;
    }
} else {
    # Three-argument open with a lexical handle: the file name is never
    # interpreted as a mode, and a missing file is reported immediately.
    open my $in_fh, '<', $in or die "Can't open $in for reading: $!";
    @in_lines = <$in_fh>;
    close $in_fh;
}

my $body = '';
my $code;
my $last_updated = "(last updated ???)";
my $end_of_document = 0;

for my $line (@in_lines) {
    chomp $line;

    if ($line =~ /^(\d+)/) {
        # A record starts with its enterprise number at column 0.
        $code = sprintf("%d", $1);
    } elsif ($line =~ /^ {2,3}(\S.*)/) {
        # Organization name: indented by two spaces, or up to three because
        # of formatting errors in the source.  Unindented lines and the
        # deeper-indented lines that follow must not match here, otherwise
        # the "last updated" and "End of Document" branches below are never
        # reached.
        my $name = $1;
        next unless defined $code;    # name line before any number
        next if ($line =~ /^\s*\(?\s*unassigned/i);
        $name =~ s/\s+$//;
        # Turn a trailing "(formerly X)" note into a tab-separated comment.
        $name =~ s/ \((formerly .*)\)/\t# $1/;
        $body .= "\n$code\t$name";
    } elsif ($line =~ /\(last updated/i) {
        $last_updated = $line;
    } elsif ($line =~ /^ *End of Document/) {
        $end_of_document = 1;
    }
}

die "\"End of Document\" not found. Truncated source file?" unless ($end_of_document);

open my $out_fh, '>', 'enterprises.tsv'
    or die "Can't open enterprises.tsv for writing: $!";

print {$out_fh} <<"_SMINMPEC";
#
# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# run "tools/make-sminmpec.pl [infile]" to regenerate
#
# The format used here is: <NUMERICAL_ID><SPACE><NAME>
# Where SPACE can be any sequence of spaces and tabs.
#
# $last_updated
$body
_SMINMPEC

# Buffered write errors only surface at close, so check it.
close $out_fh or die "Can't write enterprises.tsv: $!";
|
|
@ -0,0 +1,88 @@
|
|||
#!/usr/bin/env python3
|
||||
# create the enterprises.tsv file from
|
||||
# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
|
||||
# or an offline copy
|
||||
#
|
||||
# Copyright 2022 by Moshe Kaplan
|
||||
# Based on make-sminmpec.pl by Gerald Combs
|
||||
#
|
||||
# Wireshark - Network traffic analyzer
|
||||
# By Gerald Combs <gerald@wireshark.org>
|
||||
# Copyright 2004 Gerald Combs
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import urllib.request
|
||||
|
||||
|
||||
# Default location of the IANA Private Enterprise Numbers listing.
ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"

# Comment header written at the top of the generated file.
ENTERPRISES_HEADER = """\
#
# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
# run "tools/make-sminmpec.py [infile] outfile" to regenerate
#
# The format used here is: <NUMERICAL_ID><SPACE><NAME>
# Where SPACE can be any sequence of spaces and tabs.
#
"""

# A record starts with its decimal enterprise number at the start of a line.
DECIMAL_PATTERN = r"^(\d+)"
# The organization name is indented by two spaces, or up to three because of
# formatting errors in the source. The count is spelled out as a quantifier
# so that unindented lines (headings, "(last updated ...)", "End of Document")
# and lines indented by four or more spaces never match.
ORGANIZATION_PATTERN = r"^ {2,3}(\S.*)"
# A " (formerly X)" note inside an organization name.
FORMERLY_PATTERN = r" \((formerly .*)\)"
|
||||
|
||||
|
||||
def generate_enterprise_files(file_content):
    """Build the text of the enterprises file from an enterprise-numbers listing.

    The input is scanned line by line. A line matching DECIMAL_PATTERN sets
    the current enterprise number; a following line matching
    ORGANIZATION_PATTERN supplies its organization name. Only those two
    fields are kept, not the contact or email.

    Args:
        file_content: The complete listing as a single string.

    Returns:
        ENTERPRISES_HEADER, a "# <last updated line>" comment, a blank line,
        then one "<number>\\t<organization>" line per entry, ending with a
        newline.

    Raises:
        ValueError: If no line containing "End of Document" was seen, which
            suggests the input was truncated.
    """
    org_lines = []
    last_updated = ""
    end_seen = False
    # No enterprise number seen yet. Initialised so that an organization-like
    # line ahead of the first number is skipped instead of raising
    # UnboundLocalError.
    decimal = None
    for line in file_content.splitlines():
        decimal_match = re.match(DECIMAL_PATTERN, line)
        if decimal_match:
            decimal = decimal_match.group(0)
        elif re.match(ORGANIZATION_PATTERN, line):
            if decimal is None:
                continue
            organization = line.strip()
            if organization.lower() == "unassigned":
                continue
            # Turn a " (formerly X)" note into a tab-separated trailing comment.
            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
            org_lines.append(decimal + "\t" + organization)
        elif "last updated" in line.lower():
            last_updated = line
        elif "end of document" in line.lower():
            end_seen = True

    if not end_seen:
        raise ValueError('"End of Document" not found. Truncated source file?')

    last_updated_line = "# " + last_updated + "\n\n"
    output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
    return output
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: read the listing and write the output file.

    Usage: make-sminmpec.py [infile] outfile

    When infile is given it is read as UTF-8 text; otherwise the listing is
    downloaded from ENTERPRISE_NUMBERS_URL. The result of
    generate_enterprise_files() is written to outfile as UTF-8.

    Raises:
        Exception: If the download completes with a status other than 200.
    """
    parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
    parser.add_argument('infile', nargs='?')
    parser.add_argument('outfile', nargs=1)
    parsed_args = parser.parse_args()

    if parsed_args.infile:
        with open(parsed_args.infile, encoding='utf-8') as fh:
            data = fh.read()
    else:
        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
            if f.status != 200:
                # f.status is an int; convert it explicitly, otherwise building
                # the message raises TypeError and hides the real failure.
                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
            data = f.read().decode('utf-8')

    enterprises_content = generate_enterprise_files(data)
    # nargs=1 stores outfile as a one-element list.
    with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
        fh.write(enterprises_content)


if __name__ == "__main__":
    main()
|
Loading…
Reference in New Issue