tools: Port make-sminmpec.pl to make-sminmpec.py

Port make-sminmpec.pl to Python. Now uses an explicit destination path, instead of a hardcoded path relative to the script's location on disk. Ping #18152
2022-06-28 16:35:54 -04:00 · 2022-06-28 16:35:54 -04:00 · cdb83a370f
parent a2272362ce
commit cdb83a370f
2 changed files with 88 additions and 94 deletions
--- a/tools/make-sminmpec.pl
+++ b/tools/make-sminmpec.pl
@ -1,94 +0,0 @@
-#!/usr/bin/perl -w
-# create the enterprises file from
-# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-#
-# Wireshark - Network traffic analyzer
-# By Gerald Combs <gerald@wireshark.org>
-# Copyright 2004 Gerald Combs
-#
-# SPDX-License-Identifier: GPL-2.0-or-later
-
-use strict;
-use File::Spec;
-
-my ($vol, $script_dir) = File::Spec->splitpath( __FILE__ );
-my $root_dir = File::Spec->catpath($vol, $script_dir, "..");
-chdir($root_dir) || die("Can't find $root_dir");
-
-my $in = shift;
-
-$in = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers" unless(defined $in);
-
-my @in_lines;
-my $revision = '2014-04-27';
-
-my $min_entries = 100;
-my $smi_total = 0;
-
-if($in =~ m/^https?:/i) {
-	eval "require LWP::UserAgent;";
-	die "LWP isn't installed. It is part of the standard Perl module libwww." if $@;
-
-	my $agent    = LWP::UserAgent->new;
-	$agent->env_proxy;
-	$agent->agent("Wireshark make-sminmpec.pl/$revision");
-
-	warn "starting to fetch $in ...\n";
-
-	my $request  = HTTP::Request->new(GET => $in);
-
-	my $result   = $agent->request($request);
-
-	if ($result->code eq 200) {
-		warn "done fetching $in\n";
-		@in_lines = split /\n/, $result->content;
-	} else {
-		die "request for $in failed with result code:" . $result->code;
-	}
-} else {
-  open IN, "< $in";
-  @in_lines = <IN>;
-  close IN;
-}
-
-my $body = '';
-my $code;
-my $name;
-my $last_updated = "(last updated ???)";
-my $end_of_document = 0;
-
-for(@in_lines) {
-	chomp;
-
-	if (/^(\d+)/) {
-		$code = sprintf("%d", $1);
-	} elsif (/^   ?(\S.*)/ ) { # up to three spaces because of formatting errors in the source
-		$name = $1;
-		next if (/^\s*\(?\s*unassigned/i);
-		$name =~ s/\s+$//;
-		$name =~ s/ \((formerly .*)\)/\t# $1/;
-		$body .= "\n$code\t$name";
-	} elsif (/\(last updated/i) {
-		$last_updated = $_;
-	} elsif (/^ *End of Document/) {
-		$end_of_document = 1;
-	}
-}
-
-die "\"End of Document\" not found. Truncated source file?" unless ($end_of_document);
-
-open OUT, "> enterprises.tsv";
-
-print OUT <<"_SMINMPEC";
-#
-# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
-# run "tools/make-sminmpec.pl [infile]" to regenerate
-#
-# The format used here is: <NUMERICAL_ID><SPACE><NAME>
-# Where SPACE can be any sequence of spaces and tabs.
-#
-# $last_updated
-$body
-_SMINMPEC
-
-close OUT;
--- a/tools/make-sminmpec.py
+++ b/tools/make-sminmpec.py
@ -0,0 +1,88 @@
+#!/usr/bin/env python3
+# create the enterprises.tsv file from
+# https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# or an offline copy
+#
+# Copyright 2022 by Moshe Kaplan
+# Based on make-sminmpec.pl by Gerald Combs
+#
+# Wireshark - Network traffic analyzer
+# By Gerald Combs <gerald@wireshark.org>
+# Copyright 2004 Gerald Combs
+#
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+import argparse
+import re
+import urllib.request
+
+
+# IANA registry that is downloaded when no input file is given on the
+# command line.
+ENTERPRISE_NUMBERS_URL = "https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers"
+
+# Comment banner placed at the top of the generated file; the "last updated"
+# comment and the entries are appended after it.
+ENTERPRISES_HEADER = """\
+#
+# generated from https://www.iana.org/assignments/enterprise-numbers/enterprise-numbers
+# run "tools/make-sminmpec.py [infile] outfile" to regenerate
+#
+# The format used here is: <NUMERICAL_ID><SPACE><NAME>
+# Where SPACE can be any sequence of spaces and tabs.
+#
+"""
+
+# An entry starts with its decimal enterprise number at the beginning of a line.
+DECIMAL_PATTERN = r"^(\d+)"
+# The organization line is indented by two spaces; a third space is tolerated
+# because of formatting errors in the source.
+ORGANIZATION_PATTERN = r"^   ?(\S.*)"
+# A " (formerly ...)" note inside an organization name; it is rewritten into a
+# tab-separated "# formerly ..." comment in the output.
+FORMERLY_PATTERN = r" \((formerly .*)\)"
+
+
+def generate_enterprise_files(file_content):
+    """Build the enterprises.tsv content from the IANA registry text.
+
+    Args:
+        file_content: the complete registry text as a str.
+
+    Returns:
+        A str made of ENTERPRISES_HEADER, a "# <last updated line>" comment,
+        an empty line, and one "<decimal><TAB><organization>" line per
+        entry. Entries whose organization is exactly "unassigned" (in any
+        letter case) are left out.
+
+    Raises:
+        Exception: if no line contains "End of Document", which points to a
+            truncated input.
+    """
+    # We only care about the "Decimal" and "Organization",
+    # not the contact or email
+    org_lines = []
+    last_updated = ""
+    end_seen = False
+    # Number of the entry currently being read. It stays None until the first
+    # decimal line, so an indented line in the preamble is not mistaken for an
+    # organization (which used to fail with UnboundLocalError).
+    decimal = None
+    for line in file_content.splitlines():
+        decimal_match = re.match(DECIMAL_PATTERN, line)
+        if decimal_match:
+            decimal = decimal_match.group(0)
+        elif decimal is not None and re.match(ORGANIZATION_PATTERN, line):
+            organization = line.strip()
+            if organization.lower() == "unassigned":
+                continue
+            # Turn " (formerly X)" into a trailing "<TAB># formerly X" comment.
+            organization = re.sub(FORMERLY_PATTERN, r"\t# \1", organization)
+            org_lines.append(decimal + "\t" + organization)
+        elif "last updated" in line.lower():
+            last_updated = line
+        elif "end of document" in line.lower():
+            end_seen = True
+
+    if not end_seen:
+        raise Exception('"End of Document" not found. Truncated source file?')
+
+    last_updated_line = "# " + last_updated + "\n\n"
+    output = ENTERPRISES_HEADER + last_updated_line + "\n".join(org_lines) + "\n"
+    return output
+
+
+def main():
+    """Read the registry, convert it, and write the result to the output file.
+
+    When ``infile`` is given, the registry is read from that file as UTF-8;
+    otherwise it is downloaded from ENTERPRISE_NUMBERS_URL. The generated
+    content is written to ``outfile`` as UTF-8.
+
+    Raises:
+        Exception: if the download returns a status other than 200, or if
+            the registry text has no "End of Document" marker.
+    """
+    parser = argparse.ArgumentParser(description="Create the enterprises.tsv file.")
+    parser.add_argument('infile', nargs='?',
+                        help="offline copy of the enterprise-numbers file (default: download it)")
+    parser.add_argument('outfile', nargs=1,
+                        help="path of the enterprises.tsv file to write")
+    parsed_args = parser.parse_args()
+
+    if parsed_args.infile:
+        with open(parsed_args.infile, encoding='utf-8') as fh:
+            data = fh.read()
+    else:
+        with urllib.request.urlopen(ENTERPRISE_NUMBERS_URL) as f:
+            if f.status != 200:
+                # f.status is an int; it must be converted before being
+                # concatenated, otherwise this line raises TypeError instead
+                # of reporting the failed request.
+                raise Exception("request for " + ENTERPRISE_NUMBERS_URL + " failed with result code " + str(f.status))
+            data = f.read().decode('utf-8')
+
+    enterprises_content = generate_enterprise_files(data)
+    # nargs=1 makes argparse store outfile as a one-element list.
+    with open(parsed_args.outfile[0], encoding='utf-8', mode='w') as fh:
+        fh.write(enterprises_content)
+
+
+# Run the generator only when this file is executed as a script, not when it
+# is imported as a module.
+if __name__ == "__main__":
+    main()