wireshark/tools/make-manuf

307 lines
9.2 KiB
Perl
Executable File

#!/usr/bin/perl -w
#
# Make-manuf - Creates a file containing Ethernet OUIs and their
# company IDs. It merges the databases at IEEE with entries in our
# template file. Our file in turn contains entries from
# http://www.cavebear.com/archive/cavebear/Ethernet/Ethernet.txt
# along with our own.
#
# The script reads the comments at the top of "manuf.tmpl" and writes
# them to "manuf". It then joins the manufacturer listing in "manuf.tmpl"
# with the listing in "oui.txt", "iab.txt", etc, with the entries in
# "manuf.tmpl" taking precedence.
# LWP is part of the standard Perl module libwww
# As of April 2012 the IEEE content is mostly UTF-8 encoded although some
# of the entries feature sequences listed at
# http://www.i18nqa.com/debug/utf8-debug.html
# As of October 2016 the download links at
# http://standards.ieee.org/develop/regauth/
# point to CSV files. We might want to download and parse those in favor
# of our current text munging.
use Encode;
use open ':encoding(utf8)';
use Text::Autoformat;
eval "require LWP::UserAgent;";
if( $@ ) {
die "LWP isn't installed. It is part of the standard Perl\n" .
" module libwww. Bailing.\n";
}
# HTTP client shared by every fetch() call below. It announces itself
# as "Wireshark make-manuf" and takes its proxy configuration from the
# environment (LWP::UserAgent's env_proxy).
$agent = LWP::UserAgent->new;
$agent->agent("Wireshark make-manuf");
$agent->env_proxy;
# Input template and generated output file (relative to the current
# directory).
$template = "manuf.tmpl";
$outfile = "manuf";

# True while the template reader is still inside the leading comment
# header, i.e. before the first OUI line has been seen.
$inheader = 1;

# IEEE registry listings that are downloaded and merged.
$oui_url = "http://standards.ieee.org/develop/regauth/oui/oui.txt";
$cid_url = "http://standards.ieee.org/develop/regauth/cid/cid.txt";
$iab_url = "http://standards.ieee.org/develop/regauth/iab/iab.txt";
$oui28_url = "http://standards.ieee.org/develop/regauth/oui28/oui28.txt";
$oui36_url = "http://standards.ieee.org/develop/regauth/oui36/oui36.txt";

# Prefix => manufacturer text; this is what finally gets written out.
%oui_list = ();

# Pattern fragments: one hex byte, a colon-separated three byte prefix
# (template notation) and a dash-separated one (IEEE notation).
$hp = "[0-9a-fA-F]{2}";
$oui_re = "$hp:$hp:$hp";
$ieee_re = "$hp-$hp-$hp";

# Sanity thresholds: the run is aborted when a download or the merged
# result is smaller than these.
$min_entries = 1000;
$min_cid_entries = 30;
$min_total = 27000; # 27196 as of 2016-10-02

# Statistics, reported at the end of the run.
$tmpl_added = 0;
$oui_added = 0;
$oui_skipped = 0;
$oui_total = 0;
$iab_added = 0;
$iab_skipped = 0;
$iab_total = 0;
$oui28_added = 0;
$oui28_skipped = 0;
$oui28_total = 0;
$oui36_added = 0;
$oui36_skipped = 0;
$oui36_total = 0;
# $cid_added used to be the only counter left uninitialised, although it
# is summed into the total and printed in the summary.
$cid_added = 0;
$cid_skipped = 0;
$cid_total = 0;
# shorten($name)
#
# Build the manufacturer text stored for an IEEE entry.
#
# Argument: the company name as captured from an IEEE listing line.
# Returns:  a short tag of at most 8 characters. When that tag does not
#           already contain the whole (trimmed) name, the result is
#           "<tag>\t<name>" instead, with all-caps names converted to
#           title case via Text::Autoformat.
sub shorten
{
    my $origmanuf = shift;

    # Trim leading (probably not needed) & trailing (absolutely needed) space.
    $origmanuf =~ s/^\s+|\s+$//g;

    # Pad with a space on both sides so that the first and last word are
    # delimited like every other word for the patterns below.
    my $manuf = " " . $origmanuf . " ";
    # Turn punctuation into spaces
    $manuf =~ tr/',.()/ /;
    # & isn't needed when standalone
    $manuf =~ s/ \& / /g;
    # Remove any "the", "inc", "plc" ...
    # (The list used to contain an empty alternative, "plc||systems",
    # which merely removed a whitespace character in front of a space;
    # all whitespace is removed a few lines further down anyway.)
    $manuf =~ s/\s(?:the|inc|incorporated|plc|systems|corp|corporation|s\/a|a\/s|ab|ag|kg|gmbh|co|company|limited|ltd|holding|spa)(?= )//gi;
    # Convert to consistent case: first character upper, rest lower
    $manuf =~ s/(\w+)/\u\L$1/g;
    # Remove all spaces
    $manuf =~ s/\s+//g;
    # Truncate all names to a reasonable length, say, 8 characters.
    # If the string contains UTF-8, this may be substantially more than 8 bytes.
    $manuf = substr($manuf, 0, 8);

    # The tag already spells out the complete name: no long form needed.
    return $manuf if $manuf =~ /\Q$origmanuf\E/i;

    my $mixedcase = $origmanuf;
    # If company is all caps, convert to mixed case (so it doesn't look
    # like we're screaming the company name)
    if ($origmanuf =~ /^[A-Z\s\.\,]+$/) {
        $mixedcase = autoformat($origmanuf, { case => 'title' });
        # autoformat produces some unnecessary linebreaks
        chomp($mixedcase);
        chomp($mixedcase);
    }
    return sprintf("%s\t%s", $manuf, $mixedcase);
}
# fetch($url)
#
# Download $url with the shared $agent and return the response body
# decoded from UTF-8. Prints a progress line first and dies with the
# HTTP status line when the request is not successful.
sub fetch
{
    my $url = shift;

    print "Fetching $url.\n";

    # Request and response are lexical so nothing from one download
    # lingers in package globals.
    my $request = HTTP::Request->new(GET => $url);
    my $result = $agent->request($request);
    if (!$result->is_success) {
        die ("Error fetching $url: " . $result->status_line . "\n");
    }
    return decode("utf-8", $result->content);
}
# Collect the header and populate the OUI list with our entries.
#
# Every line up to the first "XX:XX:XX <name>" line is kept verbatim in
# $header. From then on only lines of that form are used; they fill
# %oui_list and anything else is ignored.
$header = "";
open (my $tmpl_fh, "<", $template) or
    die "Couldn't open template file for reading ($template): $!\n";
while ($line = <$tmpl_fh>) {
    chomp($line);
    if ($line !~ /^$oui_re\s+\S/ && $inheader) {
        $header .= "$line\n";
    } elsif (($oui, $manuf) = ($line =~ /^($oui_re)\s+(\S.*)$/)) {
        $inheader = 0;
        # Ensure OUI is all upper-case
        $oui =~ tr/a-f/A-F/;
        # Template names are stored as written, they are not shortened.
        $oui_list{$oui} = $manuf;
        $tmpl_added++;
    }
}
close($tmpl_fh);
# Add IEEE entries for IABs
#
# Two kinds of lines are used: a "(hex)" line supplies the three byte
# base prefix, and a "(base 16)" range line supplies the next two bytes
# from the start of the range. Entries already present in %oui_list
# (i.e. from the template) win. $iab_total counts every line of the
# download and only serves as a sanity check on its size.
$ieee_list = fetch($iab_url);
foreach $line (split(/[\r\n]+/, $ieee_list)) {
    # determine the OUI used for IAB (currently only 00-50-C2)
    if (($iab_tmp, $manuf) = ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/)) {
        $iab_base = $iab_tmp;
    }
    # determine next two bytes
    if (($iab4, $iab5, $manuf) = ($line =~ /^\s*($hp)($hp)$hp-$hp$hp$hp\s+\(base\s16\)\s+(\S.*)$/)) {
        $iab = "$iab_base:$iab4:$iab5:00/36";
        $iab =~ tr /-/:/; # The IEEE bytes are separated by dashes.
        # Ensure IAB is all upper-case
        $iab =~ tr/a-f/A-F/;
        if (exists $oui_list{$iab}) {
            # print rather than printf: the names come from the download
            # and a '%' in them must not be treated as a format directive.
            print "$iab - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$iab}\"\n";
            $iab_skipped++;
        } else {
            $oui_list{$iab} = shorten($manuf);
            $iab_added++;
        }
    }
    $iab_total++;
}
if ($iab_total < $min_entries) { die "Too few IAB entries ($iab_total)\n"; }
# Add IEEE entries for OUI-28
#
# Same two-line scheme as the other ranged registries: the "(hex)" line
# gives the three byte base prefix, the "(base 16)" range line gives the
# next two bytes. Existing %oui_list entries take precedence.
# $oui28_total counts every line of the download (size sanity check).
$ieee_list = fetch($oui28_url);
foreach $line (split(/[\r\n]+/, $ieee_list)) {
    # determine the OUI used for OUI-28
    if (($oui28_tmp, $manuf) = ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/)) {
        $oui28_base = $oui28_tmp;
    }
    # determine next two bytes
    if (($oui28_4, $oui28_5, $manuf) = ($line =~ /^\s*($hp)($hp)$hp-$hp$hp$hp\s+\(base\s16\)\s+(\S.*)$/)) {
        $oui28 = "$oui28_base:$oui28_4:$oui28_5:00/28";
        $oui28 =~ tr /-/:/; # The IEEE bytes are separated by dashes.
        # Ensure OUI-28 is all upper-case
        $oui28 =~ tr/a-f/A-F/;
        if (exists $oui_list{$oui28}) {
            # print rather than printf: the names come from the download
            # and a '%' in them must not be treated as a format directive.
            print "$oui28 - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$oui28}\"\n";
            $oui28_skipped++;
        } else {
            $oui_list{$oui28} = shorten($manuf);
            $oui28_added++;
        }
    }
    $oui28_total++;
}
if ($oui28_total < $min_entries) { die "Too few OUI-28 entries ($oui28_total)\n"; }
# Add IEEE entries for OUI-36
#
# The "(hex)" line gives the three byte base prefix, the "(base 16)"
# range line gives the next two bytes. Existing %oui_list entries take
# precedence. $oui36_total counts every line of the download (size
# sanity check).
$ieee_list = fetch($oui36_url);
foreach $line (split(/[\r\n]+/, $ieee_list)) {
    # determine the OUI used for OUI-36 (currently only 00-1B-C5)
    if (($oui36_tmp, $manuf) = ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/)) {
        $oui36_base = $oui36_tmp;
    }
    # determine next two bytes
    if (($oui36_4, $oui36_5, $manuf) = ($line =~ /^\s*($hp)($hp)$hp-$hp$hp$hp\s+\(base\s16\)\s+(\S.*)$/)) {
        $oui36 = "$oui36_base:$oui36_4:$oui36_5:00/36";
        $oui36 =~ tr /-/:/; # The IEEE bytes are separated by dashes.
        # Ensure OUI-36 is all upper-case
        $oui36 =~ tr/a-f/A-F/;
        if (exists $oui_list{$oui36}) {
            # print rather than printf: the names come from the download
            # and a '%' in them must not be treated as a format directive.
            print "$oui36 - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$oui36}\"\n";
            $oui36_skipped++;
        } else {
            $oui_list{$oui36} = shorten($manuf);
            $oui36_added++;
        }
    }
    $oui36_total++;
}
if ($oui36_total < $min_entries) { die "Too few OUI-36 entries ($oui36_total)\n"; }
# Add IEEE entries for CIDs.
#
# Only the "(hex)" lines are used; each yields one three byte prefix.
# Existing %oui_list entries take precedence. $cid_total counts every
# line of the download (size sanity check).
$ieee_list = fetch($cid_url);
foreach $line (split(/[\r\n]+/, $ieee_list)) {
    if (($cid, $manuf) = ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/)) {
        $cid =~ tr /-/:/; # The IEEE bytes are separated by dashes.
        # Ensure CID is all upper-case
        $cid =~ tr/a-f/A-F/;
        if (exists $oui_list{$cid}) {
            # print rather than printf: the names come from the download
            # and a '%' in them must not be treated as a format directive.
            print "$cid - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$cid}\"\n";
            $cid_skipped++;
        } else {
            $oui_list{$cid} = shorten($manuf);
            $cid_added++;
        }
    }
    $cid_total++;
}
if ($cid_total < $min_cid_entries) { die "Too few CID entries ($cid_total)\n"; }
# Add IEEE entries for OUIs not yet known.
#
# Only the "(hex)" lines are used; each yields one three byte prefix.
# Prefixes already in %oui_list are kept as they are. $oui_total counts
# every line of the download (size sanity check).
$ieee_list = fetch($oui_url);
foreach $line (split(/[\r\n]+/, $ieee_list)) {
    if (($oui, $manuf) = ($line =~ /^\s*($ieee_re)\s+\(hex\)\s+(\S.*)$/)) {
        $oui =~ tr /-/:/; # The IEEE bytes are separated by dashes.
        # Ensure OUI is all upper-case
        $oui =~ tr/a-f/A-F/;
        if (exists $oui_list{$oui}) {
            # print rather than printf: the names come from the download
            # and a '%' in them must not be treated as a format directive.
            print "$oui - Skipping IEEE \"$manuf\" in favor of \"$oui_list{$oui}\"\n";
            $oui_skipped++;
        } else {
            $oui_list{$oui} = shorten($manuf);
            $oui_added++;
        }
    }
    $oui_total++;
}
if ($oui_total < $min_entries) { die "Too few OUI entries ($oui_total)\n"; }
# Total number of entries that end up in the output. The OUI-28 entries
# were previously left out of this sum although they are added to
# %oui_list like all the others.
$total_added = $tmpl_added + $oui_added + $iab_added + $oui28_added + $oui36_added + $cid_added;
# Refuse to write a suspiciously small database.
if ($total_added < $min_total) { die "Too few total entries ($total_added)\n"; }
# Write output file: a generated-file notice, the header taken from the
# template, then every prefix sorted as a string, one per line.
open (my $out_fh, ">", $outfile) or
    die "Couldn't open output file for writing ($outfile): $!\n";
print {$out_fh} "# This file was generated by running ./tools/make-manuf.\n";
print {$out_fh} "# Don't change it directly, change manuf.tmpl instead.\n#\n";
print {$out_fh} "$header";
foreach my $oui (sort(keys %oui_list)) {
    print {$out_fh} "$oui\t$oui_list{$oui}\n";
}
# Buffered write errors (e.g. a full disk) only surface on close.
close($out_fh) or
    die "Couldn't finish writing output file ($outfile): $!\n";

# Summary of the run on standard output.
print <<"Fin";
Original entries : $tmpl_added
IEEE OUI added : $oui_added
IEEE IAB added : $iab_added
IEEE OUI28 added : $oui28_added
IEEE OUI36 added : $oui36_added
IEEE CID added : $cid_added
Total added : $total_added
IEEE OUI total : $oui_total
IEEE IAB total : $iab_total
IEEE OUI28 total : $oui28_total
IEEE OUI36 total : $oui36_total
IEEE CID total : $cid_total
IEEE OUI skipped : $oui_skipped
IEEE IAB skipped : $iab_skipped
IEEE OUI28 skipped : $oui28_skipped
IEEE OUI36 skipped : $oui36_skipped
IEEE CID skipped : $cid_skipped
Fin