Tools: Port tools/generate_authors.py to Python3.
Port the script that creates AUTHORS to Python3. Ping #18152.
This commit is contained in:
parent
a9bcbaf738
commit
acb8158033
|
@ -3899,7 +3899,7 @@ if (GIT_EXECUTABLE)
|
|||
# Update AUTHORS file with entries from git shortlog
|
||||
add_custom_target(
|
||||
gen-authors
|
||||
COMMAND ${PERL_EXECUTABLE} tools/generate_authors.pl AUTHORS.src > AUTHORS
|
||||
COMMAND ${PYTHON_EXECUTABLE} tools/generate_authors.py AUTHORS.src > AUTHORS
|
||||
WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}
|
||||
)
|
||||
else (GIT_EXECUTABLE)
|
||||
|
|
|
@ -1,157 +0,0 @@
|
|||
#!/usr/bin/perl
|
||||
|
||||
#
|
||||
# Generate the AUTHORS file combining existing AUTHORS file with
|
||||
# git commit log.
|
||||
#
|
||||
# Usage: generate_authors.pl AUTHORS.src
|
||||
|
||||
#
|
||||
# Copyright 2016 Michael Mann (see AUTHORS file)
|
||||
#
|
||||
# Wireshark - Network traffic analyzer
|
||||
# By Gerald Combs <gerald@wireshark.org>
|
||||
# Copyright 1998 Gerald Combs
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
use v5.10;
|
||||
|
||||
use warnings;
|
||||
use strict;
|
||||
use open ':std', ':encoding(UTF-8)';
|
||||
|
||||
# Parser state for the contributors section: "s_in_bracket" while inside a
# "{ ... }" block, empty otherwise.
my $state = q{};

# Contributors seen so far, keyed by lower-cased e-mail address
# (or "<no_email>" for entries whose address was removed).
my %contributors;

# Heading that ends the contributors section of the input file.
my $acknowledgements_heading = '= Acknowledgements =';

# Heading printed in front of the names collected from git.
my $git_log_text = "\n= From git log =\n\n";
|
||||
|
||||
# Perl trim function to remove whitespace from the start and end of the string
|
||||
# Return a copy of the argument with leading and trailing whitespace removed.
sub trim($)
{
    my ($text) = @_;
    $text =~ s/^\s+|\s+$//g;
    return $text;
}
|
||||
|
||||
# Record one contributor line from the input file in %contributors and echo
# the line unchanged.  Lines that match neither pattern are dropped silently.
sub parse_author_name {
    my ($entry) = @_;

    if ($entry =~ /^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+<([^>]*)>/) {
        my ($who, $address) = ($1, $3);

        # Gerald is already named in the header, so he is neither recorded
        # nor printed here.
        unless ($address eq "gerald[AT]wireshark.org") {
            $contributors{ lc $address } = $who;
            say $entry;
        }
    } elsif ($entry =~ /^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+\(/) {
        # Name followed by "(": an entry without an e-mail address.
        $contributors{"<no_email>"} = $1;
        say $entry;
    }
}
|
||||
|
||||
# Print one "From git log" entry for a line of `git shortlog -se` output,
# unless its e-mail address is already in %contributors or is Gerald's.
# Lines that do not look like "<count> <name> <email>" are ignored.
sub parse_git_name {
    my ($full_name) = @_;
    my $ntab = 3;    # the e-mail column sits three 8-column tab stops in

    # 4321 Navin R. Johnson <nrjohnson@example.com>
    return unless $full_name =~ /^\s*\d+\s+([^<]*)\s*<([^>]*)>/;

    # Copy the captures immediately: any later successful match in this
    # scope (the substitution below included) resets $1 and $2.
    my ($raw_name, $raw_email) = ($1, $2);
    my $name = trim($raw_name);

    #Convert real email address to "spam proof" one
    my $email = trim($raw_email);
    $email =~ s/@/[AT]/g;
    my $email_key = lc($email);

    return if exists $contributors{$email_key};

    #Make an exception for Gerald because he's part of the header
    return if $email eq "gerald[AT]wireshark.org";

    my $len = length $name;
    my $line;
    if ($len >= 8 * $ntab) {
        # Name already reaches the e-mail column: separate with one space.
        $line = "$name <$email>";
    } else {
        # One tab for every 8-column stop that is still ahead of the name.
        $line = $name . "\t" x ($ntab - int($len / 8)) . "<$email>";
    }

    # Remember the address so a second shortlog entry with the same e-mail
    # is not printed again.
    $contributors{$email_key} = $name;
    say $line;
}
|
||||
|
||||
# ---------------------------------------------------------------------
|
||||
#
|
||||
# MAIN
|
||||
#
|
||||
|
||||
open( my $author_fh, '<', $ARGV[0] ) or die "Can't open $ARGV[0]: $!";

# Part 1: copy the header through, up to and including the contributors
# heading.
while ( defined( my $header_line = <$author_fh> ) ) {
    chomp $header_line;
    say $header_line;
    last if $header_line eq "= Contributors =";
}

# Part 2: the contributors section.  Contributor lines go through
# parse_author_name (which records and echoes them); everything else is
# echoed as is.
while ( defined( my $entry = <$author_fh> ) ) {
    chomp $entry;
    last if $entry eq $acknowledgements_heading;

    if ($entry =~ /([^\{]*)\{/) {
        # A line with "{" opens a description block.
        parse_author_name($entry);
        $state = "s_in_bracket";
    } elsif ($state eq "s_in_bracket") {
        # Description lines are copied verbatim; "}" closes the block.
        say $entry;
        $state = "" if $entry =~ /([^\}]*)\}/;
    } elsif ($entry =~ /</
             || $entry =~ "(e-mail address removed at contributor's request)") {
        parse_author_name($entry);
    } else {
        say $entry;
    }
}

# Part 3: authors known to git but not listed above.
print $git_log_text;

open( my $git_author_fh, 'git --no-pager shortlog -se HEAD|')
    or die "Can't execute git shortlog: $!";

while ( defined( my $shortlog_entry = <$git_author_fh> ) ) {
    chomp $shortlog_entry;
    parse_git_name($shortlog_entry);
}
close $git_author_fh;

print "\n\n";

# Part 4: the acknowledgements heading and the rest of the file.
say $acknowledgements_heading;

while ( defined( my $tail_line = <$author_fh> ) ) {
    chomp $tail_line;
    say $tail_line;
}

close $author_fh;
|
||||
|
||||
__END__
|
|
@ -0,0 +1,134 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
#
|
||||
# Generate the AUTHORS file combining existing AUTHORS file with
|
||||
# git commit log.
|
||||
#
|
||||
# Usage: generate_authors.py AUTHORS.src
|
||||
|
||||
# Copyright 2022 Moshe Kaplan
|
||||
# Based on generate_authors.pl by Michael Mann
|
||||
#
|
||||
# Wireshark - Network traffic analyzer
|
||||
# By Gerald Combs <gerald@wireshark.org>
|
||||
# Copyright 1998 Gerald Combs
|
||||
#
|
||||
# SPDX-License-Identifier: GPL-2.0-or-later
|
||||
|
||||
import argparse
|
||||
import re
|
||||
import subprocess
|
||||
|
||||
|
||||
def get_git_authors():
    '''
    Return the commit authors that "git shortlog" reports for HEAD.

    Runs "git --no-pager shortlog --email --summary HEAD" in the current
    working directory and parses every output line of the form:
    #  4321  Navin R. Johnson <nrjohnson@example.com>

    Returns a list of (name, email) tuples in the order git printed them.
    Every "@" in an address is replaced by "[AT]". Lines that do not have
    the expected form are skipped.

    Raises subprocess.CalledProcessError if git exits with a non-zero
    status, and OSError if git cannot be started.
    '''
    git_line_regex = re.compile(r"^\s*\d+\s+([^<]*)\s*<([^>]*)>")
    cmd = ["git", "--no-pager", "shortlog", "--email", "--summary", "HEAD"]
    # check_output is used for Python 3.4 compatibility
    # NOTE(review): universal_newlines=True decodes with the locale's
    # preferred encoding; confirm that matches git's output encoding on all
    # build hosts.
    git_cmd_output = subprocess.check_output(cmd, universal_newlines=True)

    git_authors = []
    for line in git_cmd_output.splitlines():
        match = git_line_regex.match(line.strip())
        if match is None:
            # Not a "<count> <name> <email>" line; ignore it instead of
            # failing on match.group().
            continue
        name = match.group(1).strip()
        # Try to lower how much spam people get:
        email = match.group(2).strip().replace('@', '[AT]')
        git_authors.append((name, email))
    return git_authors
|
||||
|
||||
|
||||
def extract_contributors(authors_content):
    '''
    Extract names and email addresses from the AUTHORS file Contributors
    section.

    The section is the text after the first "= Contributors =" heading and
    before the next "= Acknowledgements =" heading. A line containing "{"
    opens a description block: that line itself is still parsed, but the
    lines that follow are ignored up to and including the first line
    containing "}".

    Returns a list of (name, email) tuples in file order, with the email
    exactly as written in the file. Entries without an "<email>" part are
    not returned. The list is empty when the Contributors heading is
    missing.
    '''
    contributor_line = re.compile(
        r"^([\w\.\-\'\x80-\xff]+(\s*[\w+\.\-\'\x80-\xff])*)\s+<([^>]*)>")

    # partition() yields an empty remainder when the heading is absent, so a
    # file without a Contributors section simply produces no entries.
    section = authors_content.partition("= Contributors =")[2]
    section = section.partition("= Acknowledgements =")[0]

    contributors = []
    in_description = False
    for line in section.splitlines():
        if '{' in line:
            in_description = True
        elif in_description:
            if '}' in line:
                in_description = False
            continue

        match = contributor_line.match(line)
        if match:
            contributors.append((match.group(1), match.group(3)))
    return contributors
|
||||
|
||||
|
||||
def generate_git_contributors_text(contributors_emails, git_authors_emails):
    '''
    Build the text listing git authors who are not already contributors.

    contributors_emails and git_authors_emails are iterables of
    (name, email) tuples. A git author is left out when the email, compared
    case-insensitively, already appeared in contributors_emails or earlier
    in git_authors_emails, or when it is Gerald's address.

    Returns the remaining entries joined by newlines, without a trailing
    newline. Names shorter than 24 characters are followed by tabs so the
    "<email>" starts at the third 8-column tab stop; longer names are
    followed by a single space.
    '''
    email_column_tabs = 3

    # Track the email addresses seen to avoid including the same email
    # address twice.
    seen = {email.lower() for _name, email in contributors_emails}
    # Skip Gerald, since he's part of the header. Seeding the set keeps this
    # check case-insensitive like every other address comparison here.
    seen.add("gerald[AT]wireshark.org".lower())

    output_lines = []
    for name, email in git_authors_emails:
        email_key = email.lower()
        if email_key in seen:
            continue
        seen.add(email_key)

        if len(name) >= 8 * email_column_tabs:
            separator = ' '
        else:
            # One tab for every 8-column stop that is still ahead of the name.
            separator = '\t' * (email_column_tabs - len(name) // 8)
        output_lines.append("{name}{separator}<{email}>".format(
            name=name, separator=separator, email=email))
    return "\n".join(output_lines)
|
||||
|
||||
|
||||
def main():
    '''
    Print the input AUTHORS file with a "= From git log =" section added.

    The section lists git authors whose email is not already in the
    Contributors section. It is placed in front of the
    "= Acknowledgements =" heading, or at the end of the text when that
    heading is missing.
    '''
    parser = argparse.ArgumentParser(description="Generate the AUTHORS file combining existing AUTHORS file with git commit log.")
    parser.add_argument("authors", metavar='authors', nargs=1, help="path to AUTHORS file")
    parsed_args = parser.parse_args()

    # NOTE(review): the file is read and the result printed with the
    # locale's default encoding; confirm this is UTF-8 on all build hosts.
    with open(parsed_args.authors[0]) as fh:
        author_content = fh.read()

    # Collect the listed contributors emails so that we don't duplicate them
    # in the listing of git contributors
    contributors_emails = extract_contributors(author_content)
    git_authors_emails = get_git_authors()
    # Then generate the text output for git contributors
    git_contributors_text = generate_git_contributors_text(contributors_emails, git_authors_emails)

    # Now we can write our output:
    acknowledgements_start = author_content.find("\n\n= Acknowledgements =")
    if acknowledgements_start == -1:
        # find() reports a missing heading as -1, which as a slice index
        # would cut off the last character; append at the end instead.
        acknowledgements_start = len(author_content)
    before_acknowledgements = author_content[:acknowledgements_start]
    acknowledgements = author_content[acknowledgements_start:]
    git_contributor_header = '\n\n\n= From git log =\n\n'
    output = before_acknowledgements + git_contributor_header + git_contributors_text + '\n' + acknowledgements
    print(output)


if __name__ == '__main__':
    main()
|
Loading…
Reference in New Issue