From ed0741ffbd87d113033e0b1d9a4ac80c93a1f1e7 Mon Sep 17 00:00:00 2001 From: Peter Wu Date: Sat, 22 Sep 2018 13:47:22 +0200 Subject: [PATCH] fix-encoding-args.pl: fix terrible performance with large files "fix-encoding-args.pl epan/dissectors/packet-ieee80211.c" used to take over 12 seconds to complete. After this change it is reduced to 400ms. Profiling with Devel::NYTProf showed two issues: - find_hf_array_entries (5 seconds): matching leading whitespace triggers a candidate match against every line. Fix this by removing whitespace prior to matching. - fix_encoding_args_by_hf_type (7.5 seconds): executing 2131 different substitution patterns is slow. Fix this by grouping field names and executing the substitution only once afterwards (in total 6 calls). packet-rrc.c is by far the largest file with 215k lines; this used to take forever (321s) and now completes in 1.3s. Regression tested by removing "ENC_ASCII" and "ENC_UTF_8" in dissect_venue_name_info; the expected warnings are still visible. Change-Id: I071038e8fcb56474ac41223568ce6724258c059d Reviewed-on: https://code.wireshark.org/review/29789 Petri-Dish: Peter Wu Tested-by: Petri Dish Buildbot Reviewed-by: Anders Broman --- tools/fix-encoding-args.pl | 31 +++++++++++++++++++------------ 1 file changed, 19 insertions(+), 12 deletions(-) diff --git a/tools/fix-encoding-args.pl b/tools/fix-encoding-args.pl index a4c4887914..f05163ce97 100755 --- a/tools/fix-encoding-args.pl +++ b/tools/fix-encoding-args.pl @@ -390,22 +390,22 @@ sub find_hf_array_entries { } } + # pre-process contents to fold multiple lines and speed up matching. 
+ $fileContentsWithoutComments =~ s/\s*=\s*/=/gs; + $fileContentsWithoutComments =~ s/^\s+//g; + # RegEx to get "proto" variable name my $protoRegEx = qr / - ^ \s* # note m modifier below + ^ # note m modifier below ( [a-zA-Z0-9_]+ ) - \s* = - \s* - proto_register_protocol - \s* - \( - /xoms; + proto_register_protocol\b + /xom; # Find all registered protocols - while ($fileContentsWithoutComments =~ m { $protoRegEx }xgioms ) { + while ($fileContentsWithoutComments =~ m { $protoRegEx }xgom ) { ##print "$1\n"; if (exists $hfArrayEntryFieldType{$1}) { printf "%-35.35s: ? duplicate 'proto': no fixes done for: $1; manual action may be req'd\n", $fileName; @@ -517,8 +517,8 @@ sub find_hf_array_entries { # - ref to array containing hf[] types to be processed (FT_STRING, etc) # - ref to hash containing search (keys) and replacement (values) for encoding arg # fcn_name string -# ref to hfArrayEntries hash (key: hf name; value: field type) # ref to string containing file contents +# ref to hfArrayEntries hash (key: hf name; value: field type) # filename string { # block begin @@ -573,24 +573,32 @@ sub find_hf_array_entries { $encArgPat = qr / [^,)]+? /x; } + my @hf_index_names; + # For each hf[] entry which matches a type in %hfTypes do replacements $found = 0; foreach my $key (keys %$hfArrayEntryFieldTypeHRef) { $hf_index_name = $key; - $hf_index_name =~ s{ ( \[ | \] ) }{\\$1}xg; # escape any "[" or "]" characters $hf_field_type = $$hfArrayEntryFieldTypeHRef{$key}; ##printf "--> %-35.35s: %s\n", $hf_index_name, $hf_field_type; next unless exists $hfTypes{$hf_field_type}; # Do we want to process for this hf[] entry type ? + ##print "\n$hf_index_name $hf_field_type\n"; + push @hf_index_names, $hf_index_name; + } + + if (@hf_index_names) { # build the complete pattern + my $hf_index_names_re = join('|', @hf_index_names); + $hf_index_names_re =~ s/\[|\]/\\$&/g; # escape any "[" or "]" characters my $patRegEx = qr / # part 1: $1 ( $fcn_name \s* \( [^;]+? 
,\s* - $hf_index_name + (?:$hf_index_names_re) \s*, [^;]+ ,\s* @@ -607,7 +615,6 @@ sub find_hf_array_entries { ) /xs; - ##print "\n$hf_index_name $hf_field_type\n"; ##print "\n$patRegEx\n"; ## Match and substitute as specified