From bd75f5af0a989bb524d005228ac24ac8d55381c1 Mon Sep 17 00:00:00 2001
From: Gerald Combs
Date: Mon, 15 Apr 2019 12:20:25 -0700
Subject: [PATCH] checkAPIs: Check for non-UTF-8 instead of non-ASCII.

Replace our check for non-ASCII characters with one that checks for a
proper UTF-8 encoding.

Change-Id: I8386f5d4376b05bc10358c0d2849a214d8ff00a0
Reviewed-on: https://code.wireshark.org/review/32866
Petri-Dish: Gerald Combs
Tested-by: Petri Dish Buildbot
Reviewed-by: Gerald Combs
---
 tools/checkAPIs.pl | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/checkAPIs.pl b/tools/checkAPIs.pl
index fe9a5c6724..bd8c69e249 100755
--- a/tools/checkAPIs.pl
+++ b/tools/checkAPIs.pl
@@ -20,6 +20,8 @@
 #
 use strict;
+use Encode;
+use English;
 use Getopt::Long;
 use Text::Balanced qw(extract_bracketed);
@@ -1132,8 +1134,9 @@ while ($_ = pop @filelist)
 $line = 1;
 while () {
 $fileContents .= $_;
- if ($_ =~ m{ [\x80-\xFF] }xo) {
- print STDERR "Error: Found non-ASCII characters on line " .$line. " of " .$filename."\n";
+ eval { decode( 'UTF-8', $_, Encode::FB_CROAK ) };
+ if ($EVAL_ERROR) {
+ print STDERR "Error: Found an invalid UTF-8 sequence on line " .$line. " of " .$filename."\n";
 $errorCount++;
 }
 $line++;