From 72c5efea1b494daca59c33bf5434c886a798c32d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Tue, 23 Nov 2021 13:40:14 +0000 Subject: [PATCH] dfilter: Reject invalid character escape sequences For double quoted strings. This is consistent with single quote character constants and the C standard. It also avoids common mistakes where the superfluous backslash is silently suppressed. --- docbook/release-notes.adoc | 3 ++- docbook/wsug_src/WSUG_chapter_work.adoc | 7 ++----- epan/dfilter/scanner.l | 25 +++++++++++++------------ epan/ftypes/ftype-integer.c | 4 ++-- test/suite_dfilter/group_scanner.py | 6 +++--- 5 files changed, 22 insertions(+), 23 deletions(-) diff --git a/docbook/release-notes.adoc b/docbook/release-notes.adoc index 1ab69ee16d..1239d8606f 100644 --- a/docbook/release-notes.adoc +++ b/docbook/release-notes.adoc @@ -42,7 +42,8 @@ The following features are new (or have been significantly updated) since versio ** Adds support for some additional character escape sequences in double quoted strings. Besides octal and hex byte specification the following C escape sequences are now supported with the same meaning: \a, \b, \f, \n, \r, \t, \v. Previously they were only supported with character constants. - Note that unrecognized escape sequences are treated as a literal character. This has not changed from previous versions. +** Unrecognized escape sequences are now treated as a syntax error. Previously they were treated as a literal character. + In addition to the sequences indicated above, backslash, single quotation and double quotation mark are also valid sequences: \\, \', \". ** The display filter engine now uses PCRE2 instead of GRegex (GLib bindings to the older end-of-life PCRE library). PCRE2 is compatible with PCRE so the user-visible changes should be minimal. Some exotic patterns may now be invalid and require rewriting. diff --git a/docbook/wsug_src/WSUG_chapter_work.adoc b/docbook/wsug_src/WSUG_chapter_work.adoc index 9dc613e1f4..317beb9a56 100644 --- a/docbook/wsug_src/WSUG_chapter_work.adoc +++ b/docbook/wsug_src/WSUG_chapter_work.adoc @@ -648,7 +648,7 @@ i.e. the SYN bit, set. ==== Possible Pitfalls Using Regular Expressions String literals containing regular expressions are parsed twice. Once by Wireshark's display -filter engine and again by the PCRE library. It's important to keep this in mind when using +filter engine and again by the PCRE2 library. It's important to keep this in mind when using the "matches" operator with regex escape sequences and special characters. For example the filter expression `+frame matches "AB\x43"+` uses the string `+"ABC"+` as input @@ -661,10 +661,7 @@ code for `(` the pattern input to PCRE is `+"bar("+`. This regular expression is invalid (missing closing parenthesis). To match a literal parenthesis in a display filter regular expression it must be escaped (twice) with backslashes. -Another common pitfall is using `\.` instead of `\\.` in a regular expression. The former -will match any character (the backslash is superfluous) while the latter will match a literal dot. - -TIP: Using raw strings avoids most problem with the "matches" operator and double escapes. +TIP: Using raw strings avoids most problem with the "matches" operator and double escape requirements. ==== Combining Expressions diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l index a82e143013..15e7419f16 100644 --- a/epan/dfilter/scanner.l +++ b/epan/dfilter/scanner.l @@ -79,7 +79,7 @@ df_lval_t *df_lval; static int set_lval_str(int token, const char *token_value); #define simple(token) set_lval_str(token, yytext) -static GString *append_escaped_char(GString *str, char c); +static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c); /* * Sleazy hack to suppress compiler warnings in yy_fatal_error(). @@ -255,8 +255,8 @@ static GString *append_escaped_char(GString *str, char c); if (yyextra->raw_string) { g_string_append(yyextra->quoted_string, yytext); } - else { - append_escaped_char(yyextra->quoted_string, yytext[1]); + else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) { + return SCAN_FAILED; } } @@ -348,8 +348,8 @@ set_lval_str(int token, const char *token_value) return token; } -static GString * -append_escaped_char(GString *str, char c) +static gboolean +append_escaped_char(dfwork_t *dfw, GString *str, char c) { switch (c) { case 'a': @@ -373,14 +373,15 @@ append_escaped_char(GString *str, char c) case 'v': c = '\v'; break; - default: - /* Unrecognized escapes are treated as a literal character. - * If this is turned into an error instead (which is a backward - * incompatibility but arguably the right thing to do) - * we need to take care to accept all valid sequences - * (like \" and \\). */ + case '\\': + case '\'': + case '\"': break; + default: + dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c); + return FALSE; } - return g_string_append_c(str, c); + g_string_append_c(str, c); + return TRUE; } diff --git a/epan/ftypes/ftype-integer.c b/epan/ftypes/ftype-integer.c index 37177d2047..cb58be2155 100644 --- a/epan/ftypes/ftype-integer.c +++ b/epan/ftypes/ftype-integer.c @@ -59,8 +59,8 @@ parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg) /* * C escape sequence. * An escape sequence is an octal number \NNN, - * an hex number \xNN, or one of \' \" \? \\ \a \b \f \n \r - * \t \v that stands for the byte value of the equivalent + * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v + * that stands for the byte value of the equivalent * C-escape in ASCII encoding. */ cp++; diff --git a/test/suite_dfilter/group_scanner.py b/test/suite_dfilter/group_scanner.py index 4144a0fe27..d04eccb9c7 100644 --- a/test/suite_dfilter/group_scanner.py +++ b/test/suite_dfilter/group_scanner.py @@ -31,6 +31,6 @@ class case_scanner(unittest.TestCase): dfilter = 'http.request.method == "\\111EAD"' checkDFilterCount(dfilter, 0) - def test_dquote_6(self, checkDFilterCount): - dfilter = 'http.request.method == "\\HEAD"' - checkDFilterCount(dfilter, 1) + def test_dquote_6(self, checkDFilterFail): + dfilter = r'http.request.method == "\HEAD"' + checkDFilterFail(dfilter, 'not a valid character escape sequence')