dfilter: Reject invalid character escape sequences

For double quoted strings. This is consistent with single quote
character constants and the C standard. It also avoids common
mistakes where the superfluous backslash is silently suppressed.
This commit is contained in:
João Valverde 2021-11-23 13:40:14 +00:00
parent bbaa144b3c
commit 72c5efea1b
5 changed files with 22 additions and 23 deletions

View File

@ -42,7 +42,8 @@ The following features are new (or have been significantly updated) since versio
** Adds support for some additional character escape sequences in double quoted strings.
Besides octal and hex byte specification the following C escape sequences are now supported with the same meaning: \a, \b, \f, \n, \r, \t, \v.
Previously they were only supported with character constants.
Note that unrecognized escape sequences are treated as a literal character. This has not changed from previous versions.
** Unrecognized escape sequences are now treated as a syntax error. Previously they were treated as a literal character.
In addition to the sequences indicated above, backslash, single quotation and double quotation mark are also valid sequences: \\, \', \".
** The display filter engine now uses PCRE2 instead of GRegex (GLib bindings to the older end-of-life PCRE library).
PCRE2 is compatible with PCRE so the user-visible changes should be minimal.
Some exotic patterns may now be invalid and require rewriting.

View File

@ -648,7 +648,7 @@ i.e. the SYN bit, set.
==== Possible Pitfalls Using Regular Expressions
String literals containing regular expressions are parsed twice. Once by Wireshark's display
filter engine and again by the PCRE library. It's important to keep this in mind when using
filter engine and again by the PCRE2 library. It's important to keep this in mind when using
the "matches" operator with regex escape sequences and special characters.
For example the filter expression `+frame matches "AB\x43"+` uses the string `+"ABC"+` as input
@ -661,10 +661,7 @@ code for `(` the pattern input to PCRE is `+"bar("+`. This regular expression is
invalid (missing closing parenthesis). To match a literal parenthesis in a display filter regular
expression it must be escaped (twice) with backslashes.
Another common pitfall is using `\.` instead of `\\.` in a regular expression. The former
will match any character (the backslash is superfluous) while the latter will match a literal dot.
TIP: Using raw strings avoids most problem with the "matches" operator and double escapes.
TIP: Using raw strings avoids most problem with the "matches" operator and double escape requirements.
==== Combining Expressions

View File

@ -79,7 +79,7 @@ df_lval_t *df_lval;
static int set_lval_str(int token, const char *token_value);
#define simple(token) set_lval_str(token, yytext)
static GString *append_escaped_char(GString *str, char c);
static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c);
/*
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
@ -255,8 +255,8 @@ static GString *append_escaped_char(GString *str, char c);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
append_escaped_char(yyextra->quoted_string, yytext[1]);
else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) {
return SCAN_FAILED;
}
}
@ -348,8 +348,8 @@ set_lval_str(int token, const char *token_value)
return token;
}
static GString *
append_escaped_char(GString *str, char c)
static gboolean
append_escaped_char(dfwork_t *dfw, GString *str, char c)
{
switch (c) {
case 'a':
@ -373,14 +373,15 @@ append_escaped_char(GString *str, char c)
case 'v':
c = '\v';
break;
default:
/* Unrecognized escapes are treated as a literal character.
* If this is turned into an error instead (which is a backward
* incompatibility but arguably the right thing to do)
* we need to take care to accept all valid sequences
* (like \" and \\). */
case '\\':
case '\'':
case '\"':
break;
default:
dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c);
return FALSE;
}
return g_string_append_c(str, c);
g_string_append_c(str, c);
return TRUE;
}

View File

@ -59,8 +59,8 @@ parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg)
/*
* C escape sequence.
* An escape sequence is an octal number \NNN,
* an hex number \xNN, or one of \' \" \? \\ \a \b \f \n \r
* \t \v that stands for the byte value of the equivalent
* an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
* that stands for the byte value of the equivalent
* C-escape in ASCII encoding.
*/
cp++;

View File

@ -31,6 +31,6 @@ class case_scanner(unittest.TestCase):
dfilter = 'http.request.method == "\\111EAD"'
checkDFilterCount(dfilter, 0)
def test_dquote_6(self, checkDFilterCount):
dfilter = 'http.request.method == "\\HEAD"'
checkDFilterCount(dfilter, 1)
def test_dquote_6(self, checkDFilterFail):
dfilter = r'http.request.method == "\HEAD"'
checkDFilterFail(dfilter, 'not a valid character escape sequence')