dfilter: Reject invalid character escape sequences
This applies to double quoted strings. It is consistent with single quote character constants and the C standard. It also avoids common mistakes where the superfluous backslash is silently suppressed.
This commit is contained in:
parent
bbaa144b3c
commit
72c5efea1b
|
@ -42,7 +42,8 @@ The following features are new (or have been significantly updated) since versio
|
|||
** Adds support for some additional character escape sequences in double quoted strings.
|
||||
Besides octal and hex byte specifications, the following C escape sequences are now supported with the same meaning: \a, \b, \f, \n, \r, \t, \v.
|
||||
Previously they were only supported with character constants.
|
||||
Note that unrecognized escape sequences are treated as a literal character. This has not changed from previous versions.
|
||||
** Unrecognized escape sequences are now treated as a syntax error. Previously they were treated as a literal character.
|
||||
In addition to the sequences indicated above, the backslash, single quotation mark and double quotation mark are also valid escape sequences: \\, \', \".
|
||||
** The display filter engine now uses PCRE2 instead of GRegex (GLib bindings to the older end-of-life PCRE library).
|
||||
PCRE2 is compatible with PCRE so the user-visible changes should be minimal.
|
||||
Some exotic patterns may now be invalid and require rewriting.
|
||||
|
|
|
@ -648,7 +648,7 @@ i.e. the SYN bit, set.
|
|||
==== Possible Pitfalls Using Regular Expressions
|
||||
|
||||
String literals containing regular expressions are parsed twice. Once by Wireshark's display
|
||||
filter engine and again by the PCRE library. It's important to keep this in mind when using
|
||||
filter engine and again by the PCRE2 library. It's important to keep this in mind when using
|
||||
the "matches" operator with regex escape sequences and special characters.
|
||||
|
||||
For example the filter expression `+frame matches "AB\x43"+` uses the string `+"ABC"+` as input
|
||||
|
@ -661,10 +661,7 @@ code for `(` the pattern input to PCRE is `+"bar("+`. This regular expression is
|
|||
invalid (missing closing parenthesis). To match a literal parenthesis in a display filter regular
|
||||
expression it must be escaped (twice) with backslashes.
|
||||
|
||||
Another common pitfall is using `\.` instead of `\\.` in a regular expression. The former
|
||||
will match any character (the backslash is superfluous) while the latter will match a literal dot.
|
||||
|
||||
TIP: Using raw strings avoids most problem with the "matches" operator and double escapes.
|
||||
TIP: Using raw strings avoids most problems with the "matches" operator and double escape requirements.
|
||||
|
||||
==== Combining Expressions
|
||||
|
||||
|
|
|
@ -79,7 +79,7 @@ df_lval_t *df_lval;
|
|||
|
||||
static int set_lval_str(int token, const char *token_value);
|
||||
#define simple(token) set_lval_str(token, yytext)
|
||||
static GString *append_escaped_char(GString *str, char c);
|
||||
static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c);
|
||||
|
||||
/*
|
||||
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
|
||||
|
@ -255,8 +255,8 @@ static GString *append_escaped_char(GString *str, char c);
|
|||
if (yyextra->raw_string) {
|
||||
g_string_append(yyextra->quoted_string, yytext);
|
||||
}
|
||||
else {
|
||||
append_escaped_char(yyextra->quoted_string, yytext[1]);
|
||||
else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) {
|
||||
return SCAN_FAILED;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -348,8 +348,8 @@ set_lval_str(int token, const char *token_value)
|
|||
return token;
|
||||
}
|
||||
|
||||
static GString *
|
||||
append_escaped_char(GString *str, char c)
|
||||
static gboolean
|
||||
append_escaped_char(dfwork_t *dfw, GString *str, char c)
|
||||
{
|
||||
switch (c) {
|
||||
case 'a':
|
||||
|
@ -373,14 +373,15 @@ append_escaped_char(GString *str, char c)
|
|||
case 'v':
|
||||
c = '\v';
|
||||
break;
|
||||
default:
|
||||
/* Unrecognized escapes are treated as a literal character.
|
||||
* If this is turned into an error instead (which is a backward
|
||||
* incompatibility but arguably the right thing to do)
|
||||
* we need to take care to accept all valid sequences
|
||||
* (like \" and \\). */
|
||||
case '\\':
|
||||
case '\'':
|
||||
case '\"':
|
||||
break;
|
||||
default:
|
||||
dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c);
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
return g_string_append_c(str, c);
|
||||
g_string_append_c(str, c);
|
||||
return TRUE;
|
||||
}
|
||||
|
|
|
@ -59,8 +59,8 @@ parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg)
|
|||
/*
|
||||
* C escape sequence.
|
||||
* An escape sequence is an octal number \NNN,
|
||||
* an hex number \xNN, or one of \' \" \? \\ \a \b \f \n \r
|
||||
* \t \v that stands for the byte value of the equivalent
|
||||
* a hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
|
||||
* that stands for the byte value of the equivalent
|
||||
* C-escape in ASCII encoding.
|
||||
*/
|
||||
cp++;
|
||||
|
|
|
@ -31,6 +31,6 @@ class case_scanner(unittest.TestCase):
|
|||
dfilter = 'http.request.method == "\\111EAD"'
|
||||
checkDFilterCount(dfilter, 0)
|
||||
|
||||
def test_dquote_6(self, checkDFilterCount):
|
||||
dfilter = 'http.request.method == "\\HEAD"'
|
||||
checkDFilterCount(dfilter, 1)
|
||||
def test_dquote_6(self, checkDFilterFail):
|
||||
dfilter = r'http.request.method == "\HEAD"'
|
||||
checkDFilterFail(dfilter, 'not a valid character escape sequence')
|
||||
|
|
Loading…
Reference in New Issue