dfilter: Add support for raw strings

Add support for a literal string specification copied from Python
raw strings[1].

Raw string literals are enclosed with r"..." or R"...". Double quotes
can be included in the string but they must be escaped with a backslash.
Backslashes in escape sequences are preserved in the final result.

So for example the string "a\\\"b" is the same as r"a\"b".

r"\\\a" is the same as "\\\\\\a".

Raw strings should be used for convenience wherever a regular expression
is used in a display filter expression.

[1]https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals
This commit is contained in:
João Valverde 2021-05-30 03:38:12 +01:00
parent eaa4a7022b
commit 85c257431f
5 changed files with 52 additions and 13 deletions

View File

@ -56,6 +56,11 @@ They previously shipped with Npcap 1.20.
Also the timestamp format now allows the second-fractions to be placed anywhere in the timestamp and it will be stored with
nanosecond instead of microsecond precision.
* Display filter literal strings can now be specified using raw string syntax,
identical to raw strings in the Python programming language. This is useful
to avoid the complexity of using two levels of character escapes with regular
expressions.
* Significant RTP Player redesign and improvements (see Wireshark User Documentation,
https://www.wireshark.org/docs/wsug_html_chunked/ChTelPlayingCalls.html[Playing VoIP Calls] and
https://www.wireshark.org/docs/wsug_html_chunked/_rtp.html#ChTelRtpPlayer[RTP Player Window])

View File

@ -50,6 +50,7 @@ typedef struct {
/* State shared with the display filter scanner's rule actions (reached there through yyextra). */
typedef struct {
dfwork_t *dfw; /* filter work state; handed to dfilter_fail() when scanning fails */
GString* quoted_string; /* buffer accumulating the contents of the quoted string being scanned; NULL when none is in progress */
gboolean raw_string; /* TRUE if the quoted string being scanned was opened with r" or R" (escape sequences are then copied verbatim) */
gboolean in_set; /* true if parsing set elements for the membership operator */
} df_scanner_state_t;

View File

@ -240,6 +240,7 @@ dfilter_compile(const gchar *text, dfilter_t **dfp, gchar **err_msg)
state.dfw = dfw;
state.quoted_string = NULL;
state.in_set = FALSE;
state.raw_string = FALSE;
df_set_extra(&state, scanner);

View File

@ -219,7 +219,7 @@ static void mark_lval_deprecated(const char *s);
return SCAN_FAILED;
}
\042 {
[rR]{0,1}\042 {
/* start quote of a quoted string */
/* The example of how to scan for strings was taken from
the flex 2.5.4 manual, from the section "Start Conditions".
@ -238,6 +238,21 @@ static void mark_lval_deprecated(const char *s);
about to set it in the next line. */
}
yyextra->quoted_string = g_string_new("");
if (yytext[0] == 'r' || yytext[0] == 'R') {
/*
* This is a raw string (like in Python). Rules: 1) The two
* escape sequences are \\ and \". 2) Backslashes are
* preserved. 3) Double quotes in the string must be escaped.
* Corollary: Strings cannot end with an odd number of
* backslashes.
* Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
* {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
*/
yyextra->raw_string = TRUE;
}
else {
yyextra->raw_string = FALSE;
}
}
<DQUOTE><<EOF>> {
@ -263,28 +278,43 @@ static void mark_lval_deprecated(const char *s);
<DQUOTE>\\[0-7]{1,3} {
/* octal sequence */
unsigned long result;
result = strtoul(yytext + 1, NULL, 8);
if (result > 0xff) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
return SCAN_FAILED;
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
unsigned long result;
result = strtoul(yytext + 1, NULL, 8);
if (result > 0xff) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
return SCAN_FAILED;
}
g_string_append_c(yyextra->quoted_string, (gchar) result);
}
g_string_append_c(yyextra->quoted_string, (gchar) result);
}
<DQUOTE>\\x[[:xdigit:]]{1,2} {
/* hex sequence */
unsigned long result;
result = strtoul(yytext + 2, NULL, 16);
g_string_append_c(yyextra->quoted_string, (gchar) result);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
unsigned long result;
result = strtoul(yytext + 2, NULL, 16);
g_string_append_c(yyextra->quoted_string, (gchar) result);
}
}
<DQUOTE>\\. {
/* escaped character */
g_string_append_c(yyextra->quoted_string, yytext[1]);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
g_string_append_c(yyextra->quoted_string, yytext[1]);
}
}
<DQUOTE>[^\\\042]+ {

View File

@ -409,6 +409,8 @@ dfilter_g_regex_from_string(dfwork_t *dfw, const char *s)
*/
cflags = (GRegexCompileFlags)(cflags | G_REGEX_RAW);
DebugLog(("Compile regex pattern: '%s'\n", s));
pcre = g_regex_new(
s, /* pattern */
cflags, /* Compile options */