column: Update custom column regex

When splitting a possibly multifield custom column expression
into components, don't match "or" unless it's a word by itself,
i.e. is surrounded by space. "||" by itself is fine as a token.
This is necessary if we allow more complicated filters to match
than just single fields separated by "||" or "or". Also split
at space at the beginning or end of a string (since we don't
always guarantee that whitespace is stripped.)

When splitting into components, only split on "||" and " or " that
are not inside parentheses. Splitting on operators inside parentheses
results in components which are not fields or valid filter expressions
and has never worked, e.g. splitting "(tcp.srcport or tcp.dstport)"
into "(tcp.srcport" and "tcp.dstport)".

TEST_OR has the lowest possible operator precedence (see
commit 34ad6bb478), so this works,
and also justifies using OR instead of AND for multifield custom
columns.

This means that, e.g., "tcp.srcport or tcp.dstport" will be treated
as a multifield custom column expression that returns the values
for both of the fields, whereas "(tcp.srcport or tcp.dstport)" will
be ultimately treated as a single logical test that returns true
if one of the fields exists and false if neither does. Until tests
and other non single-field expressions are supported, the latter
won't work, but it never has worked.

Related to #7752, #10154, #15990, #18588, and #16181.
This commit is contained in:
John Thacker 2024-02-07 20:01:53 -05:00
parent c787f791bf
commit 4c9584ef45
5 changed files with 28 additions and 9 deletions

View File

@ -78,8 +78,8 @@ col_setup(column_info *cinfo, const gint num_cols)
cinfo->col_last[i] = -1;
}
cinfo->prime_regex = g_regex_new(COL_CUSTOM_PRIME_REGEX,
(GRegexCompileFlags) (G_REGEX_ANCHORED | G_REGEX_RAW),
G_REGEX_MATCH_ANCHORED, NULL);
(GRegexCompileFlags) (G_REGEX_RAW),
0, NULL);
}
static void

View File

@ -23,7 +23,25 @@ extern "C" {
#define COL_MAX_LEN 2048
#define COL_MAX_INFO_LEN 4096
#define COL_CUSTOM_PRIME_REGEX " *([^ \\|]+) *(?:(?:\\|\\|)|(?:or)| *$){1}"
/* A regex to split possibly multifield custom columns into components
*
* The pattern matches the separators between components, not the
* components themselves, so it is meant to be used with a split
* operation rather than a match-and-capture loop.
*
* Split on operator "||" (with optional space around it) and on "or"
* (which must have space around it to avoid matching in the middle of
* a word, e.g. inside a field of the "synphasor" protocol). This is
* somewhat too strict, as "or" adjacent to parentheses ought to be fine
* so long as the filter matches the grammar, like "(tcp.port)or(udp.port)",
* but that's the cost of using regex instead of the real parser.
* Also split on space at the beginning or end of the expression (in
* lieu of always stripping whitespace at the beginning and end, but it
* does mean that we have to ignore any empty tokens in the result.)
*
* The separator alternatives, in order, are:
*   - start of the expression followed by zero or more spaces
*   - "||" with zero or more spaces on either side
*   - "or" with one or more spaces on either side
*   - zero or more spaces followed by the end of the expression
*
* Use negative lookahead to avoid matching "||" or "or" that are contained
* within parentheses. Don't match if a close parenthesis comes before an
* open parenthesis. The regex doesn't help with unmatched parentheses, but
* such an expression already won't satisfy the grammar and won't compile.
*
* NOTE(review): the lookahead only inspects the text up to the next open
* parenthesis, so an operator that is inside parentheses but immediately
* followed by a nested group, e.g. "(a or (b))", does not appear to be
* excluded - confirm whether that case matters.
*/
#define COL_CUSTOM_PRIME_REGEX "(?:^ *| *\\|\\| *| +or +| *$)(?![^(]*\\))"
struct epan_dissect;

View File

@ -989,8 +989,8 @@ get_column_tooltip(const gint col)
}
fields = g_regex_split_simple(COL_CUSTOM_PRIME_REGEX, cfmt->custom_fields,
(GRegexCompileFlags) (G_REGEX_ANCHORED | G_REGEX_RAW),
G_REGEX_MATCH_ANCHORED);
(GRegexCompileFlags) (G_REGEX_RAW),
0);
column_tooltip = g_string_new("");
for (i = 0; i < g_strv_length(fields); i++) {
@ -1043,7 +1043,7 @@ col_finalize(column_info *cinfo)
}
if (col_item->col_custom_fields) {
gchar **fields = g_regex_split(cinfo->prime_regex, col_item->col_custom_fields,
G_REGEX_MATCH_ANCHORED);
0);
guint i_field;
for (i_field = 0; i_field < g_strv_length(fields); i_field++) {

View File

@ -235,8 +235,8 @@ column_prefs_custom_resolve(const gchar* custom_field)
bool resolve = false;
fields = g_regex_split_simple(COL_CUSTOM_PRIME_REGEX, custom_field,
(GRegexCompileFlags) (G_REGEX_ANCHORED | G_REGEX_RAW),
G_REGEX_MATCH_ANCHORED);
(GRegexCompileFlags) (G_REGEX_RAW),
0);
for (guint i = 0; i < g_strv_length(fields); i++) {
if (fields[i] && *fields[i]) {

View File

@ -268,7 +268,8 @@ void SyntaxLineEdit::checkCustomColumn(QString fields)
}
gchar **splitted_fields = g_regex_split_simple(COL_CUSTOM_PRIME_REGEX,
fields.toUtf8().constData(), G_REGEX_ANCHORED, G_REGEX_MATCH_ANCHORED);
fields.toUtf8().constData(), (GRegexCompileFlags) G_REGEX_RAW,
(GRegexMatchFlags) 0);
for (guint i = 0; i < g_strv_length(splitted_fields); i++) {
if (splitted_fields[i] && *splitted_fields[i]) {