From 49540ec646333d92ab4d54fbdc7a0692a3339d1a Mon Sep 17 00:00:00 2001 From: John Thacker Date: Wed, 7 Feb 2024 21:13:23 -0500 Subject: [PATCH] columns: Allow any field expression syntax to be used in columns Allow anything that can be used in a display filter to be used in columns (with the exception that field references don't work without a notion of a currently selected frame): display filter functions, slices, arithmetic calculations, logical tests, raw byte addressing, the layer modifier, display filter macros, etc., alone or in combination. Show the results and generate filters. Note that "resolved" values are not yet supported. They make conceptual sense for some expressions (e.g., if only the layer modifier is used) but not for others. Perhaps resolution could be done as a final step in the filter before returning values. It would also be useful to be able to get the expected return type of an expression, so that the functions for right justifying a column or sorting numerically could work. Right now the results are treated as strings even if the returned field values are numeric. Multifield columns (i.e., concatenation of field values) are currently implemented using the OR operator. For backwards compatibility, continue to support that. When a true logical OR would give a different result, surround the expression in parentheses, which the multifield columns did not previously support (due to the regex used instead of full filter grammar parsing.) Perhaps in the future we should introduce a separate operator for concatenation, possibly only used in column definitions and nowhere else. Update release notes. Fix #7752. Fix #10154. Fix #15990. Fix #18588. Fix #19076. Related to #16181 - it's now possible to define new display filter functions so that is essentially solved, though I suppose there's always room for more built-in functions. 
--- doc/release-notes.adoc | 40 +++++++++ epan/column-info.h | 9 ++ epan/column-utils.c | 11 ++- epan/column.c | 20 +++-- epan/proto.c | 131 ++++++++++++++++++++++++++++- ui/packet_list_utils.c | 30 +++++-- ui/qt/models/packet_list_model.cpp | 16 +++- ui/qt/widgets/syntax_line_edit.cpp | 13 +++ 8 files changed, 252 insertions(+), 18 deletions(-) diff --git a/doc/release-notes.adoc b/doc/release-notes.adoc index eba9950c74..185883d6ca 100644 --- a/doc/release-notes.adoc +++ b/doc/release-notes.adoc @@ -27,6 +27,10 @@ Display filter functions can be implemented as runtime-loadable C plugins. Plugin registration API was refactored. Plugin authors must update their plugins as described below. +Custom columns can be defined using any valid field expression, such as +display filter functions, slices, arithmetic calculations, logical tests, +raw byte addressing, and the layer modifier. + Many other improvements have been made. See the “New and Updated Features” section below for more details. @@ -97,6 +101,42 @@ The following features are new (or have been significantly updated) since versio but it is advisable to check that the "dfilter_macros" (old) and "dmacros" (new) files in the profile directory are consistent. +* Custom columns can be defined using any valid field expression: + + ** Display filter functions, like `len(tcp.payload)`, including nested functions + like `min(len(tcp.payload), len(udp.payload))` and newly defined functions + using the plugin system mentioned above. wsbuglink:15990[] wsbuglink:16181[] + + ** Arithmetic calculations, like `ip.len * 8` or `tcp.srcport + tcp.dstport`. + wsbuglink:7752[] + + ** Slices, like `tcp.payload[4:4]`. wsbuglink:10154[] + + ** The layer operator, like `ip.proto#1` to return the proto field in the + first IPv4 layer if there is tunneling. wsbuglink:18588[] + + ** Raw byte addressing, like `@ip`, useful to return the bytes of a protocol + or FT_NONE field, among others. 
wsbuglink:19076[] + + ** Logical tests, like `tcp.port == 443`, which produce a check mark if + the test matches (similar to protocol and none fields without `@`.) + This works with all logical operators, including e.g. regular expression + matching (`matches` or `~`.) + + ** Defined display filter macros. + + ** Any combination of the above also works. + + ** Multifield columns are still available. For backwards compatibility, + `X or Y` is interpreted as a multifield column as before. To represent a + logical test for the presence of multiple fields instead of concatenating + values, use parentheses, like `(tcp.options.timestamp or tcp.options.nop)`. + + ** Field references are not implemented, because there's no sense of a + currently selected frame. "Resolved" column values (such as host name + resolution or value string lookup) are not supported for any of the new + expressions yet. + * When selecting "Manage Interfaces" from "Capture Options", Wireshark only attempts to reconnect to rpcap (remote) hosts that were connected to in the last session, instead of every remote host that the current profile has ever diff --git a/epan/column-info.h b/epan/column-info.h index 0d26e65234..ea05ee2bf5 100644 --- a/epan/column-info.h +++ b/epan/column-info.h @@ -34,6 +34,15 @@ typedef struct { gchar **col_expr_val; /**< Value for filter expression */ } col_expr_t; +/** Custom column filter expression information used in the GSList below. + * One for each expression in a multifield column. 
+ */ +typedef struct { + char *dftext; /**< Filter expression */ + struct epan_dfilter *dfilter; /**< Compiled filter expression */ + int field_id; /**< ID for a single field expression, or 0 */ +} col_custom_t; + /** Individual column info */ typedef struct { gint col_fmt; /**< Format of column */ diff --git a/epan/column-utils.c b/epan/column-utils.c index a9e8ac34ff..7befdd4492 100644 --- a/epan/column-utils.c +++ b/epan/column-utils.c @@ -82,11 +82,20 @@ col_setup(column_info *cinfo, const gint num_cols) 0, NULL); } +static void +col_custom_free_cb(void *data) +{ + col_custom_t *col_custom = (col_custom_t*)data; + dfilter_free(col_custom->dfilter); + g_free(col_custom->dftext); + g_free(col_custom); +} + static void col_custom_fields_ids_free(GSList** custom_fields_id) { if (*custom_fields_id != NULL) { - g_slist_free_full(*custom_fields_id, g_free); + g_slist_free_full(*custom_fields_id, col_custom_free_cb); } *custom_fields_id = NULL; } diff --git a/epan/column.c b/epan/column.c index 0d836e8ed8..c30db61527 100644 --- a/epan/column.c +++ b/epan/column.c @@ -951,6 +951,11 @@ get_custom_field_tooltip (gchar *custom_field, gint occurrence) header_field_info *hfi = proto_registrar_get_byname(custom_field); if (hfi == NULL) { /* Not a valid field */ + dfilter_t *dfilter; + if (dfilter_compile(custom_field, &dfilter, NULL)) { + dfilter_free(dfilter); + return ws_strdup_printf("Expression: %s", custom_field); + } return ws_strdup_printf("Unknown Field: %s", custom_field); } @@ -1029,6 +1034,7 @@ col_finalize(column_info *cinfo) { int i; col_item_t* col_item; + dfilter_t *dfilter; for (i = 0; i < cinfo->num_cols; i++) { col_item = &cinfo->columns[i]; @@ -1048,11 +1054,15 @@ col_finalize(column_info *cinfo) for (i_field = 0; i_field < g_strv_length(fields); i_field++) { if (fields[i_field] && *fields[i_field]) { - header_field_info *hfinfo = proto_registrar_get_byname(fields[i_field]); - if (hfinfo) { - int *idx = g_new(int, 1); - *idx = hfinfo->id; - 
col_item->col_custom_fields_ids = g_slist_append(col_item->col_custom_fields_ids, idx); + if (dfilter_compile_full(fields[i_field], &dfilter, NULL, DF_EXPAND_MACROS|DF_OPTIMIZE|DF_RETURN_VALUES, __func__)) { + col_custom_t *custom_info = g_new0(col_custom_t, 1); + custom_info->dftext = g_strdup(fields[i_field]); + custom_info->dfilter = dfilter; + header_field_info *hfinfo = proto_registrar_get_byname(fields[i_field]); + if (hfinfo) { + custom_info->field_id = hfinfo->id; + } + col_item->col_custom_fields_ids = g_slist_append(col_item->col_custom_fields_ids, custom_info); } } } diff --git a/epan/proto.c b/epan/proto.c index b61f697f79..9fb5ee9f38 100644 --- a/epan/proto.c +++ b/epan/proto.c @@ -38,6 +38,7 @@ #include "oids.h" #include "proto.h" #include "epan_dissect.h" +#include "dfilter/dfilter.h" #include "tvbuff.h" #include #include "charsets.h" @@ -7145,13 +7146,83 @@ proto_custom_set(proto_tree* tree, GSList *field_ids, gint occurrence, const char *hf_str_val; char *str; - int *field_idx; + col_custom_t *field_idx; int field_id; int ii = 0; ws_assert(field_ids != NULL); - while ((field_idx = (int *) g_slist_nth_data(field_ids, ii++))) { - field_id = *field_idx; + while ((field_idx = (col_custom_t *) g_slist_nth_data(field_ids, ii++))) { + field_id = field_idx->field_id; + if (field_id == 0) { + GPtrArray *fvals = NULL; + bool passed = dfilter_apply_full(field_idx->dfilter, tree, &fvals); + if (fvals != NULL) { + + // XXX - Handling occurrences is unusual when more + // than one field is involved, e.g. there's four + // results for tcp.port + tcp.port. We may really + // want to apply it to the operands, not the output. + // Note that occurrences are not quite the same as + // the layer operator (should the grammar support + // both?) 
+ /* Calculate single index or set outer boundaries */ + len = g_ptr_array_len(fvals); + if (occurrence < 0) { + i = occurrence + len; + last = i; + } else if (occurrence > 0) { + i = occurrence - 1; + last = i; + } else { + i = 0; + last = len - 1; + } + if (i < 0 || i >= len) { + g_ptr_array_unref(fvals); + continue; + } + for (; i <= last; i++) { + /* XXX - We could have a "resolved" result + * for types where the value depends only + * on the type, e.g. FT_IPv4, and not on + * hfinfo->strings. Supporting the latter + * requires knowing which hfinfo matched + * if there are multiple with the same + * abbreviation. In any case, we need to + * know the expected return type of the + * field expression. + */ + str = fvalue_to_string_repr(NULL, fvals->pdata[i], FTREPR_DISPLAY, BASE_NONE); + if (offset_r && (offset_r < (size - 1))) + result[offset_r++] = ','; + if (offset_e && (offset_e < (size - 1))) + expr[offset_e++] = ','; + offset_r += protoo_strlcpy(result+offset_r, str, size-offset_r); + offset_e += protoo_strlcpy(expr+offset_e, str, size-offset_e); + g_free(str); + } + g_ptr_array_unref(fvals); + } else if (passed) { + // XXX - Occurrence doesn't make sense for a test + // output, it should be applied to the operands. + if (offset_r && (offset_r < (size - 1))) + result[offset_r++] = ','; + if (offset_e && (offset_e < (size - 1))) + expr[offset_e++] = ','; + /* Prevent multiple check marks */ + if (strstr(result, UTF8_CHECK_MARK ",") == NULL) { + offset_r += protoo_strlcpy(result+offset_r, UTF8_CHECK_MARK, size-offset_r); + } else { + result[--offset_r] = '\0'; /* Remove the added trailing ',' */ + } + if (strstr(expr, UTF8_CHECK_MARK ",") == NULL) { + offset_e += protoo_strlcpy(expr+offset_e, UTF8_CHECK_MARK, size-offset_e); /* expr is written at its own offset (offset_e), not result's */ + } else { + expr[--offset_e] = '\0'; /* Remove the added trailing ',' */ + } + } + continue; + } PROTO_REGISTRAR_GET_NTH((guint)field_id, hfinfo); /* do we need to rewind ? 
*/ @@ -7295,12 +7366,64 @@ proto_custom_get_filter(epan_dissect_t* edt, GSList *field_ids, gint occurrence) char *filter = NULL; GPtrArray *filter_array; + col_custom_t *col_custom; int field_id; ws_assert(field_ids != NULL); filter_array = g_ptr_array_new_full(g_slist_length(field_ids), g_free); for (GSList *iter = field_ids; iter; iter = iter->next) { - field_id = *(int *)iter->data; + col_custom = (col_custom_t*)iter->data; + field_id = col_custom->field_id; + if (field_id == 0) { + GPtrArray *fvals = NULL; + bool passed = dfilter_apply_full(col_custom->dfilter, edt->tree, &fvals); + if (fvals != NULL) { + // XXX - Handling occurrences is unusual when more + // than one field is involved, e.g. there's four + // results for tcp.port + tcp.port. We really + // want to apply it to the operands, not the output. + /* Calculate single index or set outer boundaries */ + len = g_ptr_array_len(fvals); + if (occurrence < 0) { + i = occurrence + len; + last = i; + } else if (occurrence > 0) { + i = occurrence - 1; + last = i; + } else { + i = 0; + last = len - 1; + } + if (i < 0 || i >= len) { + g_ptr_array_unref(fvals); + continue; + } + for (; i <= last; i++) { + /* XXX - Should multiple values for one + * field use set membership to reduce + * verbosity, here and below? 
*/ + char *str = fvalue_to_string_repr(NULL, fvals->pdata[i], FTREPR_DFILTER, BASE_NONE); + filter = wmem_strdup_printf(NULL, "%s == %s", col_custom->dftext, str); + wmem_free(NULL, str); + if (!g_ptr_array_find_with_equal_func(filter_array, filter, g_str_equal, NULL)) { + g_ptr_array_add(filter_array, filter); + } + } + g_ptr_array_unref(fvals); + } else if (passed) { + filter = wmem_strdup(NULL, col_custom->dftext); + if (!g_ptr_array_find_with_equal_func(filter_array, filter, g_str_equal, NULL)) { + g_ptr_array_add(filter_array, filter); + } + } else { + filter = wmem_strdup_printf(NULL, "!(%s)", col_custom->dftext); + if (!g_ptr_array_find_with_equal_func(filter_array, filter, g_str_equal, NULL)) { + g_ptr_array_add(filter_array, filter); + } + } + continue; + } + PROTO_REGISTRAR_GET_NTH((guint)field_id, hfinfo); /* do we need to rewind ? */ diff --git a/ui/packet_list_utils.c b/ui/packet_list_utils.c index ce5a6d27ce..312d156e12 100644 --- a/ui/packet_list_utils.c +++ b/ui/packet_list_utils.c @@ -20,7 +20,8 @@ right_justify_column (gint col, capture_file *cf) { header_field_info *hfi; gboolean right_justify = FALSE; - guint num_fields, *field_idx, ii; + guint num_fields, ii; + col_custom_t *col_custom; guint right_justify_count = 0; if (!cf) return FALSE; @@ -43,10 +44,17 @@ right_justify_column (gint col, capture_file *cf) case COL_CUSTOM: num_fields = g_slist_length(cf->cinfo.columns[col].col_custom_fields_ids); for (ii = 0; ii < num_fields; ii++) { - field_idx = (guint *) g_slist_nth_data(cf->cinfo.columns[col].col_custom_fields_ids, ii); - hfi = proto_registrar_get_nth(*field_idx); + col_custom = (col_custom_t *) g_slist_nth_data(cf->cinfo.columns[col].col_custom_fields_ids, ii); + if (col_custom->field_id == 0) { + /* XXX - If there were some way to check the compiled dfilter's + * expected return type, we could use that. 
+ */ + return FALSE; + } + hfi = proto_registrar_get_nth(col_custom->field_id); /* Check if this is a valid field and we have no strings lookup table */ + /* XXX - We should check every hfi with the same abbreviation */ if ((hfi != NULL) && ((hfi->strings == NULL) || !get_column_resolved(col))) { /* Check for bool, framenum, double, float, relative time and decimal/octal integer types */ if ((hfi->type == FT_BOOLEAN) || (hfi->type == FT_FRAMENUM) || (hfi->type == FT_DOUBLE) || @@ -77,7 +85,8 @@ resolve_column (gint col, capture_file *cf) { header_field_info *hfi; gboolean resolve = FALSE; - guint num_fields, *field_idx, ii; + guint num_fields, ii; + col_custom_t *col_custom; if (!cf) return FALSE; @@ -86,8 +95,17 @@ resolve_column (gint col, capture_file *cf) case COL_CUSTOM: num_fields = g_slist_length(cf->cinfo.columns[col].col_custom_fields_ids); for (ii = 0; ii < num_fields; ii++) { - field_idx = (guint *) g_slist_nth_data(cf->cinfo.columns[col].col_custom_fields_ids, ii); - hfi = proto_registrar_get_nth(*field_idx); + col_custom = (col_custom_t *) g_slist_nth_data(cf->cinfo.columns[col].col_custom_fields_ids, ii); + if (col_custom->field_id == 0) { + /* XXX - A "resolved" string might be conceivable for certain + * expressions, but would require being able to know which + * hfinfo produced each value, if there are multiple hfi with + * the same abbreviation. 
+ */ + continue; + } + hfi = proto_registrar_get_nth(col_custom->field_id); + /* XXX - We should check every hfi with the same abbreviation */ /* Check if we have an OID, a (potentially) resolvable network * address, a Boolean, or a strings table with integer values */ diff --git a/ui/qt/models/packet_list_model.cpp b/ui/qt/models/packet_list_model.cpp index 0ed61d745f..c018255326 100644 --- a/ui/qt/models/packet_list_model.cpp +++ b/ui/qt/models/packet_list_model.cpp @@ -617,6 +617,9 @@ void PacketListModel::stopSorting() bool PacketListModel::isNumericColumn(int column) { + /* XXX - Should this and ui/packet_list_utils.c right_justify_column() + * be the same list of columns? + */ if (column < 0) { return false; } @@ -652,9 +655,18 @@ bool PacketListModel::isNumericColumn(int column) } guint num_fields = g_slist_length(sort_cap_file_->cinfo.columns[column].col_custom_fields_ids); + col_custom_t *col_custom; for (guint i = 0; i < num_fields; i++) { - guint *field_idx = (guint *) g_slist_nth_data(sort_cap_file_->cinfo.columns[column].col_custom_fields_ids, i); - header_field_info *hfi = proto_registrar_get_nth(*field_idx); + col_custom = (col_custom_t *) g_slist_nth_data(sort_cap_file_->cinfo.columns[column].col_custom_fields_ids, i); + if (col_custom->field_id == 0) { + /* XXX - We need some way to check the compiled dfilter's expected + * return type. 
Best would be to use the actual field values return + * and sort on those (we could skip expensive string conversions + * in the numeric case, see below) + */ + return false; + } + header_field_info *hfi = proto_registrar_get_nth(col_custom->field_id); /* * Reject a field when there is no numeric field type or when: diff --git a/ui/qt/widgets/syntax_line_edit.cpp b/ui/qt/widgets/syntax_line_edit.cpp index 2ea83ea112..185bf585c9 100644 --- a/ui/qt/widgets/syntax_line_edit.cpp +++ b/ui/qt/widgets/syntax_line_edit.cpp @@ -267,6 +267,18 @@ void SyntaxLineEdit::checkCustomColumn(QString fields) return; } +#if 0 + // XXX - Eventually, if the operator we split on is something not supported + // in the filter expression syntax (so that we can distinguish multifield + // concatenation of column strings from a logical OR), we would split and + // then check each split result as a valid display filter. + // For now, any expression that is a valid display filter should work. + // + // We also, for the custom columns, want some of the extra completion + // information from DisplayFilterEdit (like the display filter functions), + // without all of its integration into the main app, but not every user + // of FieldFilterEdit wants that, so perhaps we eventually should have + // another class. gchar **splitted_fields = g_regex_split_simple(COL_CUSTOM_PRIME_REGEX, fields.toUtf8().constData(), (GRegexCompileFlags) G_REGEX_RAW, (GRegexMatchFlags) 0); @@ -281,6 +293,7 @@ void SyntaxLineEdit::checkCustomColumn(QString fields) } } g_strfreev(splitted_fields); +#endif checkDisplayFilter(fields); }