From e8800ff3c49e0b8a7a04d06ea68a39a16bab778f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Fri, 15 Oct 2021 23:47:28 +0100 Subject: [PATCH] dfilter: Add a thin encapsulation layer for REs --- epan/dfilter/dfilter.c | 39 +++++-------------- epan/dfilter/dfvm.c | 10 ++--- epan/dfilter/dfvm.h | 2 +- epan/dfilter/gencode.c | 4 +- epan/dfilter/sttype-pointer.c | 8 ++-- epan/ftypes/ftype-bytes.c | 13 +------ epan/ftypes/ftype-protocol.c | 23 ++--------- epan/ftypes/ftype-string.c | 12 +----- epan/ftypes/ftypes-int.h | 2 +- epan/ftypes/ftypes.c | 61 ++++++++++++++++++++++++++++-- epan/ftypes/ftypes.h | 17 ++++++++- test/suite_dfilter/group_syntax.py | 4 ++ 12 files changed, 106 insertions(+), 89 deletions(-) diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c index 263b5c192c..1836aa8efa 100644 --- a/epan/dfilter/dfilter.c +++ b/epan/dfilter/dfilter.c @@ -78,46 +78,25 @@ dfilter_new_function(dfwork_t *dfw, const char *name) return stnode_new(STTYPE_FUNCTION, def, name); } -/* Gets a GRegex from a string, and sets the error message on failure. */ +/* Gets a regex from a string, and sets the error message on failure. */ stnode_t * dfilter_new_regex(dfwork_t *dfw, stnode_t *node) { - GError *regex_error = NULL; - GRegex *pcre; - const char *patt; + fvalue_regex_t *pcre; + char *errmsg = NULL; - if (stnode_type_id(node) == STTYPE_STRING) { - patt = stnode_data(node); - } - else { + if (stnode_type_id(node) != STTYPE_STRING) { dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node)); return node; } - patt = stnode_data(node); + const char *patt = stnode_data(node); ws_debug("Compile regex pattern: %s", patt); - /* - * As a string is not guaranteed to contain valid UTF-8, - * we have to disable support for UTF-8 patterns and treat - * every pattern and subject as raw bytes. - * - * Should support for UTF-8 patterns be necessary, then we - * should compile a pattern without G_REGEX_RAW. Additionally, - * we MUST use g_utf8_validate() before calling g_regex_match_full() - * or risk crashes. - */ - GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW; - - pcre = g_regex_new( - patt, /* pattern */ - cflags, /* Compile options */ - 0, /* Match options */ - ®ex_error); /* Compile / study errors */ - - if (regex_error) { - dfilter_parse_fail(dfw, "%s", regex_error->message); - g_error_free(regex_error); + pcre = fvalue_regex_compile(patt, &errmsg); + if (errmsg) { + dfilter_parse_fail(dfw, "%s", errmsg); + g_free(errmsg); return node; } diff --git a/epan/dfilter/dfvm.c b/epan/dfilter/dfvm.c index 42f29e0150..47278e4169 100644 --- a/epan/dfilter/dfvm.c +++ b/epan/dfilter/dfvm.c @@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v) drange_free(v->value.drange); break; case PCRE: - g_regex_unref(v->value.pcre); + fvalue_regex_free(v->value.pcre); break; default: /* nothing */ @@ -110,9 +110,9 @@ dfvm_dump(FILE *f, dfilter_t *df) wmem_free(NULL, value_str); break; case PUT_PCRE: - fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n", + fprintf(f, "%05d PUT_PCRE \t%s -> reg#%u\n", id, - g_regex_get_pattern(arg1->value.pcre), + fvalue_regex_pattern(arg1->value.pcre), arg2->value.numeric); break; case CHECK_EXISTS: @@ -365,7 +365,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg) /* Put a constant PCRE in a register. These will not be cleared by * free_register_overhead. */ static gboolean -put_pcre(dfilter_t *df, GRegex *pcre, int reg) +put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg) { df->registers[reg] = g_list_append(NULL, pcre); df->owns_memory[reg] = FALSE; @@ -404,7 +404,7 @@ any_matches(dfilter_t *df, int reg1, int reg2) while (list_a) { list_b = df->registers[reg2]; while (list_b) { - if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) { + if (fvalue_matches((fvalue_t *)list_a->data, list_b->data)) { return TRUE; } list_b = g_list_next(list_b); diff --git a/epan/dfilter/dfvm.h b/epan/dfilter/dfvm.h index 71e6cd4449..a57333c707 100644 --- a/epan/dfilter/dfvm.h +++ b/epan/dfilter/dfvm.h @@ -36,7 +36,7 @@ typedef struct { drange_t *drange; header_field_info *hfinfo; df_func_def_t *funcdef; - GRegex *pcre; + fvalue_regex_t *pcre; } value; } dfvm_value_t; diff --git a/epan/dfilter/gencode.c b/epan/dfilter/gencode.c index 28cb573104..bb5023710c 100644 --- a/epan/dfilter/gencode.c +++ b/epan/dfilter/gencode.c @@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp) /* returns register number */ static int -dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre) +dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre) { dfvm_insn_t *insn; dfvm_value_t *val1, *val2; @@ -427,7 +427,7 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp) reg = dfw_append_function(dfw, st_arg, p_jmp); } else if (e_type == STTYPE_PCRE) { - reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg)); + reg = dfw_append_put_pcre(dfw, stnode_steal_data(st_arg)); } else { /* printf("sttype_id is %u\n", (unsigned)e_type); */ diff --git a/epan/dfilter/sttype-pointer.c b/epan/dfilter/sttype-pointer.c index 1f2a5ffa2b..4feb8b1d45 100644 --- a/epan/dfilter/sttype-pointer.c +++ b/epan/dfilter/sttype-pointer.c @@ -27,7 +27,7 @@ fvalue_free(gpointer value) static void pcre_free(gpointer value) { - GRegex *pcre = (GRegex*)value; + fvalue_regex_t *pcre = value; /* If the data was not claimed with stnode_steal_data(), free it. */ if (pcre) { @@ -36,7 +36,7 @@ pcre_free(gpointer value) * count; it'll get freed when the reference count drops * to 0. */ - g_regex_unref(pcre); + fvalue_regex_free(pcre); } } @@ -67,9 +67,9 @@ field_tostr(const void *data, gboolean pretty _U_) static char * pcre_tostr(const void *data, gboolean pretty _U_) { - const GRegex *pcre = (const GRegex *)data; + const fvalue_regex_t *pcre = data; - return g_strdup(g_regex_get_pattern(pcre)); + return g_strdup(fvalue_regex_pattern(pcre)); } void diff --git a/epan/ftypes/ftype-bytes.c b/epan/ftypes/ftype-bytes.c index 5eed4868a2..d0da0fd482 100644 --- a/epan/ftypes/ftype-bytes.c +++ b/epan/ftypes/ftype-bytes.c @@ -586,20 +586,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static gboolean -cmp_matches(const fvalue_t *fv, const GRegex *regex) +cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex) { GByteArray *a = fv->value.bytes; - return g_regex_match_full( - regex, /* Compiled PCRE */ - (char *)a->data, /* The data to check for the pattern... */ - (int)a->len, /* ... and its length */ - 0, /* Start offset within data */ - (GRegexMatchFlags)0, /* GRegexMatchFlags */ - NULL, /* We are not interested in the match information */ - NULL /* We don't want error information */ - ); - /* NOTE - DO NOT g_free(data) */ + return fvalue_regex_matches(regex, a->data, a->len); } void diff --git a/epan/ftypes/ftype-protocol.c b/epan/ftypes/ftype-protocol.c index 98d38dcc7d..9af382e773 100644 --- a/epan/ftypes/ftype-protocol.c +++ b/epan/ftypes/ftype-protocol.c @@ -257,7 +257,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static gboolean -cmp_matches(const fvalue_t *fv, const GRegex *regex) +cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex) { const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol; volatile gboolean rc = FALSE; @@ -271,26 +271,9 @@ cmp_matches(const fvalue_t *fv, const GRegex *regex) if (a->tvb != NULL) { tvb_len = tvb_captured_length(a->tvb); data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len); - rc = g_regex_match_full( - regex, /* Compiled PCRE */ - data, /* The data to check for the pattern... */ - tvb_len, /* ... and its length */ - 0, /* Start offset within data */ - (GRegexMatchFlags)0, /* GRegexMatchFlags */ - NULL, /* We are not interested in the match information */ - NULL /* We don't want error information */ - ); - /* NOTE - DO NOT g_free(data) */ + rc = fvalue_regex_matches(regex, data, tvb_len); } else { - rc = g_regex_match_full( - regex, /* Compiled PCRE */ - a->proto_string, /* The data to check for the pattern... */ - (int)strlen(a->proto_string), /* ... and its length */ - 0, /* Start offset within data */ - (GRegexMatchFlags)0, /* GRegexMatchFlags */ - NULL, /* We are not interested in the match information */ - NULL /* We don't want error information */ - ); + rc = fvalue_regex_matches(regex, a->proto_string, -1); } } CATCH_ALL { diff --git a/epan/ftypes/ftype-string.c b/epan/ftypes/ftype-string.c index 21c76dfa8b..ccc3f97733 100644 --- a/epan/ftypes/ftype-string.c +++ b/epan/ftypes/ftype-string.c @@ -138,22 +138,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b) } static gboolean -cmp_matches(const fvalue_t *fv, const GRegex *regex) +cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex) { char *str = fv->value.string; if (! regex) { return FALSE; } - return g_regex_match_full( - regex, /* Compiled PCRE */ - str, /* The data to check for the pattern... */ - (int)strlen(str), /* ... and its length */ - 0, /* Start offset within data */ - (GRegexMatchFlags)0, /* GRegexMatchFlags */ - NULL, /* We are not interested in the match information */ - NULL /* We don't want error information */ - ); + return fvalue_regex_matches(regex, str, -1); } void diff --git a/epan/ftypes/ftypes-int.h b/epan/ftypes/ftypes-int.h index 704b5d55cc..df01c95c12 100644 --- a/epan/ftypes/ftypes-int.h +++ b/epan/ftypes/ftypes-int.h @@ -64,7 +64,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*); typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*); typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*); typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*); -typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*); +typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*); typedef guint (*FvalueLen)(fvalue_t*); typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length); diff --git a/epan/ftypes/ftypes.c b/epan/ftypes/ftypes.c index e10a38513d..12324c2d3e 100644 --- a/epan/ftypes/ftypes.c +++ b/epan/ftypes/ftypes.c @@ -8,12 +8,14 @@ #include "config.h" -#include -#include +#include "ftypes-int.h" -#include "ftypes.h" #include +struct _fvalue_regex_t { + GRegex *code; +}; + /* Keep track of ftype_t's via their ftenum number */ static ftype_t* type_list[FT_NUM_TYPES]; @@ -747,13 +749,64 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b) } gboolean -fvalue_matches(const fvalue_t *a, const GRegex *b) +fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b) { /* XXX - check compatibility of a and b */ ws_assert(a->ftype->cmp_matches); return a->ftype->cmp_matches(a, b); } +fvalue_regex_t * +fvalue_regex_compile(const char *patt, char **errmsg) +{ + GError *regex_error = NULL; + GRegex *pcre; + + /* + * As a string is not guaranteed to contain valid UTF-8, + * we have to disable support for UTF-8 patterns and treat + * every pattern and subject as raw bytes. + * + * Should support for UTF-8 patterns be necessary, then we + * should compile a pattern without G_REGEX_RAW. Additionally, + * we MUST use g_utf8_validate() before calling g_regex_match_full() + * or risk crashes. + */ + GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW; + + pcre = g_regex_new(patt, cflags, 0, ®ex_error); + + if (regex_error) { + *errmsg = g_strdup(regex_error->message); + g_error_free(regex_error); + return NULL; + } + + struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1); + re->code = pcre; + + return re; +} + +gboolean +fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size) +{ + return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL); +} + +void +fvalue_regex_free(fvalue_regex_t *regex) +{ + g_regex_unref(regex->code); + g_free(regex); +} + +const char * +fvalue_regex_pattern(const fvalue_regex_t *regex) +{ + return g_regex_get_pattern(regex->code); +} + /* * Editor modelines - https://www.wireshark.org/tools/modelines.html * diff --git a/epan/ftypes/ftypes.h b/epan/ftypes/ftypes.h index a43b655d84..9659bd0e4b 100644 --- a/epan/ftypes/ftypes.h +++ b/epan/ftypes/ftypes.h @@ -131,6 +131,9 @@ typedef enum ft_framenum_type ft_framenum_type_t; struct _ftype_t; typedef struct _ftype_t ftype_t; +struct _fvalue_regex_t; +typedef struct _fvalue_regex_t fvalue_regex_t; + /* String representation types. */ enum ftrepr { FTREPR_DISPLAY, @@ -368,7 +371,19 @@ gboolean fvalue_contains(const fvalue_t *a, const fvalue_t *b); gboolean -fvalue_matches(const fvalue_t *a, const GRegex *b); +fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re); + +fvalue_regex_t * +fvalue_regex_compile(const char *patt, char **errmsg); + +gboolean +fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size); + +void +fvalue_regex_free(fvalue_regex_t *regex); + +const char * +fvalue_regex_pattern(const fvalue_regex_t *regex); guint fvalue_length(fvalue_t *fv); diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py index 42a7099e68..204851e7d0 100644 --- a/test/suite_dfilter/group_syntax.py +++ b/test/suite_dfilter/group_syntax.py @@ -42,3 +42,7 @@ class case_syntax(unittest.TestCase): def test_matches_3(self, checkDFilterFail): dfilter = 'http.request.method matches "^HEAD" matches "^POST"' checkDFilterFail(dfilter, '"matches" was unexpected in this context.') + + def test_matches_4(self, checkDFilterCount): + dfilter = r'http.host matches r"update\.microsoft\.c.."' + checkDFilterCount(dfilter, 1)