dfilter: Add a thin encapsulation layer for REs

This commit is contained in:
João Valverde 2021-10-15 23:47:28 +01:00 committed by Wireshark GitLab Utility
parent 2e048df011
commit e8800ff3c4
12 changed files with 106 additions and 89 deletions

View File

@ -78,46 +78,25 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
return stnode_new(STTYPE_FUNCTION, def, name);
}
/* Gets a GRegex from a string, and sets the error message on failure. */
/* Gets a regex from a string, and sets the error message on failure. */
stnode_t *
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
{
GError *regex_error = NULL;
GRegex *pcre;
const char *patt;
fvalue_regex_t *pcre;
char *errmsg = NULL;
if (stnode_type_id(node) == STTYPE_STRING) {
patt = stnode_data(node);
}
else {
if (stnode_type_id(node) != STTYPE_STRING) {
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
return node;
}
patt = stnode_data(node);
const char *patt = stnode_data(node);
ws_debug("Compile regex pattern: %s", patt);
/*
* As a string is not guaranteed to contain valid UTF-8,
* we have to disable support for UTF-8 patterns and treat
* every pattern and subject as raw bytes.
*
* Should support for UTF-8 patterns be necessary, then we
* should compile a pattern without G_REGEX_RAW. Additionally,
* we MUST use g_utf8_validate() before calling g_regex_match_full()
* or risk crashes.
*/
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
pcre = g_regex_new(
patt, /* pattern */
cflags, /* Compile options */
0, /* Match options */
&regex_error); /* Compile / study errors */
if (regex_error) {
dfilter_parse_fail(dfw, "%s", regex_error->message);
g_error_free(regex_error);
pcre = fvalue_regex_compile(patt, &errmsg);
if (errmsg) {
dfilter_parse_fail(dfw, "%s", errmsg);
g_free(errmsg);
return node;
}

View File

@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v)
drange_free(v->value.drange);
break;
case PCRE:
g_regex_unref(v->value.pcre);
fvalue_regex_free(v->value.pcre);
break;
default:
/* nothing */
@ -110,9 +110,9 @@ dfvm_dump(FILE *f, dfilter_t *df)
wmem_free(NULL, value_str);
break;
case PUT_PCRE:
fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n",
fprintf(f, "%05d PUT_PCRE \t%s <GRegex> -> reg#%u\n",
id,
g_regex_get_pattern(arg1->value.pcre),
fvalue_regex_pattern(arg1->value.pcre),
arg2->value.numeric);
break;
case CHECK_EXISTS:
@ -365,7 +365,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
/* Put a constant PCRE in a register. These will not be cleared by
* free_register_overhead. */
static gboolean
put_pcre(dfilter_t *df, GRegex *pcre, int reg)
put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg)
{
df->registers[reg] = g_list_append(NULL, pcre);
df->owns_memory[reg] = FALSE;
@ -404,7 +404,7 @@ any_matches(dfilter_t *df, int reg1, int reg2)
while (list_a) {
list_b = df->registers[reg2];
while (list_b) {
if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) {
if (fvalue_matches((fvalue_t *)list_a->data, list_b->data)) {
return TRUE;
}
list_b = g_list_next(list_b);

View File

@ -36,7 +36,7 @@ typedef struct {
drange_t *drange;
header_field_info *hfinfo;
df_func_def_t *funcdef;
GRegex *pcre;
fvalue_regex_t *pcre;
} value;
} dfvm_value_t;

View File

@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
/* returns register number */
static int
dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre)
dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre)
{
dfvm_insn_t *insn;
dfvm_value_t *val1, *val2;
@ -427,7 +427,7 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
reg = dfw_append_function(dfw, st_arg, p_jmp);
}
else if (e_type == STTYPE_PCRE) {
reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg));
reg = dfw_append_put_pcre(dfw, stnode_steal_data(st_arg));
}
else {
/* printf("sttype_id is %u\n", (unsigned)e_type); */

View File

@ -27,7 +27,7 @@ fvalue_free(gpointer value)
static void
pcre_free(gpointer value)
{
GRegex *pcre = (GRegex*)value;
fvalue_regex_t *pcre = value;
/* If the data was not claimed with stnode_steal_data(), free it. */
if (pcre) {
@ -36,7 +36,7 @@ pcre_free(gpointer value)
* count; it'll get freed when the reference count drops
* to 0.
*/
g_regex_unref(pcre);
fvalue_regex_free(pcre);
}
}
@ -67,9 +67,9 @@ field_tostr(const void *data, gboolean pretty _U_)
static char *
pcre_tostr(const void *data, gboolean pretty _U_)
{
const GRegex *pcre = (const GRegex *)data;
const fvalue_regex_t *pcre = data;
return g_strdup(g_regex_get_pattern(pcre));
return g_strdup(fvalue_regex_pattern(pcre));
}
void

View File

@ -586,20 +586,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const GRegex *regex)
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
GByteArray *a = fv->value.bytes;
return g_regex_match_full(
regex, /* Compiled PCRE */
(char *)a->data, /* The data to check for the pattern... */
(int)a->len, /* ... and its length */
0, /* Start offset within data */
(GRegexMatchFlags)0, /* GRegexMatchFlags */
NULL, /* We are not interested in the match information */
NULL /* We don't want error information */
);
/* NOTE - DO NOT g_free(data) */
return fvalue_regex_matches(regex, a->data, a->len);
}
void

View File

@ -257,7 +257,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const GRegex *regex)
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
volatile gboolean rc = FALSE;
@ -271,26 +271,9 @@ cmp_matches(const fvalue_t *fv, const GRegex *regex)
if (a->tvb != NULL) {
tvb_len = tvb_captured_length(a->tvb);
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
rc = g_regex_match_full(
regex, /* Compiled PCRE */
data, /* The data to check for the pattern... */
tvb_len, /* ... and its length */
0, /* Start offset within data */
(GRegexMatchFlags)0, /* GRegexMatchFlags */
NULL, /* We are not interested in the match information */
NULL /* We don't want error information */
);
/* NOTE - DO NOT g_free(data) */
rc = fvalue_regex_matches(regex, data, tvb_len);
} else {
rc = g_regex_match_full(
regex, /* Compiled PCRE */
a->proto_string, /* The data to check for the pattern... */
(int)strlen(a->proto_string), /* ... and its length */
0, /* Start offset within data */
(GRegexMatchFlags)0, /* GRegexMatchFlags */
NULL, /* We are not interested in the match information */
NULL /* We don't want error information */
);
rc = fvalue_regex_matches(regex, a->proto_string, -1);
}
}
CATCH_ALL {

View File

@ -138,22 +138,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const GRegex *regex)
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
{
char *str = fv->value.string;
if (! regex) {
return FALSE;
}
return g_regex_match_full(
regex, /* Compiled PCRE */
str, /* The data to check for the pattern... */
(int)strlen(str), /* ... and its length */
0, /* Start offset within data */
(GRegexMatchFlags)0, /* GRegexMatchFlags */
NULL, /* We are not interested in the match information */
NULL /* We don't want error information */
);
return fvalue_regex_matches(regex, str, -1);
}
void

View File

@ -64,7 +64,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*);
typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*);
typedef guint (*FvalueLen)(fvalue_t*);
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);

View File

@ -8,12 +8,14 @@
#include "config.h"
#include <ftypes-int.h>
#include <glib.h>
#include "ftypes-int.h"
#include "ftypes.h"
#include <wsutil/ws_assert.h>
struct _fvalue_regex_t {
GRegex *code;
};
/* Keep track of ftype_t's via their ftenum number */
static ftype_t* type_list[FT_NUM_TYPES];
@ -747,13 +749,64 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
}
gboolean
fvalue_matches(const fvalue_t *a, const GRegex *b)
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
{
/* XXX - check compatibility of a and b */
ws_assert(a->ftype->cmp_matches);
return a->ftype->cmp_matches(a, b);
}
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg)
{
GError *regex_error = NULL;
GRegex *pcre;
/*
* As a string is not guaranteed to contain valid UTF-8,
* we have to disable support for UTF-8 patterns and treat
* every pattern and subject as raw bytes.
*
* Should support for UTF-8 patterns be necessary, then we
* should compile a pattern without G_REGEX_RAW. Additionally,
* we MUST use g_utf8_validate() before calling g_regex_match_full()
* or risk crashes.
*/
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
pcre = g_regex_new(patt, cflags, 0, &regex_error);
if (regex_error) {
*errmsg = g_strdup(regex_error->message);
g_error_free(regex_error);
return NULL;
}
struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1);
re->code = pcre;
return re;
}
gboolean
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
{
return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL);
}
void
fvalue_regex_free(fvalue_regex_t *regex)
{
g_regex_unref(regex->code);
g_free(regex);
}
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex)
{
return g_regex_get_pattern(regex->code);
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*

View File

@ -131,6 +131,9 @@ typedef enum ft_framenum_type ft_framenum_type_t;
struct _ftype_t;
typedef struct _ftype_t ftype_t;
struct _fvalue_regex_t;
typedef struct _fvalue_regex_t fvalue_regex_t;
/* String representation types. */
enum ftrepr {
FTREPR_DISPLAY,
@ -368,7 +371,19 @@ gboolean
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
gboolean
fvalue_matches(const fvalue_t *a, const GRegex *b);
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re);
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg);
gboolean
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size);
void
fvalue_regex_free(fvalue_regex_t *regex);
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex);
guint
fvalue_length(fvalue_t *fv);

View File

@ -42,3 +42,7 @@ class case_syntax(unittest.TestCase):
def test_matches_3(self, checkDFilterFail):
dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
def test_matches_4(self, checkDFilterCount):
dfilter = r'http.host matches r"update\.microsoft\.c.."'
checkDFilterCount(dfilter, 1)