dfilter: Add a thin encapsulation layer for REs
This commit is contained in:
parent
2e048df011
commit
e8800ff3c4
|
@ -78,46 +78,25 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
|
||||||
return stnode_new(STTYPE_FUNCTION, def, name);
|
return stnode_new(STTYPE_FUNCTION, def, name);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Gets a GRegex from a string, and sets the error message on failure. */
|
/* Gets a regex from a string, and sets the error message on failure. */
|
||||||
stnode_t *
|
stnode_t *
|
||||||
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
|
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
|
||||||
{
|
{
|
||||||
GError *regex_error = NULL;
|
fvalue_regex_t *pcre;
|
||||||
GRegex *pcre;
|
char *errmsg = NULL;
|
||||||
const char *patt;
|
|
||||||
|
|
||||||
if (stnode_type_id(node) == STTYPE_STRING) {
|
if (stnode_type_id(node) != STTYPE_STRING) {
|
||||||
patt = stnode_data(node);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
|
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
patt = stnode_data(node);
|
const char *patt = stnode_data(node);
|
||||||
ws_debug("Compile regex pattern: %s", patt);
|
ws_debug("Compile regex pattern: %s", patt);
|
||||||
|
|
||||||
/*
|
pcre = fvalue_regex_compile(patt, &errmsg);
|
||||||
* As a string is not guaranteed to contain valid UTF-8,
|
if (errmsg) {
|
||||||
* we have to disable support for UTF-8 patterns and treat
|
dfilter_parse_fail(dfw, "%s", errmsg);
|
||||||
* every pattern and subject as raw bytes.
|
g_free(errmsg);
|
||||||
*
|
|
||||||
* Should support for UTF-8 patterns be necessary, then we
|
|
||||||
* should compile a pattern without G_REGEX_RAW. Additionally,
|
|
||||||
* we MUST use g_utf8_validate() before calling g_regex_match_full()
|
|
||||||
* or risk crashes.
|
|
||||||
*/
|
|
||||||
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
|
|
||||||
|
|
||||||
pcre = g_regex_new(
|
|
||||||
patt, /* pattern */
|
|
||||||
cflags, /* Compile options */
|
|
||||||
0, /* Match options */
|
|
||||||
&regex_error); /* Compile / study errors */
|
|
||||||
|
|
||||||
if (regex_error) {
|
|
||||||
dfilter_parse_fail(dfw, "%s", regex_error->message);
|
|
||||||
g_error_free(regex_error);
|
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v)
|
||||||
drange_free(v->value.drange);
|
drange_free(v->value.drange);
|
||||||
break;
|
break;
|
||||||
case PCRE:
|
case PCRE:
|
||||||
g_regex_unref(v->value.pcre);
|
fvalue_regex_free(v->value.pcre);
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
/* nothing */
|
/* nothing */
|
||||||
|
@ -110,9 +110,9 @@ dfvm_dump(FILE *f, dfilter_t *df)
|
||||||
wmem_free(NULL, value_str);
|
wmem_free(NULL, value_str);
|
||||||
break;
|
break;
|
||||||
case PUT_PCRE:
|
case PUT_PCRE:
|
||||||
fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n",
|
fprintf(f, "%05d PUT_PCRE \t%s <GRegex> -> reg#%u\n",
|
||||||
id,
|
id,
|
||||||
g_regex_get_pattern(arg1->value.pcre),
|
fvalue_regex_pattern(arg1->value.pcre),
|
||||||
arg2->value.numeric);
|
arg2->value.numeric);
|
||||||
break;
|
break;
|
||||||
case CHECK_EXISTS:
|
case CHECK_EXISTS:
|
||||||
|
@ -365,7 +365,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
|
||||||
/* Put a constant PCRE in a register. These will not be cleared by
|
/* Put a constant PCRE in a register. These will not be cleared by
|
||||||
* free_register_overhead. */
|
* free_register_overhead. */
|
||||||
static gboolean
|
static gboolean
|
||||||
put_pcre(dfilter_t *df, GRegex *pcre, int reg)
|
put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg)
|
||||||
{
|
{
|
||||||
df->registers[reg] = g_list_append(NULL, pcre);
|
df->registers[reg] = g_list_append(NULL, pcre);
|
||||||
df->owns_memory[reg] = FALSE;
|
df->owns_memory[reg] = FALSE;
|
||||||
|
@ -404,7 +404,7 @@ any_matches(dfilter_t *df, int reg1, int reg2)
|
||||||
while (list_a) {
|
while (list_a) {
|
||||||
list_b = df->registers[reg2];
|
list_b = df->registers[reg2];
|
||||||
while (list_b) {
|
while (list_b) {
|
||||||
if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) {
|
if (fvalue_matches((fvalue_t *)list_a->data, list_b->data)) {
|
||||||
return TRUE;
|
return TRUE;
|
||||||
}
|
}
|
||||||
list_b = g_list_next(list_b);
|
list_b = g_list_next(list_b);
|
||||||
|
|
|
@ -36,7 +36,7 @@ typedef struct {
|
||||||
drange_t *drange;
|
drange_t *drange;
|
||||||
header_field_info *hfinfo;
|
header_field_info *hfinfo;
|
||||||
df_func_def_t *funcdef;
|
df_func_def_t *funcdef;
|
||||||
GRegex *pcre;
|
fvalue_regex_t *pcre;
|
||||||
} value;
|
} value;
|
||||||
|
|
||||||
} dfvm_value_t;
|
} dfvm_value_t;
|
||||||
|
|
|
@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
|
||||||
|
|
||||||
/* returns register number */
|
/* returns register number */
|
||||||
static int
|
static int
|
||||||
dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre)
|
dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre)
|
||||||
{
|
{
|
||||||
dfvm_insn_t *insn;
|
dfvm_insn_t *insn;
|
||||||
dfvm_value_t *val1, *val2;
|
dfvm_value_t *val1, *val2;
|
||||||
|
@ -427,7 +427,7 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
|
||||||
reg = dfw_append_function(dfw, st_arg, p_jmp);
|
reg = dfw_append_function(dfw, st_arg, p_jmp);
|
||||||
}
|
}
|
||||||
else if (e_type == STTYPE_PCRE) {
|
else if (e_type == STTYPE_PCRE) {
|
||||||
reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg));
|
reg = dfw_append_put_pcre(dfw, stnode_steal_data(st_arg));
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* printf("sttype_id is %u\n", (unsigned)e_type); */
|
/* printf("sttype_id is %u\n", (unsigned)e_type); */
|
||||||
|
|
|
@ -27,7 +27,7 @@ fvalue_free(gpointer value)
|
||||||
static void
|
static void
|
||||||
pcre_free(gpointer value)
|
pcre_free(gpointer value)
|
||||||
{
|
{
|
||||||
GRegex *pcre = (GRegex*)value;
|
fvalue_regex_t *pcre = value;
|
||||||
|
|
||||||
/* If the data was not claimed with stnode_steal_data(), free it. */
|
/* If the data was not claimed with stnode_steal_data(), free it. */
|
||||||
if (pcre) {
|
if (pcre) {
|
||||||
|
@ -36,7 +36,7 @@ pcre_free(gpointer value)
|
||||||
* count; it'll get freed when the reference count drops
|
* count; it'll get freed when the reference count drops
|
||||||
* to 0.
|
* to 0.
|
||||||
*/
|
*/
|
||||||
g_regex_unref(pcre);
|
fvalue_regex_free(pcre);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -67,9 +67,9 @@ field_tostr(const void *data, gboolean pretty _U_)
|
||||||
static char *
|
static char *
|
||||||
pcre_tostr(const void *data, gboolean pretty _U_)
|
pcre_tostr(const void *data, gboolean pretty _U_)
|
||||||
{
|
{
|
||||||
const GRegex *pcre = (const GRegex *)data;
|
const fvalue_regex_t *pcre = data;
|
||||||
|
|
||||||
return g_strdup(g_regex_get_pattern(pcre));
|
return g_strdup(fvalue_regex_pattern(pcre));
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -586,20 +586,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||||
{
|
{
|
||||||
GByteArray *a = fv->value.bytes;
|
GByteArray *a = fv->value.bytes;
|
||||||
|
|
||||||
return g_regex_match_full(
|
return fvalue_regex_matches(regex, a->data, a->len);
|
||||||
regex, /* Compiled PCRE */
|
|
||||||
(char *)a->data, /* The data to check for the pattern... */
|
|
||||||
(int)a->len, /* ... and its length */
|
|
||||||
0, /* Start offset within data */
|
|
||||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
|
||||||
NULL, /* We are not interested in the match information */
|
|
||||||
NULL /* We don't want error information */
|
|
||||||
);
|
|
||||||
/* NOTE - DO NOT g_free(data) */
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -257,7 +257,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||||
{
|
{
|
||||||
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
|
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
|
||||||
volatile gboolean rc = FALSE;
|
volatile gboolean rc = FALSE;
|
||||||
|
@ -271,26 +271,9 @@ cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
||||||
if (a->tvb != NULL) {
|
if (a->tvb != NULL) {
|
||||||
tvb_len = tvb_captured_length(a->tvb);
|
tvb_len = tvb_captured_length(a->tvb);
|
||||||
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
|
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
|
||||||
rc = g_regex_match_full(
|
rc = fvalue_regex_matches(regex, data, tvb_len);
|
||||||
regex, /* Compiled PCRE */
|
|
||||||
data, /* The data to check for the pattern... */
|
|
||||||
tvb_len, /* ... and its length */
|
|
||||||
0, /* Start offset within data */
|
|
||||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
|
||||||
NULL, /* We are not interested in the match information */
|
|
||||||
NULL /* We don't want error information */
|
|
||||||
);
|
|
||||||
/* NOTE - DO NOT g_free(data) */
|
|
||||||
} else {
|
} else {
|
||||||
rc = g_regex_match_full(
|
rc = fvalue_regex_matches(regex, a->proto_string, -1);
|
||||||
regex, /* Compiled PCRE */
|
|
||||||
a->proto_string, /* The data to check for the pattern... */
|
|
||||||
(int)strlen(a->proto_string), /* ... and its length */
|
|
||||||
0, /* Start offset within data */
|
|
||||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
|
||||||
NULL, /* We are not interested in the match information */
|
|
||||||
NULL /* We don't want error information */
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
CATCH_ALL {
|
CATCH_ALL {
|
||||||
|
|
|
@ -138,22 +138,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
||||||
}
|
}
|
||||||
|
|
||||||
static gboolean
|
static gboolean
|
||||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||||
{
|
{
|
||||||
char *str = fv->value.string;
|
char *str = fv->value.string;
|
||||||
|
|
||||||
if (! regex) {
|
if (! regex) {
|
||||||
return FALSE;
|
return FALSE;
|
||||||
}
|
}
|
||||||
return g_regex_match_full(
|
return fvalue_regex_matches(regex, str, -1);
|
||||||
regex, /* Compiled PCRE */
|
|
||||||
str, /* The data to check for the pattern... */
|
|
||||||
(int)strlen(str), /* ... and its length */
|
|
||||||
0, /* Start offset within data */
|
|
||||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
|
||||||
NULL, /* We are not interested in the match information */
|
|
||||||
NULL /* We don't want error information */
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
|
|
@ -64,7 +64,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
|
||||||
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
|
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
|
||||||
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
|
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
|
||||||
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
|
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
|
||||||
typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*);
|
typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*);
|
||||||
|
|
||||||
typedef guint (*FvalueLen)(fvalue_t*);
|
typedef guint (*FvalueLen)(fvalue_t*);
|
||||||
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);
|
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);
|
||||||
|
|
|
@ -8,12 +8,14 @@
|
||||||
|
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
|
||||||
#include <ftypes-int.h>
|
#include "ftypes-int.h"
|
||||||
#include <glib.h>
|
|
||||||
|
|
||||||
#include "ftypes.h"
|
|
||||||
#include <wsutil/ws_assert.h>
|
#include <wsutil/ws_assert.h>
|
||||||
|
|
||||||
|
struct _fvalue_regex_t {
|
||||||
|
GRegex *code;
|
||||||
|
};
|
||||||
|
|
||||||
/* Keep track of ftype_t's via their ftenum number */
|
/* Keep track of ftype_t's via their ftenum number */
|
||||||
static ftype_t* type_list[FT_NUM_TYPES];
|
static ftype_t* type_list[FT_NUM_TYPES];
|
||||||
|
|
||||||
|
@ -747,13 +749,64 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
|
||||||
}
|
}
|
||||||
|
|
||||||
gboolean
|
gboolean
|
||||||
fvalue_matches(const fvalue_t *a, const GRegex *b)
|
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
|
||||||
{
|
{
|
||||||
/* XXX - check compatibility of a and b */
|
/* XXX - check compatibility of a and b */
|
||||||
ws_assert(a->ftype->cmp_matches);
|
ws_assert(a->ftype->cmp_matches);
|
||||||
return a->ftype->cmp_matches(a, b);
|
return a->ftype->cmp_matches(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fvalue_regex_t *
|
||||||
|
fvalue_regex_compile(const char *patt, char **errmsg)
|
||||||
|
{
|
||||||
|
GError *regex_error = NULL;
|
||||||
|
GRegex *pcre;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* As a string is not guaranteed to contain valid UTF-8,
|
||||||
|
* we have to disable support for UTF-8 patterns and treat
|
||||||
|
* every pattern and subject as raw bytes.
|
||||||
|
*
|
||||||
|
* Should support for UTF-8 patterns be necessary, then we
|
||||||
|
* should compile a pattern without G_REGEX_RAW. Additionally,
|
||||||
|
* we MUST use g_utf8_validate() before calling g_regex_match_full()
|
||||||
|
* or risk crashes.
|
||||||
|
*/
|
||||||
|
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
|
||||||
|
|
||||||
|
pcre = g_regex_new(patt, cflags, 0, &regex_error);
|
||||||
|
|
||||||
|
if (regex_error) {
|
||||||
|
*errmsg = g_strdup(regex_error->message);
|
||||||
|
g_error_free(regex_error);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1);
|
||||||
|
re->code = pcre;
|
||||||
|
|
||||||
|
return re;
|
||||||
|
}
|
||||||
|
|
||||||
|
gboolean
|
||||||
|
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
|
||||||
|
{
|
||||||
|
return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
fvalue_regex_free(fvalue_regex_t *regex)
|
||||||
|
{
|
||||||
|
g_regex_unref(regex->code);
|
||||||
|
g_free(regex);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *
|
||||||
|
fvalue_regex_pattern(const fvalue_regex_t *regex)
|
||||||
|
{
|
||||||
|
return g_regex_get_pattern(regex->code);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Editor modelines - https://www.wireshark.org/tools/modelines.html
|
* Editor modelines - https://www.wireshark.org/tools/modelines.html
|
||||||
*
|
*
|
||||||
|
|
|
@ -131,6 +131,9 @@ typedef enum ft_framenum_type ft_framenum_type_t;
|
||||||
struct _ftype_t;
|
struct _ftype_t;
|
||||||
typedef struct _ftype_t ftype_t;
|
typedef struct _ftype_t ftype_t;
|
||||||
|
|
||||||
|
struct _fvalue_regex_t;
|
||||||
|
typedef struct _fvalue_regex_t fvalue_regex_t;
|
||||||
|
|
||||||
/* String representation types. */
|
/* String representation types. */
|
||||||
enum ftrepr {
|
enum ftrepr {
|
||||||
FTREPR_DISPLAY,
|
FTREPR_DISPLAY,
|
||||||
|
@ -368,7 +371,19 @@ gboolean
|
||||||
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
|
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
|
||||||
|
|
||||||
gboolean
|
gboolean
|
||||||
fvalue_matches(const fvalue_t *a, const GRegex *b);
|
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re);
|
||||||
|
|
||||||
|
fvalue_regex_t *
|
||||||
|
fvalue_regex_compile(const char *patt, char **errmsg);
|
||||||
|
|
||||||
|
gboolean
|
||||||
|
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size);
|
||||||
|
|
||||||
|
void
|
||||||
|
fvalue_regex_free(fvalue_regex_t *regex);
|
||||||
|
|
||||||
|
const char *
|
||||||
|
fvalue_regex_pattern(const fvalue_regex_t *regex);
|
||||||
|
|
||||||
guint
|
guint
|
||||||
fvalue_length(fvalue_t *fv);
|
fvalue_length(fvalue_t *fv);
|
||||||
|
|
|
@ -42,3 +42,7 @@ class case_syntax(unittest.TestCase):
|
||||||
def test_matches_3(self, checkDFilterFail):
|
def test_matches_3(self, checkDFilterFail):
|
||||||
dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
|
dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
|
||||||
checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
|
checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
|
||||||
|
|
||||||
|
def test_matches_4(self, checkDFilterCount):
|
||||||
|
dfilter = r'http.host matches r"update\.microsoft\.c.."'
|
||||||
|
checkDFilterCount(dfilter, 1)
|
||||||
|
|
Loading…
Reference in New Issue