dfilter: Add a thin encapsulation layer for REs
This commit is contained in:
parent
2e048df011
commit
e8800ff3c4
|
@ -78,46 +78,25 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
|
|||
return stnode_new(STTYPE_FUNCTION, def, name);
|
||||
}
|
||||
|
||||
/* Gets a GRegex from a string, and sets the error message on failure. */
|
||||
/* Gets a regex from a string, and sets the error message on failure. */
|
||||
stnode_t *
|
||||
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
|
||||
{
|
||||
GError *regex_error = NULL;
|
||||
GRegex *pcre;
|
||||
const char *patt;
|
||||
fvalue_regex_t *pcre;
|
||||
char *errmsg = NULL;
|
||||
|
||||
if (stnode_type_id(node) == STTYPE_STRING) {
|
||||
patt = stnode_data(node);
|
||||
}
|
||||
else {
|
||||
if (stnode_type_id(node) != STTYPE_STRING) {
|
||||
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
|
||||
return node;
|
||||
}
|
||||
|
||||
patt = stnode_data(node);
|
||||
const char *patt = stnode_data(node);
|
||||
ws_debug("Compile regex pattern: %s", patt);
|
||||
|
||||
/*
|
||||
* As a string is not guaranteed to contain valid UTF-8,
|
||||
* we have to disable support for UTF-8 patterns and treat
|
||||
* every pattern and subject as raw bytes.
|
||||
*
|
||||
* Should support for UTF-8 patterns be necessary, then we
|
||||
* should compile a pattern without G_REGEX_RAW. Additionally,
|
||||
* we MUST use g_utf8_validate() before calling g_regex_match_full()
|
||||
* or risk crashes.
|
||||
*/
|
||||
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
|
||||
|
||||
pcre = g_regex_new(
|
||||
patt, /* pattern */
|
||||
cflags, /* Compile options */
|
||||
0, /* Match options */
|
||||
&regex_error); /* Compile / study errors */
|
||||
|
||||
if (regex_error) {
|
||||
dfilter_parse_fail(dfw, "%s", regex_error->message);
|
||||
g_error_free(regex_error);
|
||||
pcre = fvalue_regex_compile(patt, &errmsg);
|
||||
if (errmsg) {
|
||||
dfilter_parse_fail(dfw, "%s", errmsg);
|
||||
g_free(errmsg);
|
||||
return node;
|
||||
}
|
||||
|
||||
|
|
|
@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v)
|
|||
drange_free(v->value.drange);
|
||||
break;
|
||||
case PCRE:
|
||||
g_regex_unref(v->value.pcre);
|
||||
fvalue_regex_free(v->value.pcre);
|
||||
break;
|
||||
default:
|
||||
/* nothing */
|
||||
|
@ -110,9 +110,9 @@ dfvm_dump(FILE *f, dfilter_t *df)
|
|||
wmem_free(NULL, value_str);
|
||||
break;
|
||||
case PUT_PCRE:
|
||||
fprintf(f, "%05d PUT_PCRE\t%s -> reg#%u\n",
|
||||
fprintf(f, "%05d PUT_PCRE \t%s <GRegex> -> reg#%u\n",
|
||||
id,
|
||||
g_regex_get_pattern(arg1->value.pcre),
|
||||
fvalue_regex_pattern(arg1->value.pcre),
|
||||
arg2->value.numeric);
|
||||
break;
|
||||
case CHECK_EXISTS:
|
||||
|
@ -365,7 +365,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
|
|||
/* Put a constant PCRE in a register. These will not be cleared by
|
||||
* free_register_overhead. */
|
||||
static gboolean
|
||||
put_pcre(dfilter_t *df, GRegex *pcre, int reg)
|
||||
put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg)
|
||||
{
|
||||
df->registers[reg] = g_list_append(NULL, pcre);
|
||||
df->owns_memory[reg] = FALSE;
|
||||
|
@ -404,7 +404,7 @@ any_matches(dfilter_t *df, int reg1, int reg2)
|
|||
while (list_a) {
|
||||
list_b = df->registers[reg2];
|
||||
while (list_b) {
|
||||
if (fvalue_matches((fvalue_t *)list_a->data, (GRegex *)list_b->data)) {
|
||||
if (fvalue_matches((fvalue_t *)list_a->data, list_b->data)) {
|
||||
return TRUE;
|
||||
}
|
||||
list_b = g_list_next(list_b);
|
||||
|
|
|
@ -36,7 +36,7 @@ typedef struct {
|
|||
drange_t *drange;
|
||||
header_field_info *hfinfo;
|
||||
df_func_def_t *funcdef;
|
||||
GRegex *pcre;
|
||||
fvalue_regex_t *pcre;
|
||||
} value;
|
||||
|
||||
} dfvm_value_t;
|
||||
|
|
|
@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
|
|||
|
||||
/* returns register number */
|
||||
static int
|
||||
dfw_append_put_pcre(dfwork_t *dfw, GRegex *pcre)
|
||||
dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre)
|
||||
{
|
||||
dfvm_insn_t *insn;
|
||||
dfvm_value_t *val1, *val2;
|
||||
|
@ -427,7 +427,7 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, dfvm_value_t **p_jmp)
|
|||
reg = dfw_append_function(dfw, st_arg, p_jmp);
|
||||
}
|
||||
else if (e_type == STTYPE_PCRE) {
|
||||
reg = dfw_append_put_pcre(dfw, (GRegex *)stnode_steal_data(st_arg));
|
||||
reg = dfw_append_put_pcre(dfw, stnode_steal_data(st_arg));
|
||||
}
|
||||
else {
|
||||
/* printf("sttype_id is %u\n", (unsigned)e_type); */
|
||||
|
|
|
@ -27,7 +27,7 @@ fvalue_free(gpointer value)
|
|||
static void
|
||||
pcre_free(gpointer value)
|
||||
{
|
||||
GRegex *pcre = (GRegex*)value;
|
||||
fvalue_regex_t *pcre = value;
|
||||
|
||||
/* If the data was not claimed with stnode_steal_data(), free it. */
|
||||
if (pcre) {
|
||||
|
@ -36,7 +36,7 @@ pcre_free(gpointer value)
|
|||
* count; it'll get freed when the reference count drops
|
||||
* to 0.
|
||||
*/
|
||||
g_regex_unref(pcre);
|
||||
fvalue_regex_free(pcre);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -67,9 +67,9 @@ field_tostr(const void *data, gboolean pretty _U_)
|
|||
static char *
|
||||
pcre_tostr(const void *data, gboolean pretty _U_)
|
||||
{
|
||||
const GRegex *pcre = (const GRegex *)data;
|
||||
const fvalue_regex_t *pcre = data;
|
||||
|
||||
return g_strdup(g_regex_get_pattern(pcre));
|
||||
return g_strdup(fvalue_regex_pattern(pcre));
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -586,20 +586,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
|||
}
|
||||
|
||||
static gboolean
|
||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
||||
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||
{
|
||||
GByteArray *a = fv->value.bytes;
|
||||
|
||||
return g_regex_match_full(
|
||||
regex, /* Compiled PCRE */
|
||||
(char *)a->data, /* The data to check for the pattern... */
|
||||
(int)a->len, /* ... and its length */
|
||||
0, /* Start offset within data */
|
||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
||||
NULL, /* We are not interested in the match information */
|
||||
NULL /* We don't want error information */
|
||||
);
|
||||
/* NOTE - DO NOT g_free(data) */
|
||||
return fvalue_regex_matches(regex, a->data, a->len);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -257,7 +257,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
|||
}
|
||||
|
||||
static gboolean
|
||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
||||
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||
{
|
||||
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
|
||||
volatile gboolean rc = FALSE;
|
||||
|
@ -271,26 +271,9 @@ cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
|||
if (a->tvb != NULL) {
|
||||
tvb_len = tvb_captured_length(a->tvb);
|
||||
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
|
||||
rc = g_regex_match_full(
|
||||
regex, /* Compiled PCRE */
|
||||
data, /* The data to check for the pattern... */
|
||||
tvb_len, /* ... and its length */
|
||||
0, /* Start offset within data */
|
||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
||||
NULL, /* We are not interested in the match information */
|
||||
NULL /* We don't want error information */
|
||||
);
|
||||
/* NOTE - DO NOT g_free(data) */
|
||||
rc = fvalue_regex_matches(regex, data, tvb_len);
|
||||
} else {
|
||||
rc = g_regex_match_full(
|
||||
regex, /* Compiled PCRE */
|
||||
a->proto_string, /* The data to check for the pattern... */
|
||||
(int)strlen(a->proto_string), /* ... and its length */
|
||||
0, /* Start offset within data */
|
||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
||||
NULL, /* We are not interested in the match information */
|
||||
NULL /* We don't want error information */
|
||||
);
|
||||
rc = fvalue_regex_matches(regex, a->proto_string, -1);
|
||||
}
|
||||
}
|
||||
CATCH_ALL {
|
||||
|
|
|
@ -138,22 +138,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
|
|||
}
|
||||
|
||||
static gboolean
|
||||
cmp_matches(const fvalue_t *fv, const GRegex *regex)
|
||||
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
|
||||
{
|
||||
char *str = fv->value.string;
|
||||
|
||||
if (! regex) {
|
||||
return FALSE;
|
||||
}
|
||||
return g_regex_match_full(
|
||||
regex, /* Compiled PCRE */
|
||||
str, /* The data to check for the pattern... */
|
||||
(int)strlen(str), /* ... and its length */
|
||||
0, /* Start offset within data */
|
||||
(GRegexMatchFlags)0, /* GRegexMatchFlags */
|
||||
NULL, /* We are not interested in the match information */
|
||||
NULL /* We don't want error information */
|
||||
);
|
||||
return fvalue_regex_matches(regex, str, -1);
|
||||
}
|
||||
|
||||
void
|
||||
|
|
|
@ -64,7 +64,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
|
|||
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
|
||||
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
|
||||
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
|
||||
typedef gboolean (*FvalueMatches)(const fvalue_t*, const GRegex*);
|
||||
typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*);
|
||||
|
||||
typedef guint (*FvalueLen)(fvalue_t*);
|
||||
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);
|
||||
|
|
|
@ -8,12 +8,14 @@
|
|||
|
||||
#include "config.h"
|
||||
|
||||
#include <ftypes-int.h>
|
||||
#include <glib.h>
|
||||
#include "ftypes-int.h"
|
||||
|
||||
#include "ftypes.h"
|
||||
#include <wsutil/ws_assert.h>
|
||||
|
||||
struct _fvalue_regex_t {
|
||||
GRegex *code;
|
||||
};
|
||||
|
||||
/* Keep track of ftype_t's via their ftenum number */
|
||||
static ftype_t* type_list[FT_NUM_TYPES];
|
||||
|
||||
|
@ -747,13 +749,64 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
|
|||
}
|
||||
|
||||
gboolean
|
||||
fvalue_matches(const fvalue_t *a, const GRegex *b)
|
||||
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
|
||||
{
|
||||
/* XXX - check compatibility of a and b */
|
||||
ws_assert(a->ftype->cmp_matches);
|
||||
return a->ftype->cmp_matches(a, b);
|
||||
}
|
||||
|
||||
fvalue_regex_t *
|
||||
fvalue_regex_compile(const char *patt, char **errmsg)
|
||||
{
|
||||
GError *regex_error = NULL;
|
||||
GRegex *pcre;
|
||||
|
||||
/*
|
||||
* As a string is not guaranteed to contain valid UTF-8,
|
||||
* we have to disable support for UTF-8 patterns and treat
|
||||
* every pattern and subject as raw bytes.
|
||||
*
|
||||
* Should support for UTF-8 patterns be necessary, then we
|
||||
* should compile a pattern without G_REGEX_RAW. Additionally,
|
||||
* we MUST use g_utf8_validate() before calling g_regex_match_full()
|
||||
* or risk crashes.
|
||||
*/
|
||||
GRegexCompileFlags cflags = G_REGEX_CASELESS | G_REGEX_OPTIMIZE | G_REGEX_RAW;
|
||||
|
||||
pcre = g_regex_new(patt, cflags, 0, ®ex_error);
|
||||
|
||||
if (regex_error) {
|
||||
*errmsg = g_strdup(regex_error->message);
|
||||
g_error_free(regex_error);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1);
|
||||
re->code = pcre;
|
||||
|
||||
return re;
|
||||
}
|
||||
|
||||
gboolean
|
||||
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
|
||||
{
|
||||
return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL);
|
||||
}
|
||||
|
||||
void
|
||||
fvalue_regex_free(fvalue_regex_t *regex)
|
||||
{
|
||||
g_regex_unref(regex->code);
|
||||
g_free(regex);
|
||||
}
|
||||
|
||||
const char *
|
||||
fvalue_regex_pattern(const fvalue_regex_t *regex)
|
||||
{
|
||||
return g_regex_get_pattern(regex->code);
|
||||
}
|
||||
|
||||
/*
|
||||
* Editor modelines - https://www.wireshark.org/tools/modelines.html
|
||||
*
|
||||
|
|
|
@ -131,6 +131,9 @@ typedef enum ft_framenum_type ft_framenum_type_t;
|
|||
struct _ftype_t;
|
||||
typedef struct _ftype_t ftype_t;
|
||||
|
||||
struct _fvalue_regex_t;
|
||||
typedef struct _fvalue_regex_t fvalue_regex_t;
|
||||
|
||||
/* String representation types. */
|
||||
enum ftrepr {
|
||||
FTREPR_DISPLAY,
|
||||
|
@ -368,7 +371,19 @@ gboolean
|
|||
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
|
||||
|
||||
gboolean
|
||||
fvalue_matches(const fvalue_t *a, const GRegex *b);
|
||||
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re);
|
||||
|
||||
fvalue_regex_t *
|
||||
fvalue_regex_compile(const char *patt, char **errmsg);
|
||||
|
||||
gboolean
|
||||
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size);
|
||||
|
||||
void
|
||||
fvalue_regex_free(fvalue_regex_t *regex);
|
||||
|
||||
const char *
|
||||
fvalue_regex_pattern(const fvalue_regex_t *regex);
|
||||
|
||||
guint
|
||||
fvalue_length(fvalue_t *fv);
|
||||
|
|
|
@ -42,3 +42,7 @@ class case_syntax(unittest.TestCase):
|
|||
def test_matches_3(self, checkDFilterFail):
|
||||
dfilter = 'http.request.method matches "^HEAD" matches "^POST"'
|
||||
checkDFilterFail(dfilter, '"matches" was unexpected in this context.')
|
||||
|
||||
def test_matches_4(self, checkDFilterCount):
|
||||
dfilter = r'http.host matches r"update\.microsoft\.c.."'
|
||||
checkDFilterCount(dfilter, 1)
|
||||
|
|
Loading…
Reference in New Issue