dfilter: Add support for universal quantifiers

Adds the keywords "any" and "all", which apply a quantifier
to any existing relational operator.

Filter: all tcp.port in {100, 2000..3000}

Syntax tree:
 0 ALL TEST_IN:
   1 FIELD(tcp.port)
   1 SET(#2):
     2 FVALUE(100 <FT_UINT16>)
     2 FVALUE(2000 <FT_UINT16>) .. FVALUE(3000 <FT_UINT16>)

Instructions:
00000 READ_TREE		tcp.port -> reg#0
00001 IF_FALSE_GOTO	5
00002 ALL_EQ		reg#0 === 100 <FT_UINT16>
00003 IF_TRUE_GOTO	5
00004 ALL_IN_RANGE	reg#0 in { 2000 <FT_UINT16> .. 3000 <FT_UINT16> }
00005 RETURN
This commit is contained in:
João Valverde 2022-04-20 00:04:05 +01:00
parent 164f3ce9a2
commit b602911b31
12 changed files with 278 additions and 47 deletions

View File

@ -77,6 +77,16 @@ operators, their aliases and meaning:
all_eq, === All fields must be equal
any_ne, !== Any fields must be not equal
The modifiers "any" and "all" can be placed before any comparison to make
the test match any or all fields:
all tcp.port > 1024
any ip.addr != 1.1.1.1
The "any" and "all" modifiers take precedence over comparison operators such
as "===" and "any_eq".
=== Search and match operators
Additional operators exist expressed only in English, not C-like syntax:

View File

@ -81,6 +81,7 @@ The following features are new (or have been significantly updated) since versio
** New display filter functions max(), min() and abs() have been added.
** Functions can accept expressions as arguments, including other functions.
Previously only protocol fields and slices were syntactically valid function arguments.
** The quantifiers "any" and "all" have been added and can be applied to any relational operator, for example `all tcp.port > 1024`.
* The `text2pcap` command and the “Import from Hex Dump” feature have been updated and enhanced:
** `text2pcap` supports writing the output file in all the capture file formats that wiretap library supports, using the same `-F` option as `editcap`, `mergecap`, and `tshark`.

View File

@ -35,13 +35,19 @@ dfvm_opcode_tostr(dfvm_opcode_t code)
case ANY_EQ: return "ANY_EQ";
case ALL_NE: return "ALL_NE";
case ANY_NE: return "ANY_NE";
case ALL_GT: return "ALL_GT";
case ANY_GT: return "ANY_GT";
case ALL_GE: return "ALL_GE";
case ANY_GE: return "ANY_GE";
case ALL_LT: return "ALL_LT";
case ANY_LT: return "ANY_LT";
case ALL_LE: return "ALL_LE";
case ANY_LE: return "ANY_LE";
case ANY_ZERO: return "ANY_ZERO";
case ALL_ZERO: return "ALL_ZERO";
case ANY_ZERO: return "ANY_ZERO";
case ALL_CONTAINS: return "ALL_CONTAINS";
case ANY_CONTAINS: return "ANY_CONTAINS";
case ALL_MATCHES: return "ALL_MATCHES";
case ANY_MATCHES: return "ANY_MATCHES";
case MK_SLICE: return "MK_SLICE";
case MK_BITWISE_AND: return "MK_BITWISE_AND";
@ -54,6 +60,7 @@ dfvm_opcode_tostr(dfvm_opcode_t code)
case CALL_FUNCTION: return "CALL_FUNCTION";
case STACK_PUSH: return "STACK_PUSH";
case STACK_POP: return "STACK_POP";
case ALL_IN_RANGE: return "ALL_IN_RANGE";
case ANY_IN_RANGE: return "ANY_IN_RANGE";
}
return "(fix-opcode-string)";
@ -370,24 +377,28 @@ dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, gboolean print_references)
id, arg1_str, arg2_str);
break;
case ALL_GT:
case ANY_GT:
wmem_strbuf_append_printf(buf, "%05d ANY_GT\t\t%s > %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s > %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case ALL_GE:
case ANY_GE:
wmem_strbuf_append_printf(buf, "%05d ANY_GE\t\t%s >= %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s >= %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case ALL_LT:
case ANY_LT:
wmem_strbuf_append_printf(buf, "%05d ANY_LT\t\t%s < %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s < %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case ALL_LE:
case ANY_LE:
wmem_strbuf_append_printf(buf, "%05d ANY_LE\t\t%s <= %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s <= %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case MK_BITWISE_AND:
@ -395,14 +406,10 @@ dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, gboolean print_references)
id, arg1_str, arg2_str, arg3_str);
break;
case ANY_ZERO:
wmem_strbuf_append_printf(buf, "%05d ANY_ZERO\t\t%s\n",
id, arg1_str);
break;
case ALL_ZERO:
wmem_strbuf_append_printf(buf, "%05d ALL_ZERO\t\t%s\n",
id, arg1_str);
case ANY_ZERO:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str);
break;
case DFVM_ADD:
@ -430,19 +437,23 @@ dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, gboolean print_references)
id, arg1_str, arg2_str, arg3_str);
break;
case ALL_CONTAINS:
case ANY_CONTAINS:
wmem_strbuf_append_printf(buf, "%05d ANY_CONTAINS\t%s contains %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t%s contains %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case ALL_MATCHES:
case ANY_MATCHES:
wmem_strbuf_append_printf(buf, "%05d ANY_MATCHES\t%s matches %s\n",
id, arg1_str, arg2_str);
wmem_strbuf_append_printf(buf, "%05d %s\t%s matches %s\n",
id, dfvm_opcode_tostr(insn->op), arg1_str, arg2_str);
break;
case ALL_IN_RANGE:
case ANY_IN_RANGE:
wmem_strbuf_append_printf(buf, "%05d ANY_IN_RANGE\t%s in { %s .. %s }\n",
id, arg1_str, arg2_str, arg3_str);
wmem_strbuf_append_printf(buf, "%05d %s\t%s in { %s .. %s }\n",
id, dfvm_opcode_tostr(insn->op),
arg1_str, arg2_str, arg3_str);
break;
case MK_MINUS:
@ -812,6 +823,21 @@ any_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
return FALSE;
}
/*
 * "All" flavour of the regex match test.
 *
 * Walks the value list held in the register selected by arg1 and applies
 * fvalue_matches() with the compiled regex carried by arg2 to each entry.
 * Returns FALSE at the first entry that does not match, and TRUE when the
 * list is exhausted (which includes the empty list).
 */
static gboolean
all_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
	GSList *entry = df->registers[arg1->value.numeric];
	ws_regex_t *regex = arg2->value.pcre;

	for (; entry != NULL; entry = g_slist_next(entry)) {
		if (fvalue_matches(entry->data, regex))
			continue;
		/* One failing value is enough to reject the whole set. */
		return FALSE;
	}
	return TRUE;
}
static gboolean
any_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
@ -826,7 +852,20 @@ any_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
}
static gboolean
any_in_range(dfilter_t *df, dfvm_value_t *arg1,
all_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
	GSList *cur;

	/*
	 * Every value in the list must satisfy low <= value <= high.
	 * The upper bound is only evaluated when the lower bound holds.
	 * An empty list yields TRUE.
	 */
	for (cur = list1; cur != NULL; cur = g_slist_next(cur)) {
		gboolean inside = fvalue_ge(cur->data, low) &&
				fvalue_le(cur->data, high);

		if (!inside)
			return FALSE;
	}
	return TRUE;
}
static gboolean
match_in_range(dfilter_t *df, enum match_how how, dfvm_value_t *arg1,
dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
GSList *list1 = df->registers[arg1->value.numeric];
@ -855,7 +894,27 @@ any_in_range(dfilter_t *df, dfvm_value_t *arg1,
else {
ws_assert_not_reached();
}
return any_in_range_internal(list1, low, high);
if (how == MATCH_ALL)
return all_in_range_internal(list1, low, high);
else if (how == MATCH_ANY)
return any_in_range_internal(list1, low, high);
else
ws_assert_not_reached();
}
/*
 * "Any" flavour of the range test: forwards to match_in_range() with
 * MATCH_ANY and hands its verdict back unchanged.
 */
static gboolean
any_in_range(dfilter_t *df, dfvm_value_t *arg1,
		dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
	gboolean verdict;

	verdict = match_in_range(df, MATCH_ANY, arg1, arg_low, arg_high);
	return verdict;
}
/*
 * "All" flavour of the range test: forwards to match_in_range() with
 * MATCH_ALL and hands its verdict back unchanged.
 */
static gboolean
all_in_range(dfilter_t *df, dfvm_value_t *arg1,
		dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
	gboolean verdict;

	verdict = match_in_range(df, MATCH_ALL, arg1, arg_low, arg_high);
	return verdict;
}
/* Clear registers that were populated during evaluation.
@ -1246,18 +1305,34 @@ dfvm_apply(dfilter_t *df, proto_tree *tree)
accum = any_test(df, fvalue_ne, arg1, arg2);
break;
case ALL_GT:
accum = all_test(df, fvalue_gt, arg1, arg2);
break;
case ANY_GT:
accum = any_test(df, fvalue_gt, arg1, arg2);
break;
case ALL_GE:
accum = all_test(df, fvalue_ge, arg1, arg2);
break;
case ANY_GE:
accum = any_test(df, fvalue_ge, arg1, arg2);
break;
case ALL_LT:
accum = all_test(df, fvalue_lt, arg1, arg2);
break;
case ANY_LT:
accum = any_test(df, fvalue_lt, arg1, arg2);
break;
case ALL_LE:
accum = all_test(df, fvalue_le, arg1, arg2);
break;
case ANY_LE:
accum = any_test(df, fvalue_le, arg1, arg2);
break;
@ -1286,22 +1361,34 @@ dfvm_apply(dfilter_t *df, proto_tree *tree)
mk_binary(df, fvalue_modulo, arg1, arg2, arg3);
break;
case ALL_ZERO:
accum = all_test_unary(df, fvalue_is_zero, arg1);
break;
case ANY_ZERO:
accum = any_test_unary(df, fvalue_is_zero, arg1);
break;
case ALL_ZERO:
accum = all_test_unary(df, fvalue_is_zero, arg1);
case ALL_CONTAINS:
accum = all_test(df, fvalue_contains, arg1, arg2);
break;
case ANY_CONTAINS:
accum = any_test(df, fvalue_contains, arg1, arg2);
break;
case ALL_MATCHES:
accum = all_matches(df, arg1, arg2);
break;
case ANY_MATCHES:
accum = any_matches(df, arg1, arg2);
break;
case ALL_IN_RANGE:
accum = all_in_range(df, arg1, arg2, arg3);
break;
case ANY_IN_RANGE:
accum = any_in_range(df, arg1, arg2, arg3);
break;

View File

@ -61,13 +61,19 @@ typedef enum {
ANY_EQ,
ALL_NE,
ANY_NE,
ALL_GT,
ANY_GT,
ALL_GE,
ANY_GE,
ALL_LT,
ANY_LT,
ALL_LE,
ANY_LE,
ANY_ZERO,
ALL_ZERO,
ANY_ZERO,
ALL_CONTAINS,
ANY_CONTAINS,
ALL_MATCHES,
ANY_MATCHES,
MK_SLICE,
MK_BITWISE_AND,
@ -80,8 +86,8 @@ typedef enum {
CALL_FUNCTION,
STACK_PUSH,
STACK_POP,
ANY_IN_RANGE
ALL_IN_RANGE,
ANY_IN_RANGE,
} dfvm_opcode_t;
const char *

View File

@ -28,6 +28,61 @@ gencode(dfwork_t *dfw, stnode_t *st_node);
static dfvm_value_t *
gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr);
/*
 * Pick the ALL_* or ANY_* flavour of a relational opcode according to the
 * quantifier attached to the test.
 *
 * op  - the opcode the operator would generate on its own.
 * how - ST_MATCH_DEF leaves op untouched; ST_MATCH_ALL selects the ALL_*
 *       opcode and ST_MATCH_ANY selects the ANY_* opcode.
 *
 * The +1/-1 arithmetic relies on every ALL_* opcode being immediately
 * followed by its ANY_* twin in dfvm_opcode_t. Opcodes that have no
 * quantified form are not expected here and end in the assertion.
 * The switch deliberately has no default label (presumably so that the
 * compiler can flag opcodes added later -- confirm against build flags).
 */
static dfvm_opcode_t
select_opcode(dfvm_opcode_t op, test_match_t how)
{
	/* No explicit quantifier: keep whatever the operator asked for. */
	if (how == ST_MATCH_DEF) {
		return op;
	}

	switch (op) {
		/* Opcodes without a quantified variant fall out to the assertion. */
		case IF_TRUE_GOTO:
		case IF_FALSE_GOTO:
		case CHECK_EXISTS:
		case CHECK_EXISTS_R:
		case NOT:
		case RETURN:
		case READ_TREE:
		case READ_TREE_R:
		case READ_REFERENCE:
		case PUT_FVALUE:
		case MK_SLICE:
		case MK_BITWISE_AND:
		case MK_MINUS:
		case DFVM_ADD:
		case DFVM_SUBTRACT:
		case DFVM_MULTIPLY:
		case DFVM_DIVIDE:
		case DFVM_MODULO:
		case CALL_FUNCTION:
		case STACK_PUSH:
		case STACK_POP:
			break;

		/* Already an ANY_* opcode: step back one slot unless "any" was asked for. */
		case ANY_EQ:
		case ANY_NE:
		case ANY_GT:
		case ANY_GE:
		case ANY_LT:
		case ANY_LE:
		case ANY_ZERO:
		case ANY_CONTAINS:
		case ANY_MATCHES:
		case ANY_IN_RANGE:
			if (how == ST_MATCH_ANY) {
				return op;
			}
			return op - 1;

		/* Already an ALL_* opcode: step forward one slot unless "all" was asked for. */
		case ALL_EQ:
		case ALL_NE:
		case ALL_GT:
		case ALL_GE:
		case ALL_LT:
		case ALL_LE:
		case ALL_ZERO:
		case ALL_CONTAINS:
		case ALL_MATCHES:
		case ALL_IN_RANGE:
			if (how == ST_MATCH_ALL) {
				return op;
			}
			return op + 1;
	}
	ws_assert_not_reached();
}
static void
dfw_append_insn(dfwork_t *dfw, dfvm_insn_t *insn)
{
@ -298,7 +353,8 @@ gen_relation_insn(dfwork_t *dfw, dfvm_opcode_t op,
}
static void
gen_relation(dfwork_t *dfw, dfvm_opcode_t op, stnode_t *st_arg1, stnode_t *st_arg2)
gen_relation(dfwork_t *dfw, dfvm_opcode_t op, test_match_t how,
stnode_t *st_arg1, stnode_t *st_arg2)
{
GSList *jumps = NULL;
dfvm_value_t *val1, *val2;
@ -308,6 +364,7 @@ gen_relation(dfwork_t *dfw, dfvm_opcode_t op, stnode_t *st_arg1, stnode_t *st_ar
val2 = gen_entity(dfw, st_arg2, &jumps);
/* Then combine them in a DFVM instruction */
op = select_opcode(op, how);
gen_relation_insn(dfw, op, val1, val2, NULL, NULL);
/* If either of the relation arguments need an "exit" instruction
@ -331,7 +388,8 @@ fixup_jumps(gpointer data, gpointer user_data)
/* Generate the code for the in operator. It behaves much like an OR-ed
* series of == tests, but without the redundant existence checks. */
static void
gen_relation_in(dfwork_t *dfw, stnode_t *st_arg1, stnode_t *st_arg2)
gen_relation_in(dfwork_t *dfw, test_match_t how,
stnode_t *st_arg1, stnode_t *st_arg2)
{
dfvm_insn_t *insn;
dfvm_value_t *jmp;
@ -339,6 +397,7 @@ gen_relation_in(dfwork_t *dfw, stnode_t *st_arg1, stnode_t *st_arg2)
GSList *node_jumps = NULL;
dfvm_value_t *val1, *val2, *val3;
stnode_t *node1, *node2;
dfvm_opcode_t op;
GSList *nodelist_head, *nodelist;
/* Create code for the LHS of the relation */
@ -358,13 +417,15 @@ gen_relation_in(dfwork_t *dfw, stnode_t *st_arg1, stnode_t *st_arg2)
val3 = gen_entity(dfw, node2, &node_jumps);
/* Add test to see if the item is in range. */
gen_relation_insn(dfw, ANY_IN_RANGE, val1, val2, val3, NULL);
op = select_opcode(ANY_IN_RANGE, how);
gen_relation_insn(dfw, op, val1, val2, val3, NULL);
} else {
/* Normal element: add equality test. */
val2 = gen_entity(dfw, node1, &node_jumps);
/* Add test to see if the item matches */
gen_relation_insn(dfw, ANY_EQ, val1, val2, NULL, NULL);
op = select_opcode(ANY_EQ, how);
gen_relation_insn(dfw, op, val1, val2, NULL, NULL);
}
/* Exit as soon as we find a match */
@ -560,12 +621,14 @@ static void
gen_test(dfwork_t *dfw, stnode_t *st_node)
{
test_op_t st_op;
test_match_t st_how;
stnode_t *st_arg1, *st_arg2;
dfvm_insn_t *insn;
dfvm_value_t *jmp;
sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2);
st_how = sttype_test_get_match(st_node);
switch (st_op) {
case TEST_OP_UNINITIALIZED:
@ -603,47 +666,47 @@ gen_test(dfwork_t *dfw, stnode_t *st_node)
break;
case TEST_OP_ALL_EQ:
gen_relation(dfw, ALL_EQ, st_arg1, st_arg2);
gen_relation(dfw, ALL_EQ, st_how, st_arg1, st_arg2);
break;
case TEST_OP_ANY_EQ:
gen_relation(dfw, ANY_EQ, st_arg1, st_arg2);
gen_relation(dfw, ANY_EQ, st_how, st_arg1, st_arg2);
break;
case TEST_OP_ALL_NE:
gen_relation(dfw, ALL_NE, st_arg1, st_arg2);
gen_relation(dfw, ALL_NE, st_how, st_arg1, st_arg2);
break;
case TEST_OP_ANY_NE:
gen_relation(dfw, ANY_NE, st_arg1, st_arg2);
gen_relation(dfw, ANY_NE, st_how, st_arg1, st_arg2);
break;
case TEST_OP_GT:
gen_relation(dfw, ANY_GT, st_arg1, st_arg2);
gen_relation(dfw, ANY_GT, st_how, st_arg1, st_arg2);
break;
case TEST_OP_GE:
gen_relation(dfw, ANY_GE, st_arg1, st_arg2);
gen_relation(dfw, ANY_GE, st_how, st_arg1, st_arg2);
break;
case TEST_OP_LT:
gen_relation(dfw, ANY_LT, st_arg1, st_arg2);
gen_relation(dfw, ANY_LT, st_how, st_arg1, st_arg2);
break;
case TEST_OP_LE:
gen_relation(dfw, ANY_LE, st_arg1, st_arg2);
gen_relation(dfw, ANY_LE, st_how, st_arg1, st_arg2);
break;
case TEST_OP_CONTAINS:
gen_relation(dfw, ANY_CONTAINS, st_arg1, st_arg2);
gen_relation(dfw, ANY_CONTAINS, st_how, st_arg1, st_arg2);
break;
case TEST_OP_MATCHES:
gen_relation(dfw, ANY_MATCHES, st_arg1, st_arg2);
gen_relation(dfw, ANY_MATCHES, st_how, st_arg1, st_arg2);
break;
case TEST_OP_IN:
gen_relation_in(dfw, st_arg1, st_arg2);
gen_relation_in(dfw, st_how, st_arg1, st_arg2);
break;
case OP_BITWISE_AND:

View File

@ -88,7 +88,7 @@ shifting 3 more symbols. */
sentence ::= expr(X). { dfw->st_root = X; }
sentence ::= . { dfw->st_root = NULL; }
expr(X) ::= relation_test(R). { X = R; }
expr(X) ::= relation(R). { X = R; }
expr(X) ::= arithmetic_expr(E). { X = E; }
/* Logical tests */
@ -265,6 +265,20 @@ relation_test(T) ::= entity(E) TEST_NOT(P) TEST_IN(O) set(S).
sttype_test_set1(T, TEST_OP_NOT, O);
}
/* A relation is a relation_test, optionally prefixed by the ANY or ALL
 * keyword. When a keyword is present the corresponding quantifier is
 * recorded on the test node through sttype_test_set_match(); without a
 * keyword the test node is passed through untouched. */
relation(R) ::= relation_test(T). { R = T; }
relation(R) ::= ANY relation_test(T).
{
R = T;
sttype_test_set_match(R, ST_MATCH_ANY);
}
relation(R) ::= ALL relation_test(T).
{
R = T;
sttype_test_set_match(R, ST_MATCH_ALL);
}
set(S) ::= LBRACE set_list(L) RBRACE.
{
S = stnode_new(STTYPE_SET, L, NULL, NULL);

View File

@ -153,6 +153,8 @@ hyphen-bytes {hex2}(-{hex2})+
"{" return simple(TOKEN_LBRACE);
".." return simple(TOKEN_DOTDOT);
"}" return simple(TOKEN_RBRACE);
"any" return simple(TOKEN_ANY);
"all" return simple(TOKEN_ALL);
"==" return test(TOKEN_TEST_ANY_EQ);
"eq" return test(TOKEN_TEST_ANY_EQ);

View File

@ -13,6 +13,7 @@
/* Data carried by a test node (what stnode_data() yields for such a node). */
typedef struct {
guint32 magic; /* set to TEST_MAGIC by test_new(); checked with ws_assert_magic() */
test_op_t op; /* which test to perform; TEST_OP_UNINITIALIZED until assigned */
test_match_t how; /* quantifier for the test; ST_MATCH_DEF until changed via sttype_test_set_match() */
stnode_t *val1; /* first operand node (NULL until assigned) */
stnode_t *val2; /* second operand node (NULL until assigned) */
} test_t;
@ -30,6 +31,7 @@ test_new(gpointer junk)
test->magic = TEST_MAGIC;
test->op = TEST_OP_UNINITIALIZED;
test->how = ST_MATCH_DEF;
test->val1 = NULL;
test->val2 = NULL;
@ -43,7 +45,8 @@ test_dup(gconstpointer data)
test_t *test;
test = test_new(NULL);
test->op = org->op;
test->op = org->op;
test->how = org->how;
test->val1 = stnode_dup(org->val1);
/* Each operand is duplicated from its own counterpart in the original. */
test->val2 = stnode_dup(org->val2);
@ -212,6 +215,10 @@ test_todebug(const test_t *test)
break;
}
if (test->how == ST_MATCH_ALL)
return g_strdup_printf("ALL %s", s);
if (test->how == ST_MATCH_ANY)
return g_strdup_printf("ANY %s", s);
return g_strdup(s);
}
@ -341,6 +348,22 @@ sttype_test_get(stnode_t *node, test_op_t *p_op, stnode_t **p_val1, stnode_t **p
*p_val2 = test->val2;
}
/* Record the quantifier `how` on the test carried by `node`. */
void
sttype_test_set_match(stnode_t *node, test_match_t how)
{
test_t *test = stnode_data(node);
/* Check the magic number before touching the struct
 * (presumably guards against being handed a non-test node). */
ws_assert_magic(test, TEST_MAGIC);
test->how = how;
}
/* Return the quantifier currently recorded on the test carried by `node`. */
test_match_t
sttype_test_get_match(stnode_t *node)
{
test_t *test = stnode_data(node);
/* Check the magic number before reading the struct
 * (presumably guards against being handed a non-test node). */
ws_assert_magic(test, TEST_MAGIC);
return test->how;
}
void
sttype_register_test(void)
{

View File

@ -32,4 +32,10 @@ sttype_test_get_op(stnode_t *node);
void
sttype_test_get(stnode_t *node, test_op_t *p_op, stnode_t **p_val1, stnode_t **p_val2);
/* Set the quantifier (test_match_t) recorded on a test node. */
void
sttype_test_set_match(stnode_t *node, test_match_t how);
/* Get the quantifier (test_match_t) recorded on a test node. */
test_match_t
sttype_test_get_match(stnode_t *node);
#endif

View File

@ -65,6 +65,12 @@ typedef enum {
TEST_OP_IN
} test_op_t;
typedef enum {
ST_MATCH_DEF,
ST_MATCH_ANY,
ST_MATCH_ALL,
} test_match_t;
typedef gpointer (*STTypeNewFunc)(gpointer);
typedef gpointer (*STTypeDupFunc)(gconstpointer);
typedef void (*STTypeFreeFunc)(gpointer);

View File

@ -478,6 +478,8 @@ static const char *reserved_filter_names[] = {
"and",
"or",
"in",
"any",
"all",
NULL
};

View File

@ -311,4 +311,15 @@ class case_field_reference(unittest.TestCase):
def test_layer_7(self, checkDFilterCount):
dfilter = 'ip.dst#[-5] == 2.2.2.2'
checkDFilterCount(dfilter, 1)
@fixtures.uses_fixtures
class case_quantifiers(unittest.TestCase):
    # Exercises the "any" and "all" quantifier keywords against a capture
    # file; each test checks how many packets the filter selects.
    trace_file = "ipoipoip.pcap"

    def test_any_1(self, checkDFilterCount):
        # "any": expects two packets to be selected.
        checkDFilterCount('any ip.addr > 1.1.1.1', 2)

    def test_all_1(self, checkDFilterCount):
        # "all": expects one packet to be selected.
        checkDFilterCount('all ip.addr > 1.1.1.1', 1)