wireshark/epan/dfilter/dfvm.c


/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include "dfvm.h"
#include <ftypes/ftypes.h>
#include <wsutil/ws_assert.h>

static void
debug_register(GSList *reg, guint32 num);
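
/* Return the printable mnemonic for a DFVM opcode, used by the disassembly output. */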
const char *
dfvm_opcode_tostr(dfvm_opcode_t code)
{
switch (code) {
case DFVM_IF_TRUE_GOTO: return "IF_TRUE_GOTO";
case DFVM_IF_FALSE_GOTO: return "IF_FALSE_GOTO";
case DFVM_CHECK_EXISTS: return "CHECK_EXISTS";
case DFVM_CHECK_EXISTS_R: return "CHECK_EXISTS_R";
case DFVM_NOT: return "NOT";
case DFVM_RETURN: return "RETURN";
case DFVM_READ_TREE: return "READ_TREE";
case DFVM_READ_TREE_R: return "READ_TREE_R";
case DFVM_READ_REFERENCE: return "READ_REFERENCE";
case DFVM_READ_REFERENCE_R: return "READ_REFERENCE_R";
case DFVM_PUT_FVALUE: return "PUT_FVALUE";
case DFVM_ALL_EQ: return "ALL_EQ";
case DFVM_ANY_EQ: return "ANY_EQ";
case DFVM_ALL_NE: return "ALL_NE";
case DFVM_ANY_NE: return "ANY_NE";
case DFVM_ALL_GT: return "ALL_GT";
case DFVM_ANY_GT: return "ANY_GT";
case DFVM_ALL_GE: return "ALL_GE";
case DFVM_ANY_GE: return "ANY_GE";
case DFVM_ALL_LT: return "ALL_LT";
case DFVM_ANY_LT: return "ANY_LT";
case DFVM_ALL_LE: return "ALL_LE";
case DFVM_ANY_LE: return "ANY_LE";
case DFVM_ALL_ZERO: return "ALL_ZERO";
case DFVM_ANY_ZERO: return "ANY_ZERO";
case DFVM_ALL_CONTAINS: return "ALL_CONTAINS";
case DFVM_ANY_CONTAINS: return "ANY_CONTAINS";
case DFVM_ALL_MATCHES: return "ALL_MATCHES";
case DFVM_ANY_MATCHES: return "ANY_MATCHES";
case DFVM_ALL_IN_RANGE: return "ALL_IN_RANGE";
case DFVM_ANY_IN_RANGE: return "ANY_IN_RANGE";
case DFVM_SLICE: return "SLICE";
case DFVM_LENGTH: return "LENGTH";
case DFVM_BITWISE_AND: return "BITWISE_AND";
case DFVM_UNARY_MINUS: return "UNARY_MINUS";
case DFVM_ADD: return "ADD";
case DFVM_SUBTRACT: return "SUBTRACT";
case DFVM_MULTIPLY: return "MULTIPLY";
case DFVM_DIVIDE: return "DIVIDE";
case DFVM_MODULO: return "MODULO";
case DFVM_CALL_FUNCTION: return "CALL_FUNCTION";
case DFVM_STACK_PUSH: return "STACK_PUSH";
case DFVM_STACK_POP: return "STACK_POP";
}
return "(fix-opcode-string)";
}
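
/* Allocate a new instruction for the given opcode; operands start out as NULL. */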
dfvm_insn_t*
dfvm_insn_new(dfvm_opcode_t op)
{
dfvm_insn_t *insn;
insn = g_new(dfvm_insn_t, 1);
insn->op = op;
insn->arg1 = NULL;
insn->arg2 = NULL;
insn->arg3 = NULL;
return insn;
}
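
/* Free a value and any heap data it owns (fvalue, drange or compiled regex). */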
static void
dfvm_value_free(dfvm_value_t *v)
{
switch (v->type) {
case FVALUE:
fvalue_free(v->value.fvalue);
break;
case DRANGE:
drange_free(v->value.drange);
break;
case PCRE:
ws_regex_free(v->value.pcre);
break;
default:
/* nothing */
;
}
g_free(v);
}
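
/* Values are reference counted. dfvm_value_ref() takes a reference (NULL-safe)
 * and dfvm_value_unref() drops one, freeing the value when the count reaches zero. */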
dfvm_value_t*
dfvm_value_ref(dfvm_value_t *v)
{
if (v == NULL)
return NULL;
v->ref_count++;
return v;
}
void
dfvm_value_unref(dfvm_value_t *v)
{
ws_assert(v);
v->ref_count--;
if (v->ref_count > 0)
return;
dfvm_value_free(v);
}
void
dfvm_insn_free(dfvm_insn_t *insn)
{
if (insn->arg1) {
dfvm_value_unref(insn->arg1);
}
if (insn->arg2) {
dfvm_value_unref(insn->arg2);
}
if (insn->arg3) {
dfvm_value_unref(insn->arg3);
}
g_free(insn);
}
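
/* Allocate a new value of the given type. The typed constructors below
 * wrap this and fill in the payload. */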
dfvm_value_t*
dfvm_value_new(dfvm_value_type_t type)
{
dfvm_value_t *v;
v = g_new(dfvm_value_t, 1);
v->type = type;
v->ref_count = 0;
return v;
}
dfvm_value_t*
dfvm_value_new_fvalue(fvalue_t *fv)
{
dfvm_value_t *v = dfvm_value_new(FVALUE);
v->value.fvalue = fv;
return v;
}
dfvm_value_t*
dfvm_value_new_hfinfo(header_field_info *hfinfo)
{
dfvm_value_t *v = dfvm_value_new(HFINFO);
v->value.hfinfo = hfinfo;
return v;
}
dfvm_value_t*
dfvm_value_new_register(int reg)
{
dfvm_value_t *v = dfvm_value_new(REGISTER);
v->value.numeric = reg;
return v;
}
dfvm_value_t*
dfvm_value_new_drange(drange_t *dr)
{
dfvm_value_t *v = dfvm_value_new(DRANGE);
v->value.drange = dr;
return v;
}
dfvm_value_t*
dfvm_value_new_funcdef(df_func_def_t *funcdef)
{
dfvm_value_t *v = dfvm_value_new(FUNCTION_DEF);
v->value.funcdef = funcdef;
return v;
}
dfvm_value_t*
dfvm_value_new_pcre(ws_regex_t *re)
{
dfvm_value_t *v = dfvm_value_new(PCRE);
v->value.pcre = re;
return v;
}
dfvm_value_t*
dfvm_value_new_guint(guint num)
{
dfvm_value_t *v = dfvm_value_new(INTEGER);
v->value.numeric = num;
return v;
}
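
/* Format a value for the disassembly listing. The caller must g_free()
 * the returned string. */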
char *
dfvm_value_tostr(dfvm_value_t *v)
{
char *s, *aux;
if (!v)
return NULL;
switch (v->type) {
case HFINFO:
s = ws_strdup_printf("%s <%s>",
v->value.hfinfo->abbrev,
ftype_name(v->value.hfinfo->type));
break;
case FVALUE:
aux = fvalue_to_debug_repr(NULL, v->value.fvalue);
s = ws_strdup_printf("%s <%s>",
aux, fvalue_type_name(v->value.fvalue));
g_free(aux);
break;
case DRANGE:
s = drange_tostr(v->value.drange);
break;
case PCRE:
s = ws_strdup(ws_regex_pattern(v->value.pcre));
break;
case REGISTER:
s = ws_strdup_printf("reg#%"G_GUINT32_FORMAT, v->value.numeric);
break;
case FUNCTION_DEF:
s = ws_strdup(v->value.funcdef->name);
break;
case INTEGER:
s = ws_strdup_printf("%"G_GUINT32_FORMAT, v->value.numeric);
break;
default:
s = ws_strdup("FIXME");
}
return s;
}
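
/* These helpers keep a shadow copy of the function-argument stack while
 * dumping the program, so CALL_FUNCTION lines can show their arguments. */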
static GSList *
dump_str_stack_push(GSList *stack, const char *str)
{
return g_slist_prepend(stack, g_strdup(str));
}
static GSList *
dump_str_stack_pop(GSList *stack)
{
if (!stack) {
return NULL;
}
char *str;
str = stack->data;
stack = g_slist_delete_link(stack, stack);
g_free(str);
return stack;
}
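
/* Build a human-readable listing of the compiled filter program and,
 * if requested, of the loaded field references. */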
char *
dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, gboolean print_references)
{
int id, length;
dfvm_insn_t *insn;
dfvm_value_t *arg1, *arg2, *arg3;
char *arg1_str, *arg2_str, *arg3_str;
const char *opcode_str;
wmem_strbuf_t *buf;
GHashTableIter ref_iter;
gpointer key, value;
char *str;
GSList *stack_print = NULL, *l;
guint i;
buf = wmem_strbuf_new(alloc, NULL);
wmem_strbuf_append(buf, "Instructions:\n");
length = df->insns->len;
for (id = 0; id < length; id++) {
insn = g_ptr_array_index(df->insns, id);
arg1 = insn->arg1;
arg2 = insn->arg2;
arg3 = insn->arg3;
arg1_str = dfvm_value_tostr(arg1);
arg2_str = dfvm_value_tostr(arg2);
arg3_str = dfvm_value_tostr(arg3);
opcode_str = dfvm_opcode_tostr(insn->op);
switch (insn->op) {
case DFVM_CHECK_EXISTS:
case DFVM_CHECK_EXISTS_R:
wmem_strbuf_append_printf(buf, "%05d %s\t%s\n",
id, opcode_str, arg1_str);
break;
case DFVM_READ_TREE:
case DFVM_READ_TREE_R:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s -> %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_READ_REFERENCE:
case DFVM_READ_REFERENCE_R:
wmem_strbuf_append_printf(buf, "%05d %s\t${%s} -> %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_PUT_FVALUE:
wmem_strbuf_append_printf(buf, "%05d %s\t%s -> %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_CALL_FUNCTION:
wmem_strbuf_append_printf(buf, "%05d %s\t%s(",
id, opcode_str, arg1_str);
for (l = stack_print, i = 0; i < arg3->value.numeric; i++, l = l->next) {
if (l != stack_print) {
wmem_strbuf_append(buf, ", ");
}
wmem_strbuf_append(buf, l->data);
}
wmem_strbuf_append_printf(buf, ") -> %s\n", arg2_str);
break;
case DFVM_STACK_PUSH:
wmem_strbuf_append_printf(buf, "%05d %s\t%s\n",
id, opcode_str, arg1_str);
stack_print = dump_str_stack_push(stack_print, arg1_str);
break;
case DFVM_STACK_POP:
wmem_strbuf_append_printf(buf, "%05d %s\t%s\n",
id, opcode_str, arg1_str);
for (i = 0; i < arg1->value.numeric; i ++) {
stack_print = dump_str_stack_pop(stack_print);
}
break;
case DFVM_SLICE:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s[%s] -> %s\n",
id, opcode_str, arg1_str, arg3_str, arg2_str);
break;
case DFVM_LENGTH:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s -> %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_EQ:
case DFVM_ANY_EQ:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s === %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_NE:
case DFVM_ANY_NE:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s != %s\n",
id, opcode_str, arg1_str, arg2_str);
dfilter: Fix "!=" relation to be free of contradictions Wireshark defines the relation of equality A == B as A any_eq B <=> An == Bn for at least one An, Bn. More accurately I think this is (formally) an equivalence relation, not true equality. Whichever definition for "==" we choose we must keep the definition of "!=" as !(A == B), otherwise it will lead to logical contradictions like (A == B) AND (A != B) being true. Fix the '!=' relation to match the definition of equality: A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n. This has been the recomended way to write "not equal" for a long time in the documentation, even to the point where != was deprecated, but it just wasn't implemented consistently in the language, which has understandably been a persistent source of confusion. Even a field that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will produce unexpected results with encapsulations like IP-over-IP. The opcode ALL_NE could have been implemented in the compiler instead using NOT and ANY_EQ but I chose to implement it in bytecode. It just seemed more elegant and efficient but the difference was not very significant. Keep around "~=" for any_ne relation, in case someone depends on that, and because we don't have an operator for true equality: A strict_equal B <=> A all_eq B <=> !(A any_ne B). If there is only one value then any_ne and all_ne are the same comparison operation. Implementing this change did not require fixing any tests so it is unlikely the relation "~=" (any_ne) will be very useful. Note that the behaviour of the '<' (less than) comparison relation is a separate, more subtle issue. In the general case the definition of '<' that is used is only a partial order.
2021-10-18 20:07:06 +00:00
break;
case DFVM_ALL_GT:
case DFVM_ANY_GT:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s > %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_GE:
case DFVM_ANY_GE:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s >= %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_LT:
case DFVM_ANY_LT:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s < %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_LE:
case DFVM_ANY_LE:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s <= %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_ZERO:
case DFVM_ANY_ZERO:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s\n",
id, opcode_str, arg1_str);
break;
case DFVM_ALL_CONTAINS:
case DFVM_ANY_CONTAINS:
wmem_strbuf_append_printf(buf, "%05d %s\t%s contains %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_MATCHES:
case DFVM_ANY_MATCHES:
wmem_strbuf_append_printf(buf, "%05d %s\t%s matches %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ALL_IN_RANGE:
case DFVM_ANY_IN_RANGE:
wmem_strbuf_append_printf(buf, "%05d %s\t%s in { %s .. %s }\n",
id, opcode_str,
arg1_str, arg2_str, arg3_str);
break;
case DFVM_BITWISE_AND:
wmem_strbuf_append_printf(buf, "%05d %s\t%s & %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_UNARY_MINUS:
wmem_strbuf_append_printf(buf, "%05d %s\t\t-%s -> %s\n",
id, opcode_str, arg1_str, arg2_str);
break;
case DFVM_ADD:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s + %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_SUBTRACT:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s - %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_MULTIPLY:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s * %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_DIVIDE:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s / %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_MODULO:
wmem_strbuf_append_printf(buf, "%05d %s\t\t%s %% %s -> %s\n",
id, opcode_str, arg1_str, arg2_str, arg3_str);
break;
case DFVM_NOT:
wmem_strbuf_append_printf(buf, "%05d NOT\n", id);
break;
case DFVM_RETURN:
wmem_strbuf_append_printf(buf, "%05d RETURN\n", id);
break;
case DFVM_IF_TRUE_GOTO:
case DFVM_IF_FALSE_GOTO:
wmem_strbuf_append_printf(buf, "%05d %s\t%u\n",
id, opcode_str, arg1->value.numeric);
break;
}
g_free(arg1_str);
g_free(arg2_str);
g_free(arg3_str);
}
if (print_references && g_hash_table_size(df->references) > 0) {
wmem_strbuf_append(buf, "\nReferences:\n");
g_hash_table_iter_init(&ref_iter, df->references);
while (g_hash_table_iter_next(&ref_iter, &key, &value)) {
const char *abbrev = ((header_field_info *)key)->abbrev;
GPtrArray *refs_array = value;
df_reference_t *ref;
wmem_strbuf_append_printf(buf, "${%s} = {", abbrev);
for (i = 0; i < refs_array->len; i++) {
if (i != 0) {
wmem_strbuf_append(buf, ", ");
}
ref = refs_array->pdata[i];
str = fvalue_to_debug_repr(NULL, ref->value);
wmem_strbuf_append_printf(buf, "%s <%s>", str, fvalue_type_name(ref->value));
g_free(str);
}
wmem_strbuf_append(buf, "}\n");
}
}
return wmem_strbuf_finalize(buf);
}
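
/* Write the program listing to a stdio stream. */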
void
dfvm_dump(FILE *f, dfilter_t *df)
{
char *str = dfvm_dump_str(NULL, df, FALSE);
fputs(str, f);
wmem_free(NULL, str);
}
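
/* Sort comparator: order field_infos by ascending protocol layer number. */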
static int
compare_finfo_layer(gconstpointer _a, gconstpointer _b)
{
const field_info *a = *(const field_info **)_a;
const field_info *b = *(const field_info **)_b;
return a->proto_layer_num - b->proto_layer_num;
}
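
/* Return TRUE if protocol layer 'num' is selected by the range. A negative
 * start offset counts back from the last layer ('length'). */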
static gboolean
drange_contains_layer(drange_t *dr, int num, int length)
{
drange_node *rn;
GSList *list = dr->range_list;
int lower, upper;
while (list) {
rn = list->data;
lower = rn->start_offset;
if (lower < 0) {
lower += length + 1;
}
if (rn->ending == DRANGE_NODE_END_T_LENGTH) {
upper = lower + rn->length - 1;
}
else if (rn->ending == DRANGE_NODE_END_T_OFFSET) {
upper = rn->end_offset;
}
else if (rn->ending == DRANGE_NODE_END_T_TO_THE_END) {
upper = INT_MAX;
}
else {
ws_assert_not_reached();
}
if (num >= lower && num <= upper) { /* inclusive */
return TRUE;
}
list = g_slist_next(list);
}
return FALSE;
}
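
/* Add to 'fvalues' the values of the finfos whose protocol layer is
 * selected by 'range'. The finfos are sorted by layer first. */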
GSList *
filter_finfo_fvalues(GSList *fvalues, GPtrArray *finfos, drange_t *range)
{
int length; /* maximum proto layer number. The numbers are sequential. */
field_info *last_finfo, *finfo;
int cookie = -1;
gboolean cookie_matches = false;
int layer;
g_ptr_array_sort(finfos, compare_finfo_layer);
last_finfo = finfos->pdata[finfos->len - 1];
length = last_finfo->proto_layer_num;
for (guint i = 0; i < finfos->len; i++) {
finfo = finfos->pdata[i];
layer = finfo->proto_layer_num;
if (cookie == layer) {
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, &finfo->value);
}
}
else {
cookie = layer;
cookie_matches = drange_contains_layer(range, layer, length);
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, &finfo->value);
}
}
}
return fvalues;
}
/* Reads a field from the proto_tree and loads the fvalues into a register,
* if that field has not already been read. */
static gboolean
read_tree(dfilter_t *df, proto_tree *tree,
dfvm_value_t *arg1, dfvm_value_t *arg2,
dfvm_value_t *arg3)
{
GPtrArray *finfos;
field_info *finfo;
int i, len;
GSList *fvalues = NULL;
drange_t *range = NULL;
header_field_info *hfinfo = arg1->value.hfinfo;
int reg = arg2->value.numeric;
if (arg3) {
range = arg3->value.drange;
}
/* Already loaded in this run of the dfilter? */
if (df->attempted_load[reg]) {
if (df->registers[reg]) {
return TRUE;
}
else {
return FALSE;
}
}
df->attempted_load[reg] = TRUE;
while (hfinfo) {
finfos = proto_get_finfo_ptr_array(tree, hfinfo->id);
if ((finfos == NULL) || (g_ptr_array_len(finfos) == 0)) {
hfinfo = hfinfo->same_name_next;
continue;
}
if (range) {
fvalues = filter_finfo_fvalues(fvalues, finfos, range);
}
else {
len = finfos->len;
for (i = 0; i < len; i++) {
finfo = g_ptr_array_index(finfos, i);
fvalues = g_slist_prepend(fvalues, &finfo->value);
}
}
hfinfo = hfinfo->same_name_next;
}
if (fvalues == NULL) {
return FALSE;
}
df->registers[reg] = fvalues;
// These values are only referenced, do not try to free them later.
df->free_registers[reg] = NULL;
return TRUE;
}
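
/* Like filter_finfo_fvalues(), but for field references. The values are
 * duplicated, so the returned list owns them. */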
GSList *
filter_refs_fvalues(GPtrArray *refs_array, drange_t *range)
{
int length; /* maximum proto layer number. The numbers are sequential. */
df_reference_t *last_ref = NULL;
int cookie = -1;
gboolean cookie_matches = false;
GSList *fvalues = NULL;
if (!refs_array || refs_array->len == 0) {
return fvalues;
}
/* refs array is sorted. */
last_ref = refs_array->pdata[refs_array->len - 1];
length = last_ref->proto_layer_num;
for (guint i = 0; i < refs_array->len; i++) {
df_reference_t *ref = refs_array->pdata[i];
int layer = ref->proto_layer_num;
if (range == NULL) {
fvalues = g_slist_prepend(fvalues, fvalue_dup(ref->value));
continue;
}
if (cookie == layer) {
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, fvalue_dup(ref->value));
}
}
else {
cookie = layer;
cookie_matches = drange_contains_layer(range, layer, length);
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, fvalue_dup(ref->value));
}
}
}
return fvalues;
}
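
/* Load the values recorded for a ${field} reference into a register,
 * unless a load was already attempted in this run of the filter. */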
static gboolean
read_reference(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
dfvm_value_t *arg3)
{
GPtrArray *refs;
drange_t *range = NULL;
header_field_info *hfinfo = arg1->value.hfinfo;
int reg = arg2->value.numeric;
if (arg3) {
range = arg3->value.drange;
}
/* Already loaded in this run of the dfilter? */
if (df->attempted_load[reg]) {
if (df->registers[reg]) {
return TRUE;
}
else {
return FALSE;
}
}
df->attempted_load[reg] = TRUE;
refs = g_hash_table_lookup(df->references, hfinfo);
if (refs == NULL || refs->len == 0) {
df->registers[reg] = NULL;
return FALSE;
}
/* filter_refs_fvalues() duplicates the reference values. */
df->registers[reg] = filter_refs_fvalues(refs, range);
/* The register owns the new values, so they must be freed later. */
df->free_registers[reg] = (GDestroyNotify)fvalue_free;
return TRUE;
}
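/* How a comparison over lists of values is reduced to a single boolean:
 * MATCH_ANY succeeds as soon as one pairwise test succeeds, MATCH_ALL
 * fails as soon as one pairwise test fails. For a frame carrying two
 * ip.src values, "ip.src == 1.1.1.1" (ANY_EQ) is true if either value
 * matches, while "ip.src != 1.1.1.1" (ALL_NE) is true only if neither
 * value matches. */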
enum match_how {
MATCH_ANY,
MATCH_ALL
};
typedef gboolean (*DFVMCompareFunc)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*DFVMTestFunc)(const fvalue_t*);
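/* Compare every value in arg1 against every value in arg2 with
 * match_func and reduce the results according to 'how'. Empty lists
 * yield TRUE for MATCH_ALL and FALSE for MATCH_ANY (vacuous truth). */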
static gboolean
cmp_test(enum match_how how, DFVMCompareFunc match_func,
GSList *arg1, GSList *arg2)
{
GSList *list1, *list2;
gboolean want_all = (how == MATCH_ALL);
gboolean want_any = (how == MATCH_ANY);
gboolean have_match;
list1 = arg1;
while (list1) {
list2 = arg2;
while (list2) {
have_match = match_func(list1->data, list2->data);
if (want_all && !have_match) {
return FALSE;
}
else if (want_any && have_match) {
return TRUE;
}
list2 = g_slist_next(list2);
}
list1 = g_slist_next(list1);
}
/* want_all || !want_any */
return want_all;
}
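/* Same reduction as cmp_test(), but for unary predicates such as
 * fvalue_is_zero(). */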
static gboolean
cmp_test_unary(enum match_how how, DFVMTestFunc test_func, GSList *arg1)
{
GSList *list1;
gboolean want_all = (how == MATCH_ALL);
gboolean want_any = (how == MATCH_ANY);
gboolean have_match;
list1 = arg1;
while (list1) {
have_match = test_func(list1->data);
if (want_all && !have_match) {
return FALSE;
}
else if (want_any && have_match) {
return TRUE;
}
list1 = g_slist_next(list1);
}
/* want_all || !want_any */
return want_all;
}
static gboolean
any_test_unary(dfilter_t *df, DFVMTestFunc func, dfvm_value_t *arg1)
{
ws_assert(arg1->type == REGISTER);
GSList *list1 = df->registers[arg1->value.numeric];
return cmp_test_unary(MATCH_ANY, func, list1);
}
static gboolean
all_test_unary(dfilter_t *df, DFVMTestFunc func, dfvm_value_t *arg1)
{
ws_assert(arg1->type == REGISTER);
GSList *list1 = df->registers[arg1->value.numeric];
return cmp_test_unary(MATCH_ALL, func, list1);
}
/* cmp(A) <=> cmp(a1) OR cmp(a2) OR cmp(a3) OR ... */
static gboolean
any_test(dfilter_t *df, DFVMCompareFunc cmp,
dfvm_value_t *arg1, dfvm_value_t *arg2)
{
ws_assert(arg1->type == REGISTER);
GSList *list1 = df->registers[arg1->value.numeric];
if (arg2->type == REGISTER) {
return cmp_test(MATCH_ANY, cmp, list1, df->registers[arg2->value.numeric]);
}
if (arg2->type == FVALUE) {
GSList list2;
list2.data = arg2->value.fvalue;
list2.next = NULL;
return cmp_test(MATCH_ANY, cmp, list1, &list2);
}
ws_assert_not_reached();
}
/* cmp(A) <=> cmp(a1) AND cmp(a2) AND cmp(a3) AND ... */
static gboolean
all_test(dfilter_t *df, DFVMCompareFunc cmp,
dfvm_value_t *arg1, dfvm_value_t *arg2)
{
ws_assert(arg1->type == REGISTER);
GSList *list1 = df->registers[arg1->value.numeric];
if (arg2->type == REGISTER) {
return cmp_test(MATCH_ALL, cmp, list1, df->registers[arg2->value.numeric]);
}
if (arg2->type == FVALUE) {
GSList list2;
list2.data = arg2->value.fvalue;
list2.next = NULL;
return cmp_test(MATCH_ALL, cmp, list1, &list2);
}
ws_assert_not_reached();
}
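/* Match a register full of values against a precompiled regular
 * expression. any_matches() needs at least one value to match,
 * all_matches() needs every value to match. */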
static gboolean
any_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
GSList *list1 = df->registers[arg1->value.numeric];
ws_regex_t *re = arg2->value.pcre;
while (list1) {
if (fvalue_matches(list1->data, re)) {
return TRUE;
}
list1 = g_slist_next(list1);
}
return FALSE;
}
static gboolean
all_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
GSList *list1 = df->registers[arg1->value.numeric];
ws_regex_t *re = arg2->value.pcre;
while (list1) {
if (!fvalue_matches(list1->data, re)) {
return FALSE;
}
list1 = g_slist_next(list1);
}
return TRUE;
}
static gboolean
any_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
while (list1) {
if (fvalue_ge(list1->data, low) &&
fvalue_le(list1->data, high)) {
return TRUE;
}
list1 = g_slist_next(list1);
}
return FALSE;
}
static gboolean
all_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
while (list1) {
if (!fvalue_ge(list1->data, low) ||
!fvalue_le(list1->data, high)) {
return FALSE;
}
list1 = g_slist_next(list1);
}
return TRUE;
}
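/* Test whether values lie in the inclusive range [low, high]. Each
 * bound may come from a register (which must hold exactly one value)
 * or from a constant FVALUE baked into the instruction. */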
static gboolean
match_in_range(dfilter_t *df, enum match_how how, dfvm_value_t *arg1,
dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
GSList *list1 = df->registers[arg1->value.numeric];
GSList *_low, *_high;
fvalue_t *low, *high;
if (arg_low->type == REGISTER) {
_low = df->registers[arg_low->value.numeric];
ws_assert(g_slist_length(_low) == 1);
low = _low->data;
}
else if (arg_low->type == FVALUE) {
low = arg_low->value.fvalue;
}
else {
ws_assert_not_reached();
}
if (arg_high->type == REGISTER) {
_high = df->registers[arg_high->value.numeric];
ws_assert(g_slist_length(_high) == 1);
high = _high->data;
}
else if (arg_high->type == FVALUE) {
high = arg_high->value.fvalue;
}
else {
ws_assert_not_reached();
}
if (how == MATCH_ALL)
return all_in_range_internal(list1, low, high);
else if (how == MATCH_ANY)
return any_in_range_internal(list1, low, high);
else
ws_assert_not_reached();
}
static gboolean
any_in_range(dfilter_t *df, dfvm_value_t *arg1,
dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
return match_in_range(df, MATCH_ANY, arg1, arg_low, arg_high);
}
static gboolean
all_in_range(dfilter_t *df, dfvm_value_t *arg1,
dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
return match_in_range(df, MATCH_ALL, arg1, arg_low, arg_high);
}
/* Clear registers that were populated during evaluation.
* If we created the values, then these will be freed as well. */
static void
free_register_overhead(dfilter_t* df)
{
guint i;
for (i = 0; i < df->num_registers; i++) {
df->attempted_load[i] = FALSE;
if (df->registers[i]) {
if (df->free_registers[i]) {
for (GSList *l = df->registers[i]; l != NULL; l = l->next) {
df->free_registers[i](l->data);
}
df->free_registers[i] = NULL;
}
g_slist_free(df->registers[i]);
df->registers[i] = NULL;
}
}
}
/* Takes the list of fvalue_t's in a register, uses fvalue_slice()
* to make a new list of fvalue_t's (which are byte-slices),
* and puts the new list into a new register. */
static void
mk_slice(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg,
dfvm_value_t *drange_arg)
{
GSList *from_list, *to_list;
fvalue_t *old_fv, *new_fv;
to_list = NULL;
from_list = df->registers[from_arg->value.numeric];
drange_t *drange = drange_arg->value.drange;
while (from_list) {
old_fv = from_list->data;
new_fv = fvalue_slice(old_fv, drange);
/* Assert here because semcheck.c should have
* already caught the cases in which a slice
* cannot be made. */
ws_assert(new_fv);
to_list = g_slist_prepend(to_list, new_fv);
from_list = g_slist_next(from_list);
}
df->registers[to_arg->value.numeric] = to_list;
df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
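/* Compute the length of each value in a register and store the results
 * as FT_UINT32 values in a new register. */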
static void
mk_length(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg)
{
GSList *from_list, *to_list;
fvalue_t *old_fv, *new_fv;
to_list = NULL;
from_list = df->registers[from_arg->value.numeric];
while (from_list) {
old_fv = from_list->data;
new_fv = fvalue_new(FT_UINT32);
fvalue_set_uinteger(new_fv, fvalue_length(old_fv));
to_list = g_slist_prepend(to_list, new_fv);
from_list = g_slist_next(from_list);
}
df->registers[to_arg->value.numeric] = to_list;
df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
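/* Call a display filter function. Its arguments were pushed onto
 * df->function_stack by DFVM_STACK_PUSH; arg1 is the function
 * definition, arg2 the return register and arg3 the argument count.
 * The returned value list is owned by the return register. */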
static gboolean
call_function(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
dfvm_value_t *arg3)
{
df_func_def_t *funcdef;
GSList *retval = NULL;
gboolean accum;
guint32 reg_return, arg_count;
funcdef = arg1->value.funcdef;
reg_return = arg2->value.numeric;
arg_count = arg3->value.numeric;
accum = funcdef->function(df->function_stack, arg_count, &retval);
/* Write the return register. */
df->registers[reg_return] = retval;
// functions create a new value, so own it.
df->free_registers[reg_return] = (GDestroyNotify)fvalue_free;
return accum;
}
static void
debug_op_error(fvalue_t *v1, fvalue_t *v2, const char *op, const char *msg)
{
char *s1 = fvalue_to_debug_repr(NULL, v1);
char *s2 = fvalue_to_debug_repr(NULL, v2);
ws_noisy("Error: %s %s %s: %s", s1, op, s2, msg);
g_free(s1);
g_free(s2);
}
/* Used for temporary debugging only; do not leave in production code (at
 * a minimum WS_DEBUG_HERE must be replaced by another log level). */
static void _U_
debug_register(GSList *reg, guint32 num)
{
wmem_strbuf_t *buf;
GSList *l;
char *s;
buf = wmem_strbuf_new(NULL, NULL);
wmem_strbuf_append_printf(buf, "Reg#%"G_GUINT32_FORMAT" = { ", num);
for (l = reg; l != NULL; l = l->next) {
s = fvalue_to_debug_repr(NULL, l->data);
wmem_strbuf_append_printf(buf, "%s <%s>", s, fvalue_type_name(l->data));
g_free(s);
if (l->next != NULL) {
wmem_strbuf_append(buf, ", ");
}
}
wmem_strbuf_append_c(buf, '}');
WS_DEBUG_HERE("%s", wmem_strbuf_get_str(buf));
wmem_strbuf_destroy(buf);
}
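/* Apply a binary fvalue operation (bitwise AND, arithmetic) to every
 * pair of values from the two argument lists. Pairs for which the
 * operation fails are logged and skipped; note that the debug message
 * currently always prints "&" as the operator, whatever the operation. */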
typedef fvalue_t* (*DFVMBinaryFunc)(const fvalue_t*, const fvalue_t*, char **);
static void
mk_binary_internal(DFVMBinaryFunc func,
GSList *arg1, GSList *arg2, GSList **retval)
{
GSList *list1, *list2;
GSList *to_list = NULL;
fvalue_t *val1, *val2;
fvalue_t *result;
char *err_msg = NULL;
list1 = arg1;
while (list1) {
list2 = arg2;
while (list2) {
val1 = list1->data;
val2 = list2->data;
result = func(val1, val2, &err_msg);
if (result == NULL) {
debug_op_error(val1, val2, "&", err_msg);
g_free(err_msg);
err_msg = NULL;
}
else {
to_list = g_slist_prepend(to_list, result);
}
list2 = g_slist_next(list2);
}
list1 = g_slist_next(list1);
}
*retval = to_list;
}
static void
mk_binary(dfilter_t *df, DFVMBinaryFunc func,
dfvm_value_t *arg1, dfvm_value_t *arg2, dfvm_value_t *to_arg)
{
GSList ls1, ls2;
GSList *list1, *list2;
GSList *result = NULL;
if (arg1->type == REGISTER) {
list1 = df->registers[arg1->value.numeric];
}
else if (arg1->type == FVALUE) {
ls1.data = arg1->value.fvalue;
ls1.next = NULL;
list1 = &ls1;
}
else {
ws_assert_not_reached();
}
if (arg2->type == REGISTER) {
list2 = df->registers[arg2->value.numeric];
}
else if (arg2->type == FVALUE) {
ls2.data = arg2->value.fvalue;
ls2.next = NULL;
list2 = &ls2;
}
else {
ws_assert_not_reached();
}
mk_binary_internal(func, list1, list2, &result);
//debug_register(result, to_arg->value.numeric);
df->registers[to_arg->value.numeric] = result;
df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
static void
mk_minus_internal(GSList *arg1, GSList **retval)
{
GSList *list1;
GSList *to_list = NULL;
fvalue_t *val1;
fvalue_t *result;
char *err_msg = NULL;
list1 = arg1;
while (list1) {
val1 = list1->data;
result = fvalue_unary_minus(val1, &err_msg);
if (result == NULL) {
ws_noisy("unary_minus: %s", err_msg);
g_free(err_msg);
err_msg = NULL;
}
else {
to_list = g_slist_prepend(to_list, result);
}
list1 = g_slist_next(list1);
}
*retval = to_list;
}
static void
mk_minus(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
{
ws_assert(arg1->type == REGISTER);
GSList *list1 = df->registers[arg1->value.numeric];
GSList *result = NULL;
mk_minus_internal(list1, &result);
df->registers[to_arg->value.numeric] = result;
df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
static void
put_fvalue(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
{
fvalue_t *fv = arg1->value.fvalue;
df->registers[to_arg->value.numeric] = g_slist_append(NULL, fv);
/* Memory is owned by the dfvm_value_t. */
df->free_registers[to_arg->value.numeric] = NULL;
}
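/* Push a function argument onto df->function_stack. Register arguments
 * are shallow-copied, so stack_pop() frees only the list nodes, never
 * the fvalues they point to. */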
static void
stack_push(dfilter_t *df, dfvm_value_t *arg1)
{
GSList *arg;
if (arg1->type == FVALUE) {
arg = g_slist_prepend(NULL, arg1->value.fvalue);
}
else if (arg1->type == REGISTER) {
arg = g_slist_copy(df->registers[arg1->value.numeric]);
}
else {
ws_assert_not_reached();
}
df->function_stack = g_slist_prepend(df->function_stack, arg);
}
static void
stack_pop(dfilter_t *df, dfvm_value_t *arg1)
{
guint count;
GSList *reg;
count = arg1->value.numeric;
for (guint i = 0; i < count; i++) {
/* Free top of stack and the register contained there. The register
 * contents are not owned by us. */
reg = df->function_stack->data;
/* Free the list but not the data it contains. */
g_slist_free(reg);
/* remove top of stack */
df->function_stack = g_slist_delete_link(df->function_stack, df->function_stack);
}
}
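/* Check whether a field exists in the tree, walking the chain of
 * same-named header fields. With a range argument the field only
 * "exists" if the slice yields at least one value. */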
static gboolean
check_exists(proto_tree *tree, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
GPtrArray *finfos;
header_field_info *hfinfo;
drange_t *range = NULL;
gboolean exists;
GSList *fvalues;
hfinfo = arg1->value.hfinfo;
if (arg2)
range = arg2->value.drange;
while (hfinfo) {
finfos = proto_get_finfo_ptr_array(tree, hfinfo->id);
if ((finfos == NULL) || (g_ptr_array_len(finfos) == 0)) {
hfinfo = hfinfo->same_name_next;
continue;
}
if (range == NULL) {
return TRUE;
}
fvalues = filter_finfo_fvalues(NULL, finfos, range);
exists = (fvalues != NULL);
g_slist_free(fvalues);
if (exists) {
return TRUE;
}
hfinfo = hfinfo->same_name_next;
}
return FALSE;
}
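/* Run the compiled instructions against a protocol tree. 'accum' holds
 * the boolean result of the most recent test; IF_TRUE_GOTO and
 * IF_FALSE_GOTO branch on it to short-circuit AND/OR, and RETURN frees
 * the register overhead and returns the accumulator. */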
gboolean
dfvm_apply(dfilter_t *df, proto_tree *tree)
{
int id, length;
gboolean accum = TRUE;
dfvm_insn_t *insn;
dfvm_value_t *arg1;
dfvm_value_t *arg2;
dfvm_value_t *arg3 = NULL;
ws_assert(tree);
length = df->insns->len;
for (id = 0; id < length; id++) {
AGAIN:
insn = g_ptr_array_index(df->insns, id);
arg1 = insn->arg1;
arg2 = insn->arg2;
arg3 = insn->arg3;
ws_noisy("ID: %d; OP: %s", id, dfvm_opcode_tostr(insn->op));
switch (insn->op) {
case DFVM_CHECK_EXISTS:
accum = check_exists(tree, arg1, NULL);
break;
case DFVM_CHECK_EXISTS_R:
accum = check_exists(tree, arg1, arg2);
break;
case DFVM_READ_TREE:
accum = read_tree(df, tree, arg1, arg2, NULL);
break;
case DFVM_READ_TREE_R:
accum = read_tree(df, tree, arg1, arg2, arg3);
break;
case DFVM_READ_REFERENCE:
accum = read_reference(df, arg1, arg2, NULL);
break;
case DFVM_READ_REFERENCE_R:
accum = read_reference(df, arg1, arg2, arg3);
dfilter: Refactor macro tree references This replaces the current macro reference system with a completely different implementation. Instead of a macro a reference is a syntax element. A reference is a constant that can be filled in the dfilter code after compilation from an existing protocol tree. It is best understood as a field value that can be read from a fixed tree that is not the frame being filtered. Usually this fixed tree is the currently selected frame when the filter is applied. This allows comparing fields in the filtered frame with fields in the selected frame. Because the field reference syntax uses the same sigil notation as a macro we have to use a heuristic to distinguish them: if the name has a dot it is a field reference, otherwise it is a macro name. The reference is synctatically validated at compile time. There are two main advantages to this implementation (and a couple of minor ones): The protocol tree for each selected frame is only walked if we have a display filter and if the display filter uses references. Also only the actual reference values are copied, intead of loading the entire tree into a hash table (in textual form even). The other advantage is that the reference is tested like a protocol field against all the values in the selected frame (if there is more than one). Currently the reference fields are not "primed" during dissection, so the entire tree is walked to find a particular reference (this is similar to the previous implementation). If the display filter contains a valid reference and the reference is not loaded at the time the filter is run the result is the same as a non existing field for a regular READ_TREE instruction. Fixes #17599.
2022-03-27 14:26:46 +00:00
break;
case DFVM_PUT_FVALUE:
put_fvalue(df, arg1, arg2);
break;
case DFVM_CALL_FUNCTION:
accum = call_function(df, arg1, arg2, arg3);
break;
case DFVM_STACK_PUSH:
stack_push(df, arg1);
break;
case DFVM_STACK_POP:
stack_pop(df, arg1);
break;
case DFVM_SLICE:
mk_slice(df, arg1, arg2, arg3);
break;
case DFVM_LENGTH:
mk_length(df, arg1, arg2);
break;
case DFVM_ALL_EQ:
accum = all_test(df, fvalue_eq, arg1, arg2);
break;
case DFVM_ANY_EQ:
accum = any_test(df, fvalue_eq, arg1, arg2);
break;
case DFVM_ALL_NE:
accum = all_test(df, fvalue_ne, arg1, arg2);
break;
case DFVM_ANY_NE:
accum = any_test(df, fvalue_ne, arg1, arg2);
break;
case DFVM_ALL_GT:
accum = all_test(df, fvalue_gt, arg1, arg2);
break;
case DFVM_ANY_GT:
accum = any_test(df, fvalue_gt, arg1, arg2);
break;
case DFVM_ALL_GE:
accum = all_test(df, fvalue_ge, arg1, arg2);
break;
case DFVM_ANY_GE:
accum = any_test(df, fvalue_ge, arg1, arg2);
break;
case DFVM_ALL_LT:
accum = all_test(df, fvalue_lt, arg1, arg2);
break;
case DFVM_ANY_LT:
accum = any_test(df, fvalue_lt, arg1, arg2);
break;
case DFVM_ALL_LE:
accum = all_test(df, fvalue_le, arg1, arg2);
break;
case DFVM_ANY_LE:
2022-03-21 12:19:54 +00:00
accum = any_test(df, fvalue_le, arg1, arg2);
break;
case DFVM_BITWISE_AND:
mk_binary(df, fvalue_bitwise_and, arg1, arg2, arg3);
break;
case DFVM_ADD:
mk_binary(df, fvalue_add, arg1, arg2, arg3);
break;
case DFVM_SUBTRACT:
mk_binary(df, fvalue_subtract, arg1, arg2, arg3);
break;
case DFVM_MULTIPLY:
mk_binary(df, fvalue_multiply, arg1, arg2, arg3);
break;
case DFVM_DIVIDE:
mk_binary(df, fvalue_divide, arg1, arg2, arg3);
break;
case DFVM_MODULO:
mk_binary(df, fvalue_modulo, arg1, arg2, arg3);
break;
case DFVM_ALL_ZERO:
accum = all_test_unary(df, fvalue_is_zero, arg1);
break;
case DFVM_ANY_ZERO:
accum = any_test_unary(df, fvalue_is_zero, arg1);
break;
case DFVM_ALL_CONTAINS:
accum = all_test(df, fvalue_contains, arg1, arg2);
break;
case DFVM_ANY_CONTAINS:
accum = any_test(df, fvalue_contains, arg1, arg2);
break;
case DFVM_ALL_MATCHES:
accum = all_matches(df, arg1, arg2);
break;
case DFVM_ANY_MATCHES:
accum = any_matches(df, arg1, arg2);
break;
case DFVM_ALL_IN_RANGE:
accum = all_in_range(df, arg1, arg2, arg3);
break;
case DFVM_ANY_IN_RANGE:
accum = any_in_range(df, arg1, arg2, arg3);
break;
case DFVM_UNARY_MINUS:
mk_minus(df, arg1, arg2);
break;
case DFVM_NOT:
accum = !accum;
break;
case DFVM_RETURN:
free_register_overhead(df);
return accum;
case DFVM_IF_TRUE_GOTO:
if (accum) {
id = arg1->value.numeric;
goto AGAIN;
}
break;
case DFVM_IF_FALSE_GOTO:
if (!accum) {
id = arg1->value.numeric;
goto AGAIN;
}
break;
}
}
ws_assert_not_reached();
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/