wireshark/epan/dfilter/dfvm.c

1701 lines
38 KiB
C
Raw Normal View History

/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include "dfvm.h"
#include <ftypes/ftypes.h>
2021-06-18 18:21:42 +00:00
#include <wsutil/ws_assert.h>
/* Forward declaration of a file-local helper; its definition is not in this
 * part of the file. */
static void
debug_register(GSList *reg, guint32 num);
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/*
 * Return the printable mnemonic for a DFVM opcode.
 *
 * Every return value is a string literal, so the caller must not free it.
 * The switch has no `default` label: an opcode without a matching case
 * falls out of the switch and yields the "(fix-opcode-string)" placeholder.
 */
const char *
dfvm_opcode_tostr(dfvm_opcode_t code)
{
	switch (code) {
		case DFVM_IF_TRUE_GOTO:		return "IF_TRUE_GOTO";
		case DFVM_IF_FALSE_GOTO:	return "IF_FALSE_GOTO";
		case DFVM_CHECK_EXISTS:		return "CHECK_EXISTS";
		case DFVM_CHECK_EXISTS_R:	return "CHECK_EXISTS_R";
		case DFVM_NOT:			return "NOT";
		case DFVM_RETURN:		return "RETURN";
		case DFVM_READ_TREE:		return "READ_TREE";
		case DFVM_READ_TREE_R:		return "READ_TREE_R";
		case DFVM_READ_REFERENCE:	return "READ_REFERENCE";
		case DFVM_READ_REFERENCE_R:	return "READ_REFERENCE_R";
		case DFVM_PUT_FVALUE:		return "PUT_FVALUE";
		case DFVM_ALL_EQ:		return "ALL_EQ";
		case DFVM_ANY_EQ:		return "ANY_EQ";
		case DFVM_ALL_NE:		return "ALL_NE";
		case DFVM_ANY_NE:		return "ANY_NE";
		case DFVM_ALL_GT:		return "ALL_GT";
		case DFVM_ANY_GT:		return "ANY_GT";
		case DFVM_ALL_GE:		return "ALL_GE";
		case DFVM_ANY_GE:		return "ANY_GE";
		case DFVM_ALL_LT:		return "ALL_LT";
		case DFVM_ANY_LT:		return "ANY_LT";
		case DFVM_ALL_LE:		return "ALL_LE";
		case DFVM_ANY_LE:		return "ANY_LE";
		case DFVM_ALL_CONTAINS:		return "ALL_CONTAINS";
		case DFVM_ANY_CONTAINS:		return "ANY_CONTAINS";
		case DFVM_ALL_MATCHES:		return "ALL_MATCHES";
		case DFVM_ANY_MATCHES:		return "ANY_MATCHES";
		case DFVM_ALL_IN_RANGE:		return "ALL_IN_RANGE";
		case DFVM_ANY_IN_RANGE:		return "ANY_IN_RANGE";
		case DFVM_SLICE:		return "SLICE";
		case DFVM_LENGTH:		return "LENGTH";
		case DFVM_BITWISE_AND:		return "BITWISE_AND";
		case DFVM_UNARY_MINUS:		return "UNARY_MINUS";
		case DFVM_ADD:			return "ADD";
		case DFVM_SUBTRACT:		return "SUBTRACT";
		case DFVM_MULTIPLY:		return "MULTIPLY";
		case DFVM_DIVIDE:		return "DIVIDE";
		case DFVM_MODULO:		return "MODULO";
		case DFVM_CALL_FUNCTION:	return "CALL_FUNCTION";
		case DFVM_STACK_PUSH:		return "STACK_PUSH";
		case DFVM_STACK_POP:		return "STACK_POP";
		case DFVM_NOT_ALL_ZERO:		return "NOT_ALL_ZERO";
	}
	return "(fix-opcode-string)";
}
/*
 * Allocate an instruction for opcode `op` with all three argument slots
 * empty (NULL). The result is released with dfvm_insn_free().
 */
dfvm_insn_t*
dfvm_insn_new(dfvm_opcode_t op)
{
	dfvm_insn_t *new_insn = g_new(dfvm_insn_t, 1);

	new_insn->op = op;
	/* No operands yet; the caller fills in whichever slots the opcode uses. */
	new_insn->arg1 = new_insn->arg2 = new_insn->arg3 = NULL;

	return new_insn;
}
/*
 * Destroy a DFVM value: release the payload for the value types that carry
 * one handled here (FVALUE, DRANGE, PCRE), then free the wrapper itself.
 * `v` must not be NULL; it is dereferenced unconditionally.
 */
static void
dfvm_value_free(dfvm_value_t *v)
{
	switch (v->type) {
		case FVALUE:
			fvalue_free(v->value.fvalue);
			break;
		case DRANGE:
			drange_free(v->value.drange);
			break;
		case PCRE:
			ws_regex_free(v->value.pcre);
			break;
		default:
			/* nothing to release for the remaining value types */
			break;
	}
	g_free(v);
}
/*
 * Take an additional reference on `v` and hand the same pointer back.
 * A NULL argument is passed through untouched.
 */
dfvm_value_t*
dfvm_value_ref(dfvm_value_t *v)
{
	if (v != NULL) {
		v->ref_count++;
	}
	return v;
}
/*
 * Drop one reference on `v` (asserted non-NULL). The value is destroyed
 * once the count is no longer positive.
 */
void
dfvm_value_unref(dfvm_value_t *v)
{
	ws_assert(v);

	v->ref_count--;
	if (!(v->ref_count > 0)) {
		dfvm_value_free(v);
	}
}
/*
 * Release an instruction: unreference each argument slot that is set
 * (arg1, arg2, arg3, in that order), then free the instruction itself.
 */
void
dfvm_insn_free(dfvm_insn_t *insn)
{
	dfvm_value_t *operands[] = { insn->arg1, insn->arg2, insn->arg3 };
	size_t i;

	for (i = 0; i < sizeof operands / sizeof operands[0]; i++) {
		if (operands[i] != NULL) {
			dfvm_value_unref(operands[i]);
		}
	}
	g_free(insn);
}
/*
 * Allocate a DFVM value tagged with `type`. The reference count starts at
 * zero and the payload union is left unset for the caller to fill in.
 */
dfvm_value_t*
dfvm_value_new(dfvm_value_type_t type)
{
	dfvm_value_t *val = g_new(dfvm_value_t, 1);

	val->ref_count = 0;
	val->type = type;

	return val;
}
2022-03-21 15:38:22 +00:00
/* Wrap the field value `fv` in a new DFVM value of type FVALUE. */
dfvm_value_t*
dfvm_value_new_fvalue(fvalue_t *fv)
{
	dfvm_value_t *val;

	val = dfvm_value_new(FVALUE);
	val->value.fvalue = fv;

	return val;
}
dfvm_value_t*
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
dfvm_value_new_hfinfo(header_field_info *hfinfo, gboolean raw)
2022-03-21 15:38:22 +00:00
{
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
dfvm_value_t *v;
if (raw)
v = dfvm_value_new(RAW_HFINFO);
else
v = dfvm_value_new(HFINFO);
2022-03-21 15:38:22 +00:00
v->value.hfinfo = hfinfo;
return v;
}
/* Create a DFVM value of type REGISTER holding register number `reg`. */
dfvm_value_t*
dfvm_value_new_register(int reg)
{
	dfvm_value_t *val;

	val = dfvm_value_new(REGISTER);
	val->value.numeric = reg;

	return val;
}
/* Wrap the range `dr` in a new DFVM value of type DRANGE. */
dfvm_value_t*
dfvm_value_new_drange(drange_t *dr)
{
	dfvm_value_t *val;

	val = dfvm_value_new(DRANGE);
	val->value.drange = dr;

	return val;
}
/* Wrap the function definition `funcdef` in a new DFVM value of type FUNCTION_DEF. */
dfvm_value_t*
dfvm_value_new_funcdef(df_func_def_t *funcdef)
{
	dfvm_value_t *val;

	val = dfvm_value_new(FUNCTION_DEF);
	val->value.funcdef = funcdef;

	return val;
}
/* Wrap the compiled regular expression `re` in a new DFVM value of type PCRE. */
dfvm_value_t*
dfvm_value_new_pcre(ws_regex_t *re)
{
	dfvm_value_t *val;

	val = dfvm_value_new(PCRE);
	val->value.pcre = re;

	return val;
}
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/* Create a DFVM value of type INTEGER holding the unsigned number `num`. */
dfvm_value_t*
dfvm_value_new_guint(guint num)
{
	dfvm_value_t *val;

	val = dfvm_value_new(INTEGER);
	val->value.numeric = num;

	return val;
}
static char *
2022-03-21 11:13:02 +00:00
dfvm_value_tostr(dfvm_value_t *v)
{
char *s;
2022-03-21 11:13:02 +00:00
if (!v)
return NULL;
switch (v->type) {
case HFINFO:
s = ws_strdup(v->value.hfinfo->abbrev);
2022-03-21 11:13:02 +00:00
break;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
case RAW_HFINFO:
s = ws_strdup_printf("@%s", v->value.hfinfo->abbrev);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
break;
2022-03-21 11:13:02 +00:00
case FVALUE:
s = fvalue_to_debug_repr(NULL, v->value.fvalue);
2022-03-21 11:13:02 +00:00
break;
case DRANGE:
s = drange_tostr(v->value.drange);
break;
case PCRE:
s = ws_strdup(ws_regex_pattern(v->value.pcre));
break;
case REGISTER:
s = ws_strdup_printf("R%"G_GUINT32_FORMAT, v->value.numeric);
2022-03-21 11:13:02 +00:00
break;
case FUNCTION_DEF:
s = ws_strdup(v->value.funcdef->name);
break;
case INTEGER:
s = ws_strdup_printf("%"G_GUINT32_FORMAT, v->value.numeric);
break;
2022-03-21 11:13:02 +00:00
default:
s = ws_strdup("FIXME");
}
return s;
}
2023-01-10 15:42:32 +00:00
/*
 * Build the " <TYPE>" suffix that annotates a dumped value with its field
 * type name.
 *
 * An empty string is returned when `v` is NULL, when `show_ftype` is false,
 * or when the value type is neither a header field (HFINFO / RAW_HFINFO)
 * nor an FVALUE.
 */
static char *
value_type_tostr(dfvm_value_t *v, gboolean show_ftype)
{
	const char *type_name;

	if (v == NULL || !show_ftype)
		return ws_strdup("");

	if (v->type == HFINFO || v->type == RAW_HFINFO) {
		type_name = ftype_name(v->value.hfinfo->type);
	}
	else if (v->type == FVALUE) {
		type_name = fvalue_type_name(v->value.fvalue);
	}
	else {
		return ws_strdup("");
	}

	return ws_strdup_printf(" <%s>", type_name);
}
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/*
 * Push a private copy of `str` onto the front of the printable argument
 * stack and return the new list head.
 */
static GSList *
dump_str_stack_push(GSList *stack, const char *str)
{
	char *copy = g_strdup(str);

	stack = g_slist_prepend(stack, copy);
	return stack;
}
/*
 * Pop up to `count` entries from the front of the printable argument stack,
 * freeing each entry's string, and return the new list head.
 *
 * Popping stops early when the stack runs empty, so asking for more entries
 * than are present is harmless.
 */
static GSList *
dump_str_stack_pop(GSList *stack, guint32 count)
{
	for (; stack != NULL && count > 0; count--) {
		g_free(stack->data);
		/* Unlink and free the head node; the next node becomes the head. */
		stack = g_slist_delete_link(stack, stack);
	}
	return stack;
}
/*
 * Append "func(arg, arg, ...)" to `buf`.
 *
 * The arguments are taken from the first `nargs` entries of `stack_print`
 * (fewer if the list is shorter). Each entry is prepended to the text built
 * so far, so the entry at the head of the list ends up last in the output.
 */
static void
append_call_function(wmem_strbuf_t *buf, const char *func, uint32_t nargs,
					GSList *stack_print)
{
	wmem_strbuf_append_printf(buf, "%s(", func);

	if (nargs > 0) {
		GString *args = g_string_new(NULL);
		const char *separator = "";
		GSList *node = stack_print;
		uint32_t taken = 0;

		while (node != NULL && taken < nargs) {
			/* Separator first, then the entry: both go in front. */
			g_string_prepend(args, separator);
			g_string_prepend(args, node->data);
			separator = ", ";
			taken++;
			node = node->next;
		}
		wmem_strbuf_append(buf, args->str);
		g_string_free(args, TRUE);
	}

	wmem_strbuf_append(buf, ")");
}
/*
 * Pad `buf` with spaces so that the text written since position `start`
 * is at least `offset` characters wide. Nothing is appended when that text
 * is already `offset` characters or longer.
 */
static void
indent(wmem_strbuf_t *buf, size_t offset, size_t start)
{
	size_t written = buf->len - start;

	if (written < offset) {
		wmem_strbuf_append_c_count(buf, ' ', offset - written);
	}
}
/* Fixed-width padding helpers built on indent(): pad the text written since
 * `start` out to 24 columns (indent1) or 16 columns (indent2). */
#define indent1(buf, start) indent(buf, 24, start)
#define indent2(buf, start) indent(buf, 16, start)
/* Append the destination marker " -> <reg>" to `buf`, where `reg` is the
 * already formatted name of the target register. */
static void
append_to_register(wmem_strbuf_t *buf, const char *reg)
{
wmem_strbuf_append_printf(buf, " -> %s", reg);
}
static void
2023-01-10 15:42:32 +00:00
append_op_args(wmem_strbuf_t *buf, dfvm_insn_t *insn, GSList **stack_print,
uint16_t flags)
{
dfvm_value_t *arg1, *arg2, *arg3;
char *arg1_str, *arg2_str, *arg3_str;
2023-01-10 15:42:32 +00:00
char *arg1_str_type, *arg2_str_type, *arg3_str_type;
size_t col_start;
arg1 = insn->arg1;
arg2 = insn->arg2;
arg3 = insn->arg3;
arg1_str = dfvm_value_tostr(arg1);
arg2_str = dfvm_value_tostr(arg2);
arg3_str = dfvm_value_tostr(arg3);
2023-01-10 15:42:32 +00:00
arg1_str_type = value_type_tostr(arg1, flags & DF_DUMP_SHOW_FTYPE);
arg2_str_type = value_type_tostr(arg2, flags & DF_DUMP_SHOW_FTYPE);
arg3_str_type = value_type_tostr(arg3, flags & DF_DUMP_SHOW_FTYPE);
2022-03-27 23:21:53 +00:00
col_start = buf->len;
switch (insn->op) {
case DFVM_CHECK_EXISTS:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s",
arg1_str, arg1_str_type);
break;
case DFVM_CHECK_EXISTS_R:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s#[%s]%s",
arg1_str, arg2_str, arg1_str_type);
break;
case DFVM_READ_TREE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s",
arg1_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_READ_TREE_R:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s#[%s]%s",
arg1_str, arg3_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_READ_REFERENCE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "${%s}%s",
arg1_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_READ_REFERENCE_R:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "${%s#[%s]}%s",
arg1_str, arg3_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_PUT_FVALUE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s",
arg1_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_CALL_FUNCTION:
append_call_function(buf, arg1_str, arg3->value.numeric, *stack_print);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_STACK_PUSH:
wmem_strbuf_append_printf(buf, "%s", arg1_str);
*stack_print = dump_str_stack_push(*stack_print, arg1_str);
break;
case DFVM_STACK_POP:
wmem_strbuf_append_printf(buf, "%s", arg1_str);
*stack_print = dump_str_stack_pop(*stack_print, arg1->value.numeric);
break;
case DFVM_SLICE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s[%s]%s",
arg1_str, arg3_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
case DFVM_LENGTH:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s",
arg1_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
case DFVM_ALL_EQ:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s === %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ANY_EQ:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s == %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_NE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s != %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ANY_NE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s !== %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_GT:
case DFVM_ANY_GT:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s > %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_GE:
case DFVM_ANY_GE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s >= %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
dfilter: Fix "!=" relation to be free of contradictions Wireshark defines the relation of equality A == B as A any_eq B <=> An == Bn for at least one An, Bn. More accurately I think this is (formally) an equivalence relation, not true equality. Whichever definition for "==" we choose we must keep the definition of "!=" as !(A == B), otherwise it will lead to logical contradictions like (A == B) AND (A != B) being true. Fix the '!=' relation to match the definition of equality: A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n. This has been the recommended way to write "not equal" for a long time in the documentation, even to the point where != was deprecated, but it just wasn't implemented consistently in the language, which has understandably been a persistent source of confusion. Even a field that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will produce unexpected results with encapsulations like IP-over-IP. The opcode ALL_NE could have been implemented in the compiler instead using NOT and ANY_EQ but I chose to implement it in bytecode. It just seemed more elegant and efficient but the difference was not very significant. Keep around "~=" for any_ne relation, in case someone depends on that, and because we don't have an operator for true equality: A strict_equal B <=> A all_eq B <=> !(A any_ne B). If there is only one value then any_ne and all_ne are the same comparison operation. Implementing this change did not require fixing any tests so it is unlikely the relation "~=" (any_ne) will be very useful. Note that the behaviour of the '<' (less than) comparison relation is a separate, more subtle issue. In the general case the definition of '<' that is used is only a partial order.
2021-10-18 20:07:06 +00:00
case DFVM_ALL_LT:
case DFVM_ANY_LT:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s < %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_LE:
case DFVM_ANY_LE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s <= %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_NOT_ALL_ZERO:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s",
arg1_str, arg1_str_type);
break;
case DFVM_ALL_CONTAINS:
case DFVM_ANY_CONTAINS:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s contains %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_MATCHES:
case DFVM_ANY_MATCHES:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s matches %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
break;
case DFVM_ALL_IN_RANGE:
case DFVM_ANY_IN_RANGE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s in { %s%s .. %s%s }",
arg1_str, arg1_str_type, arg2_str, arg2_str_type, arg3_str, arg3_str_type);
break;
case DFVM_BITWISE_AND:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s & %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_UNARY_MINUS:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "-%s%s",
arg1_str, arg1_str_type);
indent2(buf, col_start);
append_to_register(buf, arg2_str);
break;
case DFVM_ADD:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s + %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_SUBTRACT:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s - %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_MULTIPLY:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s * %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_DIVIDE:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s / %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_MODULO:
2023-01-10 15:42:32 +00:00
wmem_strbuf_append_printf(buf, "%s%s %% %s%s",
arg1_str, arg1_str_type, arg2_str, arg2_str_type);
indent2(buf, col_start);
append_to_register(buf, arg3_str);
break;
case DFVM_IF_TRUE_GOTO:
case DFVM_IF_FALSE_GOTO:
wmem_strbuf_append_printf(buf, "%u", arg1->value.numeric);
break;
case DFVM_NOT:
case DFVM_RETURN:
ws_assert_not_reached();
}
g_free(arg1_str);
g_free(arg2_str);
g_free(arg3_str);
2023-01-10 15:42:32 +00:00
g_free(arg1_str_type);
g_free(arg2_str_type);
g_free(arg3_str_type);
}
/*
 * Appends a printable listing of a references table to buf.
 *
 * Every entry of the hash table is keyed by a header_field_info and holds
 * a GPtrArray of df_reference_t. One line is written per entry:
 *
 *     ${abbrev} = {value <type>, value <type>, ...}
 *
 * When raw is set the field abbreviation is written with a leading '@'.
 */
static void
append_references(wmem_strbuf_t *buf, GHashTable *references, gboolean raw)
{
	GHashTableIter	iter;
	gpointer	hf_key, array_val;

	g_hash_table_iter_init(&iter, references);
	while (g_hash_table_iter_next(&iter, &hf_key, &array_val)) {
		const header_field_info *hfinfo = hf_key;
		GPtrArray *values = array_val;

		if (raw) {
			wmem_strbuf_append_printf(buf, " ${@%s} = {", hfinfo->abbrev);
		}
		else {
			wmem_strbuf_append_printf(buf, " ${%s} = {", hfinfo->abbrev);
		}

		for (guint n = 0; n < values->len; n++) {
			const df_reference_t *entry;
			char *repr;

			/* Separator goes before every element except the first. */
			if (n > 0) {
				wmem_strbuf_append(buf, ", ");
			}

			entry = g_ptr_array_index(values, n);
			/* Allocated with the NULL allocator; released below. */
			repr = fvalue_to_debug_repr(NULL, entry->value);
			wmem_strbuf_append_printf(buf, "%s <%s>", repr, fvalue_type_name(entry->value));
			g_free(repr);
		}

		wmem_strbuf_append(buf, "}\n");
	}
}
char *
2023-01-10 15:42:32 +00:00
dfvm_dump_str(wmem_allocator_t *alloc, dfilter_t *df, uint16_t flags)
{
int id, length;
dfvm_insn_t *insn;
wmem_strbuf_t *buf;
GSList *stack_print = NULL;
size_t col_start;
buf = wmem_strbuf_new(alloc, NULL);
2022-03-27 15:38:39 +00:00
2023-01-10 15:42:32 +00:00
if ((flags & DF_DUMP_REFERENCES) && g_hash_table_size(df->references) > 0) {
wmem_strbuf_append(buf, "References:\n");
append_references(buf, df->references, FALSE);
wmem_strbuf_append_c(buf, '\n');
dfilter: Refactor macro tree references This replaces the current macro reference system with a completely different implementation. Instead of a macro a reference is a syntax element. A reference is a constant that can be filled in the dfilter code after compilation from an existing protocol tree. It is best understood as a field value that can be read from a fixed tree that is not the frame being filtered. Usually this fixed tree is the currently selected frame when the filter is applied. This allows comparing fields in the filtered frame with fields in the selected frame. Because the field reference syntax uses the same sigil notation as a macro we have to use a heuristic to distinguish them: if the name has a dot it is a field reference, otherwise it is a macro name. The reference is synctatically validated at compile time. There are two main advantages to this implementation (and a couple of minor ones): The protocol tree for each selected frame is only walked if we have a display filter and if the display filter uses references. Also only the actual reference values are copied, intead of loading the entire tree into a hash table (in textual form even). The other advantage is that the reference is tested like a protocol field against all the values in the selected frame (if there is more than one). Currently the reference fields are not "primed" during dissection, so the entire tree is walked to find a particular reference (this is similar to the previous implementation). If the display filter contains a valid reference and the reference is not loaded at the time the filter is run the result is the same as a non existing field for a regular READ_TREE instruction. Fixes #17599.
2022-03-27 14:26:46 +00:00
}
2023-01-10 15:42:32 +00:00
if ((flags & DF_DUMP_REFERENCES) && g_hash_table_size(df->raw_references) > 0) {
wmem_strbuf_append(buf, "Raw references:\n");
append_references(buf, df->raw_references, TRUE);
wmem_strbuf_append_c(buf, '\n');
}
wmem_strbuf_append(buf, "Instructions:");
length = df->insns->len;
for (id = 0; id < length; id++) {
insn = g_ptr_array_index(df->insns, id);
col_start = buf->len;
wmem_strbuf_append_printf(buf, "\n %04d %s", id, dfvm_opcode_tostr(insn->op));
switch (insn->op) {
case DFVM_NOT:
case DFVM_RETURN:
/* Nothing here */
break;
default:
indent1(buf, col_start);
2023-01-10 15:42:32 +00:00
append_op_args(buf, insn, &stack_print, flags);
break;
}
}
2022-03-27 15:38:39 +00:00
return wmem_strbuf_finalize(buf);
}
void
2023-01-10 15:42:32 +00:00
dfvm_dump(FILE *f, dfilter_t *df, uint16_t flags)
2022-03-27 15:38:39 +00:00
{
2023-01-10 15:42:32 +00:00
char *str = dfvm_dump_str(NULL, df, flags);
2022-03-27 15:38:39 +00:00
fputs(str, f);
fputc('\n', f);
2022-03-27 15:38:39 +00:00
wmem_free(NULL, str);
}
/*
 * Sort comparator ordering field_info entries by ascending protocol layer
 * number. It is passed to g_ptr_array_sort(), so each argument is a pointer
 * to an array element, i.e. a pointer to a field_info pointer.
 *
 * Returns a negative value, zero or a positive value when a's layer number
 * is respectively lower than, equal to or higher than b's.
 */
static int
compare_finfo_layer(gconstpointer _a, gconstpointer _b)
{
	/* Keep every level const: the comparator must not modify the array. */
	const field_info *a = *(const field_info * const *)_a;
	const field_info *b = *(const field_info * const *)_b;

	/*
	 * Three-way compare rather than "a - b": the subtraction can overflow
	 * (undefined behaviour) for operands of opposite sign, and only the
	 * sign of the result matters to the sort.
	 */
	return (a->proto_layer_num > b->proto_layer_num) -
	       (a->proto_layer_num < b->proto_layer_num);
}
/*
 * Tests whether layer number num is selected by any node of the range dr.
 *
 * length is the highest layer number; it is used to resolve a negative
 * start offset, which counts back from the end (-1 becomes length).
 * Both bounds of a node are inclusive.
 *
 * Returns TRUE on the first node that contains num, FALSE if none does.
 */
static gboolean
drange_contains_layer(drange_t *dr, int num, int length)
{
	for (GSList *it = dr->range_list; it != NULL; it = g_slist_next(it)) {
		const drange_node *node = it->data;
		int first = node->start_offset;
		int last;

		if (first < 0) {
			first += length + 1;
		}

		switch (node->ending) {
		case DRANGE_NODE_END_T_LENGTH:
			last = first + node->length - 1;
			break;
		case DRANGE_NODE_END_T_OFFSET:
			last = node->end_offset;
			break;
		case DRANGE_NODE_END_T_TO_THE_END:
			/* Open-ended: every layer from "first" onwards matches. */
			last = INT_MAX;
			break;
		default:
			ws_assert_not_reached();
		}

		if (first <= num && num <= last) { /* inclusive */
			return TRUE;
		}
	}

	return FALSE;
}
fvalue_t *
dfvm_get_raw_fvalue(const field_info *fi)
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
{
GByteArray *bytes;
fvalue_t *fv;
int length, tvb_length;
/*
* XXX - a field can have a length that runs past
* the end of the tvbuff. Ideally, that should
* be fixed when adding an item to the protocol
* tree, but checking the length when doing
* that could be expensive. Until we fix that,
* we'll do the check here.
*/
tvb_length = tvb_captured_length_remaining(fi->ds_tvb, fi->start);
if (tvb_length < 0) {
return NULL;
}
length = fi->length;
if (length > tvb_length)
length = tvb_length;
bytes = g_byte_array_new();
g_byte_array_append(bytes, tvb_get_ptr(fi->ds_tvb, fi->start, length), length);
fv = fvalue_new(FT_BYTES);
fvalue_set_byte_array(fv, bytes);
return fv;
}
static GSList *
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
filter_finfo_fvalues(GSList *fvalues, GPtrArray *finfos, drange_t *range, gboolean raw)
{
int length; /* maximum proto layer number. The numbers are sequential. */
field_info *last_finfo, *finfo;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
fvalue_t *fv;
int cookie = -1;
gboolean cookie_matches = false;
int layer;
g_ptr_array_sort(finfos, compare_finfo_layer);
last_finfo = finfos->pdata[finfos->len - 1];
length = last_finfo->proto_layer_num;
for (guint i = 0; i < finfos->len; i++) {
finfo = finfos->pdata[i];
layer = finfo->proto_layer_num;
if (cookie == layer) {
if (cookie_matches) {
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
if (raw)
fv = dfvm_get_raw_fvalue(finfo);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
else {
cookie = layer;
cookie_matches = drange_contains_layer(range, layer, length);
if (cookie_matches) {
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
if (raw)
fv = dfvm_get_raw_fvalue(finfo);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
}
return fvalues;
}
/* Reads a field from the proto_tree and loads the fvalues into a register,
* if that field has not already been read. */
static gboolean
2022-03-21 12:19:54 +00:00
read_tree(dfilter_t *df, proto_tree *tree,
dfvm_value_t *arg1, dfvm_value_t *arg2,
dfvm_value_t *arg3)
{
GPtrArray *finfos;
field_info *finfo;
int i, len;
GSList *fvalues = NULL;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
fvalue_t *fv;
drange_t *range = NULL;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
gboolean raw;
2022-03-21 12:19:54 +00:00
header_field_info *hfinfo = arg1->value.hfinfo;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
raw = arg1->type == RAW_HFINFO;
2022-03-21 12:19:54 +00:00
int reg = arg2->value.numeric;
if (arg3) {
range = arg3->value.drange;
}
/* Already loaded in this run of the dfilter? */
if (df->attempted_load[reg]) {
if (df->registers[reg]) {
return TRUE;
}
else {
return FALSE;
}
}
df->attempted_load[reg] = TRUE;
while (hfinfo) {
finfos = proto_get_finfo_ptr_array(tree, hfinfo->id);
if ((finfos == NULL) || (g_ptr_array_len(finfos) == 0)) {
hfinfo = hfinfo->same_name_next;
continue;
}
if (range) {
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
fvalues = filter_finfo_fvalues(fvalues, finfos, range, raw);
}
else {
len = finfos->len;
for (i = 0; i < len; i++) {
finfo = g_ptr_array_index(finfos, i);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
if (raw)
fv = dfvm_get_raw_fvalue(finfo);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
hfinfo = hfinfo->same_name_next;
}
if (fvalues == NULL) {
return FALSE;
}
df->registers[reg] = fvalues;
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
if (raw) {
df->free_registers[reg] = (GDestroyNotify)fvalue_free;
}
else {
// These values are referenced only, do not try to free it later.
df->free_registers[reg] = NULL;
}
return TRUE;
}
/*
 * Build a list with the values of the references stored in refs_array.
 *
 * When range is NULL every reference value is taken. Otherwise only the
 * references whose protocol layer is accepted by drange_contains_layer()
 * are taken; the layer test is evaluated once per run of consecutive
 * references that share the same layer number.
 *
 * Values are prepended, so the returned list is in the reverse order of
 * the array. Returns NULL when the array is missing or empty, or when no
 * reference was selected.
 */
static GSList *
filter_refs_fvalues(GPtrArray *refs_array, drange_t *range)
{
	GSList *result = NULL;
	int prev_layer = -1;
	gboolean prev_layer_selected = false;
	int max_layer;

	if (refs_array == NULL || refs_array->len == 0) {
		return NULL;
	}

	/*
	 * The refs array is sorted, so the last entry supplies the maximum
	 * proto layer number (the numbers are sequential).
	 */
	max_layer = ((df_reference_t *)refs_array->pdata[refs_array->len - 1])->proto_layer_num;

	for (guint idx = 0; idx < refs_array->len; idx++) {
		df_reference_t *entry = refs_array->pdata[idx];

		/* Re-evaluate the range only when we move on to another layer. */
		if (range != NULL && entry->proto_layer_num != prev_layer) {
			prev_layer = entry->proto_layer_num;
			prev_layer_selected = drange_contains_layer(range, prev_layer, max_layer);
		}

		if (range == NULL || prev_layer_selected) {
			result = g_slist_prepend(result, entry->value);
		}
	}

	return result;
}
dfilter: Refactor macro tree references This replaces the current macro reference system with a completely different implementation. Instead of a macro a reference is a syntax element. A reference is a constant that can be filled in the dfilter code after compilation from an existing protocol tree. It is best understood as a field value that can be read from a fixed tree that is not the frame being filtered. Usually this fixed tree is the currently selected frame when the filter is applied. This allows comparing fields in the filtered frame with fields in the selected frame. Because the field reference syntax uses the same sigil notation as a macro we have to use a heuristic to distinguish them: if the name has a dot it is a field reference, otherwise it is a macro name. The reference is syntactically validated at compile time. There are two main advantages to this implementation (and a couple of minor ones): The protocol tree for each selected frame is only walked if we have a display filter and if the display filter uses references. Also only the actual reference values are copied, instead of loading the entire tree into a hash table (in textual form even). The other advantage is that the reference is tested like a protocol field against all the values in the selected frame (if there is more than one). Currently the reference fields are not "primed" during dissection, so the entire tree is walked to find a particular reference (this is similar to the previous implementation). If the display filter contains a valid reference and the reference is not loaded at the time the filter is run the result is the same as a non-existing field for a regular READ_TREE instruction. Fixes #17599.
2022-03-27 14:26:46 +00:00
/*
 * Load the values of a field reference into a register.
 *
 * arg1 - the referenced field (arg1->value.hfinfo); when its type is
 *        RAW_HFINFO the values are looked up in df->raw_references,
 *        otherwise in df->references.
 * arg2 - the destination register number (arg2->value.numeric).
 * arg3 - optional layer range (arg3->value.drange); may be NULL, in which
 *        case all values of the reference are loaded.
 *
 * The load is attempted at most once per run of the filter; later calls
 * for the same register only report the outcome of the first attempt.
 *
 * Returns TRUE if the register holds at least one value, FALSE otherwise.
 */
static gboolean
read_reference(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
				dfvm_value_t *arg3)
{
	GPtrArray	*refs;
	drange_t	*range = NULL;
	gboolean	raw;

	header_field_info *hfinfo = arg1->value.hfinfo;
	raw = arg1->type == RAW_HFINFO;

	int reg = arg2->value.numeric;

	if (arg3) {
		range = arg3->value.drange;
	}

	/* Already loaded in this run of the dfilter? */
	if (df->attempted_load[reg]) {
		return df->registers[reg] != NULL;
	}

	df->attempted_load[reg] = TRUE;

	if (raw)
		refs = g_hash_table_lookup(df->raw_references, hfinfo);
	else
		refs = g_hash_table_lookup(df->references, hfinfo);
	if (refs == NULL || refs->len == 0) {
		df->registers[reg] = NULL;
		return FALSE;
	}

	df->registers[reg] = filter_refs_fvalues(refs, range);
	// These values are referenced only, do not try to free it later.
	df->free_registers[reg] = NULL;

	/*
	 * The layer range can reject every reference, leaving the register
	 * empty. Report that as "nothing loaded", exactly as the cached path
	 * above does for the same register, instead of claiming success on
	 * the first call and failure on every later one.
	 */
	return df->registers[reg] != NULL;
}
dfilter: Fix "!=" relation to be free of contradictions Wireshark defines the relation of equality A == B as A any_eq B <=> An == Bn for at least one An, Bn. More accurately I think this is (formally) an equivalence relation, not true equality. Whichever definition for "==" we choose we must keep the definition of "!=" as !(A == B), otherwise it will lead to logical contradictions like (A == B) AND (A != B) being true. Fix the '!=' relation to match the definition of equality: A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n. This has been the recommended way to write "not equal" for a long time in the documentation, even to the point where != was deprecated, but it just wasn't implemented consistently in the language, which has understandably been a persistent source of confusion. Even a field that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will produce unexpected results with encapsulations like IP-over-IP. The opcode ALL_NE could have been implemented in the compiler instead using NOT and ANY_EQ but I chose to implement it in bytecode. It just seemed more elegant and efficient but the difference was not very significant. Keep around "~=" for any_ne relation, in case someone depends on that, and because we don't have an operator for true equality: A strict_equal B <=> A all_eq B <=> !(A any_ne B). If there is only one value then any_ne and all_ne are the same comparison operation. Implementing this change did not require fixing any tests so it is unlikely the relation "~=" (any_ne) will be very useful. Note that the behaviour of the '<' (less than) comparison relation is a separate, more subtle issue. In the general case the definition of '<' that is used is only a partial order.
2021-10-18 20:07:06 +00:00
/* Selects how the per-value results of a test are combined into one result. */
enum match_how {
/* The test succeeds as soon as one value (or pair of values) matches. */
MATCH_ANY,
/* The test fails as soon as one value (or pair of values) does not match;
 * an empty input therefore succeeds. */
MATCH_ALL
};
/* Binary predicate over two field values, used by the comparison tests. */
typedef ft_bool_t (*DFVMCompareFunc)(const fvalue_t*, const fvalue_t*);
/* Unary predicate over a single field value, used by the unary tests. */
typedef ft_bool_t (*DFVMTestFunc)(const fvalue_t*);
/*
 * Apply match_func to every pair formed by one value of arg1 and one
 * value of arg2, and combine the results according to how.
 *
 * MATCH_ANY: TRUE as soon as one pair yields FT_TRUE, otherwise FALSE.
 * MATCH_ALL: FALSE as soon as one pair yields FT_FALSE, otherwise TRUE
 *            (which includes the case where either list is empty).
 *
 * A result that is neither FT_TRUE nor FT_FALSE, should match_func
 * produce one, does not decide the outcome either way.
 */
static gboolean
cmp_test_internal(enum match_how how, DFVMCompareFunc match_func,
					GSList *arg1, GSList *arg2)
{
	for (GSList *lhs = arg1; lhs != NULL; lhs = g_slist_next(lhs)) {
		for (GSList *rhs = arg2; rhs != NULL; rhs = g_slist_next(rhs)) {
			ft_bool_t outcome = match_func(lhs->data, rhs->data);

			if (how == MATCH_ALL) {
				if (outcome == FT_FALSE) {
					return FALSE;
				}
			}
			else if (how == MATCH_ANY && outcome == FT_TRUE) {
				return TRUE;
			}
		}
	}

	/* Nothing decided early: ALL holds vacuously, ANY found no match. */
	return how == MATCH_ALL;
}
/*
 * Apply test_func to every value of arg1 and combine the results
 * according to how.
 *
 * MATCH_ANY: TRUE as soon as one value yields FT_TRUE, otherwise FALSE.
 * MATCH_ALL: FALSE as soon as one value yields FT_FALSE, otherwise TRUE
 *            (which includes the case of an empty list).
 */
static gboolean
cmp_test_unary(enum match_how how, DFVMTestFunc test_func, GSList *arg1)
{
	for (GSList *node = arg1; node != NULL; node = g_slist_next(node)) {
		ft_bool_t outcome = test_func(node->data);

		if (how == MATCH_ALL) {
			if (outcome == FT_FALSE) {
				return FALSE;
			}
		}
		else if (how == MATCH_ANY && outcome == FT_TRUE) {
			return TRUE;
		}
	}

	/* Nothing decided early: ALL holds vacuously, ANY found no match. */
	return how == MATCH_ALL;
}
/*
 * Run the unary test func over every value held in the register named
 * by arg1, with MATCH_ALL semantics. arg1 must be a REGISTER operand.
 */
static gboolean
all_test_unary(dfilter_t *df, DFVMTestFunc func, dfvm_value_t *arg1)
{
	ws_assert(arg1->type == REGISTER);

	return cmp_test_unary(MATCH_ALL, func, df->registers[arg1->value.numeric]);
}
static gboolean
cmp_test(dfilter_t *df, DFVMCompareFunc cmp,
dfvm_value_t *arg1, dfvm_value_t *arg2,
enum match_how how)
dfilter: Fix "!=" relation to be free of contradictions Wireshark defines the relation of equality A == B as A any_eq B <=> An == Bn for at least one An, Bn. More accurately I think this is (formally) an equivalence relation, not true equality. Whichever definition for "==" we choose we must keep the definition of "!=" as !(A == B), otherwise it will lead to logical contradictions like (A == B) AND (A != B) being true. Fix the '!=' relation to match the definition of equality: A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n. This has been the recomended way to write "not equal" for a long time in the documentation, even to the point where != was deprecated, but it just wasn't implemented consistently in the language, which has understandably been a persistent source of confusion. Even a field that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will produce unexpected results with encapsulations like IP-over-IP. The opcode ALL_NE could have been implemented in the compiler instead using NOT and ANY_EQ but I chose to implement it in bytecode. It just seemed more elegant and efficient but the difference was not very significant. Keep around "~=" for any_ne relation, in case someone depends on that, and because we don't have an operator for true equality: A strict_equal B <=> A all_eq B <=> !(A any_ne B). If there is only one value then any_ne and all_ne are the same comparison operation. Implementing this change did not require fixing any tests so it is unlikely the relation "~=" (any_ne) will be very useful. Note that the behaviour of the '<' (less than) comparison relation is a separate, more subtle issue. In the general case the definition of '<' that is used is only a partial order.
2021-10-18 20:07:06 +00:00
{
GSList list1, list2, *l1, *l2;
dfilter: Fix "!=" relation to be free of contradictions Wireshark defines the relation of equality A == B as A any_eq B <=> An == Bn for at least one An, Bn. More accurately I think this is (formally) an equivalence relation, not true equality. Whichever definition for "==" we choose we must keep the definition of "!=" as !(A == B), otherwise it will lead to logical contradictions like (A == B) AND (A != B) being true. Fix the '!=' relation to match the definition of equality: A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n. This has been the recomended way to write "not equal" for a long time in the documentation, even to the point where != was deprecated, but it just wasn't implemented consistently in the language, which has understandably been a persistent source of confusion. Even a field that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will produce unexpected results with encapsulations like IP-over-IP. The opcode ALL_NE could have been implemented in the compiler instead using NOT and ANY_EQ but I chose to implement it in bytecode. It just seemed more elegant and efficient but the difference was not very significant. Keep around "~=" for any_ne relation, in case someone depends on that, and because we don't have an operator for true equality: A strict_equal B <=> A all_eq B <=> !(A any_ne B). If there is only one value then any_ne and all_ne are the same comparison operation. Implementing this change did not require fixing any tests so it is unlikely the relation "~=" (any_ne) will be very useful. Note that the behaviour of the '<' (less than) comparison relation is a separate, more subtle issue. In the general case the definition of '<' that is used is only a partial order.
2021-10-18 20:07:06 +00:00
if (arg1->type == REGISTER) {
l1 = df->registers[arg1->value.numeric];
}
else if (arg1->type == FVALUE) {
list1.data = arg1->value.fvalue;
list1.next = NULL;
l1 = &list1;
}
else {
ws_assert_not_reached();
}
if (arg2->type == REGISTER) {
l2 = df->registers[arg2->value.numeric];
}
else if (arg2->type == FVALUE) {
list2.data = arg2->value.fvalue;
list2.next = NULL;
l2 = &list2;
}
else {
ws_assert_not_reached();
}
return cmp_test_internal(how, cmp, l1, l2);
}
/*
 * "Any" comparison: holds if cmp holds for at least one pair of values.
 * cmp(A) <=> cmp(a1) OR cmp(a2) OR cmp(a3) OR ...
 */
static inline gboolean
any_test(dfilter_t *df, DFVMCompareFunc cmp,
				dfvm_value_t *arg1, dfvm_value_t *arg2)
{
	const enum match_how how = MATCH_ANY;

	return cmp_test(df, cmp, arg1, arg2, how);
}
2022-03-21 12:19:54 +00:00
/* cmp(A) <=> cmp(a1) AND cmp(a2) AND cmp(a3) AND ... */
static gboolean
2022-03-21 12:19:54 +00:00
all_test(dfilter_t *df, DFVMCompareFunc cmp,
dfvm_value_t *arg1, dfvm_value_t *arg2)
{
return cmp_test(df, cmp, arg1, arg2, MATCH_ALL);
}
static gboolean
2022-03-21 12:19:54 +00:00
any_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
GSList *list1 = df->registers[arg1->value.numeric];
ws_regex_t *re = arg2->value.pcre;
while (list1) {
if (fvalue_matches(list1->data, re) == FT_TRUE) {
return TRUE;
}
list1 = g_slist_next(list1);
}
return FALSE;
}
/*
 * Regex match with "all" semantics: FALSE as soon as one value in the
 * register named by arg1 yields FT_FALSE against the compiled regex
 * held in arg2, TRUE otherwise (including when the register is empty).
 */
static gboolean
all_matches(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
	ws_regex_t *pattern = arg2->value.pcre;
	GSList *node;

	for (node = df->registers[arg1->value.numeric]; node != NULL; node = g_slist_next(node)) {
		if (fvalue_matches(node->data, pattern) == FT_FALSE) {
			return FALSE;
		}
	}

	return TRUE;
}
static gboolean
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
any_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
while (list1) {
if (fvalue_ge(list1->data, low) == FT_TRUE &&
fvalue_le(list1->data, high) == FT_TRUE) {
return TRUE;
}
list1 = g_slist_next(list1);
}
return FALSE;
}
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/* Tests whether every value in list1 lies within the inclusive range
 * [low, high].
 *
 * Returns FALSE as soon as one value compares FT_FALSE for ">= low" or
 * for "<= high"; otherwise returns TRUE. An empty list yields TRUE. */
static gboolean
all_in_range_internal(GSList *list1, fvalue_t *low, fvalue_t *high)
{
	for (GSList *node = list1; node != NULL; node = node->next) {
		/* The upper bound is only checked when the lower bound
		 * test did not already fail. */
		if (fvalue_ge(node->data, low) == FT_FALSE) {
			return FALSE;
		}
		if (fvalue_le(node->data, high) == FT_FALSE) {
			return FALSE;
		}
	}
	return TRUE;
}
static gboolean
match_in_range(dfilter_t *df, enum match_how how, dfvm_value_t *arg1,
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
GSList *list1 = df->registers[arg1->value.numeric];
GSList *_low, *_high;
fvalue_t *low, *high;
if (arg_low->type == REGISTER) {
_low = df->registers[arg_low->value.numeric];
ws_assert(g_slist_length(_low) == 1);
low = _low->data;
}
else if (arg_low->type == FVALUE) {
low = arg_low->value.fvalue;
}
else {
ws_assert_not_reached();
}
if (arg_high->type == REGISTER) {
_high = df->registers[arg_high->value.numeric];
ws_assert(g_slist_length(_high) == 1);
high = _high->data;
}
else if (arg_high->type == FVALUE) {
high = arg_high->value.fvalue;
}
else {
ws_assert_not_reached();
}
if (how == MATCH_ALL)
return all_in_range_internal(list1, low, high);
else if (how == MATCH_ANY)
return any_in_range_internal(list1, low, high);
else
ws_assert_not_reached();
}
/* Range test that passes through to match_in_range() with MATCH_ANY. */
static gboolean
any_in_range(dfilter_t *df, dfvm_value_t *arg1,
				dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
	gboolean matched;

	matched = match_in_range(df, MATCH_ANY, arg1, arg_low, arg_high);
	return matched;
}
/* Range test that passes through to match_in_range() with MATCH_ALL. */
static gboolean
all_in_range(dfilter_t *df, dfvm_value_t *arg1,
				dfvm_value_t *arg_low, dfvm_value_t *arg_high)
{
	gboolean matched;

	matched = match_in_range(df, MATCH_ALL, arg1, arg_low, arg_high);
	return matched;
}
/* Resets the per-evaluation state of every register.
 *
 * The attempted_load flag is always cleared. A non-empty register has
 * its list released and its slot set to NULL; when a free function is
 * recorded for that register it is first applied to each element (the
 * values were created by us) and then cleared as well. */
static void
free_register_overhead(dfilter_t* df)
{
	for (guint reg = 0; reg < df->num_registers; reg++) {
		GSList *values = df->registers[reg];

		df->attempted_load[reg] = FALSE;

		/* Nothing was stored in this register. */
		if (values == NULL) {
			continue;
		}

		if (df->free_registers[reg]) {
			for (GSList *node = values; node != NULL; node = node->next) {
				df->free_registers[reg](node->data);
			}
			df->free_registers[reg] = NULL;
		}

		/* Release the list nodes themselves. */
		g_slist_free(values);
		df->registers[reg] = NULL;
	}
}
/* Builds a register of slices.
 *
 * Every fvalue_t in the register named by from_arg is passed through
 * fvalue_slice() with the drange carried by drange_arg. The resulting
 * values are stored in the register named by to_arg, which is marked to
 * be released with fvalue_free(). Results are prepended, so the output
 * list is in reverse order relative to the input list. */
static void
mk_slice(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg,
				dfvm_value_t *drange_arg)
{
	GSList *slices = NULL;
	GSList *node = df->registers[from_arg->value.numeric];
	drange_t *drange = drange_arg->value.drange;

	for (; node != NULL; node = node->next) {
		fvalue_t *slice = fvalue_slice(node->data, drange);

		/* Assert here because semcheck.c should have
		 * already caught the cases in which a slice
		 * cannot be made. */
		ws_assert(slice);
		slices = g_slist_prepend(slices, slice);
	}

	df->registers[to_arg->value.numeric] = slices;
	df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
/* Builds a register of lengths.
 *
 * For every fvalue_t in the register named by from_arg a new FT_UINT32
 * value is created and set to fvalue_length() of the source value. The
 * new values are stored in the register named by to_arg, which is marked
 * to be released with fvalue_free(). Results are prepended, so the
 * output list is in reverse order relative to the input list. */
static void
mk_length(dfilter_t *df, dfvm_value_t *from_arg, dfvm_value_t *to_arg)
{
	GSList *lengths = NULL;
	GSList *node = df->registers[from_arg->value.numeric];

	for (; node != NULL; node = node->next) {
		fvalue_t *source = node->data;
		fvalue_t *length = fvalue_new(FT_UINT32);

		fvalue_set_uinteger(length, fvalue_length(source));
		lengths = g_slist_prepend(lengths, length);
	}

	df->registers[to_arg->value.numeric] = lengths;
	df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
2022-03-21 12:19:54 +00:00
/* Invokes a display filter function.
 *
 * arg1 carries the function definition, arg2 the number of the register
 * that receives the result and arg3 the argument count. The function is
 * called with the current function stack and the argument count; the
 * list it produces is written to the return register.
 *
 * Returns whatever the called function returned. */
static gboolean
call_function(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *arg2,
				dfvm_value_t *arg3)
{
	df_func_def_t *funcdef = arg1->value.funcdef;
	guint32 reg_return = arg2->value.numeric;
	guint32 arg_count = arg3->value.numeric;
	GSList *retval = NULL;
	gboolean accum;

	accum = funcdef->function(df->function_stack, arg_count, &retval);

	/* Write return registers. Functions create a new value, so the
	 * register owns it and must release it with fvalue_free(). */
	df->registers[reg_return] = retval;
	df->free_registers[reg_return] = (GDestroyNotify)fvalue_free;

	return accum;
}
/* Logs, at "noisy" level, a failed operation on two values: the debug
 * representation of both operands, the operator text and the error
 * message. The temporary representations are released before returning. */
static void
debug_op_error(fvalue_t *v1, fvalue_t *v2, const char *op, const char *msg)
{
	char *lhs_repr = fvalue_to_debug_repr(NULL, v1);
	char *rhs_repr = fvalue_to_debug_repr(NULL, v2);

	ws_noisy("Error: %s %s %s: %s", lhs_repr, op, rhs_repr, msg);

	g_free(lhs_repr);
	g_free(rhs_repr);
}
/* Dumps the contents of a register as "Reg#N = { value <type>, ... }".
 *
 * Used for temporary debugging only, don't leave in production code (at
 * a minimum WS_DEBUG_HERE must be replaced by another log level). */
static void _U_
debug_register(GSList *reg, guint32 num)
{
	wmem_strbuf_t *out = wmem_strbuf_new(NULL, NULL);
	GSList *node = reg;

	wmem_strbuf_append_printf(out, "Reg#%"G_GUINT32_FORMAT" = { ", num);
	while (node != NULL) {
		char *repr = fvalue_to_debug_repr(NULL, node->data);

		wmem_strbuf_append_printf(out, "%s <%s>", repr, fvalue_type_name(node->data));
		g_free(repr);

		/* Separate entries, but do not trail the last one. */
		node = node->next;
		if (node != NULL) {
			wmem_strbuf_append(out, ", ");
		}
	}
	wmem_strbuf_append_c(out, '}');

	WS_DEBUG_HERE("%s", wmem_strbuf_get_str(out));
	wmem_strbuf_destroy(out);
}
/* Signature of the binary fvalue operations applied by mk_binary().
 * Takes two operands and an out-parameter for an error message. As used
 * in this file, a NULL return signals failure and the message placed in
 * *err_msg is released by the caller with g_free(). */
typedef fvalue_t *(*DFVMBinaryFunc)(const fvalue_t *lhs, const fvalue_t *rhs, char **err_msg);
/* Applies func to every pairing of a value from arg1 with a value from
 * arg2 (arg1 is the outer loop).
 *
 * Successful results are prepended to a new list that is handed back
 * through *retval. A failed operation (NULL result) is logged and
 * skipped, and its error message is freed.
 *
 * NOTE(review): the operator text passed to the log is always "&",
 * whichever operation func actually implements. */
static void
mk_binary_internal(DFVMBinaryFunc func,
			GSList *arg1, GSList *arg2, GSList **retval)
{
	GSList *results = NULL;
	char *err_msg = NULL;

	for (GSList *lhs = arg1; lhs != NULL; lhs = lhs->next) {
		fvalue_t *val1 = lhs->data;

		for (GSList *rhs = arg2; rhs != NULL; rhs = rhs->next) {
			fvalue_t *val2 = rhs->data;
			fvalue_t *result = func(val1, val2, &err_msg);

			if (result != NULL) {
				results = g_slist_prepend(results, result);
				continue;
			}

			debug_op_error(val1, val2, "&", err_msg);
			g_free(err_msg);
			err_msg = NULL;
		}
	}

	*retval = results;
}
/* Evaluates a binary operation and stores the results in a register.
 *
 * Each operand is either a REGISTER (its value list is used as is) or a
 * literal FVALUE, which is wrapped in a one-element list node living on
 * this function's stack. The results are written to the register named
 * by to_arg, which is marked to be released with fvalue_free(). */
static void
mk_binary(dfilter_t *df, DFVMBinaryFunc func,
		dfvm_value_t *arg1, dfvm_value_t *arg2, dfvm_value_t *to_arg)
{
	GSList lhs_node, rhs_node;
	GSList *lhs, *rhs;
	GSList *out = NULL;

	if (arg1->type == FVALUE) {
		lhs_node.data = arg1->value.fvalue;
		lhs_node.next = NULL;
		lhs = &lhs_node;
	}
	else if (arg1->type == REGISTER) {
		lhs = df->registers[arg1->value.numeric];
	}
	else {
		ws_assert_not_reached();
	}

	if (arg2->type == FVALUE) {
		rhs_node.data = arg2->value.fvalue;
		rhs_node.next = NULL;
		rhs = &rhs_node;
	}
	else if (arg2->type == REGISTER) {
		rhs = df->registers[arg2->value.numeric];
	}
	else {
		ws_assert_not_reached();
	}

	mk_binary_internal(func, lhs, rhs, &out);
	//debug_register(out, to_arg->value.numeric);

	df->registers[to_arg->value.numeric] = out;
	df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
/* Applies fvalue_unary_minus() to every value in arg1.
 *
 * Successful results are prepended to a new list that is handed back
 * through *retval. A failed negation (NULL result) is logged at "noisy"
 * level and skipped, and its error message is freed. */
static void
mk_minus_internal(GSList *arg1, GSList **retval)
{
	GSList *results = NULL;
	char *err_msg = NULL;

	for (GSList *node = arg1; node != NULL; node = node->next) {
		fvalue_t *negated = fvalue_unary_minus(node->data, &err_msg);

		if (negated != NULL) {
			results = g_slist_prepend(results, negated);
			continue;
		}

		ws_noisy("unary_minus: %s", err_msg);
		g_free(err_msg);
		err_msg = NULL;
	}

	*retval = results;
}
/* Evaluates a unary minus and stores the results in a register.
 *
 * The operand is either a REGISTER (its value list is used as is) or a
 * literal FVALUE, which is wrapped in a one-element list node living on
 * this function's stack. The results are written to the register named
 * by to_arg, which is marked to be released with fvalue_free(). */
static void
mk_minus(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
{
	GSList operand_node;
	GSList *operand;
	GSList *out = NULL;

	if (arg1->type == FVALUE) {
		operand_node.data = arg1->value.fvalue;
		operand_node.next = NULL;
		operand = &operand_node;
	}
	else if (arg1->type == REGISTER) {
		operand = df->registers[arg1->value.numeric];
	}
	else {
		ws_assert_not_reached();
	}

	mk_minus_internal(operand, &out);

	df->registers[to_arg->value.numeric] = out;
	df->free_registers[to_arg->value.numeric] = (GDestroyNotify)fvalue_free;
}
/* Loads the literal fvalue carried by arg1 into the register named by
 * to_arg, as a one-element list. */
static void
put_fvalue(dfilter_t *df, dfvm_value_t *arg1, dfvm_value_t *to_arg)
{
	GSList *single = g_slist_append(NULL, arg1->value.fvalue);

	df->registers[to_arg->value.numeric] = single;
	/* No free function: the memory is owned by the dfvm_value_t. */
	df->free_registers[to_arg->value.numeric] = NULL;
}
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/* Pushes one function argument onto the function stack.
 *
 * A REGISTER operand contributes a copy of the register's list (made
 * with g_slist_copy()); a literal FVALUE contributes a new one-element
 * list. Any other operand type is a programming error. */
static void
stack_push(dfilter_t *df, dfvm_value_t *arg1)
{
	GSList *frame;

	if (arg1->type == REGISTER) {
		frame = g_slist_copy(df->registers[arg1->value.numeric]);
	}
	else if (arg1->type == FVALUE) {
		frame = g_slist_prepend(NULL, arg1->value.fvalue);
	}
	else {
		ws_assert_not_reached();
	}

	df->function_stack = g_slist_prepend(df->function_stack, frame);
}
/* Pops entries off the function stack; the number of entries to remove
 * is carried by arg1. */
static void
stack_pop(dfilter_t *df, dfvm_value_t *arg1)
{
	guint count = arg1->value.numeric;

	for (guint left = count; left > 0; left--) {
		GSList *top = df->function_stack;
		GSList *frame = top->data;

		/* The register contents are not owned by us: free the
		 * argument list but not the data it contains. */
		g_slist_free(frame);

		/* Remove the top of the stack. */
		df->function_stack = g_slist_delete_link(top, top);
	}
}
/* Tests whether the field carried by arg1 is present in the tree.
 *
 * Every header_field_info on the same_name_next chain is tried in turn.
 * Without a range (arg2 is NULL) the presence of any field info is
 * enough. With a range, filter_finfo_fvalues() must also return a
 * non-empty list for that range.
 *
 * Returns TRUE on the first match, FALSE when the chain is exhausted. */
static gboolean
check_exists(proto_tree *tree, dfvm_value_t *arg1, dfvm_value_t *arg2)
{
	drange_t *range = NULL;
	header_field_info *hfinfo;

	if (arg2) {
		range = arg2->value.drange;
	}

	for (hfinfo = arg1->value.hfinfo; hfinfo != NULL; hfinfo = hfinfo->same_name_next) {
		GPtrArray *finfos = proto_get_finfo_ptr_array(tree, hfinfo->id);
		GSList *fvalues;
		gboolean exists;

		/* This variant of the field is absent; try the next one. */
		if ((finfos == NULL) || (g_ptr_array_len(finfos) == 0)) {
			continue;
		}

		if (range == NULL) {
			return TRUE;
		}

		fvalues = filter_finfo_fvalues(NULL, finfos, range, FALSE);
		exists = (fvalues != NULL);
		/* Only the list itself is released here, not its elements. */
		g_slist_free(fvalues);
		if (exists) {
			return TRUE;
		}
	}

	return FALSE;
}
/*
 * Execute the instruction list stored in df->insns against 'tree'.
 *
 * Instructions are fetched one at a time, starting at index 0, and
 * dispatched on their opcode. A single boolean accumulator, initially
 * TRUE, carries the outcome of the most recent test-like instruction;
 * instructions that only produce values (PUT_FVALUE, STACK_PUSH/POP,
 * SLICE, LENGTH, arithmetic, UNARY_MINUS) leave it untouched.
 *
 * DFVM_IF_TRUE_GOTO / DFVM_IF_FALSE_GOTO take their target index from
 * arg1->value.numeric and resume dispatching directly at that index,
 * without going through the loop's bounds test.
 *
 * DFVM_RETURN calls free_register_overhead() and returns the
 * accumulator. Opcodes not listed in the switch are skipped. Running
 * off the end of the instruction list without meeting DFVM_RETURN
 * trips ws_assert_not_reached().
 *
 * 'tree' is asserted to be non-NULL.
 */
gboolean
dfvm_apply(dfilter_t *df, proto_tree *tree)
{
	gboolean	accum = TRUE;
	int		pc = 0;		/* index of the instruction being run */
	int		n_insns;

	ws_assert(tree);

	n_insns = df->insns->len;

	while (pc < n_insns) {
		dfvm_insn_t *insn;

dispatch:
		insn = g_ptr_array_index(df->insns, pc);

		switch (insn->op) {
			/* ---- Control flow ---- */
			case DFVM_RETURN:
				free_register_overhead(df);
				return accum;

			case DFVM_IF_TRUE_GOTO:
				if (!accum) {
					break;
				}
				pc = insn->arg1->value.numeric;
				goto dispatch;

			case DFVM_IF_FALSE_GOTO:
				if (accum) {
					break;
				}
				pc = insn->arg1->value.numeric;
				goto dispatch;

			case DFVM_NOT:
				accum = !accum;
				break;

			/* ---- Field existence and loading ---- */
			case DFVM_CHECK_EXISTS:
				accum = check_exists(tree, insn->arg1, NULL);
				break;

			case DFVM_CHECK_EXISTS_R:
				accum = check_exists(tree, insn->arg1, insn->arg2);
				break;

			case DFVM_READ_TREE:
				accum = read_tree(df, tree, insn->arg1, insn->arg2, NULL);
				break;

			case DFVM_READ_TREE_R:
				accum = read_tree(df, tree, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_READ_REFERENCE:
				accum = read_reference(df, insn->arg1, insn->arg2, NULL);
				break;

			case DFVM_READ_REFERENCE_R:
				accum = read_reference(df, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_PUT_FVALUE:
				put_fvalue(df, insn->arg1, insn->arg2);
				break;

			/* ---- Function calls and their argument stack ---- */
			case DFVM_CALL_FUNCTION:
				accum = call_function(df, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_STACK_PUSH:
				stack_push(df, insn->arg1);
				break;

			case DFVM_STACK_POP:
				stack_pop(df, insn->arg1);
				break;

			/* ---- Value-producing operations (accum unchanged) ---- */
			case DFVM_SLICE:
				mk_slice(df, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_LENGTH:
				mk_length(df, insn->arg1, insn->arg2);
				break;

			case DFVM_UNARY_MINUS:
				mk_minus(df, insn->arg1, insn->arg2);
				break;

			case DFVM_BITWISE_AND:
				mk_binary(df, fvalue_bitwise_and, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_ADD:
				mk_binary(df, fvalue_add, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_SUBTRACT:
				mk_binary(df, fvalue_subtract, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_MULTIPLY:
				mk_binary(df, fvalue_multiply, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_DIVIDE:
				mk_binary(df, fvalue_divide, insn->arg1, insn->arg2, insn->arg3);
				break;

			case DFVM_MODULO:
				mk_binary(df, fvalue_modulo, insn->arg1, insn->arg2, insn->arg3);
				break;

			/* ---- Relational tests, "all" flavour ---- */
			case DFVM_ALL_EQ:
				accum = all_test(df, fvalue_eq, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_NE:
				accum = all_test(df, fvalue_ne, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_GT:
				accum = all_test(df, fvalue_gt, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_GE:
				accum = all_test(df, fvalue_ge, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_LT:
				accum = all_test(df, fvalue_lt, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_LE:
				accum = all_test(df, fvalue_le, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_CONTAINS:
				accum = all_test(df, fvalue_contains, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_MATCHES:
				accum = all_matches(df, insn->arg1, insn->arg2);
				break;

			case DFVM_ALL_IN_RANGE:
				accum = all_in_range(df, insn->arg1, insn->arg2, insn->arg3);
				break;

			/* ---- Relational tests, "any" flavour ---- */
			case DFVM_ANY_EQ:
				accum = any_test(df, fvalue_eq, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_NE:
				accum = any_test(df, fvalue_ne, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_GT:
				accum = any_test(df, fvalue_gt, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_GE:
				accum = any_test(df, fvalue_ge, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_LT:
				accum = any_test(df, fvalue_lt, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_LE:
				accum = any_test(df, fvalue_le, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_CONTAINS:
				accum = any_test(df, fvalue_contains, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_MATCHES:
				accum = any_matches(df, insn->arg1, insn->arg2);
				break;

			case DFVM_ANY_IN_RANGE:
				accum = any_in_range(df, insn->arg1, insn->arg2, insn->arg3);
				break;

			/* ---- Unary test ---- */
			case DFVM_NOT_ALL_ZERO:
				accum = !all_test_unary(df, fvalue_is_zero, insn->arg1);
				break;
		}

		/* No jump was taken: advance to the following instruction. */
		pc++;
	}

	ws_assert_not_reached();
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/