wireshark/epan/dfilter/semcheck.c

1457 lines
38 KiB
C
Raw Normal View History

/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <string.h>
#include "dfilter-int.h"
#include "semcheck.h"
#include "syntax-tree.h"
#include "sttype-field.h"
#include "sttype-slice.h"
#include "sttype-op.h"
#include "sttype-set.h"
#include "sttype-function.h"
#include "sttype-pointer.h"
#include <epan/exceptions.h>
#include <epan/packet.h>
2021-06-18 18:21:42 +00:00
#include <wsutil/ws_assert.h>
#include <wsutil/wslog.h>
2021-06-18 18:21:42 +00:00
#include <ftypes/ftypes.h>
/*
 * Report a semantic-check failure attributed to a syntax-tree node.
 *
 * First logs a fixed message through ws_noisy(), then calls
 * dfilter_fail_throw() with DF_ERROR_GENERIC, the location of `node`
 * (via stnode_location()) and the caller's format string plus arguments.
 *
 * NOTE(review): dfilter_fail_throw() presumably does not return to the
 * caller; the call sites below still follow FAIL() with `break` - confirm
 * against its declaration.
 */
#define FAIL(dfw, node, ...) \
do { \
ws_noisy("Semantic check failed here."); \
dfilter_fail_throw(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
} while (0)
/* Predicate over a field type. The relation checks call it to ask whether
 * a given ftype may take part in the operation being checked. */
typedef gboolean (*FtypeCanFunc)(enum ftenum);
/* Forward declarations of file-local helpers, so that they can be called
 * ahead of their definitions. */
static ftenum_t
check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op,
stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
ftenum_t lhs_ftype, int commute);
static void
check_relation(dfwork_t *dfw, stnode_op_t st_op,
FtypeCanFunc can_func, gboolean allow_partial_value,
stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
int commute);
static void
semcheck(dfwork_t *dfw, stnode_t *st_node);
/* Maps a string to an fvalue through the value strings of a field. */
static fvalue_t *
mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s,
df_loc_t loc);
/*
 * Report whether two field types are comparable, i.e. whether they belong
 * to the same basic family of types.
 *
 * The family is selected by `a`; `b` is then tested for membership:
 *   - stand-alone types (none, boolean, protocol, the time types, the
 *     IEEE 11073 floats, IPv4, IPv6) are compatible only with themselves;
 *   - FT_FLOAT and FT_DOUBLE are compatible with each other;
 *   - the byte-sequence types are compatible with each other;
 *   - integer-like types defer to the matching ftype_can_val_to_*()
 *     predicate applied to `b`;
 *   - the string types are compatible with each other.
 *
 * FT_NUM_TYPES, or a value that is not a member of the enum, trips
 * ws_assert_not_reached().
 */
gboolean
compatible_ftypes(ftenum_t a, ftenum_t b)
{
	switch (a) {
		/* Stand-alone types: only the very same type is compatible. */
		case FT_NONE:
		case FT_BOOLEAN:
		case FT_PROTOCOL:
		case FT_ABSOLUTE_TIME:
		case FT_RELATIVE_TIME:
		case FT_IEEE_11073_SFLOAT:
		case FT_IEEE_11073_FLOAT:
		case FT_IPv4:
		case FT_IPv6:
			return a == b;

		/* Floating point. XXX - should be able to compare with INT */
		case FT_FLOAT:
		case FT_DOUBLE:
			return b == FT_FLOAT || b == FT_DOUBLE;

		/* Byte sequences. */
		case FT_ETHER:
		case FT_BYTES:
		case FT_UINT_BYTES:
		case FT_GUID:
		case FT_OID:
		case FT_AX25:
		case FT_VINES:
		case FT_FCWWN:
		case FT_REL_OID:
		case FT_SYSTEM_ID:
			switch (b) {
				case FT_ETHER:
				case FT_BYTES:
				case FT_UINT_BYTES:
				case FT_GUID:
				case FT_OID:
				case FT_AX25:
				case FT_VINES:
				case FT_FCWWN:
				case FT_REL_OID:
				case FT_SYSTEM_ID:
					return TRUE;
				default:
					return FALSE;
			}

		/* Unsigned integers up to 32 bits, and types handled like them. */
		case FT_UINT8:
		case FT_UINT16:
		case FT_UINT24:
		case FT_UINT32:
		case FT_CHAR:
		case FT_FRAMENUM:
		case FT_IPXNET:
			return ftype_can_val_to_uinteger(b);

		/* Unsigned integers wider than 32 bits. */
		case FT_UINT40:
		case FT_UINT48:
		case FT_UINT56:
		case FT_UINT64:
		case FT_EUI64:
			return ftype_can_val_to_uinteger64(b);

		/* Signed integers up to 32 bits. */
		case FT_INT8:
		case FT_INT16:
		case FT_INT24:
		case FT_INT32:
			return ftype_can_val_to_sinteger(b);

		/* Signed integers wider than 32 bits. */
		case FT_INT40:
		case FT_INT48:
		case FT_INT56:
		case FT_INT64:
			return ftype_can_val_to_sinteger64(b);

		/* Character strings. */
		case FT_STRING:
		case FT_STRINGZ:
		case FT_UINT_STRING:
		case FT_STRINGZPAD:
		case FT_STRINGZTRUNC:
			return b == FT_STRING || b == FT_STRINGZ ||
			       b == FT_UINT_STRING || b == FT_STRINGZPAD ||
			       b == FT_STRINGZTRUNC;

		/* Not a real type: handled by the assertion below. */
		case FT_NUM_TYPES:
			break;
	}

	ws_assert_not_reached();
	return FALSE;
}
/*
 * Hand a heap-allocated error string over to the dfilter work structure,
 * unless an error message has already been recorded there.
 *
 * If `str` is non-NULL and (dfw)->error.msg is still NULL, the string is
 * stored as the error message and the error code is set to
 * DF_ERROR_GENERIC. Otherwise the string is released with g_free() (a
 * no-op for NULL), so the first message recorded is the one that is kept.
 *
 * `str` is evaluated exactly once, so an expression with side effects may
 * be passed safely. `dfw` is still evaluated more than once.
 */
#define SET_ERROR(dfw, str) \
	do { \
		char *set_error_msg_ = (str); \
		if (set_error_msg_ != NULL && (dfw)->error.msg == NULL) { \
			(dfw)->error.msg = set_error_msg_; \
			(dfw)->error.code = DF_ERROR_GENERIC; \
		} \
		else { \
			g_free(set_error_msg_); \
		} \
	} while (0)
/*
 * Build an fvalue of type `ftype` from the literal text stored in `st`.
 *
 * The text is parsed with fvalue_from_literal(); a parse error message is
 * recorded in `dfw` unless one is already present. If parsing fails and
 * `hfinfo_value_string` is given, the text is looked up among that
 * field's value strings instead, and on success the recorded error is
 * cleared.
 *
 * Never returns NULL: when no value can be produced, the error location
 * is set to that of `st` and TypeError is thrown.
 */
WS_RETNONNULL
fvalue_t*
dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
		gboolean allow_partial_value, header_field_info *hfinfo_value_string)
{
	const char *literal = stnode_data(st);
	gchar *parse_error = NULL;
	fvalue_t *value;

	value = fvalue_from_literal(ftype, literal, allow_partial_value, &parse_error);
	SET_ERROR(dfw, parse_error);
	if (value != NULL) {
		return value;
	}

	/* Parsing failed: fall back to the field's value strings, if any. */
	if (hfinfo_value_string != NULL) {
		value = mk_fvalue_from_val_string(dfw, hfinfo_value_string, literal,
				stnode_location(st));
	}
	if (value == NULL) {
		dfw_set_error_location(dfw, stnode_location(st));
		THROW(TypeError);
	}

	/* The text maps to a value_string item, so the earlier parse error
	 * no longer applies. */
	dfw_error_clear(&dfw->error);
	return value;
}
/*
 * Build an fvalue of type `ftype` from the quoted string stored in `st`.
 *
 * The string (pointer and length taken from the node's GString) is parsed
 * with fvalue_from_string(); a parse error message is recorded in `dfw`
 * unless one is already present. If parsing fails and
 * `hfinfo_value_string` is given, the string is looked up among that
 * field's value strings instead, and on success the recorded error is
 * cleared.
 *
 * Never returns NULL: when no value can be produced, the error location
 * is set to that of `st` and TypeError is thrown.
 */
WS_RETNONNULL
fvalue_t *
dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
		header_field_info *hfinfo_value_string)
{
	const GString *text = stnode_string(st);
	gchar *parse_error = NULL;
	fvalue_t *value;

	value = fvalue_from_string(ftype, text->str, text->len, &parse_error);
	SET_ERROR(dfw, parse_error);
	if (value != NULL) {
		return value;
	}

	/* Parsing failed: fall back to the field's value strings, if any. */
	if (hfinfo_value_string != NULL) {
		value = mk_fvalue_from_val_string(dfw, hfinfo_value_string, text->str,
				stnode_location(st));
	}
	if (value == NULL) {
		dfw_set_error_location(dfw, stnode_location(st));
		THROW(TypeError);
	}

	/* The string maps to a value_string item, so the earlier parse error
	 * no longer applies. */
	dfw_error_clear(&dfw->error);
	return value;
}
/* Allocate a new FT_UINT32 fvalue and store `val` in it. */
static fvalue_t*
mk_uint32_fvalue(guint32 val)
{
	fvalue_t *result = fvalue_new(FT_UINT32);

	fvalue_set_uinteger(result, val);
	return result;
}
/* Allocate a new FT_UINT64 fvalue and store `val` in it. */
static fvalue_t*
mk_uint64_fvalue(guint64 val)
{
	fvalue_t *result = fvalue_new(FT_UINT64);

	fvalue_set_uinteger64(result, val);
	return result;
}
/*
 * Try to make an fvalue from the string `s` by looking it up among the
 * strings attached to the field `hfinfo` (value_string, val64_string or
 * true_false_string). All comparisons are ASCII case-insensitive.
 *
 * Only FT_BOOLEAN and the integer field types are handled; every other
 * type returns NULL immediately without reporting anything.
 *
 *   - FT_BOOLEAN: `s` is compared with the field's true/false strings, or
 *     with "True"/"False" when the field has none. The result is an
 *     FT_UINT64 fvalue holding TRUE or FALSE.
 *   - val64_string fields yield an FT_UINT64 fvalue, value_string fields
 *     (plain or extended) an FT_UINT32 fvalue.
 *   - range strings, custom display functions and unit strings cannot be
 *     mapped back from text to a number and are rejected.
 *
 * @param dfw     work structure that receives the error message
 * @param hfinfo  field whose strings are searched
 * @param s       text to look up
 * @param loc     location reported with any error
 * @return the new fvalue, or NULL if `s` could not be mapped; for the
 *         handled types an error is reported with dfilter_fail()
 */
static fvalue_t*
mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s,
		df_loc_t loc)
{
	static const true_false_string default_tf = { "True", "False" };
	const true_false_string *tf = &default_tf;

	/* Early return? */
	switch(hfinfo->type) {
		case FT_NONE:
		case FT_PROTOCOL:
		case FT_FLOAT:
		case FT_DOUBLE:
		case FT_IEEE_11073_SFLOAT:
		case FT_IEEE_11073_FLOAT:
		case FT_ABSOLUTE_TIME:
		case FT_RELATIVE_TIME:
		case FT_IPv4:
		case FT_IPv6:
		case FT_IPXNET:
		case FT_AX25:
		case FT_VINES:
		case FT_FCWWN:
		case FT_ETHER:
		case FT_BYTES:
		case FT_UINT_BYTES:
		case FT_STRING:
		case FT_STRINGZ:
		case FT_UINT_STRING:
		case FT_STRINGZPAD:
		case FT_STRINGZTRUNC:
		case FT_EUI64:
		case FT_GUID:
		case FT_OID:
		case FT_REL_OID:
		case FT_SYSTEM_ID:
		case FT_FRAMENUM: /* hfinfo->strings contains ft_framenum_type_t, not strings */
			return NULL;

		case FT_BOOLEAN:
		case FT_CHAR:
		case FT_UINT8:
		case FT_UINT16:
		case FT_UINT24:
		case FT_UINT32:
		case FT_UINT40:
		case FT_UINT48:
		case FT_UINT56:
		case FT_UINT64:
		case FT_INT8:
		case FT_INT16:
		case FT_INT24:
		case FT_INT32:
		case FT_INT40:
		case FT_INT48:
		case FT_INT56:
		case FT_INT64:
			break;

		case FT_NUM_TYPES:
			ws_assert_not_reached();
	}

	/* TRUE/FALSE *always* exist for FT_BOOLEAN. */
	if (hfinfo->type == FT_BOOLEAN) {
		if (hfinfo->strings) {
			tf = (const true_false_string *)hfinfo->strings;
		}

		if (g_ascii_strcasecmp(s, tf->true_string) == 0) {
			return mk_uint64_fvalue(TRUE);
		}
		else if (g_ascii_strcasecmp(s, tf->false_string) == 0) {
			return mk_uint64_fvalue(FALSE);
		}
		else {
			/*
			 * Prefer this error message to whatever error message
			 * has already been set.
			 */
			dfw_error_clear(&dfw->error);
			dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.",
				s, hfinfo->abbrev);
			return NULL;
		}
	}

	/* Do val_strings exist? */
	if (!hfinfo->strings) {
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "%s cannot accept strings as values.",
				hfinfo->abbrev);
		return NULL;
	}

	/* Reset the error message, since *something* interesting will happen,
	 * and the error message will be more interesting than any error message
	 * I happen to have now. */
	dfw_error_clear(&dfw->error);

	if (hfinfo->display & BASE_RANGE_STRING) {
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [range] strings as values.",
				hfinfo->abbrev);
	}
	else if (hfinfo->display & BASE_VAL64_STRING) {
		const val64_string *vals = (const val64_string *)hfinfo->strings;

		while (vals->strptr != NULL) {
			if (g_ascii_strcasecmp(s, vals->strptr) == 0) {
				return mk_uint64_fvalue(vals->value);
			}
			vals++;
		}
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.",
				s, hfinfo->abbrev);
	}
	else if (hfinfo->display == BASE_CUSTOM) {
		/* If a user wants to match against a custom string, we would
		 * somehow have to have the integer value here to pass it in
		 * to the custom-display function. But we don't have an
		 * integer, we have the string they're trying to match.
		 * -><-
		 */
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [custom] strings as values.",
				hfinfo->abbrev);
	}
	else if (hfinfo->display & BASE_UNIT_STRING) {
		/* With BASE_UNIT_STRING hfinfo->strings points to a single
		 * unit_name_string, not to a NULL-terminated value_string
		 * array. Walking it as such would read out of bounds, and a
		 * unit suffix does not identify a value anyway. */
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot accept [unit] strings as values.",
				hfinfo->abbrev);
	}
	else {
		const value_string *vals = (const value_string *)hfinfo->strings;

		/* An extended value string wraps the plain array. */
		if (hfinfo->display & BASE_EXT_STRING)
			vals = VALUE_STRING_EXT_VS_P((const value_string_ext *) vals);

		while (vals->strptr != NULL) {
			if (g_ascii_strcasecmp(s, vals->strptr) == 0) {
				return mk_uint32_fvalue(vals->value);
			}
			vals++;
		}
		dfilter_fail(dfw, DF_ERROR_GENERIC, loc, "\"%s\" cannot be found among the possible values for %s.",
				s, hfinfo->abbrev);
	}

	return NULL;
}
/*
 * Tell whether `type` is one of the field types treated as a sequence of
 * bytes by the semantic checks. Note that FT_IPv6 is in this set.
 *
 * FT_NUM_TYPES, or a value that is not a member of the enum, trips
 * ws_assert_not_reached().
 */
static gboolean
is_bytes_type(enum ftenum type)
{
	switch(type) {
		/* Byte sequences. */
		case FT_BYTES:
		case FT_UINT_BYTES:
		case FT_ETHER:
		case FT_AX25:
		case FT_VINES:
		case FT_FCWWN:
		case FT_IPv6:
		case FT_GUID:
		case FT_OID:
		case FT_REL_OID:
		case FT_SYSTEM_ID:
			return TRUE;

		/* Everything else, grouped by family. */
		case FT_NONE:
		case FT_PROTOCOL:
		case FT_BOOLEAN:
		case FT_FRAMENUM:
		case FT_CHAR:
		case FT_UINT8:
		case FT_UINT16:
		case FT_UINT24:
		case FT_UINT32:
		case FT_UINT40:
		case FT_UINT48:
		case FT_UINT56:
		case FT_UINT64:
		case FT_INT8:
		case FT_INT16:
		case FT_INT24:
		case FT_INT32:
		case FT_INT40:
		case FT_INT48:
		case FT_INT56:
		case FT_INT64:
		case FT_EUI64:
		case FT_FLOAT:
		case FT_DOUBLE:
		case FT_IEEE_11073_SFLOAT:
		case FT_IEEE_11073_FLOAT:
		case FT_ABSOLUTE_TIME:
		case FT_RELATIVE_TIME:
		case FT_IPv4:
		case FT_IPXNET:
		case FT_STRING:
		case FT_STRINGZ:
		case FT_UINT_STRING:
		case FT_STRINGZPAD:
		case FT_STRINGZTRUNC:
			return FALSE;

		/* Not a real type: handled by the assertion below. */
		case FT_NUM_TYPES:
			break;
	}

	ws_assert_not_reached();
	return FALSE;
}
/*
 * Semantic check for an existence test, i.e. a filter that only names an
 * entity. Only a field node is accepted; references, strings, literals,
 * character constants and functions are reported through FAIL(). Any
 * other node type is not expected here and trips an assertion.
 */
static void
check_exists(dfwork_t *dfw, stnode_t *st_arg1)
{
	sttype_id_t arg_type;

	LOG_NODE(st_arg1);

	arg_type = stnode_type_id(st_arg1);
	switch (arg_type) {
		case STTYPE_FIELD:
			/* A field can always be tested for presence. */
			break;

		case STTYPE_FUNCTION:
			/* XXX - Maybe we should change functions so they can return fields,
			 * in which case the 'exist' should be fine. */
			FAIL(dfw, st_arg1, "You cannot test whether a function is present.");
			break;

		case STTYPE_REFERENCE:
		case STTYPE_STRING:
		case STTYPE_LITERAL:
		case STTYPE_CHARCONST:
			FAIL(dfw, st_arg1, "%s is neither a field nor a protocol name.",
					stnode_todisplay(st_arg1));
			break;

		/* Node types that must never show up as the operand. */
		case STTYPE_UNINITIALIZED:
		case STTYPE_TEST:
		case STTYPE_FVALUE:
		case STTYPE_PCRE:
		case STTYPE_ARITHMETIC:
		case STTYPE_SLICE:
		case STTYPE_SET:
		case STTYPE_NUM_TYPES:
			ws_assert_not_reached();
	}
}
/*
 * Verify that the entity being sliced by the slice node `st` can be
 * sliced at all.
 *
 *   - field: its type must satisfy ftype_can_slice();
 *   - function: it is checked with check_function() (passing
 *     `lhs_ftype` along) and its return type must satisfy
 *     ftype_can_slice();
 *   - slice: the nested slice is checked recursively;
 *   - anything else is rejected.
 *
 * Violations are reported through FAIL().
 */
static void
check_slice_sanity(dfwork_t *dfw, stnode_t *st, ftenum_t lhs_ftype)
{
	stnode_t *sliced;
	ftenum_t sliced_ftype;

	LOG_NODE(st);

	sliced = sttype_slice_entity(st);
	ws_assert(sliced);

	switch (stnode_type_id(sliced)) {
		case STTYPE_FIELD: {
			header_field_info *field_info = sttype_field_hfinfo(sliced);

			sliced_ftype = field_info->type;
			if (!ftype_can_slice(sliced_ftype)) {
				FAIL(dfw, sliced, "\"%s\" is a %s and cannot be sliced into a sequence of bytes.",
						field_info->abbrev, ftype_pretty_name(sliced_ftype));
			}
			break;
		}

		case STTYPE_FUNCTION:
			sliced_ftype = check_function(dfw, sliced, lhs_ftype);
			if (!ftype_can_slice(sliced_ftype)) {
				FAIL(dfw, sliced, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
						sttype_function_name(sliced), ftype_pretty_name(sliced_ftype));
			}
			break;

		case STTYPE_SLICE:
			/* Should this be rejected instead? */
			check_slice_sanity(dfw, sliced, lhs_ftype);
			break;

		default:
			FAIL(dfw, sliced, "Range is not supported for entity %s",
					stnode_todisplay(sliced));
			break;
	}
}
/* Non-zero when the syntax-tree node type id `ft` denotes a field-like
 * entity, i.e. a field or a field reference. The argument is evaluated
 * twice, so pass an expression without side effects. */
#define IS_FIELD_ENTITY(ft) \
	(!((ft) != STTYPE_FIELD && \
	   (ft) != STTYPE_REFERENCE))
/*
 * Rewrite the node `arg` in place as a slice over a copy of itself. The
 * slice range starts at offset 0 and runs to the end, so the node then
 * stands for the whole entity viewed as a sequence of bytes.
 */
static void
convert_to_bytes(stnode_t *arg)
{
	drange_node *whole_range;
	stnode_t *original;

	/* Range covering everything: from offset 0 to the end. */
	whole_range = drange_node_new();
	drange_node_set_start_offset(whole_range, 0);
	drange_node_set_to_the_end(whole_range);

	/* Copy the node before it is turned into the slice that wraps it. */
	original = stnode_dup(arg);
	stnode_replace(arg, STTYPE_SLICE, NULL);
	sttype_slice_set1(arg, original, whole_range);
}
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
/*
 * Semantic check of a function call node.
 *
 * The number of arguments is validated against the function definition:
 * fewer than min_nargs is always an error, more than max_nargs is an
 * error only when max_nargs is greater than zero. Both are reported
 * through FAIL(). The arguments themselves are then handed to the
 * function's own semcheck_param_function callback.
 *
 * @return whatever field type the callback returns for the call
 */
ftenum_t
check_function(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
{
	df_func_def_t *def;
	GSList *args;
	guint argc;

	LOG_NODE(st_node);

	def = sttype_function_funcdef(st_node);
	args = sttype_function_params(st_node);
	argc = g_slist_length(args);

	if (argc < def->min_nargs) {
		FAIL(dfw, st_node, "Function %s needs at least %u arguments.",
			def->name, def->min_nargs);
	} else if (def->max_nargs > 0 && argc > def->max_nargs) {
		FAIL(dfw, st_node, "Function %s can only accept %u arguments.",
			def->name, def->max_nargs);
	}

	return def->semcheck_param_function(dfw, def->name, lhs_ftype, args,
					stnode_location(st_node));
}
/*
 * Build an fvalue of type `ftype` from the character constant stored in
 * `st` (the node data is read as a pointer to unsigned long).
 *
 * A conversion error message is recorded in `dfw` unless one is already
 * present. Never returns NULL: on failure the error location is set to
 * that of `st` and TypeError is thrown.
 */
WS_RETNONNULL
fvalue_t *
dfilter_fvalue_from_charconst(dfwork_t *dfw, ftenum_t ftype, stnode_t *st)
{
	unsigned long *char_value = stnode_data(st);
	char *conv_error = NULL;
	fvalue_t *result;

	result = fvalue_from_charconst(ftype, *char_value, &conv_error);
	SET_ERROR(dfw, conv_error);
	if (result != NULL) {
		return result;
	}

	dfw_set_error_location(dfw, stnode_location(st));
	THROW(TypeError);
	return result;
}
/* If the LHS of a relation test is a FIELD, run some checks
* and possibly some modifications of syntax tree nodes. */
static void
check_relation_LHS_FIELD(dfwork_t *dfw, stnode_op_t st_op,
FtypeCanFunc can_func, gboolean allow_partial_value,
stnode_t *st_node,
stnode_t *st_arg1, stnode_t *st_arg2)
{
sttype_id_t type2;
header_field_info *hfinfo1;
ftenum_t ftype1, ftype2;
fvalue_t *fvalue;
2022-02-27 14:11:50 +00:00
LOG_NODE(st_node);
type2 = stnode_type_id(st_arg2);
ws_assert(stnode_type_id(st_arg1) == STTYPE_FIELD ||
stnode_type_id(st_arg1) == STTYPE_REFERENCE);
hfinfo1 = sttype_field_hfinfo(st_arg1);
dfilter: Add support for raw (bytes) addressing mode This adds new syntax to read a field from the tree as bytes, instead of the actual type. This is a useful extension for example to match matformed strings that contain unicode replacement characters. In this case it is not possible to match the raw value of the malformed string field. This extension fills this need and is generic enough that it should be useful in many other situations. The syntax used is to prefix the field name with "@". The following artificial example tests if the HTTP user agent contains a particular invalid UTF-8 sequence: @http.user_agent == "Mozill\xAA" Where simply using "http.user_agent" won't work because the invalid byte sequence will have been replaced with U+FFFD. Considering the following programs: $ dftest '_ws.ftypes.string == "ABC"' Filter: _ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <FT_STRING>) 1 FVALUE("ABC" <FT_STRING>) Instructions: 00000 READ_TREE _ws.ftypes.string <FT_STRING> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == "ABC" <FT_STRING> 00003 RETURN $ dftest '@_ws.ftypes.string == "ABC"' Filter: @_ws.ftypes.string == "ABC" Syntax tree: 0 TEST_ANY_EQ: 1 FIELD(_ws.ftypes.string <RAW>) 1 FVALUE(41:42:43 <FT_BYTES>) Instructions: 00000 READ_TREE @_ws.ftypes.string <FT_BYTES> -> reg#0 00001 IF_FALSE_GOTO 3 00002 ANY_EQ reg#0 == 41:42:43 <FT_BYTES> 00003 RETURN In the second case the field has a "raw" type, that equates directly to FT_BYTES, and the field value is read from the protocol raw data.
2022-10-25 03:20:18 +00:00
ftype1 = sttype_field_ftenum(st_arg1);
if (!can_func(ftype1)) {
FAIL(dfw, st_arg1, "%s (type=%s) cannot participate in %s comparison.",
hfinfo1->abbrev, ftype_pretty_name(ftype1),
stnode_todisplay(st_node));
}
if (IS_FIELD_ENTITY(type2)) {
ftype2 = sttype_field_ftenum(st_arg2);
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
}
/* Do this check even though you'd think that if
* they're compatible, then can_func() would pass. */
if (!can_func(ftype2)) {
FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
}
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL) {
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
/* Skip incompatible fields */
while (hfinfo1->same_name_prev_id != -1 &&
((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
(type2 != STTYPE_STRING && (ftype1 == FT_STRING || ftype1== FT_STRINGZ)))) {
hfinfo1 = proto_registrar_get_nth(hfinfo1->same_name_prev_id);
ftype1 = hfinfo1->type;
}
if (type2 == STTYPE_STRING) {
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
}
else {
fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
}
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_CHARCONST) {
fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_SLICE) {
check_slice_sanity(dfw, st_arg2, ftype1);
if (!is_bytes_type(ftype1)) {
if (!ftype_can_slice(ftype1)) {
FAIL(dfw, st_arg1, "\"%s\" is a %s and cannot be converted into a sequence of bytes.",
hfinfo1->abbrev,
ftype_pretty_name(ftype1));
}
/* Convert entire field to bytes */
convert_to_bytes(st_arg1);
}
}
else if (type2 == STTYPE_FUNCTION) {
dfilter: Allow arithmetic expressions as function arguments This allows writing moderately complex expressions, for example a float epsilon test (#16483): Filter: {abs(_ws.ftypes.double - 1) / max(abs(_ws.ftypes.double), abs(1))} < 0.01 Syntax tree: 0 TEST_LT: 1 OP_DIVIDE: 2 FUNCTION(abs#1): 3 OP_SUBTRACT: 4 FIELD(_ws.ftypes.double) 4 FVALUE(1 <FT_DOUBLE>) 2 FUNCTION(max#2): 3 FUNCTION(abs#1): 4 FIELD(_ws.ftypes.double) 3 FUNCTION(abs#1): 4 FVALUE(1 <FT_DOUBLE>) 1 FVALUE(0.01 <FT_DOUBLE>) Instructions: 00000 READ_TREE _ws.ftypes.double -> reg#1 00001 IF_FALSE_GOTO 3 00002 SUBRACT reg#1 - 1 <FT_DOUBLE> -> reg#2 00003 STACK_PUSH reg#2 00004 CALL_FUNCTION abs(reg#2) -> reg#0 00005 STACK_POP 1 00006 IF_FALSE_GOTO 24 00007 READ_TREE _ws.ftypes.double -> reg#1 00008 IF_FALSE_GOTO 9 00009 STACK_PUSH reg#1 00010 CALL_FUNCTION abs(reg#1) -> reg#4 00011 STACK_POP 1 00012 IF_FALSE_GOTO 13 00013 STACK_PUSH reg#4 00014 STACK_PUSH 1 <FT_DOUBLE> 00015 CALL_FUNCTION abs(1 <FT_DOUBLE>) -> reg#5 00016 STACK_POP 1 00017 IF_FALSE_GOTO 18 00018 STACK_PUSH reg#5 00019 CALL_FUNCTION max(reg#5, reg#4) -> reg#3 00020 STACK_POP 2 00021 IF_FALSE_GOTO 24 00022 DIVIDE reg#0 / reg#3 -> reg#6 00023 ANY_LT reg#6 < 0.01 <FT_DOUBLE> 00024 RETURN We now use a stack to pass arguments to the function. The stack is implemented as a list of lists (list of registers). Arguments may still be non-existent to functions (this is a feature). Functions must check for nil arguments (NULL lists) and handle that case. It's somewhat complicated to allow literal values and test compatibility for different types, both because of lack of type information with unparsed/literal and also because it is an underdeveloped area in the code. In my limited testing it was good enough and useful, further enhancements are left for future work.
2022-04-16 01:42:20 +00:00
ftype2 = check_function(dfw, st_arg2, ftype1);
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, st_arg2, "%s (type=%s) and return value of %s() (type=%s) are not of compatible types.",
hfinfo1->abbrev, ftype_pretty_name(ftype1),
sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
}
if (!can_func(ftype2)) {
FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.",
sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
}
}
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
else if (type2 == STTYPE_PCRE) {
ws_assert(st_op == STNODE_OP_MATCHES);
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
}
else if (type2 == STTYPE_ARITHMETIC) {
ftype2 = check_arithmetic(dfw, st_arg2, ftype1);
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
}
if (!can_func(ftype2)) {
FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
}
}
else {
2021-06-18 18:21:42 +00:00
ws_assert_not_reached();
}
}
/*
 * Semantic check for a relation test whose left-hand side is a SLICE.
 *
 * The LHS slice is validated with check_slice_sanity(), then the right-hand
 * side is inspected by syntax-tree node type:
 *   - field entity / function: if its ftype is not already a bytes type it
 *     must be sliceable, and the node is rewritten with convert_to_bytes();
 *   - string / literal / charconst: turned into an FT_BYTES fvalue which
 *     replaces the node in place;
 *   - slice: validated with check_slice_sanity();
 *   - PCRE: only legal for the "matches" operator (asserted);
 *   - arithmetic: result type must be compatible with FT_BYTES and accepted
 *     by can_func.
 * Any other node type is a programming error (ws_assert_not_reached()).
 *
 * Failures are reported through FAIL(), which does not return here.
 *
 * NOTE: can_func carries the _U_ marker in the signature but is in fact
 * used by the arithmetic branch below.
 */
static void
check_relation_LHS_SLICE(dfwork_t *dfw, stnode_op_t st_op,
		FtypeCanFunc can_func _U_,
		gboolean allow_partial_value,
		stnode_t *st_node _U_,
		stnode_t *st_arg1, stnode_t *st_arg2)
{
	sttype_id_t	rhs_type;
	ftenum_t	rhs_ftype;
	fvalue_t	*bytes_val;

	LOG_NODE(st_node);

	check_slice_sanity(dfw, st_arg1, FT_NONE);

	rhs_type = stnode_type_id(st_arg2);

	/* Field entities are recognised by a macro rather than a single
	 * enumerator, so handle them up front before switching on the type. */
	if (IS_FIELD_ENTITY(rhs_type)) {
		rhs_ftype = sttype_field_ftenum(st_arg2);
		if (is_bytes_type(rhs_ftype)) {
			return;
		}
		if (!ftype_can_slice(rhs_ftype)) {
			FAIL(dfw, st_arg2, "\"%s\" is a %s and cannot be converted into a sequence of bytes.",
					stnode_todisplay(st_arg2),
					ftype_pretty_name(rhs_ftype));
		}
		/* Convert entire field to bytes */
		convert_to_bytes(st_arg2);
		return;
	}

	switch (rhs_type) {
	case STTYPE_STRING:
		bytes_val = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL);
		stnode_replace(st_arg2, STTYPE_FVALUE, bytes_val);
		break;

	case STTYPE_LITERAL:
		bytes_val = dfilter_fvalue_from_literal(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
		stnode_replace(st_arg2, STTYPE_FVALUE, bytes_val);
		break;

	case STTYPE_CHARCONST:
		bytes_val = dfilter_fvalue_from_charconst(dfw, FT_BYTES, st_arg2);
		stnode_replace(st_arg2, STTYPE_FVALUE, bytes_val);
		break;

	case STTYPE_SLICE:
		check_slice_sanity(dfw, st_arg2, FT_BYTES);
		break;

	case STTYPE_FUNCTION:
		rhs_ftype = check_function(dfw, st_arg2, FT_BYTES);
		if (is_bytes_type(rhs_ftype)) {
			break;
		}
		if (!ftype_can_slice(rhs_ftype)) {
			FAIL(dfw, st_arg2, "Return value of function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
					sttype_function_name(st_arg2),
					ftype_pretty_name(rhs_ftype));
		}
		/* Convert function result to bytes */
		convert_to_bytes(st_arg2);
		break;

	case STTYPE_PCRE:
		/* A regular expression on the RHS only makes sense with "matches". */
		ws_assert(st_op == STNODE_OP_MATCHES);
		break;

	case STTYPE_ARITHMETIC:
		rhs_ftype = check_arithmetic(dfw, st_arg2, FT_BYTES);
		if (!compatible_ftypes(FT_BYTES, rhs_ftype)) {
			FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
					stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
		}
		if (!can_func(rhs_ftype)) {
			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
					stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
		}
		break;

	default:
		ws_assert_not_reached();
	}
}
/* If the LHS of a relation test is a FUNCTION, run some checks
 * and possibly some modifications of syntax tree nodes.
 *
 * dfw                  - display filter work context, passed to every helper
 *                        and to FAIL() for error reporting.
 * st_op                - the relation operator; only consulted to assert that
 *                        a PCRE right-hand side is used with "matches".
 * can_func             - predicate telling whether a given ftype may take part
 *                        in this kind of comparison.
 * allow_partial_value  - forwarded to dfilter_fvalue_from_literal().
 * st_node              - the relation node itself (used for logging and in
 *                        error messages).
 * st_arg1              - LHS node, a function call.
 * st_arg2              - RHS node; may be replaced by an FVALUE node.
 * commute              - forwarded, decremented, to check_relation() when the
 *                        operands are swapped.
 *
 * Failures are reported through FAIL(), which does not return here.
 */
static void
check_relation_LHS_FUNCTION(dfwork_t *dfw, stnode_op_t st_op,
		FtypeCanFunc can_func, gboolean allow_partial_value,
		stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
		int commute)
{
	sttype_id_t	type2;
	ftenum_t	ftype1, ftype2;
	fvalue_t	*fvalue;

	LOG_NODE(st_node);

	ftype1 = check_function(dfw, st_arg1, FT_NONE);
	if (ftype1 == FT_NONE) {
		/* check_function() reported no type for the LHS function.
		 * Re-run the relation check with the operands swapped
		 * (presumably so the other operand can supply the type). */
		check_relation(dfw, st_op, can_func, allow_partial_value,
				st_node, st_arg2, st_arg1, commute - 1);
		return;
	}

	if (!can_func(ftype1)) {
		FAIL(dfw, st_arg1, "Function %s (type=%s) cannot participate in %s comparison.",
				sttype_function_name(st_arg1), ftype_pretty_name(ftype1),
				stnode_todisplay(st_node));
	}

	type2 = stnode_type_id(st_arg2);

	if (IS_FIELD_ENTITY(type2)) {
		ftype2 = sttype_field_ftenum(st_arg2);

		if (!compatible_ftypes(ftype1, ftype2)) {
			/* The function is the LHS (st_arg1); st_arg2 is a field
			 * here and must not be queried for a function name. */
			FAIL(dfw, st_arg2, "Function %s and %s are not of compatible types.",
					sttype_function_name(st_arg1), stnode_todisplay(st_arg2));
		}
		/* Do this check even though you'd think that if
		 * they're compatible, then can_func() would pass. */
		if (!can_func(ftype2)) {
			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
		}
	}
	else if (type2 == STTYPE_STRING) {
		/* Build a value of the function's return type from the string
		 * and substitute it for the RHS node. */
		fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
	}
	else if (type2 == STTYPE_LITERAL) {
		fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL);
		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
	}
	else if (type2 == STTYPE_CHARCONST) {
		fvalue = dfilter_fvalue_from_charconst(dfw, ftype1, st_arg2);
		stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
	}
	else if (type2 == STTYPE_SLICE) {
		check_slice_sanity(dfw, st_arg2, ftype1);

		/* The RHS is a slice, so a non-bytes function result on the
		 * LHS has to be sliceable and is converted to bytes. */
		if (!is_bytes_type(ftype1)) {
			if (!ftype_can_slice(ftype1)) {
				FAIL(dfw, st_arg1, "Function \"%s\" is a %s and cannot be converted into a sequence of bytes.",
						sttype_function_name(st_arg1),
						ftype_pretty_name(ftype1));
			}

			/* Convert function result to bytes */
			convert_to_bytes(st_arg1);
		}
	}
	else if (type2 == STTYPE_FUNCTION) {
		ftype2 = check_function(dfw, st_arg2, ftype1);

		if (!compatible_ftypes(ftype1, ftype2)) {
			/* Report each function next to its own return type. */
			FAIL(dfw, st_arg2, "Return values of function %s (type=%s) and function %s (type=%s) are not of compatible types.",
				sttype_function_name(st_arg1), ftype_pretty_name(ftype1),
				sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
		}

		/* Do this check even though you'd think that if
		 * they're compatible, then can_func() would pass. */
		if (!can_func(ftype2)) {
			FAIL(dfw, st_arg2, "Return value of %s (type=%s) cannot participate in specified comparison.",
				sttype_function_name(st_arg2), ftype_pretty_name(ftype2));
		}
	}
	else if (type2 == STTYPE_PCRE) {
		/* A regular expression on the RHS only makes sense with "matches". */
		ws_assert(st_op == STNODE_OP_MATCHES);
	}
	else if (type2 == STTYPE_ARITHMETIC) {
		ftype2 = check_arithmetic(dfw, st_arg2, ftype1);

		if (!compatible_ftypes(ftype1, ftype2)) {
			FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
					stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
		}
		if (!can_func(ftype2)) {
			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
					stnode_todisplay(st_arg2), ftype_pretty_name(ftype2));
		}
	}
	else {
		ws_assert_not_reached();
	}
}
/*
 * Check a relation whose left-hand side is an arithmetic expression.
 *
 * The LHS is type-checked first. If it yields no type (FT_NONE) the
 * operands are swapped and the generic relation check is retried with the
 * commute budget decremented. Otherwise the RHS is validated against, or
 * converted to, the type produced by the LHS.
 */
static void
check_relation_LHS_ARITHMETIC(dfwork_t *dfw, stnode_op_t st_op,
		FtypeCanFunc can_func, gboolean allow_partial_value,
		stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
		int commute)
{
	ftenum_t	lhs_ftype, rhs_ftype;
	sttype_id_t	rhs_kind;
	fvalue_t	*converted;

	LOG_NODE(st_node);

	lhs_ftype = check_arithmetic(dfw, st_arg1, FT_NONE);
	if (lhs_ftype == FT_NONE) {
		/* No type on this side: let the other operand lead. */
		check_relation(dfw, st_op, can_func, allow_partial_value,
				st_node, st_arg2, st_arg1, commute - 1);
		return;
	}

	if (!can_func(lhs_ftype)) {
		FAIL(dfw, st_arg1, "Result with type %s cannot participate in %s comparison.",
				ftype_pretty_name(lhs_ftype),
				stnode_todisplay(st_node));
	}

	rhs_kind = stnode_type_id(st_arg2);

	if (IS_FIELD_ENTITY(rhs_kind)) {
		rhs_ftype = sttype_field_ftenum(st_arg2);
		if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
			FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
					stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
		}
		if (!can_func(rhs_ftype)) {
			FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
					stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
		}
		return;
	}

	switch (rhs_kind) {
		case STTYPE_STRING:
			converted = dfilter_fvalue_from_string(dfw, lhs_ftype, st_arg2, NULL);
			stnode_replace(st_arg2, STTYPE_FVALUE, converted);
			break;

		case STTYPE_LITERAL:
			converted = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_arg2,
					allow_partial_value, NULL);
			stnode_replace(st_arg2, STTYPE_FVALUE, converted);
			break;

		case STTYPE_CHARCONST:
			converted = dfilter_fvalue_from_charconst(dfw, lhs_ftype, st_arg2);
			stnode_replace(st_arg2, STTYPE_FVALUE, converted);
			break;

		case STTYPE_SLICE:
			check_slice_sanity(dfw, st_arg2, lhs_ftype);
			if (is_bytes_type(lhs_ftype))
				break;
			if (!ftype_can_slice(lhs_ftype)) {
				FAIL(dfw, st_arg1, "Result is a %s and cannot be converted into a sequence of bytes.",
						ftype_pretty_name(lhs_ftype));
			}
			/* Convert expression result to bytes */
			convert_to_bytes(st_arg1);
			break;

		case STTYPE_FUNCTION:
			rhs_ftype = check_function(dfw, st_arg2, lhs_ftype);
			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
				FAIL(dfw, st_arg2, "Result (type=%s) and return value of %s() (type=%s) are not of compatible types.",
						ftype_pretty_name(lhs_ftype),
						sttype_function_name(st_arg2), ftype_pretty_name(rhs_ftype));
			}
			if (!can_func(rhs_ftype)) {
				FAIL(dfw, st_arg2, "return value of %s() (type=%s) cannot participate in specified comparison.",
						sttype_function_name(st_arg2), ftype_pretty_name(rhs_ftype));
			}
			break;

		case STTYPE_PCRE:
			/* A compiled regex on the RHS is only expected for "matches". */
			ws_assert(st_op == STNODE_OP_MATCHES);
			break;

		case STTYPE_ARITHMETIC:
			rhs_ftype = check_arithmetic(dfw, st_arg2, lhs_ftype);
			if (!compatible_ftypes(lhs_ftype, rhs_ftype)) {
				FAIL(dfw, st_arg2, "%s and %s are not of compatible types.",
						stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
			}
			if (!can_func(rhs_ftype)) {
				FAIL(dfw, st_arg2, "%s (type=%s) cannot participate in specified comparison.",
						stnode_todisplay(st_arg2), ftype_pretty_name(rhs_ftype));
			}
			break;

		default:
			ws_assert_not_reached();
	}
}
/*
 * Check the semantics of any relational test.
 *
 * Dispatches on the syntax-node type of the left-hand side (st_arg1).
 * Literal, string and character-constant operands have no type that could
 * be assigned to them here, so for those the operands are swapped and the
 * check is retried with the commute budget decremented. Once the budget
 * goes negative neither side could supply a type and the filter is
 * rejected.
 */
static void
check_relation(dfwork_t *dfw, stnode_op_t st_op,
		FtypeCanFunc can_func, gboolean allow_partial_value,
		stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
		int commute)
{
	LOG_NODE(st_node);

	if (commute < 0) {
		/* We have already commuted the LHS with the RHS and still
		   cannot assign a field type to any side of the relation. */
		FAIL(dfw, st_node, "Constant expression is invalid.");
	}

	switch (stnode_type_id(st_arg1)) {
		case STTYPE_FIELD:
		case STTYPE_REFERENCE:
			check_relation_LHS_FIELD(dfw, st_op, can_func,
					allow_partial_value, st_node, st_arg1, st_arg2);
			break;
		case STTYPE_SLICE:
			check_relation_LHS_SLICE(dfw, st_op, can_func,
					allow_partial_value, st_node, st_arg1, st_arg2);
			break;
		case STTYPE_FUNCTION:
			check_relation_LHS_FUNCTION(dfw, st_op, can_func,
					allow_partial_value, st_node, st_arg1, st_arg2, commute);
			break;
		case STTYPE_ARITHMETIC:
			check_relation_LHS_ARITHMETIC(dfw, st_op, can_func,
					allow_partial_value, st_node, st_arg1, st_arg2, commute);
			break;
		case STTYPE_LITERAL:
		case STTYPE_STRING:
		case STTYPE_CHARCONST:
			/* We cannot semantically check a relation with literals on the LHS because we
			   don't have a type to assign them. Commute the LHS with the RHS and retry
			   the relation semantic check. */
			check_relation(dfw, st_op, can_func,
					allow_partial_value, st_node, st_arg2, st_arg1, commute - 1);
			break;
		default:
			/* Should not happen. */
			FAIL(dfw, st_arg1, "(FIXME) Syntax node type \"%s\" is invalid for relation \"%s\".",
					stnode_type_name(st_arg1), stnode_todisplay(st_node));
	}
}
/*
 * Emit a compile warning when the right-hand side of "contains" was parsed
 * as a field but its abbreviation could equally be read as a byte literal.
 *
 * Nothing is done when the token the user typed starts with '.' or ':'.
 * Otherwise the field abbreviation is trial-parsed as an FT_BYTES literal;
 * if that succeeds, a warning reports how the token was interpreted and
 * suggests the ':' / '.' prefixed spellings.
 */
static void
check_relation_contains_RHS_FIELD(dfwork_t *dfw, stnode_t *st_node _U_,
		stnode_t *st_arg1 _U_, stnode_t *st_arg2)
{
	const char *rhs_text = stnode_token(st_arg2);
	gboolean has_sigil = (rhs_text[0] == '.' || rhs_text[0] == ':');

	if (!has_sigil) {
		header_field_info *field = sttype_field_hfinfo(st_arg2);
		fvalue_t *as_bytes = fvalue_from_literal(FT_BYTES, field->abbrev, FALSE, NULL);

		if (as_bytes == NULL)
			return;

		add_compile_warning(dfw, "Interpreting \"%s\" as \"%s\". Consider writing :%s or .%s",
				field->abbrev, field->name, field->abbrev, field->abbrev);
		/* The value was only needed to probe whether the parse succeeds. */
		fvalue_free(as_bytes);
	}
}
/*
 * Check the semantics of a "contains" test.
 *
 * A field on the right-hand side is first examined for a possible
 * field-versus-bytes ambiguity. The left-hand side must be a field,
 * reference, function or slice; anything else is rejected. The LHS-specific
 * checkers are called with partial values allowed (TRUE).
 */
static void
check_relation_contains(dfwork_t *dfw, stnode_t *st_node,
		stnode_t *st_arg1, stnode_t *st_arg2)
{
	LOG_NODE(st_node);

	if (stnode_type_id(st_arg2) == STTYPE_FIELD) {
		check_relation_contains_RHS_FIELD(dfw, st_node, st_arg1, st_arg2);
	}

	switch (stnode_type_id(st_arg1)) {
		case STTYPE_FIELD:
		case STTYPE_REFERENCE:
			check_relation_LHS_FIELD(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
					TRUE, st_node, st_arg1, st_arg2);
			break;
		case STTYPE_FUNCTION:
			/* Commute budget of 0. */
			check_relation_LHS_FUNCTION(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
					TRUE, st_node, st_arg1, st_arg2, 0);
			break;
		case STTYPE_SLICE:
			check_relation_LHS_SLICE(dfw, STNODE_OP_CONTAINS, ftype_can_contains,
					TRUE, st_node, st_arg1, st_arg2);
			break;
		default:
			FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.",
					stnode_todisplay(st_node), stnode_todisplay(st_arg1));
	}
}
/*
 * Check the semantics of a "matches" test.
 *
 * The right-hand side must be a double-quoted string. It is compiled as a
 * regular expression (WS_REGEX_CASELESS | WS_REGEX_NEVER_UTF) and the RHS
 * node is replaced by the compiled pattern (STTYPE_PCRE). A compilation
 * error is reported at the RHS location and TypeError is thrown. The
 * left-hand side must be a field, reference, function or slice.
 */
static void
check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
		stnode_t *st_arg1, stnode_t *st_arg2)
{
	ws_regex_t *pcre;
	char *errmsg = NULL;
	GString *patt;

	LOG_NODE(st_node);

	if (stnode_type_id(st_arg2) != STTYPE_STRING) {
		FAIL(dfw, st_arg2, "Matches requires a double quoted string on the right side.");
	}

	patt = stnode_string(st_arg2);
	ws_debug("Compile regex pattern: %s", stnode_token(st_arg2));

	pcre = ws_regex_compile_ex(patt->str, patt->len, &errmsg, WS_REGEX_CASELESS|WS_REGEX_NEVER_UTF);
	if (errmsg) {
		/* Report first, then release the message before throwing:
		 * nothing after THROW() in this block would get to free it. */
		dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg2), "Regex compilation error: %s.", errmsg);
		g_free(errmsg);
		THROW(TypeError);
	}

	stnode_replace(st_arg2, STTYPE_PCRE, pcre);

	switch (stnode_type_id(st_arg1)) {
		case STTYPE_FIELD:
		case STTYPE_REFERENCE:
			check_relation_LHS_FIELD(dfw, STNODE_OP_MATCHES, ftype_can_matches,
					TRUE, st_node, st_arg1, st_arg2);
			break;
		case STTYPE_FUNCTION:
			check_relation_LHS_FUNCTION(dfw, STNODE_OP_MATCHES, ftype_can_matches,
					TRUE, st_node, st_arg1, st_arg2, 0);
			break;
		case STTYPE_SLICE:
			check_relation_LHS_SLICE(dfw, STNODE_OP_MATCHES, ftype_can_matches,
					TRUE, st_node, st_arg1, st_arg2);
			break;
		default:
			FAIL(dfw, st_arg1, "Left side of %s expression must be a field or function, not %s.",
					stnode_todisplay(st_node), stnode_todisplay(st_arg1));
	}
}
/*
 * Check the semantics of a set-membership ("in") test.
 *
 * The left-hand side must be a plain field and the right-hand side a set
 * (the latter is guaranteed by the grammar). Each set element is checked
 * against the field: a single value as an equality test, a range as a
 * ">=" test on its lower bound plus a "<=" test on its upper bound.
 */
static void
check_relation_in(dfwork_t *dfw, stnode_t *st_node,
		stnode_t *st_arg1, stnode_t *st_arg2)
{
	GSList *nodelist;
	stnode_t *node_left, *node_right;

	LOG_NODE(st_node);

	if (stnode_type_id(st_arg1) != STTYPE_FIELD) {
		FAIL(dfw, st_arg1, "Only a field may be tested for membership in a set.");
	}
	/* Checked in the grammar parser. */
	ws_assert(stnode_type_id(st_arg2) == STTYPE_SET);

	/* Attempt to interpret one element of the set at a time. Each
	 * element is represented by two items in the list, the element
	 * value and NULL. Both will be replaced by a lower and upper
	 * value if the element is a range. */
	nodelist = stnode_data(st_arg2);
	while (nodelist) {
		node_left = nodelist->data;

		/* Don't let a range on the RHS affect the LHS field. */
		if (stnode_type_id(node_left) == STTYPE_SLICE) {
			/* FAIL() throws, so nothing after it runs. */
			FAIL(dfw, node_left, "A slice may not appear inside a set.");
		}

		/* Items come in pairs; the second one must exist. */
		nodelist = g_slist_next(nodelist);
		ws_assert(nodelist);
		node_right = nodelist->data;

		if (node_right) {
			/* Range element: lower <= field <= upper. */
			check_relation_LHS_FIELD(dfw, STNODE_OP_GE, ftype_can_cmp,
					FALSE, st_node, st_arg1, node_left);
			check_relation_LHS_FIELD(dfw, STNODE_OP_LE, ftype_can_cmp,
					FALSE, st_node, st_arg1, node_right);
		} else {
			/* Single value element. */
			check_relation_LHS_FIELD(dfw, STNODE_OP_ANY_EQ, ftype_can_eq,
					FALSE, st_node, st_arg1, node_left);
		}
		nodelist = g_slist_next(nodelist);
	}
}
/*
 * Check the semantics of any type of TEST.
 *
 * Logical operators recurse into their operand(s) through semcheck().
 * Equality and ordering operators go through check_relation() with the
 * matching capability predicate, partial values disallowed and a commute
 * budget of 1. "contains", "matches" and "in" each have their own checker.
 */
static void
check_test(dfwork_t *dfw, stnode_t *st_node)
{
	stnode_op_t st_op;
	stnode_t *st_arg1, *st_arg2;

	LOG_NODE(st_node);

	sttype_oper_get(st_node, &st_op, &st_arg1, &st_arg2);

	switch (st_op) {
		case STNODE_OP_UNINITIALIZED:
			ws_assert_not_reached();
			break;

		case STNODE_OP_NOT:
			semcheck(dfw, st_arg1);
			break;

		case STNODE_OP_AND:
		case STNODE_OP_OR:
			semcheck(dfw, st_arg1);
			semcheck(dfw, st_arg2);
			break;

		case STNODE_OP_ALL_EQ:
		case STNODE_OP_ANY_EQ:
		case STNODE_OP_ALL_NE:
		case STNODE_OP_ANY_NE:
			check_relation(dfw, st_op, ftype_can_eq, FALSE, st_node, st_arg1, st_arg2, 1);
			break;

		case STNODE_OP_GT:
		case STNODE_OP_GE:
		case STNODE_OP_LT:
		case STNODE_OP_LE:
			check_relation(dfw, st_op, ftype_can_cmp, FALSE, st_node, st_arg1, st_arg2, 1);
			break;

		case STNODE_OP_CONTAINS:
			check_relation_contains(dfw, st_node, st_arg1, st_arg2);
			break;

		case STNODE_OP_MATCHES:
			check_relation_matches(dfw, st_node, st_arg1, st_arg2);
			break;

		case STNODE_OP_IN:
			check_relation_in(dfw, st_node, st_arg1, st_arg2);
			break;

		default:
			ws_assert_not_reached();
	}
}
/*
 * Return the error-message fragment describing why an operand cannot take
 * part in the given arithmetic operation. Operators without a dedicated
 * message get a placeholder text.
 */
static const char *
op_to_error_msg(stnode_op_t st_op)
{
	const char *reason = "cannot FIXME";

	switch (st_op) {
		case STNODE_OP_UNARY_MINUS:
			reason = "cannot be negated";
			break;
		case STNODE_OP_ADD:
			reason = "cannot be added";
			break;
		case STNODE_OP_SUBTRACT:
			reason = "cannot be subtracted";
			break;
		case STNODE_OP_MULTIPLY:
			reason = "cannot be multiplied";
			break;
		case STNODE_OP_DIVIDE:
			reason = "cannot be divided";
			break;
		case STNODE_OP_MODULO:
			reason = "does not support modulo operation";
			break;
		case STNODE_OP_BITWISE_AND:
			reason = "does not support bitwise AND";
			break;
		default:
			break;
	}
	return reason;
}
static ftenum_t
check_arithmetic_LHS(dfwork_t *dfw, stnode_op_t st_op,
stnode_t *st_node, stnode_t *st_arg1, stnode_t *st_arg2,
ftenum_t lhs_ftype, int commute)
{
ftenum_t ftype1, ftype2;
FtypeCanFunc can_func = NULL;
LOG_NODE(st_node);
if (commute < 0) {
return FT_NONE;
}
if (st_op == STNODE_OP_UNARY_MINUS) {
ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype);
if (ftype1 == FT_NONE)
return FT_NONE;
if (!ftype_can_unary_minus(ftype1)) {
FAIL(dfw, st_arg1, "%s %s.",
ftype_name(ftype1), op_to_error_msg(st_op));
}
if (stnode_type_id(st_arg1) == STTYPE_FVALUE) {
/* Pre-compute constant unary minus result */
char *err_msg;
fvalue_t *new_fv = fvalue_unary_minus(stnode_data(st_arg1), &err_msg);
if (new_fv == NULL) {
dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(st_arg1),
"%s: %s", stnode_todisplay(st_arg1), err_msg);
g_free(err_msg);
THROW(TypeError);
}
/* Replaces unary operator with result */
stnode_replace(st_node, STTYPE_FVALUE, new_fv);
}
return ftype1;
}
switch (st_op) {
case STNODE_OP_ADD:
can_func = ftype_can_add;
break;
case STNODE_OP_SUBTRACT:
can_func = ftype_can_subtract;
break;
case STNODE_OP_MULTIPLY:
can_func = ftype_can_multiply;
break;
case STNODE_OP_DIVIDE:
can_func = ftype_can_divide;
break;
case STNODE_OP_MODULO:
can_func = ftype_can_modulo;
break;
case STNODE_OP_BITWISE_AND:
can_func = ftype_can_bitwise_and;
break;
default:
ws_assert_not_reached();
}
ftype1 = check_arithmetic(dfw, st_arg1, lhs_ftype);
if (ftype1 == FT_NONE) {
return check_arithmetic_LHS(dfw, st_op, st_node, st_arg2, st_arg1, lhs_ftype, commute - 1);
}
if (!can_func(ftype1)) {
FAIL(dfw, st_arg1, "%s %s.",
ftype_name(ftype1), op_to_error_msg(st_op));
}
ftype2 = check_arithmetic(dfw, st_arg2, ftype1);
if (!can_func(ftype2)) {
FAIL(dfw, st_arg2, "%s %s.",
ftype_name(ftype2), op_to_error_msg(st_op));
}
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, st_node, "%s and %s are not compatible.",
ftype_name(ftype1), ftype_name(ftype2));
}
return ftype1;
}
/*
 * Type-check a node used as an arithmetic operand and return its type.
 *
 * lhs_ftype is the type hint from the other side of the expression. A
 * literal is converted to a value of that type (replacing the node) when a
 * hint is available and yields FT_NONE otherwise. Fields and references
 * report their registered type, slices report FT_BYTES, and nested
 * arithmetic nodes are checked recursively with a commute budget of 1.
 * Any other node type is rejected.
 */
ftenum_t
check_arithmetic(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
{
	stnode_op_t	inner_op;
	stnode_t	*inner_arg1, *inner_arg2;

	LOG_NODE(st_node);

	switch (stnode_type_id(st_node)) {
		case STTYPE_LITERAL:
		{
			fvalue_t *converted;

			if (lhs_ftype == FT_NONE)
				return FT_NONE;

			converted = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_node, FALSE, NULL);
			stnode_replace(st_node, STTYPE_FVALUE, converted);
			return fvalue_type_ftenum(converted);
		}

		case STTYPE_FIELD:
		case STTYPE_REFERENCE:
			return sttype_field_hfinfo(st_node)->type;

		case STTYPE_FUNCTION:
			return check_function(dfw, st_node, lhs_ftype);

		case STTYPE_SLICE:
			check_slice_sanity(dfw, st_node, lhs_ftype);
			return FT_BYTES;

		case STTYPE_FVALUE:
			return fvalue_type_ftenum(stnode_data(st_node));

		case STTYPE_ARITHMETIC:
			sttype_oper_get(st_node, &inner_op, &inner_arg1, &inner_arg2);
			return check_arithmetic_LHS(dfw, inner_op, st_node,
					inner_arg1, inner_arg2, lhs_ftype, 1);

		default:
			FAIL(dfw, st_node, "%s is not a valid arithmetic operation.",
					stnode_todisplay(st_node));
	}

	/* Only reachable from the default case if FAIL() were to return,
	 * which is not expected; keeps every path returning a value. */
	return FT_NONE;
}
/*
 * Check the entire syntax tree.
 *
 * Tests are handed to check_test(). A bare arithmetic expression must
 * resolve to a type, otherwise it is rejected as an invalid constant
 * expression. A bare slice is sanity-checked. Every other node is treated
 * as an existence test.
 */
static void
semcheck(dfwork_t *dfw, stnode_t *st_node)
{
	LOG_NODE(st_node);

	switch (stnode_type_id(st_node)) {
		case STTYPE_TEST:
			check_test(dfw, st_node);
			break;
		case STTYPE_ARITHMETIC:
		{
			ftenum_t ftype = check_arithmetic(dfw, st_node, FT_NONE);

			if (ftype == FT_NONE) {
				FAIL(dfw, st_node, "Constant expression is invalid.");
			}
			break;
		}
		case STTYPE_SLICE:
			check_slice_sanity(dfw, st_node, FT_NONE);
			break;
		default:
			check_exists(dfw, st_node);
	}
}
/*
 * Check the syntax tree for semantic errors, and convert
 * some of the nodes into the form they need to be in order to
 * later generate the DFVM bytecode.
 *
 * Returns TRUE when the tree rooted at dfw->st_root passed the check and
 * FALSE when a TypeError was thrown during it.
 */
gboolean
dfw_semcheck(dfwork_t *dfw)
{
	/* volatile: assigned inside the CATCH handler and read after ENDTRY. */
	volatile gboolean ok_filter = TRUE;

	/* "%p" expects a void pointer, hence the explicit cast. */
	ws_noisy("Starting semantic check (dfw = %p)", (void *)dfw);

	/* Instead of having to check for errors at every stage of
	 * the semantic-checking, the semantic-checking code will
	 * throw an exception if a problem is found. */
	TRY {
		semcheck(dfw, dfw->st_root);
	}
	CATCH(TypeError) {
		ok_filter = FALSE;
	}
	ENDTRY;

	ws_noisy("Semantic check (dfw = %p) returns %s",
			(void *)dfw, ok_filter ? "TRUE" : "FALSE");

	return ok_filter;
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/