dfilter: Allow grouping arithmetical expressions with { }

This removes the limitation of having only two terms in an
arithmetic expression and allows setting the precedence using
curly braces (like any basic calculator).

Our grammar currently does not allow grouping arithmetic expressions
using parentheses, because boolean expressions and arithmetic
expressions are different and parentheses are used with the former.
This commit is contained in:
João Valverde 2022-04-04 16:52:11 +01:00
parent 73770c61b4
commit fb9a176587
5 changed files with 180 additions and 128 deletions

View File

@ -399,8 +399,7 @@ have the same number of bytes as the slice itself, as in:
=== Arithmetic operators
Simple arithmetic expressions are available. The following operators are
supported:
Arithmetic expressions are supported with the usual operators:
+ Addition
- Subtraction
@ -413,6 +412,17 @@ equal by one to the source port with the expression:
udp.dstport >= udp.srcport + 1
It is possible to group arithmetic expressions using curly brackets (parentheses
will not work for this):
tcp.dstport >= 4 * {tcp.srcport + 3}
Do not confuse this usage of curly brackets with set membership.
An unfortunate quirk in the filter syntax is that the subtraction operator
must be preceded by a space character, so "A-B" must be written as "A -B"
or "A - B".
=== Protocol field references
A variable using a sigil with the form ${some.proto.field} is called a field

View File

@ -65,10 +65,8 @@ They previously shipped with Npcap 1.55.
It must be written "0.7" and "7.0" respectively.
** The "bitwise and" operator is now a first-class bit operator, not a boolean operator. In particular this means
it is now possible to mask bits, e.g.: frame[0] & 0x0F == 3.
** Simple arithmetic expressions are supported in filter expressions.
It is now possible to use expressions such as "tcp.dstport >= tcp.srcport + 1", or using field references
to the selected frame: frame.number > ${frame.number} - 5. Note that the last example is only meaningful using
the GUI because TShark has no concept of selected frame.
** Arithmetic is supported for numeric fields with the usual operators: +, -, *, /, %. Arithmetic expressions must be grouped using
curly brackets (not parentheses).
** Logical AND now has higher precedence than logical OR, in line with most programming languages.
* text2pcap and "Import from Hex Dump":

View File

@ -90,13 +90,14 @@ shifting 3 more symbols. */
%left BITWISE_AND.
%left PLUS MINUS.
%left STAR RSLASH PERCENT.
%nonassoc UNARY_PLUS UNARY_MINUS.
/* Top-level targets */
sentence ::= expr(X). { dfw->st_root = X; }
sentence ::= . { dfw->st_root = NULL; }
expr(X) ::= relation_test(R). { X = R; }
expr(X) ::= logical_test(L). { X = L; }
expr(X) ::= arithmetic_expr(E). { X = E; }
%code {
static stnode_t *
@ -117,56 +118,55 @@ expr(X) ::= logical_test(L). { X = L; }
}
/* Logical tests */
logical_test(T) ::= expr(E) TEST_AND(L) expr(F).
expr(X) ::= expr(Y) TEST_AND(T) expr(Z).
{
T = new_test(dfw, TEST_OP_AND, L);
sttype_test_set2_args(T, E, F);
X = new_test(dfw, TEST_OP_AND, T);
sttype_test_set2_args(X, Y, Z);
}
logical_test(T) ::= expr(E) TEST_OR(L) expr(F).
expr(X) ::= expr(Y) TEST_OR(T) expr(Z).
{
T = new_test(dfw, TEST_OP_OR, L);
sttype_test_set2_args(T, E, F);
X = new_test(dfw, TEST_OP_OR, T);
sttype_test_set2_args(X, Y, Z);
}
logical_test(T) ::= TEST_NOT(L) expr(E).
expr(X) ::= TEST_NOT(T) expr(Y).
{
T = new_test(dfw, TEST_OP_NOT, L);
sttype_test_set1_args(T, E);
X = new_test(dfw, TEST_OP_NOT, T);
sttype_test_set1_args(X, Y);
}
logical_test(T) ::= entity(E).
/* Any expression inside parens is simply that expression */
expr(X) ::= LPAREN expr(Y) RPAREN.
{
T = E;
X = Y;
}
logical_test(T) ::= arithmetic_term(E).
{
T = E;
}
/* Entities, or things that can be compared/tested/checked */
atom(E) ::= STRING(S).
{
E = stnode_new_string(df_lval_value(S), df_lval_value(S));
df_lval_free(S, FALSE);
}
atom(E) ::= CHARCONST(C).
{
E = stnode_new_charconst(df_lval_number(C), df_lval_value(C));
df_lval_free(C, FALSE);
}
atom(E) ::= UNPARSED(U).
{
E = stnode_new_unparsed(df_lval_value(U), df_lval_value(U));
df_lval_free(U, FALSE);
}
atom(E) ::= LITERAL(S).
{
E = stnode_new_literal(df_lval_value(S), df_lval_value(S));
df_lval_free(S, FALSE);
}
atom(E) ::= IDENTIFIER(F).
{
char *name = df_lval_value(F);
@ -177,8 +177,8 @@ atom(E) ::= IDENTIFIER(F).
E = stnode_new(STTYPE_FIELD, hfinfo, name);
df_lval_free(F, FALSE);
}
entity(E) ::= atom(A). { E = A; }
entity(E) ::= REF_OPEN REFERENCE(F) REF_CLOSE.
atom(E) ::= REF_OPEN REFERENCE(F) REF_CLOSE.
{
char *name = df_lval_value(F);
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, name);
@ -188,69 +188,66 @@ entity(E) ::= REF_OPEN REFERENCE(F) REF_CLOSE.
E = stnode_new(STTYPE_REFERENCE, hfinfo, df_lval_value(F));
df_lval_free(F, FALSE);
}
entity(E) ::= atom(A). { E = A; }
entity(E) ::= range(R). { E = R; }
entity(E) ::= function(F). { E = F; }
bin_op(O) ::= BITWISE_AND(L). { O = new_math(dfw, OP_BITWISE_AND, L); }
bin_op(O) ::= PLUS(L). { O = new_math(dfw, OP_ADD, L); }
bin_op(O) ::= MINUS(L). { O = new_math(dfw, OP_SUBTRACT, L); }
bin_op(O) ::= STAR(L). { O = new_math(dfw, OP_MULTIPLY, L); }
bin_op(O) ::= RSLASH(L). { O = new_math(dfw, OP_DIVIDE, L); }
bin_op(O) ::= PERCENT(L). { O = new_math(dfw, OP_MODULO, L); }
arithmetic_term(T) ::= PLUS entity(N).
arithmetic_expr(T) ::= entity(N).
{
T = N;
}
arithmetic_term(T) ::= MINUS entity(N).
arithmetic_expr(T) ::= PLUS entity(N). [UNARY_PLUS]
{
T = N;
}
arithmetic_expr(T) ::= MINUS entity(N). [UNARY_MINUS]
{
T = stnode_new(STTYPE_ARITHMETIC, NULL, NULL);
sttype_test_set1(T, OP_UNARY_MINUS, N);
}
arithmetic_term(T) ::= entity(F) bin_op(O) entity(M).
arithmetic_expr(T) ::= arithmetic_expr(F) BITWISE_AND(O) arithmetic_expr(M).
{
T = O;
T = new_math(dfw, OP_BITWISE_AND, O);
sttype_test_set2_args(T, F, M);
}
term(T) ::= entity(E). { T = E; }
term(T) ::= arithmetic_term(E). { T = E; }
/* Ranges */
range(R) ::= entity(E) LBRACKET range_node_list(L) RBRACKET.
arithmetic_expr(T) ::= arithmetic_expr(F) PLUS(O) arithmetic_expr(M).
{
R = stnode_new(STTYPE_RANGE, NULL, NULL);
sttype_range_set(R, E, L);
/* Delete the list, but not the drange_nodes that
* the list contains. */
g_slist_free(L);
T = new_math(dfw, OP_ADD, O);
sttype_test_set2_args(T, F, M);
}
range_node_list(L) ::= range_node(D).
arithmetic_expr(T) ::= arithmetic_expr(F) MINUS(O) arithmetic_expr(M).
{
L = g_slist_append(NULL, D);
T = new_math(dfw, OP_SUBTRACT, O);
sttype_test_set2_args(T, F, M);
}
range_node_list(L) ::= range_node_list(P) COMMA range_node(D).
arithmetic_expr(T) ::= arithmetic_expr(F) STAR(O) arithmetic_expr(M).
{
L = g_slist_append(P, D);
T = new_math(dfw, OP_MULTIPLY, O);
sttype_test_set2_args(T, F, M);
}
range_node(D) ::= RANGE(R).
arithmetic_expr(T) ::= arithmetic_expr(F) RSLASH(O) arithmetic_expr(M).
{
char *err = NULL;
T = new_math(dfw, OP_DIVIDE, O);
sttype_test_set2_args(T, F, M);
}
D = drange_node_from_str(df_lval_value(R), &err);
if (err != NULL) {
dfilter_fail(dfw, "%s", err);
g_free(err);
}
arithmetic_expr(T) ::= arithmetic_expr(F) PERCENT(O) arithmetic_expr(M).
{
T = new_math(dfw, OP_MODULO, O);
sttype_test_set2_args(T, F, M);
}
df_lval_free(R, TRUE);
arithmetic_expr(T) ::= LBRACE arithmetic_expr(F) RBRACE.
{
T = F;
}
/* Relational tests */
@ -263,14 +260,14 @@ cmp_op(O) ::= TEST_GE(L). { O = new_test(dfw, TEST_OP_GE, L); }
cmp_op(O) ::= TEST_LT(L). { O = new_test(dfw, TEST_OP_LT, L); }
cmp_op(O) ::= TEST_LE(L). { O = new_test(dfw, TEST_OP_LE, L); }
comparison_test(T) ::= term(E) cmp_op(O) term(F).
comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) arithmetic_expr(F).
{
T = O;
sttype_test_set2_args(O, E, F);
}
/* 'a == b == c' or 'a < b <= c <= d < e' */
comparison_test(T) ::= term(E) cmp_op(O) comparison_test(R).
comparison_test(T) ::= arithmetic_expr(E) cmp_op(O) comparison_test(R).
{
stnode_t *L, *F;
/* for now generate it like E O F TEST_OP_AND F P G, later it could be optimized
@ -292,14 +289,16 @@ comparison_test(T) ::= term(E) cmp_op(O) comparison_test(R).
relation_test(T) ::= comparison_test(C). { T = C; }
/* Does not chain like math comparisons. */
rel_binop(O) ::= TEST_CONTAINS(L). { O = new_test(dfw, TEST_OP_CONTAINS, L); }
rel_binop(O) ::= TEST_MATCHES(L). { O = new_test(dfw, TEST_OP_MATCHES, L); }
relation_test(T) ::= entity(E) rel_binop(O) entity(F).
relation_test(T) ::= entity(E) TEST_CONTAINS(L) entity(F).
{
T = O;
sttype_test_set2_args(O, E, F);
T = new_test(dfw, TEST_OP_CONTAINS, L);
sttype_test_set2_args(T, E, F);
}
relation_test(T) ::= entity(E) TEST_MATCHES(L) entity(F).
{
T = new_test(dfw, TEST_OP_MATCHES, L);
sttype_test_set2_args(T, E, F);
}
relation_test(T) ::= entity(E) TEST_IN(O) set(S).
@ -360,6 +359,41 @@ set_element(N) ::= set_entity(X) DOTDOT set_entity(Y).
N = g_slist_append(N, Y);
}
/* Ranges */
range(R) ::= entity(E) LBRACKET range_node_list(L) RBRACKET.
{
R = stnode_new(STTYPE_RANGE, NULL, NULL);
sttype_range_set(R, E, L);
/* Delete the list, but not the drange_nodes that
* the list contains. */
g_slist_free(L);
}
range_node_list(L) ::= range_node(D).
{
L = g_slist_append(NULL, D);
}
range_node_list(L) ::= range_node_list(P) COMMA range_node(D).
{
L = g_slist_append(P, D);
}
range_node(D) ::= RANGE(R).
{
char *err = NULL;
D = drange_node_from_str(df_lval_value(R), &err);
if (err != NULL) {
dfilter_fail(dfw, "%s", err);
g_free(err);
}
df_lval_free(R, TRUE);
}
/* Functions */
%code {
@ -399,11 +433,3 @@ function_params(P) ::= function_params(L) COMMA entity(E).
{
P = g_slist_append(L, E);
}
/* Any expression inside parens is simply that expression */
expr(X) ::= LPAREN expr(Y) RPAREN.
{
X = Y;
}

View File

@ -43,7 +43,7 @@ check_function(dfwork_t *dfw, stnode_t *st_node);
static
ftenum_t
check_arithmetic_operation(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype);
check_arithmetic_expr(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype);
static fvalue_t *
mk_fvalue_from_val_string(dfwork_t *dfw, header_field_info *hfinfo, const char *s);
@ -525,7 +525,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
break;
case STTYPE_REFERENCE:
case STTYPE_STRING:
case STTYPE_UNPARSED:
case STTYPE_LITERAL:
case STTYPE_CHARCONST:
FAIL(dfw, "%s is neither a field nor a protocol name.",
@ -548,7 +547,13 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
FAIL(dfw, "You cannot test whether a function is present.");
break;
default:
case STTYPE_SET:
case STTYPE_UNPARSED:
case STTYPE_UNINITIALIZED:
case STTYPE_NUM_TYPES:
case STTYPE_TEST:
case STTYPE_FVALUE:
case STTYPE_PCRE:
ws_assert_not_reached();
}
}
@ -759,7 +764,7 @@ again:
ws_assert(st_op == TEST_OP_MATCHES);
}
else if (type2 == STTYPE_ARITHMETIC) {
ftype2 = check_arithmetic_operation(dfw, st_arg2, ftype1);
ftype2 = check_arithmetic_expr(dfw, st_arg2, ftype1);
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, "%s and %s are not of compatible types.",
@ -854,7 +859,7 @@ again:
ws_assert(st_op == TEST_OP_MATCHES);
}
else if (type2 == STTYPE_ARITHMETIC) {
ftype2 = check_arithmetic_operation(dfw, st_arg2, FT_BYTES);
ftype2 = check_arithmetic_expr(dfw, st_arg2, FT_BYTES);
if (!compatible_ftypes(FT_BYTES, ftype2)) {
FAIL(dfw, "%s and %s are not of compatible types.",
@ -972,7 +977,7 @@ again:
ws_assert(st_op == TEST_OP_MATCHES);
}
else if (type2 == STTYPE_ARITHMETIC) {
ftype2 = check_arithmetic_operation(dfw, st_arg2, ftype1);
ftype2 = check_arithmetic_expr(dfw, st_arg2, ftype1);
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, "%s and %s are not of compatible types.",
@ -999,7 +1004,7 @@ check_relation_LHS_ARITHMETIC(dfwork_t *dfw, test_op_t st_op _U_,
LOG_NODE(st_node);
check_arithmetic_operation(dfw, st_arg1, FT_NONE);
check_arithmetic_expr(dfw, st_arg1, FT_NONE);
sttype_test_get(st_arg1, NULL, &entity, NULL);
entity_type = stnode_type_id(entity);
@ -1013,6 +1018,9 @@ check_relation_LHS_ARITHMETIC(dfwork_t *dfw, test_op_t st_op _U_,
else if (entity_type == STTYPE_RANGE) {
check_relation_LHS_RANGE(dfw, st_op, can_func, allow_partial_value, st_node, entity, st_arg2);
}
else if (entity_type == STTYPE_ARITHMETIC) {
check_relation_LHS_ARITHMETIC(dfw, st_op, can_func, allow_partial_value, st_node, entity, st_arg2);
}
else {
ws_assert_not_reached();
}
@ -1230,12 +1238,13 @@ check_test(dfwork_t *dfw, stnode_t *st_node)
}
ftenum_t
check_arithmetic_entity(dfwork_t *dfw, FtypeCanFunc can_func, test_op_t st_op,
stnode_t *st_node, stnode_t *st_arg, ftenum_t lhs_ftype)
check_arithmetic_entity(dfwork_t *dfw, stnode_t *st_arg, ftenum_t lhs_ftype)
{
sttype_id_t type;
ftenum_t ftype;
LOG_NODE(st_arg);
/* lhs_ftype variable determines the type for this entity. If LHS type
* is none we must have been passed an entity with a definite type
* (field, function, etc). */
@ -1249,70 +1258,47 @@ check_arithmetic_entity(dfwork_t *dfw, FtypeCanFunc can_func, test_op_t st_op,
fvalue_t *fvalue = dfilter_fvalue_from_literal(dfw, lhs_ftype, st_arg, FALSE, NULL);
stnode_replace(st_arg, STTYPE_FVALUE, fvalue);
ftype = fvalue_type_ftenum(fvalue);
if (!can_func(ftype)) {
FAIL(dfw, "%s (%s) is not a valid arithmetic operand for %s.",
stnode_todisplay(st_arg),
fvalue_type_name(fvalue),
stnode_todisplay(st_node));
}
if (st_op == OP_UNARY_MINUS) {
/* Pre-compute constant unary minus result */
char *err_msg;
fvalue_t *new_fv = fvalue_unary_minus(fvalue, &err_msg);
if (new_fv == NULL) {
dfilter_fail(dfw, "%s: %s", stnode_todisplay(st_arg), err_msg);
g_free(err_msg);
THROW(TypeError);
}
/* Replaces unary operator with result */
stnode_replace(st_node, STTYPE_FVALUE, new_fv);
}
}
else if (type == STTYPE_FIELD || type == STTYPE_REFERENCE) {
header_field_info *hfinfo = stnode_data(st_arg);
ftype = hfinfo->type;
if (!can_func(ftype)) {
FAIL(dfw, "%s (type=%s) is not a valid arithmetic operand for %s.",
hfinfo->abbrev, ftype_pretty_name(ftype),
stnode_todisplay(st_node));
}
}
else if (type == STTYPE_FUNCTION) {
check_function(dfw, st_arg);
df_func_def_t *funcdef = sttype_function_funcdef(st_arg);
ftype = funcdef->retval_ftype;
if (!can_func(ftype)) {
FAIL(dfw, "Function %s (type=%s) is not a valid arithmetic operand for %s.",
funcdef->name, ftype_pretty_name(ftype),
stnode_todisplay(st_node));
}
}
else if (type == STTYPE_RANGE) {
check_drange_sanity(dfw, st_arg);
ftype = FT_BYTES;
}
else if (type == STTYPE_FVALUE) {
ftype = fvalue_type_ftenum(stnode_data(st_arg));
}
else {
FAIL(dfw, "%s is not a valid arithmetic operand for %s.",
stnode_todisplay(st_arg),
stnode_todisplay(st_node));
FAIL(dfw, "%s is not a valid arithmetic operand",
stnode_todisplay(st_arg));
}
return ftype;
}
ftenum_t
check_arithmetic_operation(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
check_arithmetic_expr(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
{
test_op_t st_op;
stnode_t *st_arg1, *st_arg2;
ftenum_t ftype1, ftype2;
FtypeCanFunc can_func = NULL;
LOG_NODE(st_node);
if (stnode_type_id(st_node) != STTYPE_ARITHMETIC) {
return check_arithmetic_entity(dfw, st_node, lhs_ftype);
}
sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2);
resolve_unparsed(dfw, st_arg1);
@ -1321,9 +1307,27 @@ check_arithmetic_operation(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
FAIL(dfw, "Constant arithmetic expression on the LHS is invalid.");
}
if (st_op == OP_UNARY_MINUS) {
ftype1 = check_arithmetic_entity(dfw, st_arg1, lhs_ftype);
if (stnode_type_id(st_arg1) == STTYPE_FVALUE) {
/* Pre-compute constant unary minus result */
char *err_msg;
fvalue_t *new_fv = fvalue_unary_minus(stnode_data(st_arg1), &err_msg);
if (new_fv == NULL) {
dfilter_fail(dfw, "%s: %s", stnode_todisplay(st_arg1), err_msg);
g_free(err_msg);
THROW(TypeError);
}
/* Replaces unary operator with result */
stnode_replace(st_node, STTYPE_FVALUE, new_fv);
}
return ftype1;
}
ftype1 = check_arithmetic_expr(dfw, st_arg1, lhs_ftype);
ftype2 = check_arithmetic_expr(dfw, st_arg2, ftype1);
switch (st_op) {
case OP_UNARY_MINUS:
return check_arithmetic_entity(dfw, ftype_can_unary_minus, st_op, st_node, st_arg1, lhs_ftype);
case OP_ADD:
can_func = ftype_can_add;
break;
@ -1346,14 +1350,20 @@ check_arithmetic_operation(dfwork_t *dfw, stnode_t *st_node, ftenum_t lhs_ftype)
ws_assert_not_reached();
}
ftype1 = check_arithmetic_entity(dfw, can_func, st_op, st_node, st_arg1, lhs_ftype);
ftype2 = check_arithmetic_entity(dfw, can_func, st_op, st_node, st_arg2, ftype1);
if (!can_func(ftype1)) {
FAIL(dfw, "%s cannot %s.",
ftype_name(ftype1), stnode_todisplay(st_node));
}
if (!can_func(ftype2)) {
FAIL(dfw, "%s cannot %s.",
ftype_name(ftype2), stnode_todisplay(st_node));
}
if (!compatible_ftypes(ftype1, ftype2)) {
FAIL(dfw, "%s and %s are not type compatible.",
stnode_todisplay(st_arg1), stnode_todisplay(st_arg2));
}
return ftype1;
}
@ -1369,7 +1379,7 @@ semcheck(dfwork_t *dfw, stnode_t *st_node)
check_test(dfw, st_node);
break;
case STTYPE_ARITHMETIC:
check_arithmetic_operation(dfw, st_node, FT_NONE);
check_arithmetic_expr(dfw, st_node, FT_NONE);
break;
default:
check_exists(dfw, st_node);

View File

@ -262,3 +262,11 @@ class case_arithmetic(unittest.TestCase):
def test_sub_4(self, checkDFilterCount):
dfilter = "udp.length == ip.len - 20"
checkDFilterCount(dfilter, 4)
def test_expr_1(self, checkDFilterCount):
dfilter = 'udp.port * { 10 / {5 - 4} } == udp.port * { {50 + 50} / 2 - 40 }'
checkDFilterCount(dfilter, 4)
def test_expr_2(self, checkDFilterCount):
dfilter = 'udp.dstport * { udp.srcport / {5 - 4} } == udp.srcport * { 2 * udp.dstport - 68 }'
checkDFilterCount(dfilter, 2)