dfilter: Add RHS bias for literal values

For unparsed values on the RHS of a comparison try
to parse them first as a literal and only then as
a protocol. This is more complicated in code but
should be a use case a lot more common and useful in
practice.

It removes some annoying special cases and applies this
rule consistently to any expression. Consistency is
important otherwise the special cases and exceptions
make the language confusing and difficult to learn.

For values on the LHS the rule remains to first try a
protocol value, then a literal.

Related with issue #17731.
This commit is contained in:
João Valverde 2022-02-20 00:14:24 +00:00 committed by A Wireshark GitLab Utility
parent c4f9d8abda
commit a68b408a9f
6 changed files with 148 additions and 52 deletions

View File

@ -142,13 +142,7 @@ entity(E) ::= CHARCONST(C).
}
entity(E) ::= UNPARSED(U).
{
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, df_lval_value(U));
if (hfinfo != NULL) {
E = stnode_new(STTYPE_FIELD, hfinfo, df_lval_value(U));
}
else {
E = stnode_new_literal(df_lval_value(U), df_lval_value(U));
}
E = stnode_new_unparsed(df_lval_value(U), df_lval_value(U));
df_lval_free(U, FALSE);
}
entity(E) ::= LITERAL(S).

View File

@ -137,9 +137,8 @@ compatible_ftypes(ftenum_t a, ftenum_t b)
}
/* Gets an fvalue from a string, and sets the error message on failure. */
WS_RETNONNULL
static fvalue_t*
dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
gboolean allow_partial_value, header_field_info *hfinfo_value_string)
{
fvalue_t *fv;
@ -160,6 +159,16 @@ dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
dfw->error_message = NULL;
}
}
return fv;
}
/* Gets an fvalue from a string, and sets the error message on failure. */
WS_RETNONNULL
static fvalue_t*
dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
gboolean allow_partial_value, header_field_info *hfinfo_value_string)
{
fvalue_t *fv = _fvalue_from_literal(dfw, ftype, st, allow_partial_value, hfinfo_value_string);
if (fv == NULL)
THROW(TypeError);
return fv;
@ -167,7 +176,7 @@ dfilter_fvalue_from_literal(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
/* Gets an fvalue from a string, and sets the error message on failure. */
WS_RETNONNULL
static fvalue_t*
static fvalue_t *
dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
header_field_info *hfinfo_value_string)
{
@ -192,6 +201,42 @@ dfilter_fvalue_from_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
return fv;
}
static fvalue_t *
dfilter_fvalue_from_unparsed_resolved(dfwork_t *dfw, ftenum_t ftype, stnode_t *st,
gboolean allow_partial_value, header_field_info *hfinfo_value_string)
{
fvalue_t *fv = _fvalue_from_literal(dfw, ftype, st, allow_partial_value, hfinfo_value_string);
if (fv != NULL) {
/* convert to fvalue successfully. */
return fv;
}
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_data(st));
if (hfinfo == NULL) {
/* This node is neither a valid fvalue nor a valid field. */
/* XXX Error message already set for literal? */
THROW(TypeError);
}
stnode_replace(st, STTYPE_FIELD, hfinfo);
/* Successfully resolved to a field. */
return NULL;
}
static gboolean
resolve_unparsed(dfwork_t *dfw, stnode_t *st)
{
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_data(st));
if (hfinfo != NULL) {
stnode_replace(st, STTYPE_FIELD, hfinfo);
return TRUE;
}
return FALSE;
}
/* Creates a FT_UINT32 fvalue with a given value. */
static fvalue_t*
mk_uint32_fvalue(guint32 val)
@ -430,11 +475,14 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
{
LOG_NODE(st_arg1);
resolve_unparsed(dfw, st_arg1);
switch (stnode_type_id(st_arg1)) {
case STTYPE_FIELD:
/* This is OK */
break;
case STTYPE_STRING:
case STTYPE_UNPARSED:
case STTYPE_LITERAL:
case STTYPE_CHARCONST:
FAIL(dfw, "%s is neither a field nor a protocol name.",
@ -473,6 +521,7 @@ check_drange_sanity(dfwork_t *dfw, stnode_t *st)
entity1 = sttype_range_entity(st);
ws_assert(entity1);
resolve_unparsed(dfw, entity1);
if (stnode_type_id(entity1) == STTYPE_FIELD) {
hfinfo1 = stnode_data(entity1);
@ -496,8 +545,8 @@ check_drange_sanity(dfwork_t *dfw, stnode_t *st)
/* Should this be rejected instead? */
check_drange_sanity(dfw, entity1);
} else {
FAIL(dfw, "Range is not supported for entity %s of type %s",
stnode_todisplay(entity1), stnode_type_name(entity1));
FAIL(dfw, "Range is not supported for entity %s",
stnode_todisplay(entity1));
}
}
@ -540,6 +589,7 @@ check_function(dfwork_t *dfw, stnode_t *st_node)
iparam = 0;
while (params) {
resolve_unparsed(dfw, params->data);
funcdef->semcheck_param_function(dfw, funcdef->name, iparam, params->data);
params = params->next;
iparam++;
@ -577,6 +627,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, test_op_t st_op,
LOG_NODE(st_node);
again:
type2 = stnode_type_id(st_arg2);
hfinfo1 = stnode_data(st_arg1);
@ -603,7 +654,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, test_op_t st_op,
hfinfo2->abbrev, ftype_pretty_name(ftype2));
}
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL || type2 == STTYPE_UNPARSED) {
/* Skip incompatible fields */
while (hfinfo1->same_name_prev_id != -1 &&
((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
@ -612,7 +663,14 @@ check_relation_LHS_FIELD(dfwork_t *dfw, test_op_t st_op,
ftype1 = hfinfo1->type;
}
if (type2 == STTYPE_STRING) {
if (type2 == STTYPE_UNPARSED) {
fvalue = dfilter_fvalue_from_unparsed_resolved(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
if (fvalue == NULL) {
/* We have a protocol or protocol field. */
goto again;
}
}
else if (type2 == STTYPE_STRING) {
fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
}
else {
@ -676,6 +734,7 @@ check_relation_LHS_STRING(dfwork_t *dfw, test_op_t st_op _U_,
LOG_NODE(st_node);
again:
type2 = stnode_type_id(st_arg2);
if (type2 == STTYPE_FIELD) {
@ -691,8 +750,12 @@ check_relation_LHS_STRING(dfwork_t *dfw, test_op_t st_op _U_,
fvalue = dfilter_fvalue_from_string(dfw, ftype2, st_arg1, hfinfo2);
stnode_replace(st_arg1, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
if (type2 == STTYPE_UNPARSED && resolve_unparsed(dfw, st_arg2)) {
/* We have a protocol or protocol field. */
goto again;
}
/* Well now that's silly... */
FAIL(dfw, "Neither %s nor %s are field or protocol names.",
stnode_todisplay(st_arg1),
@ -737,6 +800,7 @@ check_relation_LHS_LITERAL(dfwork_t *dfw, test_op_t st_op _U_,
LOG_NODE(st_node);
again:
type2 = stnode_type_id(st_arg2);
if (type2 == STTYPE_FIELD) {
@ -752,8 +816,12 @@ check_relation_LHS_LITERAL(dfwork_t *dfw, test_op_t st_op _U_,
fvalue = dfilter_fvalue_from_literal(dfw, ftype2, st_arg1, allow_partial_value, hfinfo2);
stnode_replace(st_arg1, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
if (type2 == STTYPE_UNPARSED && resolve_unparsed(dfw, st_arg2)) {
/* We have a protocol or protocol field. */
goto again;
}
/* Well now that's silly... */
FAIL(dfw, "Neither %s nor %s are field or protocol names.",
stnode_todisplay(st_arg1),
@ -797,6 +865,7 @@ check_relation_LHS_CHARCONST(dfwork_t *dfw, test_op_t st_op _U_,
LOG_NODE(st_node);
again:
type2 = stnode_type_id(st_arg2);
if (type2 == STTYPE_FIELD) {
@ -812,8 +881,12 @@ check_relation_LHS_CHARCONST(dfwork_t *dfw, test_op_t st_op _U_,
fvalue = dfilter_fvalue_from_charconst(dfw, ftype2, st_arg1);
stnode_replace(st_arg1, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_LITERAL || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
if (type2 == STTYPE_UNPARSED && resolve_unparsed(dfw, st_arg2)) {
/* We have a protocol or protocol field. */
goto again;
}
/* Well now that's silly... */
FAIL(dfw, "Neither %s nor %s are field or protocol names.",
stnode_todisplay(st_arg1),
@ -859,6 +932,7 @@ check_relation_LHS_RANGE(dfwork_t *dfw, test_op_t st_op,
check_drange_sanity(dfw, st_arg1);
again:
type2 = stnode_type_id(st_arg2);
if (type2 == STTYPE_FIELD) {
@ -880,6 +954,12 @@ check_relation_LHS_RANGE(dfwork_t *dfw, test_op_t st_op,
fvalue = dfilter_fvalue_from_string(dfw, FT_BYTES, st_arg2, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_UNPARSED) {
fvalue = dfilter_fvalue_from_unparsed_resolved(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
if (fvalue == NULL)
goto again;
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_LITERAL) {
fvalue = dfilter_fvalue_from_literal(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
@ -935,8 +1015,6 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, test_op_t st_op,
LOG_NODE(st_node);
check_function(dfw, st_arg1);
type2 = stnode_type_id(st_arg2);
funcdef = sttype_function_funcdef(st_arg1);
ftype1 = funcdef->retval_ftype;
@ -946,6 +1024,9 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, test_op_t st_op,
stnode_todisplay(st_node));
}
again:
type2 = stnode_type_id(st_arg2);
if (type2 == STTYPE_FIELD) {
hfinfo2 = stnode_data(st_arg2);
ftype2 = hfinfo2->type;
@ -965,6 +1046,12 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, test_op_t st_op,
fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_UNPARSED) {
fvalue = dfilter_fvalue_from_unparsed_resolved(dfw, ftype1, st_arg2, allow_partial_value, NULL);
if (fvalue == NULL)
goto again;
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_LITERAL) {
fvalue = dfilter_fvalue_from_literal(dfw, ftype1, st_arg2, allow_partial_value, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
@ -1021,6 +1108,14 @@ check_relation(dfwork_t *dfw, test_op_t st_op,
{
LOG_NODE(st_node);
if (!resolve_unparsed(dfw, st_arg1)) {
if (stnode_type_id(st_arg1) == STTYPE_UNPARSED) {
/* Unparsed value does not match a field so treat
* it as a literal. */
stnode_change_type(st_arg1, STTYPE_LITERAL);
}
}
switch (stnode_type_id(st_arg1)) {
case STTYPE_FIELD:
check_relation_LHS_FIELD(dfw, st_op, can_func,
@ -1058,34 +1153,7 @@ check_relation_contains(dfwork_t *dfw, stnode_t *st_node,
{
LOG_NODE(st_node);
/* Protocol can only be on LHS for "contains".
* Check to see if protocol is on RHS, and re-interpret it as LITERAL
* instead. The subsequent functions will parse it according to the
* existing rules for literal unquoted strings.
*
* This catches the case where the user has written "fc" on the RHS,
* probably intending a byte value rather than the fibre channel
* protocol, or similar for a number of other possibilities
* ("dc", "ff", "fefd"), and also catches the case where the user
* has written a generic string on the RHS. (The now literal value
* will be interpreted in a way that matches the LHS; e.g.
* FT_PROTOCOL and FT_BYTES fields expect byte arrays whereas
* FT_STRING[Z][PAD] fields expect strings.)
*
* XXX: Is there a better way to do this in the grammar parser,
* which now determines whether something is a field?
*/
if (stnode_type_id(st_arg2) == STTYPE_FIELD) {
header_field_info *hfinfo = stnode_data(st_arg2);
if (hfinfo->type == FT_PROTOCOL) {
/* Send it through as literal and all the other
* functions will take care of it as if it didn't
* match a protocol string.
*/
stnode_replace(st_arg2, STTYPE_LITERAL, g_strdup(hfinfo->abbrev));
}
}
resolve_unparsed(dfw, st_arg1);
switch (stnode_type_id(st_arg1)) {
case STTYPE_FIELD:
@ -1120,6 +1188,8 @@ check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
LOG_NODE(st_node);
resolve_unparsed(dfw, st_arg1);
if (stnode_type_id(st_arg2) != STTYPE_STRING) {
FAIL(dfw, "Matches requires a double quoted string on the right side.");
}
@ -1168,6 +1238,8 @@ check_relation_in(dfwork_t *dfw, stnode_t *st_node _U_,
LOG_NODE(st_node);
resolve_unparsed(dfw, st_arg1);
if (stnode_type_id(st_arg1) != STTYPE_FIELD) {
FAIL(dfw, "Only a field may be tested for membership in a set.");
}

View File

@ -40,6 +40,15 @@ sttype_register_string(void)
string_tostr
};
static sttype_t unparsed_type = {
STTYPE_UNPARSED,
"UNPARSED",
NULL,
string_free,
string_dup,
string_tostr
};
static sttype_t literal_type = {
STTYPE_LITERAL,
"LITERAL",
@ -50,6 +59,7 @@ sttype_register_string(void)
};
sttype_register(&string_type);
sttype_register(&unparsed_type);
sttype_register(&literal_type);
}

View File

@ -141,6 +141,13 @@ stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data)
node->repr_token = repr_token;
}
void
stnode_change_type(stnode_t *node, sttype_id_t new_type)
{
/* XXX Check types are "compatible" */
node->type = sttype_lookup(new_type);
}
stnode_t*
stnode_new(sttype_id_t type_id, gpointer data, char *token)
{
@ -170,6 +177,12 @@ stnode_new_string(const char *str, char *token)
return stnode_new(STTYPE_STRING, g_strdup(str), token);
}
stnode_t *
stnode_new_unparsed(const char *str, char *token)
{
return stnode_new(STTYPE_UNPARSED, g_strdup(str), token);
}
stnode_t *
stnode_new_literal(const char *str, char *token)
{

View File

@ -23,6 +23,7 @@ typedef enum {
STTYPE_UNINITIALIZED,
STTYPE_TEST,
STTYPE_LITERAL,
STTYPE_UNPARSED,
STTYPE_STRING,
STTYPE_CHARCONST,
STTYPE_FIELD,
@ -110,6 +111,9 @@ stnode_new_test(test_op_t op, char *token);
stnode_t *
stnode_new_string(const char *str, char *token);
stnode_t *
stnode_new_unparsed(const char *str, char *token);
stnode_t *
stnode_new_literal(const char *str, char *token);
@ -128,6 +132,9 @@ stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, char *token);
void
stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data);
void
stnode_change_type(stnode_t *node, sttype_id_t new_type);
void
stnode_free(stnode_t *node);

View File

@ -37,11 +37,11 @@ class case_range(unittest.TestCase):
def test_slice_string_1(self, checkDFilterFail):
dfilter = "frame == \"00\"[1]"
checkDFilterFail(dfilter, "Range is not supported for entity \"00\" of type STRING")
checkDFilterFail(dfilter, "Range is not supported for entity \"00\"")
def test_slice_unparsed_1(self, checkDFilterFail):
dfilter = "a == b[1]"
checkDFilterFail(dfilter, "Range is not supported for entity \"b\" of type LITERAL")
checkDFilterFail(dfilter, "Range is not supported for entity \"b\"")
def test_slice_func_1(self, checkDFilterSucceed):
dfilter = "string(ipx.src.node)[3:2] == \"cc:dd\""