dfilter: Remove redundant STTYPE_CHARCONST syntax node

A charconst follows the same semantic rules as an unparsed value, so just
use STTYPE_UNPARSED for it and avoid the redundancy.

We keep the use of TOKEN_CHARCONST as an optimization to avoid
an unnecessary name resolution (lookup for a registered field with
the same name as the charconst).
This commit is contained in:
João Valverde 2021-10-31 18:18:28 +00:00
parent 9ca27643fa
commit d635ff4933
9 changed files with 29 additions and 86 deletions

View File

@ -222,28 +222,8 @@ with a central engine.
Each node (stnode_t) in the syntax tree has a type (sttype).
These sttypes are very much related to ftypes (field types), but there
is not a one-to-one correspondence. The syntax tree nodes are slightly
high-level. For example, there is only a single INTEGER sttype, unlike
the ftype system that has a type for UINT64, UINT32, UINT16, UINT8, etc.
(INTEGER removed in 2c701ddf - dfilter: Improve grammar to parse ranges)
typedef enum {
STTYPE_UNINITIALIZED,
STTYPE_TEST,
STTYPE_UNPARSED,
STTYPE_STRING,
STTYPE_CHARCONST,
STTYPE_FIELD,
STTYPE_FVALUE,
STTYPE_RANGE,
STTYPE_FUNCTION,
STTYPE_SET,
STTYPE_PCRE,
STTYPE_NUM_TYPES
} sttype_id_t;
The root node of the syntax tree is the main test or comparison
being done.
higher-level abstractions. The root node of the syntax tree is the main
test or comparison being done.
Semantic Check
--------------

View File

@ -144,7 +144,8 @@ entity(E) ::= STRING(S).
}
entity(E) ::= CHARCONST(C).
{
E = stnode_new(STTYPE_CHARCONST, df_lval_value(C));
/* A charconst uses "unparsed" semantic rules. */
E = stnode_new(STTYPE_UNPARSED, df_lval_value(C));
df_lval_free(C);
}
entity(E) ::= UNPARSED(U).

View File

@ -438,7 +438,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
/* This is OK */
break;
case STTYPE_STRING:
case STTYPE_CHARCONST:
case STTYPE_UNPARSED:
dfilter_fail(dfw, "\"%s\" is neither a field nor a protocol name.",
(char *)stnode_data(st_arg1));
@ -560,31 +559,6 @@ check_function(dfwork_t *dfw, stnode_t *st_node)
}
}
/* Build an fvalue of type 'ftype' from the character constant held in 'st'.
 *
 * The node's string is first parsed as an FT_CHAR. The resulting integer
 * is then printed with "%02x" and that hex text is parsed again as 'ftype',
 * so the character ends up as a single byte.
 *
 * Throws TypeError when the first parse returns NULL; the second parse is
 * asserted to succeed. */
WS_RETNONNULL
static fvalue_t *
dfilter_fvalue_from_charconst_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, gboolean allow_partial_value)
{
	const char *charconst = stnode_data(st);

	/* An error message is only requested while none has been stored yet. */
	fvalue_t *char_fv = fvalue_from_unparsed(FT_CHAR, charconst, allow_partial_value,
			dfw->error_message == NULL ? &dfw->error_message : NULL);
	if (char_fv == NULL)
		THROW(TypeError);

	/* The constant is valid: render its value as hex text and release
	 * the intermediate FT_CHAR value. */
	char *hex_text = g_strdup_printf("%02x", char_fv->value.uinteger);
	FVALUE_FREE(char_fv);

	/* Parse the hex text as the requested type. */
	fvalue_t *result = fvalue_from_unparsed(ftype, hex_text, allow_partial_value, NULL);
	ws_assert(result);
	g_free(hex_text);

	return result;
}
/* If the LHS of a relation test is a FIELD, run some checks
* and possibly some modifications of syntax tree nodes. */
static void
@ -633,8 +607,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
THROW(TypeError);
}
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) {
/* Skip incompatible fields */
while (hfinfo1->same_name_prev_id != -1 &&
((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) ||
@ -646,13 +619,6 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string,
if (type2 == STTYPE_STRING) {
fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1);
}
else if (type2 == STTYPE_CHARCONST &&
strcmp(relation_string, "contains") == 0) {
/* The RHS should be the same type as the LHS,
* but a character is just a one-byte byte
* string. */
fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value);
}
else {
fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1);
}
@ -771,8 +737,7 @@ check_relation_LHS_STRING(dfwork_t *dfw, const char* relation_string,
fvalue = dfilter_fvalue_from_string(dfw, ftype2, st_arg1, hfinfo2);
stnode_replace(st_arg1, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) {
/* Well now that's silly... */
dfilter_fail(dfw, "Neither \"%s\" nor \"%s\" are field or protocol names.",
(char *)stnode_data(st_arg1),
@ -839,8 +804,7 @@ check_relation_LHS_UNPARSED(dfwork_t *dfw, const char* relation_string,
fvalue = dfilter_fvalue_from_unparsed(dfw, ftype2, st_arg1, allow_partial_value, hfinfo2);
stnode_replace(st_arg1, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED ||
type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) {
/* Well now that's silly... */
dfilter_fail(dfw, "Neither \"%s\" nor \"%s\" are field or protocol names.",
(char *)stnode_data(st_arg1),
@ -921,13 +885,6 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string,
fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_CHARCONST) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)");
/* The RHS should be FT_BYTES, but a character is just a
* one-byte byte string. */
fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_RANGE) {
ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_RANGE)");
check_drange_sanity(dfw, st_arg2);
@ -971,7 +928,7 @@ check_param_entity(dfwork_t *dfw, stnode_t *st_node)
e_type = stnode_type_id(st_node);
/* If there's an unparsed string, change it to an FT_STRING */
if (e_type == STTYPE_UNPARSED || e_type == STTYPE_CHARCONST) {
if (e_type == STTYPE_UNPARSED) {
fvalue = dfilter_fvalue_from_unparsed(dfw, FT_STRING, st_node, TRUE, NULL);
new_st = stnode_new(STTYPE_FVALUE, fvalue);
stnode_free(st_node);
@ -1033,7 +990,7 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string,
fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) {
else if (type2 == STTYPE_UNPARSED) {
fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL);
stnode_replace(st_arg2, STTYPE_FVALUE, fvalue);
}
@ -1148,7 +1105,6 @@ check_relation(dfwork_t *dfw, const char *relation_string,
allow_partial_value, st_node, st_arg1, st_arg2);
break;
case STTYPE_UNPARSED:
case STTYPE_CHARCONST:
check_relation_LHS_UNPARSED(dfw, relation_string, can_func,
allow_partial_value, st_node, st_arg1, st_arg2);
break;

View File

@ -48,15 +48,6 @@ sttype_register_string(void)
string_tostr
};
static sttype_t charconst_type = {
STTYPE_CHARCONST,
"CHARCONST",
string_new,
string_free,
string_dup,
string_tostr
};
static sttype_t unparsed_type = {
STTYPE_UNPARSED,
"UNPARSED",
@ -67,7 +58,6 @@ sttype_register_string(void)
};
sttype_register(&string_type);
sttype_register(&charconst_type);
sttype_register(&unparsed_type);
}

View File

@ -24,7 +24,6 @@ typedef enum {
STTYPE_TEST,
STTYPE_UNPARSED,
STTYPE_STRING,
STTYPE_CHARCONST,
STTYPE_FIELD,
STTYPE_FVALUE,
STTYPE_RANGE,

View File

@ -239,6 +239,20 @@ byte_array_from_unparsed(const char *s, gchar **err_msg)
GByteArray *bytes;
gboolean res;
if (s[0] == '\'') {
	/*
	 * A leading single quote marks a C-style character constant, which
	 * represents a byte array of length 1.
	 */
	unsigned long value;
	if (!parse_charconst(s, &value, err_msg)) {
		/* This function returns a GByteArray pointer, so report
		 * failure with NULL rather than the boolean FALSE. */
		return NULL;
	}
	ws_assert(value <= UINT8_MAX);
	uint8_t one_byte = (uint8_t)value;
	bytes = g_byte_array_new();
	g_byte_array_append(bytes, &one_byte, 1);
	return bytes;
}
/*
* Special case where the byte string is specified using a one byte
* hex literal. We can't allow this for byte strings that are longer

View File

@ -48,7 +48,7 @@ get_sinteger(fvalue_t *fv)
return fv->value.sinteger;
}
static gboolean
gboolean
parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg)
{
const char *cp;

View File

@ -134,6 +134,9 @@ struct _ftype_t {
GByteArray *byte_array_from_unparsed(const char *s, gchar **err_msg);
gboolean
parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg);
#endif /* FTYPES_INT_H */
/*

View File

@ -91,11 +91,11 @@ class case_syntax(unittest.TestCase):
dfilter = "bootp"
checkDFilterSucceed(dfilter, "Deprecated tokens: \"bootp\"")
def test_charconst_1(self, checkDFilterCount):
def test_charconst_bytes_1(self, checkDFilterCount):
# Bytes as a character constant.
dfilter = "frame contains 'H'"
checkDFilterCount(dfilter, 1)
def test_charconst_2(self, checkDFilterCount):
def test_charconst_bytes_2(self, checkDFilterCount):
dfilter = "frame[54] == 'H'"
checkDFilterCount(dfilter, 1)