From d635ff493315145f35438f758f6737b6446b9599 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Sun, 31 Oct 2021 18:18:28 +0000 Subject: [PATCH] dfilter: Remove redundant STTYPE_CHARCONST syntax node A charconst uses the same semantic rules as unparsed so just use the latter to avoid redundancies. We keep the use of TOKEN_CHARCONST as an optimization to avoid an unnecessary name resolution (lookup for a registered field with the same name as the charconst). --- doc/README.display_filter | 24 ++----------- epan/dfilter/grammar.lemon | 3 +- epan/dfilter/semcheck.c | 54 +++--------------------------- epan/dfilter/sttype-string.c | 10 ------ epan/dfilter/syntax-tree.h | 1 - epan/ftypes/ftype-bytes.c | 14 ++++++++ epan/ftypes/ftype-integer.c | 2 +- epan/ftypes/ftypes-int.h | 3 ++ test/suite_dfilter/group_syntax.py | 4 +-- 9 files changed, 29 insertions(+), 86 deletions(-) diff --git a/doc/README.display_filter b/doc/README.display_filter index 50ec94d02e..c266f1174f 100644 --- a/doc/README.display_filter +++ b/doc/README.display_filter @@ -222,28 +222,8 @@ with a central engine. Each node (stnode_t) in the syntax tree has a type (sttype). These sttypes are very much related to ftypes (field types), but there is not a one-to-one correspondence. The syntax tree nodes are slightly -high-level. For example, there is only a single INTEGER sttype, unlike -the ftype system that has a type for UINT64, UINT32, UINT16, UINT8, etc. -(INTEGER removed in 2c701ddf - dfilter: Improve grammar to parse ranges) - -typedef enum { - STTYPE_UNINITIALIZED, - STTYPE_TEST, - STTYPE_UNPARSED, - STTYPE_STRING, - STTYPE_CHARCONST, - STTYPE_FIELD, - STTYPE_FVALUE, - STTYPE_RANGE, - STTYPE_FUNCTION, - STTYPE_SET, - STTYPE_PCRE, - STTYPE_NUM_TYPES -} sttype_id_t; - - -The root node of the syntax tree is the main test or comparison -being done. +higher-level abstractions. The root node of the syntax tree is the main +test or comparison being done. Semantic Check -------------- diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon index 84567569c9..59d9120d09 100644 --- a/epan/dfilter/grammar.lemon +++ b/epan/dfilter/grammar.lemon @@ -144,7 +144,8 @@ entity(E) ::= STRING(S). } entity(E) ::= CHARCONST(C). { - E = stnode_new(STTYPE_CHARCONST, df_lval_value(C)); + /* A charconst uses "unparsed" semantic rules. */ + E = stnode_new(STTYPE_UNPARSED, df_lval_value(C)); df_lval_free(C); } entity(E) ::= UNPARSED(U). diff --git a/epan/dfilter/semcheck.c b/epan/dfilter/semcheck.c index 4057cb0010..cd575c81f0 100644 --- a/epan/dfilter/semcheck.c +++ b/epan/dfilter/semcheck.c @@ -438,7 +438,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1) /* This is OK */ break; case STTYPE_STRING: - case STTYPE_CHARCONST: case STTYPE_UNPARSED: dfilter_fail(dfw, "\"%s\" is neither a field nor a protocol name.", (char *)stnode_data(st_arg1)); @@ -560,31 +559,6 @@ check_function(dfwork_t *dfw, stnode_t *st_node) } } -/* Convert a character constant to a 1-byte BYTE_STRING containing the - * character. */ -WS_RETNONNULL -static fvalue_t * -dfilter_fvalue_from_charconst_string(dfwork_t *dfw, ftenum_t ftype, stnode_t *st, gboolean allow_partial_value) -{ - fvalue_t *fvalue; - const char *s = stnode_data(st); - - fvalue = fvalue_from_unparsed(FT_CHAR, s, allow_partial_value, - dfw->error_message == NULL ? &dfw->error_message : NULL); - if (fvalue == NULL) - THROW(TypeError); - - char *temp_string; - /* It's valid. Create a 1-byte BYTE_STRING from its value. */ - temp_string = g_strdup_printf("%02x", fvalue->value.uinteger); - FVALUE_FREE(fvalue); - fvalue = fvalue_from_unparsed(ftype, temp_string, allow_partial_value, NULL); - ws_assert(fvalue); - g_free(temp_string); - - return fvalue; -} - /* If the LHS of a relation test is a FIELD, run some checks * and possibly some modifications of syntax tree nodes. */ static void @@ -633,8 +607,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, THROW(TypeError); } } - else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED || - type2 == STTYPE_CHARCONST) { + else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) { /* Skip incompatible fields */ while (hfinfo1->same_name_prev_id != -1 && ((type2 == STTYPE_STRING && ftype1 != FT_STRING && ftype1!= FT_STRINGZ) || @@ -646,13 +619,6 @@ check_relation_LHS_FIELD(dfwork_t *dfw, const char *relation_string, if (type2 == STTYPE_STRING) { fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, hfinfo1); } - else if (type2 == STTYPE_CHARCONST && - strcmp(relation_string, "contains") == 0) { - /* The RHS should be the same type as the LHS, - * but a character is just a one-byte byte - * string. */ - fvalue = dfilter_fvalue_from_charconst_string(dfw, ftype1, st_arg2, allow_partial_value); - } else { fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, hfinfo1); } @@ -771,8 +737,7 @@ check_relation_LHS_STRING(dfwork_t *dfw, const char* relation_string, fvalue = dfilter_fvalue_from_string(dfw, ftype2, st_arg1, hfinfo2); stnode_replace(st_arg1, STTYPE_FVALUE, fvalue); } - else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED || - type2 == STTYPE_CHARCONST) { + else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) { /* Well now that's silly... */ dfilter_fail(dfw, "Neither \"%s\" nor \"%s\" are field or protocol names.", (char *)stnode_data(st_arg1), @@ -839,8 +804,7 @@ check_relation_LHS_UNPARSED(dfwork_t *dfw, const char* relation_string, fvalue = dfilter_fvalue_from_unparsed(dfw, ftype2, st_arg1, allow_partial_value, hfinfo2); stnode_replace(st_arg1, STTYPE_FVALUE, fvalue); } - else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED || - type2 == STTYPE_CHARCONST) { + else if (type2 == STTYPE_STRING || type2 == STTYPE_UNPARSED) { /* Well now that's silly... */ dfilter_fail(dfw, "Neither \"%s\" nor \"%s\" are field or protocol names.", (char *)stnode_data(st_arg1), @@ -921,13 +885,6 @@ check_relation_LHS_RANGE(dfwork_t *dfw, const char *relation_string, fvalue = dfilter_fvalue_from_unparsed(dfw, FT_BYTES, st_arg2, allow_partial_value, NULL); stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } - else if (type2 == STTYPE_CHARCONST) { - ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_CHARCONST)"); - /* The RHS should be FT_BYTES, but a character is just a - * one-byte byte string. */ - fvalue = dfilter_fvalue_from_charconst_string(dfw, FT_BYTES, st_arg2, allow_partial_value); - stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); - } else if (type2 == STTYPE_RANGE) { ws_debug("5 check_relation_LHS_RANGE(type2 = STTYPE_RANGE)"); check_drange_sanity(dfw, st_arg2); @@ -971,7 +928,7 @@ check_param_entity(dfwork_t *dfw, stnode_t *st_node) e_type = stnode_type_id(st_node); /* If there's an unparsed string, change it to an FT_STRING */ - if (e_type == STTYPE_UNPARSED || e_type == STTYPE_CHARCONST) { + if (e_type == STTYPE_UNPARSED) { fvalue = dfilter_fvalue_from_unparsed(dfw, FT_STRING, st_node, TRUE, NULL); new_st = stnode_new(STTYPE_FVALUE, fvalue); stnode_free(st_node); @@ -1033,7 +990,7 @@ check_relation_LHS_FUNCTION(dfwork_t *dfw, const char *relation_string, fvalue = dfilter_fvalue_from_string(dfw, ftype1, st_arg2, NULL); stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } - else if (type2 == STTYPE_UNPARSED || type2 == STTYPE_CHARCONST) { + else if (type2 == STTYPE_UNPARSED) { fvalue = dfilter_fvalue_from_unparsed(dfw, ftype1, st_arg2, allow_partial_value, NULL); stnode_replace(st_arg2, STTYPE_FVALUE, fvalue); } @@ -1148,7 +1105,6 @@ check_relation(dfwork_t *dfw, const char *relation_string, allow_partial_value, st_node, st_arg1, st_arg2); break; case STTYPE_UNPARSED: - case STTYPE_CHARCONST: check_relation_LHS_UNPARSED(dfw, relation_string, can_func, allow_partial_value, st_node, st_arg1, st_arg2); break; diff --git a/epan/dfilter/sttype-string.c b/epan/dfilter/sttype-string.c index 0bd88c38bd..770d0765fb 100644 --- a/epan/dfilter/sttype-string.c +++ b/epan/dfilter/sttype-string.c @@ -48,15 +48,6 @@ sttype_register_string(void) string_tostr }; - static sttype_t charconst_type = { - STTYPE_CHARCONST, - "CHARCONST", - string_new, - string_free, - string_dup, - string_tostr - }; - static sttype_t unparsed_type = { STTYPE_UNPARSED, "UNPARSED", @@ -67,7 +58,6 @@ sttype_register_string(void) }; sttype_register(&string_type); - sttype_register(&charconst_type); sttype_register(&unparsed_type); } diff --git a/epan/dfilter/syntax-tree.h b/epan/dfilter/syntax-tree.h index 96c6ad30c7..45e37a8beb 100644 --- a/epan/dfilter/syntax-tree.h +++ b/epan/dfilter/syntax-tree.h @@ -24,7 +24,6 @@ typedef enum { STTYPE_TEST, STTYPE_UNPARSED, STTYPE_STRING, - STTYPE_CHARCONST, STTYPE_FIELD, STTYPE_FVALUE, STTYPE_RANGE, diff --git a/epan/ftypes/ftype-bytes.c b/epan/ftypes/ftype-bytes.c index d5b37bca5b..f78d535160 100644 --- a/epan/ftypes/ftype-bytes.c +++ b/epan/ftypes/ftype-bytes.c @@ -239,6 +239,20 @@ byte_array_from_unparsed(const char *s, gchar **err_msg) GByteArray *bytes; gboolean res; + if (s[0] == '\'') { + /* + * byte array with length 1 represented as a C-style character constant. + */ + unsigned long value; + if (!parse_charconst(s, &value, err_msg)) + return FALSE; + ws_assert(value <= UINT8_MAX); + uint8_t one_byte = (uint8_t)value; + bytes = g_byte_array_new(); + g_byte_array_append(bytes, &one_byte, 1); + return bytes; + } + /* * Special case where the byte string is specified using a one byte * hex literal. We can't allow this for byte strings that are longer diff --git a/epan/ftypes/ftype-integer.c b/epan/ftypes/ftype-integer.c index a099e033f6..22f365e156 100644 --- a/epan/ftypes/ftype-integer.c +++ b/epan/ftypes/ftype-integer.c @@ -48,7 +48,7 @@ get_sinteger(fvalue_t *fv) return fv->value.sinteger; } -static gboolean +gboolean parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg) { const char *cp; diff --git a/epan/ftypes/ftypes-int.h b/epan/ftypes/ftypes-int.h index 2059c17bc4..757405c07d 100644 --- a/epan/ftypes/ftypes-int.h +++ b/epan/ftypes/ftypes-int.h @@ -134,6 +134,9 @@ struct _ftype_t { GByteArray *byte_array_from_unparsed(const char *s, gchar **err_msg); +gboolean +parse_charconst(const char *s, unsigned long *valuep, gchar **err_msg); + #endif /* FTYPES_INT_H */ /* diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py index ffc339f5d9..dbd162177b 100644 --- a/test/suite_dfilter/group_syntax.py +++ b/test/suite_dfilter/group_syntax.py @@ -91,11 +91,11 @@ class case_syntax(unittest.TestCase): dfilter = "bootp" checkDFilterSucceed(dfilter, "Deprecated tokens: \"bootp\"") - def test_charconst_1(self, checkDFilterCount): + def test_charconst_bytes_1(self, checkDFilterCount): # Bytes as a character constant. dfilter = "frame contains 'H'" checkDFilterCount(dfilter, 1) - def test_charconst_2(self, checkDFilterCount): + def test_charconst_bytes_2(self, checkDFilterCount): dfilter = "frame[54] == 'H'" checkDFilterCount(dfilter, 1)