From 77ef21f86e248c7396bdd34c03a28542e8b19a3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Wed, 28 Dec 2022 22:12:36 +0000 Subject: [PATCH] dfilter: Replace unparsed lexical type and simplify grammar Remove unparsed lexical type and replace it with identifier and constant. This separation is still necessary to differentiate names (fields and functions) from literals that look like names, but doing it at the lexical level has some advantages. The main advantage is a much cleaner and simplified grammar, because we only have a single token type for field names, without any loss of generality (the same name is valid for field and function names, for example). The CONSTANT token type needs to be distinct from LITERAL in order to provide errors for function rules. --- epan/dfilter/dfilter.c | 6 +- epan/dfilter/grammar.lemon | 125 ++++++++++++----------------- epan/dfilter/scanner.l | 71 ++++++++++++---- test/suite_dfilter/group_syntax.py | 6 ++ 4 files changed, 116 insertions(+), 92 deletions(-) diff --git a/epan/dfilter/dfilter.c b/epan/dfilter/dfilter.c index 03e367e698..e590d5152d 100644 --- a/epan/dfilter/dfilter.c +++ b/epan/dfilter/dfilter.c @@ -308,7 +308,8 @@ const char *tokenstr(int token) case TOKEN_TEST_NOT: return "TEST_NOT"; case TOKEN_STRING: return "STRING"; case TOKEN_CHARCONST: return "CHARCONST"; - case TOKEN_UNPARSED: return "UNPARSED"; + case TOKEN_IDENTIFIER: return "IDENTIFIER"; + case TOKEN_CONSTANT: return "CONSTANT"; case TOKEN_LITERAL: return "LITERAL"; case TOKEN_FIELD: return "FIELD"; case TOKEN_LBRACKET: return "LBRACKET"; @@ -323,6 +324,7 @@ const char *tokenstr(int token) case TOKEN_RPAREN: return "RPAREN"; case TOKEN_DOLLAR: return "DOLLAR"; case TOKEN_ATSIGN: return "ATSIGN"; + case TOKEN_HASH: return "HASH"; } return ""; } @@ -422,12 +424,14 @@ dfilter_compile_real(const gchar *text, dfilter_t **dfp, /* Check for scanner failure */ if (token == SCAN_FAILED) { + ws_noisy("Scanning failed"); failure = TRUE; break; 
} /* Check for end-of-input */ if (token == 0) { + ws_noisy("Scanning finished"); break; } diff --git a/epan/dfilter/grammar.lemon b/epan/dfilter/grammar.lemon index 5171e708a0..f522c453a4 100644 --- a/epan/dfilter/grammar.lemon +++ b/epan/dfilter/grammar.lemon @@ -1,6 +1,7 @@ %include { #include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER #include @@ -22,7 +23,11 @@ static stnode_t * new_function(dfwork_t *dfw, stnode_t *node); -#define FAIL(dfw, node, ...) dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__) +#define FAIL(dfw, node, ...) \ + do { \ + ws_noisy("Parsing failed here."); \ + dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \ + } while (0) DIAG_OFF_LEMON() } /* end of %include */ @@ -49,9 +54,6 @@ DIAG_ON_LEMON() %type range_node_list {GSList*} %destructor range_node_list {drange_node_free_list($$);} -%type layer {GSList*} -%destructor layer {drange_node_free_list($$);} - %type func_params_list {GSList*} %destructor func_params_list {st_funcparams_free($$);} @@ -132,73 +134,61 @@ expr(X) ::= LPAREN(L) expr(Y) RPAREN(R). atom(A) ::= STRING(S). { A = S; } atom(A) ::= CHARCONST(N). { A = N; } atom(A) ::= LITERAL(S). { A = S; } +atom(A) ::= CONSTANT(C). { A = C; } -layer(R) ::= HASH LBRACKET range_node_list(L) RBRACKET. +named_field(X) ::= FIELD(F). { - R = L; + X = F; } -layer(R) ::= HASH INTEGER(N). +named_field(X) ::= IDENTIFIER(U). { + X = U; + const char *name = stnode_token(U); + header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, name); + if (hfinfo == NULL) { + FAIL(dfw, U, "\"%s\" is not a valid protocol or protocol field.", name); + } + stnode_replace(X, STTYPE_FIELD, hfinfo); +} + +layered_field(R) ::= named_field(F). +{ + R = F; +} + +layered_field(R) ::= named_field(F) HASH LBRACKET range_node_list(L) RBRACKET. +{ + R = F; + sttype_field_set_range(R, L); + g_slist_free(L); +} + +layered_field(R) ::= named_field(F) HASH INTEGER(N). 
+{ + R = F; char *err_msg = NULL; drange_node *range = drange_node_from_str(stnode_token(N), &err_msg); if (err_msg != NULL) { FAIL(dfw, N, "%s", err_msg); g_free(err_msg); } + sttype_field_set_range1(R, range); stnode_free(N); - R = g_slist_append(NULL, range); } -layered_field(R) ::= FIELD(F). +rawable_field(R) ::= layered_field(F). { R = F; } -layered_field(R) ::= FIELD(F) layer(L). -{ - R = F; - sttype_field_set_range(R, L); - g_slist_free(L); -} - -layered_field(R) ::= UNPARSED(U) layer(L). -{ - header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U)); - if (hfinfo == NULL) { - FAIL(dfw, U, "%s is not a valid field", stnode_token(U)); - } - R = stnode_new(STTYPE_FIELD, hfinfo, NULL, stnode_location(U)); - stnode_free(U); - sttype_field_set_range(R, L); - g_slist_free(L); -} - -field(R) ::= layered_field(F). -{ - R = F; -} - -field(R) ::= ATSIGN layered_field(F). +rawable_field(R) ::= ATSIGN layered_field(F). { R = F; sttype_field_set_raw(R, TRUE); } -field(R) ::= ATSIGN UNPARSED(U). -{ - const char *token = stnode_token(U); - df_loc_t loc = stnode_location(U); - header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token); - if (hfinfo == NULL) { - FAIL(dfw, U, "%s is not a valid field", stnode_token(U)); - } - R = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc); - sttype_field_set_raw(R, TRUE); - stnode_free(U); -} - -reference(R) ::= DOLLAR LBRACE field(F) RBRACE. +reference(R) ::= DOLLAR LBRACE rawable_field(F) RBRACE. { /* convert field to reference */ R = stnode_new(STTYPE_REFERENCE, sttype_field_hfinfo(F), NULL, stnode_location(F)); @@ -207,34 +197,11 @@ reference(R) ::= DOLLAR LBRACE field(F) RBRACE. stnode_free(F); } -reference(R) ::= DOLLAR LBRACE UNPARSED(U) RBRACE. 
-{ - header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U)); - if (hfinfo == NULL) { - FAIL(dfw, U, "%s is not a valid field", stnode_token(U)); - } - R = stnode_new(STTYPE_REFERENCE, hfinfo, NULL, stnode_location(U)); - stnode_free(U); -} - entity(E) ::= atom(A). { E = A; } entity(E) ::= slice(R). { E = R; } entity(E) ::= function(F). { E = F; } -entity(E) ::= field(F). { E = F; } +entity(E) ::= rawable_field(F). { E = F; } entity(E) ::= reference(R). { E = R; } -entity(E) ::= UNPARSED(U). -{ - const char *token = stnode_token(U); - df_loc_t loc = stnode_location(U); - header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token); - if (hfinfo != NULL) { - E = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc); - } - else { - E = stnode_new(STTYPE_LITERAL, g_strdup(token), g_strdup(token), loc); - } - stnode_free(U); -} arithmetic_expr(T) ::= entity(N). { @@ -507,7 +474,7 @@ range_node_list(L) ::= range_node_list(P) COMMA RANGE_NODE(N). } /* A function can have one or more parameters */ -function(F) ::= UNPARSED(U) LPAREN(L) func_params_list(P) RPAREN(R). +function(F) ::= IDENTIFIER(U) LPAREN(L) func_params_list(P) RPAREN(R). { F = new_function(dfw, U); sttype_function_set_params(F, P); @@ -517,8 +484,13 @@ function(F) ::= UNPARSED(U) LPAREN(L) func_params_list(P) RPAREN(R). stnode_free(R); } +function ::= CONSTANT(U) LPAREN func_params_list RPAREN. +{ + FAIL(dfw, U, "Function '%s' does not exist", stnode_token(U)); +} + /* A function can have zero parameters. */ -function(F) ::= UNPARSED(U) LPAREN(L) RPAREN(R). +function(F) ::= IDENTIFIER(U) LPAREN(L) RPAREN(R). { F = new_function(dfw, U); df_loc_t loc = stnode_merge_location(F, L, R, (stnode_t *)NULL); @@ -527,6 +499,11 @@ function(F) ::= UNPARSED(U) LPAREN(L) RPAREN(R). stnode_free(R); } +function ::= CONSTANT(U) LPAREN RPAREN. +{ + FAIL(dfw, U, "Function '%s' does not exist", stnode_token(U)); +} + func_params_list(P) ::= arithmetic_expr(E). 
{ P = g_slist_append(NULL, E); diff --git a/epan/dfilter/scanner.l b/epan/dfilter/scanner.l index 7c759d7038..cefb9dc2d2 100644 --- a/epan/dfilter/scanner.l +++ b/epan/dfilter/scanner.l @@ -1,6 +1,7 @@ %top { /* Include this before everything else, for various large-file definitions */ #include "config.h" +#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER #include #include @@ -79,16 +80,18 @@ DIAG_OFF_FLEX() stnode_t *df_lval; -static int set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id); +WS_WARN_UNUSED static int set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id); #define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED)) #define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST)) #define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC)) -static int set_lval_literal(df_scanner_state_t *state, const char *token_value); -static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value); -static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string); -static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string); -static int set_lval_field(df_scanner_state_t *state, const char *token_value); +WS_WARN_UNUSED static int set_lval_literal(df_scanner_state_t *state, const char *token_value); +WS_WARN_UNUSED static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value); +WS_WARN_UNUSED static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string); +WS_WARN_UNUSED static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string); +WS_WARN_UNUSED static int set_lval_field(df_scanner_state_t *state, const char *token_value); +WS_WARN_UNUSED static int set_lval_identifier(df_scanner_state_t *state, const char 
*token_value); +WS_WARN_UNUSED static int set_lval_constant(df_scanner_state_t *state, const char *token_value); static gboolean append_escaped_char(df_scanner_state_t *state, GString *str, char c); static gboolean append_universal_character_name(df_scanner_state_t *state, GString *str, const char *ucn); @@ -97,7 +100,11 @@ static gboolean parse_charconst(df_scanner_state_t *state, const char *s, unsign static void update_location(df_scanner_state_t *state, const char *text); static void update_string_loc(df_scanner_state_t *state, const char *text); -#define FAIL(...) dfilter_fail(yyextra->dfw, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__) +#define FAIL(...) \ + do { \ + ws_noisy("Scanning failed here."); \ + dfilter_fail(yyextra->dfw, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \ + } while (0) /* * Sleazy hack to suppress compiler warnings in yy_fatal_error(). @@ -419,19 +426,19 @@ HyphenBytes {hex2}(-{hex2})+ {MacAddress}|{QuadMacAddress} { /* MAC Address. */ update_location(yyextra, yytext); - return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED); + return set_lval_unparsed(yyextra, yytext); } {IPv4Address}{V4CidrPrefix}? { /* IPv4 with or without prefix. */ update_location(yyextra, yytext); - return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED); + return set_lval_unparsed(yyextra, yytext); } {IPv6Address}{V6CidrPrefix}? { /* IPv6 with or without prefix. */ update_location(yyextra, yytext); - return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED); + return set_lval_unparsed(yyextra, yytext); } :?({ColonBytes}|{DotBytes}|{HyphenBytes}) { @@ -448,14 +455,25 @@ HyphenBytes {hex2}(-{hex2})+ return set_lval_literal(yyextra, yytext); /* Keep leading colon. */ } -\.?{Identifier} { - /* Identifier or unparsed. */ +{Identifier} { + /* Identifier (field or function) or constant (literal). */ update_location(yyextra, yytext); - if (yytext[0] == '.') { - /* Skip leading dot. 
*/ - return set_lval_field(yyextra, yytext + 1); + header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, yytext); + if (hfinfo != NULL) { + return set_lval_identifier(yyextra, yytext); } - return set_lval_unparsed(yyextra, yytext); + df_func_def_t *def = df_func_lookup(yytext); + if (def != NULL) { + return set_lval_identifier(yyextra, yytext); + } + return set_lval_constant(yyextra, yytext); +} + +\.{Identifier} { + /* Field. */ + update_location(yyextra, yytext); + /* Skip leading dot. */ + return set_lval_field(yyextra, yytext + 1); } . { @@ -511,10 +529,29 @@ set_lval_literal(df_scanner_state_t *state, const char *token_value) return TOKEN_LITERAL; } +static int +set_lval_identifier(df_scanner_state_t *state, const char *token_value) +{ + stnode_init(df_lval, STTYPE_LITERAL, g_strdup(token_value), g_strdup(token_value), state->location); + return TOKEN_IDENTIFIER; +} + +static int +set_lval_constant(df_scanner_state_t *state, const char *token_value) +{ + stnode_init(df_lval, STTYPE_LITERAL, g_strdup(token_value), g_strdup(token_value), state->location); + return TOKEN_CONSTANT; +} + static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value) { - return set_lval_simple(state, TOKEN_UNPARSED, token_value, STTYPE_UNINITIALIZED); + header_field_info *hfinfo = dfilter_resolve_unparsed(state->dfw, token_value); + if (hfinfo != NULL) { + stnode_init(df_lval, STTYPE_FIELD, hfinfo, g_strdup(token_value), state->location); + return TOKEN_FIELD; + } + return set_lval_literal(state, token_value); } static int diff --git a/test/suite_dfilter/group_syntax.py b/test/suite_dfilter/group_syntax.py index c6f1f9ede0..829f4e6d8c 100644 --- a/test/suite_dfilter/group_syntax.py +++ b/test/suite_dfilter/group_syntax.py @@ -132,6 +132,12 @@ class case_syntax(unittest.TestCase): dfilter = '\ttcp.stream \r\n== 1' checkDFilterSucceed(dfilter) + def test_func_name_clash1(self, checkDFilterFail): + # "tcp" is a (non-existent) function, not a protocol + 
error = "Function 'tcp' does not exist" + dfilter = 'frame == tcp()' + checkDFilterFail(dfilter, error) + @fixtures.uses_fixtures class case_equality(unittest.TestCase): trace_file = "sip.pcapng"