dfilter: Resolve field names in the parser

The lexical rules for fields and unparsed strings are ambiguous,
e.g. "fc" can be the Fibre Channel protocol or the byte 0xfc.
In general, whether a name is a protocol field or not is determined
by checking the registry.

Resolving the name in the parser gives more flexibility, for example
to use different semantic rules according to the relation between
LHS and RHS, and allows function names and protocol names to co-exist
without ambiguity.

Before:
  Filter: tcp == 1

  Constants:
  00000 PUT_FVALUE	01 <FT_PROTOCOL> -> reg#1

  Instructions:
  00000 READ_TREE		tcp -> reg#0
  00001 IF-FALSE-GOTO	3
  00002 ANY_EQ		reg#0 == reg#1
  00003 RETURN

  Filter: tcp() == 1
  dftest: Syntax error near "(".

After:
  Filter: tcp == 1

  Constants:
  00000 PUT_FVALUE	01 <FT_PROTOCOL> -> reg#1

  Instructions:
  (same)

  Filter: tcp() == 1
  dftest: Function 'tcp' does not exist

It's also a goal to make it easier to modify the lexer rules.

Ping #12810.
This commit is contained in:
João Valverde 2021-09-26 12:05:54 +01:00
parent 3e6cc8ce4a
commit e91b5beafd
4 changed files with 37 additions and 32 deletions

View File

@ -90,6 +90,9 @@ dfilter_new_function(dfwork_t *dfw, const char *name);
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
/*
 * Look up the text of an STTYPE_UNPARSED node among the registered field
 * names and field aliases. On a match the node is converted in place to
 * STTYPE_FIELD; otherwise it is left as STTYPE_UNPARSED. The same node
 * pointer is returned in both cases.
 */
stnode_t *
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node);
const char *tokenstr(int token);
#endif

View File

@ -129,6 +129,38 @@ dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
return TRUE;
}
/*
 * Attempt to turn an STTYPE_UNPARSED node into an STTYPE_FIELD node.
 *
 * The node's text is looked up in the protocol registry, first as a field
 * name and then as a field alias. An alias match is additionally passed to
 * add_deprecated_token() before the node is converted. The node is modified
 * in place and returned; if neither lookup succeeds it is returned
 * untouched, still STTYPE_UNPARSED, and is left for the semantic check to
 * handle.
 */
stnode_t *
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node)
{
	const char *token_text;
	header_field_info *field;

	ws_assert(stnode_type_id(node) == STTYPE_UNPARSED);

	token_text = stnode_data(node);

	field = proto_registrar_get_byname(token_text);
	if (field == NULL) {
		/* Not a registered field name; maybe it is an alias. */
		field = proto_registrar_get_byalias(token_text);
		if (field == NULL) {
			/* It's not a field. */
			return node;
		}
		/* Must happen before stnode_replace(), while token_text
		 * still refers to the node's current data. */
		add_deprecated_token(dfw->deprecated, token_text);
	}

	/* It's a field name (direct or aliased). */
	stnode_replace(node, STTYPE_FIELD, field);
	return node;
}
/* Initialize the dfilter module */
void
@ -292,7 +324,6 @@ const char *tokenstr(int token)
case TOKEN_TEST_MATCHES: return "TEST_MATCHES";
case TOKEN_TEST_BITWISE_AND: return "TEST_BITWISE_AND";
case TOKEN_TEST_NOT: return "TEST_NOT";
case TOKEN_FIELD: return "FIELD";
case TOKEN_STRING: return "STRING";
case TOKEN_CHARCONST: return "CHARCONST";
case TOKEN_UNPARSED: return "UNPARSED";

View File

@ -69,8 +69,6 @@
any "error" symbols are shifted, if possible. */
%syntax_error {
header_field_info *hfinfo;
if (!TOKEN) {
dfilter_fail(dfw, "Unexpected end of filter string.");
dfw->syntax_error = TRUE;
@ -96,14 +94,11 @@ any "error" symbols are shifted, if possible. */
dfilter_fail(dfw, "\"%s\" was unexpected in this context.",
(char *)stnode_data(TOKEN));
break;
case STTYPE_FIELD:
hfinfo = (header_field_info *)stnode_data(TOKEN);
dfilter_fail(dfw, "Syntax error near \"%s\".", hfinfo->abbrev);
break;
/* These aren't handed to us as terminal tokens from
the scanner, so we can assert that we'll never
see them here. */
case STTYPE_NUM_TYPES:
case STTYPE_FIELD:
case STTYPE_FUNCTION:
case STTYPE_RANGE:
case STTYPE_FVALUE:
@ -168,10 +163,9 @@ logical_test(T) ::= entity(E).
/* Entities, or things that can be compared/tested/checked */
entity(E) ::= FIELD(F). { E = F; }
entity(E) ::= STRING(S). { E = S; }
entity(E) ::= CHARCONST(C). { E = C; }
entity(E) ::= UNPARSED(U). { E = U; }
entity(E) ::= UNPARSED(U). { E = dfilter_resolve_unparsed(dfw, U); }
entity(E) ::= range(R). { E = R; }
entity(E) ::= function(F). { E = F; }

View File

@ -85,7 +85,6 @@ DIAG_OFF_FLEX
/*#undef YY_NO_UNPUT*/
static int set_lval_str(int token, const char *token_value);
static int set_lval_field(int token, header_field_info *hfinfo, const char *token_value);
static int simple(int token, const char *token_value);
#define SIMPLE(token) simple(token, yytext)
@ -393,7 +392,6 @@ static int simple(int token, const char *token_value);
([.][-+[:alnum:]_:]+)+[.]{0,2} |
[-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2} {
/* Is it a field name or some other value (float, integer, bytes, ...)? */
header_field_info *hfinfo;
/* Trailing dot is allowed for floats, but make sure that trailing ".."
* is interpreted as a token on its own. */
@ -401,19 +399,6 @@ static int simple(int token, const char *token_value);
yyless(yyleng-2);
}
hfinfo = proto_registrar_get_byname(yytext);
if (hfinfo) {
/* Yes, it's a field name */
return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
}
hfinfo = proto_registrar_get_byalias(yytext);
if (hfinfo) {
/* Yes, it's an aliased field name */
add_deprecated_token(yyextra->deprecated, yytext);
return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
}
/* No match, so treat it as an unparsed string */
return set_lval_str(TOKEN_UNPARSED, yytext);
}
@ -492,11 +477,3 @@ set_lval_str(int token, const char *token_value)
stnode_init(df_lval, type_id, (gpointer)token_value, token_value);
return token;
}
/*
 * Initialise df_lval as an STTYPE_FIELD syntax-tree node whose data is
 * hfinfo, and return the token code unchanged.
 *
 * token must be TOKEN_FIELD (asserted). token_value is forwarded as the
 * last argument of stnode_init() -- presumably the token's source text;
 * confirm against stnode_init()'s definition.
 */
static int
set_lval_field(int token, header_field_info *hfinfo, const char *token_value)
{
ws_assert(token == TOKEN_FIELD);
stnode_init(df_lval, STTYPE_FIELD, hfinfo, token_value);
return token;
}