forked from osmocom/wireshark
dfilter: Resolve field names in the parser
The lexical rules for fields and unparsed strings are ambiguous, e.g. "fc" can be the protocol fibre channel or the byte 0xfc.

In general a name is determined to be a protocol field or not by checking the registry.

Resolving the name in the parser gives more flexibility, for example to use different semantic rules according to the relation between LHS and RHS, and allows function names and protocol names to co-exist without ambiguity.

Before:
  Filter: tcp == 1
  Constants:
  00000 PUT_FVALUE 01 <FT_PROTOCOL> -> reg#1
  Instructions:
  00000 READ_TREE tcp -> reg#0
  00001 IF-FALSE-GOTO 3
  00002 ANY_EQ reg#0 == reg#1
  00003 RETURN

  Filter: tcp() == 1
  dftest: Syntax error near "(".

After:
  Filter: tcp == 1
  Constants:
  00000 PUT_FVALUE 01 <FT_PROTOCOL> -> reg#1
  Instructions: (same)

  Filter: tcp() == 1
  dftest: Function 'tcp' does not exist

It's also a goal to make it easier to modify the lexer rules.

Ping #12810.
This commit is contained in:
parent
3e6cc8ce4a
commit
e91b5beafd
|
@ -90,6 +90,9 @@ dfilter_new_function(dfwork_t *dfw, const char *name);
|
|||
gboolean
|
||||
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
|
||||
|
||||
stnode_t *
|
||||
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node);
|
||||
|
||||
const char *tokenstr(int token);
|
||||
|
||||
#endif
|
||||
|
|
|
@ -129,6 +129,38 @@ dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/*
|
||||
* Tries to convert an STTYPE_UNPARSED to a STTYPE_FIELD. If it's not registered as
|
||||
* a field pass UNPARSED to the semantic check.
|
||||
*/
|
||||
stnode_t *
|
||||
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node)
|
||||
{
|
||||
const char *name;
|
||||
header_field_info *hfinfo;
|
||||
|
||||
ws_assert(stnode_type_id(node) == STTYPE_UNPARSED);
|
||||
|
||||
name = stnode_data(node);
|
||||
|
||||
hfinfo = proto_registrar_get_byname(name);
|
||||
if (hfinfo != NULL) {
|
||||
/* It's a field name */
|
||||
stnode_replace(node, STTYPE_FIELD, hfinfo);
|
||||
return node;
|
||||
}
|
||||
|
||||
hfinfo = proto_registrar_get_byalias(name);
|
||||
if (hfinfo != NULL) {
|
||||
/* It's an aliased field name */
|
||||
add_deprecated_token(dfw->deprecated, name);
|
||||
stnode_replace(node, STTYPE_FIELD, hfinfo);
|
||||
return node;
|
||||
}
|
||||
|
||||
/* It's not a field. */
|
||||
return node;
|
||||
}
|
||||
|
||||
/* Initialize the dfilter module */
|
||||
void
|
||||
|
@ -292,7 +324,6 @@ const char *tokenstr(int token)
|
|||
case TOKEN_TEST_MATCHES: return "TEST_MATCHES";
|
||||
case TOKEN_TEST_BITWISE_AND: return "TEST_BITWISE_AND";
|
||||
case TOKEN_TEST_NOT: return "TEST_NOT";
|
||||
case TOKEN_FIELD: return "FIELD";
|
||||
case TOKEN_STRING: return "STRING";
|
||||
case TOKEN_CHARCONST: return "CHARCONST";
|
||||
case TOKEN_UNPARSED: return "UNPARSED";
|
||||
|
|
|
@ -69,8 +69,6 @@
|
|||
any "error" symbols are shifted, if possible. */
|
||||
%syntax_error {
|
||||
|
||||
header_field_info *hfinfo;
|
||||
|
||||
if (!TOKEN) {
|
||||
dfilter_fail(dfw, "Unexpected end of filter string.");
|
||||
dfw->syntax_error = TRUE;
|
||||
|
@ -96,14 +94,11 @@ any "error" symbols are shifted, if possible. */
|
|||
dfilter_fail(dfw, "\"%s\" was unexpected in this context.",
|
||||
(char *)stnode_data(TOKEN));
|
||||
break;
|
||||
case STTYPE_FIELD:
|
||||
hfinfo = (header_field_info *)stnode_data(TOKEN);
|
||||
dfilter_fail(dfw, "Syntax error near \"%s\".", hfinfo->abbrev);
|
||||
break;
|
||||
/* These aren't handed to us as terminal tokens from
|
||||
the scanner, so we can assert that we'll never
|
||||
see them here. */
|
||||
case STTYPE_NUM_TYPES:
|
||||
case STTYPE_FIELD:
|
||||
case STTYPE_FUNCTION:
|
||||
case STTYPE_RANGE:
|
||||
case STTYPE_FVALUE:
|
||||
|
@ -168,10 +163,9 @@ logical_test(T) ::= entity(E).
|
|||
|
||||
|
||||
/* Entities, or things that can be compared/tested/checked */
|
||||
entity(E) ::= FIELD(F). { E = F; }
|
||||
entity(E) ::= STRING(S). { E = S; }
|
||||
entity(E) ::= CHARCONST(C). { E = C; }
|
||||
entity(E) ::= UNPARSED(U). { E = U; }
|
||||
entity(E) ::= UNPARSED(U). { E = dfilter_resolve_unparsed(dfw, U); }
|
||||
entity(E) ::= range(R). { E = R; }
|
||||
entity(E) ::= function(F). { E = F; }
|
||||
|
||||
|
|
|
@ -85,7 +85,6 @@ DIAG_OFF_FLEX
|
|||
/*#undef YY_NO_UNPUT*/
|
||||
|
||||
static int set_lval_str(int token, const char *token_value);
|
||||
static int set_lval_field(int token, header_field_info *hfinfo, const char *token_value);
|
||||
static int simple(int token, const char *token_value);
|
||||
#define SIMPLE(token) simple(token, yytext)
|
||||
|
||||
|
@ -393,7 +392,6 @@ static int simple(int token, const char *token_value);
|
|||
([.][-+[:alnum:]_:]+)+[.]{0,2} |
|
||||
[-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2} {
|
||||
/* Is it a field name or some other value (float, integer, bytes, ...)? */
|
||||
header_field_info *hfinfo;
|
||||
|
||||
/* Trailing dot is allowed for floats, but make sure that trailing ".."
|
||||
* is interpreted as a token on its own. */
|
||||
|
@ -401,19 +399,6 @@ static int simple(int token, const char *token_value);
|
|||
yyless(yyleng-2);
|
||||
}
|
||||
|
||||
hfinfo = proto_registrar_get_byname(yytext);
|
||||
if (hfinfo) {
|
||||
/* Yes, it's a field name */
|
||||
return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
|
||||
}
|
||||
|
||||
hfinfo = proto_registrar_get_byalias(yytext);
|
||||
if (hfinfo) {
|
||||
/* Yes, it's an aliased field name */
|
||||
add_deprecated_token(yyextra->deprecated, yytext);
|
||||
return set_lval_field(TOKEN_FIELD, hfinfo, yytext);
|
||||
}
|
||||
|
||||
/* No match, so treat it as an unparsed string */
|
||||
return set_lval_str(TOKEN_UNPARSED, yytext);
|
||||
}
|
||||
|
@ -492,11 +477,3 @@ set_lval_str(int token, const char *token_value)
|
|||
stnode_init(df_lval, type_id, (gpointer)token_value, token_value);
|
||||
return token;
|
||||
}
|
||||
|
||||
static int
|
||||
set_lval_field(int token, header_field_info *hfinfo, const char *token_value)
|
||||
{
|
||||
ws_assert(token == TOKEN_FIELD);
|
||||
stnode_init(df_lval, STTYPE_FIELD, hfinfo, token_value);
|
||||
return token;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue