dfilter: Replace unparsed lexical type and simplify grammar

Remove the unparsed lexical type and replace it with identifier
and constant. This separation is still necessary to differentiate
names (fields and functions) from literals that look like names,
but there are some advantages to doing it at the lexical level.

The main advantage is a much cleaner and simplified grammar,
because we only have a single token type for field names, without
any loss of generality (the same name is valid for fields and
function names for example).

The CONSTANT token type needs to be distinct from LITERAL so that
the function rules can report an error when a name that is not a
known function is used with call syntax.
This commit is contained in:
João Valverde 2022-12-28 22:12:36 +00:00
parent bdd00edac8
commit 77ef21f86e
4 changed files with 116 additions and 92 deletions

View File

@ -308,7 +308,8 @@ const char *tokenstr(int token)
case TOKEN_TEST_NOT: return "TEST_NOT";
case TOKEN_STRING: return "STRING";
case TOKEN_CHARCONST: return "CHARCONST";
case TOKEN_UNPARSED: return "UNPARSED";
case TOKEN_IDENTIFIER: return "IDENTIFIER";
case TOKEN_CONSTANT: return "CONSTANT";
case TOKEN_LITERAL: return "LITERAL";
case TOKEN_FIELD: return "FIELD";
case TOKEN_LBRACKET: return "LBRACKET";
@ -323,6 +324,7 @@ const char *tokenstr(int token)
case TOKEN_RPAREN: return "RPAREN";
case TOKEN_DOLLAR: return "DOLLAR";
case TOKEN_ATSIGN: return "ATSIGN";
case TOKEN_HASH: return "HASH";
}
return "<unknown>";
}
@ -422,12 +424,14 @@ dfilter_compile_real(const gchar *text, dfilter_t **dfp,
/* Check for scanner failure */
if (token == SCAN_FAILED) {
ws_noisy("Scanning failed");
failure = TRUE;
break;
}
/* Check for end-of-input */
if (token == 0) {
ws_noisy("Scanning finished");
break;
}

View File

@ -1,6 +1,7 @@
%include {
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <assert.h>
@ -22,7 +23,11 @@
static stnode_t *
new_function(dfwork_t *dfw, stnode_t *node);
#define FAIL(dfw, node, ...) dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__)
/*
 * Report a generic (DF_ERROR_GENERIC) error located at `node`.
 * A noisy-level log line is emitted first, then the format string and
 * its arguments are forwarded to dfilter_fail().
 * Wrapped in do { } while (0) so the expansion is a single statement.
 */
#define FAIL(dfw, node, ...) \
do { \
ws_noisy("Parsing failed here."); \
dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
} while (0)
DIAG_OFF_LEMON()
} /* end of %include */
@ -49,9 +54,6 @@ DIAG_ON_LEMON()
%type range_node_list {GSList*}
%destructor range_node_list {drange_node_free_list($$);}
%type layer {GSList*}
%destructor layer {drange_node_free_list($$);}
%type func_params_list {GSList*}
%destructor func_params_list {st_funcparams_free($$);}
@ -132,73 +134,61 @@ expr(X) ::= LPAREN(L) expr(Y) RPAREN(R).
atom(A) ::= STRING(S). { A = S; }
atom(A) ::= CHARCONST(N). { A = N; }
atom(A) ::= LITERAL(S). { A = S; }
atom(A) ::= CONSTANT(C). { A = C; }
layer(R) ::= HASH LBRACKET range_node_list(L) RBRACKET.
named_field(X) ::= FIELD(F).
{
R = L;
X = F;
}
layer(R) ::= HASH INTEGER(N).
named_field(X) ::= IDENTIFIER(U).
{
X = U;
const char *name = stnode_token(U);
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, name);
if (hfinfo == NULL) {
FAIL(dfw, U, "\"%s\" is not a valid protocol or protocol field.", name);
}
stnode_replace(X, STTYPE_FIELD, hfinfo);
}
layered_field(R) ::= named_field(F).
{
R = F;
}
layered_field(R) ::= named_field(F) HASH LBRACKET range_node_list(L) RBRACKET.
{
R = F;
sttype_field_set_range(R, L);
g_slist_free(L);
}
layered_field(R) ::= named_field(F) HASH INTEGER(N).
{
R = F;
char *err_msg = NULL;
drange_node *range = drange_node_from_str(stnode_token(N), &err_msg);
if (err_msg != NULL) {
FAIL(dfw, N, "%s", err_msg);
g_free(err_msg);
}
sttype_field_set_range1(R, range);
stnode_free(N);
R = g_slist_append(NULL, range);
}
layered_field(R) ::= FIELD(F).
rawable_field(R) ::= layered_field(F).
{
R = F;
}
layered_field(R) ::= FIELD(F) layer(L).
{
R = F;
sttype_field_set_range(R, L);
g_slist_free(L);
}
layered_field(R) ::= UNPARSED(U) layer(L).
{
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U));
if (hfinfo == NULL) {
FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
}
R = stnode_new(STTYPE_FIELD, hfinfo, NULL, stnode_location(U));
stnode_free(U);
sttype_field_set_range(R, L);
g_slist_free(L);
}
field(R) ::= layered_field(F).
{
R = F;
}
field(R) ::= ATSIGN layered_field(F).
rawable_field(R) ::= ATSIGN layered_field(F).
{
R = F;
sttype_field_set_raw(R, TRUE);
}
field(R) ::= ATSIGN UNPARSED(U).
{
const char *token = stnode_token(U);
df_loc_t loc = stnode_location(U);
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token);
if (hfinfo == NULL) {
FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
}
R = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc);
sttype_field_set_raw(R, TRUE);
stnode_free(U);
}
reference(R) ::= DOLLAR LBRACE field(F) RBRACE.
reference(R) ::= DOLLAR LBRACE rawable_field(F) RBRACE.
{
/* convert field to reference */
R = stnode_new(STTYPE_REFERENCE, sttype_field_hfinfo(F), NULL, stnode_location(F));
@ -207,34 +197,11 @@ reference(R) ::= DOLLAR LBRACE field(F) RBRACE.
stnode_free(F);
}
reference(R) ::= DOLLAR LBRACE UNPARSED(U) RBRACE.
{
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U));
if (hfinfo == NULL) {
FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
}
R = stnode_new(STTYPE_REFERENCE, hfinfo, NULL, stnode_location(U));
stnode_free(U);
}
entity(E) ::= atom(A). { E = A; }
entity(E) ::= slice(R). { E = R; }
entity(E) ::= function(F). { E = F; }
entity(E) ::= field(F). { E = F; }
entity(E) ::= rawable_field(F). { E = F; }
entity(E) ::= reference(R). { E = R; }
entity(E) ::= UNPARSED(U).
{
const char *token = stnode_token(U);
df_loc_t loc = stnode_location(U);
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token);
if (hfinfo != NULL) {
E = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc);
}
else {
E = stnode_new(STTYPE_LITERAL, g_strdup(token), g_strdup(token), loc);
}
stnode_free(U);
}
arithmetic_expr(T) ::= entity(N).
{
@ -507,7 +474,7 @@ range_node_list(L) ::= range_node_list(P) COMMA RANGE_NODE(N).
}
/* A function can have one or more parameters */
function(F) ::= UNPARSED(U) LPAREN(L) func_params_list(P) RPAREN(R).
function(F) ::= IDENTIFIER(U) LPAREN(L) func_params_list(P) RPAREN(R).
{
F = new_function(dfw, U);
sttype_function_set_params(F, P);
@ -517,8 +484,13 @@ function(F) ::= UNPARSED(U) LPAREN(L) func_params_list(P) RPAREN(R).
stnode_free(R);
}
function ::= CONSTANT(U) LPAREN func_params_list RPAREN.
{
FAIL(dfw, U, "Function '%s' does not exist", stnode_token(U));
}
/* A function can have zero parameters. */
function(F) ::= UNPARSED(U) LPAREN(L) RPAREN(R).
function(F) ::= IDENTIFIER(U) LPAREN(L) RPAREN(R).
{
F = new_function(dfw, U);
df_loc_t loc = stnode_merge_location(F, L, R, (stnode_t *)NULL);
@ -527,6 +499,11 @@ function(F) ::= UNPARSED(U) LPAREN(L) RPAREN(R).
stnode_free(R);
}
function ::= CONSTANT(U) LPAREN RPAREN.
{
FAIL(dfw, U, "Function '%s' does not exist", stnode_token(U));
}
func_params_list(P) ::= arithmetic_expr(E).
{
P = g_slist_append(NULL, E);

View File

@ -1,6 +1,7 @@
%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <wireshark.h>
#include <stdlib.h>
@ -79,16 +80,18 @@ DIAG_OFF_FLEX()
stnode_t *df_lval;
static int set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id);
WS_WARN_UNUSED static int set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id);
#define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
static int set_lval_literal(df_scanner_state_t *state, const char *token_value);
static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value);
static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string);
static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string);
static int set_lval_field(df_scanner_state_t *state, const char *token_value);
WS_WARN_UNUSED static int set_lval_literal(df_scanner_state_t *state, const char *token_value);
WS_WARN_UNUSED static int set_lval_unparsed(df_scanner_state_t *state, const char *token_value);
WS_WARN_UNUSED static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_field(df_scanner_state_t *state, const char *token_value);
WS_WARN_UNUSED static int set_lval_identifier(df_scanner_state_t *state, const char *token_value);
WS_WARN_UNUSED static int set_lval_constant(df_scanner_state_t *state, const char *token_value);
static gboolean append_escaped_char(df_scanner_state_t *state, GString *str, char c);
static gboolean append_universal_character_name(df_scanner_state_t *state, GString *str, const char *ucn);
@ -97,7 +100,11 @@ static gboolean parse_charconst(df_scanner_state_t *state, const char *s, unsign
static void update_location(df_scanner_state_t *state, const char *text);
static void update_string_loc(df_scanner_state_t *state, const char *text);
#define FAIL(...) dfilter_fail(yyextra->dfw, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__)
/*
 * Report a generic (DF_ERROR_GENERIC) scanner error at the current
 * yyextra->location. Relies on `yyextra` being in scope at the point of use.
 * A noisy-level log line is emitted first, then the format string and
 * its arguments are forwarded to dfilter_fail().
 * Wrapped in do { } while (0) so the expansion is a single statement.
 */
#define FAIL(...) \
do { \
ws_noisy("Scanning failed here."); \
dfilter_fail(yyextra->dfw, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
} while (0)
/*
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
@ -419,19 +426,19 @@ HyphenBytes {hex2}(-{hex2})+
{MacAddress}|{QuadMacAddress} {
/* MAC Address. */
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
return set_lval_unparsed(yyextra, yytext);
}
{IPv4Address}{V4CidrPrefix}? {
/* IPv4 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
return set_lval_unparsed(yyextra, yytext);
}
{IPv6Address}{V6CidrPrefix}? {
/* IPv6 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_UNPARSED, yytext, STTYPE_UNINITIALIZED);
return set_lval_unparsed(yyextra, yytext);
}
:?({ColonBytes}|{DotBytes}|{HyphenBytes}) {
@ -448,14 +455,25 @@ HyphenBytes {hex2}(-{hex2})+
return set_lval_literal(yyextra, yytext); /* Keep leading colon. */
}
\.?{Identifier} {
/* Identifier or unparsed. */
{Identifier} {
/* Identifier (field or function) or constant (literal). */
update_location(yyextra, yytext);
if (yytext[0] == '.') {
/* Skip leading dot. */
return set_lval_field(yyextra, yytext + 1);
header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, yytext);
if (hfinfo != NULL) {
return set_lval_identifier(yyextra, yytext);
}
return set_lval_unparsed(yyextra, yytext);
df_func_def_t *def = df_func_lookup(yytext);
if (def != NULL) {
return set_lval_identifier(yyextra, yytext);
}
return set_lval_constant(yyextra, yytext);
}
\.{Identifier} {
/* Field. */
update_location(yyextra, yytext);
/* Skip leading dot. */
return set_lval_field(yyextra, yytext + 1);
}
. {
@ -511,10 +529,29 @@ set_lval_literal(df_scanner_state_t *state, const char *token_value)
return TOKEN_LITERAL;
}
/*
 * Initialize df_lval as an STTYPE_LITERAL node at state->location and
 * report it to the parser as TOKEN_IDENTIFIER.
 *
 * token_value is only read: two separate g_strdup() copies are handed to
 * stnode_init() (presumably one as the node's data and one as its token
 * text -- confirm against stnode_init()).
 */
static int
set_lval_identifier(df_scanner_state_t *state, const char *token_value)
{
	char *node_data = g_strdup(token_value);
	char *node_token = g_strdup(token_value);

	stnode_init(df_lval, STTYPE_LITERAL, node_data, node_token, state->location);
	return TOKEN_IDENTIFIER;
}
/*
 * Initialize df_lval as an STTYPE_LITERAL node at state->location and
 * report it to the parser as TOKEN_CONSTANT.
 *
 * token_value is only read: two separate g_strdup() copies are handed to
 * stnode_init() (presumably one as the node's data and one as its token
 * text -- confirm against stnode_init()).
 */
static int
set_lval_constant(df_scanner_state_t *state, const char *token_value)
{
	char *node_data = g_strdup(token_value);
	char *node_token = g_strdup(token_value);

	stnode_init(df_lval, STTYPE_LITERAL, node_data, node_token, state->location);
	return TOKEN_CONSTANT;
}
static int
set_lval_unparsed(df_scanner_state_t *state, const char *token_value)
{
return set_lval_simple(state, TOKEN_UNPARSED, token_value, STTYPE_UNINITIALIZED);
header_field_info *hfinfo = dfilter_resolve_unparsed(state->dfw, token_value);
if (hfinfo != NULL) {
stnode_init(df_lval, STTYPE_FIELD, hfinfo, g_strdup(token_value), state->location);
return TOKEN_FIELD;
}
return set_lval_literal(state, token_value);
}
static int

View File

@ -132,6 +132,12 @@ class case_syntax(unittest.TestCase):
dfilter = '\ttcp.stream \r\n== 1'
checkDFilterSucceed(dfilter)
def test_func_name_clash1(self, checkDFilterFail):
# "tcp" is a (non-existent) function, not a protocol
error = "Function 'tcp' does not exist"
dfilter = 'frame == tcp()'
checkDFilterFail(dfilter, error)
@fixtures.uses_fixtures
class case_equality(unittest.TestCase):
trace_file = "sip.pcapng"