/* wireshark/epan/dfilter/grammar.lemon
 *
 * (Web-viewer residue: 304 lines, 6.4 KiB, Plaintext;
 *  Raw / Normal View / History)
 */

%include {
#include "config.h"
#include <assert.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "sttype-range.h"
#include "sttype-test.h"
#include "sttype-function.h"
#include "sttype-set.h"
#include "drange.h"
#include "grammar.h"
#ifdef _WIN32
#pragma warning(disable:4671)
#endif
/* End of C code */
}
/* Parser Information */
/* Name prefix used for the generated parser's public functions
 * (replaces Lemon's default "Parse"). */
%name Dfilter
/* Prefix prepended to every terminal name in the generated token header. */
%token_prefix TOKEN_
/* Extra parameter handed to the parser; it is visible as "dfw" in the
 * actions, destructors and error handlers below. */
%extra_argument {dfwork_t *dfw}
/* Terminal and Non-Terminal types and destructors */
/* Every terminal carries a df_lval_t pointer as its semantic value. */
%token_type {df_lval_t*}
/* Runs when the parser discards a terminal whose value no action consumed. */
%token_destructor {
(void)dfw; /* presumably silences an unused-variable warning in generated code */
df_lval_free($$);
}
/* Semantic-value types of the non-terminals, each paired with the
 * destructor the parser runs when it discards such a value (for example
 * during error recovery).
 *
 * "sentence" and "rel_binop" have no destructor: the sentence rules only
 * store into dfw->st_root, and rel_binop is a plain test_op_t value. */
%type sentence {stnode_t*}
%type expr {stnode_t*}
%destructor expr {stnode_free($$);}
%type entity {stnode_t*}
%destructor entity {stnode_free($$);}
%type function {stnode_t*}
%destructor function {stnode_free($$);}
%type relation_test {stnode_t*}
%destructor relation_test {stnode_free($$);}
%type logical_test {stnode_t*}
%destructor logical_test {stnode_free($$);}
%type rel_binop {test_op_t}
/* Byte/slice ranges: "entity[ ... ]" */
%type range {stnode_t*}
%destructor range {stnode_free($$);}
%type range_node {drange_node*}
%destructor range_node {drange_node_free($$);}
%type range_node_list {GSList*}
%destructor range_node_list {drange_node_free_list($$);}
/* Function call parameters */
%type function_params {GSList*}
%destructor function_params {st_funcparams_free($$);}
/* Sets: "{ a b c..d }" */
%type set {stnode_t*}
%destructor set {stnode_free($$);}
%type set_list {GSList*}
%destructor set_list {set_nodelist_free($$);}
%type set_element {GSList*}
%destructor set_element {set_nodelist_free($$);}
/* Runs as soon as the parser detects a syntax error, before it tries
   to recover by shifting "error" symbols. TOKEN is the offending
   token; it is NULL when the input ended prematurely. */
%syntax_error {
    if (TOKEN) {
        dfilter_fail(dfw, "\"%s\" was unexpected in this context.",
                df_lval_value(TOKEN));
    }
    else {
        dfilter_fail(dfw, "Unexpected end of filter string.");
    }
    /* Either way the filter is flagged as syntactically invalid. */
    dfw->syntax_error = TRUE;
}
/* Runs when the parse is abandoned altogether: after the syntax_error
   code above, once error recovery (shifting an "error" symbol and then
   successfully shifting 3 more symbols) has not succeeded. */
%parse_failure {
    dfw->syntax_error = TRUE;
}
/* ----------------- The grammar -------------- */
/* Associativity */
/* NOTE(review): Lemon assigns higher precedence to terminals declared
 * later, so as written TEST_OR binds tighter than TEST_AND, i.e.
 * "a and b or c" groups as "a and (b or c)". That is the reverse of the
 * usual convention; confirm it is intended before relying on it or
 * reordering these two lines (reordering changes how existing filters
 * parse). */
%left TEST_AND.
%left TEST_OR.
/* NOTE(review): TEST_EQ and TEST_NE are not used by any rule visible in
 * this file; the rel_binop rules use TEST_ANY_EQ, TEST_ALL_NE and
 * TEST_ANY_NE, which (like TEST_IN) are not listed here. Verify against
 * the scanner whether this list is stale. */
%nonassoc TEST_EQ TEST_NE TEST_LT TEST_LE TEST_GT TEST_GE TEST_CONTAINS TEST_MATCHES TEST_BITWISE_AND.
%right TEST_NOT.
/* Top-level targets */
/* A filter made of one expression: its tree becomes the root stored
 * in the work structure. */
sentence ::= expr(ROOT).
{
    dfw->st_root = ROOT;
}
/* An empty filter: there is no tree at all. */
sentence ::= .
{
    dfw->st_root = NULL;
}
/* An expression is either a relational test or a logical test; the
 * node is passed through unchanged. */
expr(RES) ::= relation_test(REL).
{
    RES = REL;
}
expr(RES) ::= logical_test(LOGIC).
{
    RES = LOGIC;
}
/* Logical tests */
/* Conjunction of two expressions. */
logical_test(RES) ::= expr(LHS) TEST_AND expr(RHS).
{
    RES = stnode_new_test(TEST_OP_AND, LHS, RHS);
}
/* Disjunction of two expressions. */
logical_test(RES) ::= expr(LHS) TEST_OR expr(RHS).
{
    RES = stnode_new_test(TEST_OP_OR, LHS, RHS);
}
/* Negation; the test node has a single operand. */
logical_test(RES) ::= TEST_NOT expr(OPERAND).
{
    RES = stnode_new_test(TEST_OP_NOT, OPERAND, NULL);
}
/* A bare entity used as an expression becomes an existence test. */
logical_test(RES) ::= entity(OPERAND).
{
    RES = stnode_new_test(TEST_OP_EXISTS, OPERAND, NULL);
}
/* Entities, or things that can be compared/tested/checked */
/* For the three terminal forms below the action consumes the token's
 * value itself, so it also releases the token with df_lval_free(). */

/* String token. */
entity(NODE) ::= STRING(LVAL).
{
    NODE = stnode_new(STTYPE_STRING, df_lval_value(LVAL));
    df_lval_free(LVAL);
}
/* Character-constant token. */
entity(NODE) ::= CHARCONST(LVAL).
{
    NODE = stnode_new(STTYPE_CHARCONST, df_lval_value(LVAL));
    df_lval_free(LVAL);
}
/* Unparsed token: the new node is handed to dfilter_resolve_unparsed()
 * before the token is released. */
entity(NODE) ::= UNPARSED(LVAL).
{
    NODE = stnode_new(STTYPE_UNPARSED, df_lval_value(LVAL));
    dfilter_resolve_unparsed(dfw, NODE);
    df_lval_free(LVAL);
}
/* Ranges and function calls are entities too; their node is reused. */
entity(NODE) ::= range(RNG).
{
    NODE = RNG;
}
entity(NODE) ::= function(FUNC).
{
    NODE = FUNC;
}
/* Ranges */
/* "entity[ node, node, ... ]": builds a range node over the entity. */
range(RES) ::= entity(BASE) LBRACKET range_node_list(NODES) RBRACKET.
{
    RES = stnode_new(STTYPE_RANGE, NULL);
    sttype_range_set(RES, BASE, NODES);
    /* Only the list cells are released here; the drange_nodes they
     * point to are not freed. */
    g_slist_free(NODES);
}
/* First node of a comma-separated list. */
range_node_list(LIST) ::= range_node(NODE).
{
    LIST = g_slist_append(NULL, NODE);
}
/* Each further node is appended to the end of the list. */
range_node_list(LIST) ::= range_node_list(HEAD) COMMA range_node(NODE).
{
    LIST = g_slist_append(HEAD, NODE);
}
/* One RANGE token is converted from its text by drange_node_from_str();
 * if that reports an error message, the parse is failed with it. */
range_node(NODE) ::= RANGE(LVAL).
{
    char *err_msg = NULL;

    NODE = drange_node_from_str(df_lval_value(LVAL), &err_msg);
    if (err_msg) {
        dfilter_parse_fail(dfw, "%s", err_msg);
        g_free(err_msg);
    }
    df_lval_free(LVAL);
}
/*
 * dfilter: Fix "!=" relation to be free of contradictions
 *
 * Wireshark defines the relation of equality A == B as
 * A any_eq B <=> An == Bn for at least one An, Bn.
 * More accurately I think this is (formally) an equivalence
 * relation, not true equality.
 *
 * Whichever definition for "==" we choose we must keep the definition
 * of "!=" as !(A == B), otherwise it will lead to logical contradictions
 * like (A == B) AND (A != B) being true.
 *
 * Fix the '!=' relation to match the definition of equality:
 * A != B <=> !(A == B) <=> A all_ne B <=> An != Bn, for every n.
 *
 * This has been the recommended way to write "not equal" for a long
 * time in the documentation, even to the point where != was deprecated,
 * but it just wasn't implemented consistently in the language, which
 * has understandably been a persistent source of confusion. Even a field
 * that is normally well-behaved with "!=" like "ip.src" or "ip.dst" will
 * produce unexpected results with encapsulations like IP-over-IP.
 *
 * The opcode ALL_NE could have been implemented in the compiler instead
 * using NOT and ANY_EQ but I chose to implement it in bytecode. It just
 * seemed more elegant and efficient but the difference was not very
 * significant.
 *
 * Keep around "~=" for any_ne relation, in case someone depends on that,
 * and because we don't have an operator for true equality:
 * A strict_equal B <=> A all_eq B <=> !(A any_ne B).
 * If there is only one value then any_ne and all_ne are the same
 * comparison operation.
 *
 * Implementing this change did not require fixing any tests so it is
 * unlikely the relation "~=" (any_ne) will be very useful.
 *
 * Note that the behaviour of the '<' (less than) comparison relation is
 * a separate, more subtle issue. In the general case the definition of
 * '<' that is used is only a partial order.
 *
 * 2021-10-18 20:07:06 +00:00
 */
/* Maps each relational operator token to its test_op_t code. */
rel_binop(OP) ::= TEST_ANY_EQ.
{
    OP = TEST_OP_ANY_EQ;
}
rel_binop(OP) ::= TEST_ALL_NE.
{
    OP = TEST_OP_ALL_NE;
}
rel_binop(OP) ::= TEST_ANY_NE.
{
    OP = TEST_OP_ANY_NE;
}
rel_binop(OP) ::= TEST_GT.
{
    OP = TEST_OP_GT;
}
rel_binop(OP) ::= TEST_GE.
{
    OP = TEST_OP_GE;
}
rel_binop(OP) ::= TEST_LT.
{
    OP = TEST_OP_LT;
}
rel_binop(OP) ::= TEST_LE.
{
    OP = TEST_OP_LE;
}
rel_binop(OP) ::= TEST_BITWISE_AND.
{
    OP = TEST_OP_BITWISE_AND;
}
rel_binop(OP) ::= TEST_CONTAINS.
{
    OP = TEST_OP_CONTAINS;
}
/* Relational tests */
/* Plain binary comparison: "E O F". */
relation_test(T) ::= entity(E) rel_binop(O) entity(F).
{
    T = stnode_new_test(O, E, F);
}
/* Chained comparison: 'a == b == c' or 'a < b <= c <= d < e' */
relation_test(T) ::= entity(E) rel_binop(O) relation_test(R).
{
    stnode_t *L, *F;

    /* for now generate it like E O F TEST_OP_AND F P G, later it could be optimized
       or semantically checked (to make a <= b >= c or a == b != c invalid)?
    */
    /* Starting from R, repeatedly follow the operand that
     * sttype_test_get() returns through its third argument (presumably
     * the left-hand operand -- confirm against sttype-test.h) until a
     * node that is not a test is reached. That node is the F shared by
     * both halves of the chain. */
    F = R;
    do {
        ws_assert(F != NULL && stnode_type_id(F) == STTYPE_TEST);
        sttype_test_get(F, NULL, &F, NULL);
    } while (stnode_type_id(F) == STTYPE_TEST);

    /* F still belongs to R, so the new left-hand test gets a copy. */
    L = stnode_new_test(O, E, stnode_dup(F));
    T = stnode_new_test(TEST_OP_AND, L, R);
}
/* "matches" does not chain with other relational tests. */
/* The right-hand entity is passed to dfilter_new_regex() and the node it
 * returns becomes the second operand of the test.
 *
 * NOTE(review): the change history for this rule says the right-hand side
 * of "matches" is required to be a double-quoted string, because "matches"
 * does not follow the normal display filter type rules. The rule as
 * written accepts any entity, so presumably that requirement is enforced
 * inside dfilter_new_regex() -- confirm. */
relation_test(T) ::= entity(E) TEST_MATCHES entity(F).
{
    stnode_t *R = dfilter_new_regex(dfw, F);

    T = stnode_new_test(TEST_OP_MATCHES, E, R);
}
/* Set membership: "E in { ... }" */
relation_test(T) ::= entity(E) TEST_IN set(S).
{
    T = stnode_new_test(TEST_OP_IN, E, S);
}
/* A set is a brace-delimited list; the list becomes the data of a
 * STTYPE_SET node. */
set(S) ::= LBRACE set_list(L) RBRACE.
{
    S = stnode_new(STTYPE_SET, L);
}
/* The list is the concatenation of the two-entry lists produced by
 * set_element, so its entries come in pairs. */
set_list(L) ::= set_element(N).
{
    L = g_slist_concat(NULL, N);
}
/* Further elements are separated by WHITESPACE tokens. */
set_list(L) ::= set_list(P) WHITESPACE set_element(N).
{
    L = g_slist_concat(P, N);
}
/* A single value is stored as the pair (X, NULL). */
set_element(N) ::= entity(X).
{
    N = g_slist_append(NULL, X);
    N = g_slist_append(N, NULL);
}
/* A range "X .. Y" is stored as the pair (X, Y). */
set_element(N) ::= entity(X) DOTDOT entity(Y).
{
    N = g_slist_append(NULL, X);
    N = g_slist_append(N, Y);
}
/* Functions */
/* Call with one or more parameters: the function node is created from
 * the token's text and then given the parameter list. */
function(FUNC) ::= UNPARSED(NAME) LPAREN function_params(ARGS) RPAREN.
{
    FUNC = dfilter_new_function(dfw, df_lval_value(NAME));
    sttype_function_set_params(FUNC, ARGS);
    df_lval_free(NAME);
}
/* Call with an empty parameter list: no parameters are attached. */
function(FUNC) ::= UNPARSED(NAME) LPAREN RPAREN.
{
    FUNC = dfilter_new_function(dfw, df_lval_value(NAME));
    df_lval_free(NAME);
}
/* First parameter starts a new list. */
function_params(ARGS) ::= entity(ARG).
{
    ARGS = g_slist_append(NULL, ARG);
}
/* Each further comma-separated parameter is appended at the end. */
function_params(ARGS) ::= function_params(HEAD) COMMA entity(ARG).
{
    ARGS = g_slist_append(HEAD, ARG);
}
/* Parentheses do not create a node of their own: the inner expression
 * is reused and merely flagged as having been parenthesised. */
expr(RES) ::= LPAREN expr(INNER) RPAREN.
{
    RES = INNER;
    stnode_set_inside_parens(RES, TRUE);
}