/* wireshark/epan/dfilter/grammar.lemon */

%include {
/* C prologue for the generated parser: headers, a forward declaration
 * and the FAIL() error macro used by the rule actions below. */
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <assert.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "sttype-field.h"
#include "sttype-slice.h"
#include "sttype-op.h"
#include "sttype-function.h"
#include "sttype-set.h"
#include "drange.h"
#include "grammar.h"
/* Disable compiler warning 4671 when building with _WIN32 defined. */
#ifdef _WIN32
#pragma warning(disable:4671)
#endif
/* Forward declaration; the definition lives in a %code section further
 * down in this file. */
static stnode_t *
new_function(dfwork_t *dfw, stnode_t *node);
/*
 * Report an error attached to a syntax tree node.
 *
 * Writes a "noisy" log line and then calls dfilter_fail() with
 * DF_ERROR_GENERIC, the location of `node` and the message format and
 * arguments passed in `...`.
 */
#define FAIL(dfw, node, ...) \
do { \
ws_noisy("Parsing failed here."); \
dfilter_fail(dfw, DF_ERROR_GENERIC, stnode_location(node), __VA_ARGS__); \
} while (0)
/* NOTE(review): presumably switches off compiler diagnostics for the
 * generated parser code; paired with DIAG_ON_LEMON() in the %code
 * section that follows -- confirm against the macro definition. */
DIAG_OFF_LEMON()
} /* end of %include */
%code {
/* NOTE(review): counterpart of the DIAG_OFF_LEMON() call at the end of
 * the %include section; presumably restores the diagnostics -- confirm. */
DIAG_ON_LEMON()
}
/* Parser Information */
/* Generated parser functions are named with the "Dfilter" prefix
 * instead of Lemon's default "Parse". */
%name Dfilter
/* Terminal symbol constants in the generated header get a "TOKEN_"
 * prefix. */
%token_prefix TOKEN_
/* Extra parameter handed to the parser function and visible to every
 * rule action, destructor and error handler as `dfw`. */
%extra_argument {dfwork_t *dfw}
/* Terminal and Non-Terminal types and destructors */

/* Every terminal carries a syntax tree node, released with stnode_free(). */
%token_type {stnode_t*}
%token_destructor {
    /* dfw is not otherwise used in this destructor. */
    (void)dfw;
    stnode_free($$);
}

/* Non-terminals carry a syntax tree node too, unless a %type
 * declaration below says otherwise. */
%default_type {stnode_t*}
%default_destructor {stnode_free($$);}

/* Non-terminals that carry a GSList, each with its own list destructor. */
%type range_node_list {GSList*}
%destructor range_node_list {drange_node_free_list($$);}

%type func_params_list {GSList*}
%destructor func_params_list {st_funcparams_free($$);}

%type set_list {GSList*}
%destructor set_list {set_nodelist_free($$);}

%type set_element_list {GSList*}
%destructor set_element_list {set_nodelist_free($$);}
/* Invoked as soon as the parser hits a syntax error. Afterwards any
 * "error" symbols are shifted, if possible. */
%syntax_error {
    if (TOKEN == NULL) {
        /* There is no offending token to point at. */
        dfilter_fail(dfw, DF_ERROR_UNEXPECTED_END, DFILTER_LOC_EMPTY, "Unexpected end of filter expression.");
        return;
    }

    /* Report the token that could not be accepted at this point. */
    FAIL(dfw, TOKEN, "\"%s\" was unexpected in this context.", stnode_token(TOKEN));
}
/* Runs once the parse has failed for good: after the %syntax_error code
 * has run and after error recovery (shifting an "error" symbol and then
 * successfully shifting 3 more symbols) did not work out. The failure is
 * recorded in the work structure. */
%parse_failure {
    dfw->parse_failure = TRUE;
}
/* ----------------- The grammar -------------- */
/* Associativity */
/* In Lemon, symbols named in later declarations have higher precedence
 * than symbols named in earlier ones, so this list runs from the
 * loosest binding operators (logical OR) to the tightest (unary sign). */
%left TEST_OR.
%left TEST_AND.
%right TEST_NOT.
/* Comparison style operators do not associate with each other. */
%nonassoc TEST_ALL_EQ TEST_ANY_EQ TEST_ALL_NE TEST_ANY_NE TEST_LT TEST_LE TEST_GT TEST_GE
TEST_CONTAINS TEST_MATCHES.
%left BITWISE_AND.
%left PLUS MINUS.
%left STAR RSLASH PERCENT.
/* Used as explicit rule precedences ([UNARY_PLUS] / [UNARY_MINUS]) by
 * the unary sign rules of arithmetic_expr. */
%nonassoc UNARY_PLUS UNARY_MINUS.
/* Top-level targets */

/* A filter is one expression or nothing at all; the root of the syntax
 * tree (NULL for an empty filter) is stored in the work structure. */
sentence ::= expr(ROOT).    { dfw->st_root = ROOT; }
sentence ::= .              { dfw->st_root = NULL; }

/* An expression is either a relation or an arithmetic expression. */
expr(OUT) ::= relation(REL).            { OUT = REL; }
expr(OUT) ::= arithmetic_expr(ARITH).   { OUT = ARITH; }
/* Logical tests */

/* In each rule the operator token node is reused as the result node
 * and receives the operation code and its operand(s). */
expr(OUT) ::= expr(LHS) TEST_AND(OPER) expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_AND, LHS, RHS);
    OUT = OPER;
}

expr(OUT) ::= expr(LHS) TEST_OR(OPER) expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_OR, LHS, RHS);
    OUT = OPER;
}

expr(OUT) ::= TEST_NOT(OPER) expr(ARG).
{
    sttype_oper_set1(OPER, STNODE_OP_NOT, ARG);
    OUT = OPER;
}
/* Any expression inside parens is simply that expression */
expr(OUT) ::= LPAREN(OPEN) expr(INNER) RPAREN(CLOSE).
{
    OUT = INNER;
    /* Give the result the merged location of both parentheses and the
     * inner expression. */
    df_loc_t loc = stnode_merge_location(OPEN, INNER, CLOSE, (stnode_t *)NULL);
    stnode_set_location(OUT, loc);
    /* The parenthesis tokens do not end up in the tree. */
    stnode_free(OPEN);
    stnode_free(CLOSE);
}
/* Entities, or things that can be compared/tested/checked */

/* Each of these token nodes is passed through unchanged as an atom. */
atom(ATOM) ::= STRING(STR).       { ATOM = STR; }
atom(ATOM) ::= CHARCONST(CHR).    { ATOM = CHR; }
atom(ATOM) ::= LITERAL(LIT).      { ATOM = LIT; }
atom(ATOM) ::= CONSTANT(CST).     { ATOM = CST; }
/* A FIELD token node is used as the field node directly. */
named_field(FLD) ::= FIELD(TOK).
{
    FLD = TOK;
}

/* An identifier is looked up with dfilter_resolve_unparsed(). A failed
 * lookup is reported as an error; either way the node is then replaced
 * by a STTYPE_FIELD node holding the lookup result. */
named_field(FLD) ::= IDENTIFIER(IDENT).
{
    const char *abbrev = stnode_token(IDENT);
    header_field_info *hfi = dfilter_resolve_unparsed(dfw, abbrev);

    if (hfi == NULL) {
        FAIL(dfw, IDENT, "\"%s\" is not a valid protocol or protocol field.", abbrev);
    }
    stnode_replace(IDENT, STTYPE_FIELD, hfi);
    FLD = IDENT;
}
/* A named field, optionally followed by "#" and either a bracketed
 * range list or a single integer. */
layered_field(OUT) ::= named_field(FLD).
{
    OUT = FLD;
}

layered_field(OUT) ::= named_field(FLD) HASH LBRACKET range_node_list(RANGES) RBRACKET.
{
    sttype_field_set_range(FLD, RANGES);
    /* Frees the list cells only. */
    g_slist_free(RANGES);
    OUT = FLD;
}

layered_field(OUT) ::= named_field(FLD) HASH INTEGER(NUM).
{
    char *error_text = NULL;
    drange_node *single = drange_node_from_str(stnode_token(NUM), &error_text);

    if (error_text != NULL) {
        /* Report the conversion problem, then release the message. */
        FAIL(dfw, NUM, "%s", error_text);
        g_free(error_text);
    }
    sttype_field_set_range1(FLD, single);
    stnode_free(NUM);
    OUT = FLD;
}
/* Raw (bytes) addressing mode: prefixing a field name with "@" reads
 * the field from the tree as bytes instead of its actual type. This
 * allows matching, for example, malformed strings whose invalid byte
 * sequences would otherwise have been replaced with U+FFFD:
 *
 *     @http.user_agent == "Mozill\xAA"
 */
rawable_field(OUT) ::= layered_field(FLD).
{
    OUT = FLD;
}

rawable_field(OUT) ::= ATSIGN layered_field(FLD).
{
    OUT = FLD;
    /* Mark the field so that its raw value is used. */
    sttype_field_set_raw(OUT, TRUE);
}
/* A field wrapped in "${" and "}" becomes a reference node. */
reference(REF) ::= DOLLAR LBRACE rawable_field(FLD) RBRACE.
{
    /* Create the reference from the field's hfinfo and location. */
    REF = stnode_new(STTYPE_REFERENCE, sttype_field_hfinfo(FLD), NULL, stnode_location(FLD));

    /* Carry over the range (taken away from the field) and the raw flag. */
    sttype_field_set_drange(REF, sttype_field_drange_steal(FLD));
    sttype_field_set_raw(REF, sttype_field_raw(FLD));

    /* The original field node is no longer needed. */
    stnode_free(FLD);
}
/* An entity is any one of the following, passed through unchanged. */
entity(ENT) ::= atom(ATOM).             { ENT = ATOM; }
entity(ENT) ::= slice(SLC).             { ENT = SLC; }
entity(ENT) ::= function(FUNC).         { ENT = FUNC; }
entity(ENT) ::= rawable_field(FLD).     { ENT = FLD; }
entity(ENT) ::= reference(REF).         { ENT = REF; }
/* The simplest arithmetic expression is a single entity. */
arithmetic_expr(OUT) ::= entity(ENT).
{
    OUT = ENT;
}

/* Unary plus yields its operand as is. */
arithmetic_expr(OUT) ::= PLUS arithmetic_expr(ARG). [UNARY_PLUS]
{
    OUT = ARG;
}

/* Unary minus: the sign token node becomes the operator node. */
arithmetic_expr(OUT) ::= MINUS(SIGN) arithmetic_expr(ARG). [UNARY_MINUS]
{
    sttype_oper_set1(SIGN, STNODE_OP_UNARY_MINUS, ARG);
    OUT = SIGN;
}

/* Binary operators: the operator token node becomes the result node
 * and receives the operation code together with both operands. */
arithmetic_expr(OUT) ::= arithmetic_expr(LHS) BITWISE_AND(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_BITWISE_AND, LHS, RHS);
    OUT = OPER;
}

arithmetic_expr(OUT) ::= arithmetic_expr(LHS) PLUS(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_ADD, LHS, RHS);
    OUT = OPER;
}

arithmetic_expr(OUT) ::= arithmetic_expr(LHS) MINUS(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_SUBTRACT, LHS, RHS);
    OUT = OPER;
}

arithmetic_expr(OUT) ::= arithmetic_expr(LHS) STAR(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_MULTIPLY, LHS, RHS);
    OUT = OPER;
}

arithmetic_expr(OUT) ::= arithmetic_expr(LHS) RSLASH(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_DIVIDE, LHS, RHS);
    OUT = OPER;
}

arithmetic_expr(OUT) ::= arithmetic_expr(LHS) PERCENT(OPER) arithmetic_expr(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_MODULO, LHS, RHS);
    OUT = OPER;
}
/* An arithmetic expression inside braces is that expression itself. */
arithmetic_expr(OUT) ::= LBRACE(OPEN) arithmetic_expr(INNER) RBRACE(CLOSE).
{
    OUT = INNER;
    /* Give the result the merged location of both braces and the inner
     * expression. */
    df_loc_t loc = stnode_merge_location(OPEN, INNER, CLOSE, (stnode_t *)NULL);
    stnode_set_location(OUT, loc);
    /* The brace tokens do not end up in the tree. */
    stnode_free(OPEN);
    stnode_free(CLOSE);
}
/* Relational tests */

/* Each comparison token node is tagged with its operation code and
 * passed on; the operands are attached later by comparison_test. */
cmp_op(CMP) ::= TEST_ALL_EQ(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_ALL_EQ);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_ANY_EQ(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_ANY_EQ);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_ALL_NE(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_ALL_NE);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_ANY_NE(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_ANY_NE);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_GT(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_GT);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_GE(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_GE);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_LT(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_LT);
    CMP = TOK;
}

cmp_op(CMP) ::= TEST_LE(TOK).
{
    sttype_oper_set_op(TOK, STNODE_OP_LE);
    CMP = TOK;
}
/* A single comparison: attach both operands to the operator node. */
comparison_test(OUT) ::= arithmetic_expr(LHS) cmp_op(CMP) arithmetic_expr(RHS).
{
    sttype_oper_set2_args(CMP, LHS, RHS);
    OUT = CMP;
}

/* 'a == b == c' or 'a < b <= c <= d < e' */
comparison_test(OUT) ::= arithmetic_expr(LHS) cmp_op(CMP) comparison_test(REST).
{
    /* Starting at the remaining chain, keep following the node that
     * sttype_oper_get() hands back through its third argument until a
     * node that is not a test is reached. */
    stnode_t *shared = REST;
    while (stnode_type_id(shared) == STTYPE_TEST) {
        sttype_oper_get(shared, NULL, &shared, NULL);
    }

    /* The new comparison works on a duplicate of that node ... */
    stnode_t *first = CMP;
    sttype_oper_set2_args(first, LHS, stnode_dup(shared));

    /* ... and is combined with the remaining chain by a logical AND. */
    OUT = stnode_new_empty(STTYPE_TEST);
    sttype_oper_set2(OUT, STNODE_OP_AND, first, REST);
}
/* A relation test is a comparison (chain) or one of the keyword tests. */
relation_test(OUT) ::= comparison_test(CMP).    { OUT = CMP; }

relation_test(OUT) ::= entity(LHS) TEST_CONTAINS(OPER) entity(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_CONTAINS, LHS, RHS);
    OUT = OPER;
}

relation_test(OUT) ::= entity(LHS) TEST_MATCHES(OPER) entity(RHS).
{
    sttype_oper_set2(OPER, STNODE_OP_MATCHES, LHS, RHS);
    OUT = OPER;
}

relation_test(OUT) ::= entity(LHS) TEST_IN(OPER) set(MEMBERS).
{
    sttype_oper_set2(OPER, STNODE_OP_IN, LHS, MEMBERS);
    OUT = OPER;
}

/* "not in" is built as a logical negation wrapped around a membership test. */
relation_test(OUT) ::= entity(LHS) TEST_NOT(NEG) TEST_IN(MEMB) set(MEMBERS).
{
    sttype_oper_set2(MEMB, STNODE_OP_IN, LHS, MEMBERS);
    sttype_oper_set1(NEG, STNODE_OP_NOT, MEMB);
    OUT = NEG;
}
/* A relation is a relation test, optionally preceded by a keyword that
 * sets the match mode of the test. */
relation(OUT) ::= relation_test(TST).   { OUT = TST; }

relation(OUT) ::= ANY relation_test(TST).
{
    sttype_test_set_match(TST, STNODE_MATCH_ANY);
    OUT = TST;
}

relation(OUT) ::= ALL relation_test(TST).
{
    sttype_test_set_match(TST, STNODE_MATCH_ALL);
    OUT = TST;
}
/* A set is a list of elements enclosed in braces. The set node takes
 * the element list as its data and is created with an empty location. */
set(NODE) ::= LBRACE set_list(ELEMS) RBRACE.
{
    NODE = stnode_new(STTYPE_SET, ELEMS, NULL, DFILTER_LOC_EMPTY);
}
/* The set list is the concatenation of the per-element lists, in the
 * order in which the elements were written. */
set_list(LIST) ::= set_element_list(ELEM).
{
    LIST = g_slist_concat(NULL, ELEM);
}

set_list(LIST) ::= set_list(PREV) COMMA set_element_list(ELEM).
{
    LIST = g_slist_concat(PREV, ELEM);
}
/* A set member is an entity with an optional leading sign. */
set_entity(OUT) ::= entity(ENT).
{
    OUT = ENT;
}

/* A leading minus wraps the entity in a unary minus operation, reusing
 * the sign token node as the operator node. */
set_entity(OUT) ::= MINUS(SIGN) entity(ENT).
{
    sttype_oper_set1(SIGN, STNODE_OP_UNARY_MINUS, ENT);
    OUT = SIGN;
}

/* A leading plus leaves the entity as it is. */
set_entity(OUT) ::= PLUS entity(ENT).
{
    OUT = ENT;
}
/* Every set element contributes exactly two list entries. A single
 * value is stored as the value followed by NULL. */
set_element_list(PAIR) ::= set_entity(VALUE).
{
    PAIR = g_slist_append(NULL, VALUE);
    PAIR = g_slist_append(PAIR, NULL);
}

/* A "lower .. upper" range is stored as both of its bounds. */
set_element_list(PAIR) ::= set_entity(LOWER) DOTDOT set_entity(UPPER).
{
    PAIR = g_slist_append(NULL, LOWER);
    PAIR = g_slist_append(PAIR, UPPER);
}
/* Slices */

/* An entity followed by a bracketed range list. The slice node is
 * created empty (with an empty location) and then filled in. */
slice(OUT) ::= entity(ENT) LBRACKET range_node_list(RANGES) RBRACKET.
{
    OUT = stnode_new(STTYPE_SLICE, NULL, NULL, DFILTER_LOC_EMPTY);
    sttype_slice_set(OUT, ENT, RANGES);

    /* Only the list itself is deleted here; the drange_nodes it
     * contains are not. */
    g_slist_free(RANGES);
}
/* A comma separated list of range nodes. Each RANGE_NODE token is
 * converted with drange_node_from_str(); a conversion error message is
 * reported and freed, and the conversion result is appended either way. */
range_node_list(LIST) ::= RANGE_NODE(TOK).
{
    char *error_text = NULL;
    drange_node *parsed = drange_node_from_str(stnode_token(TOK), &error_text);

    if (error_text != NULL) {
        FAIL(dfw, TOK, "%s", error_text);
        g_free(error_text);
    }
    LIST = g_slist_append(NULL, parsed);
    stnode_free(TOK);
}

range_node_list(LIST) ::= range_node_list(PREV) COMMA RANGE_NODE(TOK).
{
    char *error_text = NULL;
    drange_node *parsed = drange_node_from_str(stnode_token(TOK), &error_text);

    if (error_text != NULL) {
        FAIL(dfw, TOK, "%s", error_text);
        g_free(error_text);
    }
    LIST = g_slist_append(PREV, parsed);
    stnode_free(TOK);
}
/* Functions */
%code {
/*
 * Convert a token node into a function node.
 *
 * The token text of `node` is looked up with df_func_lookup(). If no
 * definition is found an error is reported through `dfw`. In either
 * case the node is replaced in place by a STTYPE_FUNCTION node holding
 * the lookup result, and the same node pointer is returned.
 */
static stnode_t *
new_function(dfwork_t *dfw, stnode_t *node)
{
    const char *func_name = stnode_token(node);
    df_func_def_t *func_def = df_func_lookup(func_name);

    if (func_def == NULL) {
        FAIL(dfw, node, "Function '%s' does not exist", func_name);
    }
    stnode_replace(node, STTYPE_FUNCTION, func_def);
    return node;
}
}
/* A function can have one or more parameters */
function(FUNC) ::= IDENTIFIER(IDENT) LPAREN(OPEN) func_params_list(PARAMS) RPAREN(CLOSE).
{
    FUNC = new_function(dfw, IDENT);
    sttype_function_set_params(FUNC, PARAMS);

    /* The call is located by the merge of the function node and both
     * parentheses, which are then released. */
    df_loc_t loc = stnode_merge_location(FUNC, OPEN, CLOSE, (stnode_t *)NULL);
    stnode_set_location(FUNC, loc);
    stnode_free(OPEN);
    stnode_free(CLOSE);
}

/* A CONSTANT token used in call position is reported as an unknown function. */
function ::= CONSTANT(NAME) LPAREN func_params_list RPAREN.
{
    FAIL(dfw, NAME, "Function '%s' does not exist", stnode_token(NAME));
}

/* A function can have zero parameters. */
function(FUNC) ::= IDENTIFIER(IDENT) LPAREN(OPEN) RPAREN(CLOSE).
{
    FUNC = new_function(dfw, IDENT);

    df_loc_t loc = stnode_merge_location(FUNC, OPEN, CLOSE, (stnode_t *)NULL);
    stnode_set_location(FUNC, loc);
    stnode_free(OPEN);
    stnode_free(CLOSE);
}

function ::= CONSTANT(NAME) LPAREN RPAREN.
{
    FAIL(dfw, NAME, "Function '%s' does not exist", stnode_token(NAME));
}
/* Function parameters: a comma separated list of arithmetic
 * expressions, collected in the order in which they were written. */
func_params_list(LIST) ::= arithmetic_expr(ARG).
{
    LIST = g_slist_append(NULL, ARG);
}

func_params_list(LIST) ::= func_params_list(PREV) COMMA arithmetic_expr(ARG).
{
    LIST = g_slist_append(PREV, ARG);
}