forked from osmocom/wireshark
481 lines
11 KiB
Plaintext
481 lines
11 KiB
Plaintext
%top {
|
|
/* Include this before everything else, for various large-file definitions */
|
|
#include "config.h"
|
|
#include <wireshark.h>
|
|
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
|
|
#include "dfilter-int.h"
|
|
#include "syntax-tree.h"
|
|
#include "grammar.h"
|
|
#include "dfunctions.h"
|
|
}
|
|
|
|
/*
|
|
* We want a reentrant scanner.
|
|
*/
|
|
%option reentrant
|
|
|
|
/*
|
|
* We don't use input, so don't generate code for it.
|
|
*/
|
|
%option noinput
|
|
|
|
/*
|
|
* We don't use unput, so don't generate code for it.
|
|
*/
|
|
%option nounput
|
|
|
|
/*
|
|
* We don't read interactively from the terminal.
|
|
*/
|
|
%option never-interactive
|
|
|
|
/*
|
|
* Prefix scanner routines with "df_" rather than "yy", so this scanner
|
|
* can coexist with other scanners.
|
|
*/
|
|
%option prefix="df_"
|
|
|
|
/*
|
|
* We're reading from a string, so we don't need yywrap.
|
|
*/
|
|
%option noyywrap
|
|
|
|
/*
|
|
* The type for the state we keep for a scanner.
|
|
*/
|
|
%option extra-type="df_scanner_state_t *"
|
|
|
|
/*
|
|
* We have to override the memory allocators so that we don't get
|
|
* "unused argument" warnings from the yyscanner argument (which
|
|
* we don't use, as we have a global memory allocator).
|
|
*
|
|
* We provide, as macros, our own versions of the routines generated by Flex,
|
|
* which just call malloc()/realloc()/free() (as the Flex versions do),
|
|
* discarding the extra argument.
|
|
*/
|
|
%option noyyalloc
|
|
%option noyyrealloc
|
|
%option noyyfree
|
|
|
|
%{
|
|
/*
|
|
* Wireshark - Network traffic analyzer
|
|
* By Gerald Combs <gerald@wireshark.org>
|
|
* Copyright 2001 Gerald Combs
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
/*
|
|
* Disable diagnostics in the code generated by Flex.
|
|
*/
|
|
DIAG_OFF_FLEX
|
|
|
|
#define LVAL df_lval
|
|
#define LVAL_TYPE stnode_t*
|
|
#define LVAL_INIT_VAL NULL
|
|
#define MODNAME df
|
|
#define FLEX_YY_PREFIX df_
|
|
|
|
#include <lemonflex-head.inc>
|
|
|
|
/*#undef YY_NO_UNPUT*/
|
|
|
|
static int set_lval_str(int token, const char *token_value);
|
|
static int simple(int token, const char *token_value);
|
|
#define SIMPLE(token) simple(token, yytext)
|
|
|
|
/*
|
|
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
|
|
*/
|
|
#define YY_EXIT_FAILURE ((void)yyscanner, 2)
|
|
|
|
/*
 * Macros for the allocators, to discard the extra argument.
 *
 * The scanner is built with noyyalloc/noyyrealloc/noyyfree, so these
 * macros stand in for the routines Flex would otherwise generate and
 * simply forward to malloc()/realloc()/free().  The yyscanner argument
 * is accepted and ignored.
 *
 * Every argument that is used is parenthesized, as is each value-producing
 * expansion, so that an expression argument (e.g. "p - 1") is evaluated
 * as a whole before the cast is applied to it.
 */
#define df_alloc(size, yyscanner)		((void *)malloc((size)))
#define df_realloc(ptr, size, yyscanner)	((void *)realloc((char *)(ptr), (size)))
#define df_free(ptr, yyscanner)			free((char *)(ptr))
|
|
|
|
%}
|
|
|
|
%x RANGE_INT
|
|
%x RANGE_PUNCT
|
|
%x DQUOTE
|
|
%x SQUOTE
|
|
|
|
%%
|
|
|
|
[[:blank:]\n]+	{
	/* Whitespace is only reported while the elements of a set are
	 * being scanned, because set elements are separated by
	 * whitespace.  Outside a set the action ends without returning,
	 * so the whitespace is discarded and scanning continues. */
	if (yyextra->in_set) {
		return simple(TOKEN_WHITESPACE, NULL);
	}
}

"("	return SIMPLE(TOKEN_LPAREN);
")"	return SIMPLE(TOKEN_RPAREN);
","	return SIMPLE(TOKEN_COMMA);

	/* Sets: "{" turns the in_set flag on and "}" turns it off.  The
	 * brace and ".." patterns swallow the whitespace adjacent to them,
	 * and report the bare punctuation as the token text. */
"{"[[:blank:]\n]*	{
	yyextra->in_set = TRUE;
	return simple(TOKEN_LBRACE, "{");
}

[[:blank:]\n]*".."[[:blank:]\n]*	return simple(TOKEN_DOTDOT, "..");

[[:blank:]\n]*"}"	{
	yyextra->in_set = FALSE;
	return simple(TOKEN_RBRACE, "}");
}

	/* Operators.  The symbolic and the English spelling of an operator
	 * share one action. */
"=="	|
"eq"	return SIMPLE(TOKEN_TEST_EQ);

	/* Both spellings of "not equal" are additionally recorded as
	 * deprecated tokens on the filter being compiled. */
"!="	{
	add_deprecated_token(yyextra->dfw, "!=");
	return SIMPLE(TOKEN_TEST_NE);
}
"ne"	{
	add_deprecated_token(yyextra->dfw, "ne");
	return SIMPLE(TOKEN_TEST_NE);
}

">"	|
"gt"	return SIMPLE(TOKEN_TEST_GT);

">="	|
"ge"	return SIMPLE(TOKEN_TEST_GE);

"<"	|
"lt"	return SIMPLE(TOKEN_TEST_LT);

"<="	|
"le"	return SIMPLE(TOKEN_TEST_LE);

"bitwise_and"	|
"&"		return SIMPLE(TOKEN_TEST_BITWISE_AND);

"contains"	return SIMPLE(TOKEN_TEST_CONTAINS);

"~"		|
"matches"	return SIMPLE(TOKEN_TEST_MATCHES);

"!"	|
"not"	return SIMPLE(TOKEN_TEST_NOT);

"&&"	|
"and"	return SIMPLE(TOKEN_TEST_AND);

"||"	|
"or"	return SIMPLE(TOKEN_TEST_OR);

"in"	return SIMPLE(TOKEN_TEST_IN);

	/*
	 * Ranges (slices).  The syntax must cope with a hyphen that is a
	 * sign as well as a hyphen that is a separator, e.g.:
	 *     frame[-10-5]   (minus ten to five)
	 *     frame[-10--5]  (minus ten to minus five)
	 * so two start conditions alternate: RANGE_INT, where an optionally
	 * signed value is expected, and RANGE_PUNCT, where punctuation is
	 * expected.
	 */

"["	{
	BEGIN(RANGE_INT);
	return SIMPLE(TOKEN_LBRACKET);
}

<RANGE_INT>[+-]?[[:alnum:]]+	{
	/* A value; punctuation has to come next. */
	BEGIN(RANGE_PUNCT);
	return set_lval_str(TOKEN_INTEGER, yytext);
}

<RANGE_INT,RANGE_PUNCT>":"	{
	BEGIN(RANGE_INT);
	return SIMPLE(TOKEN_COLON);
}

<RANGE_PUNCT>"-"	{
	/* Only a separator when punctuation is expected; in RANGE_INT a
	 * leading hyphen is part of the value instead. */
	BEGIN(RANGE_INT);
	return SIMPLE(TOKEN_HYPHEN);
}

<RANGE_INT,RANGE_PUNCT>","	{
	BEGIN(RANGE_INT);
	return SIMPLE(TOKEN_COMMA);
}

<RANGE_INT,RANGE_PUNCT>"]"	{
	/* End of the slice: back to normal scanning. */
	BEGIN(INITIAL);
	return SIMPLE(TOKEN_RBRACKET);
}

	/* Anything else inside a range is handed on as an unparsed token.
	 * These two rules must stay after the punctuation rules above,
	 * which take precedence when the match lengths are equal. */

<RANGE_PUNCT>[^:\-,\]]+	{
	BEGIN(RANGE_INT);
	return set_lval_str(TOKEN_UNPARSED, yytext);
}

<RANGE_INT>[+-]?[^[:alnum:]\]]+	{
	BEGIN(RANGE_PUNCT);
	return set_lval_str(TOKEN_UNPARSED, yytext);
}
|
|
|
|
[rR]?\042	{
	/* Opening quote of a double-quoted string, optionally preceded
	 * by 'r' or 'R' to make it a raw string.
	 *
	 * The way strings are scanned follows the example in the flex
	 * 2.5.4 manual, section "Start Conditions".  See:
	 * http://www.gnu.org/software/flex/manual/html_node/flex_11.html */
	BEGIN(DQUOTE);

	/* If an earlier filter failed to compile because its closing
	 * quote was missing, quoted_string is still set.  Release that
	 * buffer here; there is no need to reset the pointer to NULL,
	 * as it is overwritten immediately below. */
	if (yyextra->quoted_string) {
		g_string_free(yyextra->quoted_string, TRUE);
	}
	yyextra->quoted_string = g_string_new("");

	/*
	 * Raw strings work as in Python.  Rules: 1) The two escape
	 * sequences are \\ and \".  2) Backslashes are preserved.
	 * 3) Double quotes in the string must be escaped.
	 * Corollary: a string cannot end with an odd number of
	 * backslashes.
	 * Example: r"a\b\x12\"\\" is the string (including the implicit
	 * NUL terminator)
	 * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
	 */
	yyextra->raw_string = (yytext[0] == 'r' || yytext[0] == 'R') ? TRUE : FALSE;
}

<DQUOTE><<EOF>>	{
	/* The input ended inside the string.
	 *
	 * Handling of unclosed strings follows the example in the flex
	 * 2.5.4 manual, section "End-of-file rules".  See:
	 * http://www.gnu.org/software/flex/manual/html_node/flex_13.html
	 *
	 * quoted_string is deliberately left set here; the next opening
	 * quote releases it. */
	dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
	return SCAN_FAILED;
}

<DQUOTE>\042	{
	/* Closing quote: emit the accumulated text as a string token,
	 * then drop the accumulation buffer. */
	int string_token;

	BEGIN(INITIAL);
	string_token = set_lval_str(TOKEN_STRING, yyextra->quoted_string->str);
	g_string_free(yyextra->quoted_string, TRUE);
	yyextra->quoted_string = NULL;
	return string_token;
}

<DQUOTE>\\[0-7]{1,3}	{
	/* Octal escape: kept verbatim in a raw string, otherwise
	 * converted to the byte it denotes.  A NUL byte and values that
	 * do not fit in a byte are rejected. */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long octal_value = strtoul(yytext + 1, NULL, 8);

		if (octal_value == 0 || octal_value > 0xff) {
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			if (octal_value == 0) {
				dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			}
			else {
				dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
			}
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) octal_value);
	}
}

<DQUOTE>\\x[[:xdigit:]]{1,2}	{
	/* Hex escape: kept verbatim in a raw string, otherwise converted
	 * to the byte it denotes.  At most two hex digits are matched, so
	 * only the NUL byte needs to be rejected. */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long hex_value = strtoul(yytext + 2, NULL, 16);

		if (hex_value == 0) {
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) hex_value);
	}
}

<DQUOTE>\\.	{
	/* Any other escaped character: a raw string keeps the backslash,
	 * a regular string keeps only the character after it.
	 *
	 * NOTE(review): '.' does not match a newline, so a backslash
	 * followed by a newline or by the end of the input is not
	 * covered by any DQUOTE rule - confirm that this is intended. */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		g_string_append_c(yyextra->quoted_string, yytext[1]);
	}
}

<DQUOTE>[^\\\042]+	{
	/* A run of ordinary characters is copied as is. */
	g_string_append(yyextra->quoted_string, yytext);
}
|
|
|
|
|
|
\047	{
	/* Opening quote of a character constant.
	 *
	 * The way quoted text is scanned follows the example in the Flex
	 * manual, section "Start Conditions".  See:
	 * http://flex.sourceforge.net/manual/Start-Conditions.html#Start-Conditions */
	BEGIN(SQUOTE);

	/* If an earlier filter failed to compile because its closing
	 * quote was missing, quoted_string is still set.  Release that
	 * buffer here; there is no need to reset the pointer to NULL,
	 * as it is overwritten immediately below. */
	if (yyextra->quoted_string) {
		g_string_free(yyextra->quoted_string, TRUE);
	}

	/* Unlike for double-quoted strings, the quote characters are
	 * kept as part of the token text. */
	yyextra->quoted_string = g_string_new("'");
}

<SQUOTE><<EOF>>	{
	/* The input ended inside the character constant.
	 *
	 * Handling of unclosed quotes follows the example in the Flex
	 * manual, section "End-of-file rules".  See:
	 * http://flex.sourceforge.net/manual/EOF.html#EOF.html */
	dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
	return SCAN_FAILED;
}

<SQUOTE>\047	{
	/* Closing quote: append it, emit the accumulated text as a
	 * character-constant token, then drop the accumulation buffer. */
	int charconst_token;

	BEGIN(INITIAL);
	g_string_append_c(yyextra->quoted_string, '\'');
	charconst_token = set_lval_str(TOKEN_CHARCONST, yyextra->quoted_string->str);
	g_string_free(yyextra->quoted_string, TRUE);
	yyextra->quoted_string = NULL;
	return charconst_token;
}

<SQUOTE>\\.	{
	/* An escape sequence is copied verbatim, backslash included;
	 * it is not interpreted by the scanner. */
	g_string_append(yyextra->quoted_string, yytext);
}

<SQUOTE>[^\\\047]+	{
	/* A run of ordinary characters is copied as is. */
	g_string_append(yyextra->quoted_string, yytext);
}
|
|
|
|
|
|
|
|
[-[:alnum:]_\.:]*\/[[:digit:]]+	{
	/* CIDR notation: text, a slash, and a run of digits. */
	return set_lval_str(TOKEN_UNPARSED, yytext);
}

([.][-+[:alnum:]_:]+)+[.]{0,2}			|
[-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2}	{
	/* This could be a field name or some other value (float,
	 * integer, bytes, ...); the scanner does not decide which and
	 * hands the text on as an unparsed token. */

	/* A single trailing dot is allowed (for floats), but a trailing
	 * ".." must be scanned as a token of its own, so push those two
	 * characters back onto the input. */
	if (strstr(yytext, "..") != NULL) {
		yyless(yyleng - 2);
	}

	return set_lval_str(TOKEN_UNPARSED, yytext);
}

.	{
	/* Default: any single character not matched by a rule above. */
	return set_lval_str(TOKEN_UNPARSED, yytext);
}
|
|
|
|
|
|
%%
|
|
|
|
/*
|
|
* Turn diagnostics back on, so we check the code that we've written.
|
|
*/
|
|
DIAG_ON_FLEX
|
|
|
|
static int
|
|
simple(int token, const char *token_value)
|
|
{
|
|
switch (token) {
|
|
case TOKEN_LPAREN:
|
|
case TOKEN_RPAREN:
|
|
case TOKEN_LBRACKET:
|
|
case TOKEN_RBRACKET:
|
|
case TOKEN_LBRACE:
|
|
case TOKEN_RBRACE:
|
|
case TOKEN_COLON:
|
|
case TOKEN_COMMA:
|
|
case TOKEN_DOTDOT:
|
|
case TOKEN_HYPHEN:
|
|
case TOKEN_WHITESPACE:
|
|
case TOKEN_TEST_EQ:
|
|
case TOKEN_TEST_NE:
|
|
case TOKEN_TEST_GT:
|
|
case TOKEN_TEST_GE:
|
|
case TOKEN_TEST_LT:
|
|
case TOKEN_TEST_LE:
|
|
case TOKEN_TEST_BITWISE_AND:
|
|
case TOKEN_TEST_CONTAINS:
|
|
case TOKEN_TEST_MATCHES:
|
|
case TOKEN_TEST_NOT:
|
|
case TOKEN_TEST_AND:
|
|
case TOKEN_TEST_OR:
|
|
case TOKEN_TEST_IN:
|
|
break;
|
|
default:
|
|
ws_assert_not_reached();
|
|
}
|
|
stnode_init(df_lval, STTYPE_UNINITIALIZED, NULL, token_value);
|
|
return token;
|
|
}
|
|
|
|
static int
|
|
set_lval_str(int token, const char *token_value)
|
|
{
|
|
sttype_id_t type_id;
|
|
|
|
switch (token) {
|
|
case TOKEN_STRING:
|
|
type_id = STTYPE_STRING;
|
|
break;
|
|
case TOKEN_CHARCONST:
|
|
type_id = STTYPE_CHARCONST;
|
|
break;
|
|
case TOKEN_UNPARSED:
|
|
type_id = STTYPE_UNPARSED;
|
|
break;
|
|
case TOKEN_INTEGER:
|
|
/* Not used in AST. */
|
|
type_id = STTYPE_UNINITIALIZED;
|
|
break;
|
|
default:
|
|
ws_assert_not_reached();
|
|
}
|
|
stnode_init(df_lval, type_id, (gpointer)token_value, token_value);
|
|
return token;
|
|
}
|