wireshark/epan/dfilter/scanner.l

481 lines
11 KiB
Plaintext

%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#include <wireshark.h>
#include <stdlib.h>
#include <errno.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
}
/*
* We want a reentrant scanner.
*/
%option reentrant
/*
* We don't use input, so don't generate code for it.
*/
%option noinput
/*
* We don't use unput, so don't generate code for it.
*/
%option nounput
/*
* We don't read interactively from the terminal.
*/
%option never-interactive
/*
* Prefix scanner routines with "df_" rather than "yy", so this scanner
* can coexist with other scanners.
*/
%option prefix="df_"
/*
* We're reading from a string, so we don't need yywrap.
*/
%option noyywrap
/*
* The type for the state we keep for a scanner.
*/
%option extra-type="df_scanner_state_t *"
/*
* We have to override the memory allocators so that we don't get
* "unused argument" warnings from the yyscanner argument (which
* we don't use, as we have a global memory allocator).
*
* We provide, as macros, our own versions of the routines generated by Flex,
* which just call malloc()/realloc()/free() (as the Flex versions do),
* discarding the extra argument.
*/
%option noyyalloc
%option noyyrealloc
%option noyyfree
%{
/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 2001 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
 * Disable diagnostics in the code generated by Flex.
 */
DIAG_OFF_FLEX

/*
 * Configuration macros defined ahead of lemonflex-head.inc.
 * NOTE(review): presumably consumed by that include to declare the
 * semantic-value variable and the prefixed scanner entry points;
 * confirm against the header.
 */
#define LVAL            df_lval
#define LVAL_TYPE       stnode_t*
#define LVAL_INIT_VAL   NULL
#define MODNAME         df
#define FLEX_YY_PREFIX  df_

#include <lemonflex-head.inc>

/*#undef YY_NO_UNPUT*/

/*
 * Token helpers, defined after the rules section. Both initialise
 * df_lval for the token being returned and hand the token id back so
 * that rule actions can write "return helper(...)".
 */
static int set_lval_str(int token, const char *token_value);
static int simple(int token, const char *token_value);

/* Shorthand for the common case where the token text is the match itself. */
#define SIMPLE(token)   simple((token), yytext)

/*
 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
 */
#define YY_EXIT_FAILURE ((void)yyscanner, 2)

/*
 * Macros for the allocators, to discard the extra argument.
 *
 * Every macro argument that is used is parenthesized, and the
 * value-producing expansions are wrapped as a whole, so that an
 * argument such as "p + n" or a trailing operator at the call site
 * cannot bind to the cast instead of to the complete expression.
 */
#define df_alloc(size, yyscanner)           ((void *)malloc((size)))
#define df_realloc(ptr, size, yyscanner)    ((void *)realloc((char *)(ptr), (size)))
#define df_free(ptr, yyscanner)             free((char *)(ptr))
%}
/*
 * Exclusive start conditions (%x): while one of them is active, only the
 * rules explicitly tagged with it are eligible to match.
 *
 *   RANGE_INT   - inside a slice "[...]", entered after "[", ":", "-"
 *                 or ","; the rules for this state recognise an integer.
 *   RANGE_PUNCT - inside a slice, entered after an integer; the rules for
 *                 this state recognise the punctuation that may follow it.
 *   DQUOTE      - inside a double-quoted (optionally raw) string.
 *   SQUOTE      - inside a single-quoted character constant.
 */
%x RANGE_INT
%x RANGE_PUNCT
%x DQUOTE
%x SQUOTE
%%
[[:blank:]\n]+ {
/* Ignore whitespace, unless set elements are being parsed. Perhaps it
* should have used commas from the beginning, but now we are stuck with
* whitespace as separators.
*
* Outside a set this action does not return, so the scanner simply
* carries on with the next match and no token is produced. */
if (yyextra->in_set) {
return simple(TOKEN_WHITESPACE, NULL);
}
}
"(" return SIMPLE(TOKEN_LPAREN);
")" return SIMPLE(TOKEN_RPAREN);
"," return SIMPLE(TOKEN_COMMA);
"{"[[:blank:]\n]* {
/* Start of a set. Whitespace directly after the brace is swallowed by
* the pattern so it is not reported as an element separator; because
* yytext may therefore contain that whitespace, the token text is
* passed as a literal instead. */
yyextra->in_set = TRUE;
return simple(TOKEN_LBRACE, "{");
}
[[:blank:]\n]*".."[[:blank:]\n]* return simple(TOKEN_DOTDOT, ".."); /* surrounding whitespace is part of the match, token text is the bare ".." */
[[:blank:]\n]*"}" {
/* End of a set. Whitespace directly before the brace is swallowed by
* the pattern, and whitespace stops being significant again. */
yyextra->in_set = FALSE;
return simple(TOKEN_RBRACE, "}");
}
"==" return SIMPLE(TOKEN_TEST_EQ);
"eq" return SIMPLE(TOKEN_TEST_EQ);
"!=" {
/* Report this spelling through add_deprecated_token() and then return
* the ordinary not-equal token. */
add_deprecated_token(yyextra->dfw, "!=");
return SIMPLE(TOKEN_TEST_NE);
}
"ne" {
/* Same handling as "!=": reported through add_deprecated_token(). */
add_deprecated_token(yyextra->dfw, "ne");
return SIMPLE(TOKEN_TEST_NE);
}
">" return SIMPLE(TOKEN_TEST_GT);
"gt" return SIMPLE(TOKEN_TEST_GT);
">=" return SIMPLE(TOKEN_TEST_GE); /* longer match, so preferred over ">" */
"ge" return SIMPLE(TOKEN_TEST_GE);
"<" return SIMPLE(TOKEN_TEST_LT);
"lt" return SIMPLE(TOKEN_TEST_LT);
"<=" return SIMPLE(TOKEN_TEST_LE); /* longer match, so preferred over "<" */
"le" return SIMPLE(TOKEN_TEST_LE);
"bitwise_and" return SIMPLE(TOKEN_TEST_BITWISE_AND);
"&" return SIMPLE(TOKEN_TEST_BITWISE_AND);
"contains" return SIMPLE(TOKEN_TEST_CONTAINS);
"~" return SIMPLE(TOKEN_TEST_MATCHES);
"matches" return SIMPLE(TOKEN_TEST_MATCHES);
"!" return SIMPLE(TOKEN_TEST_NOT);
"not" return SIMPLE(TOKEN_TEST_NOT);
"&&" return SIMPLE(TOKEN_TEST_AND); /* longer match, so preferred over "&" */
"and" return SIMPLE(TOKEN_TEST_AND);
"||" return SIMPLE(TOKEN_TEST_OR);
"or" return SIMPLE(TOKEN_TEST_OR);
"in" return SIMPLE(TOKEN_TEST_IN);
/*
* The syntax for ranges must handle slice[-d-d] and slice[-d--5], e.g.:
* frame[-10-5] (minus ten to five)
* frame[-10--5] (minus ten to minus 5)
*
* This is done with two alternating states: RANGE_INT, where a leading
* sign belongs to the number, and RANGE_PUNCT, where "-" is a separator.
*/
"[" {
/* Opening bracket of a slice: an integer is expected next. */
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_LBRACKET);
}
<RANGE_INT>[+-]?[[:alnum:]]+ {
/* Optionally signed run of alphanumerics, returned as TOKEN_INTEGER
* with its text attached. The pattern does not check that the text is
* really numeric. Punctuation is expected next. */
BEGIN(RANGE_PUNCT);
return set_lval_str(TOKEN_INTEGER, yytext);
}
<RANGE_INT,RANGE_PUNCT>":" {
/* Accepted in either state; an integer is expected next. */
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_COLON);
}
<RANGE_PUNCT>"-" {
/* Only a separator when it follows an integer; in RANGE_INT a "-" is
* taken as the sign of the number instead. */
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_HYPHEN);
}
<RANGE_INT,RANGE_PUNCT>"," {
/* Accepted in either state; an integer is expected next. */
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_COMMA);
}
<RANGE_INT,RANGE_PUNCT>"]" {
/* Closing bracket: leave the slice and resume normal scanning. */
BEGIN(INITIAL);
return SIMPLE(TOKEN_RBRACKET);
}
/* Error if none of the above while scanning a range (slice) */
<RANGE_PUNCT>[^:\-,\]]+ {
/* Anything that is not slice punctuation where punctuation was
* expected is returned as TOKEN_UNPARSED with its text. */
BEGIN(RANGE_INT);
return set_lval_str(TOKEN_UNPARSED, yytext);
}
<RANGE_INT>[+-]?[^[:alnum:]\]]+ {
/* Anything that is neither alphanumeric nor "]" where an integer was
* expected is returned as TOKEN_UNPARSED with its text. */
BEGIN(RANGE_PUNCT);
return set_lval_str(TOKEN_UNPARSED, yytext);
}
[rR]{0,1}\042 {
    /*
     * Opening double quote of a string, optionally preceded by 'r' or 'R'
     * to mark a raw string.
     *
     * The string-scanning technique follows the flex 2.5.4 manual,
     * section "Start Conditions":
     * http://www.gnu.org/software/flex/manual/html_node/flex_11.html
     */
    BEGIN(DQUOTE);

    /*
     * A previous filter that failed to compile because of a missing end
     * quote will have left quoted_string set. Release that buffer; there
     * is no need to NULL the pointer as it is reassigned right below.
     */
    if (yyextra->quoted_string != NULL) {
        g_string_free(yyextra->quoted_string, TRUE);
    }
    yyextra->quoted_string = g_string_new("");

    /*
     * Raw strings (like in Python). Rules:
     *  1) The two escape sequences are \\ and \".
     *  2) Backslashes are preserved.
     *  3) Double quotes in the string must be escaped.
     * Corollary: strings cannot end with an odd number of backslashes.
     * Example: r"a\b\x12\"\\" is the string (including the implicit NUL
     * terminator)
     * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
     */
    yyextra->raw_string = (yytext[0] == 'r' || yytext[0] == 'R') ? TRUE : FALSE;
}
<DQUOTE><<EOF>> {
    /*
     * End of input reached inside a double-quoted string: the closing
     * quote is missing.
     *
     * The technique for handling unclosed strings follows the flex 2.5.4
     * manual, section "End-of-file rules":
     * http://www.gnu.org/software/flex/manual/html_node/flex_13.html
     *
     * Release the partially collected string before failing, exactly as
     * the other failure paths in this state (bad octal / hex escapes) do,
     * instead of leaving the buffer allocated. The NULL check covers the
     * case where an earlier failure in this state already released it.
     */
    if (yyextra->quoted_string != NULL) {
        g_string_free(yyextra->quoted_string, TRUE);
        yyextra->quoted_string = NULL;
    }
    dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
    return SCAN_FAILED;
}
<DQUOTE>\042 {
    /*
     * Closing quote: hand the collected text to set_lval_str() as a
     * TOKEN_STRING, then drop the scratch buffer and go back to normal
     * scanning. The token id is captured first because the buffer it was
     * built from is released before returning.
     */
    int string_token = set_lval_str(TOKEN_STRING, yyextra->quoted_string->str);

    g_string_free(yyextra->quoted_string, TRUE);
    yyextra->quoted_string = NULL;
    BEGIN(INITIAL);
    return string_token;
}
<DQUOTE>\\[0-7]{1,3} {
    /*
     * Octal escape sequence: a backslash followed by one to three octal
     * digits. In a raw string the text is kept verbatim; otherwise it is
     * converted to the single byte it denotes.
     */
    if (!yyextra->raw_string) {
        /* Skip the backslash and parse the digits as base 8. */
        const unsigned long octal_value = strtoul(yytext + 1, NULL, 8);

        /* A NUL byte or a value that does not fit in one byte is an error. */
        if (octal_value == 0 || octal_value > 0xff) {
            g_string_free(yyextra->quoted_string, TRUE);
            yyextra->quoted_string = NULL;
            if (octal_value == 0) {
                dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
            }
            else {
                dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
            }
            return SCAN_FAILED;
        }
        g_string_append_c(yyextra->quoted_string, (gchar) octal_value);
    }
    else {
        g_string_append(yyextra->quoted_string, yytext);
    }
}
<DQUOTE>\\x[[:xdigit:]]{1,2} {
    /*
     * Hexadecimal escape sequence: "\x" followed by one or two hex digits.
     * In a raw string the text is kept verbatim; otherwise it is converted
     * to the single byte it denotes.
     */
    if (!yyextra->raw_string) {
        /* Skip the "\x" prefix and parse the digits as base 16. */
        const unsigned long hex_value = strtoul(yytext + 2, NULL, 16);

        /* A NUL byte is rejected; two hex digits cannot exceed 0xff. */
        if (hex_value == 0) {
            g_string_free(yyextra->quoted_string, TRUE);
            yyextra->quoted_string = NULL;
            dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
            return SCAN_FAILED;
        }
        g_string_append_c(yyextra->quoted_string, (gchar) hex_value);
    }
    else {
        g_string_append(yyextra->quoted_string, yytext);
    }
}
<DQUOTE>\\. {
    /*
     * Any other backslash escape. A regular string keeps only the
     * character after the backslash; a raw string keeps both characters.
     */
    if (!yyextra->raw_string) {
        g_string_append_c(yyextra->quoted_string, yytext[1]);
    }
    else {
        g_string_append(yyextra->quoted_string, yytext);
    }
}
<DQUOTE>[^\\\042]+ {
    /* Run of ordinary characters (no backslash, no double quote): copied as is. */
    g_string_append(yyextra->quoted_string, yytext);
}
\047 {
    /*
     * Opening single quote of a character constant.
     *
     * The scanning technique follows the Flex manual, section
     * "Start Conditions":
     * http://flex.sourceforge.net/manual/Start-Conditions.html#Start-Conditions
     */

    /*
     * A previous filter that failed to compile because of a missing end
     * quote will have left quoted_string set. Release that buffer; there
     * is no need to NULL the pointer as it is reassigned right below.
     */
    if (yyextra->quoted_string != NULL) {
        g_string_free(yyextra->quoted_string, TRUE);
    }

    /* The collected text starts with the opening quote itself. */
    yyextra->quoted_string = g_string_new("'");
    BEGIN(SQUOTE);
}
<SQUOTE><<EOF>> {
    /*
     * End of input reached inside a character constant: the closing
     * quote is missing.
     *
     * The technique for handling unclosed strings follows the Flex
     * manual, section "End-of-file rules":
     * http://flex.sourceforge.net/manual/EOF.html#EOF.html
     *
     * Release the partially collected text before failing, as the
     * failure paths of the double-quoted string rules do, instead of
     * leaving the buffer allocated.
     */
    if (yyextra->quoted_string != NULL) {
        g_string_free(yyextra->quoted_string, TRUE);
        yyextra->quoted_string = NULL;
    }
    dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
    return SCAN_FAILED;
}
<SQUOTE>\047 {
    /*
     * Closing quote: append it so the token text carries both quotes,
     * pass the text to set_lval_str() as a TOKEN_CHARCONST, then drop the
     * scratch buffer and go back to normal scanning.
     */
    int charconst_token;

    g_string_append_c(yyextra->quoted_string, '\'');
    charconst_token = set_lval_str(TOKEN_CHARCONST, yyextra->quoted_string->str);
    g_string_free(yyextra->quoted_string, TRUE);
    yyextra->quoted_string = NULL;
    BEGIN(INITIAL);
    return charconst_token;
}
<SQUOTE>\\. {
/* Escaped character: the backslash and the character after it are both
* copied verbatim; no unescaping is done by the scanner here. */
g_string_append(yyextra->quoted_string, yytext);
}
<SQUOTE>[^\\\047]+ {
/* Run of ordinary characters (no backslash, no single quote): copied
* as is. */
g_string_append(yyextra->quoted_string, yytext);
}
[-[:alnum:]_\.:]*\/[[:digit:]]+ {
/* CIDR-style text: an optional address-like prefix, a slash and a run
* of digits. Returned as TOKEN_UNPARSED with its text. */
return set_lval_str(TOKEN_UNPARSED, yytext);
}
([.][-+[:alnum:]_:]+)+[.]{0,2} |
[-+[:alnum:]_:]+([.][-+[:alnum:]_:]+)*[.]{0,2} {
/* Is it a field name or some other value (float, integer, bytes, ...)?
* The scanner does not decide; both alternatives share this action. */
/* Trailing dot is allowed for floats, but make sure that trailing ".."
* is interpreted as a token on its own. The patterns only allow two
* consecutive dots at the very end of the match, so when ".." is found
* the last two characters are pushed back with yyless(), which also
* shortens yytext, leaving them to be scanned again as the next token. */
if (strstr(yytext, "..")) {
yyless(yyleng-2);
}
/* Return the text unclassified, as TOKEN_UNPARSED. */
return set_lval_str(TOKEN_UNPARSED, yytext);
}
. {
/* Default: any single character not matched by an earlier rule is
* returned as TOKEN_UNPARSED. */
return set_lval_str(TOKEN_UNPARSED, yytext);
}
%%
/*
* Turn diagnostics back on, so we check the code that we've written.
*/
DIAG_ON_FLEX
/*
 * Initialise df_lval for a token that has no value attached (punctuation
 * and operators) and return the token id.
 *
 * token       - one of the punctuation / operator token ids listed below;
 *               any other id trips ws_assert_not_reached().
 * token_value - text stored with the node as its token text; may be NULL.
 */
static int
simple(int token, const char *token_value)
{
    /* The complete set of token ids this helper accepts. */
    static const int valueless_tokens[] = {
        TOKEN_LPAREN,
        TOKEN_RPAREN,
        TOKEN_LBRACKET,
        TOKEN_RBRACKET,
        TOKEN_LBRACE,
        TOKEN_RBRACE,
        TOKEN_COLON,
        TOKEN_COMMA,
        TOKEN_DOTDOT,
        TOKEN_HYPHEN,
        TOKEN_WHITESPACE,
        TOKEN_TEST_EQ,
        TOKEN_TEST_NE,
        TOKEN_TEST_GT,
        TOKEN_TEST_GE,
        TOKEN_TEST_LT,
        TOKEN_TEST_LE,
        TOKEN_TEST_BITWISE_AND,
        TOKEN_TEST_CONTAINS,
        TOKEN_TEST_MATCHES,
        TOKEN_TEST_NOT,
        TOKEN_TEST_AND,
        TOKEN_TEST_OR,
        TOKEN_TEST_IN
    };
    const size_t n_tokens = sizeof valueless_tokens / sizeof valueless_tokens[0];
    size_t idx = 0;

    /* Linear search; stops at the first entry equal to the given id. */
    while (idx < n_tokens && valueless_tokens[idx] != token) {
        idx++;
    }
    if (idx == n_tokens) {
        ws_assert_not_reached();
    }

    stnode_init(df_lval, STTYPE_UNINITIALIZED, NULL, token_value);
    return token;
}
/*
 * Initialise df_lval for a token that carries text and return the token id.
 *
 * token       - TOKEN_STRING, TOKEN_CHARCONST, TOKEN_UNPARSED or
 *               TOKEN_INTEGER; any other id trips ws_assert_not_reached().
 * token_value - the text; it is passed to stnode_init() both as the node
 *               data and as the token text.
 */
static int
set_lval_str(int token, const char *token_value)
{
    sttype_id_t node_type;

    /* Map the token id to the syntax-tree node type it is stored as. */
    if (token == TOKEN_STRING) {
        node_type = STTYPE_STRING;
    }
    else if (token == TOKEN_CHARCONST) {
        node_type = STTYPE_CHARCONST;
    }
    else if (token == TOKEN_UNPARSED) {
        node_type = STTYPE_UNPARSED;
    }
    else if (token == TOKEN_INTEGER) {
        /* Not used in AST. */
        node_type = STTYPE_UNINITIALIZED;
    }
    else {
        ws_assert_not_reached();
    }

    stnode_init(df_lval, node_type, (gpointer)token_value, token_value);
    return token;
}