2016-12-02 19:18:50 +00:00
|
|
|
%top {
|
|
|
|
/* Include this before everything else, for various large-file definitions */
|
|
|
|
#include "config.h"
|
2021-10-17 12:17:47 +00:00
|
|
|
#include <wireshark.h>
|
2021-10-06 15:03:19 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <errno.h>
|
|
|
|
|
2022-02-19 17:49:29 +00:00
|
|
|
#include <wsutil/str_util.h>
|
|
|
|
|
2021-10-06 15:03:19 +00:00
|
|
|
#include "dfilter-int.h"
|
|
|
|
#include "syntax-tree.h"
|
|
|
|
#include "grammar.h"
|
|
|
|
#include "dfunctions.h"
|
2016-12-02 19:18:50 +00:00
|
|
|
}
|
|
|
|
|
2016-03-31 01:44:01 +00:00
|
|
|
/*
|
|
|
|
* We want a reentrant scanner.
|
|
|
|
*/
|
|
|
|
%option reentrant
|
|
|
|
|
2013-02-10 19:13:07 +00:00
|
|
|
/*
|
|
|
|
* We don't use input, so don't generate code for it.
|
|
|
|
*/
|
|
|
|
%option noinput
|
|
|
|
|
2011-04-23 19:03:05 +00:00
|
|
|
/*
|
|
|
|
* We don't use unput, so don't generate code for it.
|
|
|
|
*/
|
|
|
|
%option nounput
|
|
|
|
|
|
|
|
/*
|
2015-11-05 23:35:04 +00:00
|
|
|
* We don't read interactively from the terminal.
|
2011-04-23 19:03:05 +00:00
|
|
|
*/
|
|
|
|
%option never-interactive
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Prefix scanner routines with "df_" rather than "yy", so this scanner
|
|
|
|
* can coexist with other scanners.
|
|
|
|
*/
|
|
|
|
%option prefix="df_"
|
|
|
|
|
2015-12-05 03:52:51 +00:00
|
|
|
/*
|
|
|
|
* We're reading from a string, so we don't need yywrap.
|
|
|
|
*/
|
|
|
|
%option noyywrap
|
|
|
|
|
2016-03-31 01:44:01 +00:00
|
|
|
/*
|
|
|
|
* The type for the state we keep for a scanner.
|
|
|
|
*/
|
|
|
|
%option extra-type="df_scanner_state_t *"
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We have to override the memory allocators so that we don't get
|
|
|
|
* "unused argument" warnings from the yyscanner argument (which
|
|
|
|
* we don't use, as we have a global memory allocator).
|
|
|
|
*
|
|
|
|
* We provide, as macros, our own versions of the routines generated by Flex,
|
|
|
|
* which just call malloc()/realloc()/free() (as the Flex versions do),
|
|
|
|
* discarding the extra argument.
|
|
|
|
*/
|
|
|
|
%option noyyalloc
|
|
|
|
%option noyyrealloc
|
|
|
|
%option noyyfree
|
|
|
|
|
2011-04-23 19:03:05 +00:00
|
|
|
%{
|
2001-02-01 20:31:21 +00:00
|
|
|
/*
|
2006-05-21 05:12:17 +00:00
|
|
|
* Wireshark - Network traffic analyzer
|
|
|
|
* By Gerald Combs <gerald@wireshark.org>
|
2001-02-01 20:31:21 +00:00
|
|
|
* Copyright 2001 Gerald Combs
|
2008-01-31 19:50:38 +00:00
|
|
|
*
|
2018-03-06 14:31:02 +00:00
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
2001-02-01 20:21:25 +00:00
|
|
|
*/
|
|
|
|
|
2018-02-16 07:17:04 +00:00
|
|
|
/*
|
|
|
|
* Disable diagnostics in the code generated by Flex.
|
|
|
|
*/
|
|
|
|
DIAG_OFF_FLEX
|
2008-04-25 18:26:54 +00:00
|
|
|
|
2021-10-26 09:09:36 +00:00
|
|
|
df_lval_t *df_lval;
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2021-09-26 21:22:50 +00:00
|
|
|
static int set_lval_str(int token, const char *token_value);
|
2021-10-29 14:59:39 +00:00
|
|
|
#define simple(token) set_lval_str(token, yytext)
|
2021-11-23 13:40:14 +00:00
|
|
|
static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c);
|
2021-11-23 21:20:34 +00:00
|
|
|
static gboolean parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep);
|
2003-07-25 03:44:05 +00:00
|
|
|
|
2016-03-31 01:44:01 +00:00
|
|
|
/*
|
|
|
|
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
|
|
|
|
*/
|
|
|
|
#define YY_EXIT_FAILURE ((void)yyscanner, 2)
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Macros for the allocators, to discard the extra argument.
|
|
|
|
*/
|
|
|
|
/*
 * Each macro forwards to the C library allocator and drops the
 * yyscanner argument. Every macro parameter is parenthesized so that
 * an expression argument (e.g. "p - 2") is evaluated as a whole before
 * the cast is applied.
 */
#define df_alloc(size, yyscanner)		(void *)malloc((size))
#define df_realloc(ptr, size, yyscanner)	(void *)realloc((char *)(ptr), (size))
#define df_free(ptr, yyscanner)			free((char *)(ptr))
|
|
|
|
|
2001-02-01 20:21:25 +00:00
|
|
|
%}
|
|
|
|
|
2022-04-05 14:38:20 +00:00
|
|
|
WORD_CHAR [[:alnum:]_-]
|
2022-04-04 18:58:35 +00:00
|
|
|
|
|
|
|
hex2 [[:xdigit:]]{2}
|
|
|
|
MacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}|{hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}|{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
|
|
|
|
|
|
|
|
hex4 [[:xdigit:]]{4}
|
|
|
|
QuadMacAddress {hex4}\.{hex4}\.{hex4}
|
|
|
|
|
|
|
|
dec-octet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
|
|
|
IPv4address {dec-octet}\.{dec-octet}\.{dec-octet}\.{dec-octet}
|
|
|
|
|
|
|
|
h16 [0-9A-Fa-f]{1,4}
|
|
|
|
ls32 {h16}:{h16}|{IPv4address}
|
|
|
|
IPv6address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
|
|
|
|
|
|
|
|
v4-cidr-prefix \/[[:digit:]]{1,2}
|
|
|
|
v6-cidr-prefix \/[[:digit:]]{1,3}
|
2022-02-22 21:55:05 +00:00
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
%x RANGE
|
2003-07-25 03:44:05 +00:00
|
|
|
%x DQUOTE
|
2016-09-19 01:48:50 +00:00
|
|
|
%x SQUOTE
|
2022-03-27 14:26:46 +00:00
|
|
|
%x REFERENCE
|
2001-02-01 20:21:25 +00:00
|
|
|
|
|
|
|
%%
|
|
|
|
|
2021-10-25 17:33:17 +00:00
|
|
|
[[:blank:]\n]+
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2021-10-29 14:59:39 +00:00
|
|
|
"(" return simple(TOKEN_LPAREN);
|
|
|
|
")" return simple(TOKEN_RPAREN);
|
2021-10-25 17:33:17 +00:00
|
|
|
"," return simple(TOKEN_COMMA);
|
|
|
|
"{" return simple(TOKEN_LBRACE);
|
|
|
|
".." return simple(TOKEN_DOTDOT);
|
|
|
|
"}" return simple(TOKEN_RBRACE);
|
2022-02-27 09:56:41 +00:00
|
|
|
"+" return simple(TOKEN_PLUS);
|
|
|
|
"-" return simple(TOKEN_MINUS);
|
2022-03-31 13:50:20 +00:00
|
|
|
"*" return simple(TOKEN_STAR);
|
|
|
|
"/" return simple(TOKEN_RSLASH);
|
|
|
|
"%" return simple(TOKEN_PERCENT);
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2021-10-29 14:59:39 +00:00
|
|
|
"==" return simple(TOKEN_TEST_ANY_EQ);
|
|
|
|
"eq" return simple(TOKEN_TEST_ANY_EQ);
|
2021-12-22 09:36:46 +00:00
|
|
|
"any_eq" return simple(TOKEN_TEST_ANY_EQ);
|
2021-10-29 14:59:39 +00:00
|
|
|
"!=" return simple(TOKEN_TEST_ALL_NE);
|
|
|
|
"ne" return simple(TOKEN_TEST_ALL_NE);
|
2021-12-22 09:36:46 +00:00
|
|
|
"all_ne" return simple(TOKEN_TEST_ALL_NE);
|
2021-12-13 01:06:01 +00:00
|
|
|
"===" return simple(TOKEN_TEST_ALL_EQ);
|
|
|
|
"all_eq" return simple(TOKEN_TEST_ALL_EQ);
|
|
|
|
"!==" return simple(TOKEN_TEST_ANY_NE);
|
2022-03-05 12:30:34 +00:00
|
|
|
"~=" {
|
|
|
|
add_deprecated_token(yyextra->dfw, "The operator \"~=\" is deprecated, use \"!==\" instead.");
|
|
|
|
return simple(TOKEN_TEST_ANY_NE);
|
|
|
|
}
|
2021-10-29 14:59:39 +00:00
|
|
|
"any_ne" return simple(TOKEN_TEST_ANY_NE);
|
|
|
|
">" return simple(TOKEN_TEST_GT);
|
|
|
|
"gt" return simple(TOKEN_TEST_GT);
|
|
|
|
">=" return simple(TOKEN_TEST_GE);
|
|
|
|
"ge" return simple(TOKEN_TEST_GE);
|
|
|
|
"<" return simple(TOKEN_TEST_LT);
|
|
|
|
"lt" return simple(TOKEN_TEST_LT);
|
|
|
|
"<=" return simple(TOKEN_TEST_LE);
|
|
|
|
"le" return simple(TOKEN_TEST_LE);
|
|
|
|
"contains" return simple(TOKEN_TEST_CONTAINS);
|
|
|
|
"~" return simple(TOKEN_TEST_MATCHES);
|
|
|
|
"matches" return simple(TOKEN_TEST_MATCHES);
|
|
|
|
"!" return simple(TOKEN_TEST_NOT);
|
|
|
|
"not" return simple(TOKEN_TEST_NOT);
|
|
|
|
"&&" return simple(TOKEN_TEST_AND);
|
|
|
|
"and" return simple(TOKEN_TEST_AND);
|
|
|
|
"||" return simple(TOKEN_TEST_OR);
|
|
|
|
"or" return simple(TOKEN_TEST_OR);
|
|
|
|
"in" return simple(TOKEN_TEST_IN);
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2022-02-25 19:37:53 +00:00
|
|
|
"&" return simple(TOKEN_BITWISE_AND);
|
|
|
|
"bitwise_and" return simple(TOKEN_BITWISE_AND);
|
|
|
|
|
2022-03-27 14:26:46 +00:00
|
|
|
"${" {
|
|
|
|
BEGIN(REFERENCE);
|
|
|
|
return simple(TOKEN_REF_OPEN);
|
|
|
|
}
|
|
|
|
|
|
|
|
<REFERENCE>[^}]+ {
|
|
|
|
return set_lval_str(TOKEN_REFERENCE, yytext);
|
|
|
|
}
|
|
|
|
|
|
|
|
<REFERENCE>"}" {
|
|
|
|
BEGIN(INITIAL);
|
|
|
|
return simple(TOKEN_REF_CLOSE);
|
|
|
|
}
|
|
|
|
|
|
|
|
<REFERENCE><<EOF>> {
|
|
|
|
dfilter_fail(yyextra->dfw, "Right brace missing from field reference.");
|
|
|
|
return SCAN_FAILED;
|
|
|
|
}
|
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
"[" {
|
|
|
|
BEGIN(RANGE);
|
2021-10-29 14:59:39 +00:00
|
|
|
return simple(TOKEN_LBRACKET);
|
2001-02-01 20:21:25 +00:00
|
|
|
}
|
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
<RANGE>[^],]+ {
|
|
|
|
return set_lval_str(TOKEN_RANGE, yytext);
|
2001-03-02 17:04:25 +00:00
|
|
|
}
|
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
<RANGE>"," {
|
2021-10-29 14:59:39 +00:00
|
|
|
return simple(TOKEN_COMMA);
|
2001-03-02 17:04:25 +00:00
|
|
|
}
|
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
<RANGE>"]" {
|
2001-02-01 20:21:25 +00:00
|
|
|
BEGIN(INITIAL);
|
2021-10-29 14:59:39 +00:00
|
|
|
return simple(TOKEN_RBRACKET);
|
2001-02-01 20:21:25 +00:00
|
|
|
}
|
|
|
|
|
2021-10-25 20:27:40 +00:00
|
|
|
<RANGE><<EOF>> {
|
|
|
|
dfilter_fail(yyextra->dfw, "The right bracket was missing from a slice.");
|
|
|
|
return SCAN_FAILED;
|
2004-06-03 07:17:24 +00:00
|
|
|
}
|
|
|
|
|
2021-05-30 02:38:12 +00:00
|
|
|
[rR]{0,1}\042 {
	/*
	 * Opening quote of a double-quoted string, optionally preceded
	 * by 'r' or 'R' to mark it as a raw string.
	 *
	 * The string-scanning approach follows the example in the
	 * "Start Conditions" section of the flex manual.
	 * See: https://westes.github.io/flex/manual/Start-Conditions.html
	 */
	BEGIN(DQUOTE);
	yyextra->quoted_string = g_string_new("");

	/*
	 * Raw strings work like Python's. Rules:
	 *   1) The two escape sequences are \\ and \".
	 *   2) Backslashes are preserved.
	 *   3) Double quotes in the string must be escaped.
	 * Corollary: a raw string cannot end with an odd number of
	 * backslashes.
	 * Example: r"a\b\x12\"\\" is the string (including the implicit
	 * NUL terminator)
	 * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
	 */
	yyextra->raw_string = (yytext[0] == 'r' || yytext[0] == 'R') ? TRUE : FALSE;
}
|
|
|
|
|
2004-02-11 22:52:54 +00:00
|
|
|
<DQUOTE><<EOF>> {
|
|
|
|
/* unterminated string */
|
2021-10-31 15:52:05 +00:00
|
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
|
|
yyextra->quoted_string = NULL;
|
2016-03-31 01:44:01 +00:00
|
|
|
dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
|
2004-02-11 22:52:54 +00:00
|
|
|
return SCAN_FAILED;
|
|
|
|
}
|
|
|
|
|
2007-08-11 22:05:44 +00:00
|
|
|
<DQUOTE>\042 {
|
2003-07-25 03:44:05 +00:00
|
|
|
/* end quote */
|
|
|
|
BEGIN(INITIAL);
|
2021-10-29 12:53:32 +00:00
|
|
|
df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
|
2016-03-31 01:44:01 +00:00
|
|
|
yyextra->quoted_string = NULL;
|
2021-10-29 12:53:32 +00:00
|
|
|
return TOKEN_STRING;
|
2003-07-25 03:44:05 +00:00
|
|
|
}
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2003-07-25 03:44:05 +00:00
|
|
|
<DQUOTE>\\[0-7]{1,3} {
	/*
	 * Octal escape: a backslash followed by one to three octal
	 * digits. Raw strings keep the text as written; regular strings
	 * get the single byte the digits encode.
	 */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		/* Skip the backslash and convert the digits. */
		unsigned long octal_value = strtoul(&yytext[1], NULL, 8);

		if (octal_value == 0 || octal_value > 0xff) {
			/*
			 * A NUL byte would terminate the C string early,
			 * and anything above 255 does not fit in a byte.
			 * Either way the partial string is discarded
			 * before the error is reported.
			 */
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			if (octal_value == 0) {
				dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			}
			else {
				dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
			}
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) octal_value);
	}
}
|
|
|
|
|
2003-07-25 03:44:05 +00:00
|
|
|
<DQUOTE>\\x[[:xdigit:]]{1,2} {
	/*
	 * Hex escape. The C standard puts no limit on the number of hex
	 * digits after \x, but this scanner does: \xN or \xNN, not more.
	 */
	if (!yyextra->raw_string) {
		/* Skip the leading "\x" and convert the digits. */
		unsigned long byte_value = strtoul(&yytext[2], NULL, 16);

		if (byte_value == 0) {
			/* An embedded NUL would cut the C string short. */
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) byte_value);
	}
	else {
		/* Raw strings keep the escape text as written. */
		g_string_append(yyextra->quoted_string, yytext);
	}
}
|
2001-02-01 20:21:25 +00:00
|
|
|
|
|
|
|
|
2003-07-25 03:44:05 +00:00
|
|
|
<DQUOTE>\\(.|\n) {
	/*
	 * Backslash followed by any single character.
	 *
	 * The pattern names newline explicitly because flex's "." never
	 * matches it. Without that, a backslash-newline pair inside a
	 * string matches no DQUOTE rule, and the backslash is consumed by
	 * flex's default rule, which echoes it to yyout and leaves it out
	 * of the string.
	 *
	 * Raw strings keep the two characters as written. Regular strings
	 * translate the escape, and an unknown escape fails the scan.
	 */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) {
		/* The error was already reported; discard the partial string. */
		g_string_free(yyextra->quoted_string, TRUE);
		yyextra->quoted_string = NULL;
		return SCAN_FAILED;
	}
}
|
|
|
|
|
2007-08-11 22:05:44 +00:00
|
|
|
<DQUOTE>[^\\\042]+ {
|
2003-07-25 03:44:05 +00:00
|
|
|
/* non-escaped string */
|
2016-03-31 01:44:01 +00:00
|
|
|
g_string_append(yyextra->quoted_string, yytext);
|
2003-07-25 03:44:05 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2016-09-19 01:48:50 +00:00
|
|
|
\047 {
|
|
|
|
/* start quote of a quoted character value */
|
|
|
|
BEGIN(SQUOTE);
|
|
|
|
yyextra->quoted_string = g_string_new("'");
|
|
|
|
}
|
|
|
|
|
|
|
|
<SQUOTE><<EOF>> {
|
|
|
|
/* unterminated character value */
|
2021-10-31 15:52:05 +00:00
|
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
|
|
yyextra->quoted_string = NULL;
|
2016-09-19 01:48:50 +00:00
|
|
|
dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
|
|
|
|
return SCAN_FAILED;
|
|
|
|
}
|
|
|
|
|
|
|
|
<SQUOTE>\047 {
|
|
|
|
/* end quote */
|
|
|
|
BEGIN(INITIAL);
|
|
|
|
g_string_append_c(yyextra->quoted_string, '\'');
|
2021-10-29 12:53:32 +00:00
|
|
|
df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
|
2016-09-19 01:48:50 +00:00
|
|
|
yyextra->quoted_string = NULL;
|
2021-11-23 21:20:34 +00:00
|
|
|
|
|
|
|
if (!parse_charconst(yyextra->dfw, df_lval->value, &df_lval->number)) {
|
|
|
|
return SCAN_FAILED;
|
|
|
|
}
|
2021-10-29 12:53:32 +00:00
|
|
|
return TOKEN_CHARCONST;
|
2016-09-19 01:48:50 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
<SQUOTE>\\(.|\n) {
	/*
	 * Backslash followed by any single character. The text is kept
	 * as written; it is validated when the closing quote is seen.
	 *
	 * Newline is listed explicitly because flex's "." never matches
	 * it. Without that, the backslash of a backslash-newline pair
	 * matches no SQUOTE rule and is consumed by flex's default rule,
	 * which echoes it to yyout and leaves it out of the constant.
	 */
	g_string_append(yyextra->quoted_string, yytext);
}
|
|
|
|
|
|
|
|
<SQUOTE>[^\\\047]+ {
|
|
|
|
/* non-escaped string */
|
|
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2021-10-27 18:19:36 +00:00
|
|
|
/* None of the patterns below can match ".." anywhere in the token string. */
|
2001-02-01 20:21:25 +00:00
|
|
|
|
2022-04-04 18:58:35 +00:00
|
|
|
{MacAddress}|{QuadMacAddress} {
|
2022-04-06 08:29:45 +00:00
|
|
|
/* MAC Address literal. */
|
2022-04-04 18:58:35 +00:00
|
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
|
|
}
|
|
|
|
|
2022-04-06 08:29:45 +00:00
|
|
|
{IPv4address}{v4-cidr-prefix}? {
|
|
|
|
/* IPv4 with or without prefix. */
|
|
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
2022-02-22 21:55:05 +00:00
|
|
|
}
|
|
|
|
|
2022-04-06 08:29:45 +00:00
|
|
|
{IPv6address}{v6-cidr-prefix}? {
|
|
|
|
/* IPv6 with or without prefix. */
|
|
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
2022-02-22 21:55:05 +00:00
|
|
|
}
|
|
|
|
|
2022-04-04 18:58:35 +00:00
|
|
|
[[:xdigit:]]+:[[:xdigit:]:]* {
|
2022-04-06 08:29:45 +00:00
|
|
|
/* Bytes. */
|
|
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
2022-04-04 18:58:35 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
"<"[^>=]+">" {
|
2022-04-06 08:29:45 +00:00
|
|
|
/* Literal in-between angle brackets (cannot be parsed as a protocol field). */
|
2022-04-04 18:58:35 +00:00
|
|
|
return set_lval_str(TOKEN_LITERAL, yytext);
|
2022-02-22 21:55:05 +00:00
|
|
|
}
|
2018-04-16 11:02:41 +00:00
|
|
|
|
2022-04-05 14:38:20 +00:00
|
|
|
[:.]?[[:alnum:]_]{WORD_CHAR}*(\.{WORD_CHAR}+)* {
	/*
	 * Identifier, literal or unparsed word. An optional first
	 * character selects the token type: a leading '.' yields an
	 * identifier, a leading ':' yields a literal, and anything else
	 * is returned unparsed. The full matched text, prefix included,
	 * is stored as the token value.
	 */
	int word_token;

	switch (yytext[0]) {
		case '.':
			word_token = TOKEN_IDENTIFIER;
			break;
		case ':':
			word_token = TOKEN_LITERAL;
			break;
		default:
			word_token = TOKEN_UNPARSED;
			break;
	}
	return set_lval_str(word_token, yytext);
}
|
|
|
|
|
2001-06-22 16:29:15 +00:00
|
|
|
. {
|
2022-02-19 17:49:29 +00:00
|
|
|
/* Default */
|
|
|
|
if (isprint_string(yytext))
|
|
|
|
dfilter_fail(yyextra->dfw, "\"%s\" was unexpected in this context.", yytext);
|
|
|
|
else
|
|
|
|
dfilter_fail(yyextra->dfw, "Non-printable ASCII characters may only appear inside double-quotes.");
|
2021-10-27 18:19:36 +00:00
|
|
|
return SCAN_FAILED;
|
2001-06-22 16:29:15 +00:00
|
|
|
}
|
2001-02-01 20:21:25 +00:00
|
|
|
|
|
|
|
|
|
|
|
%%
|
|
|
|
|
2018-02-16 07:17:04 +00:00
|
|
|
/*
|
|
|
|
* Turn diagnostics back on, so we check the code that we've written.
|
|
|
|
*/
|
|
|
|
DIAG_ON_FLEX
|
2017-08-18 19:11:47 +00:00
|
|
|
|
2001-02-27 19:23:30 +00:00
|
|
|
static int
|
2021-09-26 21:22:50 +00:00
|
|
|
set_lval_str(int token, const char *token_value)
|
2001-02-01 20:21:25 +00:00
|
|
|
{
|
2021-10-26 09:09:36 +00:00
|
|
|
df_lval->value = g_strdup(token_value);
|
2021-09-26 21:22:50 +00:00
|
|
|
return token;
|
|
|
|
}
|
2021-10-31 15:48:22 +00:00
|
|
|
|
2021-11-23 13:40:14 +00:00
|
|
|
static gboolean
|
|
|
|
append_escaped_char(dfwork_t *dfw, GString *str, char c)
|
2021-10-31 15:48:22 +00:00
|
|
|
{
|
|
|
|
switch (c) {
|
|
|
|
case 'a':
|
|
|
|
c = '\a';
|
|
|
|
break;
|
|
|
|
case 'b':
|
|
|
|
c = '\b';
|
|
|
|
break;
|
|
|
|
case 'f':
|
|
|
|
c = '\f';
|
|
|
|
break;
|
|
|
|
case 'n':
|
|
|
|
c = '\n';
|
|
|
|
break;
|
|
|
|
case 'r':
|
|
|
|
c = '\r';
|
|
|
|
break;
|
|
|
|
case 't':
|
|
|
|
c = '\t';
|
|
|
|
break;
|
|
|
|
case 'v':
|
|
|
|
c = '\v';
|
|
|
|
break;
|
2021-11-23 13:40:14 +00:00
|
|
|
case '\\':
|
|
|
|
case '\'':
|
|
|
|
case '\"':
|
2021-10-31 15:48:22 +00:00
|
|
|
break;
|
2021-11-23 13:40:14 +00:00
|
|
|
default:
|
|
|
|
dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c);
|
|
|
|
return FALSE;
|
2021-10-31 15:48:22 +00:00
|
|
|
}
|
|
|
|
|
2021-11-23 13:40:14 +00:00
|
|
|
g_string_append_c(str, c);
|
|
|
|
return TRUE;
|
2021-10-31 15:48:22 +00:00
|
|
|
}
|
2021-11-23 21:20:34 +00:00
|
|
|
|
|
|
|
static gboolean
|
|
|
|
parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep)
|
|
|
|
{
|
|
|
|
const char *cp;
|
|
|
|
unsigned long value;
|
|
|
|
|
|
|
|
cp = s + 1; /* skip the leading ' */
|
2021-11-24 09:29:24 +00:00
|
|
|
if (*cp == '\'') {
|
|
|
|
dfilter_fail(dfw, "Empty character constant.");
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2021-11-23 21:20:34 +00:00
|
|
|
if (*cp == '\\') {
|
|
|
|
/*
|
|
|
|
* C escape sequence.
|
|
|
|
* An escape sequence is an octal number \NNN,
|
|
|
|
* an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
|
|
|
|
* that stands for the byte value of the equivalent
|
|
|
|
* C-escape in ASCII encoding.
|
|
|
|
*/
|
|
|
|
cp++;
|
|
|
|
switch (*cp) {
|
|
|
|
|
|
|
|
case '\0':
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
case 'a':
|
|
|
|
value = '\a';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'b':
|
|
|
|
value = '\b';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'f':
|
|
|
|
value = '\f';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'n':
|
|
|
|
value = '\n';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'r':
|
|
|
|
value = '\r';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 't':
|
|
|
|
value = '\t';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'v':
|
|
|
|
value = '\v';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\'':
|
|
|
|
value = '\'';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\\':
|
|
|
|
value = '\\';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '"':
|
|
|
|
value = '"';
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
cp++;
|
|
|
|
if (*cp >= '0' && *cp <= '9')
|
|
|
|
value = *cp - '0';
|
|
|
|
else if (*cp >= 'A' && *cp <= 'F')
|
|
|
|
value = 10 + (*cp - 'A');
|
|
|
|
else if (*cp >= 'a' && *cp <= 'f')
|
|
|
|
value = 10 + (*cp - 'a');
|
|
|
|
else {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
if (*cp != '\'') {
|
|
|
|
value <<= 4;
|
|
|
|
if (*cp >= '0' && *cp <= '9')
|
|
|
|
value |= *cp - '0';
|
|
|
|
else if (*cp >= 'A' && *cp <= 'F')
|
|
|
|
value |= 10 + (*cp - 'A');
|
|
|
|
else if (*cp >= 'a' && *cp <= 'f')
|
|
|
|
value |= 10 + (*cp - 'a');
|
|
|
|
else {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/* Octal */
|
|
|
|
if (*cp >= '0' && *cp <= '7')
|
|
|
|
value = *cp - '0';
|
|
|
|
else {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
if (*(cp + 1) != '\'') {
|
|
|
|
cp++;
|
|
|
|
value <<= 3;
|
|
|
|
if (*cp >= '0' && *cp <= '7')
|
|
|
|
value |= *cp - '0';
|
|
|
|
else {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
if (*(cp + 1) != '\'') {
|
|
|
|
cp++;
|
|
|
|
value <<= 3;
|
|
|
|
if (*cp >= '0' && *cp <= '7')
|
|
|
|
value |= *cp - '0';
|
|
|
|
else {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (value > 0xFF) {
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s is too large to be a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
value = *cp;
|
|
|
|
if (!g_ascii_isprint(value)) {
|
2021-11-27 17:31:16 +00:00
|
|
|
dfilter_fail(dfw, "Non-printable value '0x%02lx' in character constant.", value);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
cp++;
|
|
|
|
if ((*cp != '\'') || (*(cp + 1) != '\0')){
|
2021-11-24 09:29:24 +00:00
|
|
|
dfilter_fail(dfw, "%s is too long to be a valid character constant.", s);
|
2021-11-23 21:20:34 +00:00
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
|
|
|
*valuep = value;
|
|
|
|
return TRUE;
|
|
|
|
}
|