forked from osmocom/wireshark
606 lines
14 KiB
Plaintext
606 lines
14 KiB
Plaintext
%top {
|
|
/* Include this before everything else, for various large-file definitions */
|
|
#include "config.h"
|
|
#include <wireshark.h>
|
|
|
|
#include <stdlib.h>
|
|
#include <errno.h>
|
|
|
|
#include <wsutil/str_util.h>
|
|
|
|
#include "dfilter-int.h"
|
|
#include "syntax-tree.h"
|
|
#include "grammar.h"
|
|
#include "dfunctions.h"
|
|
}
|
|
|
|
/*
|
|
* We want a reentrant scanner.
|
|
*/
|
|
%option reentrant
|
|
|
|
/*
|
|
* We don't use input, so don't generate code for it.
|
|
*/
|
|
%option noinput
|
|
|
|
/*
|
|
* We don't use unput, so don't generate code for it.
|
|
*/
|
|
%option nounput
|
|
|
|
/*
|
|
* We don't read interactively from the terminal.
|
|
*/
|
|
%option never-interactive
|
|
|
|
/*
|
|
* Prefix scanner routines with "df_" rather than "yy", so this scanner
|
|
* can coexist with other scanners.
|
|
*/
|
|
%option prefix="df_"
|
|
|
|
/*
|
|
* We're reading from a string, so we don't need yywrap.
|
|
*/
|
|
%option noyywrap
|
|
|
|
/*
|
|
* The type for the state we keep for a scanner.
|
|
*/
|
|
%option extra-type="df_scanner_state_t *"
|
|
|
|
/*
|
|
* We have to override the memory allocators so that we don't get
|
|
* "unused argument" warnings from the yyscanner argument (which
|
|
* we don't use, as we have a global memory allocator).
|
|
*
|
|
* We provide, as macros, our own versions of the routines generated by Flex,
|
|
* which just call malloc()/realloc()/free() (as the Flex versions do),
|
|
* discarding the extra argument.
|
|
*/
|
|
%option noyyalloc
|
|
%option noyyrealloc
|
|
%option noyyfree
|
|
|
|
%{
|
|
/*
|
|
* Wireshark - Network traffic analyzer
|
|
* By Gerald Combs <gerald@wireshark.org>
|
|
* Copyright 2001 Gerald Combs
|
|
*
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
|
*/
|
|
|
|
/*
|
|
* Disable diagnostics in the code generated by Flex.
|
|
*/
|
|
DIAG_OFF_FLEX
|
|
|
|
df_lval_t *df_lval;
|
|
|
|
static int set_lval_str(int token, const char *token_value);
|
|
#define simple(token) set_lval_str(token, yytext)
|
|
static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c);
|
|
static gboolean parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep);
|
|
|
|
/*
|
|
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
|
|
*/
|
|
#define YY_EXIT_FAILURE ((void)yyscanner, 2)
|
|
|
|
/*
|
|
* Macros for the allocators, to discard the extra argument.
|
|
*/
|
|
/*
 * Replacements for the Flex-generated allocator routines.  Each one forwards
 * to the C library and never evaluates the yyscanner argument.  Arguments and
 * whole expansions are parenthesized so that an expression argument (e.g. a
 * conditional or pointer arithmetic) is cast and passed as a single unit.
 */
#define df_alloc(size, yyscanner)		((void *)malloc((size)))
#define df_realloc(ptr, size, yyscanner)	((void *)realloc((char *)(ptr), (size)))
#define df_free(ptr, yyscanner)			free((char *)(ptr))
|
|
|
|
%}
|
|
|
|
WORD_CHAR [[:alnum:]_-]
|
|
|
|
hex2 [[:xdigit:]]{2}
|
|
MacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}|{hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}|{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
|
|
|
|
hex4 [[:xdigit:]]{4}
|
|
QuadMacAddress {hex4}\.{hex4}\.{hex4}
|
|
|
|
dec-octet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
|
IPv4address {dec-octet}\.{dec-octet}\.{dec-octet}\.{dec-octet}
|
|
|
|
h16 [0-9A-Fa-f]{1,4}
|
|
ls32 {h16}:{h16}|{IPv4address}
|
|
IPv6address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
|
|
|
|
v4-cidr-prefix \/[[:digit:]]{1,2}
|
|
v6-cidr-prefix \/[[:digit:]]{1,3}
|
|
|
|
%x RANGE
|
|
%x DQUOTE
|
|
%x SQUOTE
|
|
%x REFERENCE
|
|
|
|
%%
|
|
|
|
[[:blank:]\n]+
|
|
|
|
"(" return simple(TOKEN_LPAREN);
|
|
")" return simple(TOKEN_RPAREN);
|
|
"," return simple(TOKEN_COMMA);
|
|
"{" return simple(TOKEN_LBRACE);
|
|
".." return simple(TOKEN_DOTDOT);
|
|
"}" return simple(TOKEN_RBRACE);
|
|
"+" return simple(TOKEN_PLUS);
|
|
"-" return simple(TOKEN_MINUS);
|
|
"*" return simple(TOKEN_STAR);
|
|
"/" return simple(TOKEN_RSLASH);
|
|
"%" return simple(TOKEN_PERCENT);
|
|
|
|
"==" return simple(TOKEN_TEST_ANY_EQ);
|
|
"eq" return simple(TOKEN_TEST_ANY_EQ);
|
|
"any_eq" return simple(TOKEN_TEST_ANY_EQ);
|
|
"!=" return simple(TOKEN_TEST_ALL_NE);
|
|
"ne" return simple(TOKEN_TEST_ALL_NE);
|
|
"all_ne" return simple(TOKEN_TEST_ALL_NE);
|
|
"===" return simple(TOKEN_TEST_ALL_EQ);
|
|
"all_eq" return simple(TOKEN_TEST_ALL_EQ);
|
|
"!==" return simple(TOKEN_TEST_ANY_NE);
|
|
"~=" {
|
|
add_deprecated_token(yyextra->dfw, "The operator \"~=\" is deprecated, use \"!==\" instead.");
|
|
return simple(TOKEN_TEST_ANY_NE);
|
|
}
|
|
"any_ne" return simple(TOKEN_TEST_ANY_NE);
|
|
">" return simple(TOKEN_TEST_GT);
|
|
"gt" return simple(TOKEN_TEST_GT);
|
|
">=" return simple(TOKEN_TEST_GE);
|
|
"ge" return simple(TOKEN_TEST_GE);
|
|
"<" return simple(TOKEN_TEST_LT);
|
|
"lt" return simple(TOKEN_TEST_LT);
|
|
"<=" return simple(TOKEN_TEST_LE);
|
|
"le" return simple(TOKEN_TEST_LE);
|
|
"contains" return simple(TOKEN_TEST_CONTAINS);
|
|
"~" return simple(TOKEN_TEST_MATCHES);
|
|
"matches" return simple(TOKEN_TEST_MATCHES);
|
|
"!" return simple(TOKEN_TEST_NOT);
|
|
"not" return simple(TOKEN_TEST_NOT);
|
|
"&&" return simple(TOKEN_TEST_AND);
|
|
"and" return simple(TOKEN_TEST_AND);
|
|
"||" return simple(TOKEN_TEST_OR);
|
|
"or" return simple(TOKEN_TEST_OR);
|
|
"in" return simple(TOKEN_TEST_IN);
|
|
|
|
"&" return simple(TOKEN_BITWISE_AND);
|
|
"bitwise_and" return simple(TOKEN_BITWISE_AND);
|
|
|
|
"${" {
|
|
BEGIN(REFERENCE);
|
|
return simple(TOKEN_REF_OPEN);
|
|
}
|
|
|
|
<REFERENCE>[^}]+ {
|
|
return set_lval_str(TOKEN_REFERENCE, yytext);
|
|
}
|
|
|
|
<REFERENCE>"}" {
|
|
BEGIN(INITIAL);
|
|
return simple(TOKEN_REF_CLOSE);
|
|
}
|
|
|
|
<REFERENCE><<EOF>> {
|
|
dfilter_fail(yyextra->dfw, "Right brace missing from field reference.");
|
|
return SCAN_FAILED;
|
|
}
|
|
|
|
"[" {
|
|
BEGIN(RANGE);
|
|
return simple(TOKEN_LBRACKET);
|
|
}
|
|
|
|
<RANGE>[^],]+ {
|
|
return set_lval_str(TOKEN_RANGE, yytext);
|
|
}
|
|
|
|
<RANGE>"," {
|
|
return simple(TOKEN_COMMA);
|
|
}
|
|
|
|
<RANGE>"]" {
|
|
BEGIN(INITIAL);
|
|
return simple(TOKEN_RBRACKET);
|
|
}
|
|
|
|
<RANGE><<EOF>> {
|
|
dfilter_fail(yyextra->dfw, "The right bracket was missing from a slice.");
|
|
return SCAN_FAILED;
|
|
}
|
|
|
|
[rR]{0,1}\042 {
|
|
/* start quote of a quoted string */
|
|
/*
|
|
* The example of how to scan for strings was taken from
|
|
* the flex manual, from the section "Start Conditions".
|
|
* See: https://westes.github.io/flex/manual/Start-Conditions.html
|
|
*/
|
|
BEGIN(DQUOTE);
|
|
yyextra->quoted_string = g_string_new("");
|
|
|
|
if (yytext[0] == 'r' || yytext[0] == 'R') {
|
|
/*
|
|
* This is a raw string (like in Python). Rules: 1) The two
|
|
* escape sequences are \\ and \". 2) Backslashes are
|
|
* preserved. 3) Double quotes in the string must be escaped.
|
|
* Corollary: Strings cannot end with an odd number of
|
|
* backslashes.
|
|
* Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
|
|
* {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
|
|
*/
|
|
yyextra->raw_string = TRUE;
|
|
}
|
|
else {
|
|
yyextra->raw_string = FALSE;
|
|
}
|
|
}
|
|
|
|
<DQUOTE><<EOF>> {
|
|
/* unterminated string */
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
|
|
return SCAN_FAILED;
|
|
}
|
|
|
|
<DQUOTE>\042 {
|
|
/* end quote */
|
|
BEGIN(INITIAL);
|
|
df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
|
|
yyextra->quoted_string = NULL;
|
|
return TOKEN_STRING;
|
|
}
|
|
|
|
<DQUOTE>\\[0-7]{1,3} {
|
|
/* octal sequence */
|
|
if (yyextra->raw_string) {
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
else {
|
|
unsigned long result;
|
|
result = strtoul(yytext + 1, NULL, 8);
|
|
if (result == 0) {
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
|
|
return SCAN_FAILED;
|
|
}
|
|
if (result > 0xff) {
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
|
|
return SCAN_FAILED;
|
|
}
|
|
g_string_append_c(yyextra->quoted_string, (gchar) result);
|
|
}
|
|
}
|
|
|
|
<DQUOTE>\\x[[:xdigit:]]{1,2} {
|
|
/* hex sequence */
|
|
/*
|
|
* C standard does not place a limit on the number of hex
|
|
* digits after \x... but we do. \xNN can have 1 or two Ns, not more.
|
|
*/
|
|
if (yyextra->raw_string) {
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
else {
|
|
unsigned long result;
|
|
result = strtoul(yytext + 2, NULL, 16);
|
|
if (result == 0) {
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
|
|
return SCAN_FAILED;
|
|
}
|
|
g_string_append_c(yyextra->quoted_string, (gchar) result);
|
|
}
|
|
}
|
|
|
|
|
|
<DQUOTE>\\. {
|
|
/* escaped character */
|
|
if (yyextra->raw_string) {
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) {
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
return SCAN_FAILED;
|
|
}
|
|
}
|
|
|
|
<DQUOTE>[^\\\042]+ {
|
|
/* non-escaped string */
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
|
|
|
|
\047 {
|
|
/* start quote of a quoted character value */
|
|
BEGIN(SQUOTE);
|
|
yyextra->quoted_string = g_string_new("'");
|
|
}
|
|
|
|
<SQUOTE><<EOF>> {
|
|
/* unterminated character value */
|
|
g_string_free(yyextra->quoted_string, TRUE);
|
|
yyextra->quoted_string = NULL;
|
|
dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
|
|
return SCAN_FAILED;
|
|
}
|
|
|
|
<SQUOTE>\047 {
|
|
/* end quote */
|
|
BEGIN(INITIAL);
|
|
g_string_append_c(yyextra->quoted_string, '\'');
|
|
df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
|
|
yyextra->quoted_string = NULL;
|
|
|
|
if (!parse_charconst(yyextra->dfw, df_lval->value, &df_lval->number)) {
|
|
return SCAN_FAILED;
|
|
}
|
|
return TOKEN_CHARCONST;
|
|
}
|
|
|
|
<SQUOTE>\\. {
|
|
/* escaped character */
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
|
|
<SQUOTE>[^\\\047]+ {
|
|
/* non-escaped string */
|
|
g_string_append(yyextra->quoted_string, yytext);
|
|
}
|
|
|
|
|
|
/* None of the patterns below can match ".." anywhere in the token string. */
|
|
|
|
{MacAddress}|{QuadMacAddress} {
|
|
/* MAC Address literal. */
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
}
|
|
|
|
{IPv4address}{v4-cidr-prefix}? {
|
|
/* IPv4 with or without prefix. */
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
}
|
|
|
|
{IPv6address}{v6-cidr-prefix}? {
|
|
/* IPv6 with or without prefix. */
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
}
|
|
|
|
[[:xdigit:]]+:[[:xdigit:]:]* {
|
|
/* Bytes. */
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
}
|
|
|
|
"<"[^>=]+">" {
|
|
/* Literal in-between angle brackets (cannot be parsed as a protocol field). */
|
|
return set_lval_str(TOKEN_LITERAL, yytext);
|
|
}
|
|
|
|
[:.]?[[:alnum:]_]{WORD_CHAR}*(\.{WORD_CHAR}+)* {
|
|
/* Identifier or literal or unparsed. */
|
|
if (yytext[0] == '.')
|
|
return set_lval_str(TOKEN_IDENTIFIER, yytext);
|
|
if (yytext[0] == ':')
|
|
return set_lval_str(TOKEN_LITERAL, yytext);
|
|
return set_lval_str(TOKEN_UNPARSED, yytext);
|
|
}
|
|
|
|
. {
|
|
/* Default */
|
|
if (isprint_string(yytext))
|
|
dfilter_fail(yyextra->dfw, "\"%s\" was unexpected in this context.", yytext);
|
|
else
|
|
dfilter_fail(yyextra->dfw, "Non-printable ASCII characters may only appear inside double-quotes.");
|
|
return SCAN_FAILED;
|
|
}
|
|
|
|
|
|
%%
|
|
|
|
/*
|
|
* Turn diagnostics back on, so we check the code that we've written.
|
|
*/
|
|
DIAG_ON_FLEX
|
|
|
|
static int
|
|
set_lval_str(int token, const char *token_value)
|
|
{
|
|
df_lval->value = g_strdup(token_value);
|
|
return token;
|
|
}
|
|
|
|
static gboolean
|
|
append_escaped_char(dfwork_t *dfw, GString *str, char c)
|
|
{
|
|
switch (c) {
|
|
case 'a':
|
|
c = '\a';
|
|
break;
|
|
case 'b':
|
|
c = '\b';
|
|
break;
|
|
case 'f':
|
|
c = '\f';
|
|
break;
|
|
case 'n':
|
|
c = '\n';
|
|
break;
|
|
case 'r':
|
|
c = '\r';
|
|
break;
|
|
case 't':
|
|
c = '\t';
|
|
break;
|
|
case 'v':
|
|
c = '\v';
|
|
break;
|
|
case '\\':
|
|
case '\'':
|
|
case '\"':
|
|
break;
|
|
default:
|
|
dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c);
|
|
return FALSE;
|
|
}
|
|
|
|
g_string_append_c(str, c);
|
|
return TRUE;
|
|
}
|
|
|
|
static gboolean
|
|
parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep)
|
|
{
|
|
const char *cp;
|
|
unsigned long value;
|
|
|
|
cp = s + 1; /* skip the leading ' */
|
|
if (*cp == '\'') {
|
|
dfilter_fail(dfw, "Empty character constant.");
|
|
return FALSE;
|
|
}
|
|
|
|
if (*cp == '\\') {
|
|
/*
|
|
* C escape sequence.
|
|
* An escape sequence is an octal number \NNN,
|
|
* an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
|
|
* that stands for the byte value of the equivalent
|
|
* C-escape in ASCII encoding.
|
|
*/
|
|
cp++;
|
|
switch (*cp) {
|
|
|
|
case '\0':
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
|
|
case 'a':
|
|
value = '\a';
|
|
break;
|
|
|
|
case 'b':
|
|
value = '\b';
|
|
break;
|
|
|
|
case 'f':
|
|
value = '\f';
|
|
break;
|
|
|
|
case 'n':
|
|
value = '\n';
|
|
break;
|
|
|
|
case 'r':
|
|
value = '\r';
|
|
break;
|
|
|
|
case 't':
|
|
value = '\t';
|
|
break;
|
|
|
|
case 'v':
|
|
value = '\v';
|
|
break;
|
|
|
|
case '\'':
|
|
value = '\'';
|
|
break;
|
|
|
|
case '\\':
|
|
value = '\\';
|
|
break;
|
|
|
|
case '"':
|
|
value = '"';
|
|
break;
|
|
|
|
case 'x':
|
|
cp++;
|
|
if (*cp >= '0' && *cp <= '9')
|
|
value = *cp - '0';
|
|
else if (*cp >= 'A' && *cp <= 'F')
|
|
value = 10 + (*cp - 'A');
|
|
else if (*cp >= 'a' && *cp <= 'f')
|
|
value = 10 + (*cp - 'a');
|
|
else {
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
cp++;
|
|
if (*cp != '\'') {
|
|
value <<= 4;
|
|
if (*cp >= '0' && *cp <= '9')
|
|
value |= *cp - '0';
|
|
else if (*cp >= 'A' && *cp <= 'F')
|
|
value |= 10 + (*cp - 'A');
|
|
else if (*cp >= 'a' && *cp <= 'f')
|
|
value |= 10 + (*cp - 'a');
|
|
else {
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
}
|
|
break;
|
|
|
|
default:
|
|
/* Octal */
|
|
if (*cp >= '0' && *cp <= '7')
|
|
value = *cp - '0';
|
|
else {
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
if (*(cp + 1) != '\'') {
|
|
cp++;
|
|
value <<= 3;
|
|
if (*cp >= '0' && *cp <= '7')
|
|
value |= *cp - '0';
|
|
else {
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
if (*(cp + 1) != '\'') {
|
|
cp++;
|
|
value <<= 3;
|
|
if (*cp >= '0' && *cp <= '7')
|
|
value |= *cp - '0';
|
|
else {
|
|
dfilter_fail(dfw, "%s isn't a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
}
|
|
}
|
|
if (value > 0xFF) {
|
|
dfilter_fail(dfw, "%s is too large to be a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
}
|
|
} else {
|
|
value = *cp;
|
|
if (!g_ascii_isprint(value)) {
|
|
dfilter_fail(dfw, "Non-printable value '0x%02lx' in character constant.", value);
|
|
return FALSE;
|
|
}
|
|
}
|
|
cp++;
|
|
if ((*cp != '\'') || (*(cp + 1) != '\0')){
|
|
dfilter_fail(dfw, "%s is too long to be a valid character constant.", s);
|
|
return FALSE;
|
|
}
|
|
|
|
*valuep = value;
|
|
return TRUE;
|
|
}
|