%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"

/*
 * NOTE(review): the three system header names below were missing from the
 * reviewed copy of this file (bare #include directives). They are restored
 * from what the code uses (GString and gboolean, malloc and strtoul,
 * strstr and strlen) - confirm against the project tree.
 */
#include <glib.h>
#include <stdlib.h>
#include <string.h>

#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
}

/*
 * We want a reentrant scanner.
 */
%option reentrant

/*
 * We don't use input, so don't generate code for it.
 */
%option noinput

/*
 * We don't use unput, so don't generate code for it.
 */
%option nounput

/*
 * We don't read interactively from the terminal.
 */
%option never-interactive

/*
 * Prefix scanner routines with "df_" rather than "yy", so this scanner
 * can coexist with other scanners.
 */
%option prefix="df_"

/*
 * We're reading from a string, so we don't need yywrap.
 */
%option noyywrap

/*
 * The type for the state we keep for a scanner.
 */
%option extra-type="df_scanner_state_t *"

/*
 * We have to override the memory allocators so that we don't get
 * "unused argument" warnings from the yyscanner argument (which
 * we don't use, as we have a global memory allocator).
 *
 * We provide, as macros, our own versions of the routines generated by Flex,
 * which just call malloc()/realloc()/free() (as the Flex versions do),
 * discarding the extra argument.
 */
%option noyyalloc
%option noyyrealloc
%option noyyfree

%{
/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs
 * Copyright 2001 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
 * Disable diagnostics in the code generated by Flex.
 */
DIAG_OFF_FLEX

df_lval_t *df_lval;

static int set_lval_str(int token, const char *token_value);
#define simple(token)	set_lval_str(token, yytext)

static gboolean append_escaped_char(dfwork_t *dfw, GString *str, char c);

static gboolean parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep);

/*
 * Sleazy hack to suppress compiler warnings in yy_fatal_error().
 */
#define YY_EXIT_FAILURE ((void)yyscanner, 2)

/*
 * Macros for the allocators, to discard the extra argument.
 */
#define df_alloc(size, yyscanner)		(void *)malloc(size)
#define df_realloc(ptr, size, yyscanner)	(void *)realloc((char *)(ptr), (size))
#define df_free(ptr, yyscanner)			free((char *)ptr)

%}

%x RANGE
%x DQUOTE
%x SQUOTE

%%

[[:blank:]\n]+		/* discard whitespace */

"("			return simple(TOKEN_LPAREN);
")"			return simple(TOKEN_RPAREN);
","			return simple(TOKEN_COMMA);
"{"			return simple(TOKEN_LBRACE);
".."			return simple(TOKEN_DOTDOT);
"}"			return simple(TOKEN_RBRACE);

"=="			return simple(TOKEN_TEST_ANY_EQ);
"eq"			return simple(TOKEN_TEST_ANY_EQ);
"!="			return simple(TOKEN_TEST_ALL_NE);
"ne"			return simple(TOKEN_TEST_ALL_NE);
"~="			return simple(TOKEN_TEST_ANY_NE);
"any_ne"		return simple(TOKEN_TEST_ANY_NE);
">"			return simple(TOKEN_TEST_GT);
"gt"			return simple(TOKEN_TEST_GT);
">="			return simple(TOKEN_TEST_GE);
"ge"			return simple(TOKEN_TEST_GE);
"<"			return simple(TOKEN_TEST_LT);
"lt"			return simple(TOKEN_TEST_LT);
"<="			return simple(TOKEN_TEST_LE);
"le"			return simple(TOKEN_TEST_LE);
"bitwise_and"		return simple(TOKEN_TEST_BITWISE_AND);
"&"			return simple(TOKEN_TEST_BITWISE_AND);
"contains"		return simple(TOKEN_TEST_CONTAINS);
"~"			return simple(TOKEN_TEST_MATCHES);
"matches"		return simple(TOKEN_TEST_MATCHES);
"!"			return simple(TOKEN_TEST_NOT);
"not"			return simple(TOKEN_TEST_NOT);
"&&"			return simple(TOKEN_TEST_AND);
"and"			return simple(TOKEN_TEST_AND);
"||"			return simple(TOKEN_TEST_OR);
"or"			return simple(TOKEN_TEST_OR);
"in"			return simple(TOKEN_TEST_IN);

"["			{
	BEGIN(RANGE);
	return simple(TOKEN_LBRACKET);
}

<RANGE>[^],]+		{
	return set_lval_str(TOKEN_RANGE, yytext);
}

<RANGE>","		{
	return simple(TOKEN_COMMA);
}

<RANGE>"]"		{
	BEGIN(INITIAL);
	return simple(TOKEN_RBRACKET);
}

<RANGE><<EOF>>		{
	dfilter_fail(yyextra->dfw, "The right bracket was missing from a slice.");
	return SCAN_FAILED;
}

[rR]{0,1}\042		{
	/* start quote of a quoted string */
	/*
	 * The example of how to scan for strings was taken from
	 * the flex manual, from the section "Start Conditions".
	 * See: https://westes.github.io/flex/manual/Start-Conditions.html
	 */
	BEGIN(DQUOTE);
	yyextra->quoted_string = g_string_new("");
	if (yytext[0] == 'r' || yytext[0] == 'R') {
		/*
		 * This is a raw string (like in Python). Rules: 1) The two
		 * escape sequences are \\ and \". 2) Backslashes are
		 * preserved. 3) Double quotes in the string must be escaped.
		 * Corollary: Strings cannot end with an odd number of
		 * backslashes.
		 * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
		 * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
		 */
		yyextra->raw_string = TRUE;
	}
	else {
		yyextra->raw_string = FALSE;
	}
}

<DQUOTE><<EOF>>		{
	/* unterminated string */
	g_string_free(yyextra->quoted_string, TRUE);
	yyextra->quoted_string = NULL;
	dfilter_fail(yyextra->dfw, "The final quote was missing from a quoted string.");
	return SCAN_FAILED;
}

<DQUOTE>\042		{
	/* end quote */
	BEGIN(INITIAL);
	df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
	yyextra->quoted_string = NULL;
	return TOKEN_STRING;
}

<DQUOTE>\\[0-7]{1,3}	{
	/* octal sequence */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long result;
		result = strtoul(yytext + 1, NULL, 8);
		if (result == 0) {
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			return SCAN_FAILED;
		}
		if (result > 0xff) {
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			dfilter_fail(yyextra->dfw, "%s is larger than 255.", yytext);
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) result);
	}
}

<DQUOTE>\\x[[:xdigit:]]{1,2}	{
	/* hex sequence */
	/*
	 * C standard does not place a limit on the number of hex
	 * digits after \x... but we do. \xNN can have 1 or two Ns, not more.
	 */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long result;
		result = strtoul(yytext + 2, NULL, 16);
		if (result == 0) {
			g_string_free(yyextra->quoted_string, TRUE);
			yyextra->quoted_string = NULL;
			dfilter_fail(yyextra->dfw, "%s (NUL byte) cannot be used with a regular string.", yytext);
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (gchar) result);
	}
}

<DQUOTE>\\.		{
	/* escaped character */
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else if (!append_escaped_char(yyextra->dfw, yyextra->quoted_string, yytext[1])) {
		/*
		 * Release the partial string here, as every other
		 * failure path of the DQUOTE state does.
		 */
		g_string_free(yyextra->quoted_string, TRUE);
		yyextra->quoted_string = NULL;
		return SCAN_FAILED;
	}
}

<DQUOTE>[^\\\042]+	{
	/* non-escaped string */
	g_string_append(yyextra->quoted_string, yytext);
}

\047			{
	/* start quote of a quoted character value */
	BEGIN(SQUOTE);
	yyextra->quoted_string = g_string_new("'");
}

<SQUOTE><<EOF>>		{
	/* unterminated character value */
	g_string_free(yyextra->quoted_string, TRUE);
	yyextra->quoted_string = NULL;
	dfilter_fail(yyextra->dfw, "The final quote was missing from a character constant.");
	return SCAN_FAILED;
}

<SQUOTE>\047		{
	/* end quote */
	BEGIN(INITIAL);
	g_string_append_c(yyextra->quoted_string, '\'');
	df_lval->value = g_string_free(yyextra->quoted_string, FALSE);
	yyextra->quoted_string = NULL;
	if (!parse_charconst(yyextra->dfw, df_lval->value, &df_lval->number)) {
		return SCAN_FAILED;
	}
	return TOKEN_CHARCONST;
}

<SQUOTE>\\.		{
	/* escaped character */
	g_string_append(yyextra->quoted_string, yytext);
}

<SQUOTE>[^\\\047]+	{
	/* non-escaped string */
	g_string_append(yyextra->quoted_string, yytext);
}

	/* None of the patterns below can match ".." anywhere in the token string. */

[-+[:alnum:]_:./]+	{
	char *s, *value;

	/* Hack (but lesser) to make sure that ".." is interpreted as a token on its own. */
	if ((s = strstr(yytext, "..")) != NULL) {
		/* If it starts with ".." it is its own token. */
		if (yytext[0] == '.' && yytext[1] == '.') {
			yyless(2);
			df_lval->value = g_strdup("..");
			return TOKEN_DOTDOT;
		}
		/* Match only the prefix before "..". */
		*s = '\0';
		value = g_strdup(yytext);
		*s = '.'; /* Restore */
		yyless(strlen(value));
		df_lval->value = value;
		return TOKEN_UNPARSED;
	}
	/* It is a variable or a literal value (CIDR, bytes, number, ...). */
	return set_lval_str(TOKEN_UNPARSED, yytext);
}

.			{
	/* Default */
	dfilter_fail(yyextra->dfw, "\"%s\" was unexpected in this context.", yytext);
	return SCAN_FAILED;
}

%%

/*
 * Turn diagnostics back on, so we check the code that we've written.
 */
DIAG_ON_FLEX

/*
 * Store a g_strdup() copy of token_value in df_lval->value and hand the
 * token code straight back, so a rule can write "return simple(TOKEN);".
 */
static int
set_lval_str(int token, const char *token_value)
{
	df_lval->value = g_strdup(token_value);
	return token;
}

/*
 * Append to str the character named by the single-letter escape c
 * (the character that followed the backslash).
 *
 * Returns TRUE on success. For an unknown escape letter, reports the
 * error through dfilter_fail(), leaves str untouched and returns FALSE.
 */
static gboolean
append_escaped_char(dfwork_t *dfw, GString *str, char c)
{
	switch (c) {
		case 'a':
			c = '\a';
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		case '\\':
		case '\'':
		case '\"':
			/* These stand for themselves. */
			break;
		default:
			dfilter_fail(dfw, "\\%c is not a valid character escape sequence", c);
			return FALSE;
	}

	g_string_append_c(str, c);
	return TRUE;
}

/*
 * Parse a character constant and store its byte value in *valuep.
 *
 * s is the whole constant including both single quotes, as assembled by
 * the SQUOTE rules above (for example 'a', '\n', '\x41' or '\101').
 *
 * Returns TRUE on success. On any error, reports it through
 * dfilter_fail(), leaves *valuep untouched and returns FALSE.
 */
static gboolean
parse_charconst(dfwork_t *dfw, const char *s, unsigned long *valuep)
{
	const char *cp;
	unsigned long value;
	int digit;

	cp = s + 1;	/* skip the leading ' */
	if (*cp == '\\') {
		/*
		 * C escape sequence.
		 * An escape sequence is an octal number \NNN,
		 * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
		 * that stands for the byte value of the equivalent
		 * C-escape in ASCII encoding.
		 *
		 * Every branch below must leave cp on the LAST character
		 * of the escape; the closing quote is checked after the
		 * switch.
		 */
		cp++;
		switch (*cp) {

		case '\0':
			dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
			return FALSE;

		case 'a':
			value = '\a';
			break;

		case 'b':
			value = '\b';
			break;

		case 'f':
			value = '\f';
			break;

		case 'n':
			value = '\n';
			break;

		case 'r':
			value = '\r';
			break;

		case 't':
			value = '\t';
			break;

		case 'v':
			value = '\v';
			break;

		case '\'':
			value = '\'';
			break;

		case '\\':
			value = '\\';
			break;

		case '"':
			value = '"';
			break;

		case 'x':
			/* One or two hexadecimal digits. */
			cp++;
			digit = g_ascii_xdigit_value(*cp);
			if (digit < 0) {
				dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
				return FALSE;
			}
			value = (unsigned long)digit;
			/*
			 * Peek at the next character instead of stepping
			 * onto it, so that a one-digit constant such as
			 * '\x5' leaves cp on its digit (same approach as
			 * the octal branch).
			 */
			if (*(cp + 1) != '\'') {
				cp++;
				digit = g_ascii_xdigit_value(*cp);
				if (digit < 0) {
					dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
					return FALSE;
				}
				value = (value << 4) | (unsigned long)digit;
			}
			break;

		default:
			/* Octal: one to three digits. */
			if (*cp >= '0' && *cp <= '7')
				value = *cp - '0';
			else {
				dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
				return FALSE;
			}
			if (*(cp + 1) != '\'') {
				cp++;
				value <<= 3;
				if (*cp >= '0' && *cp <= '7')
					value |= *cp - '0';
				else {
					dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
					return FALSE;
				}
				if (*(cp + 1) != '\'') {
					cp++;
					value <<= 3;
					if (*cp >= '0' && *cp <= '7')
						value |= *cp - '0';
					else {
						dfilter_fail(dfw, "\"%s\" isn't a valid character constant.", s);
						return FALSE;
					}
				}
			}
			/* Three octal digits can encode up to 0777. */
			if (value > 0xFF) {
				dfilter_fail(dfw, "\"%s\" is too large to be a valid character constant.", s);
				return FALSE;
			}
		}
	} else {
		/*
		 * Go through unsigned char so that a byte >= 0x80 is not
		 * sign-extended where plain char is signed.
		 */
		value = (unsigned char)*cp;
		if (!g_ascii_isprint(value)) {
			dfilter_fail(dfw, "Non-printable character '\\x%02lx' in character constant.", value);
			return FALSE;
		}
	}
	/* Step onto what must be the closing quote, then the terminator. */
	cp++;
	if ((*cp != '\'') || (*(cp + 1) != '\0')){
		dfilter_fail(dfw, "\"%s\" is too long to be a valid character constant.", s);
		return FALSE;
	}
	*valuep = value;
	return TRUE;
}