%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
/*
 * NOTE(review): the six system/library header names below were restored
 * from the symbols this file uses (ws_noisy, strtoul, strlen, errno,
 * HUGE_VAL, isprint_string/ws_escape_string_len) -- confirm the exact
 * list against the project's conventions.
 */
#include <wireshark.h>

#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <math.h>

#include <wsutil/str_util.h>

#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
#include "sttype-number.h"
}

/*
 * Always generate warnings.
 */
%option warn

/*
 * We want a reentrant scanner.
 */
%option reentrant

/*
 * We don't use input, so don't generate code for it.
 */
%option noinput

/*
 * We don't use unput, so don't generate code for it.
 */
%option nounput

/*
 * We don't read interactively from the terminal.
 */
%option never-interactive

/*
 * Prefix scanner routines with "df_yy" rather than "yy", so this scanner
 * can coexist with other scanners.
 */
%option prefix="df_yy"

/*
 * We're reading from a string, so we don't need yywrap.
 */
%option noyywrap

/*
 * The type for the dfs we keep for a scanner.
 */
%option extra-type="dfsyntax_t *"

%{
/*
 * Wireshark - Network traffic analyzer
 * By Gerald Combs
 * Copyright 2001 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

/*
 * Disable diagnostics in the code generated by Flex.
 */
DIAG_OFF_FLEX()

/*
 * Token helpers used by the rules below.  Each macro first advances the
 * current location past yytext and then stores a syntax-tree node for the
 * token in the scanner state, returning the token code.
 */
WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id);
#define simple(token)	(update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
#define test(token)	(update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
#define math(token)	(update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))

WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value);
WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value);

static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c);
static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn);
static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep);
static bool parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error);
static bool parse_double(dfsyntax_t *dfs, const char *s, double *valuep);

static void update_location(dfsyntax_t *dfs, const char *text);
static void update_string_loc(dfsyntax_t *dfs, const char *text);

/* Record a scanner error at the current token location. */
#define FAIL(...) \
	do { \
		ws_noisy("Scanning failed here."); \
		dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
	} while (0)

%}

FunctionIdentifier	[[:alpha:]_][[:alnum:]_]*

/*
 * Cannot start with '-'.
 * Some protocol name can contain '-', for example "mac-lte".
 * Fields that contain '-' anywhere cannot start with a decimal digit.
 * Note that some protocol names start with a number, for example "9p". This is
 * handled as a special case for numeric patterns.
 * Some protocol names contain dots, e.g: _ws.expert
 * Protocol or protocol field cannot contain DOTDOT anywhere.
 */
VarIdentifier		[[:alnum:]_][[:alnum:]_-]*
ProtoFieldIdentifier	{VarIdentifier}(\.{VarIdentifier})*

hex2			[[:xdigit:]]{2}
ColonMacAddress		{hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
HyphenMacAddress	{hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
DotMacAddress		{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}

hex4			[[:xdigit:]]{4}
DotQuadMacAddress	{hex4}\.{hex4}\.{hex4}

ColonBytes		({hex2}:)|({hex2}(:{hex2})+)
HyphenBytes		{hex2}(-{hex2})+
DotBytes		{hex2}(\.{hex2})+

DecOctet		[0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
IPv4Address		{DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}

h16			[0-9A-Fa-f]{1,4}
ls32			{h16}:{h16}|{IPv4Address}
IPv6Address		({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::

V4CidrPrefix		\/[[:digit:]]{1,2}
V6CidrPrefix		\/[[:digit:]]{1,3}

/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
StartAlphabet		[[:alnum:]_:]
Alphabet		[[:alnum:]_:/-]
LiteralValue		{StartAlphabet}{Alphabet}*(\.{Alphabet}+)*

Exponent		([eE][+-]?[[:digit:]]+)
HexExponent		([pP][+-]?[[:digit:]]+)

/*
 * Exclusive start conditions: while one of these is active only rules
 * explicitly qualified with it can match.
 */
%x RANGE
%x LAYER
%x DQUOTE
%x SQUOTE

%%

[[:blank:]\n\r]+ {
	update_location(yyextra, yytext);
}

"("		return simple(TOKEN_LPAREN);
")"		return simple(TOKEN_RPAREN);
","		return simple(TOKEN_COMMA);
"{"		return simple(TOKEN_LBRACE);
".."		return simple(TOKEN_DOTDOT);
"}"		return simple(TOKEN_RBRACE);
"$"		return simple(TOKEN_DOLLAR);
"@"		return simple(TOKEN_ATSIGN);
"any"		return simple(TOKEN_ANY);
"all"		return simple(TOKEN_ALL);

"=="		return test(TOKEN_TEST_ANY_EQ);
"eq"		return test(TOKEN_TEST_ANY_EQ);
"any_eq"	return test(TOKEN_TEST_ANY_EQ);
"!="		return test(TOKEN_TEST_ALL_NE);
"ne"		return test(TOKEN_TEST_ALL_NE);
"all_ne"	return test(TOKEN_TEST_ALL_NE);
"==="		return test(TOKEN_TEST_ALL_EQ);
"all_eq"	return test(TOKEN_TEST_ALL_EQ);
"!=="		return test(TOKEN_TEST_ANY_NE);
"any_ne"	return test(TOKEN_TEST_ANY_NE);
">"		return test(TOKEN_TEST_GT);
"gt"		return test(TOKEN_TEST_GT);
">="		return test(TOKEN_TEST_GE);
"ge"		return test(TOKEN_TEST_GE);
"<"		return test(TOKEN_TEST_LT);
"lt"		return test(TOKEN_TEST_LT);
"<="		return test(TOKEN_TEST_LE);
"le"		return test(TOKEN_TEST_LE);
"contains"	return test(TOKEN_TEST_CONTAINS);
"~"		return test(TOKEN_TEST_MATCHES);
"matches"	return test(TOKEN_TEST_MATCHES);
"!"		return test(TOKEN_TEST_NOT);
"not"		return test(TOKEN_TEST_NOT);
"&&"		return test(TOKEN_TEST_AND);
"and"		return test(TOKEN_TEST_AND);
"||"		return test(TOKEN_TEST_OR);
"or"		return test(TOKEN_TEST_OR);
"^^"		return test(TOKEN_TEST_XOR);
"xor"		return test(TOKEN_TEST_XOR);
"in"		return test(TOKEN_TEST_IN);

"+"		return math(TOKEN_PLUS);
"-"		return math(TOKEN_MINUS);
"*"		return math(TOKEN_STAR);
"/"		return math(TOKEN_RSLASH);
"%"		return math(TOKEN_PERCENT);
"&"		return math(TOKEN_BITWISE_AND);
"bitand"	return math(TOKEN_BITWISE_AND);
"bitwise_and"	return math(TOKEN_BITWISE_AND);

"#" {
	/* Layer operator: a layer index or a '[' must follow. */
	BEGIN(LAYER);
	return simple(TOKEN_HASH);
}

<LAYER>[[:digit:]]+ {
	BEGIN(INITIAL);
	update_location(yyextra, yytext);
	return set_lval_simple(yyextra, TOKEN_INDEX, yytext, STTYPE_UNINITIALIZED);
}

<LAYER>[^[:digit:][] {
	update_location(yyextra, yytext);
	FAIL("Expected digit or \"[\", not \"%s\"", yytext);
	return SCAN_FAILED;
}

<INITIAL,LAYER>"[" {
	/* Start of a slice/range; also accepted directly after '#'. */
	BEGIN(RANGE);
	return simple(TOKEN_LBRACKET);
}

<RANGE>[^],]+ {
	update_location(yyextra, yytext);
	return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
}

<RANGE>"," {
	return simple(TOKEN_COMMA);
}

<RANGE>"]" {
	BEGIN(INITIAL);
	return simple(TOKEN_RBRACKET);
}

<RANGE><<EOF>> {
	update_location(yyextra, yytext);
	FAIL("The right bracket was missing from a slice.");
	return SCAN_FAILED;
}

[rR]{0,1}\042 {
	/* start quote of a quoted string */
	/*
	 * The example of how to scan for strings was taken from
	 * the flex manual, from the section "Start Conditions".
	 * See: https://westes.github.io/flex/manual/Start-Conditions.html
	 */
	BEGIN(DQUOTE);
	update_location(yyextra, yytext);
	yyextra->string_loc = yyextra->location;

	yyextra->quoted_string = g_string_new(NULL);

	if (yytext[0] == 'r' || yytext[0] == 'R') {
		/*
		 * This is a raw string (like in Python). Rules: 1) The two
		 * escape sequences are \\ and \". 2) Backslashes are
		 * preserved. 3) Double quotes in the string must be escaped.
		 * Corollary: Strings cannot end with an odd number of
		 * backslashes.
		 * Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
		 * {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
		 */
		yyextra->raw_string = true;
	}
	else {
		yyextra->raw_string = false;
	}
}

<DQUOTE><<EOF>> {
	/* unterminated string */
	update_string_loc(yyextra, yytext);
	g_string_free(yyextra->quoted_string, true);
	yyextra->quoted_string = NULL;
	FAIL("The final quote was missing from a quoted string.");
	return SCAN_FAILED;
}

<DQUOTE>\042 {
	/* end quote */
	BEGIN(INITIAL);
	update_string_loc(yyextra, yytext);
	int token = set_lval_quoted_string(yyextra, yyextra->quoted_string);
	yyextra->quoted_string = NULL;
	yyextra->string_loc.col_start = -1;
	return token;
}

<DQUOTE>\\[0-7]{1,3} {
	/* octal sequence */
	update_string_loc(yyextra, yytext);
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long result;
		result = strtoul(yytext + 1, NULL, 8);
		if (result > 0xff) {
			g_string_free(yyextra->quoted_string, true);
			yyextra->quoted_string = NULL;
			FAIL("%s is larger than 255.", yytext);
			return SCAN_FAILED;
		}
		g_string_append_c(yyextra->quoted_string, (char) result);
	}
}

<DQUOTE>\\x[[:xdigit:]]{1,2} {
	/* hex sequence */
	/*
	 * C standard does not place a limit on the number of hex
	 * digits after \x... but we do. \xNN can have 1 or two Ns, not more.
	 */
	update_string_loc(yyextra, yytext);
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else {
		unsigned long result;
		result = strtoul(yytext + 2, NULL, 16);
		g_string_append_c(yyextra->quoted_string, (char) result);
	}
}

<DQUOTE>\\u[[:xdigit:]]{0,4} {
	/* universal character name */
	update_string_loc(yyextra, yytext);
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
		g_string_free(yyextra->quoted_string, true);
		yyextra->quoted_string = NULL;
		return SCAN_FAILED;
	}
}

<DQUOTE>\\U[[:xdigit:]]{0,8} {
	/* universal character name */
	update_string_loc(yyextra, yytext);
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
		g_string_free(yyextra->quoted_string, true);
		yyextra->quoted_string = NULL;
		return SCAN_FAILED;
	}
}

<DQUOTE>\\. {
	/* escaped character */
	update_string_loc(yyextra, yytext);
	if (yyextra->raw_string) {
		g_string_append(yyextra->quoted_string, yytext);
	}
	else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) {
		g_string_free(yyextra->quoted_string, true);
		yyextra->quoted_string = NULL;
		return SCAN_FAILED;
	}
}

<DQUOTE>[^\\\042]+ {
	/* non-escaped string */
	update_string_loc(yyextra, yytext);
	g_string_append(yyextra->quoted_string, yytext);
}

\047 {
	/* start quote of a quoted character value */
	BEGIN(SQUOTE);
	update_location(yyextra, yytext);
	yyextra->string_loc = yyextra->location;

	yyextra->quoted_string = g_string_new("'");
}

<SQUOTE><<EOF>> {
	/* unterminated character value */
	update_string_loc(yyextra, yytext);
	g_string_free(yyextra->quoted_string, true);
	yyextra->quoted_string = NULL;
	FAIL("The final quote was missing from a character constant.");
	return SCAN_FAILED;
}

<SQUOTE>\047 {
	/* end quote */
	BEGIN(INITIAL);
	update_string_loc(yyextra, yytext);
	g_string_append_c(yyextra->quoted_string, '\'');
	int token = set_lval_charconst(yyextra, yyextra->quoted_string);
	yyextra->quoted_string = NULL;
	yyextra->string_loc.col_start = -1;
	return token;
}

<SQUOTE>\\. {
	/* escaped character */
	update_string_loc(yyextra, yytext);
	g_string_append(yyextra->quoted_string, yytext);
}

<SQUOTE>[^\\\047]+ {
	/* non-escaped string */
	update_string_loc(yyextra, yytext);
	g_string_append(yyextra->quoted_string, yytext);
}

	/* NOTE: None of the patterns below can match ".." anywhere in the token string. */

	/* MAC address. */

{ColonMacAddress}|{HyphenMacAddress} {
	/* MAC Address. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext, yytext);
}

	/* IP address. */

{IPv4Address}{V4CidrPrefix}? {
	/* IPv4 with or without prefix. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext, yytext);
}

{IPv6Address}{V6CidrPrefix}? {
	/* IPv6 with or without prefix. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext, yytext);
}

	/* Integer */

[[:digit:]][[:digit:]]* {
	/* Numeric or field. */
	update_location(yyextra, yytext);
	/* Check if we have a protocol or protocol field, otherwise assume a literal. */
	/* It is only reasonable to assume a literal here, instead of a
	 * (possibly non-existent) protocol field, because protocol field filter names
	 * should not start with a digit (the lexical syntax for numbers). */
	header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
	if (hfinfo != NULL) {
		return set_lval_field(yyextra, hfinfo, yytext);
	}
	return set_lval_integer(yyextra, yytext, yytext);
}

0[bBxX]?[[:xdigit:]]+ {
	/* Binary or octal or hexadecimal. */
	update_location(yyextra, yytext);
	return set_lval_integer(yyextra, yytext, yytext);
}

	/* Floating point. */

[[:digit:]]+\.[[:digit:]]+ {
	/* Decimal float. */
	update_location(yyextra, yytext);
	return set_lval_float(yyextra, yytext, yytext);
}

[[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}? {
	/* Decimal float with optional exponent. */
	/* Significand cannot have any side omitted. */
	update_location(yyextra, yytext);
	/* Check if we have a protocol or protocol field, otherwise assume a literal. */
	/* It is only reasonable to assume a literal here, instead of a
	 * (possibly non-existent) protocol field, because protocol field filter names
	 * should not start with a digit (the lexical syntax for numbers). */
	header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
	if (hfinfo != NULL) {
		return set_lval_field(yyextra, hfinfo, yytext);
	}
	return set_lval_float(yyextra, yytext, yytext);
}

0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? {
	/* Hexadecimal float with optional exponent. Can't be a field because
	 * field cannot begin with 0x. */
	/* Significand cannot have any side omitted. */
	update_location(yyextra, yytext);
	return set_lval_float(yyextra, yytext, yytext);
}

:[[:xdigit:]]+ {
	/* Numeric prefixed with ':'. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext + 1, yytext);
}

	/* Bytes. */

:?{ColonBytes} {
	/* Bytes. */
	update_location(yyextra, yytext);
	if (yytext[0] == ':')
		return set_lval_literal(yyextra, yytext + 1, yytext);
	return set_lval_literal(yyextra, yytext, yytext);
}

:{HyphenBytes} {
	/* Bytes. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext + 1, yytext);
}

:{DotBytes} {
	/* DotBytes, can be a field without ':' prefix. */
	update_location(yyextra, yytext);
	return set_lval_literal(yyextra, yytext + 1, yytext);
}

	/* Identifier (protocol/field/function name). */

	/* This must come before FieldIdentifier to match function names. */
{FunctionIdentifier} {
	/* Identifier (field or function) or literal (bytes without separator). */
	update_location(yyextra, yytext);
	return set_lval_identifier(yyextra, yytext, yytext);
}

\.{ProtoFieldIdentifier} {
	/* Identifier, prefixed with a '.', must be a field, no ifs or buts. */
	update_location(yyextra, yytext);
	const char *name = yytext + 1;
	header_field_info *hfinfo = dfilter_resolve_unparsed(name, yyextra->deprecated);
	if (hfinfo == NULL) {
		FAIL("\"%s\" is not a valid protocol or protocol field.", name);
		return SCAN_FAILED;
	}
	return set_lval_field(yyextra, hfinfo, yytext);
}

{ProtoFieldIdentifier} {
	/* Catch-all for protocol values. Can also be a literal. */
	update_location(yyextra, yytext);
	return set_lval_unparsed(yyextra, yytext, yytext);
}

{LiteralValue} {
	/* Catch-all for semantic values. */
	update_location(yyextra, yytext);
	/* We use literal here because identifiers (using unparsed) should have
	 * matched one of the previous rules. */
	return set_lval_literal(yyextra, yytext, yytext);
}

. {
	/* Default */
	update_location(yyextra, yytext);
	if (isprint_string(yytext))
		FAIL("\"%s\" was unexpected in this context.", yytext);
	else
		FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
	return SCAN_FAILED;
}

%%

/*
 * Turn diagnostics back on, so we check the code that we've written.
*/ DIAG_ON_FLEX() static void _update_location(dfsyntax_t *dfs, size_t len) { dfs->location.col_start += (long)dfs->location.col_len; dfs->location.col_len = len; } static void update_location(dfsyntax_t *dfs, const char *text) { _update_location(dfs, strlen(text)); } static void update_string_loc(dfsyntax_t *dfs, const char *text) { size_t len = strlen(text); dfs->string_loc.col_len += len; _update_location(dfs, len); } static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id) { dfs->lval = stnode_new(type_id, NULL, g_strdup(token_value), dfs->location); return token; } static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value) { dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location); return TOKEN_LITERAL; } static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value) { dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location); stnode_set_flags(dfs->lval, STFLAG_UNPARSED); return TOKEN_IDENTIFIER; } static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value) { dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location); stnode_set_flags(dfs->lval, STFLAG_UNPARSED); return TOKEN_UNPARSED; } static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value) { dfs->lval = stnode_new(STTYPE_FIELD, (void *)hfinfo, g_strdup(token_value), dfs->location); return TOKEN_FIELD; } static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string) { char *token_value; token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true); dfs->lval = stnode_new(STTYPE_STRING, quoted_string, token_value, dfs->string_loc); return TOKEN_STRING; } static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string) { unsigned long number; bool ok; char *token_value = 
g_string_free(quoted_string, false); ok = parse_charconst(dfs, token_value, &number); if (!ok) { g_free(token_value); return SCAN_FAILED; } dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc); return TOKEN_CHARCONST; } static int set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value) { unsigned long long number; bool ok; ok = parse_unsigned_long_long(dfs, value, &number, false); if (!ok) { /* Instead of failing assume this is a literal such as "10f3deccc00d5c8f629fba7a0fff34aa" that can be interpreted as a literal bytes valid. */ dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location); return TOKEN_LITERAL; } dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location); sttype_number_set_unsigned(dfs->lval, number); return TOKEN_NUMBER; } static int set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value) { double number; bool ok; ok = parse_double(dfs, value, &number); if (!ok) { return SCAN_FAILED; } dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location); sttype_number_set_float(dfs->lval, number); return TOKEN_NUMBER; } static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c) { switch (c) { case 'a': c = '\a'; break; case 'b': c = '\b'; break; case 'f': c = '\f'; break; case 'n': c = '\n'; break; case 'r': c = '\r'; break; case 't': c = '\t'; break; case 'v': c = '\v'; break; case '\\': case '\'': case '\"': break; default: dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "\\%c is not a valid character escape sequence", c); return false; } g_string_append_c(str, c); return true; } static bool parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep) { uint64_t val; char *endptr; int ndigits; if (str[0] != '\\') return false; if (str[1] == 'u') ndigits = 4; else if (str[1] == 'U') ndigits = 8; else return false; for (int 
i = 2; i < ndigits + 2; i++) { if (!g_ascii_isxdigit(str[i])) { return false; } } errno = 0; val = g_ascii_strtoull(str + 2, &endptr, 16); /* skip leading 'u' or 'U' */ if (errno != 0 || endptr == str || val > UINT32_MAX) { return false; } /* * Ref: https://en.cppreference.com/w/c/language/escape * Range of universal character names * * If a universal character name corresponds to a code point that is * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a * surrogate code point (the range 0xD800-0xDFFF, inclusive), or * greater than 0x10FFFF, i.e. not a Unicode code point (since C23), * the program is ill-formed. In other words, members of basic source * character set and control characters (in ranges 0x0-0x1F and * 0x7F-0x9F) cannot be expressed in universal character names. */ if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60) return false; else if (val >= 0xD800 && val <= 0xDFFF) return false; else if (val > 0x10FFFF) return false; *valuep = (gunichar)val; if (ret_endptr) *ret_endptr = endptr; return true; } static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn) { gunichar val; if (!parse_universal_character_name(dfs, ucn, NULL, &val)) { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn); return false; } g_string_append_unichar(str, val); return true; } static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep) { const char *cp; unsigned long value; gunichar unival; char *endptr; cp = s + 1; /* skip the leading ' */ if (*cp == '\'') { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant."); return false; } if (*cp == '\\') { /* * C escape sequence. * An escape sequence is an octal number \NNN, * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v * that stands for the byte value of the equivalent * C-escape in ASCII encoding. 
*/ cp++; switch (*cp) { case '\0': dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); return false; case 'a': value = '\a'; cp++; break; case 'b': value = '\b'; cp++; break; case 'f': value = '\f'; cp++; break; case 'n': value = '\n'; break; case 'r': value = '\r'; cp++; break; case 't': value = '\t'; cp++; break; case 'v': value = '\v'; cp++; break; case '\'': value = '\''; cp++; break; case '\\': value = '\\'; cp++; break; case '"': value = '"'; cp++; break; case 'x': cp++; if (*cp >= '0' && *cp <= '9') value = *cp - '0'; else if (*cp >= 'A' && *cp <= 'F') value = 10 + (*cp - 'A'); else if (*cp >= 'a' && *cp <= 'f') value = 10 + (*cp - 'a'); else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); return false; } cp++; if (*cp != '\'') { value <<= 4; if (*cp >= '0' && *cp <= '9') value |= *cp - '0'; else if (*cp >= 'A' && *cp <= 'F') value |= 10 + (*cp - 'A'); else if (*cp >= 'a' && *cp <= 'f') value |= 10 + (*cp - 'a'); else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); return false; } } cp++; break; case 'u': case 'U': if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s); return false; } value = (unsigned long)unival; cp = endptr; break; default: /* Octal */ if (*cp >= '0' && *cp <= '7') value = *cp - '0'; else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); return false; } if (*(cp + 1) != '\'') { cp++; value <<= 3; if (*cp >= '0' && *cp <= '7') value |= *cp - '0'; else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s); return false; } if (*(cp + 1) != '\'') { cp++; value <<= 3; if (*cp >= '0' && *cp <= '7') value |= *cp - '0'; else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid 
character constant.", s); return false; } } } if (value > 0xFF) { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s); return false; } cp++; } } else { value = *cp++; if (!g_ascii_isprint(value)) { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value); return false; } } if ((*cp != '\'') || (*(cp + 1) != '\0')){ dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s); return false; } *valuep = value; return true; } static bool parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error) { char *endptr; errno = 0; if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) { *valuep = g_ascii_strtoull(s + 2, &endptr, 2); } else { *valuep = g_ascii_strtoull(s, &endptr, 0); } if (errno == EINVAL || endptr == s || *endptr != '\0') { /* This isn't a valid number. */ if (set_error) dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number.", s); return false; } if (errno == ERANGE) { if (set_error) dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is too large to be represented as a 64-bit number.", s); return false; } if (errno != 0) { // Should not happen if (set_error) dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number (%s).", s, g_strerror(errno)); return false; } return true; } static bool parse_double(dfsyntax_t *dfs, const char *s, double *valuep) { char *endptr = NULL; errno = 0; *valuep = g_ascii_strtod(s, &endptr); if (endptr == s || *endptr != '\0') { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid floating-point number.", s); return false; } if (errno == ERANGE) { if (*valuep == HUGE_VAL) { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point overflow.", s); } else { dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes 
floating-point underflow.", s); } return false; } if (errno != 0) { // Should not happen dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid floating-point number (%s).", s, g_strerror(errno)); return false; } return true; }