
1043 lines
27 KiB

%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#include <wireshark.h>
#include <stdlib.h>
#include <errno.h>
#include <math.h>
#include <wsutil/str_util.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
#include "sttype-number.h"
* Always generate warnings.
%option warn
* We want a reentrant scanner.
%option reentrant
* We don't use input, so don't generate code for it.
%option noinput
* We don't use unput, so don't generate code for it.
%option nounput
* We don't read interactively from the terminal.
%option never-interactive
* Prefix scanner routines with "df_yy" rather than "yy", so this scanner
* can coexist with other scanners.
%option prefix="df_yy"
* We're reading from a string, so we don't need yywrap.
%option noyywrap
* The type for the dfs we keep for a scanner.
%option extra-type="dfsyntax_t *"
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
* SPDX-License-Identifier: GPL-2.0-or-later
* Disable diagnostics in the code generated by Flex.
/*
 * Token constructors. Each one stores a new syntax-tree node in dfs->lval;
 * the int result is what the scanner actions hand back with "return".
 */
WS_WARN_UNUSED static int set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id);
/*
 * Shorthands for operator and punctuation rules: advance the location over
 * yytext, then build a node with no data of the given syntax-tree type.
 * The value of each macro is the result of set_lval_simple().
 */
#define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
WS_WARN_UNUSED static int set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value);
WS_WARN_UNUSED static int set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value);
/* Helpers that decode escape sequences and numbers; each returns false on failure. */
static bool append_escaped_char(dfsyntax_t *dfs, GString *str, char c);
static bool append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn);
static bool parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep);
static bool parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error);
static bool parse_double(dfsyntax_t *dfs, const char *s, double *valuep);
/* Column bookkeeping for dfs->location and dfs->string_loc. */
static void update_location(dfsyntax_t *dfs, const char *text);
static void update_string_loc(dfsyntax_t *dfs, const char *text);
/*
 * Report a scanning error from inside a rule action: emit a noisy-level log
 * line, then record the printf-style message through dfilter_fail() at the
 * scanner's current location (yyextra->location).
 * The macro contains no return statement of its own.
 */
#define FAIL(...) \
do { \
ws_noisy("Scanning failed here."); \
dfilter_fail(yyextra, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
} while (0)
FunctionIdentifier [[:alpha:]_][[:alnum:]_]*
* Cannot start with '-'. Some protocol names can contain '-', for example "mac-lte".
* Fields that contain '-' anywhere cannot start with a decimal digit.
* Note that some protocol names start with a number, for example "9p". This is
* handled as a special case for numeric patterns.
* Some protocol names contain dots, e.g: _ws.expert
* Protocol or protocol field cannot contain DOTDOT anywhere.
VarIdentifier [[:alnum:]_][[:alnum:]_-]*
ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})*
hex2 [[:xdigit:]]{2}
ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
hex4 [[:xdigit:]]{4}
DotQuadMacAddress {hex4}\.{hex4}\.{hex4}
ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
HyphenBytes {hex2}(-{hex2})+
DotBytes {hex2}(\.{hex2})+
DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
h16 [0-9A-Fa-f]{1,4}
ls32 {h16}:{h16}|{IPv4Address}
IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
V4CidrPrefix \/[[:digit:]]{1,2}
V6CidrPrefix \/[[:digit:]]{1,3}
/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
StartAlphabet [[:alnum:]_:]
Alphabet [[:alnum:]_:/-]
LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
Exponent ([eE][+-]?[[:digit:]]+)
HexExponent ([pP][+-]?[[:digit:]]+)
[[:blank:]\n\r]+ {
update_location(yyextra, yytext);
"(" return simple(TOKEN_LPAREN);
")" return simple(TOKEN_RPAREN);
"," return simple(TOKEN_COMMA);
"{" return simple(TOKEN_LBRACE);
".." return simple(TOKEN_DOTDOT);
"}" return simple(TOKEN_RBRACE);
"$" return simple(TOKEN_DOLLAR);
"@" return simple(TOKEN_ATSIGN);
"any" return simple(TOKEN_ANY);
"all" return simple(TOKEN_ALL);
"==" return test(TOKEN_TEST_ANY_EQ);
"eq" return test(TOKEN_TEST_ANY_EQ);
"any_eq" return test(TOKEN_TEST_ANY_EQ);
"!=" return test(TOKEN_TEST_ALL_NE);
"ne" return test(TOKEN_TEST_ALL_NE);
"all_ne" return test(TOKEN_TEST_ALL_NE);
"===" return test(TOKEN_TEST_ALL_EQ);
"all_eq" return test(TOKEN_TEST_ALL_EQ);
"!==" return test(TOKEN_TEST_ANY_NE);
"any_ne" return test(TOKEN_TEST_ANY_NE);
">" return test(TOKEN_TEST_GT);
"gt" return test(TOKEN_TEST_GT);
">=" return test(TOKEN_TEST_GE);
"ge" return test(TOKEN_TEST_GE);
"<" return test(TOKEN_TEST_LT);
"lt" return test(TOKEN_TEST_LT);
"<=" return test(TOKEN_TEST_LE);
"le" return test(TOKEN_TEST_LE);
"contains" return test(TOKEN_TEST_CONTAINS);
"~" return test(TOKEN_TEST_MATCHES);
"matches" return test(TOKEN_TEST_MATCHES);
"!" return test(TOKEN_TEST_NOT);
"not" return test(TOKEN_TEST_NOT);
"&&" return test(TOKEN_TEST_AND);
"and" return test(TOKEN_TEST_AND);
"||" return test(TOKEN_TEST_OR);
"or" return test(TOKEN_TEST_OR);
"^^" return test(TOKEN_TEST_XOR);
"xor" return test(TOKEN_TEST_XOR);
"in" return test(TOKEN_TEST_IN);
"+" return math(TOKEN_PLUS);
"-" return math(TOKEN_MINUS);
"*" return math(TOKEN_STAR);
"/" return math(TOKEN_RSLASH);
"%" return math(TOKEN_PERCENT);
"&" return math(TOKEN_BITWISE_AND);
"bitand" return math(TOKEN_BITWISE_AND);
"bitwise_and" return math(TOKEN_BITWISE_AND);
"#" {
return simple(TOKEN_HASH);
<LAYER>[[:digit:]]+ {
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_INDEX, yytext, STTYPE_UNINITIALIZED);
<LAYER>[^[:digit:][] {
update_location(yyextra, yytext);
FAIL("Expected digit or \"[\", not \"%s\"", yytext);
return simple(TOKEN_LBRACKET);
<RANGE>[^],]+ {
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
<RANGE>"," {
return simple(TOKEN_COMMA);
<RANGE>"]" {
return simple(TOKEN_RBRACKET);
<RANGE><<EOF>> {
update_location(yyextra, yytext);
FAIL("The right bracket was missing from a slice.");
[rR]{0,1}\042 {
/* start quote of a quoted string */
* The example of how to scan for strings was taken from
* the flex manual, from the section "Start Conditions".
* See: https://westes.github.io/flex/manual/Start-Conditions.html
update_location(yyextra, yytext);
yyextra->string_loc = yyextra->location;
yyextra->quoted_string = g_string_new(NULL);
if (yytext[0] == 'r' || yytext[0] == 'R') {
* This is a raw string (like in Python). Rules: 1) The two
* escape sequences are \\ and \". 2) Backslashes are
* preserved. 3) Double quotes in the string must be escaped.
* Corollary: Strings cannot end with an odd number of
* backslashes.
* Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
* {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\', '\\', '\0'}
yyextra->raw_string = true;
else {
yyextra->raw_string = false;
/* unterminated string */
update_string_loc(yyextra, yytext);
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
FAIL("The final quote was missing from a quoted string.");
<DQUOTE>\042 {
/* end quote */
update_string_loc(yyextra, yytext);
int token = set_lval_quoted_string(yyextra, yyextra->quoted_string);
yyextra->quoted_string = NULL;
yyextra->string_loc.col_start = -1;
return token;
<DQUOTE>\\[0-7]{1,3} {
/* octal sequence */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
else {
unsigned long result;
result = strtoul(yytext + 1, NULL, 8);
if (result > 0xff) {
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
FAIL("%s is larger than 255.", yytext);
g_string_append_c(yyextra->quoted_string, (char) result);
<DQUOTE>\\x[[:xdigit:]]{1,2} {
/* hex sequence */
* C standard does not place a limit on the number of hex
* digits after \x... but we do. \xNN can have 1 or two Ns, not more.
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
else {
unsigned long result;
result = strtoul(yytext + 2, NULL, 16);
g_string_append_c(yyextra->quoted_string, (char) result);
<DQUOTE>\\u[[:xdigit:]]{0,4} {
/* universal character name */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
<DQUOTE>\\U[[:xdigit:]]{0,8} {
/* universal character name */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
<DQUOTE>\\. {
/* escaped character */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) {
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
<DQUOTE>[^\\\042]+ {
/* non-escaped string */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
\047 {
/* start quote of a quoted character value */
update_location(yyextra, yytext);
yyextra->string_loc = yyextra->location;
yyextra->quoted_string = g_string_new("'");
/* unterminated character value */
update_string_loc(yyextra, yytext);
g_string_free(yyextra->quoted_string, true);
yyextra->quoted_string = NULL;
FAIL("The final quote was missing from a character constant.");
<SQUOTE>\047 {
/* end quote */
update_string_loc(yyextra, yytext);
g_string_append_c(yyextra->quoted_string, '\'');
int token = set_lval_charconst(yyextra, yyextra->quoted_string);
yyextra->quoted_string = NULL;
yyextra->string_loc.col_start = -1;
return token;
<SQUOTE>\\. {
/* escaped character */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
<SQUOTE>[^\\\047]+ {
/* non-escaped string */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
/* NOTE: None of the patterns below can match ".." anywhere in the token string. */
/* MAC address. */
{ColonMacAddress}|{HyphenMacAddress} {
/* MAC Address. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
/* IP address. */
{IPv4Address}{V4CidrPrefix}? {
/* IPv4 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
{IPv6Address}{V6CidrPrefix}? {
/* IPv6 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
/* Integer */
[[:digit:]][[:digit:]]* {
/* Numeric or field. */
update_location(yyextra, yytext);
/* Check if we have a protocol or protocol field, otherwise assume a literal. */
/* It is only reasonable to assume a literal here, instead of a
* (possibly non-existent) protocol field, because protocol field filter names
* should not start with a digit (the lexical syntax for numbers). */
header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
if (hfinfo != NULL) {
return set_lval_field(yyextra, hfinfo, yytext);
return set_lval_integer(yyextra, yytext, yytext);
0[bBxX]?[[:xdigit:]]+ {
/* Binary or octal or hexadecimal. */
update_location(yyextra, yytext);
return set_lval_integer(yyextra, yytext, yytext);
/* Floating point. */
[[:digit:]]+\.[[:digit:]]+ {
/* Decimal float. */
update_location(yyextra, yytext);
return set_lval_float(yyextra, yytext, yytext);
[[:digit:]]+{Exponent}|[[:digit:]]+\.[[:digit:]]+{Exponent}? {
/* Decimal float with optional exponent. */
/* Significand cannot have any side omitted. */
update_location(yyextra, yytext);
/* Check if we have a protocol or protocol field, otherwise assume a literal. */
/* It is only reasonable to assume a literal here, instead of a
* (possibly non-existent) protocol field, because protocol field filter names
* should not start with a digit (the lexical syntax for numbers). */
header_field_info *hfinfo = dfilter_resolve_unparsed(yytext, yyextra->deprecated);
if (hfinfo != NULL) {
return set_lval_field(yyextra, hfinfo, yytext);
return set_lval_float(yyextra, yytext, yytext);
0[xX][[:xdigit:]]+{HexExponent}|0[xX][[:xdigit:]]+\.[[:xdigit:]]+{HexExponent}? {
/* Hexadecimal float with optional exponent. Can't be a field because
* a field cannot begin with 0x. */
/* Significand cannot have any side omitted. */
update_location(yyextra, yytext);
return set_lval_float(yyextra, yytext, yytext);
:[[:xdigit:]]+ {
/* Numeric prefixed with ':'. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext + 1, yytext);
/* Bytes. */
:?{ColonBytes} {
/* Bytes. */
update_location(yyextra, yytext);
if (yytext[0] == ':')
return set_lval_literal(yyextra, yytext + 1, yytext);
return set_lval_literal(yyextra, yytext, yytext);
:{HyphenBytes} {
/* Bytes. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext + 1, yytext);
:{DotBytes} {
/* DotBytes, can be a field without ':' prefix. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext + 1, yytext);
/* Identifier (protocol/field/function name). */
/* This must come before FieldIdentifier to match function names. */
{FunctionIdentifier} {
/* Identifier (field or function) or literal (bytes without separator). */
update_location(yyextra, yytext);
return set_lval_identifier(yyextra, yytext, yytext);
\.{ProtoFieldIdentifier} {
/* Identifier, prefixed with a '.', must be a field, no ifs or buts. */
update_location(yyextra, yytext);
const char *name = yytext + 1;
header_field_info *hfinfo = dfilter_resolve_unparsed(name, yyextra->deprecated);
if (hfinfo == NULL) {
FAIL("\"%s\" is not a valid protocol or protocol field.", name);
return set_lval_field(yyextra, hfinfo, yytext);
{ProtoFieldIdentifier} {
/* Catch-all for protocol values. Can also be a literal. */
update_location(yyextra, yytext);
return set_lval_unparsed(yyextra, yytext, yytext);
{LiteralValue} {
/* Catch-all for semantic values. */
update_location(yyextra, yytext);
/* We use literal here because identifiers (using unparsed) should have
* matched one of the previous rules. */
return set_lval_literal(yyextra, yytext, yytext);
. {
/* Default */
update_location(yyextra, yytext);
if (isprint_string(yytext))
FAIL("\"%s\" was unexpected in this context.", yytext);
FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
* Turn diagnostics back on, so we check the code that we've written.
/*
 * Move the scanner location forward to the token that was just matched.
 *
 * The start column is advanced by the length of the previous token, and
 * the stored length is replaced by the length of the current one.
 *
 * dfs: scanner state; dfs->location is modified in place.
 * len: length of the token that was just matched.
 */
static void
_update_location(dfsyntax_t *dfs, size_t len)
{
	dfs->location.col_start += (long)dfs->location.col_len;
	dfs->location.col_len = len;
}
/*
 * Advance dfs->location over the NUL-terminated token text that was
 * just matched.
 */
static void
update_location(dfsyntax_t *dfs, const char *text)
{
	_update_location(dfs, strlen(text));
}
/*
 * Account for one more piece of a quoted string or character constant.
 *
 * The length of the piece is added to dfs->string_loc, which therefore
 * grows to cover the whole quoted token, and dfs->location is advanced
 * over the piece as for any other token.
 */
static void
update_string_loc(dfsyntax_t *dfs, const char *text)
{
	size_t len = strlen(text);

	dfs->string_loc.col_len += len;
	_update_location(dfs, len);
}
/*
 * Build the syntax-tree node for a token that carries no data (operators,
 * punctuation, keywords) and store it in dfs->lval.
 *
 * dfs:         scanner state.
 * token:       token code to hand back to the caller.
 * token_value: token text; the node gets its own copy.
 * type_id:     syntax-tree type for the new node.
 *
 * Returns 'token' unchanged.
 */
static int
set_lval_simple(dfsyntax_t *dfs, int token, const char *token_value, sttype_id_t type_id)
{
	dfs->lval = stnode_new(type_id, NULL, g_strdup(token_value), dfs->location);
	return token;
}
/*
 * Store a STTYPE_LITERAL node for the current token in dfs->lval.
 * The node gets its own copies (g_strdup) of 'value' and 'token_value'
 * and is tagged with dfs->location.
 *
 * NOTE(review): the function is declared int, but neither a return
 * statement nor the body's braces are visible here - confirm which token
 * code it must return.
 */
static int
set_lval_literal(dfsyntax_t *dfs, const char *value, const char *token_value)
dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
/*
 * Store an STTYPE_UNPARSED node for an identifier token in dfs->lval and
 * set STFLAG_UNPARSED on it. The node gets its own copies of 'value' and
 * 'token_value' and is tagged with dfs->location.
 *
 * NOTE(review): the function is declared int, but neither a return
 * statement nor the body's braces are visible here - confirm which token
 * code it must return.
 */
static int
set_lval_identifier(dfsyntax_t *dfs, const char *value, const char *token_value)
dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
/*
 * Store an STTYPE_UNPARSED node for a not-yet-resolved token in dfs->lval
 * and set STFLAG_UNPARSED on it. The node gets its own copies of 'value'
 * and 'token_value' and is tagged with dfs->location.
 *
 * NOTE(review): the visible statements are the same as in
 * set_lval_identifier(); the two presumably differ only in the token code
 * returned, which is not visible here - confirm.
 */
static int
set_lval_unparsed(dfsyntax_t *dfs, const char *value, const char *token_value)
dfs->lval = stnode_new(STTYPE_UNPARSED, g_strdup(value), g_strdup(token_value), dfs->location);
stnode_set_flags(dfs->lval, STFLAG_UNPARSED);
/*
 * Store an STTYPE_FIELD node for an already resolved field in dfs->lval.
 * The hfinfo pointer itself becomes the node data (const is cast away, no
 * copy is made); the token text is duplicated.
 *
 * NOTE(review): the function is declared int, but neither a return
 * statement nor the body's braces are visible here - confirm which token
 * code it must return.
 */
static int
set_lval_field(dfsyntax_t *dfs, const header_field_info *hfinfo, const char *token_value)
dfs->lval = stnode_new(STTYPE_FIELD, (void *)hfinfo, g_strdup(token_value), dfs->location);
/*
 * Store an STTYPE_STRING node for a completed double-quoted string in
 * dfs->lval. The GString pointer itself is passed as the node data, the
 * token text is an escaped rendering of the string contents, and the node
 * is tagged with dfs->string_loc rather than dfs->location.
 *
 * NOTE(review): the function is declared int, but neither a return
 * statement nor the body's braces are visible here - confirm which token
 * code it must return.
 */
static int
set_lval_quoted_string(dfsyntax_t *dfs, GString *quoted_string)
char *token_value;
/* Escaped form of the contents, built from an explicit length. */
token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true);
dfs->lval = stnode_new(STTYPE_STRING, quoted_string, token_value, dfs->string_loc);
/*
 * Turn the accumulated text of a character constant (quotes included)
 * into an STTYPE_CHARCONST node stored in dfs->lval. The node data is a
 * heap copy of the parsed numeric value and the node is tagged with
 * dfs->string_loc.
 *
 * NOTE(review): the body of the failure branch, its closing brace and the
 * return statements are not visible here. Presumably the node is built
 * only when parse_charconst() succeeds, and token_value is released on
 * failure - confirm.
 */
static int
set_lval_charconst(dfsyntax_t *dfs, GString *quoted_string)
unsigned long number;
bool ok;
/* Take over the character data; the GString wrapper itself is released. */
char *token_value = g_string_free(quoted_string, false);
ok = parse_charconst(dfs, token_value, &number);
if (!ok) {
dfs->lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, dfs->string_loc);
/*
 * Build the node for a token that looks like an integer and store it in
 * dfs->lval. Parsing is attempted without recording an error (set_error is
 * false); two node shapes appear below: an STTYPE_LITERAL copy of the text,
 * and an STTYPE_NUMBER node holding the unsigned value.
 *
 * NOTE(review): the closing brace of the failure branch and the return
 * statements are not visible here. Presumably the literal node is the
 * fallback when parsing fails and the number node is the success path -
 * confirm, along with the token codes returned.
 */
static int
set_lval_integer(dfsyntax_t *dfs, const char *value, const char *token_value)
unsigned long long number;
bool ok;
ok = parse_unsigned_long_long(dfs, value, &number, false);
if (!ok) {
/* Instead of failing assume this is a literal such as
"10f3deccc00d5c8f629fba7a0fff34aa" that can be interpreted
as a literal bytes value. */
dfs->lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), dfs->location);
dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
sttype_number_set_unsigned(dfs->lval, number);
/*
 * Build an STTYPE_NUMBER node holding a floating-point value for the
 * current token and store it in dfs->lval. parse_double() records its own
 * error message when it fails.
 *
 * NOTE(review): the body of the failure branch, its closing brace and the
 * return statements are not visible here - confirm what is returned on
 * failure and on success.
 */
static int
set_lval_float(dfsyntax_t *dfs, const char *value, const char *token_value)
double number;
bool ok;
ok = parse_double(dfs, value, &number);
if (!ok) {
dfs->lval = stnode_new(STTYPE_NUMBER, NULL, g_strdup(token_value), dfs->location);
sttype_number_set_float(dfs->lval, number);
/*
 * Append the character denoted by a single-letter escape sequence.
 *
 * dfs: scanner state, used for error reporting.
 * str: string being built.
 * c:   the character that followed the backslash.
 *
 * Returns true after appending; returns false, with an error recorded at
 * dfs->location, when 'c' does not name a known escape.
 */
static bool
append_escaped_char(dfsyntax_t *dfs, GString *str, char c)
{
	switch (c) {
		case 'a':
			c = '\a';
			break;
		case 'b':
			c = '\b';
			break;
		case 'f':
			c = '\f';
			break;
		case 'n':
			c = '\n';
			break;
		case 'r':
			c = '\r';
			break;
		case 't':
			c = '\t';
			break;
		case 'v':
			c = '\v';
			break;
		case '\\':
		case '\'':
		case '\"':
			/* These escapes stand for the character itself. */
			break;
		default:
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location,
					"\\%c is not a valid character escape sequence", c);
			return false;
	}

	g_string_append_c(str, c);
	return true;
}
/*
 * Parse a C universal character name: "\u" followed by exactly four, or
 * "\U" followed by exactly eight, hexadecimal digits.
 *
 * dfs:        unused.
 * str:        text starting at the backslash.
 * ret_endptr: if not NULL, receives a pointer just past the last digit
 *             consumed.
 * valuep:     receives the code point on success.
 *
 * Returns false when the text is malformed or names a code point that is
 * not allowed (see below); *valuep and *ret_endptr are then left untouched.
 *
 * Exactly the required number of digits is consumed. Any further
 * hexadecimal digits are left for the caller to reject instead of being
 * folded into the value.
 */
static bool
parse_universal_character_name(dfsyntax_t *dfs _U_, const char *str, char **ret_endptr, gunichar *valuep)
{
	uint64_t val = 0;
	int ndigits;

	if (str[0] != '\\')
		return false;

	if (str[1] == 'u')
		ndigits = 4;
	else if (str[1] == 'U')
		ndigits = 8;
	else
		return false;

	const char *digits = str + 2;	/* skip the backslash and 'u' or 'U' */

	/* The xdigit test also fails on the terminating NUL, so this never
	 * reads past the end of a short string. */
	for (int i = 0; i < ndigits; i++) {
		if (!g_ascii_isxdigit(digits[i])) {
			return false;
		}
		val = (val << 4) | (uint64_t)g_ascii_xdigit_value(digits[i]);
	}

	/*
	 * Ref: https://en.cppreference.com/w/c/language/escape
	 * Range of universal character names
	 *
	 * If a universal character name corresponds to a code point that is
	 * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a
	 * surrogate code point (the range 0xD800-0xDFFF, inclusive), or
	 * greater than 0x10FFFF, i.e. not a Unicode code point (since C23),
	 * the program is ill-formed. In other words, members of basic source
	 * character set and control characters (in ranges 0x0-0x1F and
	 * 0x7F-0x9F) cannot be expressed in universal character names.
	 */
	if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60)
		return false;
	else if (val >= 0xD800 && val <= 0xDFFF)
		return false;
	else if (val > 0x10FFFF)
		return false;

	*valuep = (gunichar)val;
	if (ret_endptr)
		*ret_endptr = (char *)(digits + ndigits);
	return true;
}
/*
 * Append the character named by a "\uXXXX" or "\UXXXXXXXX" escape.
 *
 * dfs: scanner state, used for error reporting.
 * str: string being built.
 * ucn: escape text starting at the backslash.
 *
 * Returns true after appending the code point; returns false, with an
 * error recorded at dfs->location, when the escape is not a valid
 * universal character name.
 */
static bool
append_universal_character_name(dfsyntax_t *dfs, GString *str, const char *ucn)
{
	gunichar val;

	if (!parse_universal_character_name(dfs, ucn, NULL, &val)) {
		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->location, "%s is not a valid universal character name", ucn);
		return false;
	}

	g_string_append_unichar(str, val);
	return true;
}
/*
 * Parse a character constant, including its surrounding single quotes,
 * into a numeric value.
 *
 * dfs:    scanner state; errors are recorded at dfs->string_loc.
 * s:      the constant, starting at the opening quote.
 * valuep: receives the value on success.
 *
 * Accepted forms are a single printable ASCII character, a simple escape
 * (\a \b \f \n \r \t \v \' \" \\), \x with one or two hex digits, up to
 * three octal digits (at most 0xFF), and a universal character name.
 *
 * Returns false, with an error recorded, on any other input.
 */
static bool
parse_charconst(dfsyntax_t *dfs, const char *s, unsigned long *valuep)
{
	const char *cp;
	unsigned long value;
	gunichar unival;
	char *endptr;

	cp = s + 1;	/* skip the leading ' */
	if (*cp == '\'') {
		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Empty character constant.");
		return false;
	}

	if (*cp == '\\') {
		/*
		 * C escape sequence.
		 * An escape sequence is an octal number \NNN,
		 * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
		 * that stands for the byte value of the equivalent
		 * C-escape in ASCII encoding.
		 */
		cp++;
		switch (*cp) {

		case '\0':
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
			return false;

		case 'a':
			value = '\a';
			cp++;
			break;

		case 'b':
			value = '\b';
			cp++;
			break;

		case 'f':
			value = '\f';
			cp++;
			break;

		case 'n':
			value = '\n';
			cp++;
			break;

		case 'r':
			value = '\r';
			cp++;
			break;

		case 't':
			value = '\t';
			cp++;
			break;

		case 'v':
			value = '\v';
			cp++;
			break;

		case '\'':
			value = '\'';
			cp++;
			break;

		case '\\':
			value = '\\';
			cp++;
			break;

		case '"':
			value = '"';
			cp++;
			break;

		case 'x':
			/* First hex digit is mandatory. */
			cp++;
			if (*cp >= '0' && *cp <= '9')
				value = *cp - '0';
			else if (*cp >= 'A' && *cp <= 'F')
				value = 10 + (*cp - 'A');
			else if (*cp >= 'a' && *cp <= 'f')
				value = 10 + (*cp - 'a');
			else {
				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
				return false;
			}
			cp++;
			/* Second hex digit is optional. */
			if (*cp != '\'') {
				value <<= 4;
				if (*cp >= '0' && *cp <= '9')
					value |= *cp - '0';
				else if (*cp >= 'A' && *cp <= 'F')
					value |= 10 + (*cp - 'A');
				else if (*cp >= 'a' && *cp <= 'f')
					value |= 10 + (*cp - 'a');
				else {
					dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
					return false;
				}
				cp++;
			}
			break;

		case 'u':
		case 'U':
			/* s + 1 is the backslash that introduces the name. */
			if (!parse_universal_character_name(dfs, s+1, &endptr, &unival)) {
				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is not a valid universal character name", s);
				return false;
			}
			value = (unsigned long)unival;
			cp = endptr;
			break;

		default:
			/* Octal */
			if (*cp >= '0' && *cp <= '7')
				value = *cp - '0';
			else {
				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
				return false;
			}
			if (*(cp + 1) != '\'') {
				cp++;
				value <<= 3;
				if (*cp >= '0' && *cp <= '7')
					value |= *cp - '0';
				else {
					dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
					return false;
				}
				if (*(cp + 1) != '\'') {
					cp++;
					value <<= 3;
					if (*cp >= '0' && *cp <= '7')
						value |= *cp - '0';
					else {
						dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s isn't a valid character constant.", s);
						return false;
					}
				}
			}
			/* Three octal digits can encode up to 0777. */
			if (value > 0xFF) {
				dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too large to be a valid character constant.", s);
				return false;
			}
			cp++;
			break;
		}
	} else {
		/* Read through unsigned char so that a byte >= 0x80 is not
		 * sign-extended where plain char is signed. */
		value = (unsigned char)*cp++;
		if (!g_ascii_isprint(value)) {
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "Non-printable value '0x%02lx' in character constant.", value);
			return false;
		}
	}

	/* Only the closing quote may follow the single character. */
	if ((*cp != '\'') || (*(cp + 1) != '\0')){
		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "%s is too long to be a valid character constant.", s);
		return false;
	}

	*valuep = value;
	return true;
}
/*
 * Parse an unsigned integer.
 *
 * dfs:       scanner state, used for error reporting.
 * s:         number text. A "0b"/"0B" prefix selects base 2; otherwise the
 *            base is chosen from the text (base 0 conversion).
 * valuep:    receives the converted value.
 * set_error: when true, a failure is also recorded through dfilter_fail().
 *
 * Returns true when the whole of 's' is a valid number that fits. A binary
 * prefix with no digits after it is rejected as not a valid number.
 */
static bool
parse_unsigned_long_long(dfsyntax_t *dfs, const char *s, unsigned long long *valuep, bool set_error)
{
	const char *digits = s;
	char *endptr;
	int base = 0;

	if (s[0] == '0' && (s[1] == 'b' || s[1] == 'B')) {
		digits = s + 2;
		base = 2;
	}

	errno = 0;
	*valuep = g_ascii_strtoull(digits, &endptr, base);

	/* Compare against where conversion began, not against 's'. */
	if (errno == EINVAL || endptr == digits || *endptr != '\0') {
		/* This isn't a valid number. */
		if (set_error)
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number.", s);
		return false;
	}
	if (errno == ERANGE) {
		if (set_error)
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is too large to be represented as a 64-bit number.", s);
		return false;
	}
	if (errno != 0) {
		// Should not happen
		if (set_error)
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid number (%s).", s, g_strerror(errno));
		return false;
	}

	return true;
}
/*
 * Parse a floating-point number.
 *
 * dfs:    scanner state; errors are recorded at dfs->string_loc.
 * s:      number text; all of it must be consumed by the conversion.
 * valuep: receives the converted value.
 *
 * Returns true on success. Returns false, with an error recorded, when the
 * text is not a number or the conversion reports a range error.
 */
static bool
parse_double(dfsyntax_t *dfs, const char *s, double *valuep)
{
	char *endptr = NULL;

	errno = 0;
	*valuep = g_ascii_strtod(s, &endptr);

	if (endptr == s || *endptr != '\0') {
		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" is not a valid floating-point number.", s);
		return false;
	}
	if (errno == ERANGE) {
		/* A range error with HUGE_VAL is overflow; anything else is
		 * reported as underflow. */
		if (*valuep == HUGE_VAL) {
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point overflow.", s);
		}
		else {
			dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc, "\"%s\" causes floating-point underflow.", s);
		}
		return false;
	}
	if (errno != 0) {
		// Should not happen
		dfilter_fail(dfs, DF_ERROR_GENERIC, dfs->string_loc,
				"\"%s\" is not a valid floating-point number (%s).",
				s, g_strerror(errno));
		return false;
	}

	return true;
}