/* scanner.l --- Wireshark display filter lexical scanner (flex) */
%top {
/* Include this before everything else, for various large-file definitions */
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <wireshark.h>
#include <stdlib.h>
#include <errno.h>
#include <wsutil/str_util.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "grammar.h"
#include "dfunctions.h"
}
/*
* Always generate warnings.
*/
%option warn
/*
* We want a reentrant scanner.
*/
%option reentrant
/*
* We don't use input, so don't generate code for it.
*/
%option noinput
/*
* We don't use unput, so don't generate code for it.
*/
%option nounput
/*
* We don't read interactively from the terminal.
*/
%option never-interactive
/*
* Prefix scanner routines with "df_yy" rather than "yy", so this scanner
* can coexist with other scanners.
*/
%option prefix="df_yy"
/*
* We're reading from a string, so we don't need yywrap.
*/
%option noyywrap
/*
* The type for the state we keep for a scanner.
*/
%option extra-type="df_scanner_state_t *"
%{
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
/*
* Disable diagnostics in the code generated by Flex.
*/
DIAG_OFF_FLEX()
WS_WARN_UNUSED static int set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id);
#define simple(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_UNINITIALIZED))
#define test(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_TEST))
#define math(token) (update_location(yyextra, yytext), set_lval_simple(yyextra, token, yytext, STTYPE_ARITHMETIC))
WS_WARN_UNUSED static int set_lval_literal(df_scanner_state_t *state, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_identifier(df_scanner_state_t *state, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_constant(df_scanner_state_t *state, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_unparsed(df_scanner_state_t *state, const char *value, const char *token_value);
WS_WARN_UNUSED static int set_lval_field(df_scanner_state_t *state, const header_field_info *hfinfo, const char *token_value);
WS_WARN_UNUSED static int set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string);
WS_WARN_UNUSED static int set_lval_charconst(df_scanner_state_t *state, GString *quoted_string);
static gboolean append_escaped_char(df_scanner_state_t *state, GString *str, char c);
static gboolean append_universal_character_name(df_scanner_state_t *state, GString *str, const char *ucn);
static gboolean parse_charconst(df_scanner_state_t *state, const char *s, unsigned long *valuep);
static void update_location(df_scanner_state_t *state, const char *text);
static void update_string_loc(df_scanner_state_t *state, const char *text);
#define FAIL(...) \
do { \
ws_noisy("Scanning failed here."); \
dfilter_fail(yyextra->dfw, DF_ERROR_GENERIC, yyextra->location, __VA_ARGS__); \
} while (0)
%}
FunctionIdentifier [[:alpha:]_][[:alnum:]_]*
/*
* Cannot start with '-'. * Some protocol name can contain '-', for example "mac-lte".
* Note that some protocol names start with a number, for example "9p".
* Some protocol names contain dots, e.g: _ws.expert
* Protocol or protocol field cannot contain DOTDOT anywhere.
*/
VarIdentifier [[:alnum:]_][[:alnum:]_-]*
ProtoFieldIdentifier {VarIdentifier}(\.{VarIdentifier})*
hex2 [[:xdigit:]]{2}
ColonMacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}
HyphenMacAddress {hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}
DotMacAddress {hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
hex4 [[:xdigit:]]{4}
DotQuadMacAddress {hex4}\.{hex4}\.{hex4}
ColonBytes ({hex2}:)|({hex2}(:{hex2})+)
HyphenBytes {hex2}(-{hex2})+
DotBytes {hex2}(\.{hex2})+
DecOctet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
IPv4Address {DecOctet}\.{DecOctet}\.{DecOctet}\.{DecOctet}
h16 [0-9A-Fa-f]{1,4}
ls32 {h16}:{h16}|{IPv4Address}
IPv6Address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
V4CidrPrefix \/[[:digit:]]{1,2}
V6CidrPrefix \/[[:digit:]]{1,3}
/* Catch all valid semantic values. Cannot contain DOT DOT or start with MINUS. */
StartAlphabet [[:alnum:]_:]
Alphabet [[:alnum:]_:/-]
LiteralValue {StartAlphabet}{Alphabet}*(\.{Alphabet}+)*
%x RANGE
%x LAYER
%x DQUOTE
%x SQUOTE
%%
[[:blank:]\n\r]+ {
update_location(yyextra, yytext);
}
"(" return simple(TOKEN_LPAREN);
")" return simple(TOKEN_RPAREN);
"," return simple(TOKEN_COMMA);
"{" return simple(TOKEN_LBRACE);
".." return simple(TOKEN_DOTDOT);
"}" return simple(TOKEN_RBRACE);
"$" return simple(TOKEN_DOLLAR);
"@" return simple(TOKEN_ATSIGN);
"any" return simple(TOKEN_ANY);
"all" return simple(TOKEN_ALL);
"==" return test(TOKEN_TEST_ANY_EQ);
"eq" return test(TOKEN_TEST_ANY_EQ);
"any_eq" return test(TOKEN_TEST_ANY_EQ);
"!=" return test(TOKEN_TEST_ALL_NE);
"ne" return test(TOKEN_TEST_ALL_NE);
"all_ne" return test(TOKEN_TEST_ALL_NE);
"===" return test(TOKEN_TEST_ALL_EQ);
"all_eq" return test(TOKEN_TEST_ALL_EQ);
"!==" return test(TOKEN_TEST_ANY_NE);
"~=" {
add_deprecated_token(yyextra->dfw, "The operator \"~=\" is deprecated, use \"!==\" instead.");
return test(TOKEN_TEST_ANY_NE);
}
"any_ne" return test(TOKEN_TEST_ANY_NE);
">" return test(TOKEN_TEST_GT);
"gt" return test(TOKEN_TEST_GT);
">=" return test(TOKEN_TEST_GE);
"ge" return test(TOKEN_TEST_GE);
"<" return test(TOKEN_TEST_LT);
"lt" return test(TOKEN_TEST_LT);
"<=" return test(TOKEN_TEST_LE);
"le" return test(TOKEN_TEST_LE);
"contains" return test(TOKEN_TEST_CONTAINS);
"~" return test(TOKEN_TEST_MATCHES);
"matches" return test(TOKEN_TEST_MATCHES);
"!" return test(TOKEN_TEST_NOT);
"not" return test(TOKEN_TEST_NOT);
"&&" return test(TOKEN_TEST_AND);
"and" return test(TOKEN_TEST_AND);
"||" return test(TOKEN_TEST_OR);
"or" return test(TOKEN_TEST_OR);
"in" return test(TOKEN_TEST_IN);
"+" return math(TOKEN_PLUS);
"-" return math(TOKEN_MINUS);
"*" return math(TOKEN_STAR);
"/" return math(TOKEN_RSLASH);
"%" return math(TOKEN_PERCENT);
"&" return math(TOKEN_BITWISE_AND);
"bitwise_and" return math(TOKEN_BITWISE_AND);
"#" {
BEGIN(LAYER);
return simple(TOKEN_HASH);
}
<LAYER>[[:digit:]]+ {
BEGIN(INITIAL);
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_INTEGER, yytext, STTYPE_UNINITIALIZED);
}
<LAYER>[^[:digit:][] {
update_location(yyextra, yytext);
FAIL("Expected digit or \"[\", not \"%s\"", yytext);
return SCAN_FAILED;
}
<INITIAL,LAYER>"[" {
BEGIN(RANGE);
return simple(TOKEN_LBRACKET);
}
<RANGE>[^],]+ {
update_location(yyextra, yytext);
return set_lval_simple(yyextra, TOKEN_RANGE_NODE, yytext, STTYPE_UNINITIALIZED);
}
<RANGE>"," {
return simple(TOKEN_COMMA);
}
<RANGE>"]" {
BEGIN(INITIAL);
return simple(TOKEN_RBRACKET);
}
<RANGE><<EOF>> {
update_location(yyextra, yytext);
FAIL("The right bracket was missing from a slice.");
return SCAN_FAILED;
}
[rR]{0,1}\042 {
/* start quote of a quoted string */
/*
* The example of how to scan for strings was taken from
* the flex manual, from the section "Start Conditions".
* See: https://westes.github.io/flex/manual/Start-Conditions.html
*/
BEGIN(DQUOTE);
update_location(yyextra, yytext);
yyextra->string_loc = yyextra->location;
yyextra->quoted_string = g_string_new(NULL);
if (yytext[0] == 'r' || yytext[0] == 'R') {
/*
* This is a raw string (like in Python). Rules: 1) The two
* escape sequences are \\ and \". 2) Backslashes are
* preserved. 3) Double quotes in the string must be escaped.
* Corollary: Strings cannot end with an odd number of
* backslashes.
* Example: r"a\b\x12\"\\" is the string (including the implicit NUL terminator)
* {'a', '\\', 'b', '\\', 'x', '1', '2', '\\', '"', '\\'. '\\', '\0'}
*/
yyextra->raw_string = TRUE;
}
else {
yyextra->raw_string = FALSE;
}
}
<DQUOTE><<EOF>> {
/* unterminated string */
update_string_loc(yyextra, yytext);
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
FAIL("The final quote was missing from a quoted string.");
return SCAN_FAILED;
}
<DQUOTE>\042 {
/* end quote */
BEGIN(INITIAL);
update_string_loc(yyextra, yytext);
int token = set_lval_quoted_string(yyextra, yyextra->quoted_string);
yyextra->quoted_string = NULL;
yyextra->string_loc.col_start = -1;
return token;
}
<DQUOTE>\\[0-7]{1,3} {
/* octal sequence */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
unsigned long result;
result = strtoul(yytext + 1, NULL, 8);
if (result > 0xff) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
FAIL("%s is larger than 255.", yytext);
return SCAN_FAILED;
}
g_string_append_c(yyextra->quoted_string, (gchar) result);
}
}
<DQUOTE>\\x[[:xdigit:]]{1,2} {
/* hex sequence */
/*
* C standard does not place a limit on the number of hex
* digits after \x... but we do. \xNN can have 1 or two Ns, not more.
*/
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else {
unsigned long result;
result = strtoul(yytext + 2, NULL, 16);
g_string_append_c(yyextra->quoted_string, (gchar) result);
}
}
<DQUOTE>\\u[[:xdigit:]]{0,4} {
/* universal character name */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
return SCAN_FAILED;
}
}
<DQUOTE>\\U[[:xdigit:]]{0,8} {
/* universal character name */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else if (!append_universal_character_name(yyextra, yyextra->quoted_string, yytext)) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
return SCAN_FAILED;
}
}
<DQUOTE>\\. {
/* escaped character */
update_string_loc(yyextra, yytext);
if (yyextra->raw_string) {
g_string_append(yyextra->quoted_string, yytext);
}
else if (!append_escaped_char(yyextra, yyextra->quoted_string, yytext[1])) {
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
return SCAN_FAILED;
}
}
<DQUOTE>[^\\\042]+ {
/* non-escaped string */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
}
\047 {
/* start quote of a quoted character value */
BEGIN(SQUOTE);
update_location(yyextra, yytext);
yyextra->string_loc = yyextra->location;
yyextra->quoted_string = g_string_new("'");
}
<SQUOTE><<EOF>> {
/* unterminated character value */
update_string_loc(yyextra, yytext);
g_string_free(yyextra->quoted_string, TRUE);
yyextra->quoted_string = NULL;
FAIL("The final quote was missing from a character constant.");
return SCAN_FAILED;
}
<SQUOTE>\047 {
/* end quote */
BEGIN(INITIAL);
update_string_loc(yyextra, yytext);
g_string_append_c(yyextra->quoted_string, '\'');
int token = set_lval_charconst(yyextra, yyextra->quoted_string);
yyextra->quoted_string = NULL;
yyextra->string_loc.col_start = -1;
return token;
}
<SQUOTE>\\. {
/* escaped character */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
}
<SQUOTE>[^\\\047]+ {
/* non-escaped string */
update_string_loc(yyextra, yytext);
g_string_append(yyextra->quoted_string, yytext);
}
/* NOTE: None of the patterns below can match ".." anywhere in the token string. */
/* MAC address. */
{ColonMacAddress}|{HyphenMacAddress} {
/* MAC Address. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
{DotMacAddress}|{DotQuadMacAddress} {
/* MAC Address, can also be a field. */
update_location(yyextra, yytext);
return set_lval_unparsed(yyextra, yytext, yytext);
}
/* IP address. */
{IPv4Address}{V4CidrPrefix}? {
/* IPv4 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
{IPv6Address}{V6CidrPrefix}? {
/* IPv6 with or without prefix. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
/* Integer or bytes */
0[bBoOxX][[:xdigit:]]+ {
/* Binary/octal/hex integer. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
:[[:xdigit:]]+ {
/* Numeric prefixed with ':'. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext + 1, yytext);
}
[[:xdigit:]]+ {
/* Numeric or field. */
update_location(yyextra, yytext);
return set_lval_unparsed(yyextra, yytext, yytext);
}
/* Floating point. */
[[:digit:]]+\.[[:digit:]]+([eE][+-]?[[:digit:]]+)? {
/* Decimal float with optional exponent. */
/* Significand cannot have any side omitted. */
update_location(yyextra, yytext);
return set_lval_unparsed(yyextra, yytext, yytext);
}
0[xX][[:xdigit:]]+\.[[:xdigit:]]+([pP][+-]?[[:digit:]]+)? {
/* Hexadecimal float with optional exponent. Can't be a field because
* field cannot beging with 0x. */
/* Significand cannot have any side omitted. */
update_location(yyextra, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
/* Bytes. */
:?{ColonBytes} {
/* Bytes. */
update_location(yyextra, yytext);
if (yytext[0] == ':')
return set_lval_literal(yyextra, yytext + 1, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
:?{HyphenBytes} {
/* Bytes. */
update_location(yyextra, yytext);
if (yytext[0] == ':')
return set_lval_literal(yyextra, yytext + 1, yytext);
return set_lval_literal(yyextra, yytext, yytext);
}
:?{DotBytes} {
/* DotBytes, can be a field without ':' prefix. */
update_location(yyextra, yytext);
if (yytext[0] == ':')
return set_lval_literal(yyextra, yytext + 1, yytext);
return set_lval_unparsed(yyextra, yytext, yytext);
}
/* Identifier (protocol/field/function name). */
/* This must come before FieldIdentifier to match function names. */
{FunctionIdentifier} {
/* Identifier (field or function) or constant (bytes without separator). */
/* We use CONSTANT instead of LITERAL because the difference is significant
* in the syntactical grammar. */
update_location(yyextra, yytext);
header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, yytext);
if (hfinfo != NULL) {
return set_lval_identifier(yyextra, yytext, yytext);
}
df_func_def_t *def = df_func_lookup(yytext);
if (def != NULL) {
return set_lval_identifier(yyextra, yytext, yytext);
}
return set_lval_constant(yyextra, yytext, yytext);
}
\.{ProtoFieldIdentifier} {
    /* Identifier explicitly prefixed with '.'; it must resolve to a
     * registered protocol or field, otherwise scanning fails. */
    update_location(yyextra, yytext);
    const char *field_name = yytext + 1;    /* skip the leading dot */
    header_field_info *hfinfo = dfilter_resolve_unparsed(yyextra->dfw, field_name);
    if (hfinfo == NULL) {
        FAIL("\"%s\" is not a valid protocol or protocol field.", field_name);
        return SCAN_FAILED;
    }
    return set_lval_field(yyextra, hfinfo, yytext);
}
{ProtoFieldIdentifier} {
    /* Catch-all for protocol-style tokens; set_lval_unparsed() decides
     * whether this is a field reference or a literal value. */
    update_location(yyextra, yytext);
    const char *tok = yytext;
    return set_lval_unparsed(yyextra, tok, tok);
}
{LiteralValue} {
    /* Catch-all for remaining semantic values. LITERAL (not unparsed) is
     * used here because anything that could be an identifier should
     * already have matched one of the earlier rules. */
    update_location(yyextra, yytext);
    const char *tok = yytext;
    return set_lval_literal(yyextra, tok, tok);
}
. {
    /* Default rule: any character not matched above is an error.
     * Report non-printable input with a generic message, otherwise echo
     * the offending text back to the user. */
    update_location(yyextra, yytext);
    if (!isprint_string(yytext))
        FAIL("Non-printable ASCII characters may only appear inside double-quotes.");
    else
        FAIL("\"%s\" was unexpected in this context.", yytext);
    return SCAN_FAILED;
}
%%
/*
* Turn diagnostics back on, so we check the code that we've written.
*/
DIAG_ON_FLEX()
static void
_update_location(df_scanner_state_t *state, size_t len)
{
    /* Slide the current token location forward: the new token begins
     * where the previous one ended and spans 'len' characters. */
    long prev_len = (long)state->location.col_len;
    state->location.col_start += prev_len;
    state->location.col_len = len;
}
static void
update_location(df_scanner_state_t *state, const char *text)
{
    /* Record the location of the token whose text is 'text'. */
    size_t token_len = strlen(text);
    _update_location(state, token_len);
}
static void
update_string_loc(df_scanner_state_t *state, const char *text)
{
    /* Extend the accumulated string location by this fragment while also
     * advancing the regular per-token location. */
    size_t fragment_len = strlen(text);
    state->string_loc.col_len += fragment_len;
    _update_location(state, fragment_len);
}
static int
set_lval_simple(df_scanner_state_t *state, int token, const char *token_value, sttype_id_t type_id)
{
    /* Build a syntax-tree node of 'type_id' with no semantic payload (only a
     * copy of the token text, kept for error reporting) and stash it as the
     * scanner's current lvalue. Returns 'token' unchanged so rule actions can
     * write "return simple(TOKEN_X);".
     * (Removed a stray VCS-timestamp line that had been fused into the body.) */
    state->df_lval = stnode_new(type_id, NULL, g_strdup(token_value), state->location);
    return token;
}
static int
set_lval_literal(df_scanner_state_t *state, const char *value, const char *token_value)
{
    /* Stash a LITERAL node carrying a copy of 'value'; 'token_value' is the
     * raw token text, also copied, kept for error reporting.
     * (Removed a stray VCS-timestamp line that had been fused into the body.) */
    state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(value), g_strdup(token_value), state->location);
    return TOKEN_LITERAL;
}
static int
set_lval_identifier(df_scanner_state_t *state, const char *value, const char *token_value)
{
    /* Identifiers carry their name in a LITERAL-typed node; it is the
     * returned token code (TOKEN_IDENTIFIER) that distinguishes them in
     * the grammar. */
    char *name_copy = g_strdup(value);
    char *text_copy = g_strdup(token_value);
    state->df_lval = stnode_new(STTYPE_LITERAL, name_copy, text_copy, state->location);
    return TOKEN_IDENTIFIER;
}
static int
set_lval_constant(df_scanner_state_t *state, const char *value, const char *token_value)
{
    /* CONSTANT tokens reuse the LITERAL node type; only the returned token
     * code differs, which is what the grammar keys on. */
    state->df_lval = stnode_new(STTYPE_LITERAL, g_strdup(value),
                                g_strdup(token_value), state->location);
    return TOKEN_CONSTANT;
}
static int
set_lval_unparsed(df_scanner_state_t *state, const char *value, const char *token_value)
{
    /* "Unparsed" tokens are ambiguous: resolve to a FIELD node if 'value'
     * names a registered protocol field, otherwise fall back to a LITERAL.
     * Either way the node is flagged STFLAG_UNPARSED so later stages know
     * the interpretation was heuristic.
     * (Removed a stray VCS-timestamp line that had been fused into the body.) */
    int token;
    const header_field_info *hfinfo;

    hfinfo = dfilter_resolve_unparsed(state->dfw, value);
    if (hfinfo != NULL)
        token = set_lval_field(state, hfinfo, token_value);
    else
        token = set_lval_literal(state, value, token_value);
    stnode_set_flags(state->df_lval, STFLAG_UNPARSED);
    return token;
}
static int
set_lval_field(df_scanner_state_t *state, const header_field_info *hfinfo, const char *token_value)
{
    /* Build a FIELD syntax-tree node for an already-resolved header field
     * and store it as the scanner's lvalue; always returns TOKEN_FIELD.
     * NOTE(review): the cast drops const because stnode_new() takes a
     * gpointer — presumably the node never modifies hfinfo; confirm. */
    state->df_lval = stnode_new(STTYPE_FIELD, (gpointer)hfinfo, g_strdup(token_value), state->location);
    return TOKEN_FIELD;
}
static int
2022-07-03 21:33:29 +00:00
set_lval_quoted_string(df_scanner_state_t *state, GString *quoted_string)
{
2022-07-03 21:33:29 +00:00
char *token_value;
2022-07-03 21:33:29 +00:00
token_value = ws_escape_string_len(NULL, quoted_string->str, quoted_string->len, true);
2023-01-02 00:52:21 +00:00
state->df_lval = stnode_new(STTYPE_STRING, quoted_string, token_value, state->string_loc);
2022-07-03 21:33:29 +00:00
return TOKEN_STRING;
}
static int
2022-07-03 21:33:29 +00:00
set_lval_charconst(df_scanner_state_t *state, GString *quoted_string)
{
unsigned long number;
gboolean ok;
char *token_value = g_string_free(quoted_string, FALSE);
ok = parse_charconst(state, token_value, &number);
if (!ok) {
g_free(token_value);
return SCAN_FAILED;
}
2023-01-02 00:52:21 +00:00
state->df_lval = stnode_new(STTYPE_CHARCONST, g_memdup2(&number, sizeof(number)), token_value, state->string_loc);
2022-07-03 21:33:29 +00:00
return TOKEN_CHARCONST;
}
static gboolean
append_escaped_char(df_scanner_state_t *state, GString *str, char c)
{
    /* Translate a single-character C escape ('c' is the character after the
     * backslash) and append the resulting byte to 'str'. Reports an error
     * and returns FALSE for unrecognized escapes. */
    static const char escapes[] = "abfnrtv";
    static const char values[]  = "\a\b\f\n\r\t\v";
    const char *hit = NULL;

    /* Characters that escape to themselves. */
    if (c == '\\' || c == '\'' || c == '\"') {
        g_string_append_c(str, c);
        return TRUE;
    }
    /* Guard against c == '\0': strchr would match the terminator. */
    if (c != '\0')
        hit = strchr(escapes, c);
    if (hit == NULL) {
        dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->location,
                "\\%c is not a valid character escape sequence", c);
        return FALSE;
    }
    g_string_append_c(str, values[hit - escapes]);
    return TRUE;
}
static gboolean
parse_universal_character_name(df_scanner_state_t *state _U_, const char *str, char **ret_endptr, gunichar *valuep)
{
    /* Parse a C-style universal character name starting at 'str':
     * "\uXXXX" (4 hex digits) or "\UXXXXXXXX" (8 hex digits).
     * On success stores the code point in *valuep and, if ret_endptr is
     * non-NULL, the position just past the consumed digits.
     * Returns FALSE on malformed or out-of-range input (no error is
     * reported here; callers produce the diagnostic). */
    guint64 val;
    char *endptr;
    int ndigits;
    if (str[0] != '\\')
        return FALSE;
    if (str[1] == 'u')
        ndigits = 4;
    else if (str[1] == 'U')
        ndigits = 8;
    else
        return FALSE;
    /* Require at least 'ndigits' hex digits. The loop stops safely at a
     * terminating NUL because it is not a hex digit.
     * NOTE(review): g_ascii_strtoull() below consumes *all* subsequent hex
     * digits, so inputs with more than 'ndigits' digits are accepted and
     * folded into the value — confirm this leniency is intended. */
    for (int i = 2; i < ndigits + 2; i++) {
        if (!g_ascii_isxdigit(str[i])) {
            return FALSE;
        }
    }
    errno = 0;
    val = g_ascii_strtoull(str + 2, &endptr, 16); /* skip leading 'u' or 'U' */
    if (errno != 0 || endptr == str || val > G_MAXUINT32) {
        return FALSE;
    }
    /*
     * Ref: https://en.cppreference.com/w/c/language/escape
     * Range of universal character names
     *
     * If a universal character name corresponds to a code point that is
     * not 0x24 ($), 0x40 (@), nor 0x60 (`) and less than 0xA0, or a
     * surrogate code point (the range 0xD800-0xDFFF, inclusive), or
     * greater than 0x10FFFF, i.e. not a Unicode code point (since C23),
     * the program is ill-formed. In other words, members of basic source
     * character set and control characters (in ranges 0x0-0x1F and
     * 0x7F-0x9F) cannot be expressed in universal character names.
     */
    if (val < 0xA0 && val != 0x24 && val != 0x40 && val != 0x60)
        return FALSE;
    else if (val >= 0xD800 && val <= 0xDFFF)
        return FALSE;
    else if (val > 0x10FFFF)
        return FALSE;
    *valuep = (gunichar)val;
    if (ret_endptr)
        *ret_endptr = endptr;
    return TRUE;
}
static gboolean
append_universal_character_name(df_scanner_state_t *state, GString *str, const char *ucn)
{
    /* Decode the universal character name at 'ucn' and append the code
     * point to 'str' as UTF-8; reports an error and returns FALSE if the
     * name is invalid. */
    gunichar code_point;
    gboolean ok = parse_universal_character_name(state, ucn, NULL, &code_point);
    if (!ok) {
        dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->location, "%s is not a valid universal character name", ucn);
        return FALSE;
    }
    g_string_append_unichar(str, code_point);
    return TRUE;
}
static gboolean
parse_charconst(df_scanner_state_t *state, const char *s, unsigned long *valuep)
{
    /* Parse a single-quoted character constant 's' (e.g. 'a', '\n', '\x41',
     * '\101', '\u00e9') into *valuep. Returns FALSE and reports the error
     * through dfilter_fail() on malformed input.
     *
     * Two fixes relative to the previous revision:
     *  - case 'n' was missing its cp++, so '\n' was rejected as "too long";
     *  - the \x branch advanced cp unconditionally before testing for a
     *    second digit, so a single-digit constant such as '\x4' skipped its
     *    closing quote and was rejected. It now uses the same *(cp + 1)
     *    lookahead as the octal branch. */
    const char *cp;
    unsigned long value;
    gunichar unival;
    char *endptr;
    int digit;

    cp = s + 1; /* skip the leading ' */
    if (*cp == '\'') {
        dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "Empty character constant.");
        return FALSE;
    }
    if (*cp == '\\') {
        /*
         * C escape sequence.
         * An escape sequence is an octal number \NNN,
         * an hex number \xNN, or one of \' \" \\ \a \b \f \n \r \t \v
         * that stands for the byte value of the equivalent
         * C-escape in ASCII encoding.
         */
        cp++;
        switch (*cp) {
        case '\0':
            dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
            return FALSE;
        case 'a':
            value = '\a';
            cp++;
            break;
        case 'b':
            value = '\b';
            cp++;
            break;
        case 'f':
            value = '\f';
            cp++;
            break;
        case 'n':
            value = '\n';
            cp++;   /* was missing: without it '\n' failed the final check */
            break;
        case 'r':
            value = '\r';
            cp++;
            break;
        case 't':
            value = '\t';
            cp++;
            break;
        case 'v':
            value = '\v';
            cp++;
            break;
        case '\'':
            value = '\'';
            cp++;
            break;
        case '\\':
            value = '\\';
            cp++;
            break;
        case '"':
            value = '"';
            cp++;
            break;
        case 'x':
            /* \xN or \xNN -- one or two hex digits. */
            cp++;
            digit = g_ascii_xdigit_value(*cp);
            if (digit < 0) {
                dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
                return FALSE;
            }
            value = (unsigned long)digit;
            if (*(cp + 1) != '\'') {
                cp++;
                digit = g_ascii_xdigit_value(*cp);
                if (digit < 0) {
                    dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
                    return FALSE;
                }
                value = (value << 4) | (unsigned long)digit;
            }
            cp++;
            break;
        case 'u':
        case 'U':
            /* Universal character name; s+1 points at the backslash. */
            if (!parse_universal_character_name(state, s+1, &endptr, &unival)) {
                dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s is not a valid universal character name", s);
                return FALSE;
            }
            value = (unsigned long)unival;
            cp = endptr;
            break;
        default:
            /* Octal: \N, \NN or \NNN (at most 0xFF). */
            if (*cp >= '0' && *cp <= '7')
                value = *cp - '0';
            else {
                dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
                return FALSE;
            }
            if (*(cp + 1) != '\'') {
                cp++;
                value <<= 3;
                if (*cp >= '0' && *cp <= '7')
                    value |= *cp - '0';
                else {
                    dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
                    return FALSE;
                }
                if (*(cp + 1) != '\'') {
                    cp++;
                    value <<= 3;
                    if (*cp >= '0' && *cp <= '7')
                        value |= *cp - '0';
                    else {
                        dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s isn't a valid character constant.", s);
                        return FALSE;
                    }
                }
            }
            if (value > 0xFF) {
                dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s is too large to be a valid character constant.", s);
                return FALSE;
            }
            cp++;
        }
    } else {
        /* Ordinary (unescaped) character: must be printable ASCII. */
        value = *cp++;
        if (!g_ascii_isprint(value)) {
            dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "Non-printable value '0x%02lx' in character constant.", value);
            return FALSE;
        }
    }
    /* Exactly the closing quote and the terminating NUL must remain. */
    if ((*cp != '\'') || (*(cp + 1) != '\0')){
        dfilter_fail(state->dfw, DF_ERROR_GENERIC, state->string_loc, "%s is too long to be a valid character constant.", s);
        return FALSE;
    }
    *valuep = value;
    return TRUE;
}