forked from osmocom/wireshark
dfilter: Allow arithmetic expressions without spaces
To allow arithmetic expressions without spaces, such as "1+2", we cannot match the expression in other lexical rules using "+". Because of longest match this becomes the token LITERAL or UNPARSED with semantic value "1+2". The same goes for all the other arithmetic operators. So we need to remove [+-*/%] from "word chars" and add very specific patterns (that won't mistakenly match an arithmetic expression) for those literal or unparsed tokens we want to support using these characters. The plus was not a problem but the forward slash is used for CIDR, minus for the MAC address separator, etc. There are still some corner cases. 11-22-33-44-55-66 is a MAC address and not the arithmetic expression with six terms "eleven minus twenty two minus etc." (if we ever support more than two terms in the grammar, which we don't currently). We lift some patterns from the flex manual to match on IPv4 and IPv6 (ugly) and add MAC address. Other hypothetical literal lexical values using [+-*/%] are already supported enclosed in angle brackets but the cases of MAC/IPv4/IPv6 are very common and moreover we need to do the utmost to not break backward compatibility here. Before: $ dftest "_ws.ftypes.int32 == 1+2" dftest: "1+2" is not a valid number. After: $ dftest "_ws.ftypes.int32 == 1+2" Filter: _ws.ftypes.int32 == 1+2 Instructions: 00000 READ_TREE _ws.ftypes.int32 -> reg#0 00001 IF_FALSE_GOTO 4 00002 ADD 1 <FT_INT32> + 2 <FT_INT32> -> reg#1 00003 ANY_EQ reg#0 == reg#1 00004 RETURN
This commit is contained in:
parent
34ad6bb478
commit
330d408328
|
@ -98,7 +98,23 @@ static gboolean parse_charconst(dfwork_t *dfw, const char *s, unsigned long *val
|
|||
|
||||
%}
|
||||
|
||||
WORD_CHAR [[:alnum:]_:/+-]
|
||||
WORD_CHAR [[:alnum:]_]
|
||||
|
||||
hex2 [[:xdigit:]]{2}
|
||||
MacAddress {hex2}:{hex2}:{hex2}:{hex2}:{hex2}:{hex2}|{hex2}-{hex2}-{hex2}-{hex2}-{hex2}-{hex2}|{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}\.{hex2}
|
||||
|
||||
hex4 [[:xdigit:]]{4}
|
||||
QuadMacAddress {hex4}\.{hex4}\.{hex4}
|
||||
|
||||
dec-octet [0-9]|[1-9][0-9]|1[0-9][0-9]|2[0-4][0-9]|25[0-5]
|
||||
IPv4address {dec-octet}\.{dec-octet}\.{dec-octet}\.{dec-octet}
|
||||
|
||||
h16 [0-9A-Fa-f]{1,4}
|
||||
ls32 {h16}:{h16}|{IPv4address}
|
||||
IPv6address ({h16}:){6}{ls32}|::({h16}:){5}{ls32}|({h16})?::({h16}:){4}{ls32}|(({h16}:){0,1}{h16})?::({h16}:){3}{ls32}|(({h16}:){0,2}{h16})?::({h16}:){2}{ls32}|(({h16}:){0,3}{h16})?::{h16}:{ls32}|(({h16}:){0,4}{h16})?::{ls32}|(({h16}:){0,5}{h16})?::{h16}|(({h16}:){0,6}{h16})?::
|
||||
|
||||
v4-cidr-prefix \/[[:digit:]]{1,2}
|
||||
v6-cidr-prefix \/[[:digit:]]{1,3}
|
||||
|
||||
%x RANGE
|
||||
%x DQUOTE
|
||||
|
@ -347,23 +363,37 @@ WORD_CHAR [[:alnum:]_:/+-]
|
|||
|
||||
/* None of the patterns below can match ".." anywhere in the token string. */
|
||||
|
||||
:{WORD_CHAR}+(\.{WORD_CHAR}+)* {
|
||||
/* Literal. */
|
||||
{MacAddress}|{QuadMacAddress} {
|
||||
/* MAC Address literal or unparsed if using dots. */
|
||||
return set_lval_str(TOKEN_UNPARSED, yytext);
|
||||
}
|
||||
|
||||
{IPv4address}{v4-cidr-prefix} {
|
||||
/* IPv4 CIDR. */
|
||||
return set_lval_str(TOKEN_LITERAL, yytext);
|
||||
}
|
||||
|
||||
{IPv6address}{v6-cidr-prefix} {
|
||||
/* IPv6 CIDR. */
|
||||
return set_lval_str(TOKEN_LITERAL, yytext);
|
||||
}
|
||||
|
||||
[[:xdigit:]]+:[[:xdigit:]:]* {
|
||||
/* Bytes or address (IPv6, etc). */
|
||||
return set_lval_str(TOKEN_LITERAL, yytext);
|
||||
}
|
||||
|
||||
"<"[^>=]+">" {
|
||||
/* Literal. */
|
||||
/* Literal in angle brackets. */
|
||||
return set_lval_str(TOKEN_LITERAL, yytext);
|
||||
}
|
||||
|
||||
\.{WORD_CHAR}+(\.{WORD_CHAR}+)* {
|
||||
/* Identifier */
|
||||
return set_lval_str(TOKEN_IDENTIFIER, yytext);
|
||||
}
|
||||
|
||||
[[:alnum:]_]{WORD_CHAR}*(\.{WORD_CHAR}+)* {
|
||||
/* Unparsed word token. */
|
||||
[:.]?{WORD_CHAR}+(\.{WORD_CHAR}+)* {
|
||||
/* Identifier or literal or unparsed. */
|
||||
if (yytext[0] == '.')
|
||||
return set_lval_str(TOKEN_IDENTIFIER, yytext);
|
||||
if (yytext[0] == ':')
|
||||
return set_lval_str(TOKEN_LITERAL, yytext);
|
||||
return set_lval_str(TOKEN_UNPARSED, yytext);
|
||||
}
|
||||
|
||||
|
|
|
@ -198,6 +198,10 @@ class case_arithmetic(unittest.TestCase):
|
|||
dfilter = "udp.dstport == 66 + 1"
|
||||
checkDFilterCount(dfilter, 2)
|
||||
|
||||
def test_add_3(self, checkDFilterCount):
|
||||
dfilter = "udp.dstport == 66+1"
|
||||
checkDFilterCount(dfilter, 2)
|
||||
|
||||
def test_add_3(self, checkDFilterFail):
|
||||
error = 'Constant arithmetic expression on the LHS is invalid'
|
||||
dfilter = "2 + 3 == frame.number"
|
||||
|
|
Loading…
Reference in New Issue