dfilter: Improve grammar to parse ranges

Do the integer conversion for ranges in the parser. This is more
conventional, I think, and allows removing the unnecessary integer
syntax tree node type.

Try to minimize the number and complexity of the lexical rules for
ranges. However, it seems we still need to keep separate lexer states
for integers and punctuation in order to disambiguate the ranges
[-n-n] and [-n--n].
This commit is contained in:
João Valverde 2021-10-08 13:26:53 +01:00
parent 9d87c4712e
commit 2c701ddf6f
10 changed files with 105 additions and 191 deletions

View File

@ -28,10 +28,6 @@ indent_size = tab
indent_style = tab
indent_size = tab
[sttype-integer.[ch]]
indent_style = tab
indent_size = tab
[sttype-pointer.[ch]]
indent_style = tab
indent_size = tab

View File

@ -38,7 +38,6 @@ set(DFILTER_NONGENERATED_FILES
gencode.c
semcheck.c
sttype-function.c
sttype-integer.c
sttype-pointer.c
sttype-range.c
sttype-set.c

View File

@ -87,6 +87,9 @@ DfilterTrace(FILE *TraceFILE, char *zTracePrompt);
stnode_t *
dfilter_new_function(dfwork_t *dfw, const char *name);
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
const char *tokenstr(int token);
#endif

View File

@ -78,6 +78,58 @@ dfilter_new_function(dfwork_t *dfw, const char *name)
return stnode_new(STTYPE_FUNCTION, def, name);
}
/*
 * Convert the string 's' to a gint32.
 *
 * The conversion is done by strtol() with base 0, and the whole string
 * has to be consumed for the conversion to count as valid.
 *
 * On success the result is written to '*pint' and TRUE is returned.
 * On failure an error is reported through dfilter_parse_fail(),
 * '*pint' is not written and FALSE is returned.
 */
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
{
	char *stop;
	long parsed;
	int conv_errno;

	errno = 0;
	parsed = strtol(s, &stop, 0);
	/* Capture errno right away, before anything else can touch it. */
	conv_errno = errno;

	/* Nothing converted, or trailing characters left over. */
	if (stop == s || *stop != '\0' || conv_errno == EINVAL) {
		dfilter_parse_fail(dfw, "\"%s\" is not a valid number.", s);
		return FALSE;
	}

	/*
	 * XXX - can "strtol()" set errno to ERANGE without
	 * returning LONG_MAX or LONG_MIN?
	 */
	if (conv_errno == ERANGE && parsed != LONG_MAX && parsed != LONG_MIN) {
		dfilter_parse_fail(dfw, "\"%s\" is not an integer.", s);
		return FALSE;
	}

	/*
	 * Too large: either it did not fit in a long at all, or it fits
	 * in a long but not in a gint32 (a long might be 64 bits).
	 */
	if ((conv_errno == ERANGE && parsed == LONG_MAX) || parsed > G_MAXINT32) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s);
		return FALSE;
	}

	/* Too small: same reasoning as above, at the other end. */
	if ((conv_errno == ERANGE && parsed == LONG_MIN) || parsed < G_MININT32) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s);
		return FALSE;
	}

	*pint = (gint32)parsed;
	return TRUE;
}
/* Initialize the dfilter module */
void
dfilter_init(void)

View File

@ -96,10 +96,6 @@ any "error" symbols are shifted, if possible. */
dfilter_fail(dfw, "\"%s\" was unexpected in this context.",
(char *)stnode_data(TOKEN));
break;
case STTYPE_INTEGER:
dfilter_fail(dfw, "The integer %d was unexpected in this context.",
stnode_value(TOKEN));
break;
case STTYPE_FIELD:
hfinfo = (header_field_info *)stnode_data(TOKEN);
dfilter_fail(dfw, "Syntax error near \"%s\".", hfinfo->abbrev);
@ -201,54 +197,76 @@ range_node_list(L) ::= range_node_list(P) COMMA range_node(D).
L = g_slist_append(P, D);
}
/* x:y is offset:length */
/* x:y */
range_node(D) ::= INTEGER(X) COLON INTEGER(Y).
{
int32_t start = 0, length = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length);
D = drange_node_new();
drange_node_set_start_offset(D, stnode_value(X));
drange_node_set_length(D, stnode_value(Y));
drange_node_set_start_offset(D, start);
drange_node_set_length(D, length);
stnode_free(X);
stnode_free(Y);
}
/* x-y == offset:offset */
/* x-y */
range_node(D) ::= INTEGER(X) HYPHEN INTEGER(Y).
{
int32_t start = 0, offset = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &offset);
D = drange_node_new();
drange_node_set_start_offset(D, stnode_value(X));
drange_node_set_end_offset(D, stnode_value(Y));
drange_node_set_start_offset(D, start);
drange_node_set_end_offset(D, offset);
stnode_free(X);
stnode_free(Y);
}
/* :y == from start to offset */
/* :y = 0:y */
range_node(D) ::= COLON INTEGER(Y).
{
int32_t length = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length);
D = drange_node_new();
drange_node_set_start_offset(D, 0);
drange_node_set_length(D, stnode_value(Y));
drange_node_set_length(D, length);
stnode_free(Y);
}
/* x: from offset to end */
/* x: = x:-1 */
range_node(D) ::= INTEGER(X) COLON.
{
int32_t start = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
D = drange_node_new();
drange_node_set_start_offset(D, stnode_value(X));
drange_node_set_start_offset(D, start);
drange_node_set_to_the_end(D);
stnode_free(X);
}
/* x == x:1 */
/* x = x:1 */
range_node(D) ::= INTEGER(X).
{
int32_t start = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
D = drange_node_new();
drange_node_set_start_offset(D, stnode_value(X));
drange_node_set_start_offset(D, start);
drange_node_set_length(D, 1);
stnode_free(X);

View File

@ -86,10 +86,8 @@ DIAG_OFF_FLEX
static int set_lval_str(int token, const char *token_value);
static int set_lval_field(int token, header_field_info *hfinfo, const char *token_value);
static int set_lval_int(dfwork_t *dfw, int token, const char *token_value);
static int simple(int token, const char *token_value);
#define SIMPLE(token) simple(token, yytext)
static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
/*
* Sleazy hack to suppress compiler warnings in yy_fatal_error().
@ -168,20 +166,20 @@ static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
"or" return SIMPLE(TOKEN_TEST_OR);
"in" return SIMPLE(TOKEN_TEST_IN);
/*
* The syntax for ranges must handle slice[-d-d] and slice[-d--d], e.g.:
* frame[-10-5] (minus ten to five)
* frame[-10--5] (minus ten to minus five)
*/
"[" {
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_LBRACKET);
}
<RANGE_INT>[+-]?[[:digit:]]+ {
<RANGE_INT>[+-]?[[:alnum:]]+ {
BEGIN(RANGE_PUNCT);
return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext);
}
<RANGE_INT>[+-]?0x[[:xdigit:]]+ {
BEGIN(RANGE_PUNCT);
return set_lval_int(yyextra->dfw, TOKEN_INTEGER, yytext);
return set_lval_str(TOKEN_INTEGER, yytext);
}
<RANGE_INT,RANGE_PUNCT>":" {
@ -207,17 +205,13 @@ static gboolean str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
/* Error if none of the above while scanning a range (slice) */
<RANGE_PUNCT>[^:\-,\]]+ {
dfilter_fail(yyextra->dfw, "Invalid string \"%s\" found while scanning slice.", yytext);
return SCAN_FAILED;
BEGIN(RANGE_INT);
return set_lval_str(TOKEN_UNPARSED, yytext);
}
/* XXX It would be nice to be able to match an entire non-integer string,
* but beware of Flex's "match the most text" rule.
*/
<RANGE_INT>. {
dfilter_fail(yyextra->dfw, "Invalid character \"%s\" found while scanning slice; expected integer.", yytext);
return SCAN_FAILED;
<RANGE_INT>[+-]?[^[:alnum:]\]]+ {
BEGIN(RANGE_PUNCT);
return set_lval_str(TOKEN_UNPARSED, yytext);
}
[rR]{0,1}\042 {
@ -488,6 +482,10 @@ set_lval_str(int token, const char *token_value)
case TOKEN_UNPARSED:
type_id = STTYPE_UNPARSED;
break;
case TOKEN_INTEGER:
/* Not used in AST. */
type_id = STTYPE_UNINITIALIZED;
break;
default:
ws_assert_not_reached();
}
@ -502,78 +500,3 @@ set_lval_field(int token, header_field_info *hfinfo, const char *token_value)
stnode_init(df_lval, STTYPE_FIELD, hfinfo, token_value);
return token;
}
/*
 * Build an integer syntax-tree node in df_lval from the scanned text.
 *
 * The text is converted with str_to_gint32(); if that fails,
 * SCAN_FAILED is returned. Otherwise the node is initialized with the
 * converted value and the original token text, and 'token' is
 * returned unchanged.
 */
static int
set_lval_int(dfwork_t *dfw, int token, const char *token_value)
{
	sttype_id_t node_type = STTYPE_UNINITIALIZED;
	gint32 parsed;

	if (!str_to_gint32(dfw, token_value, &parsed))
		return SCAN_FAILED;

	/* TOKEN_INTEGER is the only token this helper knows how to build. */
	if (token == TOKEN_INTEGER) {
		node_type = STTYPE_INTEGER;
	}
	else {
		ws_assert_not_reached();
	}

	stnode_init_int(df_lval, node_type, parsed, token_value);
	return token;
}
/*
 * Convert the string 's' to a gint32.
 *
 * The conversion is done by strtol() with base 0, and the whole string
 * has to be consumed for the conversion to count as valid.
 *
 * On success the result is written to '*pint' and TRUE is returned.
 * On failure an error is reported through dfilter_fail(), '*pint' is
 * not written and FALSE is returned.
 */
static gboolean
str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
{
	char *stop;
	long parsed;
	int conv_errno;

	errno = 0;
	parsed = strtol(s, &stop, 0);
	/* Capture errno right away, before anything else can touch it. */
	conv_errno = errno;

	/* Nothing converted, or trailing characters left over. */
	if (stop == s || *stop != '\0' || conv_errno == EINVAL) {
		dfilter_fail(dfw, "\"%s\" is not a valid number.", s);
		return FALSE;
	}

	/*
	 * XXX - can "strtol()" set errno to ERANGE without
	 * returning LONG_MAX or LONG_MIN?
	 */
	if (conv_errno == ERANGE && parsed != LONG_MAX && parsed != LONG_MIN) {
		dfilter_fail(dfw, "\"%s\" is not an integer.", s);
		return FALSE;
	}

	/*
	 * Too large: either it did not fit in a long at all, or it fits
	 * in a long but not in a gint32 (a long might be 64 bits).
	 */
	if ((conv_errno == ERANGE && parsed == LONG_MAX) || parsed > G_MAXINT32) {
		dfilter_fail(dfw, "\"%s\" causes an integer overflow.", s);
		return FALSE;
	}

	/* Too small: same reasoning as above, at the other end. */
	if ((conv_errno == ERANGE && parsed == LONG_MIN) || parsed < G_MININT32) {
		dfilter_fail(dfw, "\"%s\" causes an integer underflow.", s);
		return FALSE;
	}

	*pint = (gint32)parsed;
	return TRUE;
}

View File

@ -511,7 +511,6 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
case STTYPE_UNINITIALIZED:
case STTYPE_TEST:
case STTYPE_INTEGER:
case STTYPE_FVALUE:
case STTYPE_SET:
case STTYPE_PCRE:
@ -1354,7 +1353,6 @@ check_relation(dfwork_t *dfw, const char *relation_string,
case STTYPE_UNINITIALIZED:
case STTYPE_TEST:
case STTYPE_INTEGER:
case STTYPE_FVALUE:
case STTYPE_SET:
default:

View File

@ -1,41 +0,0 @@
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#include "ftypes/ftypes.h"
#include "syntax-tree.h"
/*
 * Register the "INTEGER" syntax-tree node type (STTYPE_INTEGER) by
 * handing a static descriptor to sttype_register().
 */
void
sttype_register_integer(void)
{
/* Static storage: the address of this descriptor is passed to sttype_register(). */
static sttype_t integer_type = {
STTYPE_INTEGER,
"INTEGER",
/*
 * NOTE(review): the four NULL members are presumably the optional
 * per-type callbacks of sttype_t, left unset for this type -
 * confirm the member order against the sttype_t declaration.
 */
NULL,
NULL,
NULL,
NULL
};
sttype_register(&integer_type);
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/

View File

@ -26,7 +26,6 @@ void
sttype_init(void)
{
sttype_register_function();
sttype_register_integer();
sttype_register_pointer();
sttype_register_range();
sttype_register_set();
@ -89,7 +88,6 @@ _node_clear(stnode_t *node)
node->type = NULL;
node->flags = 0;
node->data = NULL;
node->value = 0;
}
void
@ -109,7 +107,6 @@ _node_init(stnode_t *node, sttype_id_t type_id, gpointer data)
ws_assert(!node->type);
ws_assert(!node->data);
node->flags = 0;
node->value = 0;
if (type_id == STTYPE_UNINITIALIZED) {
node->type = NULL;
@ -136,13 +133,6 @@ stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, const char *tok
node->token_value = g_strdup(token_value);
}
/*
 * Initialize 'node' as a node of type 'type_id' that carries an
 * integer value rather than a data pointer.
 *
 * The node is set up through stnode_init() with NULL data and the
 * given token text; 'value' is then stored in node->value.
 */
void
stnode_init_int(stnode_t *node, sttype_id_t type_id, gint32 value, const char *token_value)
{
stnode_init(node, type_id, NULL, token_value);
node->value = value;
}
void
stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data)
{
@ -186,7 +176,6 @@ stnode_dup(const stnode_t *org)
node->data = type->func_dup(org->data);
else
node->data = org->data;
node->value = org->value;
node->token_value = g_strdup(org->token_value);
@ -238,13 +227,6 @@ stnode_steal_data(stnode_t *node)
return data;
}
/*
 * Return the integer stored in node->value.
 *
 * The node's magic number is checked with ws_assert_magic() before
 * the value is read.
 */
gint32
stnode_value(stnode_t *node)
{
ws_assert_magic(node, STNODE_MAGIC);
return node->value;
}
const char *
stnode_token_value(stnode_t *node)
{
@ -274,9 +256,6 @@ stnode_set_inside_parens(stnode_t *node, gboolean inside)
char *
stnode_tostr(stnode_t *node)
{
if (stnode_type_id(node) == STTYPE_INTEGER)
return g_strdup_printf("%"PRId32, stnode_value(node));
if (node->type->func_tostr == NULL)
return g_strdup("<FIXME>");
@ -298,7 +277,6 @@ sprint_node(stnode_t *node)
s = stnode_tostr(node);
wmem_strbuf_append_printf(buf, "\tdata = %s<%s>\n", stnode_type_name(node), s);
g_free(s);
wmem_strbuf_append_printf(buf, "\tvalue = %"PRId32"\n", stnode_value(node));
wmem_strbuf_append_printf(buf, "}\n");
return wmem_strbuf_finalize(buf);
}

View File

@ -27,7 +27,6 @@ typedef enum {
STTYPE_CHARCONST,
STTYPE_FIELD,
STTYPE_FVALUE,
STTYPE_INTEGER,
STTYPE_RANGE,
STTYPE_FUNCTION,
STTYPE_SET,
@ -58,12 +57,7 @@ typedef struct {
uint32_t magic;
sttype_t *type;
uint16_t flags;
/* This could be made an enum, but I haven't
* set aside to time to do so. */
gpointer data;
int32_t value;
char *token_value;
} stnode_t;
@ -97,9 +91,6 @@ stnode_clear(stnode_t *node);
void
stnode_init(stnode_t *node, sttype_id_t type_id, gpointer data, const char *token_value);
void
stnode_init_int(stnode_t *node, sttype_id_t type_id, gint32 value, const char *token_value);
void
stnode_replace(stnode_t *node, sttype_id_t type_id, gpointer data);
@ -118,9 +109,6 @@ stnode_data(stnode_t *node);
gpointer
stnode_steal_data(stnode_t *node);
gint32
stnode_value(stnode_t *node);
const char *
stnode_token_value(stnode_t *node);