dfilter: Parse ranges in the drange node constructor

Using a hand-written tokenizer is simpler than using flex start
conditions. Do the validation in the drange node constructor.

Add validation for malformed ranges with different endpoint signs.
pespin/osmux-wip
João Valverde 2021-10-25 21:27:40 +01:00 committed by Wireshark GitLab Utility
parent d19bdb70ed
commit b1222edcd2
7 changed files with 155 additions and 291 deletions

View File

@ -89,9 +89,6 @@ dfilter_new_function(dfwork_t *dfw, const char *name);
stnode_t *
dfilter_new_regex(dfwork_t *dfw, stnode_t *node);
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint);
stnode_t *
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node);

View File

@ -104,57 +104,6 @@ dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
return node;
}
/*
 * Convert the string "s" to a gint32 and store the result in "*pint".
 *
 * The base is auto-detected by strtol() and the whole string must be
 * consumed. On any failure the reason is reported with
 * dfilter_parse_fail(), FALSE is returned and "*pint" is not written.
 */
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
{
	char *tail;
	long value;
	gboolean range_err;

	errno = 0;
	value = strtol(s, &tail, 0);

	/* Nothing was converted, or something follows the number. */
	if (errno == EINVAL || tail == s || *tail != '\0') {
		dfilter_parse_fail(dfw, "\"%s\" is not a valid number.", s);
		return FALSE;
	}

	range_err = (errno == ERANGE);

	/*
	 * XXX - can "strtol()" set errno to ERANGE without
	 * returning LONG_MAX or LONG_MIN?
	 */
	if (range_err && value != LONG_MAX && value != LONG_MIN) {
		dfilter_parse_fail(dfw, "\"%s\" is not an integer.", s);
		return FALSE;
	}

	/*
	 * Too large: either strtol() saturated at LONG_MAX, or the value
	 * fits in a long but not in a gint32 (a long might be 64 bits).
	 */
	if (range_err ? (value == LONG_MAX) : (value > G_MAXINT32)) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s);
		return FALSE;
	}

	/* Too small: same reasoning as above, at the other end. */
	if (range_err ? (value == LONG_MIN) : (value < G_MININT32)) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s);
		return FALSE;
	}

	*pint = (gint32)value;
	return TRUE;
}
/*
* Tries to convert an STTYPE_UNPARSED to a STTYPE_FIELD. If it's not registered as
* a field pass UNPARSED to the semantic check.
@ -356,9 +305,6 @@ const char *tokenstr(int token)
case TOKEN_LBRACKET: return "LBRACKET";
case TOKEN_RBRACKET: return "RBRACKET";
case TOKEN_COMMA: return "COMMA";
case TOKEN_INTEGER: return "INTEGER";
case TOKEN_COLON: return "COLON";
case TOKEN_HYPHEN: return "HYPHEN";
case TOKEN_TEST_IN: return "TEST_IN";
case TOKEN_LBRACE: return "LBRACE";
case TOKEN_RBRACE: return "RBRACE";

View File

@ -12,6 +12,9 @@
#include "config.h"
#include "drange.h"
#include <errno.h>
#include <stdlib.h>
#include <wsutil/ws_assert.h>
/* drange_node constructor */
@ -28,6 +31,136 @@ drange_node_new(void)
return new_range_node;
}
/*
 * Parse a leading integer from "s" (base auto-detected by strtol()) and
 * store it in "*pint". "*endptr" is set by strtol() to the first
 * character that was not consumed, so the caller can continue scanning.
 *
 * Returns TRUE on success. On failure returns FALSE and stores a message
 * allocated with g_strdup_printf() in "*err_ptr".
 */
static gboolean
drange_str_to_gint32(const char *s, gint32 *pint, char **endptr, char **err_ptr)
{
	long parsed;
	gboolean out_of_range;

	errno = 0;
	parsed = strtol(s, endptr, 0);

	/* No digits were consumed at all. */
	if (errno == EINVAL || *endptr == s) {
		*err_ptr = g_strdup_printf("\"%s\" is not a valid number.", s);
		return FALSE;
	}

	/* Out of range for a long, or for 32 bits when long is wider. */
	out_of_range = (errno == ERANGE) ||
			(parsed < G_MININT32) ||
			(parsed > G_MAXINT32);
	if (out_of_range) {
		*err_ptr = g_strdup_printf("\"%s\" causes an integer overflow.", s);
		return FALSE;
	}

	*pint = (gint32)parsed;
	return TRUE;
}
/*
 * drange_node constructor from string.
 *
 * Parses a single slice specification (the text found between the
 * brackets) and returns a newly created drange_node for it.
 *
 * The following syntax governs slices:
 * [i:j]    i = start_offset, j = length
 * [i-j]    i = start_offset, j = end_offset, inclusive.
 * [i]      i = start_offset, length = 1
 * [:j]     start_offset = 0, length = j
 * [i:]     start_offset = i, end_offset = end_of_field
 *
 * Whitespace following a number is skipped.
 *
 * On failure NULL is returned and "*err_ptr" is set to an error message
 * allocated with g_strdup_printf(); the caller owns it and must g_free()
 * it.
 */
drange_node*
drange_node_from_str(const char *range_str, char **err_ptr)
{
	const char *str;
	char *endptr;
	char *num_err = NULL;
	/* 'upper' is not assigned for the [i:] form, so give it a value. */
	gint32 lower, upper = 0;
	drange_node_end_t end = DRANGE_NODE_END_T_UNINITIALIZED;
	drange_node *dn;
	gboolean ok;

	str = range_str;
	if (*str == ':') {
		/*
		 * [:j] form. Do not advance 'str' here: the colon must
		 * still be seen by the separator handling below, otherwise
		 * "j" is never parsed as a length.
		 */
		lower = 0;
	}
	else {
		if (!drange_str_to_gint32(str, &lower, &endptr, err_ptr))
			return NULL;
		str = endptr;
	}

	while (*str != '\0' && g_ascii_isspace(*str))
		str++;

	if (*str == '-') {
		str++;
		end = DRANGE_NODE_END_T_OFFSET;
		ok = drange_str_to_gint32(str, &upper, &endptr, &num_err);
		str = endptr;
	}
	else if (*str == ':') {
		str++;
		if (*str == '\0') {
			end = DRANGE_NODE_END_T_TO_THE_END;
			ok = TRUE;
		}
		else {
			end = DRANGE_NODE_END_T_LENGTH;
			ok = drange_str_to_gint32(str, &upper, &endptr, &num_err);
			str = endptr;
		}
	}
	else if (*str == '\0') {
		end = DRANGE_NODE_END_T_LENGTH;
		upper = 1;
		ok = TRUE;
	}
	else {
		ok = FALSE;
	}

	/*
	 * A failure of the second number is reported below with the
	 * generic "not a valid range" message, so the detailed message
	 * is dropped here instead of being leaked.
	 */
	g_free(num_err);

	while (*str != '\0' && g_ascii_isspace(*str))
		str++;

	if (!ok || *str != '\0') {
		*err_ptr = g_strdup_printf("\"%s\" is not a valid range.", range_str);
		return NULL;
	}

	dn = drange_node_new();
	drange_node_set_start_offset(dn, lower);
	switch (end) {
		case DRANGE_NODE_END_T_LENGTH:
			if (upper <= 0) {
				*err_ptr = g_strdup_printf("Range %s isn't valid "
							"because length %d isn't positive",
							range_str, upper);
				drange_node_free(dn);
				return NULL;
			}
			drange_node_set_length(dn, upper);
			break;
		case DRANGE_NODE_END_T_OFFSET:
			/* Both endpoints must be counted from the same end. */
			if ((lower < 0 && upper > 0) || (lower > 0 && upper < 0)) {
				*err_ptr = g_strdup_printf("Range %s isn't valid "
							"because %d and %d have different signs",
							range_str, lower, upper);
				drange_node_free(dn);
				return NULL;
			}
			if (upper <= lower) {
				*err_ptr = g_strdup_printf("Range %s isn't valid "
							"because %d is greater or equal than %d",
							range_str, lower, upper);
				drange_node_free(dn);
				return NULL;
			}
			drange_node_set_end_offset(dn, upper);
			break;
		case DRANGE_NODE_END_T_TO_THE_END:
			drange_node_set_to_the_end(dn);
			break;
		default:
			ws_assert_not_reached();
			break;
	}

	return dn;
}
static drange_node*
drange_node_dup(drange_node *org)
{

View File

@ -49,6 +49,9 @@ typedef struct _drange {
/* drange_node constructor */
drange_node* drange_node_new(void);
/*
 * drange_node constructor from a slice string ("i:j", "i-j", "i", ":j"
 * or "i:"). Returns NULL on failure, in which case *err_ptr is set to an
 * allocated error message that the caller must release with g_free().
 */
drange_node* drange_node_from_str(const char *range_str, char **err_ptr);
/* drange_node destructor */
void drange_node_free(drange_node* drnode);

View File

@ -189,79 +189,17 @@ range_node_list(L) ::= range_node_list(P) COMMA range_node(D).
L = g_slist_append(P, D);
}
/* x:y */
range_node(D) ::= INTEGER(X) COLON INTEGER(Y).
range_node(D) ::= RANGE(R).
{
int32_t start = 0, length = 0;
char *err = NULL;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length);
D = drange_node_from_str(stnode_token_value(R), &err);
if (err != NULL) {
dfilter_parse_fail(dfw, "%s", err);
g_free(err);
}
D = drange_node_new();
drange_node_set_start_offset(D, start);
drange_node_set_length(D, length);
stnode_free(X);
stnode_free(Y);
}
/* x-y */
range_node(D) ::= INTEGER(X) HYPHEN INTEGER(Y).
{
int32_t start = 0, offset = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &offset);
D = drange_node_new();
drange_node_set_start_offset(D, start);
drange_node_set_end_offset(D, offset);
stnode_free(X);
stnode_free(Y);
}
/* :y = 0:y*/
range_node(D) ::= COLON INTEGER(Y).
{
int32_t length = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(Y), &length);
D = drange_node_new();
drange_node_set_start_offset(D, 0);
drange_node_set_length(D, length);
stnode_free(Y);
}
/* x: = x:-1 */
range_node(D) ::= INTEGER(X) COLON.
{
int32_t start = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
D = drange_node_new();
drange_node_set_start_offset(D, start);
drange_node_set_to_the_end(D);
stnode_free(X);
}
/* x = x:1 */
range_node(D) ::= INTEGER(X).
{
int32_t start = 0;
dfilter_str_to_gint32(dfw, stnode_token_value(X), &start);
D = drange_node_new();
drange_node_set_start_offset(D, start);
drange_node_set_length(D, 1);
stnode_free(X);
stnode_free(R);
}
rel_binop(O) ::= TEST_ANY_EQ. { O = TEST_OP_ANY_EQ; }

View File

@ -103,8 +103,7 @@ static int simple(int token, const char *token_value);
%}
%x RANGE_INT
%x RANGE_PUNCT
%x RANGE
%x DQUOTE
%x SQUOTE
@ -162,52 +161,27 @@ static int simple(int token, const char *token_value);
"or" return SIMPLE(TOKEN_TEST_OR);
"in" return SIMPLE(TOKEN_TEST_IN);
/*
* The syntax for ranges must handle slice[-d-d] and slice[-d--5], e.g:
* frame[-10-5] (minus ten to five)
* frame[-10--5] (minus ten to minus 5)
*/
"[" {
BEGIN(RANGE_INT);
"[" {
BEGIN(RANGE);
return SIMPLE(TOKEN_LBRACKET);
}
<RANGE_INT>[+-]?[[:alnum:]]+ {
BEGIN(RANGE_PUNCT);
return set_lval_str(TOKEN_INTEGER, yytext);
<RANGE>[^],]+ {
return set_lval_str(TOKEN_RANGE, yytext);
}
<RANGE_INT,RANGE_PUNCT>":" {
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_COLON);
}
<RANGE_PUNCT>"-" {
BEGIN(RANGE_INT);
return SIMPLE(TOKEN_HYPHEN);
}
<RANGE_INT,RANGE_PUNCT>"," {
BEGIN(RANGE_INT);
<RANGE>"," {
return SIMPLE(TOKEN_COMMA);
}
<RANGE_INT,RANGE_PUNCT>"]" {
<RANGE>"]" {
BEGIN(INITIAL);
return SIMPLE(TOKEN_RBRACKET);
}
/* Error if none of the above while scanning a range (slice) */
<RANGE_PUNCT>[^:\-,\]]+ {
BEGIN(RANGE_INT);
return set_lval_str(TOKEN_UNPARSED, yytext);
}
<RANGE_INT>[+-]?[^[:alnum:]\]]+ {
BEGIN(RANGE_PUNCT);
return set_lval_str(TOKEN_UNPARSED, yytext);
<RANGE><<EOF>> {
dfilter_fail(yyextra->dfw, "The right bracket was missing from a slice.");
return SCAN_FAILED;
}
[rR]{0,1}\042 {
@ -416,36 +390,6 @@ DIAG_ON_FLEX
/*
 * Set up df_lval for a token that has no typed value of its own: the
 * node is initialised as STTYPE_UNINITIALIZED with no data, keeping only
 * the token text. The token code is returned unchanged so the caller can
 * hand it straight back to the parser.
 *
 * Only the tokens listed below may be passed in; anything else trips
 * the assertion.
 */
static int
simple(int token, const char *token_value)
{
	switch (token) {
		/* Grouping and punctuation. */
		case TOKEN_LPAREN:   case TOKEN_RPAREN:
		case TOKEN_LBRACKET: case TOKEN_RBRACKET:
		case TOKEN_LBRACE:   case TOKEN_RBRACE:
		case TOKEN_COLON:    case TOKEN_COMMA:
		case TOKEN_DOTDOT:   case TOKEN_HYPHEN:
		case TOKEN_WHITESPACE:
		/* Comparison operators. */
		case TOKEN_TEST_ANY_EQ: case TOKEN_TEST_ALL_NE:
		case TOKEN_TEST_ANY_NE:
		case TOKEN_TEST_GT: case TOKEN_TEST_GE:
		case TOKEN_TEST_LT: case TOKEN_TEST_LE:
		case TOKEN_TEST_BITWISE_AND:
		case TOKEN_TEST_CONTAINS: case TOKEN_TEST_MATCHES:
		/* Logical operators and set membership. */
		case TOKEN_TEST_NOT: case TOKEN_TEST_AND:
		case TOKEN_TEST_OR:  case TOKEN_TEST_IN:
			break;
		default:
			ws_assert_not_reached();
	}

	stnode_init(df_lval, STTYPE_UNINITIALIZED, NULL, token_value);
	return token;
}
@ -465,7 +409,7 @@ set_lval_str(int token, const char *token_value)
case TOKEN_UNPARSED:
type_id = STTYPE_UNPARSED;
break;
case TOKEN_INTEGER:
case TOKEN_RANGE:
/* Not used in AST. */
type_id = STTYPE_UNINITIALIZED;
break;

View File

@ -473,98 +473,12 @@ check_exists(dfwork_t *dfw, stnode_t *st_arg1)
}
}
/*
 * State shared between check_drange_sanity() and its per-node callback
 * check_drange_node_sanity().
 */
struct check_drange_sanity_args {
/* Filter work area that dfilter_fail() reports errors into. */
dfwork_t *dfw;
/* Range syntax-tree node being checked; used to look up its entity. */
stnode_t *st;
/* Set to TRUE by the callback on the first invalid node; later nodes
 * are then not reported again. */
gboolean err;
};
/*
 * Per-node callback for drange_foreach_drange_node().
 *
 * "data" is the drange_node to validate and "user_data" points to a
 * struct check_drange_sanity_args. A length that is not positive, or a
 * start offset greater than the end offset, is reported once through
 * dfilter_fail() and recorded in args->err; after the first error no
 * further messages are produced. When the range entity is a field its
 * abbreviation is included in the message.
 */
static void
check_drange_node_sanity(gpointer data, gpointer user_data)
{
	drange_node *node = (drange_node*)data;
	struct check_drange_sanity_args *ctx = (struct check_drange_sanity_args*)user_data;
	gint first, last, len;
	stnode_t *target;
	header_field_info *hfi;

	switch (drange_node_get_ending(node)) {

		case DRANGE_NODE_END_T_TO_THE_END:
			/* Nothing to validate. */
			return;

		case DRANGE_NODE_END_T_LENGTH:
			len = drange_node_get_length(node);
			if (len > 0 || ctx->err)
				return;

			ctx->err = TRUE;
			first = drange_node_get_start_offset(node);
			target = sttype_range_entity(ctx->st);
			if (!target || stnode_type_id(target) != STTYPE_FIELD) {
				dfilter_fail(ctx->dfw, "Range %d:%d isn't valid, "
					"as length %d isn't positive",
					first, len,
					len);
				return;
			}
			hfi = (header_field_info *)stnode_data(target);
			dfilter_fail(ctx->dfw, "Range %d:%d specified for \"%s\" isn't valid, "
				"as length %d isn't positive",
				first, len,
				hfi->abbrev,
				len);
			return;

		case DRANGE_NODE_END_T_OFFSET:
			/*
			 * The start offset must not lie beyond the end
			 * offset; this holds for negative offsets as well.
			 *
			 * XXX - [-ve - +ve] is probably pathological, but
			 * isn't disallowed. [+ve - -ve] is probably
			 * pathological too, and happens to be disallowed.
			 */
			first = drange_node_get_start_offset(node);
			last = drange_node_get_end_offset(node);
			if (first <= last || ctx->err)
				return;

			ctx->err = TRUE;
			target = sttype_range_entity(ctx->st);
			if (!target || stnode_type_id(target) != STTYPE_FIELD) {
				dfilter_fail(ctx->dfw, "Range %d-%d isn't valid, "
					"as %d is greater than %d",
					first, last,
					first, last);
				return;
			}
			hfi = (header_field_info *)stnode_data(target);
			dfilter_fail(ctx->dfw, "Range %d-%d specified for \"%s\" isn't valid, "
				"as %d is greater than %d",
				first, last,
				hfi->abbrev,
				first, last);
			return;

		case DRANGE_NODE_END_T_UNINITIALIZED:
		default:
			ws_assert_not_reached();
	}
}
static void
check_drange_sanity(dfwork_t *dfw, stnode_t *st)
{
stnode_t *entity1;
header_field_info *hfinfo1;
ftenum_t ftype1;
struct check_drange_sanity_args args;
entity1 = sttype_range_entity(st);
if (entity1 && stnode_type_id(entity1) == STTYPE_FIELD) {
@ -598,17 +512,6 @@ check_drange_sanity(dfwork_t *dfw, stnode_t *st)
dfilter_fail(dfw, "Range is not supported, details: " G_STRLOC " entity: NULL");
THROW(TypeError);
}
args.dfw = dfw;
args.st = st;
args.err = FALSE;
drange_foreach_drange_node(sttype_range_drange(st),
check_drange_node_sanity, &args);
if (args.err) {
THROW(TypeError);
}
}
static void