/* wireshark/epan/dfilter/dfilter.c */

/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 2001 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#define WS_LOG_DOMAIN LOG_DOMAIN_DFILTER
#include <stdio.h>
#include <string.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
#include "gencode.h"
#include "semcheck.h"
#include "dfvm.h"
#include <epan/epan_dissect.h>
#include "dfilter.h"
#include "dfilter-macro.h"
#include "scanner_lex.h"
#include <wsutil/wslog.h>
#include <wsutil/ws_assert.h>
#include "grammar.h"
/* NOTE(review): this macro is not referenced anywhere in the visible part of
 * this file - confirm whether it is still needed. */
#define DFILTER_TOKEN_ID_OFFSET 1
/*
 * Holds the singular instance of our Lemon parser object.
 * It is allocated in dfilter_init(), released in dfilter_cleanup() and
 * handed to Dfilter() by every dfilter_compile() call in between.
 */
static void* ParserObj = NULL;
/*
 * XXX - if we're using a version of Flex that supports reentrant lexical
 * analyzers, we should put this into the lexical analyzer's state.
 *
 * NOTE(review): in the visible code this pointer is only ever reset to NULL
 * (at the end of dfilter_compile()); verify who assigns and reads it.
 */
dfwork_t *global_dfw;
/*
 * Store a printf-style formatted error message in the work structure.
 *
 * dfw    - compilation state receiving the message
 * format - printf-style format string
 * args   - arguments for the format string
 *
 * Only the first error is recorded; if a message is already present this
 * call does nothing.
 */
static void
dfilter_vfail(dfwork_t *dfw, const char *format, va_list args)
{
	/* Keep the first reported error; never overwrite it. */
	if (dfw->error_message == NULL) {
		dfw->error_message = g_strdup_vprintf(format, args);
	}
}
/*
 * Report an error for the filter being compiled.
 *
 * Variadic front end to dfilter_vfail(): the formatted message is stored
 * in dfw unless an earlier error has already been recorded there.
 */
void
dfilter_fail(dfwork_t *dfw, const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	dfilter_vfail(dfw, format, ap);
	va_end(ap);
}
/*
 * Report an error and additionally flag it as a syntax error.
 *
 * The message is recorded through dfilter_vfail() (so only the first
 * message is kept), while dfw->syntax_error is set unconditionally.
 */
void
dfilter_parse_fail(dfwork_t *dfw, const char *format, ...)
{
	va_list ap;

	/* The flag is raised even when an earlier message is being kept. */
	dfw->syntax_error = TRUE;

	va_start(ap, format);
	dfilter_vfail(dfw, format, ap);
	va_end(ap);
}
/*
 * Create an STTYPE_FUNCTION syntax tree node for the function called "name".
 *
 * If no function with that name is registered, a syntax error is reported
 * on dfw. A node is still created and returned in that case, carrying the
 * NULL definition returned by the lookup.
 */
stnode_t *
dfilter_new_function(dfwork_t *dfw, const char *name)
{
	df_func_def_t *func_def;

	func_def = df_func_lookup(name);
	if (func_def == NULL) {
		dfilter_parse_fail(dfw, "Function '%s' does not exist", name);
	}

	return stnode_new(STTYPE_FUNCTION, func_def, name);
}
/* Gets a regex from a string, and sets the error message on failure. */
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
stnode_t *
dfilter_new_regex(dfwork_t *dfw, stnode_t *node)
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
{
fvalue_regex_t *pcre;
char *errmsg = NULL;
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
if (stnode_type_id(node) != STTYPE_STRING) {
dfilter_parse_fail(dfw, "Expected a string not %s", stnode_todisplay(node));
return node;
}
const char *patt = stnode_data(node);
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
ws_debug("Compile regex pattern: %s", patt);
pcre = fvalue_regex_compile(patt, &errmsg);
if (errmsg) {
dfilter_parse_fail(dfw, "%s", errmsg);
g_free(errmsg);
return node;
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
}
stnode_replace(node, STTYPE_PCRE, pcre);
return node;
dfilter: Require double-quoted strings with "matches" Matches is a special case that looks on the RHS and tries to convert every unparsed value to a string, regardless of the LHS type. This is not how types work in the display filter. Require double-quotes to avoid ambiguity, because matches doesn't follow normal Wireshark display filter type rules. It doesn't need nor benefit from the flexibility provided by unparsed strings in the syntax. For matches the RHS is always a literal strings except if the RHS is also a field name, then it complains of an incompatible type. This is confusing. No type can be compatible because no type rules are ever considered. Every unparsed value is a text string except if it happens to coincide with a field name it also requires double-quoting or it throws a syntax error, just to be difficult. We could remove this odd quirk but requiring double-quotes for regular expressions is a better, more elegant fix. Before: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp.srcport dftest: tcp and udp.srcport are not of compatible types. Filter: tcp matches udp.srcportt Constants: 00000 PUT_PCRE udp.srcportt -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN After: Filter: tcp matches "udp" Constants: 00000 PUT_PCRE udp -> reg#1 Instructions: 00000 READ_TREE tcp -> reg#0 00001 IF-FALSE-GOTO 3 00002 ANY_MATCHES reg#0 matches reg#1 00003 RETURN Filter: tcp matches udp dftest: "udp" was unexpected in this context. Filter: tcp matches udp.srcport dftest: "udp.srcport" was unexpected in this context. 
Filter: tcp matches udp.srcportt dftest: "udp.srcportt" was unexpected in this context. The error message could still be improved.
2021-10-09 15:40:08 +00:00
}
/*
 * Convert the string "s" to a gint32.
 *
 * dfw  - compilation state; receives a parse error on failure
 * s    - text to convert; the base is auto-detected by strtol() (base 0)
 * pint - receives the converted value on success; untouched on failure
 *
 * Returns TRUE on success. Returns FALSE, after reporting a parse error,
 * when the text is not entirely a number or the value does not fit in a
 * gint32.
 */
gboolean
dfilter_str_to_gint32(dfwork_t *dfw, const char *s, gint32* pint)
{
	char *end;
	long value;

	errno = 0;
	value = strtol(s, &end, 0);

	/* Nothing was consumed, or trailing characters are left over. */
	if (errno == EINVAL || end == s || *end != '\0') {
		dfilter_parse_fail(dfw, "\"%s\" is not a valid number.", s);
		return FALSE;
	}

	if (errno != ERANGE) {
		/*
		 * The value fits in a long, but a long might be 64 bits,
		 * so it still has to be checked against the gint32 range.
		 */
		if (value > G_MAXINT32) {
			dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s);
			return FALSE;
		}
		if (value < G_MININT32) {
			dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s);
			return FALSE;
		}

		*pint = (gint32)value;
		return TRUE;
	}

	/* strtol() reported that the value does not fit in a long. */
	if (value == LONG_MAX) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer overflow.", s);
	}
	else if (value == LONG_MIN) {
		dfilter_parse_fail(dfw, "\"%s\" causes an integer underflow.", s);
	}
	else {
		/*
		 * XXX - can "strtol()" set errno to ERANGE without
		 * returning LONG_MAX or LONG_MIN?
		 */
		dfilter_parse_fail(dfw, "\"%s\" is not an integer.", s);
	}
	return FALSE;
}
/*
 * Try to turn an STTYPE_UNPARSED node into an STTYPE_FIELD node.
 *
 * The node's text is looked up first as a registered field name and then
 * as a field alias. A match through an alias is additionally recorded as a
 * deprecated token on dfw. When neither lookup succeeds the node is
 * returned unchanged, so that the semantic check sees it as UNPARSED.
 *
 * The node passed in is always the node returned.
 */
stnode_t *
dfilter_resolve_unparsed(dfwork_t *dfw, stnode_t *node)
{
	const char *text;
	header_field_info *field;

	ws_assert(stnode_type_id(node) == STTYPE_UNPARSED);
	text = stnode_data(node);

	field = proto_registrar_get_byname(text);
	if (field == NULL) {
		field = proto_registrar_get_byalias(text);
		if (field != NULL) {
			/*
			 * It's an aliased field name. Record it before the
			 * node is replaced, while "text" (taken from the
			 * node's data) is still in use.
			 */
			add_deprecated_token(dfw, text);
		}
	}

	if (field != NULL) {
		/* It's a field name (or an alias of one). */
		stnode_replace(node, STTYPE_FIELD, field);
	}

	/* Otherwise it's not a field; hand it back as is. */
	return node;
}
/*
 * Initialize the dfilter module.
 *
 * Allocates the Lemon parser object used by dfilter_compile(), then
 * initializes the syntax-tree types and the display filter macros.
 * If a parser object is already present it is reported and freed before
 * a new one is allocated.
 */
void
dfilter_init(void)
{
	if (ParserObj) {
		ws_message("I expected ParserObj to be NULL\n");
		/* Free the Lemon Parser object */
		DfilterFree(ParserObj, g_free);
	}

	/* Allocate an instance of our Lemon-based parser */
	ParserObj = DfilterAlloc(g_malloc);

/* Enable parser tracing by defining AM_CFLAGS
 * so that it contains "-DDFTRACE".
 */
#ifdef DFTRACE
	/* Trace parser */
	DfilterTrace(stdout, "lemon> ");
#endif

	/* Initialize the syntax-tree sub-sub-system */
	sttype_init();

	dfilter_macro_init();
}
/* Clean-up the dfilter module */
void
dfilter_cleanup(void)
{
dfilter_macro_cleanup();
/* Free the Lemon Parser object */
if (ParserObj) {
DfilterFree(ParserObj, g_free);
}
/* Clean up the syntax-tree sub-sub-system */
sttype_cleanup();
}
/*
 * Allocate a zero-filled dfilter_t.
 *
 * deprecated - optional array of deprecated tokens; when non-NULL the new
 *              filter takes its own reference on it.
 *
 * Returns the new filter; release it with dfilter_free().
 */
static dfilter_t*
dfilter_new(GPtrArray *deprecated)
{
	dfilter_t *filter = g_new0(dfilter_t, 1);

	filter->insns = NULL;
	if (deprecated != NULL) {
		filter->deprecated = g_ptr_array_ref(deprecated);
	}

	return filter;
}
/*
 * Free every instruction (dfvm_insn_t) stored in the GPtrArray "insns",
 * then free the array itself. "insns" must not be NULL.
 */
static void
free_insns(GPtrArray *insns)
{
	for (guint idx = 0; idx < insns->len; idx++) {
		dfvm_insn_free((dfvm_insn_t *)g_ptr_array_index(insns, idx));
	}

	g_ptr_array_free(insns, TRUE);
}
/*
 * Release a compiled display filter and everything it owns.
 * Passing NULL is allowed and does nothing.
 */
void
dfilter_free(dfilter_t *df)
{
	if (df == NULL) {
		return;
	}

	if (df->insns != NULL) {
		free_insns(df->insns);
	}
	if (df->consts != NULL) {
		free_insns(df->consts);
	}

	g_free(df->interesting_fields);

	/* Clear registers with constant values (as set by dfvm_init_const).
	 * Other registers were cleared on RETURN by free_register_overhead. */
	for (guint reg = df->num_registers; reg < df->max_registers; reg++) {
		g_list_free(df->registers[reg]);
	}

	if (df->deprecated != NULL) {
		g_ptr_array_unref(df->deprecated);
	}

	g_free(df->registers);
	g_free(df->attempted_load);
	g_free(df->owns_memory);
	g_free(df);
}
/*
 * Allocate the zero-filled work structure used during one compilation.
 * Release it with dfwork_free().
 */
static dfwork_t*
dfwork_new(void)
{
	dfwork_t *work = g_new0(dfwork_t, 1);

	/* NOTE(review): -1 looks like a "no constant yet" marker - confirm
	 * against the code generator. */
	work->first_constant = -1;

	return work;
}
/*
 * Release a work structure and whatever it still owns: the syntax tree,
 * the field hash tables, the instruction and constant arrays, and its
 * reference on the deprecated token array.
 *
 * The error message string is deliberately NOT freed; our caller will
 * return it to its caller.
 */
static void
dfwork_free(dfwork_t *dfw)
{
	if (dfw->st_root != NULL) {
		stnode_free(dfw->st_root);
	}

	if (dfw->loaded_fields != NULL) {
		g_hash_table_destroy(dfw->loaded_fields);
	}
	if (dfw->interesting_fields != NULL) {
		g_hash_table_destroy(dfw->interesting_fields);
	}

	if (dfw->insns != NULL) {
		free_insns(dfw->insns);
	}
	if (dfw->consts != NULL) {
		free_insns(dfw->consts);
	}

	if (dfw->deprecated != NULL) {
		g_ptr_array_unref(dfw->deprecated);
	}

	g_free(dfw);
}
/*
 * Return a printable name for a parser token ID: the TOKEN_ constant's
 * name without its "TOKEN_" prefix, or "<unknown>" for any other value.
 * The returned string is a literal and must not be freed.
 */
const char *tokenstr(int token)
{
/* Expands to one "case" that returns the token name as a string. */
#define TOKEN_NAME_CASE(name) case TOKEN_##name: return #name

	switch (token) {
		TOKEN_NAME_CASE(TEST_AND);
		TOKEN_NAME_CASE(TEST_OR);
		TOKEN_NAME_CASE(TEST_EQ);
		TOKEN_NAME_CASE(TEST_NE);
		TOKEN_NAME_CASE(TEST_LT);
		TOKEN_NAME_CASE(TEST_LE);
		TOKEN_NAME_CASE(TEST_GT);
		TOKEN_NAME_CASE(TEST_GE);
		TOKEN_NAME_CASE(TEST_CONTAINS);
		TOKEN_NAME_CASE(TEST_MATCHES);
		TOKEN_NAME_CASE(TEST_BITWISE_AND);
		TOKEN_NAME_CASE(TEST_NOT);
		TOKEN_NAME_CASE(STRING);
		TOKEN_NAME_CASE(CHARCONST);
		TOKEN_NAME_CASE(UNPARSED);
		TOKEN_NAME_CASE(LBRACKET);
		TOKEN_NAME_CASE(RBRACKET);
		TOKEN_NAME_CASE(COMMA);
		TOKEN_NAME_CASE(INTEGER);
		TOKEN_NAME_CASE(COLON);
		TOKEN_NAME_CASE(HYPHEN);
		TOKEN_NAME_CASE(TEST_IN);
		TOKEN_NAME_CASE(LBRACE);
		TOKEN_NAME_CASE(RBRACE);
		TOKEN_NAME_CASE(WHITESPACE);
		TOKEN_NAME_CASE(DOTDOT);
		TOKEN_NAME_CASE(LPAREN);
		TOKEN_NAME_CASE(RPAREN);
		default:
			break;
	}

#undef TOKEN_NAME_CASE

	return "<unknown>";
}
/*
 * Remember "token" as a deprecated token used by the filter being compiled.
 *
 * The list is created on first use and stores its own copy of the string.
 * A token already present (compared ASCII case-insensitively) is not added
 * a second time.
 */
void
add_deprecated_token(dfwork_t *dfw, const char *token)
{
	GPtrArray *known;
	guint idx;

	if (dfw->deprecated == NULL) {
		dfw->deprecated = g_ptr_array_new_full(0, g_free);
	}
	known = dfw->deprecated;

	idx = 0;
	while (idx < known->len) {
		const char *entry = g_ptr_array_index(known, idx);

		if (g_ascii_strcasecmp(token, entry) == 0) {
			/* It's already in our list */
			return;
		}
		idx++;
	}

	g_ptr_array_add(known, g_strdup(token));
}
/*
 * Compile the display filter string "text" into a dfilter_t.
 *
 * text    - the filter expression. A NULL pointer is reported as a bug
 *           and makes the call fail.
 * dfp     - must not be NULL (asserted). Receives the compiled filter on
 *           success, NULL when the filter turned out to be empty (which
 *           still counts as success), and NULL on failure.
 * err_msg - may be NULL. When non-NULL and the call fails, it receives an
 *           error message; the messages produced in this file are
 *           allocated with g_strdup()/g_strdup_printf()/
 *           g_strdup_vprintf(). When macro expansion fails, setting the
 *           message is left to dfilter_macro_apply().
 *
 * Returns TRUE on success and FALSE on failure.
 *
 * The steps are: macro expansion, scanning and parsing (one token at a
 * time, fed to the shared Lemon parser object), semantic check, bytecode
 * generation, and set-up of the run-time registers.
 */
gboolean
dfilter_compile(const gchar *text, dfilter_t **dfp, gchar **err_msg)
{
	gchar		*expanded_text;
	int		token;
	dfilter_t	*dfilter;
	dfwork_t	*dfw;
	df_scanner_state_t state;
	yyscan_t	scanner;
	YY_BUFFER_STATE in_buffer;
	gboolean failure = FALSE;
	unsigned token_count = 0;

	ws_assert(dfp);

	if (!text) {
		*dfp = NULL;
		if (err_msg != NULL)
			*err_msg = g_strdup("BUG: NULL text pointer passed to dfilter_compile()");
		return FALSE;
	}

	if ( !( expanded_text = dfilter_macro_apply(text, err_msg) ) ) {
		*dfp = NULL;
		return FALSE;
	}

	if (df_lex_init(&scanner) != 0) {
		wmem_free(NULL, expanded_text);
		*dfp = NULL;
		if (err_msg != NULL)
			*err_msg = g_strdup_printf("Can't initialize scanner: %s",
			    g_strerror(errno));
		return FALSE;
	}

	in_buffer = df__scan_string(expanded_text, scanner);

	dfw = dfwork_new();

	/* Scanner state shared with the lexer through its "extra" pointer. */
	state.dfw = dfw;
	state.quoted_string = NULL;
	state.in_set = FALSE;
	state.raw_string = FALSE;

	df_set_extra(&state, scanner);

	while (1) {
		/* A fresh node for the scanner to fill in for this token. */
		df_lval = stnode_new(STTYPE_UNINITIALIZED, NULL, NULL);
		token = df_lex(scanner);

		/* Check for scanner failure */
		if (token == SCAN_FAILED) {
			failure = TRUE;
			break;
		}

		/* Check for end-of-input */
		if (token == 0) {
			break;
		}

		ws_debug("(%u) Token %d %s %s",
				++token_count, token, tokenstr(token),
				stnode_token_value(df_lval));

		/* Give the token to the parser */
		Dfilter(ParserObj, token, df_lval, dfw);
		/* We've used the stnode_t, so we don't want to free it */
		df_lval = NULL;

		if (dfw->syntax_error) {
			failure = TRUE;
			break;
		}

	} /* while (1) */

	/* If we created an stnode_t but didn't use it, free it; the
	 * parser doesn't know about it and won't free it for us. */
	if (df_lval) {
		stnode_free(df_lval);
		df_lval = NULL;
	}

	/* Tell the parser that we have reached the end of input; that
	 * way, it'll reset its state for the next compile.  (We want
	 * to do that even if we got a syntax error, to make sure the
	 * parser state is cleaned up; we don't create a new parser
	 * object when we start a new parse, and don't destroy it when
	 * the parse finishes.) */
	Dfilter(ParserObj, 0, NULL, dfw);

	/* One last check for syntax error (after EOF) */
	if (dfw->syntax_error)
		failure = TRUE;

	/* Free scanner state */
	if (state.quoted_string != NULL)
		g_string_free(state.quoted_string, TRUE);
	df__delete_buffer(in_buffer, scanner);
	df_lex_destroy(scanner);

	if (failure)
		goto FAILURE;

	/* Success, but was it an empty filter? If so, discard
	 * it and set *dfp to NULL */
	if (dfw->st_root == NULL) {
		*dfp = NULL;
	}
	else {
		log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree before semantic check");

		/* Check semantics and do necessary type conversion*/
		if (!dfw_semcheck(dfw)) {
			goto FAILURE;
		}

		log_syntax_tree(LOG_LEVEL_NOISY, dfw->st_root, "Syntax tree after successful semantic check");

		/* Create bytecode */
		dfw_gencode(dfw);

		/* Tuck away the bytecode in the dfilter_t; clearing the
		 * pointers in dfw keeps dfwork_free() from freeing the
		 * arrays the filter now owns. */
		dfilter = dfilter_new(dfw->deprecated);
		dfilter->insns = dfw->insns;
		dfilter->consts = dfw->consts;
		dfw->insns = NULL;
		dfw->consts = NULL;
		dfilter->interesting_fields = dfw_interesting_fields(dfw,
			&dfilter->num_interesting_fields);

		/* Initialize run-time space */
		dfilter->num_registers = dfw->first_constant;
		dfilter->max_registers = dfw->next_register;
		dfilter->registers = g_new0(GList*, dfilter->max_registers);
		dfilter->attempted_load = g_new0(gboolean, dfilter->max_registers);
		dfilter->owns_memory = g_new0(gboolean, dfilter->max_registers);

		/* Initialize constants */
		dfvm_init_const(dfilter);

		/* And give it to the user. */
		*dfp = dfilter;
	}
	/* SUCCESS */
	global_dfw = NULL;
	dfwork_free(dfw);
	wmem_free(NULL, expanded_text);
	return TRUE;

FAILURE:
	if (dfw) {
		/* dfwork_free() does not free the error message, so it is
		 * either handed to the caller or freed here. */
		if (err_msg != NULL)
			*err_msg = dfw->error_message;
		else
			g_free(dfw->error_message);
		global_dfw = NULL;
		dfwork_free(dfw);
	}
	if (err_msg != NULL) {
		/*
		 * Default error message.
		 *
		 * XXX - we should really make sure that this is never the
		 * case for any error.
		 */
		if (*err_msg == NULL)
			*err_msg = g_strdup_printf("Unable to parse filter string \"%s\".", expanded_text);
	}
	wmem_free(NULL, expanded_text);
	*dfp = NULL;
	return FALSE;
}
/*
 * Apply the compiled filter "df" to the protocol tree "tree".
 * Returns the result of dfvm_apply() unchanged.
 */
gboolean
dfilter_apply(dfilter_t *df, proto_tree *tree)
{
	gboolean result = dfvm_apply(df, tree);

	return result;
}
/*
 * Apply the compiled filter "df" to the protocol tree of the dissection
 * "edt". Returns the result of dfvm_apply() unchanged.
 */
gboolean
dfilter_apply_edt(dfilter_t *df, epan_dissect_t* edt)
{
	proto_tree *tree = edt->tree;

	return dfvm_apply(df, tree);
}
/*
 * Prime "tree" with every field ID listed in the filter's
 * interesting_fields array, by calling proto_tree_prime_with_hfid()
 * once per entry.
 */
void
dfilter_prime_proto_tree(const dfilter_t *df, proto_tree *tree)
{
	int idx = 0;

	while (idx < df->num_interesting_fields) {
		proto_tree_prime_with_hfid(tree, df->interesting_fields[idx]);
		idx++;
	}
}
gboolean
dfilter_has_interesting_fields(const dfilter_t *df)
{
return (df->num_interesting_fields > 0);
}
/*
 * Return the filter's array of deprecated tokens, or NULL when the filter
 * has no such array or the array is empty. The array returned is the
 * filter's own; no new reference is taken here.
 */
GPtrArray *
dfilter_deprecated_tokens(dfilter_t *df) {
	if (df->deprecated == NULL || df->deprecated->len == 0) {
		return NULL;
	}
	return df->deprecated;
}
/*
 * Print the filter to stdout: first the dfvm_dump() output, then, if the
 * filter has any deprecated tokens, a comma-separated list of them with
 * each token in double quotes.
 */
void
dfilter_dump(dfilter_t *df)
{
	dfvm_dump(stdout, df);

	if (df->deprecated == NULL || df->deprecated->len == 0) {
		return;
	}

	printf("\nDeprecated tokens: ");
	for (guint idx = 0; idx < df->deprecated->len; idx++) {
		/* No separator in front of the first token. */
		const gchar *prefix = (idx == 0) ? "" : ", ";

		printf("%s\"%s\"", prefix, (char *) g_ptr_array_index(df->deprecated, idx));
	}
	printf("\n");
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/