dfilter: Add support for raw (bytes) addressing mode

This adds new syntax to read a field from the tree as bytes, instead
of the actual type. This is a useful extension, for example, to match
malformed strings that contain Unicode replacement characters. Without
it, it is not possible to match the raw value of the malformed
string field. This extension fills this need and is generic enough
that it should be useful in many other situations.

The syntax used is to prefix the field name with "@". The following
artificial example tests if the HTTP user agent contains a particular
invalid UTF-8 sequence:

    @http.user_agent == "Mozill\xAA"

Where simply using "http.user_agent" won't work because the invalid byte
sequence will have been replaced with U+FFFD.

Consider the following programs:

    $ dftest '_ws.ftypes.string == "ABC"'
    Filter: _ws.ftypes.string == "ABC"

    Syntax tree:
     0 TEST_ANY_EQ:
       1 FIELD(_ws.ftypes.string <FT_STRING>)
       1 FVALUE("ABC" <FT_STRING>)

    Instructions:
    00000 READ_TREE		_ws.ftypes.string <FT_STRING> -> reg#0
    00001 IF_FALSE_GOTO	3
    00002 ANY_EQ		reg#0 == "ABC" <FT_STRING>
    00003 RETURN

    $ dftest '@_ws.ftypes.string == "ABC"'
    Filter: @_ws.ftypes.string == "ABC"

    Syntax tree:
     0 TEST_ANY_EQ:
       1 FIELD(_ws.ftypes.string <RAW>)
       1 FVALUE(41:42:43 <FT_BYTES>)

    Instructions:
    00000 READ_TREE		@_ws.ftypes.string <FT_BYTES> -> reg#0
    00001 IF_FALSE_GOTO	3
    00002 ANY_EQ		reg#0 == 41:42:43 <FT_BYTES>
    00003 RETURN

In the second case the field has a "raw" type, which equates directly to
FT_BYTES, and the field value is read from the raw packet data.
This commit is contained in:
João Valverde 2022-10-25 04:20:18 +01:00
parent 31a0147daa
commit 0853ddd1cb
14 changed files with 201 additions and 26 deletions

View File

@ -388,6 +388,26 @@ For more complicated ranges the same syntax used with slices is valid:
means layers number 2, 3 or 4 inclusive. The hash symbol is required to
distinguish a layer range from a slice.
== The at operator
Prefixing the field name with an at sign (@) causes the comparison to be done
against the raw packet data for the field.
A character string must be decoded from a source encoding during dissection.
If there are decoding errors the resulting string will usually contain
replacement characters:
[subs="replacements"]
----
browser.comment == "string is &#xFFFD;&#xFFFD;&#xFFFD;&#xFFFD;"
----
The at operator allows testing the raw undecoded data:
@browser.comment == 73:74:72:69:6e:67:20:69:73:20:aa:aa:aa:aa
The syntactical rules for a bytes field type apply to the second example.
=== The membership operator
A field may be checked for matches against a set of values simply with the

View File

@ -21,6 +21,8 @@ It is used for troubleshooting, analysis, development and education.
Wireshark is now better about generating valid UTF-8 output.
A new display filter feature for filtering raw bytes has been added.
Many other improvements have been made.
See the “New and Updated Features” section below for more details.
@ -46,6 +48,9 @@ The following features are new (or have been significantly updated) since versio
* Wireshark now builds with Qt6 by default. To use Qt5 instead pass USE_qt6=OFF to CMake.
* It is now possible to filter on raw packet data for any field by using the syntax ``@some.field == <bytes...>``.
This can be useful to filter on malformed UTF-8 strings, among other use cases where it is necessary to
look at the field's raw data.
=== Removed Features and Support

View File

@ -46,6 +46,7 @@ typedef struct {
gchar *error_message;
GPtrArray *insns;
GHashTable *loaded_fields;
GHashTable *loaded_raw_fields;
GHashTable *interesting_fields;
int next_insn_id;
int next_register;

View File

@ -249,6 +249,10 @@ dfwork_free(dfwork_t *dfw)
g_hash_table_destroy(dfw->loaded_fields);
}
if (dfw->loaded_raw_fields) {
g_hash_table_destroy(dfw->loaded_raw_fields);
}
if (dfw->interesting_fields) {
g_hash_table_destroy(dfw->interesting_fields);
}
@ -311,6 +315,7 @@ const char *tokenstr(int token)
case TOKEN_LPAREN: return "LPAREN";
case TOKEN_RPAREN: return "RPAREN";
case TOKEN_DOLLAR: return "DOLLAR";
case TOKEN_ATSIGN: return "ATSIGN";
}
return "<unknown>";
}

View File

@ -155,9 +155,14 @@ dfvm_value_new_fvalue(fvalue_t *fv)
}
dfvm_value_t*
dfvm_value_new_hfinfo(header_field_info *hfinfo)
dfvm_value_new_hfinfo(header_field_info *hfinfo, gboolean raw)
{
dfvm_value_t *v = dfvm_value_new(HFINFO);
dfvm_value_t *v;
if (raw)
v = dfvm_value_new(RAW_HFINFO);
else
v = dfvm_value_new(HFINFO);
v->value.hfinfo = hfinfo;
return v;
}
@ -216,6 +221,10 @@ dfvm_value_tostr(dfvm_value_t *v)
v->value.hfinfo->abbrev,
ftype_name(v->value.hfinfo->type));
break;
case RAW_HFINFO:
s = ws_strdup_printf("@%s <FT_BYTES>",
v->value.hfinfo->abbrev);
break;
case FVALUE:
aux = fvalue_to_debug_repr(NULL, v->value.fvalue);
s = ws_strdup_printf("%s <%s>",
@ -554,11 +563,43 @@ drange_contains_layer(drange_t *dr, int num, int length)
return FALSE;
}
/*
 * Build an FT_BYTES fvalue holding the raw packet bytes covered by a field.
 *
 * The bytes are copied from fi->ds_tvb starting at offset fi->start, for
 * fi->length bytes, clamped to the captured length remaining at that offset.
 *
 * Returns a newly created fvalue, or NULL when the remaining captured length
 * is reported as negative.
 */
static fvalue_t *
get_raw_fvalue(field_info *fi)
{
	/*
	 * XXX - a field can have a length that runs past
	 * the end of the tvbuff. Ideally, that should
	 * be fixed when adding an item to the protocol
	 * tree, but checking the length when doing
	 * that could be expensive. Until we fix that,
	 * we'll do the check here.
	 */
	int available = tvb_captured_length_remaining(fi->ds_tvb, fi->start);
	if (available < 0) {
		return NULL;
	}

	/* Never read more than what was actually captured. */
	int raw_len = fi->length;
	if (raw_len > available) {
		raw_len = available;
	}

	GByteArray *raw = g_byte_array_new();
	g_byte_array_append(raw, tvb_get_ptr(fi->ds_tvb, fi->start, raw_len), raw_len);

	fvalue_t *raw_fv = fvalue_new(FT_BYTES);
	fvalue_set_byte_array(raw_fv, raw);
	return raw_fv;
}
static GSList *
filter_finfo_fvalues(GSList *fvalues, GPtrArray *finfos, drange_t *range)
filter_finfo_fvalues(GSList *fvalues, GPtrArray *finfos, drange_t *range, gboolean raw)
{
int length; /* maximum proto layer number. The numbers are sequential. */
field_info *last_finfo, *finfo;
fvalue_t *fv;
int cookie = -1;
gboolean cookie_matches = false;
int layer;
@ -572,14 +613,22 @@ filter_finfo_fvalues(GSList *fvalues, GPtrArray *finfos, drange_t *range)
layer = finfo->proto_layer_num;
if (cookie == layer) {
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, &finfo->value);
if (raw)
fv = get_raw_fvalue(finfo);
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
else {
cookie = layer;
cookie_matches = drange_contains_layer(range, layer, length);
if (cookie_matches) {
fvalues = g_slist_prepend(fvalues, &finfo->value);
if (raw)
fv = get_raw_fvalue(finfo);
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
}
@ -597,9 +646,13 @@ read_tree(dfilter_t *df, proto_tree *tree,
field_info *finfo;
int i, len;
GSList *fvalues = NULL;
fvalue_t *fv;
drange_t *range = NULL;
gboolean raw;
header_field_info *hfinfo = arg1->value.hfinfo;
raw = arg1->type == RAW_HFINFO;
int reg = arg2->value.numeric;
if (arg3) {
@ -626,13 +679,17 @@ read_tree(dfilter_t *df, proto_tree *tree,
}
if (range) {
fvalues = filter_finfo_fvalues(fvalues, finfos, range);
fvalues = filter_finfo_fvalues(fvalues, finfos, range, raw);
}
else {
len = finfos->len;
for (i = 0; i < len; i++) {
finfo = g_ptr_array_index(finfos, i);
fvalues = g_slist_prepend(fvalues, &finfo->value);
if (raw)
fv = get_raw_fvalue(finfo);
else
fv = &finfo->value;
fvalues = g_slist_prepend(fvalues, fv);
}
}
@ -644,8 +701,13 @@ read_tree(dfilter_t *df, proto_tree *tree,
}
df->registers[reg] = fvalues;
// These values are referenced only, do not try to free it later.
df->free_registers[reg] = NULL;
if (raw) {
df->free_registers[reg] = (GDestroyNotify)fvalue_free;
}
else {
// These values are referenced only, do not try to free it later.
df->free_registers[reg] = NULL;
}
return TRUE;
}
@ -1261,7 +1323,7 @@ check_exists(proto_tree *tree, dfvm_value_t *arg1, dfvm_value_t *arg2)
return TRUE;
}
fvalues = filter_finfo_fvalues(NULL, finfos, range);
fvalues = filter_finfo_fvalues(NULL, finfos, range, FALSE);
exists = (fvalues != NULL);
g_slist_free(fvalues);
if (exists) {

View File

@ -21,6 +21,7 @@ typedef enum {
EMPTY,
FVALUE,
HFINFO,
RAW_HFINFO,
INSN_NUMBER,
REGISTER,
INTEGER,
@ -121,7 +122,7 @@ dfvm_value_t*
dfvm_value_new_fvalue(fvalue_t *fv);
dfvm_value_t*
dfvm_value_new_hfinfo(header_field_info *hfinfo);
dfvm_value_new_hfinfo(header_field_info *hfinfo, gboolean raw);
dfvm_value_t*
dfvm_value_new_register(int reg);

View File

@ -131,12 +131,14 @@ dfw_append_jump(dfwork_t *dfw)
/* returns register number */
static dfvm_value_t *
dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo,
drange_t *range)
drange_t *range,
gboolean raw)
{
dfvm_insn_t *insn;
int reg = -1;
dfvm_value_t *reg_val, *val1, *val3;
gboolean added_new_hfinfo = FALSE;
GHashTable *loaded_fields;
void *loaded_key;
/* Rewind to find the first field of this name. */
@ -144,11 +146,16 @@ dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo,
hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
}
if (raw)
loaded_fields = dfw->loaded_raw_fields;
else
loaded_fields = dfw->loaded_fields;
/* Keep track of which registers
* were used for which hfinfo's so that we
* can re-use registers. */
/* Re-use only if we are not using a range (layer filter). */
loaded_key = g_hash_table_lookup(dfw->loaded_fields, hfinfo);
loaded_key = g_hash_table_lookup(loaded_fields, hfinfo);
if (loaded_key != NULL) {
if (range == NULL) {
/*
@ -165,13 +172,13 @@ dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo,
}
else {
reg = dfw->next_register++;
g_hash_table_insert(dfw->loaded_fields,
g_hash_table_insert(loaded_fields,
hfinfo, GINT_TO_POINTER(reg + 1));
added_new_hfinfo = TRUE;
}
val1 = dfvm_value_new_hfinfo(hfinfo);
val1 = dfvm_value_new_hfinfo(hfinfo, raw);
reg_val = dfvm_value_new_register(reg);
if (range) {
val3 = dfvm_value_new_drange(range);
@ -200,7 +207,8 @@ dfw_append_read_tree(dfwork_t *dfw, header_field_info *hfinfo,
/* returns register number */
static dfvm_value_t *
dfw_append_read_reference(dfwork_t *dfw, header_field_info *hfinfo,
drange_t *range)
drange_t *range,
gboolean raw)
{
dfvm_insn_t *insn;
dfvm_value_t *reg_val, *val1, *val3;
@ -213,7 +221,7 @@ dfw_append_read_reference(dfwork_t *dfw, header_field_info *hfinfo,
/* We can't reuse registers with a filter so just skip
* that optimization and don't reuse them at all. */
val1 = dfvm_value_new_hfinfo(hfinfo);
val1 = dfvm_value_new_hfinfo(hfinfo, raw);
reg_val = dfvm_value_new_register(dfw->next_register++);
if (range) {
val3 = dfvm_value_new_drange(range);
@ -537,18 +545,21 @@ gen_entity(dfwork_t *dfw, stnode_t *st_arg, GSList **jumps_ptr)
dfvm_value_t *val;
header_field_info *hfinfo;
drange_t *range = NULL;
gboolean raw;
e_type = stnode_type_id(st_arg);
if (e_type == STTYPE_FIELD) {
hfinfo = sttype_field_hfinfo(st_arg);
range = sttype_field_drange_steal(st_arg);
val = dfw_append_read_tree(dfw, hfinfo, range);
raw = sttype_field_raw(st_arg);
val = dfw_append_read_tree(dfw, hfinfo, range, raw);
*jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw));
}
else if (e_type == STTYPE_REFERENCE) {
hfinfo = sttype_field_hfinfo(st_arg);
range = sttype_field_drange_steal(st_arg);
val = dfw_append_read_reference(dfw, hfinfo, range);
raw = sttype_field_raw(st_arg);
val = dfw_append_read_reference(dfw, hfinfo, range, raw);
*jumps_ptr = g_slist_prepend(*jumps_ptr, dfw_append_jump(dfw));
}
else if (e_type == STTYPE_FVALUE) {
@ -589,7 +600,8 @@ gen_exists(dfwork_t *dfw, stnode_t *st_node)
hfinfo = proto_registrar_get_nth(hfinfo->same_name_prev_id);
}
val1 = dfvm_value_new_hfinfo(hfinfo);
/* Ignore "rawness" for existence tests. */
val1 = dfvm_value_new_hfinfo(hfinfo, FALSE);
if (range) {
val2 = dfvm_value_new_drange(range);
}
@ -827,6 +839,7 @@ dfw_gencode(dfwork_t *dfw)
{
dfw->insns = g_ptr_array_new();
dfw->loaded_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
dfw->loaded_raw_fields = g_hash_table_new(g_direct_hash, g_direct_equal);
dfw->interesting_fields = g_hash_table_new(g_int_hash, g_int_equal);
gencode(dfw, dfw->st_root);
dfw_append_insn(dfw, dfvm_insn_new(DFVM_RETURN));

View File

@ -139,19 +139,19 @@ layer(R) ::= HASH INTEGER(N).
R = g_slist_append(NULL, range);
}
field(R) ::= FIELD(F).
layered_field(R) ::= FIELD(F).
{
R = F;
}
field(R) ::= FIELD(F) layer(L).
layered_field(R) ::= FIELD(F) layer(L).
{
R = F;
sttype_field_set_range(R, L);
g_slist_free(L);
}
field(R) ::= UNPARSED(U) layer(L).
layered_field(R) ::= UNPARSED(U) layer(L).
{
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, stnode_token(U));
if (hfinfo == NULL) {
@ -163,6 +163,30 @@ field(R) ::= UNPARSED(U) layer(L).
g_slist_free(L);
}
/* A field without the raw modifier is just the (optionally layered) field. */
field(R) ::= layered_field(F).
{
R = F;
}
/* "@" followed by a field, with or without a layer: mark the node as raw. */
field(R) ::= ATSIGN layered_field(F).
{
R = F;
sttype_field_set_raw(R, TRUE);
}
/*
 * "@" followed by an unparsed token: resolve the token to a registered
 * field, build a new STTYPE_FIELD node for it and mark that node as raw.
 */
field(R) ::= ATSIGN UNPARSED(U).
{
const char *token = stnode_token(U);
const stloc_t *loc = stnode_location(U);
header_field_info *hfinfo = dfilter_resolve_unparsed(dfw, token);
if (hfinfo == NULL) {
/* NOTE(review): FAIL is defined elsewhere; confirm whether the action continues past it with a NULL hfinfo. */
FAIL(dfw, U, "%s is not a valid field", stnode_token(U));
}
/* The new node receives its own copy of the token text. */
R = stnode_new(STTYPE_FIELD, hfinfo, g_strdup(token), loc);
sttype_field_set_raw(R, TRUE);
/* The unparsed node has been replaced by R and is released here. */
stnode_free(U);
}
reference(R) ::= DOLLAR LBRACE field(F) RBRACE.
{
/* convert field to reference */

View File

@ -154,6 +154,7 @@ hyphen-bytes {hex2}(-{hex2})+
".." return simple(TOKEN_DOTDOT);
"}" return simple(TOKEN_RBRACE);
"$" return simple(TOKEN_DOLLAR);
"@" return simple(TOKEN_ATSIGN);
"any" return simple(TOKEN_ANY);
"all" return simple(TOKEN_ALL);

View File

@ -599,7 +599,7 @@ check_relation_LHS_FIELD(dfwork_t *dfw, stnode_op_t st_op,
ws_assert(stnode_type_id(st_arg1) == STTYPE_FIELD ||
stnode_type_id(st_arg1) == STTYPE_REFERENCE);
hfinfo1 = sttype_field_hfinfo(st_arg1);
ftype1 = hfinfo1->type;
ftype1 = sttype_field_ftenum(st_arg1);
if (!can_func(ftype1)) {
FAIL(dfw, st_arg1, "%s (type=%s) cannot participate in %s comparison.",

View File

@ -23,6 +23,7 @@ typedef struct {
guint32 magic;
header_field_info *hfinfo;
drange_t *drange;
gboolean raw;
} field_t;
#define FIELD_MAGIC 0xfc2002cf
@ -36,6 +37,7 @@ field_new(gpointer hfinfo)
field->magic = FIELD_MAGIC;
field->hfinfo = hfinfo;
field->drange = NULL;
field->raw = FALSE;
return field;
}
@ -50,6 +52,7 @@ field_dup(gconstpointer data)
field = field_new(NULL);
field->hfinfo = org->hfinfo;
field->drange = drange_dup(org->drange);
field->raw = org->raw;
return field;
}
@ -76,12 +79,12 @@ field_tostr(const void *data, gboolean pretty _U_)
repr = ws_strdup_printf("%s#[%s] <%s>",
field->hfinfo->abbrev,
drange_str,
ftype_name(field->hfinfo->type));
field->raw ? "RAW" : ftype_name(field->hfinfo->type));
g_free(drange_str);
}
else {
repr = ws_strdup_printf("%s <%s>", field->hfinfo->abbrev,
ftype_name(field->hfinfo->type));
field->raw ? "RAW" : ftype_name(field->hfinfo->type));
}
return repr;
@ -100,6 +103,8 @@ sttype_field_ftenum(stnode_t *node)
{
field_t *field = node->data;
ws_assert_magic(field, FIELD_MAGIC);
if (field->raw)
return FT_BYTES;
return field->hfinfo->type;
}
@ -111,6 +116,14 @@ sttype_field_drange(stnode_t *node)
return field->drange;
}
/*
 * Return the raw flag stored in a field node's data.
 * sttype_field_ftenum() reports FT_BYTES for a node whose flag is set.
 */
gboolean
sttype_field_raw(stnode_t *node)
{
field_t *field = node->data;
/* Check that the node data really is a field_t before reading it. */
ws_assert_magic(field, FIELD_MAGIC);
return field->raw;
}
drange_t *
sttype_field_drange_steal(stnode_t *node)
{
@ -152,6 +165,14 @@ sttype_field_set_drange(stnode_t *node, drange_t *dr)
field->drange = dr;
}
/*
 * Set or clear the raw flag in a field node's data.
 * The grammar calls this with TRUE for fields prefixed with "@".
 */
void
sttype_field_set_raw(stnode_t *node, gboolean raw)
{
field_t *field = stnode_data(node);
/* Check that the node data really is a field_t before writing to it. */
ws_assert_magic(field, FIELD_MAGIC);
field->raw = raw;
}
char *
sttype_field_set_number(stnode_t *node, const char *number_str)
{

View File

@ -27,6 +27,9 @@ sttype_field_drange(stnode_t *node);
drange_t *
sttype_field_drange_steal(stnode_t *node);
gboolean
sttype_field_raw(stnode_t *node);
/* Set a range */
void
sttype_field_set_range(stnode_t *node, GSList* drange_list);
@ -37,6 +40,9 @@ sttype_field_set_range1(stnode_t *node, drange_node *rn);
void
sttype_field_set_drange(stnode_t *node, drange_t *dr);
void
sttype_field_set_raw(stnode_t *node, gboolean raw);
char *
sttype_field_set_number(stnode_t *node, const char *number_str);

Binary file not shown.

View File

@ -332,3 +332,19 @@ class case_quantifiers(unittest.TestCase):
def test_all_1(self, checkDFilterCount):
dfilter = 'all ip.addr > 1.1.1.1'
checkDFilterCount(dfilter, 1)
# Tests for the "@" raw modifier, run against the s7comm-fuzz capture.
# NOTE(review): checkDFilterCount presumably asserts the number of packets
# matching the filter; confirm against the fixture definition.
@fixtures.uses_fixtures
class case_raw_modifier(unittest.TestCase):
trace_file = "s7comm-fuzz.pcapng.gz"
# Baseline: compare the decoded string, "0" followed by U+FFFD.
def test_regular(self, checkDFilterCount):
dfilter = 's7comm.blockinfo.blocktype == "0\uFFFD"'
checkDFilterCount(dfilter, 3)
# Raw comparison against the bytes 30:aa.
def test_raw1(self, checkDFilterCount):
dfilter = '@s7comm.blockinfo.blocktype == 30:aa'
checkDFilterCount(dfilter, 2)
# Raw comparison against the bytes 30:fe.
def test_raw2(self, checkDFilterCount):
dfilter = '@s7comm.blockinfo.blocktype == 30:fe'
checkDFilterCount(dfilter, 1)