From ee35570e90434289d76fb492c694999f7464f42d Mon Sep 17 00:00:00 2001 From: Guy Harris Date: Fri, 7 Jun 2019 12:16:03 -0700 Subject: [PATCH] Improve handling of binary data that *might* be text. Add a BASE_SHOW_ASCII_PRINTABLE flag for the "display" field, to use with FT_BYTES and FT_UINT_BYTES fields; it specifies that, if the field consists solely of printable ASCII characters, its value be displayed as a string, in quotes. Have a routine hfinfo_format_bytes() to do that formatting, depending on the display field value. Add routines to fetch the display value of string and FT_BYTES/FT_UINT_BYTES fields; for strings, it's the result of hfinfo_format_text(), and for byte arrays, it's the result of hfinfo_format_bytes(). Use BASE_SHOW_ASCII_PRINTABLE for extended attribute data in SMB and SMB2. Use the routines in question for extended attribute names (string) and data (bytes). That keeps us from displaying non-text extended attribute data as if it were text. Document BASE_SHOW_ASCII_PRINTABLE. Change-Id: I24dcf459c14f00985e4daaf9b58f5933964eabd8 Reviewed-on: https://code.wireshark.org/review/33517 Petri-Dish: Guy Harris Tested-by: Petri Dish Buildbot Reviewed-by: Guy Harris --- debian/libwireshark0.symbols | 2 + doc/README.dissector | 26 +++-- epan/dissectors/packet-smb.c | 12 +-- epan/dissectors/packet-smb2.c | 37 +++---- epan/proto.c | 189 +++++++++++++++++++++------------- epan/proto.h | 46 +++++++-- 6 files changed, 191 insertions(+), 121 deletions(-) diff --git a/debian/libwireshark0.symbols b/debian/libwireshark0.symbols index 5cd7807e60..4eb38930d6 100644 --- a/debian/libwireshark0.symbols +++ b/debian/libwireshark0.symbols @@ -1092,6 +1092,7 @@ libwireshark.so.0 libwireshark0 #MINVER# process_stat_cmd_arg@Base 1.9.1 proto_all_finfos@Base 1.9.1 proto_add_deregistered_data@Base 1.12.2 + proto_bytes_item_get_display_string@Base 3.1.0 proto_can_match_selected@Base 1.9.1 proto_can_toggle_protocol@Base 1.9.1 proto_check_field_name@Base 1.9.1 @@ -1179,6 +1180,7 @@ libwireshark.so.0 libwireshark0 #MINVER# proto_report_dissector_bug@Base 1.12.0~rc1 proto_set_cant_toggle@Base 1.9.1 proto_set_decoding@Base 1.9.1 + proto_string_item_get_display_string@Base 3.1.0 proto_tracking_interesting_fields@Base 1.9.1 proto_tree_add_ascii_7bits_item@Base 1.12.0~rc1 proto_tree_add_bitmask@Base 1.9.1 diff --git a/doc/README.dissector b/doc/README.dissector index 68515603cc..53021b21a8 100644 --- a/doc/README.dissector +++ b/doc/README.dissector @@ -114,8 +114,8 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and BASE_DEC, BASE_HEX, BASE_OCT, BASE_DEC_HEX, BASE_HEX_DEC, BASE_CUSTOM, or BASE_NONE, possibly ORed with BASE_RANGE_STRING, BASE_EXT_STRING, BASE_VAL64_STRING, - BASE_ALLOW_ZERO, BASE_UNIT_STRING, BASE_SPECIAL_VALS or - BASE_NO_DISPLAY_VALUE + BASE_ALLOW_ZERO, BASE_UNIT_STRING, BASE_SPECIAL_VALS, + BASE_NO_DISPLAY_VALUE, or BASE_SHOW_ASCII_PRINTABLE BASE_NONE may be used with a non-NULL FIELDCONVERT when the numeric value of the field itself is not of significance to @@ -125,9 +125,9 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and filters for the field in question. BASE_NO_DISPLAY_VALUE will just display the field name with - no value. It is intended for byte arrays (FT_BYTES) or - header fields above a subtree. The value will still be - filterable, just not displayed. + no value. It is intended for byte arrays (FT_BYTES or + FT_UINT_BYTES) or header fields above a subtree. The + value will still be filterable, just not displayed. --For FT_UINT16: @@ -161,12 +161,20 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and STR_ASCII or STR_UNICODE - --For FT_BYTES: + --For FT_BYTES and FT_UINT_BYTES: SEP_DOT, SEP_DASH, SEP_COLON, or SEP_SPACE to provide - a separator between bytes. - BASE_NONE has no separator between bytes - BASE_ALLOW_ZERO displays instead of for zero sized byte array + a separator between bytes; BASE_NONE has no separator + between bytes. These can be ORed with BASE_ALLOW_ZERO + and BASE_SHOW_ASCII_PRINTABLE. + + BASE_ALLOW_ZERO displays instead of + for a zero-sized byte array. + BASE_SHOW_ASCII_PRINTABLE will check whether the + field's value consists entirely of printable ASCII + characters and, if so, will display the field's value + as a string, in quotes. The value will still be + filterable as a byte value. --For FT_IPv4: diff --git a/epan/dissectors/packet-smb.c b/epan/dissectors/packet-smb.c index 659c1931ab..bbc5caf2e2 100644 --- a/epan/dissectors/packet-smb.c +++ b/epan/dissectors/packet-smb.c @@ -12275,10 +12275,9 @@ dissect_4_2_16_2(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree, COUNT_BYTES_SUBR(4); while (*bcp > 0) { - proto_item *item; + proto_item *item, *ti; proto_tree *subtree; int start_offset = offset; - guint8 *name; subtree = proto_tree_add_subtree( tree, tvb, offset, 0, ett_smb_ea, &item, "Extended Attribute"); @@ -12310,13 +12309,12 @@ dissect_4_2_16_2(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree *tree, /* EA name */ - name = tvb_get_string_enc(wmem_packet_scope(), tvb, offset, name_len, ENC_ASCII); - proto_item_append_text(item, ": %s", format_text(wmem_packet_scope(), name, strlen(name))); - CHECK_BYTE_COUNT_SUBR(name_len + 1); - proto_tree_add_item( + ti = proto_tree_add_item( subtree, hf_smb_ea_name, tvb, offset, name_len + 1, ENC_ASCII|ENC_NA); + proto_item_append_text(item, ": %s", + proto_string_item_get_display_string(wmem_packet_scope(), ti)); COUNT_BYTES_SUBR(name_len + 1); /* EA data */ @@ -19664,7 +19662,7 @@ proto_register_smb(void) NULL, 0, NULL, HFILL }}, { &hf_smb_ea_data, - { "EA Data", "smb.ea.data", FT_BYTES, BASE_NONE, + { "EA Data", "smb.ea.data", FT_BYTES, BASE_NONE|BASE_SHOW_ASCII_PRINTABLE, NULL, 0, NULL, HFILL }}, { &hf_smb_file_name_len, diff --git a/epan/dissectors/packet-smb2.c b/epan/dissectors/packet-smb2.c index b01a405983..be29a50233 100644 --- a/epan/dissectors/packet-smb2.c +++ b/epan/dissectors/packet-smb2.c @@ -2684,10 +2684,8 @@ dissect_smb2_file_full_ea_info(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree } while (1) { - int length; const char *name = ""; const char *data = ""; - guint16 bc; int start_offset = offset; proto_item *ea_item; proto_tree *ea_tree; @@ -2714,31 +2712,26 @@ dissect_smb2_file_full_ea_info(tvbuff_t *tvb, packet_info *pinfo _U_, proto_tree offset += 2; /* ea name */ - length = ea_name_len; - if (length) { - bc = tvb_captured_length_remaining(tvb, offset); - name = get_unicode_or_ascii_string(tvb, &offset, - FALSE, &length, TRUE, TRUE, &bc); - if (name) { - proto_tree_add_string(ea_tree, hf_smb2_ea_name, tvb, - offset, length + 1, name); - } + if (ea_name_len) { + proto_item *name_item; + + name_item = proto_tree_add_item(ea_tree, hf_smb2_ea_name, tvb, + offset, ea_name_len, ENC_ASCII|ENC_NA); + name = proto_string_item_get_display_string(wmem_packet_scope(), + name_item); } /* The name is terminated with a NULL */ offset += ea_name_len + 1; /* ea data */ - length = ea_data_len; - if (length) { - bc = tvb_captured_length_remaining(tvb, offset); - data = get_unicode_or_ascii_string(tvb, &offset, - FALSE, &length, TRUE, TRUE, &bc); - /* - * We put the data here ... - */ - proto_tree_add_item(ea_tree, hf_smb2_ea_data, tvb, - offset, length, ENC_NA); + if (ea_data_len) { + proto_item *data_item; + + data_item = proto_tree_add_item(ea_tree, hf_smb2_ea_data, + tvb, offset, ea_data_len, ENC_NA); + data = proto_bytes_item_get_display_string(wmem_packet_scope(), + data_item); } offset += ea_data_len; @@ -10763,7 +10756,7 @@ proto_register_smb2(void) }, { &hf_smb2_ea_data, - { "EA Data", "smb2.ea.data", FT_BYTES, BASE_NONE, + { "EA Data", "smb2.ea.data", FT_BYTES, BASE_NONE|BASE_SHOW_ASCII_PRINTABLE, NULL, 0, NULL, HFILL } }, diff --git a/epan/proto.c b/epan/proto.c index 1d860c7f3b..1522b275fa 100644 --- a/epan/proto.c +++ b/epan/proto.c @@ -5734,21 +5734,93 @@ proto_tree_set_representation(proto_item *pi, const char *format, va_list ap) } static char * -hfinfo_format_text(const header_field_info *hfinfo, const guchar *string) +hfinfo_format_text(wmem_allocator_t *scope, const header_field_info *hfinfo, + const guchar *string) { switch (hfinfo->display) { case STR_ASCII: - return format_text(NULL, string, strlen(string)); + return format_text(scope, string, strlen(string)); /* case STR_ASCII_WSP return format_text_wsp(string, strlen(string)); */ case STR_UNICODE: /* XXX, format_unicode_text() */ - return wmem_strdup(NULL, string); + return wmem_strdup(scope, string); } - return format_text(NULL, string, strlen(string)); + return format_text(scope, string, strlen(string)); +} + +static char * +hfinfo_format_bytes(wmem_allocator_t *scope, const header_field_info *hfinfo, + guint8 *bytes, guint length) +{ + char *str = NULL; + guint8 *p; + gboolean is_printable; + + if (bytes) { + if (hfinfo->display & BASE_SHOW_ASCII_PRINTABLE) { + /* + * Check whether all bytes are printable. + */ + is_printable = TRUE; + for (p = bytes; p < bytes+length; p++) { + if (!g_ascii_isprint(*p)) { + /* Not printable. */ + is_printable = FALSE; + break; + } + } + + /* + * If all bytes are printable ASCII, show the bytes + * as a string - in quotes to indicate that it's + * a string. + */ + if (is_printable) { + str = wmem_strdup_printf(scope, "\"%.*s\"", + (int)length, bytes); + return str; + } + } + + /* + * Either it's not printable ASCII, or we don't care whether + * it's printable ASCII; show it as hex bytes. + */ + switch (FIELD_DISPLAY(hfinfo->display)) { + case SEP_DOT: + str = bytestring_to_str(scope, bytes, length, '.'); + break; + case SEP_DASH: + str = bytestring_to_str(scope, bytes, length, '-'); + break; + case SEP_COLON: + str = bytestring_to_str(scope, bytes, length, ':'); + break; + case SEP_SPACE: + str = bytestring_to_str(scope, bytes, length, ' '); + break; + case BASE_NONE: + default: + if (prefs.display_byte_fields_with_spaces) { + str = bytestring_to_str(scope, bytes, length, ' '); + } else { + str = bytes_to_str(scope, bytes, length); + } + break; + } + } + else { + if (hfinfo->display & BASE_ALLOW_ZERO) { + str = wmem_strdup(scope, ""); + } else { + str = wmem_strdup(scope, ""); + } + } + return str; } static int @@ -5900,40 +5972,12 @@ proto_custom_set(proto_tree* tree, GSList *field_ids, gint occurrence, case FT_UINT_BYTES: case FT_BYTES: - bytes = (guint8 *)fvalue_get(&finfo->value); - if (bytes) { - switch (hfinfo->display) { - case SEP_DOT: - str = bytestring_to_str(NULL, bytes, fvalue_length(&finfo->value), '.'); - break; - case SEP_DASH: - str = bytestring_to_str(NULL, bytes, fvalue_length(&finfo->value), '-'); - break; - case SEP_COLON: - str = bytestring_to_str(NULL, bytes, fvalue_length(&finfo->value), ':'); - break; - case SEP_SPACE: - str = bytestring_to_str(NULL, bytes, fvalue_length(&finfo->value), ' '); - break; - case BASE_NONE: - default: - if (prefs.display_byte_fields_with_spaces) { - str = bytestring_to_str(NULL, bytes, fvalue_length(&finfo->value), ' '); - } else { - str = bytes_to_str(NULL, bytes, fvalue_length(&finfo->value)); - } - break; - } - offset_r += protoo_strlcpy(result+offset_r, str, size-offset_r); - wmem_free(NULL, str); - } - else { - if (hfinfo->display & BASE_ALLOW_ZERO) { - offset_r += protoo_strlcpy(result+offset_r, "", size-offset_r); - } else { - offset_r += protoo_strlcpy(result+offset_r, "", size-offset_r); - } - } + tmpbuf = hfinfo_format_bytes(NULL, + hfinfo, + (guint8 *)fvalue_get(&finfo->value), + fvalue_length(&finfo->value)); + offset_r += protoo_strlcpy(result+offset_r, tmpbuf, size-offset_r); + wmem_free(NULL, tmpbuf); break; case FT_ABSOLUTE_TIME: @@ -6210,7 +6254,7 @@ proto_custom_set(proto_tree* tree, GSList *field_ids, gint occurrence, case FT_UINT_STRING: case FT_STRINGZPAD: bytes = (guint8 *)fvalue_get(&finfo->value); - str = hfinfo_format_text(hfinfo, bytes); + str = hfinfo_format_text(NULL, hfinfo, bytes); offset_r += protoo_strlcpy(result+offset_r, str, size-offset_r); wmem_free(NULL, str); @@ -8368,40 +8412,11 @@ proto_item_fill_label(field_info *fi, gchar *label_str) case FT_BYTES: case FT_UINT_BYTES: - bytes = (guint8 *)fvalue_get(&fi->value); - if (bytes) { - char* str = NULL; - switch (hfinfo->display) { - case SEP_DOT: - str = bytestring_to_str(NULL, bytes, fvalue_length(&fi->value), '.'); - break; - case SEP_DASH: - str = bytestring_to_str(NULL, bytes, fvalue_length(&fi->value), '-'); - break; - case SEP_COLON: - str = bytestring_to_str(NULL, bytes, fvalue_length(&fi->value), ':'); - break; - case SEP_SPACE: - str = bytestring_to_str(NULL, bytes, fvalue_length(&fi->value), ' '); - break; - case BASE_NONE: - default: - if (prefs.display_byte_fields_with_spaces) { - str = bytestring_to_str(NULL, bytes, fvalue_length(&fi->value), ' '); - } else { - str = bytes_to_str(NULL, bytes, fvalue_length(&fi->value)); - } - break; - } - label_fill(label_str, 0, hfinfo, str); - wmem_free(NULL, str); - } else { - if (hfinfo->display & BASE_ALLOW_ZERO) { - label_fill(label_str, 0, hfinfo, ""); - } else { - label_fill(label_str, 0, hfinfo, ""); - } - } + tmp = hfinfo_format_bytes(NULL, hfinfo, + (guint8 *)fvalue_get(&fi->value), + fvalue_length(&fi->value)); + label_fill(label_str, 0, hfinfo, tmp); + wmem_free(NULL, tmp); break; case FT_CHAR: @@ -8665,7 +8680,7 @@ proto_item_fill_label(field_info *fi, gchar *label_str) case FT_UINT_STRING: case FT_STRINGZPAD: bytes = (guint8 *)fvalue_get(&fi->value); - tmp = hfinfo_format_text(hfinfo, bytes); + tmp = hfinfo_format_text(NULL, hfinfo, bytes); label_fill(label_str, 0, hfinfo, tmp); wmem_free(NULL, tmp); break; @@ -12324,6 +12339,32 @@ proto_tree_add_checksum(proto_tree *tree, tvbuff_t *tvb, const guint offset, return ti; } +char * +proto_string_item_get_display_string(wmem_allocator_t *scope, proto_item *pi) +{ + field_info *fi = pi->finfo; + header_field_info *hfinfo = fi->hfinfo; + + DISSECTOR_ASSERT(hfinfo->type == FT_STRING || + hfinfo->type == FT_STRINGZ || + hfinfo->type == FT_UINT_STRING || + hfinfo->type == FT_STRINGZPAD); + return hfinfo_format_text(scope, hfinfo, + (guint8 *)fvalue_get(&fi->value)); +} + +char * +proto_bytes_item_get_display_string(wmem_allocator_t *scope, proto_item *pi) +{ + field_info *fi = pi->finfo; + header_field_info *hfinfo = fi->hfinfo; + + DISSECTOR_ASSERT(hfinfo->type == FT_BYTES || + hfinfo->type == FT_UINT_BYTES); + return hfinfo_format_bytes(scope, hfinfo, + (guint8 *)fvalue_get(&fi->value), fvalue_length(&fi->value)); +} + guchar proto_check_field_name(const gchar *field_name) { diff --git a/epan/proto.h b/epan/proto.h index 8e5f5fe10e..9c565bde39 100644 --- a/epan/proto.h +++ b/epan/proto.h @@ -615,15 +615,22 @@ typedef enum { /* Following constants have to be ORed with a field_display_e when dissector * want to use specials value-string MACROs for a header_field_info */ -#define BASE_RANGE_STRING 0x0100 /**< Use the supplied range string to convert the field to text */ -#define BASE_EXT_STRING 0x0200 -#define BASE_VAL64_STRING 0x0400 -#define BASE_ALLOW_ZERO 0x0800 /**< Display instead of for zero sized byte array */ -#define BASE_UNIT_STRING 0x1000 /**< Add unit text to the field value */ -#define BASE_NO_DISPLAY_VALUE 0x2000 /**< Just display the field name with no value. Intended for - byte arrays or header fields above a subtree */ -#define BASE_PROTOCOL_INFO 0x4000 /**< protocol_t in [FIELDCONVERT]. Internal use only. */ -#define BASE_SPECIAL_VALS 0x8000 /**< field will not display "Unknown" if value_string match is not found */ +#define BASE_RANGE_STRING 0x00000100 /**< Use the supplied range string to convert the field to text */ +#define BASE_EXT_STRING 0x00000200 +#define BASE_VAL64_STRING 0x00000400 + +#define BASE_ALLOW_ZERO 0x00000800 /**< Display instead of for zero sized byte array */ + +#define BASE_UNIT_STRING 0x00001000 /**< Add unit text to the field value */ + +#define BASE_NO_DISPLAY_VALUE 0x00002000 /**< Just display the field name with no value. Intended for + byte arrays or header fields above a subtree */ + +#define BASE_PROTOCOL_INFO 0x00004000 /**< protocol_t in [FIELDCONVERT]. Internal use only. */ + +#define BASE_SPECIAL_VALS 0x00008000 /**< field will not display "Unknown" if value_string match is not found */ + +#define BASE_SHOW_ASCII_PRINTABLE 0x00010000 /**< show byte array as ASCII if it's all printable characters */ /** BASE_ values that cause the field value to be displayed twice */ #define IS_BASE_DUAL(b) ((b)==BASE_DEC_HEX||(b)==BASE_HEX_DEC) @@ -3096,6 +3103,27 @@ proto_custom_set(proto_tree* tree, GSList *field_id, gchar *result, gchar *expr, const int size ); +/* + * We're exporting some formatting functions here to handle + * dissectors that want to append string or byte array values to + * a column or a higher-level protocol tree item. + * + * This should really be done with an internal-to-libwireshark routine + * to write that value to a string, snprintf-style (with a pointer to + * the buffer and and the buffer size, which could be a pointer into + * a buffer and the remaining amount of space in the buffer), and + * column and protocol-tree routines to append to columns and + * protocol-tree item representations, respectively. + * + * But we should probably try to make that append routine useful + * for appending to *all* protocol-tree item representations, and + * not have, for example, the XXX_value_format_display(), + * XXX_value_format(), and XXX_vals_format() routines - what the + * heck is going on there? + */ +WS_DLL_PUBLIC char *proto_string_item_get_display_string(wmem_allocator_t *scope, proto_item *pi); +WS_DLL_PUBLIC char *proto_bytes_item_get_display_string(wmem_allocator_t *scope, proto_item *pi); + /* #define HAVE_HFI_SECTION_INIT */ #ifdef HAVE_HFI_SECTION_INIT