wbxml: Fix encoding

Whenever a string is inline or retrieved from the string table,
it must be decoded using the document encoding. tvb_format_text is
not suitable, because it always assumes UTF-8 (which is only the
WBXML default when no other charset is known), and tvb_get_ptr is
*definitely* not suitable, because it hands back the raw bytes.

Replace a bunch of calls of tvb_strsize and tvb_format_text
(and one tvb_get_ptr) with tvb_get_stringz_enc using the
document encoding, which is now stored in packet-level proto
data. (Not yet implemented: when the charset is 0, fall back to
parsing it from the Content-Type string, if the calling dissector
provides it. For now a charset of 0 is treated as UTF-8.)

Fix #18573
This commit is contained in:
John Thacker 2022-11-04 21:14:43 -04:00
parent 07627f0230
commit d2f7b8907c
1 changed file with 24 additions and 19 deletions

View File

@ -7058,6 +7058,7 @@ parse_wbxml_attribute_list_defined (proto_tree *tree, tvbuff_t *tvb, packet_info
const char *attr_save_literal = NULL; /* Will contain the LITERAL attr identity */
const gchar *str;
unsigned recursion_level = p_get_proto_depth(pinfo, proto_wbxml);
unsigned encoding = GPOINTER_TO_UINT(p_get_proto_data(pinfo->pool, pinfo, proto_wbxml, 0));
DebugLog(("parse_wbxml_attr_defined (level = %u, offset = %u)\n", recursion_level, offset));
/* Parse attributes */
@ -7097,8 +7098,7 @@ parse_wbxml_attribute_list_defined (proto_tree *tree, tvbuff_t *tvb, packet_info
}
break;
case 0x03: /* STR_I */
len = tvb_strsize (tvb, off+1);
str = tvb_format_text (pinfo->pool, tvb, off+1, len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, off+1, &len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_str_i, tvb, off, 1+len, str,
" %3d | Attr | A %3d | STR_I (Inline string) | %s\'%s\'",
recursion_level, *codepage_attr, Indent (recursion_level), str);
@ -7110,9 +7110,8 @@ parse_wbxml_attribute_list_defined (proto_tree *tree, tvbuff_t *tvb, packet_info
*/
idx = tvb_get_guintvar (tvb, off+1, &len, pinfo, &ei_wbxml_oversized_uintvar);
if (len <= tvb_len) {
str_len = tvb_strsize (tvb, str_tbl+idx);
attr_save_known = 0;
attr_save_literal = tvb_format_text (pinfo->pool, tvb, str_tbl+idx, str_len-1);
attr_save_literal = tvb_get_stringz_enc(pinfo->pool, tvb, str_tbl+idx, &str_len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_literal, tvb, off, 1+len, attr_save_literal,
" %3d | Attr | A %3d | LITERAL (Literal Attribute) | %s<%s />",
recursion_level, *codepage_attr, Indent (recursion_level), attr_save_literal);
@ -7126,8 +7125,7 @@ parse_wbxml_attribute_list_defined (proto_tree *tree, tvbuff_t *tvb, packet_info
case 0x41: /* EXT_I_1 */
case 0x42: /* EXT_I_2 */
/* Extension tokens */
len = tvb_strsize (tvb, off+1);
str = tvb_format_text (pinfo->pool, tvb, off+1, len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, off+1, &len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_ext_i, tvb, off, 1+len, str,
" %3d | Attr | A %3d | EXT_I_%1x (Extension Token) | %s(%s: \'%s\')",
recursion_level, *codepage_attr, peek & 0x0f, Indent (recursion_level),
@ -7168,8 +7166,7 @@ parse_wbxml_attribute_list_defined (proto_tree *tree, tvbuff_t *tvb, packet_info
case 0x83: /* STR_T */
idx = tvb_get_guintvar (tvb, off+1, &len, pinfo, &ei_wbxml_oversized_uintvar);
if (len <= tvb_len) {
str_len = tvb_strsize (tvb, str_tbl+idx);
str = tvb_format_text (pinfo->pool, tvb, str_tbl+idx, str_len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, str_tbl+idx, &str_len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_str_t, tvb, off, 1+len, str,
" %3d | Attr | A %3d | STR_T (Tableref string) | %s\'%s\'",
recursion_level, *codepage_attr, Indent (recursion_level), str);
@ -7334,7 +7331,8 @@ parse_wbxml_tag_defined (proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, gu
The initial state is FALSE.
This state will trigger recursion. */
unsigned recursion_level = p_get_proto_depth(pinfo, proto_wbxml);
unsigned recursion_level = p_get_proto_depth(pinfo, proto_wbxml);
unsigned encoding = GPOINTER_TO_UINT(p_get_proto_data(pinfo->pool, pinfo, proto_wbxml, 0));
if (++recursion_level >= WBXML_MAX_RECURSION_LEVEL) {
proto_tree_add_expert(tree, pinfo, &ei_wbxml_too_much_recursion, tvb, offset, tvb_captured_length_remaining(tvb, offset));
return tvb_len;
@ -7378,8 +7376,7 @@ parse_wbxml_tag_defined (proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, gu
off += 1+len;
break;
case 0x03: /* STR_I */
len = tvb_strsize (tvb, off+1);
str = tvb_format_text (pinfo->pool, tvb, off+1, len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, off+1, &len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_str_i, tvb, off, 1+len, str,
" %3d | Tag | T %3d | STR_I (Inline string) | %s\'%s\'",
recursion_level, *codepage_stag, Indent(recursion_level),
@ -7390,8 +7387,7 @@ parse_wbxml_tag_defined (proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, gu
case 0x41: /* EXT_I_1 */
case 0x42: /* EXT_I_2 */
/* Extension tokens */
len = tvb_strsize (tvb, off+1);
str = tvb_format_text (pinfo->pool, tvb, off+1, len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, off+1, &len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_ext_i, tvb, off, 1+len, str,
" %3d | Tag | T %3d | EXT_I_%1x (Extension Token) | %s(%s: \'%s\')",
recursion_level, *codepage_stag,
@ -7444,8 +7440,7 @@ parse_wbxml_tag_defined (proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, gu
break;
case 0x83: /* STR_T */
idx = tvb_get_guintvar (tvb, off+1, &len, pinfo, &ei_wbxml_oversized_uintvar);
str_len = tvb_strsize (tvb, str_tbl+idx);
str = tvb_format_text (pinfo->pool, tvb, str_tbl+idx, str_len-1);
str = tvb_get_stringz_enc(pinfo->pool, tvb, str_tbl+idx, &str_len, encoding);
proto_tree_add_string_format(tree, hf_wbxml_str_t, tvb, off, 1+len, str,
" %3d | Tag | T %3d | STR_T (Tableref string) | %s\'%s\'",
recursion_level, *codepage_stag, Indent (recursion_level), str);
@ -7531,8 +7526,7 @@ parse_wbxml_tag_defined (proto_tree *tree, tvbuff_t *tvb, packet_info *pinfo, gu
if ((peek & 0x3F) == 4) { /* LITERAL */
DebugLog(("STAG: LITERAL tag (peek = 0x%02X, off = %u) - TableRef follows!\n", peek, off));
idx = tvb_get_guintvar (tvb, off+1, &tag_len, pinfo, &ei_wbxml_oversized_uintvar);
str_len = tvb_strsize (tvb, str_tbl+idx);
tag_new_literal = (const gchar*)tvb_get_ptr (tvb, str_tbl+idx, str_len);
tag_new_literal = tvb_get_stringz_enc(pinfo->pool, tvb, str_tbl+idx, &str_len, encoding);
tag_new_known = 0; /* invalidate known tag_new */
} else { /* Known tag */
tag_new_known = peek & 0x3F;
@ -7706,6 +7700,7 @@ dissect_wbxml_common(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree,
guint32 len;
guint32 charset = 0;
guint32 charset_len = 0;
guint encoding;
guint32 publicid;
guint32 publicid_index = 0;
guint32 publicid_len;
@ -7777,6 +7772,17 @@ dissect_wbxml_common(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree,
break;
}
if (charset) {
encoding = mibenum_charset_to_encoding(charset);
} else {
/* XXX: If the charset is 0 we should look if there is a charset
* parameter in the Content-Type / media_type if passed to
* the dissector. Otherwise the default is UTF-8.
*/
encoding = ENC_UTF_8;
}
p_add_proto_data(pinfo->pool, pinfo, proto_wbxml, 0, GUINT_TO_POINTER(encoding));
/* String table: read string table length in bytes */
tvb_get_guintvar (tvb, offset, &str_tbl_len_len, pinfo, &ei_wbxml_oversized_uintvar);
str_tbl = offset + str_tbl_len_len; /* Start of 1st string in string table */
@ -7788,10 +7794,9 @@ dissect_wbxml_common(tvbuff_t *tvb, packet_info *pinfo, proto_tree *tree,
val_to_str_ext (publicid, &vals_wbxml_public_ids_ext, "(unknown 0x%x)"));
} else {
/* Read length of Public ID from string table */
len = tvb_strsize (tvb, str_tbl + publicid_index);
summary = wmem_strdup_printf(pinfo->pool, "%s, Public ID: \"%s\"",
val_to_str_ext (version, &vals_wbxml_versions_ext, "(unknown 0x%x)"),
tvb_format_text (pinfo->pool, tvb, str_tbl + publicid_index, len - 1));
tvb_get_stringz_enc(pinfo->pool, tvb, str_tbl + publicid_index, &len, encoding));
}
/* Add summary to INFO column if it is enabled */