json: fix buffer overflow in string_unescape()

Fixes a potential buffer overflow in json_string_unescape() by redesigning the
function to be more robust against invalid input.

It now uses the wmem_strbuf API.

Closes #17475
This commit is contained in:
Developer Alexander 2021-07-16 12:09:19 +02:00 committed by Wireshark GitLab Utility
parent 96c29704c8
commit 1f12e1d267
1 changed file with 295 additions and 159 deletions

View File

@ -33,7 +33,7 @@
void proto_register_json(void);
void proto_reg_handoff_json(void);
static char* json_string_unescape(tvbparse_elem_t *tok, gboolean keep_outer_parentheses);
static char* json_string_unescape(tvbparse_elem_t *tok, gboolean enclose_in_quotation_marks);
static dissector_handle_t json_handle;
@ -200,164 +200,293 @@ json_object_add_key(json_parser_data_t *data)
wmem_stack_push(data->array_idx, GINT_TO_POINTER(JSON_COMPACT_OBJECT_WITH_KEY));
}
/*
 * Complete the UTF-8 sequence whose lead byte the caller already stored in
 * buf[0].
 *
 * The sequence length is derived from the lead byte with ws_utf8_char_len().
 * The remaining (length - 1) bytes are copied from the tvb, starting at
 * offset + 1, to buf + 1, and the result is checked with g_utf8_validate().
 *
 * buf        - in: lead byte at buf[0]; out: the full sequence, or '?' at
 *              buf[0] on failure.  This function has no way to know how
 *              large buf is: the caller must guarantee room for the whole
 *              sequence.
 * tvb        - buffer the continuation bytes are read from.
 * offset     - tvb offset of the lead byte.
 * offset_max - exclusive upper limit; the sequence is rejected as soon as
 *              offset + length reaches it.
 *
 * Returns the sequence length on success.  Returns 1 after replacing buf[0]
 * with '?' when the lead byte is rejected, when the sequence would reach
 * offset_max, or when validation fails (in that last case buf[1..] has
 * already been overwritten with the copied bytes).
 */
static int
json_tvb_memcpy_utf8(char* buf, tvbuff_t* tvb, int offset, int offset_max)
{
    int len = ws_utf8_char_len((guint8)*buf);

    /* XXX, before moving to core API check if it's off-by-one safe.
     *      For JSON analyzer it's not a problem
     *      (string always terminated by ", which is not valid UTF-8 continuation character) */

    /* Reject every non-positive length, not only -1: a length of 0 would
     * make "(size_t)len - 1" below wrap around to SIZE_MAX. */
    if (len <= 0 || ((guint)(offset + len)) >= (guint)offset_max) {
        *buf = '?';
        return 1;
    }

    /* assume it's valid UTF-8 */
    tvb_memcpy(tvb, buf + 1, offset + 1, (size_t)len - 1);

    if (!g_utf8_validate(buf, len, NULL)) {
        *buf = '?';
        return 1;
    }

    return len;
}
static char*
json_string_unescape(tvbparse_elem_t* tok, gboolean keep_outer_parentheses)
json_string_unescape(tvbparse_elem_t* tok, gboolean enclose_in_quotation_marks)
{
int j = 0;
int i = keep_outer_parentheses == TRUE ? 0 : 1;
int length = keep_outer_parentheses == TRUE ? tok->len : tok->len - 1;
int read_index = 0;
char* str = (char*)wmem_alloc(wmem_packet_scope(), (size_t)length);
wmem_strbuf_t* output_string_buffer = wmem_strbuf_sized_new(wmem_packet_scope(), tok->len, tok->len + 2);
for (; i < length; i++) {
guint8 ch = tvb_get_guint8(tok->tvb, tok->offset + i);
int bin;
if (enclose_in_quotation_marks == TRUE)
{
wmem_strbuf_append_c(output_string_buffer, '\"');
}
if (ch == '\\') {
i++;
while (true)
{
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
break;
}
// Do not overflow input string
if (!(read_index < tok->len))
{
break;
}
ch = tvb_get_guint8(tok->tvb, tok->offset + i);
switch (ch) {
case '\"':
case '\\':
case '/':
str[j++] = ch;
break;
guint8 current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
case 'b':
str[j++] = '\b';
break;
case 'f':
str[j++] = '\f';
break;
case 'n':
str[j++] = '\n';
break;
case 'r':
str[j++] = '\r';
break;
case 't':
str[j++] = '\t';
break;
// character that IS NOT escaped
if (current_character != '\\')
{
// A single UTF-8 character can cover more than one byte.
// Copy all bytes that belong to that character and forward currend_index by that amount of bytes
int utf8_character_length = ws_utf8_char_len(current_character);
case 'u':
if (utf8_character_length <= 0)
{
guint32 unicode_hex = 0;
gboolean valid = TRUE;
int k;
break;
}
for (k = 0; k < 4; k++) {
i++;
unicode_hex <<= 4;
for (int i = 0; i < utf8_character_length; i++)
{
// If it is a character of length 1 these checks are redundant.
// But it avoids a seperate code path since this loop works for lengths from 1 to 6
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
break;
}
// Do not overflow input string
if (!(read_index < tok->len ))
{
break;
}
ch = tvb_get_guint8(tok->tvb, tok->offset + i);
bin = ws_xton(ch);
if (bin == -1) {
valid = FALSE;
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
read_index++;
wmem_strbuf_append_c(output_string_buffer, current_character);
}
}
// character that IS escaped
else
{
read_index++;
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
break;
}
// Do not overflow input string
if (!(read_index < tok->len))
{
break;
}
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
if (current_character == '\"' || current_character == '\\' || current_character == '/')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, current_character);
}
else if (current_character == 'b')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, '\b');
}
else if (current_character == 'f')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, '\f');
}
else if (current_character == 'n')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, '\n');
}
else if (current_character == 'r')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, '\r');
}
else if (current_character == 't')
{
read_index++;
wmem_strbuf_append_c(output_string_buffer, '\t');
}
else if (current_character == 'u')
{
read_index++;
guint32 code_point = 0;
gboolean is_valid_unicode_character = TRUE;
for (int i = 0; i < 4; i++)
{
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
is_valid_unicode_character = FALSE;
break;
}
unicode_hex |= bin;
// Do not overflow input string
if (!(read_index < tok->len))
{
is_valid_unicode_character = FALSE;
break;
}
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
read_index++;
int nibble = ws_xton(current_character);
if(nibble < 0)
{
is_valid_unicode_character = FALSE;
break;
}
code_point <<= 4;
code_point |= nibble;
}
if ((IS_LEAD_SURROGATE(unicode_hex))) {
ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1);
if ((IS_LEAD_SURROGATE(code_point)))
{
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
is_valid_unicode_character = FALSE;
break;
}
// Do not overflow input string
if (!(read_index < tok->len))
{
is_valid_unicode_character = FALSE;
break;
}
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
if (ch == '\\') {
i++;
ch = tvb_get_guint8(tok->tvb, tok->offset + i + 1);
if (ch == 'u') {
guint16 lead_surrogate = unicode_hex;
if (current_character == '\\')
{
read_index++;
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
is_valid_unicode_character = FALSE;
break;
}
// Do not overflow input string
if (!(read_index < tok->len))
{
is_valid_unicode_character = FALSE;
break;
}
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
if (current_character == 'u') {
guint16 lead_surrogate = code_point;
guint16 trail_surrogate = 0;
i++;
for (k = 0; k < 4; k++) {
i++;
trail_surrogate <<= 4;
read_index++;
ch = tvb_get_guint8(tok->tvb, tok->offset + i);
bin = ws_xton(ch);
if (bin == -1) {
valid = FALSE;
for (int i = 0; i < 4; i++)
{
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
is_valid_unicode_character = FALSE;
break;
}
trail_surrogate |= bin;
// Do not overflow input string
if (!(read_index < tok->len))
{
is_valid_unicode_character = FALSE;
break;
}
current_character = tvb_get_guint8(tok->tvb, tok->offset + read_index);
read_index++;
int nibble = ws_xton(current_character);
if (nibble < 0)
{
is_valid_unicode_character = FALSE;
break;
}
trail_surrogate <<= 4;
trail_surrogate |= nibble;
}
if ((IS_TRAIL_SURROGATE(trail_surrogate))) {
unicode_hex = SURROGATE_VALUE(lead_surrogate, trail_surrogate);
if ((IS_TRAIL_SURROGATE(trail_surrogate)))
{
code_point = SURROGATE_VALUE(lead_surrogate, trail_surrogate);
}
else {
valid = FALSE;
else
{
is_valid_unicode_character = FALSE;
}
}
else {
valid = FALSE;
else
{
read_index++;
is_valid_unicode_character = FALSE;
}
}
else {
valid = FALSE;
else
{
read_index++;
is_valid_unicode_character = FALSE;
}
}
else if ((IS_TRAIL_SURROGATE(unicode_hex))) {
i++;
valid = FALSE;
else if ((IS_TRAIL_SURROGATE(code_point)))
{
is_valid_unicode_character = FALSE;
}
if (valid && g_unichar_validate(unicode_hex) && g_unichar_isprint(unicode_hex)) {
/* \uXXXX => 6 bytes */
int charlen = g_unichar_to_utf8(unicode_hex, &str[j]);
j += charlen;
if (is_valid_unicode_character)
{
if (g_unichar_validate(code_point) && g_unichar_isprint(code_point))
{
gchar length_test_buffer[6];
int utf8_character_length = (int)g_unichar_to_utf8(code_point, length_test_buffer);
for (int i = 0; i < utf8_character_length; i++)
{
// Do not overflow TVB
if (!tvb_offset_exists(tok->tvb, tok->offset + read_index))
{
break;
}
// Do not overflow input string
if (!(read_index < tok->len))
{
break;
}
current_character = length_test_buffer[i];
wmem_strbuf_append_c(output_string_buffer, current_character);
}
}
}
else
{
str[j++] = '?';
wmem_strbuf_append_unichar(output_string_buffer, 0xFFFD);
}
break;
}
default:
/* not valid by JSON grammar (also tvbparse rules should not allow it) */
else
{
/* not valid by JSON grammar (tvbparse rules should not allow it) */
DISSECTOR_ASSERT_NOT_REACHED();
break;
}
}
else
{
int utf_len;
str[j] = ch;
/* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */
utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, tok->offset + i, tok->offset + length + 1);
j += utf_len;
i += (utf_len - 1);
}
}
str[j] = '\0';
return str;
if (enclose_in_quotation_marks == TRUE)
{
wmem_strbuf_append_c(output_string_buffer, '\"');
}
char* output_string = wmem_strbuf_finalize(output_string_buffer);
return output_string;
}
static GHashTable* header_fields_hash = NULL;
@ -389,42 +518,28 @@ json_key_lookup(proto_tree* tree, tvbparse_elem_t* tok, char* key_str, packet_in
}
static char*
join_strings(char* a, char* b, char separator)
join_strings(char* string_a, char* string_b, char separator)
{
size_t joined_string_length = separator == '\0' ? 1 : 2;
joined_string_length += strlen(a);
joined_string_length += strlen(b);
char* joined_string = (char*)wmem_alloc(wmem_packet_scope(), joined_string_length);
if (joined_string == NULL)
if (string_a == NULL)
{
return NULL;
}
if (string_b == NULL)
{
return NULL;
}
int i = 0;
while (a[i] != '\0')
wmem_strbuf_t* output_string_buffer = wmem_strbuf_new(wmem_packet_scope(), string_a);
if (separator != '\0')
{
joined_string[i] = a[i];
i++;
wmem_strbuf_append_c(output_string_buffer, separator);
}
if (separator != '\n')
{
joined_string[i++] = separator;
}
wmem_strbuf_append(output_string_buffer, string_b);
int offset = i;
i = 0;
while (b[i] != '\0')
{
joined_string[offset + i] = b[i];
i++;
}
joined_string[joined_string_length - 1] = '\0';
return joined_string;
char* output_string = wmem_strbuf_finalize(output_string_buffer);
return output_string;
}
static int
@ -627,8 +742,15 @@ before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t
proto_tree *subtree;
proto_item *ti;
char* key_string = json_string_unescape(tok->sub, FALSE);
ti = proto_tree_add_string(tree, &hfi_json_member, tok->tvb, tok->offset, tok->len, key_string);
// tvb parse element covers the quotation marks, which we don't want
tvbparse_elem_t key_parse_element = tok->sub[0];
key_parse_element.offset += 1;
key_parse_element.len -= 2;
char* key_string_without_quotation_marks = json_string_unescape(&key_parse_element, FALSE);
char* key_string_with_quotation_marks = json_string_unescape(tok->sub, FALSE);
ti = proto_tree_add_string(tree, &hfi_json_member, tok->tvb, tok->offset, tok->len, key_string_without_quotation_marks);
subtree = proto_item_add_subtree(ti, ett_json_member);
wmem_stack_push(data->stack, subtree);
@ -639,9 +761,9 @@ before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t
wmem_stack_push(data->stack_path, base_path);
wmem_stack_push(data->stack_path, last_key_string);
char* path = join_strings(base_path, key_string, '/');
char* path = join_strings(base_path, key_string_without_quotation_marks, '/');
wmem_stack_push(data->stack_path, path);
wmem_stack_push(data->stack_path, key_string);
wmem_stack_push(data->stack_path, key_string_without_quotation_marks);
if (json_compact) {
proto_tree *tree_compact = (proto_tree *)wmem_stack_peek(data->stack_compact);
@ -651,9 +773,9 @@ before_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t
tvbparse_elem_t *key_tok = tok->sub;
if (key_tok && key_tok->id == JSON_TOKEN_STRING) {
ti_compact = json_key_lookup(tree_compact, tok, key_string, data->pinfo);
ti_compact = json_key_lookup(tree_compact, tok, key_string_with_quotation_marks, data->pinfo);
if (!ti_compact) {
ti_compact = proto_tree_add_none_format(tree_compact, &hfi_json_member_compact, tok->tvb, tok->offset, tok->len, "%s:", key_string);
ti_compact = proto_tree_add_none_format(tree_compact, &hfi_json_member_compact, tok->tvb, tok->offset, tok->len, "%s:", key_string_with_quotation_marks);
}
} else {
ti_compact = proto_tree_add_item(tree_compact, &hfi_json_member_compact, tok->tvb, tok->offset, tok->len, ENC_NA);
@ -670,14 +792,15 @@ after_member(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *
proto_tree *tree = (proto_tree *)wmem_stack_pop(data->stack);
if (tree) {
tvbparse_elem_t* key_tok = tok->sub;
tvbparse_elem_t* key_tok = tok->sub;
if (tree && key_tok && key_tok->id == JSON_TOKEN_STRING) {
if (key_tok && key_tok->id == JSON_TOKEN_STRING) {
char* key = json_string_unescape(key_tok, FALSE);
tvbparse_elem_t key_parse_element = key_tok[0];
key_parse_element.offset += 1;
key_parse_element.len -= 2;
char* key_string_without_quotation_marks = json_string_unescape(&key_parse_element, FALSE);
proto_tree_add_string(tree, &hfi_json_key, key_tok->tvb, key_tok->offset, key_tok->len, key);
}
proto_tree_add_string(tree, &hfi_json_key, key_tok->tvb, key_tok->offset, key_tok->len, key_string_without_quotation_marks);
}
// extended path based filtering
@ -769,7 +892,21 @@ after_value(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *t
char* key_string = (char*)wmem_stack_pop(data->stack_path);
char* path = (char*)wmem_stack_pop(data->stack_path);
char* value_str = json_string_unescape(tok, TRUE);
char* value_str = NULL;
if (value_id == JSON_TOKEN_STRING && tok->len >= 2)
{
// tvb parse element covers the quotation marks, which we don't want
tvbparse_elem_t key_parse_element = tok[0];
key_parse_element.offset += 1;
key_parse_element.len -= 2;
value_str = json_string_unescape(&key_parse_element, FALSE);
}
else
{
value_str = json_string_unescape(tok, FALSE);
}
char* path_with_value = join_strings(path, value_str, ':');
char* memeber_with_value = join_strings(key_string, value_str, ':');
proto_item* path_with_value_item = proto_tree_add_string(tree, &hfi_json_path_with_value, tok->tvb, tok->offset, tok->len, path_with_value);
@ -791,8 +928,7 @@ after_value(void *tvbparse_data, const void *wanted_data _U_, tvbparse_elem_t *t
case JSON_TOKEN_STRING:
if (tok->len >= 2)
{
char* value_string_without_quotation_marks = json_string_unescape(tok, FALSE);
proto_tree_add_string(tree, &hfi_json_value_string, tok->tvb, tok->offset, tok->len, value_string_without_quotation_marks);
proto_tree_add_string(tree, &hfi_json_value_string, tok->tvb, tok->offset, tok->len, value_str);
}
else
{