Fix bug #9618: Invalid utf8 causes JSON dissector assertion failure "g_utf8_validate"

Validate JSON UTF-8 characters, replace with '?' when invalid.

svn path=/trunk/; revision=54633
This commit is contained in:
Jakub Zawadzki 2014-01-07 22:17:32 +00:00
parent d1dcee936b
commit abda30e9e6
7 changed files with 71 additions and 20 deletions

View File

@ -249,6 +249,30 @@ static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbpar
wmem_stack_pop(data->stack);
}
/*
 * Complete the UTF-8 sequence whose first byte the caller has already
 * stored in buf[0].
 *
 * The expected sequence length is taken from that first byte via
 * ws_utf8_char_len(); the remaining (length - 1) bytes are then copied from
 * tvb, starting at offset + 1, into buf + 1, and the result is checked with
 * g_utf8_validate().
 *
 * buf        - output position; buf[0] must hold the byte found at offset
 * tvb        - buffer the trailing bytes are read from
 * offset     - position in tvb that corresponds to buf[0]
 * offset_max - limit that offset + length must stay strictly below
 *
 * Returns the number of bytes of buf that carry output: the sequence length
 * on success, or 1 after buf[0] has been overwritten with '?' because the
 * length is unknown, the sequence would reach offset_max, or validation
 * failed.
 */
static int
json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max)
{
	int seq_len = ws_utf8_char_len((guint8) buf[0]);

	/* XXX, before moving to core API check if it's off-by-one safe.
	 * For the JSON analyzer it's not a problem
	 * (a string is always terminated by ", which is not a valid UTF-8
	 * continuation character) */
	if (seq_len != -1 && ((guint) (offset + seq_len)) < (guint) offset_max) {
		/* optimistically fetch the trailing bytes, then verify the whole sequence */
		tvb_memcpy(tvb, buf + 1, offset + 1, seq_len - 1);
		if (g_utf8_validate(buf, seq_len, NULL))
			return seq_len;
	}

	/* unknown length, truncated sequence or invalid encoding:
	 * emit a single replacement character instead */
	buf[0] = '?';
	return 1;
}
static char *json_string_unescape(tvbparse_elem_t *tok)
{
char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
@ -266,7 +290,6 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
case '\"':
case '\\':
case '/':
default:
str[j++] = ch;
break;
@ -361,10 +384,22 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
str[j++] = '?';
break;
}
default:
/* not valid by JSON grammar (also tvbparse rules should not allow it) */
DISSECTOR_ASSERT_NOT_REACHED();
break;
}
} else
str[j++] = ch;
} else {
int utf_len;
str[j] = ch;
/* XXX if it's not a valid UTF-8 character, should we add some expert info? (invalid UTF-8 violates the JSON grammar) */
utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len);
j += utf_len;
i += (utf_len - 1);
}
}
str[j] = '\0';

View File

@ -30,7 +30,6 @@ IF(WIN32)
inet_ntop.c
inet_pton.c
strptime.c
unicode-utils.c
wsgetopt.c
)
ENDIF(WIN32)
@ -69,6 +68,7 @@ set(WSUTIL_FILES
tempfile.c
type_util.c
u3.c
unicode-utils.c
${WSUTIL_PLATFORM_FILES}
)

View File

@ -99,8 +99,6 @@ EXTRA_DIST = \
Makefile.nmake \
file_util.c \
file_util.h \
unicode-utils.c \
unicode-utils.h \
wsgcrypt.h
CLEANFILES = \

View File

@ -61,7 +61,8 @@ LIBWSUTIL_SRC = \
report_err.c \
tempfile.c \
type_util.c \
u3.c
u3.c \
unicode-utils.c
# Header files that are not generated from other files
LIBWSUTIL_INCLUDES = \
@ -100,4 +101,5 @@ LIBWSUTIL_INCLUDES = \
report_err.h \
tempfile.h \
type_util.h \
u3.h
u3.h \
unicode-utils.h

View File

@ -28,7 +28,6 @@ OBJECTS = file_util.obj \
inet_pton.obj \
$(LIBWSUTIL_SRC:.c=.obj) \
strptime.obj \
unicode-utils.obj \
wsgetopt.obj
# For use when making libwsutil.dll

View File

@ -22,12 +22,23 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef _WIN32
#error "This is only for Windows"
#endif
#include "unicode-utils.h"
/*
 * Map the first byte of a UTF-8 sequence to the total length of that
 * sequence, judged by the byte's value alone.
 *
 * Returns 1..6 for bytes below 0xfe and -1 for 0xfe and 0xff.
 * Every byte below 0xc0 (not only 0x00..0x7f) is reported as length 1;
 * no further validation of the byte or of the sequence is done here.
 */
int
ws_utf8_char_len(guint8 ch)
{
	if (ch < 0xc0)
		return 1;
	if (ch < 0xe0)
		return 2;
	if (ch < 0xf0)
		return 3;
	if (ch < 0xf8)
		return 4;
	if (ch < 0xfc)
		return 5;
	if (ch < 0xfe)
		return 6;

	/* 0xfe and 0xff */
	return -1;
}
#ifdef _WIN32
#include <shellapi.h>
/** @file
@ -157,3 +168,5 @@ arg_list_utf_16to8(int argc, char *argv[]) {
}
} /* XXX else bail because something is horribly, horribly wrong? */
}
#endif

View File

@ -25,21 +25,25 @@
#ifndef __UNICODEUTIL_H__
#define __UNICODEUTIL_H__
#include "ws_symbol_export.h"
#ifdef _WIN32
#include "config.h"
#include "ws_symbol_export.h"
#include <glib.h>
#include <windows.h>
#include <tchar.h>
#include <wchar.h>
/**
* @file Unicode convenience routines.
*/
WS_DLL_PUBLIC
int ws_utf8_char_len(guint8 ch);
#ifdef _WIN32
#include <windows.h>
#include <tchar.h>
#include <wchar.h>
/** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
* to convert from GTK+ 2.x (UTF-8) to Windows (UTF-16).
*