Fix bug #9618: Invalid UTF-8 causes JSON dissector assertion failure "g_utf8_validate"
Validate JSON UTF-8 characters, replace with '?' when invalid. svn path=/trunk/; revision=54633
This commit is contained in:
parent
d1dcee936b
commit
abda30e9e6
|
@ -249,6 +249,30 @@ static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbpar
|
|||
wmem_stack_pop(data->stack);
|
||||
}
|
||||
|
||||
static int
|
||||
json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max)
|
||||
{
|
||||
int len = ws_utf8_char_len((guint8) *buf);
|
||||
|
||||
/* XXX, before moving to core API check if it's off-by-one safe.
|
||||
* For JSON analyzer it's not a problem
|
||||
* (string always terminated by ", which is not valid UTF-8 continuation character) */
|
||||
if (len == -1 || ((guint) (offset + len)) >= (guint) offset_max) {
|
||||
*buf = '?';
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* assume it's valid UTF-8 */
|
||||
tvb_memcpy(tvb, buf + 1, offset + 1, len - 1);
|
||||
|
||||
if (!g_utf8_validate(buf, len, NULL)) {
|
||||
*buf = '?';
|
||||
return 1;
|
||||
}
|
||||
|
||||
return len;
|
||||
}
|
||||
|
||||
static char *json_string_unescape(tvbparse_elem_t *tok)
|
||||
{
|
||||
char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1);
|
||||
|
@ -266,7 +290,6 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
|
|||
case '\"':
|
||||
case '\\':
|
||||
case '/':
|
||||
default:
|
||||
str[j++] = ch;
|
||||
break;
|
||||
|
||||
|
@ -361,10 +384,22 @@ static char *json_string_unescape(tvbparse_elem_t *tok)
|
|||
str[j++] = '?';
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
/* not valid by JSON grammar (also tvbparse rules should not allow it) */
|
||||
DISSECTOR_ASSERT_NOT_REACHED();
|
||||
break;
|
||||
}
|
||||
|
||||
} else
|
||||
str[j++] = ch;
|
||||
} else {
|
||||
int utf_len;
|
||||
|
||||
str[j] = ch;
|
||||
/* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */
|
||||
utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len);
|
||||
j += utf_len;
|
||||
i += (utf_len - 1);
|
||||
}
|
||||
|
||||
}
|
||||
str[j] = '\0';
|
||||
|
|
|
@ -30,7 +30,6 @@ IF(WIN32)
|
|||
inet_ntop.c
|
||||
inet_pton.c
|
||||
strptime.c
|
||||
unicode-utils.c
|
||||
wsgetopt.c
|
||||
)
|
||||
ENDIF(WIN32)
|
||||
|
@ -69,6 +68,7 @@ set(WSUTIL_FILES
|
|||
tempfile.c
|
||||
type_util.c
|
||||
u3.c
|
||||
unicode-utils.c
|
||||
${WSUTIL_PLATFORM_FILES}
|
||||
)
|
||||
|
||||
|
|
|
@ -99,8 +99,6 @@ EXTRA_DIST = \
|
|||
Makefile.nmake \
|
||||
file_util.c \
|
||||
file_util.h \
|
||||
unicode-utils.c \
|
||||
unicode-utils.h \
|
||||
wsgcrypt.h
|
||||
|
||||
CLEANFILES = \
|
||||
|
|
|
@ -61,7 +61,8 @@ LIBWSUTIL_SRC = \
|
|||
report_err.c \
|
||||
tempfile.c \
|
||||
type_util.c \
|
||||
u3.c
|
||||
u3.c \
|
||||
unicode-utils.c
|
||||
|
||||
# Header files that are not generated from other files
|
||||
LIBWSUTIL_INCLUDES = \
|
||||
|
@ -100,4 +101,5 @@ LIBWSUTIL_INCLUDES = \
|
|||
report_err.h \
|
||||
tempfile.h \
|
||||
type_util.h \
|
||||
u3.h
|
||||
u3.h \
|
||||
unicode-utils.h
|
||||
|
|
|
@ -28,7 +28,6 @@ OBJECTS = file_util.obj \
|
|||
inet_pton.obj \
|
||||
$(LIBWSUTIL_SRC:.c=.obj) \
|
||||
strptime.obj \
|
||||
unicode-utils.obj \
|
||||
wsgetopt.obj
|
||||
|
||||
# For use when making libwsutil.dll
|
||||
|
|
|
@ -22,12 +22,23 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
*/
|
||||
|
||||
#ifndef _WIN32
|
||||
#error "This is only for Windows"
|
||||
#endif
|
||||
|
||||
#include "unicode-utils.h"
|
||||
|
||||
int
|
||||
ws_utf8_char_len(guint8 ch)
|
||||
{
|
||||
if (ch >= 0xfe) return -1;
|
||||
if (ch >= 0xfc) return 6;
|
||||
if (ch >= 0xf8) return 5;
|
||||
if (ch >= 0xf0) return 4;
|
||||
if (ch >= 0xe0) return 3;
|
||||
if (ch >= 0xc0) return 2;
|
||||
else return 1;
|
||||
}
|
||||
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <shellapi.h>
|
||||
|
||||
/** @file
|
||||
|
@ -157,3 +168,5 @@ arg_list_utf_16to8(int argc, char *argv[]) {
|
|||
}
|
||||
} /* XXX else bail because something is horribly, horribly wrong? */
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -25,21 +25,25 @@
|
|||
#ifndef __UNICODEUTIL_H__
|
||||
#define __UNICODEUTIL_H__
|
||||
|
||||
#include "ws_symbol_export.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include "config.h"
|
||||
|
||||
#include "ws_symbol_export.h"
|
||||
|
||||
#include <glib.h>
|
||||
#include <windows.h>
|
||||
#include <tchar.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/**
|
||||
* @file Unicode convenience routines.
|
||||
*/
|
||||
|
||||
WS_DLL_PUBLIC
|
||||
int ws_utf8_char_len(guint8 ch);
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
#include <windows.h>
|
||||
#include <tchar.h>
|
||||
#include <wchar.h>
|
||||
|
||||
/** Given a UTF-8 string, convert it to UTF-16. This is meant to be used
|
||||
* to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16).
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue