diff --git a/epan/dissectors/packet-json.c b/epan/dissectors/packet-json.c index dc339b5e61..3c9f09193d 100644 --- a/epan/dissectors/packet-json.c +++ b/epan/dissectors/packet-json.c @@ -249,6 +249,30 @@ static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbpar wmem_stack_pop(data->stack); } +static int +json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max) +{ + int len = ws_utf8_char_len((guint8) *buf); + + /* XXX, before moving to core API check if it's off-by-one safe. + * For JSON analyzer it's not a problem + * (string always terminated by ", which is not valid UTF-8 continuation character) */ + if (len == -1 || ((guint) (offset + len)) >= (guint) offset_max) { + *buf = '?'; + return 1; + } + + /* assume it's valid UTF-8 */ + tvb_memcpy(tvb, buf + 1, offset + 1, len - 1); + + if (!g_utf8_validate(buf, len, NULL)) { + *buf = '?'; + return 1; + } + + return len; +} + static char *json_string_unescape(tvbparse_elem_t *tok) { char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1); @@ -266,7 +290,6 @@ static char *json_string_unescape(tvbparse_elem_t *tok) case '\"': case '\\': case '/': - default: str[j++] = ch; break; @@ -361,10 +384,22 @@ static char *json_string_unescape(tvbparse_elem_t *tok) str[j++] = '?'; break; } + + default: + /* not valid by JSON grammar (also tvbparse rules should not allow it) */ + DISSECTOR_ASSERT_NOT_REACHED(); + break; } - } else - str[j++] = ch; + } else { + int utf_len; + + str[j] = ch; + /* XXX if it's not valid UTF-8 character, add some expert info? 
(it violates JSON grammar) */ + utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len); + j += utf_len; + i += (utf_len - 1); + } } str[j] = '\0'; diff --git a/wsutil/CMakeLists.txt b/wsutil/CMakeLists.txt index b14c236238..c4251ea0bb 100644 --- a/wsutil/CMakeLists.txt +++ b/wsutil/CMakeLists.txt @@ -30,7 +30,6 @@ IF(WIN32) inet_ntop.c inet_pton.c strptime.c - unicode-utils.c wsgetopt.c ) ENDIF(WIN32) @@ -69,6 +68,7 @@ set(WSUTIL_FILES tempfile.c type_util.c u3.c + unicode-utils.c ${WSUTIL_PLATFORM_FILES} ) diff --git a/wsutil/Makefile.am b/wsutil/Makefile.am index 6ead86a92f..0b1256c7e8 100644 --- a/wsutil/Makefile.am +++ b/wsutil/Makefile.am @@ -99,8 +99,6 @@ EXTRA_DIST = \ Makefile.nmake \ file_util.c \ file_util.h \ - unicode-utils.c \ - unicode-utils.h \ wsgcrypt.h CLEANFILES = \ diff --git a/wsutil/Makefile.common b/wsutil/Makefile.common index 96b5a7cfa3..0efc26fb1a 100644 --- a/wsutil/Makefile.common +++ b/wsutil/Makefile.common @@ -61,7 +61,8 @@ LIBWSUTIL_SRC = \ report_err.c \ tempfile.c \ type_util.c \ - u3.c + u3.c \ + unicode-utils.c # Header files that are not generated from other files LIBWSUTIL_INCLUDES = \ @@ -100,4 +101,5 @@ LIBWSUTIL_INCLUDES = \ report_err.h \ tempfile.h \ type_util.h \ - u3.h + u3.h \ + unicode-utils.h diff --git a/wsutil/Makefile.nmake b/wsutil/Makefile.nmake index 673c6e4817..4da5224075 100644 --- a/wsutil/Makefile.nmake +++ b/wsutil/Makefile.nmake @@ -28,7 +28,6 @@ OBJECTS = file_util.obj \ inet_pton.obj \ $(LIBWSUTIL_SRC:.c=.obj) \ strptime.obj \ - unicode-utils.obj \ wsgetopt.obj # For use when making libwsutil.dll diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c index 8935e46f38..21cc489df7 100644 --- a/wsutil/unicode-utils.c +++ b/wsutil/unicode-utils.c @@ -22,12 +22,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. 
/*
 * Report how many bytes a UTF-8 sequence occupies, judging only by its
 * first byte.
 *
 * ch  first byte of the sequence
 *
 * Returns 1..6 according to the range the byte falls in, or -1 for
 * 0xfe and 0xff.  Only this one byte is inspected; nothing is validated.
 * Every value below 0xc0 yields 1, which includes 0x80..0xbf, so callers
 * that need strict checking must validate the assembled sequence
 * themselves.
 */
int
ws_utf8_char_len(guint8 ch)
{
    if (ch < 0xc0) {
        return 1;
    }
    if (ch < 0xe0) {
        return 2;
    }
    if (ch < 0xf0) {
        return 3;
    }
    if (ch < 0xf8) {
        return 4;
    }
    if (ch < 0xfc) {
        return 5;
    }
    if (ch < 0xfe) {
        return 6;
    }

    /* 0xfe and 0xff never start a sequence. */
    return -1;
}