From 603354203b608a0d94b91c61791b2ec3052a870b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Valverde?= Date: Sun, 16 Oct 2022 11:35:33 +0100 Subject: [PATCH] epan/proto: Replace format text() The proto.h APIs expect valid UTF-8 so replace uses of format_text() with a label copy function that just does formatting and does not check for encoding errors. Avoid multiple levels of temporary string allocations. Make sure the copy does not truncate a multibyte character and produce invalid strings. Add debug checks for UTF-8 encoding errors instead. We escape C0 and C1 control codes (because control codes) and ASCII whitespace (and bell). Overall the goal is to be more efficient and optimized and help detect misuse of APIs by passing invalid UTF-8. Add a unit test for ws_label_strcat. --- CMakeLists.txt | 1 + epan/CMakeLists.txt | 8 +++ epan/proto.c | 95 +++++++++++---------------- epan/strutil.c | 138 ++++++++++++++++++++++++++++++++++++++++ epan/strutil.h | 5 ++ epan/test_epan.c | 102 +++++++++++++++++++++++++++++ test/suite_unittests.py | 6 ++ 7 files changed, 298 insertions(+), 57 deletions(-) create mode 100644 epan/test_epan.c diff --git a/CMakeLists.txt b/CMakeLists.txt index 819f5e7f8c..aa348b1e4f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -3934,6 +3934,7 @@ add_custom_target(test-programs tvbtest wmem_test wscbor_test + test_epan test_wsutil COMMENT "Building unit test programs and wrapper" ) diff --git a/epan/CMakeLists.txt b/epan/CMakeLists.txt index c42ac121fa..43a5e2445d 100644 --- a/epan/CMakeLists.txt +++ b/epan/CMakeLists.txt @@ -434,6 +434,14 @@ set_target_properties(wscbor_test PROPERTIES COMPILE_FLAGS "${WERROR_COMMON_FLAGS}" ) +add_executable(test_epan EXCLUDE_FROM_ALL test_epan.c) +target_link_libraries(test_epan epan) +set_target_properties(test_epan PROPERTIES + FOLDER "Tests" + EXCLUDE_FROM_DEFAULT_BUILD True + COMPILE_FLAGS "${WERROR_COMMON_FLAGS}" +) + CHECKAPI( NAME epan diff --git a/epan/proto.c b/epan/proto.c index 387b0052df..8c37095c7e 100644 --- a/epan/proto.c +++ b/epan/proto.c @@ -25,6 +25,7 @@ #include #include #include +#include #include @@ -219,7 +220,9 @@ static int hfinfo_bitoffset(const header_field_info *hfinfo); static int hfinfo_mask_bitwidth(const header_field_info *hfinfo); static int hfinfo_container_bitwidth(const header_field_info *hfinfo); -static inline gsize label_concat(char *label_str, gsize pos, const char *str); +#define label_concat(dst, pos, src) \ + ws_label_strcat(dst, ITEM_LABEL_LENGTH, pos, src, 0) + static void label_mark_truncated(char *label_str, gsize name_pos); #define LABEL_MARK_TRUNCATED_START(label_str) label_mark_truncated(label_str, 0) @@ -1068,19 +1071,13 @@ proto_registrar_get_id_byname(const char *field_name) return hfinfo->id; } - -static char * -format_text_hfinfo(wmem_allocator_t *scope, const header_field_info *hfinfo, - const guchar *string) +static int +label_strcat_flags(const header_field_info *hfinfo) { - char *str = NULL; - if (FIELD_DISPLAY(hfinfo->display) & BASE_STR_WSP) - str = format_text_wsp(scope, string, strlen(string)); - else - str = format_text(scope, string, strlen(string)); + return FORMAT_LABEL_REPLACE_SPACE; - return str; + return 0; } static char * @@ -3822,23 +3819,28 @@ proto_tree_add_item_ret_display_string_and_length(proto_tree *tree, int hfindex, switch (hfinfo->type) { case FT_STRING: value = get_string_value(scope, tvb, start, length, lenretval, encoding); - *retval = format_text_hfinfo(scope, hfinfo, value); + *retval = wmem_alloc(scope, ITEM_LABEL_LENGTH); + ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo)); break; case FT_STRINGZ: value = get_stringz_value(scope, tree, tvb, start, length, lenretval, encoding); - *retval = format_text_hfinfo(scope, hfinfo, value); + *retval = wmem_alloc(scope, ITEM_LABEL_LENGTH); + ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo)); break; case FT_UINT_STRING: value = get_uint_string_value(scope, tree, tvb, start, length, lenretval, encoding); - *retval = format_text_hfinfo(scope, hfinfo, value); + *retval = wmem_alloc(scope, ITEM_LABEL_LENGTH); + ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo)); break; case FT_STRINGZPAD: value = get_stringzpad_value(scope, tvb, start, length, lenretval, encoding); - *retval = format_text_hfinfo(scope, hfinfo, value); + *retval = wmem_alloc(scope, ITEM_LABEL_LENGTH); + ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo)); break; case FT_STRINGZTRUNC: value = get_stringztrunc_value(scope, tvb, start, length, lenretval, encoding); - *retval = format_text_hfinfo(scope, hfinfo, value); + *retval = wmem_alloc(scope, ITEM_LABEL_LENGTH); + ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo)); break; case FT_BYTES: value = tvb_get_ptr(tvb, start, length); @@ -6342,7 +6344,7 @@ proto_tree_set_representation_value(proto_item *pi, const char *format, va_list * items string representation */ if (PTREE_DATA(pi)->visible && !proto_item_is_hidden(pi)) { gsize name_pos, ret = 0; - char *str, *tmp; + char *str; field_info *fi = PITEM_FINFO(pi); header_field_info *hf; @@ -6371,11 +6373,9 @@ proto_tree_set_representation_value(proto_item *pi, const char *format, va_list ret = label_concat(fi->rep->representation, ret, ": "); /* If possible, Put in the value of the string */ - str = ws_strdup_vprintf(format, ap); - tmp = format_text_string(NULL, str); - wmem_free(NULL, str); - ret = label_concat(fi->rep->representation, ret, tmp); - wmem_free(NULL, tmp); + str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap); + WS_UTF_8_CHECK(str, -1); + ret = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, ret, str, 0); if (ret >= ITEM_LABEL_LENGTH) { /* Uh oh, we don't have enough room. Tell the user * that the field is truncated. @@ -6392,7 +6392,7 @@ static void proto_tree_set_representation(proto_item *pi, const char *format, va_list ap) { gsize ret; /*tmp return value */ - char *str, *tmp; + char *str; field_info *fi = PITEM_FINFO(pi); DISSECTOR_ASSERT(fi); @@ -6400,11 +6400,9 @@ proto_tree_set_representation(proto_item *pi, const char *format, va_list ap) if (!proto_item_is_hidden(pi)) { ITEM_LABEL_NEW(PNODE_POOL(pi), fi->rep); - str = ws_strdup_vprintf(format, ap); - tmp = format_text_string(NULL, str); - wmem_free(NULL, str); - ret = label_concat(fi->rep->representation, 0, tmp); - wmem_free(NULL, tmp); + str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap); + WS_UTF_8_CHECK(str, -1); + ret = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, 0, str, 0); if (ret >= ITEM_LABEL_LENGTH) { /* Uh oh, we don't have enough room. Tell the user * that the field is truncated. @@ -6709,9 +6707,7 @@ proto_item_fill_display_label(field_info *finfo, gchar *display_label_str, const case FT_STRINGZPAD: case FT_STRINGZTRUNC: str = fvalue_get_string(&finfo->value); - tmp_str = format_text_hfinfo(NULL, hfinfo, str); - label_len = protoo_strlcpy(display_label_str, tmp_str, label_str_size); - wmem_free(NULL, tmp_str); + label_len = (int)ws_label_strcat(display_label_str, label_str_size, 0, str, label_strcat_flags(hfinfo)); break; default: @@ -7040,7 +7036,7 @@ proto_item_append_text(proto_item *pi, const char *format, ...) { field_info *fi = NULL; size_t curlen; - char *str, *tmp; + char *str; va_list ap; TRY_TO_FAKE_THIS_REPR_VOID(pi); @@ -7071,12 +7067,10 @@ proto_item_append_text(proto_item *pi, const char *format, ...) */ if (ITEM_LABEL_LENGTH > (curlen + 4)) { va_start(ap, format); - str = ws_strdup_vprintf(format, ap); + str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap); va_end(ap); - tmp = format_text_string(NULL, str); - wmem_free(NULL, str); - curlen = label_concat(fi->rep->representation, curlen, tmp); - wmem_free(NULL, tmp); + WS_UTF_8_CHECK(str, -1); + curlen = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, curlen, str, 0); if (curlen >= ITEM_LABEL_LENGTH) { /* Uh oh, we don't have enough room. Tell the user * that the field is truncated. @@ -7094,7 +7088,7 @@ proto_item_prepend_text(proto_item *pi, const char *format, ...) field_info *fi = NULL; gsize pos; char representation[ITEM_LABEL_LENGTH]; - char *str, *tmp; + char *str; va_list ap; TRY_TO_FAKE_THIS_REPR_VOID(pi); @@ -7116,13 +7110,11 @@ proto_item_prepend_text(proto_item *pi, const char *format, ...) (void) g_strlcpy(representation, fi->rep->representation, ITEM_LABEL_LENGTH); va_start(ap, format); - str = ws_strdup_vprintf(format, ap); + str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap); va_end(ap); - tmp = format_text_string(NULL, str); - wmem_free(NULL, str); - pos = label_concat(fi->rep->representation, 0, tmp); - wmem_free(NULL, tmp); - pos = label_concat(fi->rep->representation, pos, representation); + WS_UTF_8_CHECK(str, -1); + pos = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, 0, str, 0); + pos = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, pos, representation, 0); /* XXX: As above, if the old representation is close to the label * length, it might already be marked as truncated. */ if (pos >= ITEM_LABEL_LENGTH && (strlen(representation) + 4) <= ITEM_LABEL_LENGTH) { @@ -9112,15 +9104,6 @@ proto_register_subtree_array(gint * const *indices, const int num_indices) } } -static inline gsize -label_concat(char *label_str, gsize pos, const char *str) -{ - if (pos < ITEM_LABEL_LENGTH) - pos += g_strlcpy(label_str + pos, str, ITEM_LABEL_LENGTH - pos); - - return pos; -} - static void label_mark_truncated(char *label_str, gsize name_pos) { @@ -9169,7 +9152,7 @@ label_fill(char *label_str, gsize pos, const header_field_info *hfinfo, const ch name_pos = pos = label_concat(label_str, pos, hfinfo->name); if (!(hfinfo->display & BASE_NO_DISPLAY_VALUE)) { pos = label_concat(label_str, pos, ": "); - pos = label_concat(label_str, pos, text ? text : "(null)"); + pos = ws_label_strcat(label_str, ITEM_LABEL_LENGTH, pos, text ? text : "(null)", label_strcat_flags(hfinfo)); } if (pos >= ITEM_LABEL_LENGTH) { @@ -9474,9 +9457,7 @@ proto_item_fill_label(field_info *fi, gchar *label_str) case FT_STRINGZPAD: case FT_STRINGZTRUNC: str = fvalue_get_string(&fi->value); - tmp = format_text_hfinfo(NULL, hfinfo, str); - label_fill(label_str, 0, hfinfo, tmp); - wmem_free(NULL, tmp); + label_fill(label_str, 0, hfinfo, str); break; case FT_IEEE_11073_SFLOAT: diff --git a/epan/strutil.c b/epan/strutil.c index b95146714c..8d92a2c2ac 100644 --- a/epan/strutil.c +++ b/epan/strutil.c @@ -16,6 +16,7 @@ #include "strutil.h" #include +#include #include #ifdef _WIN32 @@ -795,6 +796,143 @@ module_check_valid_name(const char *name, gboolean lower_only) return c; } +static const char _hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', + '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; + +/* + * Copy byte by byte without UTF-8 truncation (assume valid UTF-8 input). + * Return byte size written, or that would have been + * written with enough space. + */ +size_t +ws_label_strcat(char *label_str, size_t buf_size, size_t pos, + const uint8_t *str, int flags) +{ + if (pos >= buf_size) + return pos; + + uint8_t r = 0; + ssize_t chlen; + ssize_t idx, src_len; + ssize_t free_len; + + idx = 0; + src_len = strlen(str); + free_len = buf_size - pos - 1; + memset(label_str + pos, 0, free_len + 1); + + while (idx < src_len) { + chlen = ws_utf8_char_len(str[idx]); + if (chlen <= 0) { + /* We were passed invalid UTF-8. This is an error. Complain and do... something. */ + ws_log_utf8(str, -1, NULL); + /* Destination buffer is already nul terminated. */ + /* + * XXX If we are going to return here instead of trying to recover maybe the log level should + * be higher than DEBUG. + */ + return pos; + } + + /* ASCII */ + if (chlen == 1) { + if (flags & FORMAT_LABEL_REPLACE_SPACE && g_ascii_isspace(str[idx])) { + if (free_len >= 1) { + label_str[pos] = ' '; + } + pos++; + idx++; + free_len--; + continue; + } + + r = 0; + switch (str[idx]) { + case '\a': r = 'a'; break; + case '\b': r = 'b'; break; + case '\f': r = 'f'; break; + case '\n': r = 'n'; break; + case '\r': r = 'r'; break; + case '\t': r = 't'; break; + case '\v': r = 'v'; break; + } + if (r != 0) { + if (free_len >= 2) { + label_str[pos] = '\\'; + label_str[pos+1] = r; + } + pos += 2; + idx += 1; + free_len -= 2; + continue; + } + + if (g_ascii_isprint(str[idx])) { + if (free_len >= 1) { + label_str[pos] = str[idx]; + } + pos++; + idx++; + free_len--; + continue; + } + + if (free_len >= 4) { + label_str[pos+0] = '\\'; + label_str[pos+1] = 'x'; + + uint8_t ch = str[idx]; + label_str[pos+2] = _hex[ch >> 4]; + label_str[pos+3] = _hex[ch & 0x0F]; + } + pos += 4; + idx += chlen; + free_len -= 4; + continue; + } + + /* UTF-8 multibyte */ + if (chlen == 2 && str[idx] == 0xC2 && + str[idx+1] >= 0x80 && str[idx+1] <= 0x9F) { + /* + * Escape the C1 control codes. C0 (covered above) and C1 are + * inband signalling and transparent to Unicode. + * Anything else probably has text semantics should not be removed. + */ + /* + * Special case: The second UTF-8 byte is the same as the Unicode + * code point for range U+0080 - U+009F. + */ + if (free_len >= 6) { + label_str[pos+0] = '\\'; + label_str[pos+1] = 'u'; + label_str[pos+2] = '0'; + label_str[pos+3] = '0'; + + uint8_t ch = str[idx+1]; + label_str[pos+4] = _hex[ch >> 4]; + label_str[pos+5] = _hex[ch & 0x0F]; + } + pos += 6; + idx += chlen; + free_len -= 6; + continue; + } + + /* Just copy */ + if (free_len >= chlen) { + for (ssize_t j = 0; j < chlen; j++) { + label_str[pos+j] = str[idx+j]; + } + } + pos += chlen; + idx += chlen; + free_len -= chlen; + } + + return pos; +} + /* * Editor modelines - https://www.wireshark.org/tools/modelines.html * diff --git a/epan/strutil.h b/epan/strutil.h index ca941f0e4f..a70fe5da4f 100644 --- a/epan/strutil.h +++ b/epan/strutil.h @@ -190,6 +190,11 @@ char * convert_string_case(const char *string, gboolean case_insensitive); WS_DLL_PUBLIC void IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len); +#define FORMAT_LABEL_REPLACE_SPACE (0x1 << 0) + +WS_DLL_PUBLIC +size_t ws_label_strcat(char *label_str, size_t bufsize, gsize pos, const uint8_t *str, int flags); + /* * Check name is valid. This covers names for display filter fields, dissector * tables, preference modules, etc. Lower case is preferred. diff --git a/epan/test_epan.c b/epan/test_epan.c new file mode 100644 index 0000000000..7109c36212 --- /dev/null +++ b/epan/test_epan.c @@ -0,0 +1,102 @@ +/* + * Wireshark - Network traffic analyzer + * By Gerald Combs + * Copyright 1998 Gerald Combs + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "config.h" + +#include "strutil.h" +#include + +/* + * FIXME: LABEL_LENGTH includes the nul byte terminator. + * This is confusing but matches ITEM_LABEL_LENGTH. + */ +#define LABEL_LENGTH 8 + +void test_label_strcat(void) +{ + char label[LABEL_LENGTH]; + const char *src; + size_t pos; + + src = "ABCD"; + pos = 0; + pos = ws_label_strcat(label, sizeof(label), pos, src, 0); + g_assert_cmpstr(label, ==, "ABCD"); + g_assert_cmpuint(pos, ==, 4); + + src = "EFGH"; + pos = ws_label_strcat(label, sizeof(label), pos, src, 0); + g_assert_cmpstr(label, ==, "ABCDEFG"); + g_assert_cmpuint(pos, ==, 8); + + src = "IJKL"; + pos = 7; + pos = ws_label_strcat(label, sizeof(label), pos, src, 0); + g_assert_cmpstr(label, ==, "ABCDEFG"); + g_assert_cmpuint(pos, ==, 11); + + /* UTF-8 multibyte does not fit, do not truncate. */ + src = "ABCDEF"UTF8_MIDDLE_DOT; + pos = 0; + pos = ws_label_strcat(label, sizeof(label), pos, src, 0); + g_assert_cmpstr(label, ==, "ABCDEF"); + g_assert_cmpuint(pos, ==, 8); /* Tried to write 8 bytes. */ +} + +void test_label_strcat2(void) +{ + char label[128]; + const char *src; + + src = "ABCD\n\t\f\r\aE"UTF8_MIDDLE_DOT"Z"; + ws_label_strcat(label, sizeof(label), 0, src, 0); + g_assert_cmpstr(label, ==, "ABCD\\n\\t\\f\\r\\aE"UTF8_MIDDLE_DOT"Z"); +} + +void test_label_escape_control(void) +{ + char label[128]; + const char *src, *dst; + size_t pos; + + src = "ABCD \x04\x17\xC2\x80 EFG \xC2\x90 HIJ \xC2\x9F Z"; + dst = "ABCD \\x04\\x17\\u0080 EFG \\u0090 HIJ \\u009F Z"; + pos = ws_label_strcat(label, sizeof(label), 0, src, 0); + g_assert_cmpstr(label, ==, dst); + g_assert_cmpuint(pos, ==, strlen(dst)); +} + +int main(int argc, char **argv) +{ + int ret; + + ws_log_init("test_proto", NULL); + + g_test_init(&argc, &argv, NULL); + + g_test_add_func("/label/strcat", test_label_strcat); + g_test_add_func("/label/strcat2", test_label_strcat2); + g_test_add_func("/label/escape_control", test_label_escape_control); + + ret = g_test_run(); + + return ret; +} + +/* + * Editor modelines - https://www.wireshark.org/tools/modelines.html + * + * Local variables: + * c-basic-offset: 4 + * tab-width: 8 + * indent-tabs-mode: nil + * End: + * + * vi: set shiftwidth=4 tabstop=8 expandtab: + * :indentSize=4:tabSize=8:noTabs=true: + */ diff --git a/test/suite_unittests.py b/test/suite_unittests.py index ce42697921..7c1e644268 100644 --- a/test/suite_unittests.py +++ b/test/suite_unittests.py @@ -45,6 +45,12 @@ class case_unittests(subprocesstest.SubprocessTestCase): '''wscbor_test''' self.assertRun(program('wscbor_test'), env=base_env) + def test_unit_epan(self, program, base_env): + '''epan unit tests''' + self.assertRun((program('test_epan'), + '--verbose' + ), env=base_env) + def test_unit_wsutil(self, program, base_env): '''wsutil unit tests''' self.assertRun((program('test_wsutil'),