epan/proto: Replace format text()

The proto.h APIs expect valid UTF-8 so replace uses of format_text()
with a label copy function that just does formatting and does not
check for encoding errors. Avoid multiple levels of temporary
string allocations.

Make sure the copy does not truncate a multibyte character and
produce invalid strings. Add debug checks for UTF-8 encoding errors
instead.

We escape C0 and C1 control codes (because control codes)
and ASCII whitespace (and bell).

Overall the goal is to be more efficient and optimized and help
detect misuse of APIs by passing invalid UTF-8.

Add a unit test for ws_label_strcat.
This commit is contained in:
João Valverde 2022-10-16 11:35:33 +01:00
parent 000c2c0bf4
commit 603354203b
7 changed files with 298 additions and 57 deletions

View File

@ -3934,6 +3934,7 @@ add_custom_target(test-programs
tvbtest
wmem_test
wscbor_test
test_epan
test_wsutil
COMMENT "Building unit test programs and wrapper"
)

View File

@ -434,6 +434,14 @@ set_target_properties(wscbor_test PROPERTIES
COMPILE_FLAGS "${WERROR_COMMON_FLAGS}"
)
add_executable(test_epan EXCLUDE_FROM_ALL test_epan.c)
target_link_libraries(test_epan epan)
set_target_properties(test_epan PROPERTIES
FOLDER "Tests"
EXCLUDE_FROM_DEFAULT_BUILD True
COMPILE_FLAGS "${WERROR_COMMON_FLAGS}"
)
CHECKAPI(
NAME
epan

View File

@ -25,6 +25,7 @@
#include <wsutil/json_dumper.h>
#include <wsutil/wslog.h>
#include <wsutil/ws_assert.h>
#include <wsutil/unicode-utils.h>
#include <ftypes/ftypes.h>
@ -219,7 +220,9 @@ static int hfinfo_bitoffset(const header_field_info *hfinfo);
static int hfinfo_mask_bitwidth(const header_field_info *hfinfo);
static int hfinfo_container_bitwidth(const header_field_info *hfinfo);
static inline gsize label_concat(char *label_str, gsize pos, const char *str);
#define label_concat(dst, pos, src) \
ws_label_strcat(dst, ITEM_LABEL_LENGTH, pos, src, 0)
static void label_mark_truncated(char *label_str, gsize name_pos);
#define LABEL_MARK_TRUNCATED_START(label_str) label_mark_truncated(label_str, 0)
@ -1068,19 +1071,13 @@ proto_registrar_get_id_byname(const char *field_name)
return hfinfo->id;
}
static char *
format_text_hfinfo(wmem_allocator_t *scope, const header_field_info *hfinfo,
const guchar *string)
static int
label_strcat_flags(const header_field_info *hfinfo)
{
char *str = NULL;
if (FIELD_DISPLAY(hfinfo->display) & BASE_STR_WSP)
str = format_text_wsp(scope, string, strlen(string));
else
str = format_text(scope, string, strlen(string));
return FORMAT_LABEL_REPLACE_SPACE;
return str;
return 0;
}
static char *
@ -3822,23 +3819,28 @@ proto_tree_add_item_ret_display_string_and_length(proto_tree *tree, int hfindex,
switch (hfinfo->type) {
case FT_STRING:
value = get_string_value(scope, tvb, start, length, lenretval, encoding);
*retval = format_text_hfinfo(scope, hfinfo, value);
*retval = wmem_alloc(scope, ITEM_LABEL_LENGTH);
ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo));
break;
case FT_STRINGZ:
value = get_stringz_value(scope, tree, tvb, start, length, lenretval, encoding);
*retval = format_text_hfinfo(scope, hfinfo, value);
*retval = wmem_alloc(scope, ITEM_LABEL_LENGTH);
ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo));
break;
case FT_UINT_STRING:
value = get_uint_string_value(scope, tree, tvb, start, length, lenretval, encoding);
*retval = format_text_hfinfo(scope, hfinfo, value);
*retval = wmem_alloc(scope, ITEM_LABEL_LENGTH);
ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo));
break;
case FT_STRINGZPAD:
value = get_stringzpad_value(scope, tvb, start, length, lenretval, encoding);
*retval = format_text_hfinfo(scope, hfinfo, value);
*retval = wmem_alloc(scope, ITEM_LABEL_LENGTH);
ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo));
break;
case FT_STRINGZTRUNC:
value = get_stringztrunc_value(scope, tvb, start, length, lenretval, encoding);
*retval = format_text_hfinfo(scope, hfinfo, value);
*retval = wmem_alloc(scope, ITEM_LABEL_LENGTH);
ws_label_strcat(*retval, ITEM_LABEL_LENGTH, 0, value, label_strcat_flags(hfinfo));
break;
case FT_BYTES:
value = tvb_get_ptr(tvb, start, length);
@ -6342,7 +6344,7 @@ proto_tree_set_representation_value(proto_item *pi, const char *format, va_list
* items string representation */
if (PTREE_DATA(pi)->visible && !proto_item_is_hidden(pi)) {
gsize name_pos, ret = 0;
char *str, *tmp;
char *str;
field_info *fi = PITEM_FINFO(pi);
header_field_info *hf;
@ -6371,11 +6373,9 @@ proto_tree_set_representation_value(proto_item *pi, const char *format, va_list
ret = label_concat(fi->rep->representation, ret, ": ");
/* If possible, Put in the value of the string */
str = ws_strdup_vprintf(format, ap);
tmp = format_text_string(NULL, str);
wmem_free(NULL, str);
ret = label_concat(fi->rep->representation, ret, tmp);
wmem_free(NULL, tmp);
str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap);
WS_UTF_8_CHECK(str, -1);
ret = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, ret, str, 0);
if (ret >= ITEM_LABEL_LENGTH) {
/* Uh oh, we don't have enough room. Tell the user
* that the field is truncated.
@ -6392,7 +6392,7 @@ static void
proto_tree_set_representation(proto_item *pi, const char *format, va_list ap)
{
gsize ret; /*tmp return value */
char *str, *tmp;
char *str;
field_info *fi = PITEM_FINFO(pi);
DISSECTOR_ASSERT(fi);
@ -6400,11 +6400,9 @@ proto_tree_set_representation(proto_item *pi, const char *format, va_list ap)
if (!proto_item_is_hidden(pi)) {
ITEM_LABEL_NEW(PNODE_POOL(pi), fi->rep);
str = ws_strdup_vprintf(format, ap);
tmp = format_text_string(NULL, str);
wmem_free(NULL, str);
ret = label_concat(fi->rep->representation, 0, tmp);
wmem_free(NULL, tmp);
str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap);
WS_UTF_8_CHECK(str, -1);
ret = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, 0, str, 0);
if (ret >= ITEM_LABEL_LENGTH) {
/* Uh oh, we don't have enough room. Tell the user
* that the field is truncated.
@ -6709,9 +6707,7 @@ proto_item_fill_display_label(field_info *finfo, gchar *display_label_str, const
case FT_STRINGZPAD:
case FT_STRINGZTRUNC:
str = fvalue_get_string(&finfo->value);
tmp_str = format_text_hfinfo(NULL, hfinfo, str);
label_len = protoo_strlcpy(display_label_str, tmp_str, label_str_size);
wmem_free(NULL, tmp_str);
label_len = (int)ws_label_strcat(display_label_str, label_str_size, 0, str, label_strcat_flags(hfinfo));
break;
default:
@ -7040,7 +7036,7 @@ proto_item_append_text(proto_item *pi, const char *format, ...)
{
field_info *fi = NULL;
size_t curlen;
char *str, *tmp;
char *str;
va_list ap;
TRY_TO_FAKE_THIS_REPR_VOID(pi);
@ -7071,12 +7067,10 @@ proto_item_append_text(proto_item *pi, const char *format, ...)
*/
if (ITEM_LABEL_LENGTH > (curlen + 4)) {
va_start(ap, format);
str = ws_strdup_vprintf(format, ap);
str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap);
va_end(ap);
tmp = format_text_string(NULL, str);
wmem_free(NULL, str);
curlen = label_concat(fi->rep->representation, curlen, tmp);
wmem_free(NULL, tmp);
WS_UTF_8_CHECK(str, -1);
curlen = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, curlen, str, 0);
if (curlen >= ITEM_LABEL_LENGTH) {
/* Uh oh, we don't have enough room. Tell the user
* that the field is truncated.
@ -7094,7 +7088,7 @@ proto_item_prepend_text(proto_item *pi, const char *format, ...)
field_info *fi = NULL;
gsize pos;
char representation[ITEM_LABEL_LENGTH];
char *str, *tmp;
char *str;
va_list ap;
TRY_TO_FAKE_THIS_REPR_VOID(pi);
@ -7116,13 +7110,11 @@ proto_item_prepend_text(proto_item *pi, const char *format, ...)
(void) g_strlcpy(representation, fi->rep->representation, ITEM_LABEL_LENGTH);
va_start(ap, format);
str = ws_strdup_vprintf(format, ap);
str = wmem_strdup_vprintf(PNODE_POOL(pi), format, ap);
va_end(ap);
tmp = format_text_string(NULL, str);
wmem_free(NULL, str);
pos = label_concat(fi->rep->representation, 0, tmp);
wmem_free(NULL, tmp);
pos = label_concat(fi->rep->representation, pos, representation);
WS_UTF_8_CHECK(str, -1);
pos = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, 0, str, 0);
pos = ws_label_strcat(fi->rep->representation, ITEM_LABEL_LENGTH, pos, representation, 0);
/* XXX: As above, if the old representation is close to the label
* length, it might already be marked as truncated. */
if (pos >= ITEM_LABEL_LENGTH && (strlen(representation) + 4) <= ITEM_LABEL_LENGTH) {
@ -9112,15 +9104,6 @@ proto_register_subtree_array(gint * const *indices, const int num_indices)
}
}
static inline gsize
label_concat(char *label_str, gsize pos, const char *str)
{
if (pos < ITEM_LABEL_LENGTH)
pos += g_strlcpy(label_str + pos, str, ITEM_LABEL_LENGTH - pos);
return pos;
}
static void
label_mark_truncated(char *label_str, gsize name_pos)
{
@ -9169,7 +9152,7 @@ label_fill(char *label_str, gsize pos, const header_field_info *hfinfo, const ch
name_pos = pos = label_concat(label_str, pos, hfinfo->name);
if (!(hfinfo->display & BASE_NO_DISPLAY_VALUE)) {
pos = label_concat(label_str, pos, ": ");
pos = label_concat(label_str, pos, text ? text : "(null)");
pos = ws_label_strcat(label_str, ITEM_LABEL_LENGTH, pos, text ? text : "(null)", label_strcat_flags(hfinfo));
}
if (pos >= ITEM_LABEL_LENGTH) {
@ -9474,9 +9457,7 @@ proto_item_fill_label(field_info *fi, gchar *label_str)
case FT_STRINGZPAD:
case FT_STRINGZTRUNC:
str = fvalue_get_string(&fi->value);
tmp = format_text_hfinfo(NULL, hfinfo, str);
label_fill(label_str, 0, hfinfo, tmp);
wmem_free(NULL, tmp);
label_fill(label_str, 0, hfinfo, str);
break;
case FT_IEEE_11073_SFLOAT:

View File

@ -16,6 +16,7 @@
#include "strutil.h"
#include <wsutil/str_util.h>
#include <wsutil/unicode-utils.h>
#include <epan/proto.h>
#ifdef _WIN32
@ -795,6 +796,143 @@ module_check_valid_name(const char *name, gboolean lower_only)
return c;
}
static const char _hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
'8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
/*
* Copy byte by byte without UTF-8 truncation (assume valid UTF-8 input).
* Return byte size written, or that would have been
* written with enough space.
*/
size_t
ws_label_strcat(char *label_str, size_t buf_size, size_t pos,
const uint8_t *str, int flags)
{
if (pos >= buf_size)
return pos;
uint8_t r = 0;
ssize_t chlen;
ssize_t idx, src_len;
ssize_t free_len;
idx = 0;
src_len = strlen(str);
free_len = buf_size - pos - 1;
memset(label_str + pos, 0, free_len + 1);
while (idx < src_len) {
chlen = ws_utf8_char_len(str[idx]);
if (chlen <= 0) {
/* We were passed invalid UTF-8. This is an error. Complain and do... something. */
ws_log_utf8(str, -1, NULL);
/* Destination buffer is already nul terminated. */
/*
* XXX If we are going to return here instead of trying to recover maybe the log level should
* be higher than DEBUG.
*/
return pos;
}
/* ASCII */
if (chlen == 1) {
if (flags & FORMAT_LABEL_REPLACE_SPACE && g_ascii_isspace(str[idx])) {
if (free_len >= 1) {
label_str[pos] = ' ';
}
pos++;
idx++;
free_len--;
continue;
}
r = 0;
switch (str[idx]) {
case '\a': r = 'a'; break;
case '\b': r = 'b'; break;
case '\f': r = 'f'; break;
case '\n': r = 'n'; break;
case '\r': r = 'r'; break;
case '\t': r = 't'; break;
case '\v': r = 'v'; break;
}
if (r != 0) {
if (free_len >= 2) {
label_str[pos] = '\\';
label_str[pos+1] = r;
}
pos += 2;
idx += 1;
free_len -= 2;
continue;
}
if (g_ascii_isprint(str[idx])) {
if (free_len >= 1) {
label_str[pos] = str[idx];
}
pos++;
idx++;
free_len--;
continue;
}
if (free_len >= 4) {
label_str[pos+0] = '\\';
label_str[pos+1] = 'x';
uint8_t ch = str[idx];
label_str[pos+2] = _hex[ch >> 4];
label_str[pos+3] = _hex[ch & 0x0F];
}
pos += 4;
idx += chlen;
free_len -= 4;
continue;
}
/* UTF-8 multibyte */
if (chlen == 2 && str[idx] == 0xC2 &&
str[idx+1] >= 0x80 && str[idx+1] <= 0x9F) {
/*
* Escape the C1 control codes. C0 (covered above) and C1 are
* inband signalling and transparent to Unicode.
* Anything else probably has text semantics should not be removed.
*/
/*
* Special case: The second UTF-8 byte is the same as the Unicode
* code point for range U+0080 - U+009F.
*/
if (free_len >= 6) {
label_str[pos+0] = '\\';
label_str[pos+1] = 'u';
label_str[pos+2] = '0';
label_str[pos+3] = '0';
uint8_t ch = str[idx+1];
label_str[pos+4] = _hex[ch >> 4];
label_str[pos+5] = _hex[ch & 0x0F];
}
pos += 6;
idx += chlen;
free_len -= 6;
continue;
}
/* Just copy */
if (free_len >= chlen) {
for (ssize_t j = 0; j < chlen; j++) {
label_str[pos+j] = str[idx+j];
}
}
pos += chlen;
idx += chlen;
free_len -= chlen;
}
return pos;
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*

View File

@ -190,6 +190,11 @@ char * convert_string_case(const char *string, gboolean case_insensitive);
WS_DLL_PUBLIC
void IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len);
#define FORMAT_LABEL_REPLACE_SPACE (0x1 << 0)
WS_DLL_PUBLIC
size_t ws_label_strcat(char *label_str, size_t bufsize, gsize pos, const uint8_t *str, int flags);
/*
* Check name is valid. This covers names for display filter fields, dissector
* tables, preference modules, etc. Lower case is preferred.

102
epan/test_epan.c Normal file
View File

@ -0,0 +1,102 @@
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#include "strutil.h"
#include <wsutil/utf8_entities.h>
/*
* FIXME: LABEL_LENGTH includes the nul byte terminator.
* This is confusing but matches ITEM_LABEL_LENGTH.
*/
#define LABEL_LENGTH 8
void test_label_strcat(void)
{
char label[LABEL_LENGTH];
const char *src;
size_t pos;
src = "ABCD";
pos = 0;
pos = ws_label_strcat(label, sizeof(label), pos, src, 0);
g_assert_cmpstr(label, ==, "ABCD");
g_assert_cmpuint(pos, ==, 4);
src = "EFGH";
pos = ws_label_strcat(label, sizeof(label), pos, src, 0);
g_assert_cmpstr(label, ==, "ABCDEFG");
g_assert_cmpuint(pos, ==, 8);
src = "IJKL";
pos = 7;
pos = ws_label_strcat(label, sizeof(label), pos, src, 0);
g_assert_cmpstr(label, ==, "ABCDEFG");
g_assert_cmpuint(pos, ==, 11);
/* UTF-8 multibyte does not fit, do not truncate. */
src = "ABCDEF"UTF8_MIDDLE_DOT;
pos = 0;
pos = ws_label_strcat(label, sizeof(label), pos, src, 0);
g_assert_cmpstr(label, ==, "ABCDEF");
g_assert_cmpuint(pos, ==, 8); /* Tried to write 8 bytes. */
}
void test_label_strcat2(void)
{
char label[128];
const char *src;
src = "ABCD\n\t\f\r\aE"UTF8_MIDDLE_DOT"Z";
ws_label_strcat(label, sizeof(label), 0, src, 0);
g_assert_cmpstr(label, ==, "ABCD\\n\\t\\f\\r\\aE"UTF8_MIDDLE_DOT"Z");
}
void test_label_escape_control(void)
{
char label[128];
const char *src, *dst;
size_t pos;
src = "ABCD \x04\x17\xC2\x80 EFG \xC2\x90 HIJ \xC2\x9F Z";
dst = "ABCD \\x04\\x17\\u0080 EFG \\u0090 HIJ \\u009F Z";
pos = ws_label_strcat(label, sizeof(label), 0, src, 0);
g_assert_cmpstr(label, ==, dst);
g_assert_cmpuint(pos, ==, strlen(dst));
}
int main(int argc, char **argv)
{
int ret;
ws_log_init("test_proto", NULL);
g_test_init(&argc, &argv, NULL);
g_test_add_func("/label/strcat", test_label_strcat);
g_test_add_func("/label/strcat2", test_label_strcat2);
g_test_add_func("/label/escape_control", test_label_escape_control);
ret = g_test_run();
return ret;
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 4
* tab-width: 8
* indent-tabs-mode: nil
* End:
*
* vi: set shiftwidth=4 tabstop=8 expandtab:
* :indentSize=4:tabSize=8:noTabs=true:
*/

View File

@ -45,6 +45,12 @@ class case_unittests(subprocesstest.SubprocessTestCase):
'''wscbor_test'''
self.assertRun(program('wscbor_test'), env=base_env)
def test_unit_epan(self, program, base_env):
'''epan unit tests'''
self.assertRun((program('test_epan'),
'--verbose'
), env=base_env)
def test_unit_wsutil(self, program, base_env):
'''wsutil unit tests'''
self.assertRun((program('test_wsutil'),