dfilter: Avoid dumping mangled UTF-8

We should not replace bytes that cannot be represented
in ASCII with \xNN escapes, to avoid mangling multi-byte
UTF-8 sequences. This assumes every string is UTF-8, of course.

This only affects the display of the compiled filter.

Before:

    Filter: http.user_agent == "João"

    Constants:
    00000 PUT_FVALUE	"Jo\xc3\xa3o" <FT_STRING> -> reg#1

After:

    Filter: http.user_agent == "João"

    Constants:
    00000 PUT_FVALUE	"João" <FT_STRING> -> reg#1
This commit is contained in:
João Valverde 2021-12-02 12:24:42 +00:00 committed by Wireshark GitLab Utility
parent 60e305d1e1
commit fe9b1fb103
2 changed files with 9 additions and 24 deletions

View File

@ -271,6 +271,10 @@ printable_char_or_period(gchar c)
static inline char
escape_char(char c)
{
/*
* Backslashes and double-quotes must
* be escaped. Whitespace is also escaped.
*/
switch (c) {
case '\a': return 'a';
case '\b': return 'b';
@ -295,18 +299,9 @@ escape_string_len(const char *string, bool add_quotes)
repr_len = 0;
for (p = string; (c = *p) != '\0'; p++) {
/* Backslashes and double-quotes must
* be escaped */
if (escape_char(c) != 0) {
repr_len += 2;
}
/* Values that can't nicely be represented
* in ASCII need to be escaped. */
else if (!g_ascii_isprint(c)) {
/* c --> \xNN */
repr_len += 4;
}
/* Other characters are just passed through. */
else {
repr_len++;
}
@ -316,6 +311,10 @@ escape_string_len(const char *string, bool add_quotes)
return repr_len;
}
/*
* This is used by the display filter engine and must be compatible
* with display filter syntax.
*/
char *
ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
{
@ -327,22 +326,12 @@ ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes)
if (add_quotes)
*bufp++ = '"';
for (p = string; (c = *p) != '\0'; p++) {
/* Backslashes and double-quotes must
* be escaped. */
if ((r = escape_char(c)) != 0) {
*bufp++ = '\\';
*bufp++ = r;
}
/* Values that can't nicely be represented
* in ASCII need to be escaped. */
else if (!g_ascii_isprint(c)) {
/* c --> \xNN */
*bufp++ = '\\';
*bufp++ = 'x';
bufp = guint8_to_hex(bufp, c);
}
/* Other characters are just passed through. */
else {
/* Other UTF-8 bytes are passed through. */
*bufp++ = c;
}
}

View File

@ -43,10 +43,6 @@ static void test_escape_string(void)
buf = ws_escape_string(NULL, "whitespace \t \n \r \f \v", TRUE);
g_assert_cmpstr(buf, ==, "\"whitespace \\t \\n \\r \\f \\v""\"");
wmem_free(NULL, buf);
buf = ws_escape_string(NULL, "bytes \xfe\xff", FALSE);
g_assert_cmpstr(buf, ==, "bytes \\xfe\\xff");
wmem_free(NULL, buf);
}
#include "to_str.h"