forked from osmocom/wireshark
epan: Add BASE_SHOW_UTF_8_PRINTABLE
Add BASE_SHOW_UTF_8_PRINTABLE and related function tvb_utf_8_isprint for supporting fields of bytes that are "maybe UTF-8" (default or SHOULD be UTF-8 but could be something else, with no encoding indicator), such as SSID fields in IEEE 802.11 (See #16208), certain OctetString fields in Diameter or PFCP, and other places where BASE_SHOW_ASCII_PRINTABLE is currently used. Fix #5307
pespin/osmux-wip
parent
ebe22f7b7b
commit
25d0c88251
|
@ -1883,6 +1883,7 @@ libwireshark.so.0 libwireshark0 #MINVER#
|
|||
tvb_uncompress_lz77huff@Base 3.1.0
|
||||
tvb_uncompress_lznt1@Base 3.1.0
|
||||
tvb_unicode_strsize@Base 1.9.1
|
||||
tvb_utf_8_isprint@Base 3.7.0
|
||||
tvb_ws_mempbrk_pattern_guint8@Base 1.99.3
|
||||
tvbparse_casestring@Base 1.9.1
|
||||
tvbparse_char@Base 1.9.1
|
||||
|
|
|
@ -132,7 +132,8 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and
|
|||
BASE_CUSTOM, or BASE_NONE, possibly ORed with
|
||||
BASE_RANGE_STRING, BASE_EXT_STRING, BASE_VAL64_STRING,
|
||||
BASE_ALLOW_ZERO, BASE_UNIT_STRING, BASE_SPECIAL_VALS,
|
||||
BASE_NO_DISPLAY_VALUE, or BASE_SHOW_ASCII_PRINTABLE
|
||||
BASE_NO_DISPLAY_VALUE, BASE_SHOW_ASCII_PRINTABLE, or
|
||||
BASE_SHOW_UTF_8_PRINTABLE
|
||||
|
||||
BASE_NONE may be used with a non-NULL FIELDCONVERT when the
|
||||
numeric value of the field itself is not of significance to
|
||||
|
@ -182,8 +183,8 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and
|
|||
|
||||
SEP_DOT, SEP_DASH, SEP_COLON, or SEP_SPACE to provide
|
||||
a separator between bytes; BASE_NONE has no separator
|
||||
between bytes. These can be ORed with BASE_ALLOW_ZERO
|
||||
and BASE_SHOW_ASCII_PRINTABLE.
|
||||
between bytes. These can be ORed with BASE_ALLOW_ZERO,
|
||||
BASE_SHOW_ASCII_PRINTABLE, or BASE_SHOW_UTF_8_PRINTABLE.
|
||||
|
||||
BASE_ALLOW_ZERO displays <none> instead of <MISSING>
|
||||
for a zero-sized byte array.
|
||||
|
@ -192,6 +193,11 @@ FIELDDISPLAY --For FT_UINT{8,16,24,32,40,48,56,64} and
|
|||
characters and, if so, will display the field's value
|
||||
as a string, in quotes. The value will still be
|
||||
filterable as a byte value.
|
||||
BASE_SHOW_UTF_8_PRINTABLE will check whether the
|
||||
field's value is valid UTF-8 consisting entirely of
|
||||
printable characters and, if so, will display the field's
|
||||
value as a string, in quotes. The value will still be
|
||||
filterable as a byte value.
|
||||
|
||||
--For FT_IPv4:
|
||||
|
||||
|
|
|
@ -59,6 +59,7 @@ static ws_enum_t all_enums[] = {
|
|||
ENUM(BASE_PT_UDP),
|
||||
ENUM(BASE_RANGE_STRING),
|
||||
ENUM(BASE_SHOW_ASCII_PRINTABLE),
|
||||
ENUM(BASE_SHOW_UTF_8_PRINTABLE),
|
||||
ENUM(BASE_SPECIAL_VALS),
|
||||
ENUM(BASE_UNIT_STRING),
|
||||
ENUM(BASE_VAL64_STRING),
|
||||
|
|
13
epan/proto.c
13
epan/proto.c
|
@ -1082,7 +1082,18 @@ hfinfo_format_bytes(wmem_allocator_t *scope, const header_field_info *hfinfo,
|
|||
gboolean is_printable;
|
||||
|
||||
if (bytes) {
|
||||
if (hfinfo->display & BASE_SHOW_ASCII_PRINTABLE) {
|
||||
if (hfinfo->display & BASE_SHOW_UTF_8_PRINTABLE) {
|
||||
/*
|
||||
* If all bytes are valid and printable UTF-8, show the
|
||||
* bytes as a string - in quotes to indicate that it's
|
||||
* a string.
|
||||
*/
|
||||
if (isprint_utf8_string(bytes, length)) {
|
||||
str = wmem_strdup_printf(scope, "\"%.*s\"",
|
||||
(int)length, bytes);
|
||||
return str;
|
||||
}
|
||||
} else if (hfinfo->display & BASE_SHOW_ASCII_PRINTABLE) {
|
||||
/*
|
||||
* Check whether all bytes are printable.
|
||||
*/
|
||||
|
|
|
@ -718,6 +718,8 @@ typedef enum {
|
|||
|
||||
#define BASE_SHOW_ASCII_PRINTABLE 0x00010000 /**< show byte array as ASCII if it's all printable characters */
|
||||
|
||||
#define BASE_SHOW_UTF_8_PRINTABLE 0x00020000 /**< show byte array as UTF-8 if it's all valid and printable UTF-8 characters */
|
||||
|
||||
/** BASE_ values that cause the field value to be displayed twice */
|
||||
#define IS_BASE_DUAL(b) ((b)==BASE_DEC_HEX||(b)==BASE_HEX_DEC)
|
||||
|
||||
|
|
|
@ -3884,6 +3884,18 @@ gboolean tvb_ascii_isprint(tvbuff_t *tvb, const gint offset, const gint length)
|
|||
return TRUE;
|
||||
}
|
||||
|
||||
/*
 * Report whether the requested portion of a tvbuff is valid UTF-8 made up
 * entirely of printable characters, as decided by isprint_utf8_string().
 *
 * A length of -1 means "from offset all the way to the end of the tvbuff";
 * in that case the actual byte count is resolved before the check is made.
 */
gboolean tvb_utf_8_isprint(tvbuff_t *tvb, const gint offset, const gint length)
{
	guint start;
	guint remaining;
	const guint8 *data;

	/*
	 * The data pointer is fetched before the length is resolved; the
	 * -1 handling below depends on that ordering.
	 */
	data = tvb_get_ptr(tvb, offset, length);
	remaining = length;

	if (length == -1) {
		/* tvb_get_ptr has already checked for exceptions. */
		compute_offset_and_remaining(tvb, offset, &start, &remaining);
	}

	return isprint_utf8_string(data, remaining);
}
|
||||
|
||||
static ws_mempbrk_pattern pbrk_crlf;
|
||||
/*
|
||||
|
|
|
@ -809,6 +809,18 @@ WS_DLL_PUBLIC gint tvb_get_raw_bytes_as_string(tvbuff_t *tvb, const gint offset,
|
|||
WS_DLL_PUBLIC gboolean tvb_ascii_isprint(tvbuff_t *tvb, const gint offset,
|
||||
const gint length);
|
||||
|
||||
/** Iterates over the provided portion of the tvb checking that it is
|
||||
* valid UTF-8 consisting entirely of printable characters. (The characters
|
||||
* must be complete; if the portion ends in a partial sequence that could
|
||||
* begin a valid character, this returns FALSE.) The length may be -1 for
|
||||
* "all the way to the end of the tvbuff".
|
||||
* Returns TRUE if printable, FALSE otherwise
|
||||
*
|
||||
* @see isprint_utf8_string()
|
||||
*/
|
||||
WS_DLL_PUBLIC gboolean tvb_utf_8_isprint(tvbuff_t *tvb, const gint offset,
|
||||
const gint length);
|
||||
|
||||
/**
|
||||
* Given a tvbuff, an offset into the tvbuff, and a length that starts
|
||||
* at that offset (which may be -1 for "all the way to the end of the
|
||||
|
|
|
@ -273,18 +273,24 @@ isprint_string(const gchar *str)
|
|||
|
||||
/* Check if an entire UTF-8 string is printable. */
|
||||
gboolean
|
||||
isprint_utf8_string(const gchar *str, guint length)
|
||||
isprint_utf8_string(const gchar *str, const guint length)
|
||||
{
|
||||
const char *c;
|
||||
const gchar *strend = str + length;
|
||||
|
||||
if (!g_utf8_validate (str, length, NULL)) {
|
||||
if (!g_utf8_validate_len(str, length, NULL)) {
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
for (c = str; *c; c = g_utf8_next_char(c)) {
|
||||
if (!g_unichar_isprint(g_utf8_get_char(c))) {
|
||||
while (str < strend) {
|
||||
/* This returns false for G_UNICODE_CONTROL | G_UNICODE_FORMAT |
|
||||
* G_UNICODE_UNASSIGNED | G_UNICODE_SURROGATE
|
||||
* XXX: Could it be ok to have certain format characters, e.g.
|
||||
* U+00AD SOFT HYPHEN? If so, format_text() should be changed too.
|
||||
*/
|
||||
if (!g_unichar_isprint(g_utf8_get_char(str))) {
|
||||
return FALSE;
|
||||
}
|
||||
str = g_utf8_next_char(str);
|
||||
}
|
||||
|
||||
return TRUE;
|
||||
|
|
|
@ -114,14 +114,34 @@ gchar *ascii_strup_inplace(gchar *str);
|
|||
WS_DLL_PUBLIC
|
||||
gboolean isprint_string(const gchar *str);
|
||||
|
||||
/** Check if an entire UTF-8 string consists of printable characters
|
||||
/** Given a not-necessarily-null-terminated string, expected to be in
|
||||
* UTF-8 but possibly containing invalid sequences (as it may have come
|
||||
* from packet data), and the length of the string, determine if the
|
||||
* string is valid UTF-8 consisting entirely of printable characters.
|
||||
*
|
||||
* This means that it:
|
||||
*
|
||||
* does not contain an illegal UTF-8 sequence (including overlong encodings,
|
||||
* the sequences reserved for UTF-16 surrogate halves, and the values for
|
||||
* code points above U+10FFFF that are no longer in Unicode)
|
||||
*
|
||||
* does not contain a non-printable Unicode character such as control
|
||||
* characters (including internal NUL bytes)
|
||||
*
|
||||
* does not end in a partial sequence that could begin a valid character;
|
||||
*
|
||||
* does not start with a partial sequence that could end a valid character;
|
||||
*
|
||||
* and thus guarantees that the result of format_text() would be the same as
|
||||
* that of wmem_strndup() with the same parameters.
|
||||
*
|
||||
* @param str The string to be checked
|
||||
* @param length The number of bytes to validate
|
||||
* @return TRUE if the entire string is printable, otherwise FALSE
|
||||
* @return TRUE if the entire string is valid and printable UTF-8,
|
||||
* otherwise FALSE
|
||||
*/
|
||||
WS_DLL_PUBLIC
|
||||
gboolean isprint_utf8_string(const gchar *str, guint length);
|
||||
gboolean isprint_utf8_string(const gchar *str, const guint length);
|
||||
|
||||
/** Check if an entire string consists of digits
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue