2021-11-26 03:31:05 +00:00
|
|
|
/** @file
|
2008-09-03 19:14:52 +00:00
|
|
|
* String utility definitions
|
|
|
|
*
|
|
|
|
* Wireshark - Network traffic analyzer
|
|
|
|
* By Gerald Combs <gerald@wireshark.org>
|
|
|
|
* Copyright 1998 Gerald Combs
|
|
|
|
*
|
2018-02-07 11:26:45 +00:00
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
2008-09-03 19:14:52 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef __STR_UTIL_H__
|
|
|
|
#define __STR_UTIL_H__
|
|
|
|
|
2021-10-16 09:33:34 +00:00
|
|
|
#include <wireshark.h>
|
2021-07-10 15:12:03 +00:00
|
|
|
#include <wsutil/wmem/wmem.h>
|
|
|
|
|
2012-10-18 21:14:43 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif /* __cplusplus */
|
|
|
|
|
2021-12-18 23:22:27 +00:00
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar *
|
|
|
|
wmem_strconcat(wmem_allocator_t *allocator, const gchar *first, ...)
|
|
|
|
G_GNUC_MALLOC G_GNUC_NULL_TERMINATED;
|
|
|
|
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar *
|
|
|
|
wmem_strjoin(wmem_allocator_t *allocator,
|
|
|
|
const gchar *separator, const gchar *first, ...)
|
|
|
|
G_GNUC_MALLOC G_GNUC_NULL_TERMINATED;
|
|
|
|
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar *
|
|
|
|
wmem_strjoinv(wmem_allocator_t *allocator,
|
|
|
|
const gchar *separator, gchar **str_array)
|
|
|
|
G_GNUC_MALLOC;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Splits a string into a maximum of max_tokens pieces, using the given
|
|
|
|
* delimiter. If max_tokens is reached, the remainder of string is appended
|
|
|
|
* to the last token. Successive tokens are not folded and will instead result
|
|
|
|
* in an empty string as element.
|
|
|
|
*
|
|
|
|
* If src or delimiter are NULL, or if delimiter is empty, this will return
|
|
|
|
* NULL.
|
|
|
|
*
|
|
|
|
* Do not use with a NULL allocator, use g_strsplit instead.
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar **
|
|
|
|
wmem_strsplit(wmem_allocator_t *allocator, const gchar *src,
|
|
|
|
const gchar *delimiter, int max_tokens);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* wmem_ascii_strdown:
|
|
|
|
* Based on g_ascii_strdown
|
|
|
|
* @param allocator An enumeration of the different types of available allocators.
|
|
|
|
* @param str a string.
|
|
|
|
* @param len length of str in bytes, or -1 if str is nul-terminated.
|
|
|
|
*
|
|
|
|
* Converts all upper case ASCII letters to lower case ASCII letters.
|
|
|
|
*
|
|
|
|
* Return value: a newly-allocated string, with all the upper case
|
|
|
|
* characters in str converted to lower case, with
|
|
|
|
* semantics that exactly match g_ascii_tolower(). (Note
|
|
|
|
* that this is unlike the old g_strdown(), which modified
|
|
|
|
* the string in place.)
|
|
|
|
**/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar*
|
|
|
|
wmem_ascii_strdown(wmem_allocator_t *allocator, const gchar *str, gssize len);
|
|
|
|
|
2008-09-03 19:14:52 +00:00
|
|
|
/** Convert all upper-case ASCII letters to their ASCII lower-case
|
|
|
|
* equivalents, in place, with a simple non-locale-dependent
|
|
|
|
* ASCII mapping (A-Z -> a-z).
|
|
|
|
* All other characters are left unchanged, as the mapping to
|
|
|
|
* lower case may be locale-dependent.
|
|
|
|
*
|
|
|
|
* The string is assumed to be in a character encoding, such as
|
|
|
|
* an ISO 8859 or other EUC encoding, or UTF-8, in which all
|
|
|
|
* bytes in the range 0x00 through 0x7F are ASCII characters and
|
|
|
|
* non-ASCII characters are constructed from one or more bytes in
|
|
|
|
* the range 0x80 through 0xFF.
|
2012-10-10 19:17:31 +00:00
|
|
|
*
|
2008-09-03 19:14:52 +00:00
|
|
|
* @param str The string to be lower-cased.
|
2009-01-17 17:30:23 +00:00
|
|
|
* @return ptr to the string
|
2008-09-03 19:14:52 +00:00
|
|
|
*/
|
2013-02-28 14:09:46 +00:00
|
|
|
WS_DLL_PUBLIC
|
2009-01-17 17:30:23 +00:00
|
|
|
gchar *ascii_strdown_inplace(gchar *str);
|
2008-09-03 19:14:52 +00:00
|
|
|
|
|
|
|
/** Convert all lower-case ASCII letters to their ASCII upper-case
|
|
|
|
* equivalents, in place, with a simple non-locale-dependent
|
|
|
|
* ASCII mapping (a-z -> A-Z).
|
|
|
|
* All other characters are left unchanged, as the mapping to
|
|
|
|
* lower case may be locale-dependent.
|
|
|
|
*
|
|
|
|
* The string is assumed to be in a character encoding, such as
|
|
|
|
* an ISO 8859 or other EUC encoding, or UTF-8, in which all
|
|
|
|
* bytes in the range 0x00 through 0x7F are ASCII characters and
|
|
|
|
* non-ASCII characters are constructed from one or more bytes in
|
|
|
|
* the range 0x80 through 0xFF.
|
2012-10-10 19:17:31 +00:00
|
|
|
*
|
2008-09-03 19:14:52 +00:00
|
|
|
* @param str The string to be upper-cased.
|
2009-01-17 17:30:23 +00:00
|
|
|
* @return ptr to the string
|
2008-09-03 19:14:52 +00:00
|
|
|
*/
|
2013-02-28 14:09:46 +00:00
|
|
|
WS_DLL_PUBLIC
|
2009-01-17 17:30:23 +00:00
|
|
|
gchar *ascii_strup_inplace(gchar *str);
|
2008-09-03 19:14:52 +00:00
|
|
|
|
2012-02-17 17:22:12 +00:00
|
|
|
/** Check if an entire string consists of printable characters
|
2012-10-10 19:17:31 +00:00
|
|
|
*
|
2019-01-06 19:49:32 +00:00
|
|
|
* @param str The string to be checked
|
2013-08-10 21:27:42 +00:00
|
|
|
* @return TRUE if the entire string is printable, otherwise FALSE
|
2012-02-17 17:22:12 +00:00
|
|
|
*/
|
2013-02-28 14:09:46 +00:00
|
|
|
WS_DLL_PUBLIC
|
2019-01-04 13:12:04 +00:00
|
|
|
gboolean isprint_string(const gchar *str);
|
2012-02-17 17:22:12 +00:00
|
|
|
|
2022-02-03 13:28:11 +00:00
|
|
|
/** Given a not-necessarily-null-terminated string, expected to be in
|
|
|
|
* UTF-8 but possibly containing invalid sequences (as it may have come
|
|
|
|
* from packet data), and the length of the string, deterimine if the
|
|
|
|
* string is valid UTF-8 consisting entirely of printable characters.
|
|
|
|
*
|
|
|
|
* This means that it:
|
|
|
|
*
|
|
|
|
* does not contain an illegal UTF-8 sequence (including overlong encodings,
|
|
|
|
* the sequences reserved for UTF-16 surrogate halves, and the values for
|
|
|
|
* code points above U+10FFFF that are no longer in Unicode)
|
|
|
|
*
|
|
|
|
* does not contain a non-printable Unicode character such as control
|
|
|
|
* characters (including internal NULL bytes)
|
|
|
|
*
|
|
|
|
* does not end in a partial sequence that could begin a valid character;
|
|
|
|
*
|
|
|
|
* does not start with a partial sequence that could end a valid character;
|
|
|
|
*
|
|
|
|
* and thus guarantees that the result of format_text() would be the same as
|
|
|
|
* that of wmem_strndup() with the same parameters.
|
2018-04-27 13:29:32 +00:00
|
|
|
*
|
2019-01-06 19:49:32 +00:00
|
|
|
* @param str The string to be checked
|
2018-04-27 13:29:32 +00:00
|
|
|
* @param length The number of bytes to validate
|
2022-02-03 13:28:11 +00:00
|
|
|
* @return TRUE if the entire string is valid and printable UTF-8,
|
|
|
|
* otherwise FALSE
|
2018-04-27 13:29:32 +00:00
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
2022-02-03 13:28:11 +00:00
|
|
|
gboolean isprint_utf8_string(const gchar *str, const guint length);
|
2018-04-27 13:29:32 +00:00
|
|
|
|
2012-02-17 17:22:12 +00:00
|
|
|
/** Check if an entire string consists of digits
|
2012-10-10 19:17:31 +00:00
|
|
|
*
|
2019-01-06 19:49:32 +00:00
|
|
|
* @param str The string to be checked
|
2013-08-10 21:27:42 +00:00
|
|
|
* @return TRUE if the entire string is digits, otherwise FALSE
|
2012-02-17 17:22:12 +00:00
|
|
|
*/
|
2013-02-28 14:09:46 +00:00
|
|
|
WS_DLL_PUBLIC
|
2019-01-04 13:12:04 +00:00
|
|
|
gboolean isdigit_string(const guchar *str);
|
2012-02-17 17:22:12 +00:00
|
|
|
|
2021-11-27 17:57:46 +00:00
|
|
|
/** Finds the first occurrence of string 'needle' in string 'haystack'.
|
|
|
|
* The matching is done in a case insensitive manner.
|
|
|
|
*
|
|
|
|
* @param haystack The string possibly containing the substring
|
|
|
|
* @param needle The substring to be searched
|
|
|
|
* @return A pointer into 'haystack' where 'needle' is first found.
|
|
|
|
* Otherwise it returns NULL.
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
const char *ws_strcasestr(const char *haystack, const char *needle);
|
|
|
|
|
2021-11-29 13:52:09 +00:00
|
|
|
WS_DLL_PUBLIC
|
2021-11-29 13:37:57 +00:00
|
|
|
char *ws_escape_string(wmem_allocator_t *alloc, const char *string, bool add_quotes);
|
2021-11-29 13:52:09 +00:00
|
|
|
|
2022-06-19 09:25:18 +00:00
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *ws_escape_string_len(wmem_allocator_t *alloc, const char *string, ssize_t len, bool add_quotes);
|
|
|
|
|
|
|
|
/* Replace null bytes with "\0". */
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *ws_escape_null(wmem_allocator_t *alloc, const char *string, size_t len, bool add_quotes);
|
|
|
|
|
2014-01-08 00:28:13 +00:00
|
|
|
WS_DLL_PUBLIC
|
|
|
|
int ws_xton(char ch);
|
|
|
|
|
2012-10-10 19:17:31 +00:00
|
|
|
/** Units that can be appended to a formatted size. */
typedef enum {
    FORMAT_SIZE_UNIT_NONE,      /**< No unit will be appended. You must supply your own. */
    FORMAT_SIZE_UNIT_BYTES,     /**< "bytes" for un-prefixed sizes, "B" otherwise. */
    FORMAT_SIZE_UNIT_BITS,      /**< "bits" for un-prefixed sizes, "b" otherwise. */
    FORMAT_SIZE_UNIT_BITS_S,    /**< "bits/s" for un-prefixed sizes, "bps" otherwise. */
    FORMAT_SIZE_UNIT_BYTES_S,   /**< "bytes/s" for un-prefixed sizes, "Bps" otherwise. */
    FORMAT_SIZE_UNIT_PACKETS,   /**< "packets" */
    FORMAT_SIZE_UNIT_PACKETS_S, /**< "packets/s" */
} format_size_units_e;
|
|
|
|
|
|
|
|
/* Prefix selection flags; each one occupies its own bit. */
#define FORMAT_SIZE_PREFIX_SI   (1 << 0)    /**< SI (power of 1000) prefixes will be used. */
#define FORMAT_SIZE_PREFIX_IEC  (1 << 1)    /**< IEC (power of 1024) prefixes will be used. */
|
2012-10-10 19:17:31 +00:00
|
|
|
|
|
|
|
/** Given a size, return its value in a human-readable format
|
|
|
|
*
|
|
|
|
* Prefixes up to "T/Ti" (tera, tebi) are currently supported.
|
|
|
|
*
|
|
|
|
* @param size The size value
|
|
|
|
* @param flags Flags to control the output (unit of measurement,
|
2021-07-11 00:01:14 +00:00
|
|
|
* SI vs IEC, etc). Unit and prefix flags may be ORed together.
|
2012-10-10 19:17:31 +00:00
|
|
|
* @return A newly-allocated string representing the value.
|
|
|
|
*/
|
2013-02-28 14:09:46 +00:00
|
|
|
WS_DLL_PUBLIC
|
2021-11-29 19:29:55 +00:00
|
|
|
char *format_size_wmem(wmem_allocator_t *allocator, int64_t size,
|
|
|
|
format_size_units_e unit, uint16_t flags);
|
2021-07-10 15:12:03 +00:00
|
|
|
|
2021-11-29 19:29:55 +00:00
|
|
|
/** Convenience wrapper around format_size_wmem() that passes a NULL
 *  allocator; the remaining arguments are forwarded unchanged.
 */
#define format_size(size, unit, flags) \
    format_size_wmem(NULL, size, unit, flags)
|
2012-10-10 19:17:31 +00:00
|
|
|
|
2014-09-04 01:57:02 +00:00
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gchar printable_char_or_period(gchar c);
|
2012-10-18 21:14:43 +00:00
|
|
|
|
2021-12-27 13:28:29 +00:00
|
|
|
WS_DLL_PUBLIC WS_RETNONNULL
|
|
|
|
const char *ws_strerrorname_r(int errnum, char *buf, size_t buf_size);
|
|
|
|
|
2022-04-10 17:19:45 +00:00
|
|
|
WS_DLL_PUBLIC
|
2022-04-11 20:52:53 +00:00
|
|
|
char *ws_strdup_underline(wmem_allocator_t *allocator, long offset, size_t len);
|
2022-04-10 17:19:45 +00:00
|
|
|
|
2022-09-27 18:26:37 +00:00
|
|
|
/** Given a wmem scope, a not-necessarily-null-terminated string,
|
2022-10-01 21:32:42 +00:00
|
|
|
* expected to be in UTF-8 but possibly containing invalid sequences
|
|
|
|
* (as it may have come from packet data), and the length of the string,
|
2022-09-27 18:26:37 +00:00
|
|
|
* generate a valid UTF-8 string from it, allocated in the specified
|
|
|
|
* wmem scope, that:
|
|
|
|
*
|
|
|
|
* shows printable Unicode characters as themselves;
|
|
|
|
*
|
|
|
|
* shows non-printable ASCII characters as C-style escapes (octal
|
|
|
|
* if not one of the standard ones such as LF -> '\n');
|
|
|
|
*
|
|
|
|
* shows non-printable Unicode-but-not-ASCII characters as
|
|
|
|
* their universal character names;
|
|
|
|
*
|
|
|
|
* Replaces illegal UTF-8 sequences with U+FFFD (replacement character) ;
|
|
|
|
*
|
|
|
|
* and return a pointer to it.
|
|
|
|
*
|
|
|
|
* @param allocator The wmem scope
|
|
|
|
* @param string A pointer to the input string
|
|
|
|
* @param len The length of the input string
|
|
|
|
* @return A pointer to the formatted string
|
|
|
|
*
|
|
|
|
* @see tvb_format_text()
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *format_text(wmem_allocator_t* allocator, const char *string, size_t len);
|
|
|
|
|
|
|
|
/** Same as format_text() but accepts a nul-terminated string.
|
|
|
|
*
|
|
|
|
* @param allocator The wmem scope
|
|
|
|
* @param string A pointer to the input string
|
|
|
|
* @return A pointer to the formatted string
|
|
|
|
*
|
|
|
|
* @see tvb_format_text()
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *format_text_string(wmem_allocator_t* allocator, const char *string);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Same as format_text() but replaces any whitespace characters
|
|
|
|
* (space, tab, carriage return, new line, vertical tab, or formfeed)
|
|
|
|
* with a space.
|
|
|
|
*
|
|
|
|
* @param allocator The wmem scope
|
|
|
|
* @param line A pointer to the input string
|
|
|
|
* @param len The length of the input string
|
|
|
|
* @return A pointer to the formatted string
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *format_text_wsp(wmem_allocator_t* allocator, const char *line, size_t len);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Given a string, generate a string from it that shows non-printable
|
|
|
|
* characters as the chr parameter passed, except a whitespace character
|
|
|
|
* (space, tab, carriage return, new line, vertical tab, or formfeed)
|
|
|
|
* which will be replaced by a space, and return a pointer to it.
|
|
|
|
*
|
|
|
|
* This does *not* treat the input string as UTF-8.
|
|
|
|
*
|
|
|
|
* This is useful for displaying binary data that frequently but not always
|
|
|
|
* contains text; otherwise the number of C escape codes makes it unreadable.
|
|
|
|
*
|
|
|
|
* @param allocator The wmem scope
|
|
|
|
* @param string A pointer to the input string
|
|
|
|
* @param len The length of the input string
|
|
|
|
* @param chr The character to use to replace non-printable characters
|
|
|
|
* @return A pointer to the formatted string
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
char *format_text_chr(wmem_allocator_t *allocator,
|
|
|
|
const char *string, size_t len, char chr);
|
|
|
|
|
2022-10-03 17:22:35 +00:00
|
|
|
WS_DLL_PUBLIC
|
|
|
|
void EBCDIC_to_ASCII(guint8 *buf, guint bytes);
|
|
|
|
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
guint8 EBCDIC_to_ASCII1(guint8 c);
|
|
|
|
|
|
|
|
/* Types of character encodings accepted by hex_dump_buffer() */
typedef enum {
    HEXDUMP_ENC_ASCII  = 0, /* ASCII */
    HEXDUMP_ENC_EBCDIC = 1  /* EBCDIC */
} hex_dump_enc;
|
|
|
|
|
|
|
|
/*
 * Hexdump options for ASCII:
 *
 * The ASCII option lives in the low two bits of an option word;
 * HEXDUMP_ASCII_OPTION() masks everything else away.
 */

#define HEXDUMP_ASCII_MASK            (0x0003U)
#define HEXDUMP_ASCII_OPTION(option)  ((option) & HEXDUMP_ASCII_MASK)

#define HEXDUMP_ASCII_INCLUDE         (0x0000U) /* include ASCII section no delimiters (legacy tshark behavior) */
#define HEXDUMP_ASCII_DELIMIT         (0x0001U) /* include ASCII section with delimiters, useful for reliable detection of last hexdata */
#define HEXDUMP_ASCII_EXCLUDE         (0x0002U) /* exclude ASCII section from hexdump reports, if we really don't want or need it */
|
|
|
|
|
|
|
|
WS_DLL_PUBLIC
|
|
|
|
gboolean hex_dump_buffer(gboolean (*print_line)(void *, const char *), void *fp,
|
|
|
|
const guchar *cp, guint length,
|
|
|
|
hex_dump_enc encoding,
|
|
|
|
guint ascii_option);
|
|
|
|
|
2015-11-04 08:45:54 +00:00
|
|
|
/* To pass one of two strings, singular or plural: yields s when d equals
 * 1 and p for every other value (including 0). Only the selected string
 * argument is evaluated. */
#define plurality(d,s,p) ((d) == 1 ? (s) : (p))
|
|
|
|
|
2021-09-26 15:28:39 +00:00
|
|
|
/* Yields the literal "TRUE" when the argument is non-zero and "FALSE"
 * when it is zero. */
#define true_or_false(flag) (!(flag) ? "FALSE" : "TRUE")
|
|
|
|
|
2021-11-29 18:00:18 +00:00
|
|
|
/* Yields the argument itself when it is non-NULL and the literal "[NULL]"
 * otherwise. The argument is evaluated twice when it is non-NULL, so do
 * not pass an expression with side effects. */
#define string_or_null(str) (!(str) ? "[NULL]" : (str))
|
|
|
|
|
2012-10-18 21:14:43 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif /* __cplusplus */
|
|
|
|
|
2008-09-03 19:14:52 +00:00
|
|
|
#endif /* __STR_UTIL_H__ */
|