2000-09-11 16:16:13 +00:00
|
|
|
/* strutil.c
|
|
|
|
* String utility routines
|
|
|
|
*
|
2006-05-21 05:12:17 +00:00
|
|
|
* Wireshark - Network traffic analyzer
|
|
|
|
* By Gerald Combs <gerald@wireshark.org>
|
2000-09-11 16:16:13 +00:00
|
|
|
* Copyright 1998 Gerald Combs
|
|
|
|
*
|
2018-02-08 16:59:17 +00:00
|
|
|
* SPDX-License-Identifier: GPL-2.0-or-later
|
2000-09-11 16:16:13 +00:00
|
|
|
*/
|
|
|
|
|
2012-09-20 01:48:30 +00:00
|
|
|
#include "config.h"
|
2000-09-11 16:16:13 +00:00
|
|
|
|
|
|
|
#include <stdlib.h>
|
|
|
|
#include <string.h>
|
|
|
|
#include <glib.h>
|
2000-09-11 20:05:13 +00:00
|
|
|
#include "strutil.h"
|
2010-08-30 15:33:32 +00:00
|
|
|
|
2014-01-08 00:28:13 +00:00
|
|
|
#include <wsutil/str_util.h>
|
2014-04-13 03:20:15 +00:00
|
|
|
#include <epan/proto.h>
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2006-03-08 20:55:32 +00:00
|
|
|
#ifdef _WIN32
|
|
|
|
#include <windows.h>
|
|
|
|
#include <tchar.h>
|
|
|
|
#include <wchar.h>
|
|
|
|
#endif
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2007-01-11 22:12:33 +00:00
|
|
|
/*
 * Upper-case hexadecimal digits, indexed by nibble value (0-15).
 * Deliberately not NUL-terminated: it is a lookup table, not a string.
 */
static const char hex[16] = {
    '0', '1', '2', '3',
    '4', '5', '6', '7',
    '8', '9', 'A', 'B',
    'C', 'D', 'E', 'F'
};
|
2007-01-11 22:12:33 +00:00
|
|
|
|
2000-09-11 16:16:13 +00:00
|
|
|
/*
|
|
|
|
* Given a pointer into a data buffer, and to the end of the buffer,
|
|
|
|
* find the end of the (putative) line at that position in the data
|
|
|
|
* buffer.
|
|
|
|
* Return a pointer to the EOL character(s) in "*eol".
|
|
|
|
*/
|
2002-08-02 21:29:45 +00:00
|
|
|
const guchar *
|
|
|
|
find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
|
2000-09-11 16:16:13 +00:00
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const guchar *lineend;
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2013-03-18 21:16:23 +00:00
|
|
|
lineend = (guchar *)memchr(data, '\n', dataend - data);
|
2012-12-22 23:27:40 +00:00
|
|
|
if (lineend == NULL) {
|
2000-09-11 16:16:13 +00:00
|
|
|
/*
|
2012-12-22 23:27:40 +00:00
|
|
|
* No LF - line is probably continued in next TCP segment.
|
|
|
|
*/
|
|
|
|
lineend = dataend;
|
|
|
|
*eol = dataend;
|
|
|
|
} else {
|
2000-09-11 16:16:13 +00:00
|
|
|
/*
|
2012-12-22 23:27:40 +00:00
|
|
|
* Is the LF at the beginning of the line?
|
2000-09-11 16:16:13 +00:00
|
|
|
*/
|
2012-12-22 23:27:40 +00:00
|
|
|
if (lineend > data) {
|
|
|
|
/*
|
|
|
|
* No - is it preceded by a carriage return?
|
|
|
|
* (Perhaps it's supposed to be, but that's not guaranteed....)
|
|
|
|
*/
|
|
|
|
if (*(lineend - 1) == '\r') {
|
|
|
|
/*
|
|
|
|
* Yes. The EOL starts with the CR.
|
|
|
|
*/
|
|
|
|
*eol = lineend - 1;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* No. The EOL starts with the LF.
|
|
|
|
*/
|
|
|
|
*eol = lineend;
|
|
|
|
|
|
|
|
/*
|
|
|
|
* I seem to remember that we once saw lines ending with LF-CR
|
|
|
|
* in an HTTP request or response, so check if it's *followed*
|
|
|
|
* by a carriage return.
|
|
|
|
*/
|
|
|
|
if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
|
|
|
|
/*
|
|
|
|
* It's <non-LF><LF><CR>; say it ends with the CR.
|
|
|
|
*/
|
|
|
|
lineend++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Yes - the EOL starts with the LF.
|
|
|
|
*/
|
|
|
|
*eol = lineend;
|
|
|
|
}
|
2000-09-11 16:16:13 +00:00
|
|
|
|
|
|
|
/*
|
2012-12-22 23:27:40 +00:00
|
|
|
* Point to the character after the last character.
|
2000-09-11 16:16:13 +00:00
|
|
|
*/
|
2012-12-22 23:27:40 +00:00
|
|
|
lineend++;
|
2000-09-11 16:16:13 +00:00
|
|
|
}
|
2012-12-22 23:27:40 +00:00
|
|
|
return lineend;
|
2000-09-11 16:16:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Get the length of the next token in a line, and the beginning of the
|
|
|
|
* next token after that (if any).
|
|
|
|
* Return 0 if there is no next token.
|
|
|
|
*/
|
|
|
|
int
|
2002-08-02 21:29:45 +00:00
|
|
|
get_token_len(const guchar *linep, const guchar *lineend,
|
2012-12-22 23:27:40 +00:00
|
|
|
const guchar **next_token)
|
2000-09-11 16:16:13 +00:00
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const guchar *tokenp;
|
|
|
|
int token_len;
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
tokenp = linep;
|
2002-08-28 20:41:00 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
/*
|
|
|
|
* Search for a blank, a CR or an LF, or the end of the buffer.
|
|
|
|
*/
|
|
|
|
while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
|
|
|
|
linep++;
|
|
|
|
token_len = (int) (linep - tokenp);
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
/*
|
|
|
|
* Skip trailing blanks.
|
|
|
|
*/
|
|
|
|
while (linep < lineend && *linep == ' ')
|
|
|
|
linep++;
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
*next_token = linep;
|
2000-09-11 16:16:13 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
return token_len;
|
2000-09-11 16:16:13 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
/* Initial size, in bytes, of a formatting output buffer. */
#define INITIAL_FMTBUF_SIZE 128

/*
 * Declare, and initialize, the variables used for an output buffer.
 *
 * This expands to the declarations of "fmtbuf", "fmtbuf_len" and
 * "column", and expects a variable named "allocator" to be in scope.
 * Because it is a list of declarations it cannot be wrapped in
 * do { } while (0); use it only where declarations are allowed.
 */
#define FMTBUF_VARS \
    gchar *fmtbuf = (gchar*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE); \
    guint fmtbuf_len = INITIAL_FMTBUF_SIZE; \
    guint column = 0

/*
 * Expand the buffer to be large enough to add nbytes bytes, plus a
 * terminating '\0'.
 *
 * The buffer is doubled at most once per use.  Its size starts at
 * INITIAL_FMTBUF_SIZE, so one doubling adds at least that many bytes,
 * which is enough for the small requests made in this file.  The
 * argument is evaluated exactly once.
 *
 * Wrapped in do { } while (0) so that it behaves as a single statement
 * (e.g. as the body of an unbraced "if" that has an "else").
 */
#define FMTBUF_EXPAND(nbytes) \
    do { \
        if (column + ((nbytes) + 1) >= fmtbuf_len) { \
            fmtbuf_len *= 2; \
            fmtbuf = (gchar *)wmem_realloc(allocator, fmtbuf, fmtbuf_len); \
        } \
    } while (0)

/*
 * Put a byte into the buffer; space must have been ensured for it.
 */
#define FMTBUF_PUTCHAR(b) \
    do { \
        fmtbuf[column] = (b); \
        column++; \
    } while (0)

/*
 * Add the one-byte argument, as three octal digits, to the end of the
 * buffer.  The leading backslash is NOT added here; the caller emits
 * it.  Space for the three digits must have been ensured.  The argument
 * is evaluated three times, so it must have no side effects.
 */
#define FMTBUF_PUTBYTE_OCTAL(b) \
    do { \
        FMTBUF_PUTCHAR((((b)>>6)&03) + '0'); \
        FMTBUF_PUTCHAR((((b)>>3)&07) + '0'); \
        FMTBUF_PUTCHAR((((b)>>0)&07) + '0'); \
    } while (0)

/*
 * Add the one-byte argument, as a hex escape sequence ("\xHH"), to the
 * end of the buffer.  Space for the four characters must have been
 * ensured.  The argument is evaluated twice, so it must have no side
 * effects.
 */
#define FMTBUF_PUTBYTE_HEX(b) \
    do { \
        FMTBUF_PUTCHAR('\\'); \
        FMTBUF_PUTCHAR('x'); \
        FMTBUF_PUTCHAR(hex[((b) >> 4) & 0xF]); \
        FMTBUF_PUTCHAR(hex[((b) >> 0) & 0xF]); \
    } while (0)

/*
 * Put the trailing '\0' at the end of the buffer.  "column" is not
 * advanced.
 */
#define FMTBUF_ENDSTR \
    fmtbuf[column] = '\0'

/* U+FFFD REPLACEMENT CHARACTER */
#define UNREPL 0xFFFD

/* U+1F4A9 PILE OF POO */
#define UNPOOP 0x1F4A9
|
|
|
|
|
|
|
|
/*
|
2020-08-20 06:58:20 +00:00
|
|
|
* Given a wmem scope, a not-necessarily-null-terminated string,
|
|
|
|
* expected to be in UTF-8 but possibly containing invalid sequences
|
|
|
|
* (as it may have come from packet data), and the length of the string,
|
|
|
|
* generate a valid UTF-8 string from it, allocated in the specified
|
|
|
|
* wmem scope, that:
|
2019-07-25 12:23:36 +00:00
|
|
|
*
|
|
|
|
* shows printable Unicode characters as themselves;
|
|
|
|
*
|
|
|
|
* shows non-printable ASCII characters as C-style escapes (octal
|
|
|
|
* if not one of the standard ones such as LF -> '\n');
|
|
|
|
*
|
|
|
|
* shows non-printable Unicode-but-not-ASCII characters as
|
|
|
|
* their universal character names;
|
|
|
|
*
|
|
|
|
* shows illegal UTF-8 sequences as a sequence of bytes represented
|
2020-12-08 23:21:13 +00:00
|
|
|
* as C-style hex escapes (XXX: Does not actually do this. Some illegal
|
|
|
|
* sequences, such as overlong encodings, the sequences reserved for
|
|
|
|
* UTF-16 surrogate halves (paired or unpaired), and values outside
|
|
|
|
* Unicode (i.e., the old sequences for code points above U+10FFFF)
|
|
|
|
* will be decoded in a permissive way. Other illegal sequences,
|
|
|
|
* such 0xFE and 0xFF and the presence of a continuation byte where
|
|
|
|
* not expected (or vice versa its absence), are replaced with
|
|
|
|
* REPLACEMENT CHARACTER.)
|
2019-07-25 12:23:36 +00:00
|
|
|
*
|
|
|
|
* and return a pointer to it.
|
2000-09-11 16:16:13 +00:00
|
|
|
*/
|
|
|
|
gchar *
|
2017-01-31 12:51:19 +00:00
|
|
|
format_text(wmem_allocator_t* allocator, const guchar *string, size_t len)
|
2017-01-30 02:53:49 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_VARS;
|
2017-01-30 02:53:49 +00:00
|
|
|
const guchar *stringend = string + len;
|
|
|
|
guchar c;
|
|
|
|
|
|
|
|
while (string < stringend) {
|
|
|
|
/*
|
2019-07-25 12:23:36 +00:00
|
|
|
* Get the first byte of this character.
|
2017-01-30 02:53:49 +00:00
|
|
|
*/
|
|
|
|
c = *string++;
|
|
|
|
if (g_ascii_isprint(c)) {
|
2019-07-25 12:23:36 +00:00
|
|
|
/*
|
|
|
|
* Printable ASCII, so not part of a multi-byte UTF-8 sequence.
|
|
|
|
* Make sure there's enough room for one more byte, and add
|
|
|
|
* the character.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(1);
|
|
|
|
FMTBUF_PUTCHAR(c);
|
|
|
|
} else if (c < 128) {
|
|
|
|
/*
|
|
|
|
* ASCII, so not part of a multi-byte UTF-8 sequence, but not
|
|
|
|
* printable.
|
|
|
|
*
|
|
|
|
* That requires a minimum of 2 bytes, one for the backslash
|
|
|
|
* and one for a letter, so make sure we have enough room
|
|
|
|
* for that, plus a trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(2);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
2017-01-30 02:53:49 +00:00
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
case '\a':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('a');
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\b':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('b'); /* BS */
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\f':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('f'); /* FF */
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\n':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('n'); /* NL */
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\r':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('r'); /* CR */
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\t':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('t'); /* tab */
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\v':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('v');
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2019-07-25 12:23:36 +00:00
|
|
|
/*
|
|
|
|
* We've already put the backslash, but this
|
|
|
|
* will put 3 more characters for the octal
|
|
|
|
* number; make sure we have enough room for
|
|
|
|
* that, plus the trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(3);
|
|
|
|
FMTBUF_PUTBYTE_OCTAL(c);
|
2017-01-30 02:53:49 +00:00
|
|
|
break;
|
|
|
|
}
|
2019-07-25 12:23:36 +00:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We've fetched the first byte of a multi-byte UTF-8
|
|
|
|
* sequence into c.
|
|
|
|
*/
|
|
|
|
int utf8_len;
|
|
|
|
guchar mask;
|
|
|
|
gunichar uc;
|
|
|
|
guchar first;
|
|
|
|
|
2020-12-08 23:21:13 +00:00
|
|
|
if ((c & 0xe0) == 0xc0) {
|
2019-07-25 12:23:36 +00:00
|
|
|
/* Starts a 2-byte UTF-8 sequence; 1 byte left */
|
|
|
|
utf8_len = 1;
|
|
|
|
mask = 0x1f;
|
|
|
|
} else if ((c & 0xf0) == 0xe0) {
|
|
|
|
/* Starts a 3-byte UTF-8 sequence; 2 bytes left */
|
|
|
|
utf8_len = 2;
|
|
|
|
mask = 0x0f;
|
|
|
|
} else if ((c & 0xf8) == 0xf0) {
|
|
|
|
/* Starts a 4-byte UTF-8 sequence; 3 bytes left */
|
|
|
|
utf8_len = 3;
|
|
|
|
mask = 0x07;
|
|
|
|
} else if ((c & 0xfc) == 0xf8) {
|
|
|
|
/* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
|
|
|
|
utf8_len = 4;
|
|
|
|
mask = 0x03;
|
|
|
|
} else if ((c & 0xfe) == 0xfc) {
|
|
|
|
/* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
|
|
|
|
utf8_len = 5;
|
|
|
|
mask = 0x01;
|
|
|
|
} else {
|
2020-12-08 23:21:13 +00:00
|
|
|
/* 0xfe or 0xff or a continuation byte - not valid */
|
2019-07-25 12:23:36 +00:00
|
|
|
utf8_len = -1;
|
|
|
|
}
|
|
|
|
if (utf8_len > 0) {
|
|
|
|
/* Try to construct the Unicode character */
|
|
|
|
uc = c & mask;
|
|
|
|
for (int i = 0; i < utf8_len; i++) {
|
|
|
|
if (string >= stringend) {
|
|
|
|
/*
|
|
|
|
* Ran out of octets, so the character is
|
|
|
|
* incomplete. Put in a REPLACEMENT CHARACTER
|
|
|
|
* instead, and then continue the loop, which
|
|
|
|
* will terminate.
|
|
|
|
*/
|
|
|
|
uc = UNREPL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
c = *string;
|
|
|
|
if ((c & 0xc0) != 0x80) {
|
|
|
|
/*
|
|
|
|
* Not valid UTF-8 continuation character; put in
|
|
|
|
* a replacement character, and then re-process
|
|
|
|
* this octet as the beginning of a new character.
|
|
|
|
*/
|
|
|
|
uc = UNREPL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
string++;
|
|
|
|
uc = (uc << 6) | (c & 0x3f);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this isn't a valid Unicode character, put in
|
|
|
|
* a REPLACEMENT CHARACTER.
|
|
|
|
*/
|
|
|
|
if (!g_unichar_validate(uc))
|
|
|
|
uc = UNREPL;
|
|
|
|
} else {
|
|
|
|
/* 0xfe or 0xff; put it a REPLACEMENT CHARACTER */
|
|
|
|
uc = UNREPL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* OK, is it a printable Unicode character?
|
|
|
|
*/
|
|
|
|
if (g_unichar_isprint(uc)) {
|
|
|
|
/*
|
|
|
|
* Yes - put it into the string as UTF-8.
|
|
|
|
* This means that if it was an overlong
|
|
|
|
* encoding, this will put out the right
|
|
|
|
* sized encoding.
|
|
|
|
*/
|
|
|
|
if (uc < 0x80) {
|
|
|
|
first = 0;
|
|
|
|
utf8_len = 1;
|
|
|
|
} else if (uc < 0x800) {
|
|
|
|
first = 0xc0;
|
|
|
|
utf8_len = 2;
|
|
|
|
} else if (uc < 0x10000) {
|
|
|
|
first = 0xe0;
|
|
|
|
utf8_len = 3;
|
|
|
|
} else if (uc < 0x200000) {
|
|
|
|
first = 0xf0;
|
|
|
|
utf8_len = 4;
|
|
|
|
} else if (uc < 0x4000000) {
|
|
|
|
/*
|
|
|
|
* This should never happen, as Unicode doesn't
|
|
|
|
* go that high.
|
|
|
|
*/
|
|
|
|
first = 0xf8;
|
|
|
|
utf8_len = 5;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* This should never happen, as Unicode doesn't
|
|
|
|
* go that high.
|
|
|
|
*/
|
|
|
|
first = 0xfc;
|
|
|
|
utf8_len = 6;
|
|
|
|
}
|
|
|
|
FMTBUF_EXPAND(utf8_len);
|
|
|
|
for (int i = utf8_len - 1; i > 0; i--) {
|
|
|
|
fmtbuf[column + i] = (uc & 0x3f) | 0x80;
|
|
|
|
uc >>= 6;
|
|
|
|
}
|
|
|
|
fmtbuf[column] = uc | first;
|
|
|
|
column += utf8_len;
|
|
|
|
} else if (c < 128) {
|
|
|
|
/*
|
|
|
|
* ASCII, but not printable.
|
|
|
|
* Yes, this could happen with an overlong encoding.
|
|
|
|
*
|
|
|
|
* That requires a minimum of 2 bytes, one for the
|
|
|
|
* backslash and one for a letter, so make sure we
|
|
|
|
* have enough room for that, plus a trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(2);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
case '\a':
|
|
|
|
FMTBUF_PUTCHAR('a');
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\b':
|
|
|
|
FMTBUF_PUTCHAR('b'); /* BS */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\f':
|
|
|
|
FMTBUF_PUTCHAR('f'); /* FF */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
FMTBUF_PUTCHAR('n'); /* NL */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\r':
|
|
|
|
FMTBUF_PUTCHAR('r'); /* CR */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\t':
|
|
|
|
FMTBUF_PUTCHAR('t'); /* tab */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\v':
|
|
|
|
FMTBUF_PUTCHAR('v');
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* We've already put the backslash, but this
|
|
|
|
* will put 3 more characters for the octal
|
|
|
|
* number; make sure we have enough room for
|
|
|
|
* that, plus the trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(3);
|
|
|
|
FMTBUF_PUTBYTE_OCTAL(c);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Unicode, but not printable, and not ASCII;
|
|
|
|
* put it out as \uxxxx or \Uxxxxxxxx.
|
|
|
|
*/
|
|
|
|
if (uc <= 0xFFFF) {
|
|
|
|
FMTBUF_EXPAND(6);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
FMTBUF_PUTCHAR('u');
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
|
|
|
|
} else {
|
|
|
|
FMTBUF_EXPAND(10);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
FMTBUF_PUTCHAR('U');
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
|
|
|
|
}
|
|
|
|
}
|
2017-01-30 02:53:49 +00:00
|
|
|
}
|
|
|
|
}
|
2019-07-25 12:23:36 +00:00
|
|
|
|
|
|
|
FMTBUF_ENDSTR;
|
2017-01-30 02:53:49 +00:00
|
|
|
return fmtbuf;
|
|
|
|
}
|
|
|
|
|
2020-08-20 06:58:20 +00:00
|
|
|
/** Given a wmem scope and a null-terminated string, expected to be in
|
|
|
|
* UTF-8 but possibly containing invalid sequences (as it may have come
|
|
|
|
* from packet data), and the length of the string, generate a valid
|
|
|
|
* UTF-8 string from it, allocated in the specified wmem scope, that:
|
|
|
|
*
|
|
|
|
* shows printable Unicode characters as themselves;
|
|
|
|
*
|
|
|
|
* shows non-printable ASCII characters as C-style escapes (octal
|
|
|
|
* if not one of the standard ones such as LF -> '\n');
|
|
|
|
*
|
|
|
|
* shows non-printable Unicode-but-not-ASCII characters as
|
|
|
|
* their universal character names;
|
|
|
|
*
|
|
|
|
* shows illegal UTF-8 sequences as a sequence of bytes represented
|
|
|
|
* as C-style hex escapes;
|
|
|
|
*
|
|
|
|
* and return a pointer to it.
|
|
|
|
*/
|
|
|
|
gchar *
|
|
|
|
format_text_string(wmem_allocator_t* allocator, const guchar *string)
|
|
|
|
{
|
|
|
|
return format_text(allocator, string, strlen(string));
|
|
|
|
}
|
|
|
|
|
2006-06-19 15:53:03 +00:00
|
|
|
/*
|
|
|
|
* Given a string, generate a string from it that shows non-printable
|
2006-09-22 21:14:54 +00:00
|
|
|
* characters as C-style escapes except a whitespace character
|
2006-06-19 15:53:03 +00:00
|
|
|
* (space, tab, carriage return, new line, vertical tab, or formfeed)
|
2008-05-05 00:39:47 +00:00
|
|
|
* which will be replaced by a space, and return a pointer to it.
|
2006-06-19 15:53:03 +00:00
|
|
|
*/
|
|
|
|
gchar *
|
2017-01-29 23:51:00 +00:00
|
|
|
format_text_wsp(wmem_allocator_t* allocator, const guchar *string, size_t len)
|
2006-06-19 15:53:03 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_VARS;
|
2012-12-22 23:27:40 +00:00
|
|
|
const guchar *stringend = string + len;
|
|
|
|
guchar c;
|
|
|
|
|
|
|
|
while (string < stringend) {
|
|
|
|
/*
|
2019-07-25 12:23:36 +00:00
|
|
|
* Get the first byte of this character.
|
2012-12-22 23:27:40 +00:00
|
|
|
*/
|
|
|
|
c = *string++;
|
2013-12-21 15:01:45 +00:00
|
|
|
if (g_ascii_isprint(c)) {
|
2019-07-25 12:23:36 +00:00
|
|
|
/*
|
|
|
|
* Printable ASCII, so not part of a multi-byte UTF-8 sequence.
|
|
|
|
* Make sure there's enough room for one more byte, and add
|
|
|
|
* the character.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(1);
|
|
|
|
FMTBUF_PUTCHAR(c);
|
2014-10-17 23:10:53 +00:00
|
|
|
} else if (g_ascii_isspace(c)) {
|
2019-07-25 12:23:36 +00:00
|
|
|
/*
|
|
|
|
* ASCII, so not part of a multi-byte UTF-8 sequence, but
|
|
|
|
* not printable, but is a space character; show it as a
|
|
|
|
* blank.
|
|
|
|
*
|
|
|
|
* Make sure there's enough room for one more byte, and add
|
|
|
|
* the blank.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(1);
|
|
|
|
FMTBUF_PUTCHAR(' ');
|
|
|
|
} else if (c < 128) {
|
|
|
|
/*
|
|
|
|
* ASCII, so not part of a multi-byte UTF-8 sequence, but not
|
|
|
|
* printable.
|
|
|
|
*
|
|
|
|
* That requires a minimum of 2 bytes, one for the backslash
|
|
|
|
* and one for a letter, so make sure we have enough room
|
|
|
|
* for that, plus a trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(2);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
2012-12-22 23:27:40 +00:00
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
case '\a':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('a');
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\b':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('b'); /* BS */
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\f':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('f'); /* FF */
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\n':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('n'); /* NL */
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\r':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('r'); /* CR */
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\t':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('t'); /* tab */
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
case '\v':
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('v');
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
2019-07-25 12:23:36 +00:00
|
|
|
/*
|
|
|
|
* We've already put the backslash, but this
|
|
|
|
* will put 3 more characters for the octal
|
|
|
|
* number; make sure we have enough room for
|
|
|
|
* that, plus the trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(3);
|
|
|
|
FMTBUF_PUTBYTE_OCTAL(c);
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
}
|
2019-07-25 12:23:36 +00:00
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* We've fetched the first byte of a multi-byte UTF-8
|
|
|
|
* sequence into c.
|
|
|
|
*/
|
|
|
|
int utf8_len;
|
|
|
|
guchar mask;
|
|
|
|
gunichar uc;
|
|
|
|
guchar first;
|
|
|
|
|
|
|
|
if ((c & 0xe8) == 0xc0) {
|
|
|
|
/* Starts a 2-byte UTF-8 sequence; 1 byte left */
|
|
|
|
utf8_len = 1;
|
|
|
|
mask = 0x1f;
|
|
|
|
} else if ((c & 0xf0) == 0xe0) {
|
|
|
|
/* Starts a 3-byte UTF-8 sequence; 2 bytes left */
|
|
|
|
utf8_len = 2;
|
|
|
|
mask = 0x0f;
|
|
|
|
} else if ((c & 0xf8) == 0xf0) {
|
|
|
|
/* Starts a 4-byte UTF-8 sequence; 3 bytes left */
|
|
|
|
utf8_len = 3;
|
|
|
|
mask = 0x07;
|
|
|
|
} else if ((c & 0xfc) == 0xf8) {
|
|
|
|
/* Starts an old-style 5-byte UTF-8 sequence; 4 bytes left */
|
|
|
|
utf8_len = 4;
|
|
|
|
mask = 0x03;
|
|
|
|
} else if ((c & 0xfe) == 0xfc) {
|
|
|
|
/* Starts an old-style 6-byte UTF-8 sequence; 5 bytes left */
|
|
|
|
utf8_len = 5;
|
|
|
|
mask = 0x01;
|
|
|
|
} else {
|
|
|
|
/* 0xfe or 0xff - not valid */
|
|
|
|
utf8_len = -1;
|
|
|
|
}
|
|
|
|
if (utf8_len > 0) {
|
|
|
|
/* Try to construct the Unicode character */
|
|
|
|
uc = c & mask;
|
|
|
|
for (int i = 0; i < utf8_len; i++) {
|
|
|
|
if (string >= stringend) {
|
|
|
|
/*
|
|
|
|
* Ran out of octets, so the character is
|
|
|
|
* incomplete. Put in a REPLACEMENT CHARACTER
|
|
|
|
* instead, and then continue the loop, which
|
|
|
|
* will terminate.
|
|
|
|
*/
|
|
|
|
uc = UNREPL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
c = *string;
|
|
|
|
if ((c & 0xc0) != 0x80) {
|
|
|
|
/*
|
|
|
|
* Not valid UTF-8 continuation character; put in
|
|
|
|
* a replacement character, and then re-process
|
|
|
|
* this octet as the beginning of a new character.
|
|
|
|
*/
|
|
|
|
uc = UNREPL;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
string++;
|
|
|
|
uc = (uc << 6) | (c & 0x3f);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* If this isn't a valid Unicode character, put in
|
|
|
|
* a REPLACEMENT CHARACTER.
|
|
|
|
*/
|
|
|
|
if (!g_unichar_validate(uc))
|
|
|
|
uc = UNREPL;
|
|
|
|
} else {
|
|
|
|
/* 0xfe or 0xff; put it a REPLACEMENT CHARACTER */
|
|
|
|
uc = UNREPL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* OK, is it a printable Unicode character?
|
|
|
|
*/
|
|
|
|
if (g_unichar_isprint(uc)) {
|
|
|
|
/*
|
|
|
|
* Yes - put it into the string as UTF-8.
|
|
|
|
* This means that if it was an overlong
|
|
|
|
* encoding, this will put out the right
|
|
|
|
* sized encoding.
|
|
|
|
*/
|
|
|
|
if (uc < 0x80) {
|
|
|
|
first = 0;
|
|
|
|
utf8_len = 1;
|
|
|
|
} else if (uc < 0x800) {
|
|
|
|
first = 0xc0;
|
|
|
|
utf8_len = 2;
|
|
|
|
} else if (uc < 0x10000) {
|
|
|
|
first = 0xe0;
|
|
|
|
utf8_len = 3;
|
|
|
|
} else if (uc < 0x200000) {
|
|
|
|
first = 0xf0;
|
|
|
|
utf8_len = 4;
|
|
|
|
} else if (uc < 0x4000000) {
|
|
|
|
/*
|
|
|
|
* This should never happen, as Unicode doesn't
|
|
|
|
* go that high.
|
|
|
|
*/
|
|
|
|
first = 0xf8;
|
|
|
|
utf8_len = 5;
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* This should never happen, as Unicode doesn't
|
|
|
|
* go that high.
|
|
|
|
*/
|
|
|
|
first = 0xfc;
|
|
|
|
utf8_len = 6;
|
|
|
|
}
|
|
|
|
FMTBUF_EXPAND(utf8_len);
|
|
|
|
for (int i = utf8_len - 1; i > 0; i--) {
|
|
|
|
fmtbuf[column + i] = (uc & 0x3f) | 0x80;
|
|
|
|
uc >>= 6;
|
|
|
|
}
|
|
|
|
fmtbuf[column] = uc | first;
|
|
|
|
column += utf8_len;
|
|
|
|
} else if (g_unichar_isspace(uc)) {
|
|
|
|
/*
|
|
|
|
* Not printable, but is a space character; show it
|
|
|
|
* as a blank.
|
|
|
|
*
|
|
|
|
* Make sure there's enough room for one more byte,
|
|
|
|
* and add the blank.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(1);
|
|
|
|
FMTBUF_PUTCHAR(' ');
|
|
|
|
} else if (c < 128) {
|
|
|
|
/*
|
|
|
|
* ASCII, but not printable.
|
|
|
|
* Yes, this could happen with an overlong encoding.
|
|
|
|
*
|
|
|
|
* That requires a minimum of 2 bytes, one for the
|
|
|
|
* backslash and one for a letter, so make sure we
|
|
|
|
* have enough room for that, plus a trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(2);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
switch (c) {
|
|
|
|
|
|
|
|
case '\a':
|
|
|
|
FMTBUF_PUTCHAR('a');
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\b':
|
|
|
|
FMTBUF_PUTCHAR('b'); /* BS */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\f':
|
|
|
|
FMTBUF_PUTCHAR('f'); /* FF */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\n':
|
|
|
|
FMTBUF_PUTCHAR('n'); /* NL */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\r':
|
|
|
|
FMTBUF_PUTCHAR('r'); /* CR */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\t':
|
|
|
|
FMTBUF_PUTCHAR('t'); /* tab */
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\v':
|
|
|
|
FMTBUF_PUTCHAR('v');
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
/*
|
|
|
|
* We've already put the backslash, but this
|
|
|
|
* will put 3 more characters for the octal
|
|
|
|
* number; make sure we have enough room for
|
|
|
|
* that, plus the trailing '\0'.
|
|
|
|
*/
|
|
|
|
FMTBUF_EXPAND(3);
|
|
|
|
FMTBUF_PUTBYTE_OCTAL(c);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
/*
|
|
|
|
* Unicode, but not printable, and not ASCII;
|
|
|
|
* put it out as \uxxxx or \Uxxxxxxxx.
|
|
|
|
*/
|
|
|
|
if (uc <= 0xFFFF) {
|
|
|
|
FMTBUF_EXPAND(6);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
FMTBUF_PUTCHAR('u');
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
|
|
|
|
} else {
|
|
|
|
FMTBUF_EXPAND(10);
|
|
|
|
FMTBUF_PUTCHAR('\\');
|
|
|
|
FMTBUF_PUTCHAR('U');
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 28) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 24) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 20) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 16) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 12) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 8) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 4) & 0xF]);
|
|
|
|
FMTBUF_PUTCHAR(hex[(uc >> 0) & 0xF]);
|
|
|
|
}
|
|
|
|
}
|
2012-12-22 23:27:40 +00:00
|
|
|
}
|
2000-09-11 16:16:13 +00:00
|
|
|
}
|
2019-07-25 12:23:36 +00:00
|
|
|
|
|
|
|
FMTBUF_ENDSTR;
|
2017-01-29 23:51:00 +00:00
|
|
|
return fmtbuf;
|
2000-09-11 16:16:13 +00:00
|
|
|
}
|
2000-11-13 07:19:37 +00:00
|
|
|
|
2013-10-12 12:38:56 +00:00
|
|
|
/*
|
|
|
|
* Given a string, generate a string from it that shows non-printable
|
|
|
|
* characters as the chr parameter passed, except a whitespace character
|
|
|
|
* (space, tab, carriage return, new line, vertical tab, or formfeed)
|
|
|
|
* which will be replaced by a space, and return a pointer to it.
|
2019-07-25 12:23:36 +00:00
|
|
|
*
|
|
|
|
* This does *not* treat the input string as UTF-8.
|
|
|
|
*
|
|
|
|
* XXX - is there any reason to use this?
|
2013-10-12 12:38:56 +00:00
|
|
|
*/
|
|
|
|
gchar *
|
2017-01-29 19:19:25 +00:00
|
|
|
format_text_chr(wmem_allocator_t* allocator, const guchar *string, const size_t len, const guchar chr)
|
2013-10-12 12:38:56 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_VARS;
|
2013-10-12 12:38:56 +00:00
|
|
|
const guchar *stringend = string + len;
|
|
|
|
guchar c;
|
|
|
|
|
2014-02-25 20:42:35 +00:00
|
|
|
while (string < stringend)
|
2013-10-12 12:38:56 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_EXPAND(1);
|
2013-10-12 12:38:56 +00:00
|
|
|
c = *string++;
|
|
|
|
|
2014-02-25 20:42:35 +00:00
|
|
|
if (g_ascii_isprint(c))
|
2013-10-12 12:38:56 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR(c);
|
2014-02-25 20:42:35 +00:00
|
|
|
}
|
2014-10-17 23:10:53 +00:00
|
|
|
else if (g_ascii_isspace(c))
|
2013-10-12 12:38:56 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR(' ');
|
2014-02-25 20:42:35 +00:00
|
|
|
}
|
|
|
|
else
|
2013-10-12 12:38:56 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR(chr);
|
2013-10-12 12:38:56 +00:00
|
|
|
}
|
|
|
|
}
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_ENDSTR;
|
2017-01-29 19:19:25 +00:00
|
|
|
return fmtbuf;
|
2013-10-12 12:38:56 +00:00
|
|
|
}
|
|
|
|
|
2003-12-29 04:07:06 +00:00
|
|
|
static gboolean
|
|
|
|
is_byte_sep(guint8 c)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
return (c == '-' || c == ':' || c == '.');
|
2003-12-29 04:07:06 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/* Turn a string of hex digits with optional separators (defined by
|
|
|
|
* is_byte_sep() into a byte array.
|
|
|
|
*/
|
|
|
|
gboolean
|
2014-09-04 01:39:04 +00:00
|
|
|
hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
guint8 val;
|
2014-10-17 23:10:53 +00:00
|
|
|
const gchar *p, *q, *r, *s, *punct;
|
2012-12-22 23:27:40 +00:00
|
|
|
char four_digits_first_half[3];
|
|
|
|
char four_digits_second_half[3];
|
|
|
|
char two_digits[3];
|
|
|
|
char one_digit[2];
|
|
|
|
|
|
|
|
if (! hex_str || ! bytes) {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
g_byte_array_set_size(bytes, 0);
|
2014-10-17 23:10:53 +00:00
|
|
|
p = hex_str;
|
2012-12-22 23:27:40 +00:00
|
|
|
while (*p) {
|
|
|
|
q = p+1;
|
|
|
|
r = p+2;
|
|
|
|
s = p+3;
|
|
|
|
|
|
|
|
if (*q && *r && *s
|
2014-10-17 23:10:53 +00:00
|
|
|
&& g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) &&
|
|
|
|
g_ascii_isxdigit(*r) && g_ascii_isxdigit(*s)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
four_digits_first_half[0] = *p;
|
|
|
|
four_digits_first_half[1] = *q;
|
|
|
|
four_digits_first_half[2] = '\0';
|
|
|
|
four_digits_second_half[0] = *r;
|
|
|
|
four_digits_second_half[1] = *s;
|
|
|
|
four_digits_second_half[2] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Four or more hex digits in a row.
|
|
|
|
*/
|
|
|
|
val = (guint8) strtoul(four_digits_first_half, NULL, 16);
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
val = (guint8) strtoul(four_digits_second_half, NULL, 16);
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
|
|
|
|
punct = s + 1;
|
|
|
|
if (*punct) {
|
|
|
|
/*
|
|
|
|
* Make sure the character after
|
|
|
|
* the forth hex digit is a byte
|
|
|
|
* separator, i.e. that we don't have
|
|
|
|
* more than four hex digits, or a
|
|
|
|
* bogus character.
|
|
|
|
*/
|
|
|
|
if (is_byte_sep(*punct)) {
|
|
|
|
p = punct + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else if (force_separators) {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p = punct;
|
|
|
|
continue;
|
|
|
|
}
|
2014-10-17 23:10:53 +00:00
|
|
|
else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
two_digits[0] = *p;
|
|
|
|
two_digits[1] = *q;
|
|
|
|
two_digits[2] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Two hex digits in a row.
|
|
|
|
*/
|
|
|
|
val = (guint8) strtoul(two_digits, NULL, 16);
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
punct = q + 1;
|
|
|
|
if (*punct) {
|
|
|
|
/*
|
|
|
|
* Make sure the character after
|
|
|
|
* the second hex digit is a byte
|
|
|
|
* separator, i.e. that we don't have
|
|
|
|
* more than two hex digits, or a
|
|
|
|
* bogus character.
|
|
|
|
*/
|
|
|
|
if (is_byte_sep(*punct)) {
|
|
|
|
p = punct + 1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else if (force_separators) {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
p = punct;
|
|
|
|
continue;
|
|
|
|
}
|
2014-10-17 23:10:53 +00:00
|
|
|
else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
one_digit[0] = *p;
|
|
|
|
one_digit[1] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only one hex digit (not at the end of the string)
|
|
|
|
*/
|
|
|
|
val = (guint8) strtoul(one_digit, NULL, 16);
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
p = q + 1;
|
|
|
|
continue;
|
|
|
|
}
|
2014-10-17 23:10:53 +00:00
|
|
|
else if (!*q && g_ascii_isxdigit(*p)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
one_digit[0] = *p;
|
|
|
|
one_digit[1] = '\0';
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Only one hex digit (at the end of the string)
|
|
|
|
*/
|
|
|
|
val = (guint8) strtoul(one_digit, NULL, 16);
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
p = q;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return TRUE;
|
2003-12-29 04:07:06 +00:00
|
|
|
}
|
|
|
|
|
2014-04-13 03:20:15 +00:00
|
|
|
static inline gchar
|
|
|
|
get_valid_byte_sep(gchar c, const guint encoding)
|
|
|
|
{
|
|
|
|
gchar retval = -1; /* -1 means failure */
|
|
|
|
|
|
|
|
switch (c) {
|
|
|
|
case ':':
|
|
|
|
if (encoding & ENC_SEP_COLON)
|
|
|
|
retval = c;
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
if (encoding & ENC_SEP_DASH)
|
|
|
|
retval = c;
|
|
|
|
break;
|
|
|
|
case '.':
|
|
|
|
if (encoding & ENC_SEP_DOT)
|
|
|
|
retval = c;
|
|
|
|
break;
|
|
|
|
case ' ':
|
|
|
|
if (encoding & ENC_SEP_SPACE)
|
|
|
|
retval = c;
|
|
|
|
break;
|
|
|
|
case '\0':
|
|
|
|
/* we were given the end of the string, so it's fine */
|
|
|
|
retval = 0;
|
|
|
|
break;
|
|
|
|
default:
|
2014-10-17 23:10:53 +00:00
|
|
|
if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE))
|
2014-04-13 03:20:15 +00:00
|
|
|
retval = 0;
|
|
|
|
/* anything else means we've got a failure */
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Turn a string of hex digits with optional separators (defined by is_byte_sep())
|
|
|
|
* into a byte array. Unlike hex_str_to_bytes(), this will read as many hex-char
|
|
|
|
* pairs as possible and not error if it hits a non-hex-char; instead it just ends
|
|
|
|
* there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is TRUE.
|
|
|
|
*
|
|
|
|
* The **endptr, if not NULL, is set to the char after the last hex character.
|
|
|
|
*/
|
|
|
|
gboolean
|
|
|
|
hex_str_to_bytes_encoding(const gchar *hex_str, GByteArray *bytes, const gchar **endptr,
|
|
|
|
const guint encoding, const gboolean fail_if_partial)
|
|
|
|
{
|
2014-10-16 18:16:11 +00:00
|
|
|
gint8 c, d;
|
2014-04-13 03:20:15 +00:00
|
|
|
guint8 val;
|
|
|
|
const gchar *end = hex_str;
|
|
|
|
gboolean retval = FALSE;
|
|
|
|
gchar sep = -1;
|
|
|
|
|
|
|
|
/* a map from ASCII hex chars to their value */
|
2014-10-16 18:16:11 +00:00
|
|
|
static const gint8 str_to_nibble[256] = {
|
2014-04-13 03:20:15 +00:00
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
|
|
|
|
-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
|
|
|
|
};
|
|
|
|
|
|
|
|
/* we must see two hex chars at the beginning, or fail */
|
2014-10-17 23:10:53 +00:00
|
|
|
if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) {
|
2014-04-13 03:20:15 +00:00
|
|
|
retval = TRUE;
|
|
|
|
|
|
|
|
/* set the separator character we'll allow; if this returns a -1, it means something's
|
|
|
|
* invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway
|
|
|
|
*/
|
|
|
|
sep = get_valid_byte_sep(*(end+2), encoding);
|
|
|
|
|
|
|
|
while (*end) {
|
2014-10-16 18:16:11 +00:00
|
|
|
c = str_to_nibble[(guchar)*end];
|
2014-04-13 03:20:15 +00:00
|
|
|
if (c < 0) {
|
|
|
|
if (fail_if_partial) retval = FALSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
++end;
|
|
|
|
|
2014-10-16 18:16:11 +00:00
|
|
|
d = str_to_nibble[(guchar)*end];
|
2014-04-13 03:20:15 +00:00
|
|
|
if (d < 0) {
|
|
|
|
if (fail_if_partial) retval = FALSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
val = ((guint8)c * 16) + d;
|
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
++end;
|
|
|
|
|
|
|
|
/* check for separator and peek at next char to make sure we should keep going */
|
2014-10-16 20:03:52 +00:00
|
|
|
if (sep > 0 && *end == sep && str_to_nibble[(guchar)*(end+1)] > -1) {
|
2014-04-13 03:20:15 +00:00
|
|
|
/* yes, it's the right sep and followed by more hex, so skip the sep */
|
|
|
|
++end;
|
|
|
|
} else if (sep != 0 && *end) {
|
|
|
|
/* we either need a separator, but we don't see one; or the get_valid_byte_sep()
|
|
|
|
earlier didn't find a valid one to begin with */
|
|
|
|
if (fail_if_partial) retval = FALSE;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
/* otherwise, either no separator allowed, or *end is null, or *end is an invalid
|
|
|
|
* sep, or *end is a valid sep but after it is not a hex char - in all those
|
|
|
|
* cases, just loop back up and let it fail later naturally.
|
|
|
|
*/
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!retval) {
|
|
|
|
if (bytes) g_byte_array_set_size(bytes, 0);
|
|
|
|
end = hex_str;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (endptr) *endptr = end;
|
|
|
|
|
|
|
|
return retval;
|
|
|
|
}
|
|
|
|
|
2007-01-11 02:42:34 +00:00
|
|
|
/*
|
|
|
|
* Turn an RFC 3986 percent-encoded string into a byte array.
|
|
|
|
* XXX - We don't check for reserved characters.
|
|
|
|
*/
|
|
|
|
#define HEX_DIGIT_BUF_LEN 3
|
|
|
|
gboolean
|
2014-09-04 01:39:04 +00:00
|
|
|
uri_str_to_bytes(const char *uri_str, GByteArray *bytes)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
guint8 val;
|
2014-10-17 23:10:53 +00:00
|
|
|
const gchar *p;
|
|
|
|
gchar hex_digit[HEX_DIGIT_BUF_LEN];
|
2012-12-22 23:27:40 +00:00
|
|
|
|
|
|
|
g_byte_array_set_size(bytes, 0);
|
|
|
|
if (! uri_str) {
|
|
|
|
return FALSE;
|
|
|
|
}
|
|
|
|
|
2014-10-17 23:10:53 +00:00
|
|
|
p = uri_str;
|
2012-12-22 23:27:40 +00:00
|
|
|
|
|
|
|
while (*p) {
|
2013-12-21 15:12:11 +00:00
|
|
|
if (!g_ascii_isprint(*p))
|
2012-12-22 23:27:40 +00:00
|
|
|
return FALSE;
|
|
|
|
if (*p == '%') {
|
|
|
|
p++;
|
|
|
|
if (*p == '\0') return FALSE;
|
|
|
|
hex_digit[0] = *p;
|
|
|
|
p++;
|
|
|
|
if (*p == '\0') return FALSE;
|
|
|
|
hex_digit[1] = *p;
|
|
|
|
hex_digit[2] = '\0';
|
2014-10-17 23:10:53 +00:00
|
|
|
if (! g_ascii_isxdigit(hex_digit[0]) || ! g_ascii_isxdigit(hex_digit[1]))
|
2012-12-22 23:27:40 +00:00
|
|
|
return FALSE;
|
2014-10-17 23:10:53 +00:00
|
|
|
val = (guint8) strtoul(hex_digit, NULL, 16);
|
2012-12-22 23:27:40 +00:00
|
|
|
g_byte_array_append(bytes, &val, 1);
|
|
|
|
} else {
|
|
|
|
g_byte_array_append(bytes, (const guint8 *) p, 1);
|
|
|
|
}
|
|
|
|
p++;
|
|
|
|
|
|
|
|
}
|
|
|
|
return TRUE;
|
2007-01-11 02:42:34 +00:00
|
|
|
}
|
|
|
|
|
2007-01-11 22:12:33 +00:00
|
|
|
/*
|
|
|
|
* Given a GByteArray, generate a string from it that shows non-printable
|
|
|
|
* characters as percent-style escapes, and return a pointer to it.
|
|
|
|
*/
|
2017-01-31 13:29:53 +00:00
|
|
|
gchar *
|
|
|
|
format_uri(wmem_allocator_t* allocator, const GByteArray *bytes, const gchar *reserved_chars)
|
2007-01-11 22:12:33 +00:00
|
|
|
{
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_VARS;
|
|
|
|
static const guchar reserved_def[] = ":/?#[]@!$&'()*+,;= ";
|
2012-12-22 23:27:40 +00:00
|
|
|
const guchar *reserved = reserved_def;
|
|
|
|
guint8 c;
|
2019-07-25 12:23:36 +00:00
|
|
|
guint byte_index, i;
|
2012-12-22 23:27:40 +00:00
|
|
|
gboolean is_reserved = FALSE;
|
|
|
|
|
|
|
|
if (! bytes)
|
|
|
|
return "";
|
|
|
|
|
|
|
|
if (reserved_chars)
|
|
|
|
reserved = reserved_chars;
|
|
|
|
|
Fix format_uri().
It was using the same index into the input and output strings, which
means that if it escaped any character, it would skip the next two
characters in the input sring.
It was also not clearing is_reserved before testing whether a character
was reserved, so once it saw a character that neede dto be escaped, it
would escape all subsequent characters.
It was only used in get_key_string(), which was never used, so it was
dead code, but let's at least fix it, even if we end up removing that
code, so that if we bring it back, we bring back a non-broken version,
and so that if anybody *else* uses it, it's not broken.
Change-Id: I36588efad36908e012023bcfbd813c749a6a254f
Reviewed-on: https://code.wireshark.org/review/33287
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2019-05-21 06:02:39 +00:00
|
|
|
for (byte_index = 0; byte_index < bytes->len; byte_index++) {
|
2012-12-22 23:27:40 +00:00
|
|
|
/*
|
2019-07-25 12:23:36 +00:00
|
|
|
* Make sure there is enough room for this character, if it
|
|
|
|
* expands to a percent plus 2 hex digits (which is the most
|
|
|
|
* it can expand to), and also enough room for a terminating '\0'.
|
2012-12-22 23:27:40 +00:00
|
|
|
*/
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_EXPAND(2);
|
Fix format_uri().
It was using the same index into the input and output strings, which
means that if it escaped any character, it would skip the next two
characters in the input sring.
It was also not clearing is_reserved before testing whether a character
was reserved, so once it saw a character that neede dto be escaped, it
would escape all subsequent characters.
It was only used in get_key_string(), which was never used, so it was
dead code, but let's at least fix it, even if we end up removing that
code, so that if we bring it back, we bring back a non-broken version,
and so that if anybody *else* uses it, it's not broken.
Change-Id: I36588efad36908e012023bcfbd813c749a6a254f
Reviewed-on: https://code.wireshark.org/review/33287
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2019-05-21 06:02:39 +00:00
|
|
|
c = bytes->data[byte_index];
|
2007-01-11 22:12:33 +00:00
|
|
|
|
Fix format_uri().
It was using the same index into the input and output strings, which
means that if it escaped any character, it would skip the next two
characters in the input sring.
It was also not clearing is_reserved before testing whether a character
was reserved, so once it saw a character that neede dto be escaped, it
would escape all subsequent characters.
It was only used in get_key_string(), which was never used, so it was
dead code, but let's at least fix it, even if we end up removing that
code, so that if we bring it back, we bring back a non-broken version,
and so that if anybody *else* uses it, it's not broken.
Change-Id: I36588efad36908e012023bcfbd813c749a6a254f
Reviewed-on: https://code.wireshark.org/review/33287
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2019-05-21 06:02:39 +00:00
|
|
|
is_reserved = FALSE;
|
2013-12-21 15:12:11 +00:00
|
|
|
if (!g_ascii_isprint(c) || c == '%') {
|
2012-12-22 23:27:40 +00:00
|
|
|
is_reserved = TRUE;
|
Fix format_uri().
It was using the same index into the input and output strings, which
means that if it escaped any character, it would skip the next two
characters in the input sring.
It was also not clearing is_reserved before testing whether a character
was reserved, so once it saw a character that neede dto be escaped, it
would escape all subsequent characters.
It was only used in get_key_string(), which was never used, so it was
dead code, but let's at least fix it, even if we end up removing that
code, so that if we bring it back, we bring back a non-broken version,
and so that if anybody *else* uses it, it's not broken.
Change-Id: I36588efad36908e012023bcfbd813c749a6a254f
Reviewed-on: https://code.wireshark.org/review/33287
Petri-Dish: Guy Harris <guy@alum.mit.edu>
Tested-by: Petri Dish Buildbot
Reviewed-by: Guy Harris <guy@alum.mit.edu>
2019-05-21 06:02:39 +00:00
|
|
|
} else {
|
|
|
|
for (i = 0; reserved[i]; i++) {
|
|
|
|
if (c == reserved[i])
|
|
|
|
is_reserved = TRUE;
|
|
|
|
}
|
2012-12-22 23:27:40 +00:00
|
|
|
}
|
2007-01-11 22:12:33 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
if (!is_reserved) {
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR(c);
|
2012-12-22 23:27:40 +00:00
|
|
|
} else {
|
2019-07-25 12:23:36 +00:00
|
|
|
FMTBUF_PUTCHAR('%');
|
|
|
|
FMTBUF_PUTCHAR(hex[c >> 4]);
|
|
|
|
FMTBUF_PUTCHAR(hex[c & 0xF]);
|
2012-12-22 23:27:40 +00:00
|
|
|
}
|
2007-01-11 22:12:33 +00:00
|
|
|
}
|
2017-01-31 13:29:53 +00:00
|
|
|
fmtbuf[column] = '\0';
|
|
|
|
return fmtbuf;
|
2007-01-11 22:12:33 +00:00
|
|
|
}
|
|
|
|
|
2007-01-11 02:42:34 +00:00
|
|
|
/**
|
|
|
|
* Create a copy of a GByteArray
|
|
|
|
*
|
|
|
|
* @param ba The byte array to be copied.
|
|
|
|
* @return If ba exists, a freshly allocated copy. NULL otherwise.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
GByteArray *
|
2014-09-04 01:39:04 +00:00
|
|
|
byte_array_dup(const GByteArray *ba)
|
|
|
|
{
|
2007-01-11 02:42:34 +00:00
|
|
|
GByteArray *new_ba;
|
|
|
|
|
|
|
|
if (!ba)
|
2012-12-22 23:27:40 +00:00
|
|
|
return NULL;
|
2007-01-11 02:42:34 +00:00
|
|
|
|
|
|
|
new_ba = g_byte_array_new();
|
|
|
|
g_byte_array_append(new_ba, ba->data, ba->len);
|
|
|
|
return new_ba;
|
|
|
|
}
|
|
|
|
|
2005-12-02 13:16:58 +00:00
|
|
|
#define SUBID_BUF_LEN 5
|
|
|
|
gboolean
|
2014-09-04 01:39:04 +00:00
|
|
|
oid_str_to_bytes(const char *oid_str, GByteArray *bytes)
|
|
|
|
{
|
2013-10-06 02:31:10 +00:00
|
|
|
return rel_oid_str_to_bytes(oid_str, bytes, TRUE);
|
|
|
|
}
|
|
|
|
gboolean
|
2014-09-04 01:39:04 +00:00
|
|
|
rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, gboolean is_absolute)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
guint32 subid0, subid, sicnt, i;
|
|
|
|
const char *p, *dot;
|
|
|
|
guint8 buf[SUBID_BUF_LEN];
|
|
|
|
|
|
|
|
g_byte_array_set_size(bytes, 0);
|
|
|
|
|
|
|
|
/* check syntax */
|
|
|
|
p = oid_str;
|
|
|
|
dot = NULL;
|
|
|
|
while (*p) {
|
2014-10-17 23:10:53 +00:00
|
|
|
if (!g_ascii_isdigit(*p) && (*p != '.')) return FALSE;
|
2012-12-22 23:27:40 +00:00
|
|
|
if (*p == '.') {
|
2013-10-06 02:31:10 +00:00
|
|
|
if (p == oid_str && is_absolute) return FALSE;
|
2012-12-22 23:27:40 +00:00
|
|
|
if (!*(p+1)) return FALSE;
|
|
|
|
if ((p-1) == dot) return FALSE;
|
|
|
|
dot = p;
|
|
|
|
}
|
|
|
|
p++;
|
2005-12-02 13:16:58 +00:00
|
|
|
}
|
2012-12-22 23:27:40 +00:00
|
|
|
if (!dot) return FALSE;
|
|
|
|
|
|
|
|
p = oid_str;
|
2013-10-06 02:31:10 +00:00
|
|
|
sicnt = is_absolute ? 0 : 2;
|
|
|
|
if (!is_absolute) p++;
|
2012-12-22 23:27:40 +00:00
|
|
|
subid0 = 0; /* squelch GCC complaints */
|
|
|
|
while (*p) {
|
|
|
|
subid = 0;
|
2014-10-17 23:10:53 +00:00
|
|
|
while (g_ascii_isdigit(*p)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
subid *= 10;
|
|
|
|
subid += *p - '0';
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
if (sicnt == 0) {
|
|
|
|
subid0 = subid;
|
|
|
|
if (subid0 > 2) return FALSE;
|
|
|
|
} else if (sicnt == 1) {
|
|
|
|
if ((subid0 < 2) && (subid > 39)) return FALSE;
|
|
|
|
subid += 40 * subid0;
|
|
|
|
}
|
|
|
|
if (sicnt) {
|
|
|
|
i = SUBID_BUF_LEN;
|
|
|
|
do {
|
|
|
|
i--;
|
|
|
|
buf[i] = 0x80 | (subid % 0x80);
|
|
|
|
subid >>= 7;
|
|
|
|
} while (subid && i);
|
|
|
|
buf[SUBID_BUF_LEN-1] &= 0x7F;
|
|
|
|
g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
|
|
|
|
}
|
|
|
|
sicnt++;
|
|
|
|
if (*p) p++;
|
2005-12-02 13:16:58 +00:00
|
|
|
}
|
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
return TRUE;
|
2005-12-02 13:16:58 +00:00
|
|
|
}
|
|
|
|
|
2007-01-11 02:42:34 +00:00
|
|
|
/**
|
|
|
|
* Compare the contents of two GByteArrays
|
|
|
|
*
|
|
|
|
* @param ba1 A byte array
|
|
|
|
* @param ba2 A byte array
|
|
|
|
* @return If both arrays are non-NULL and their lengths are equal and
|
|
|
|
* their contents are equal, returns TRUE. Otherwise, returns
|
|
|
|
* FALSE.
|
|
|
|
*
|
|
|
|
* XXX - Should this be in strutil.c?
|
|
|
|
*/
|
|
|
|
gboolean
|
2014-09-04 01:39:04 +00:00
|
|
|
byte_array_equal(GByteArray *ba1, GByteArray *ba2)
|
|
|
|
{
|
2007-01-11 02:42:34 +00:00
|
|
|
if (!ba1 || !ba2)
|
2012-12-22 23:27:40 +00:00
|
|
|
return FALSE;
|
2007-01-11 02:42:34 +00:00
|
|
|
|
|
|
|
if (ba1->len != ba2->len)
|
2012-12-22 23:27:40 +00:00
|
|
|
return FALSE;
|
2007-01-11 02:42:34 +00:00
|
|
|
|
|
|
|
if (memcmp(ba1->data, ba2->data, ba1->len) != 0)
|
2012-12-22 23:27:40 +00:00
|
|
|
return FALSE;
|
2007-01-11 02:42:34 +00:00
|
|
|
|
|
|
|
return TRUE;
|
|
|
|
}
|
|
|
|
|
2003-12-29 04:07:06 +00:00
|
|
|
|
2004-05-01 20:46:24 +00:00
|
|
|
/* Return a XML escaped representation of the unescaped string.
|
|
|
|
* The returned string must be freed when no longer in use. */
|
|
|
|
gchar *
|
|
|
|
xml_escape(const gchar *unescaped)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
GString *buffer = g_string_sized_new(128);
|
|
|
|
const gchar *p;
|
|
|
|
gchar c;
|
|
|
|
|
|
|
|
p = unescaped;
|
|
|
|
while ( (c = *p++) ) {
|
|
|
|
switch (c) {
|
|
|
|
case '<':
|
|
|
|
g_string_append(buffer, "<");
|
|
|
|
break;
|
|
|
|
case '>':
|
|
|
|
g_string_append(buffer, ">");
|
|
|
|
break;
|
|
|
|
case '&':
|
|
|
|
g_string_append(buffer, "&");
|
|
|
|
break;
|
|
|
|
case '\'':
|
2014-10-06 23:02:16 +00:00
|
|
|
g_string_append(buffer, "'");
|
2012-12-22 23:27:40 +00:00
|
|
|
break;
|
|
|
|
case '"':
|
|
|
|
g_string_append(buffer, """);
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
g_string_append_c(buffer, c);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
/* Return the string value contained within the GString
|
|
|
|
* after getting rid of the GString structure.
|
|
|
|
* This is the way to do this, see the GLib reference. */
|
|
|
|
return g_string_free(buffer, FALSE);
|
2004-05-01 20:46:24 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
|
2003-08-27 15:23:11 +00:00
|
|
|
/* Return the first occurrence of needle in haystack.
|
|
|
|
* If not found, return NULL.
|
|
|
|
* If either haystack or needle has 0 length, return NULL.
|
2013-11-16 09:26:08 +00:00
|
|
|
* Algorithm copied from GNU's glibc 2.3.2 memmem() under LGPL 2.1+ */
|
2003-08-27 15:23:11 +00:00
|
|
|
const guint8 *
|
|
|
|
epan_memmem(const guint8 *haystack, guint haystack_len,
|
2012-12-22 23:27:40 +00:00
|
|
|
const guint8 *needle, guint needle_len)
|
2003-08-27 15:23:11 +00:00
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const guint8 *begin;
|
|
|
|
const guint8 *const last_possible = haystack + haystack_len - needle_len;
|
|
|
|
|
|
|
|
if (needle_len == 0) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (needle_len > haystack_len) {
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (begin = haystack ; begin <= last_possible; ++begin) {
|
|
|
|
if (begin[0] == needle[0] &&
|
|
|
|
!memcmp(&begin[1], needle + 1,
|
|
|
|
needle_len - 1)) {
|
|
|
|
return begin;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return NULL;
|
2003-08-27 15:23:11 +00:00
|
|
|
}
|
2004-08-13 02:39:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Scan the search string to make sure it's valid hex. Return the
|
|
|
|
* number of bytes in nbytes.
|
|
|
|
*/
|
|
|
|
guint8 *
|
|
|
|
convert_string_to_hex(const char *string, size_t *nbytes)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
size_t n_bytes;
|
|
|
|
const char *p;
|
2014-10-17 23:10:53 +00:00
|
|
|
gchar c;
|
2012-12-22 23:27:40 +00:00
|
|
|
guint8 *bytes, *q, byte_val;
|
|
|
|
|
|
|
|
n_bytes = 0;
|
|
|
|
p = &string[0];
|
|
|
|
for (;;) {
|
|
|
|
c = *p++;
|
|
|
|
if (c == '\0')
|
|
|
|
break;
|
2014-10-17 23:10:53 +00:00
|
|
|
if (g_ascii_isspace(c))
|
2012-12-22 23:27:40 +00:00
|
|
|
continue; /* allow white space */
|
|
|
|
if (c==':' || c=='.' || c=='-')
|
|
|
|
continue; /* skip any ':', '.', or '-' between bytes */
|
2014-10-17 23:10:53 +00:00
|
|
|
if (!g_ascii_isxdigit(c)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
/* Not a valid hex digit - fail */
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* We can only match bytes, not nibbles; we must have a valid
|
|
|
|
* hex digit immediately after that hex digit.
|
|
|
|
*/
|
|
|
|
c = *p++;
|
2014-10-17 23:10:53 +00:00
|
|
|
if (!g_ascii_isxdigit(c))
|
2012-12-22 23:27:40 +00:00
|
|
|
return NULL;
|
|
|
|
|
|
|
|
/* 2 hex digits = 1 byte */
|
|
|
|
n_bytes++;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Were we given any hex digits?
|
|
|
|
*/
|
|
|
|
if (n_bytes == 0) {
|
|
|
|
/* No. */
|
|
|
|
return NULL;
|
2004-08-13 02:39:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2012-12-22 23:27:40 +00:00
|
|
|
* OK, it's valid, and it generates "n_bytes" bytes; generate the
|
|
|
|
* raw byte array.
|
2004-08-13 02:39:49 +00:00
|
|
|
*/
|
2013-03-18 21:16:23 +00:00
|
|
|
bytes = (guint8 *)g_malloc(n_bytes);
|
2012-12-22 23:27:40 +00:00
|
|
|
p = &string[0];
|
|
|
|
q = &bytes[0];
|
|
|
|
for (;;) {
|
|
|
|
c = *p++;
|
|
|
|
if (c == '\0')
|
|
|
|
break;
|
2014-10-17 23:10:53 +00:00
|
|
|
if (g_ascii_isspace(c))
|
2012-12-22 23:27:40 +00:00
|
|
|
continue; /* allow white space */
|
|
|
|
if (c==':' || c=='.' || c=='-')
|
|
|
|
continue; /* skip any ':', '.', or '-' between bytes */
|
|
|
|
/* From the loop above, we know this is a hex digit */
|
2014-01-08 00:28:13 +00:00
|
|
|
byte_val = ws_xton(c);
|
2012-12-22 23:27:40 +00:00
|
|
|
byte_val <<= 4;
|
|
|
|
|
|
|
|
/* We also know this is a hex digit */
|
|
|
|
c = *p++;
|
2014-01-08 00:28:13 +00:00
|
|
|
byte_val |= ws_xton(c);
|
2012-12-22 23:27:40 +00:00
|
|
|
|
|
|
|
*q++ = byte_val;
|
|
|
|
}
|
|
|
|
*nbytes = n_bytes;
|
|
|
|
return bytes;
|
2004-08-13 02:39:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Copy if if it's a case-sensitive search; uppercase it if it's
|
|
|
|
* a case-insensitive search.
|
|
|
|
*/
|
|
|
|
char *
|
|
|
|
convert_string_case(const char *string, gboolean case_insensitive)
|
|
|
|
{
|
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
if (case_insensitive) {
|
|
|
|
return g_utf8_strup(string, -1);
|
|
|
|
} else {
|
|
|
|
return g_strdup(string);
|
|
|
|
}
|
2004-08-13 02:39:49 +00:00
|
|
|
}
|
|
|
|
|
2016-01-20 01:52:17 +00:00
|
|
|
const char *
|
2007-02-07 13:45:28 +00:00
|
|
|
epan_strcasestr(const char *haystack, const char *needle)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
gsize hlen = strlen(haystack);
|
|
|
|
gsize nlen = strlen(needle);
|
|
|
|
|
|
|
|
while (hlen-- >= nlen) {
|
|
|
|
if (!g_ascii_strncasecmp(haystack, needle, nlen))
|
2016-01-20 01:52:17 +00:00
|
|
|
return haystack;
|
2012-12-22 23:27:40 +00:00
|
|
|
haystack++;
|
|
|
|
}
|
|
|
|
return NULL;
|
2007-02-07 13:45:28 +00:00
|
|
|
}
|
2009-02-17 23:15:35 +00:00
|
|
|
|
|
|
|
/*
 * Return 'string' itself, or the literal "[NULL]" when it is a NULL
 * pointer, so the result can always be passed to a "%s" format.
 */
const char *
string_or_null(const char *string)
{
    return (string != NULL) ? string : "[NULL]";
}
|
2009-08-19 18:37:13 +00:00
|
|
|
|
2010-08-30 15:33:32 +00:00
|
|
|
int
|
2009-08-19 18:37:13 +00:00
|
|
|
escape_string_len(const char *string)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const char *p;
|
|
|
|
gchar c;
|
|
|
|
int repr_len;
|
|
|
|
|
|
|
|
repr_len = 0;
|
|
|
|
for (p = string; (c = *p) != '\0'; p++) {
|
|
|
|
/* Backslashes and double-quotes must
|
|
|
|
* be escaped */
|
|
|
|
if (c == '\\' || c == '"') {
|
|
|
|
repr_len += 2;
|
|
|
|
}
|
|
|
|
/* Values that can't nicely be represented
|
|
|
|
* in ASCII need to be escaped. */
|
2013-12-21 15:01:45 +00:00
|
|
|
else if (!g_ascii_isprint(c)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
/* c --> \xNN */
|
|
|
|
repr_len += 4;
|
|
|
|
}
|
|
|
|
/* Other characters are just passed through. */
|
|
|
|
else {
|
|
|
|
repr_len++;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return repr_len + 2; /* string plus leading and trailing quotes */
|
2009-08-19 18:37:13 +00:00
|
|
|
}
|
|
|
|
|
2010-08-30 15:33:32 +00:00
|
|
|
char *
|
2009-08-19 18:37:13 +00:00
|
|
|
escape_string(char *buf, const char *string)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const gchar *p;
|
|
|
|
gchar c;
|
|
|
|
char *bufp;
|
|
|
|
char hexbuf[3];
|
|
|
|
|
|
|
|
bufp = buf;
|
|
|
|
*bufp++ = '"';
|
|
|
|
for (p = string; (c = *p) != '\0'; p++) {
|
|
|
|
/* Backslashes and double-quotes must
|
|
|
|
* be escaped. */
|
|
|
|
if (c == '\\' || c == '"') {
|
|
|
|
*bufp++ = '\\';
|
|
|
|
*bufp++ = c;
|
|
|
|
}
|
|
|
|
/* Values that can't nicely be represented
|
|
|
|
* in ASCII need to be escaped. */
|
2013-12-21 15:01:45 +00:00
|
|
|
else if (!g_ascii_isprint(c)) {
|
2012-12-22 23:27:40 +00:00
|
|
|
/* c --> \xNN */
|
|
|
|
g_snprintf(hexbuf,sizeof(hexbuf), "%02x", (unsigned char) c);
|
|
|
|
*bufp++ = '\\';
|
|
|
|
*bufp++ = 'x';
|
|
|
|
*bufp++ = hexbuf[0];
|
|
|
|
*bufp++ = hexbuf[1];
|
|
|
|
}
|
|
|
|
/* Other characters are just passed through. */
|
|
|
|
else {
|
|
|
|
*bufp++ = c;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*bufp++ = '"';
|
|
|
|
*bufp = '\0';
|
|
|
|
return buf;
|
2009-08-19 18:37:13 +00:00
|
|
|
}
|
2009-10-23 01:56:09 +00:00
|
|
|
|
|
|
|
#define GN_CHAR_ALPHABET_SIZE 128
|
|
|
|
|
|
|
|
static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {
|
|
|
|
|
|
|
|
/*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */
|
|
|
|
|
|
|
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
|
|
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
|
|
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
|
|
|
'?', '?', '?', '?', '?', '?', '?', '?',
|
|
|
|
' ', '!', '\"','#', '$', '%', '&', '\'',
|
|
|
|
'(', ')', '*', '+', ',', '-', '.', '/',
|
|
|
|
'0', '1', '2', '3', '4', '5', '6', '7',
|
|
|
|
'8', '9', ':', ';', '<', '=', '>', '?',
|
|
|
|
'@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
|
|
|
|
'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O',
|
|
|
|
'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W',
|
|
|
|
'X', 'Y', 'Z', '[', '\\', ']', '^', '_',
|
|
|
|
'`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
|
|
|
|
'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
|
|
|
|
'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
|
|
|
|
'x', 'y', 'z', '{', '|', '}', '~', '?'
|
|
|
|
};
|
|
|
|
|
2010-08-30 15:33:32 +00:00
|
|
|
static gunichar
|
2009-10-23 01:56:09 +00:00
|
|
|
char_def_ia5_alphabet_decode(unsigned char value)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
if (value < GN_CHAR_ALPHABET_SIZE) {
|
|
|
|
return IA5_default_alphabet[value];
|
2009-10-23 01:56:09 +00:00
|
|
|
}
|
2012-12-22 23:27:40 +00:00
|
|
|
else {
|
|
|
|
return '?';
|
2009-10-23 01:56:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
|
|
|
|
{
|
|
|
|
int i, j;
|
|
|
|
gunichar buf;
|
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
for (i = 0, j = 0; j < len; j++) {
|
|
|
|
buf = char_def_ia5_alphabet_decode(src[j]);
|
|
|
|
i += g_unichar_to_utf8(buf,&(dest[i]));
|
2009-10-23 01:56:09 +00:00
|
|
|
}
|
|
|
|
dest[i]=0;
|
|
|
|
}
|
|
|
|
|
2010-01-26 18:21:17 +00:00
|
|
|
/*
|
2011-04-13 16:56:24 +00:00
|
|
|
* This function takes a string and copies it, inserting a 'chr' before
|
|
|
|
* every 'chr' in it.
|
2010-01-26 18:21:17 +00:00
|
|
|
*/
|
|
|
|
gchar*
|
2011-04-13 16:56:24 +00:00
|
|
|
ws_strdup_escape_char (const gchar *str, const gchar chr)
|
2010-01-26 18:21:17 +00:00
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const gchar *p;
|
|
|
|
gchar *q, *new_str;
|
2010-01-26 18:21:17 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
if(!str)
|
|
|
|
return NULL;
|
2010-01-26 18:21:17 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
p = str;
|
|
|
|
/* Worst case: A string that is full of 'chr' */
|
2013-03-18 21:16:23 +00:00
|
|
|
q = new_str = (gchar *)g_malloc (strlen(str) * 2 + 1);
|
2010-01-26 18:21:17 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
while(*p != 0) {
|
|
|
|
if(*p == chr)
|
|
|
|
*q++ = chr;
|
2010-01-26 18:21:17 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
*q++ = *p++;
|
|
|
|
}
|
|
|
|
*q = '\0';
|
2010-01-26 18:21:17 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
return new_str;
|
2010-01-26 18:21:17 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* This function takes a string and copies it, removing any occurences of double
|
2011-04-13 16:56:24 +00:00
|
|
|
* 'chr' with a single 'chr'.
|
2010-01-26 18:21:17 +00:00
|
|
|
*/
|
|
|
|
gchar*
|
2011-04-13 16:56:24 +00:00
|
|
|
ws_strdup_unescape_char (const gchar *str, const char chr)
|
2010-01-26 18:21:17 +00:00
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
const gchar *p;
|
|
|
|
gchar *q, *new_str;
|
|
|
|
|
|
|
|
if(!str)
|
|
|
|
return NULL;
|
|
|
|
|
|
|
|
p = str;
|
|
|
|
/* Worst case: A string that contains no 'chr' */
|
2013-03-18 21:16:23 +00:00
|
|
|
q = new_str = (gchar *)g_malloc (strlen(str) + 1);
|
2012-12-22 23:27:40 +00:00
|
|
|
|
|
|
|
while(*p != 0) {
|
|
|
|
*q++ = *p;
|
|
|
|
if ((*p == chr) && (*(p+1) == chr))
|
|
|
|
p += 2;
|
|
|
|
else
|
|
|
|
p++;
|
|
|
|
}
|
|
|
|
*q = '\0';
|
|
|
|
|
|
|
|
return new_str;
|
2010-01-26 18:21:17 +00:00
|
|
|
}
|
2010-05-27 15:51:25 +00:00
|
|
|
|
|
|
|
/* Create a newly-allocated string with replacement values. */
|
2014-09-04 01:39:04 +00:00
|
|
|
gchar *
|
|
|
|
string_replace(const gchar* str, const gchar *old_val, const gchar *new_val)
|
|
|
|
{
|
2012-12-22 23:27:40 +00:00
|
|
|
gchar **str_parts;
|
|
|
|
gchar *new_str;
|
2010-05-27 15:51:25 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
if (!str || !old_val) {
|
|
|
|
return NULL;
|
|
|
|
}
|
2010-08-30 15:33:32 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
str_parts = g_strsplit(str, old_val, 0);
|
|
|
|
new_str = g_strjoinv(new_val, str_parts);
|
|
|
|
g_strfreev(str_parts);
|
2010-08-30 15:33:32 +00:00
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
return new_str;
|
2010-05-27 15:51:25 +00:00
|
|
|
}
|
2012-10-22 12:20:36 +00:00
|
|
|
|
2018-12-28 15:55:58 +00:00
|
|
|
gchar*
|
|
|
|
format_size_wmem(wmem_allocator_t *allocator, gint64 size, format_size_flags_e flags)
|
|
|
|
{
|
|
|
|
gchar *str = format_size(size, flags);
|
|
|
|
gchar *ptr = wmem_strdup(allocator, str);
|
|
|
|
g_free(str);
|
|
|
|
return ptr;
|
|
|
|
}
|
|
|
|
|
2012-12-22 23:27:40 +00:00
|
|
|
/*
|
2019-07-26 18:43:17 +00:00
|
|
|
* Editor modelines - https://www.wireshark.org/tools/modelines.html
|
2012-12-22 23:27:40 +00:00
|
|
|
*
|
|
|
|
* Local variables:
|
|
|
|
* c-basic-offset: 4
|
|
|
|
* tab-width: 8
|
|
|
|
* indent-tabs-mode: nil
|
|
|
|
* End:
|
|
|
|
*
|
|
|
|
* vi: set shiftwidth=4 tabstop=8 expandtab:
|
|
|
|
* :indentSize=4:tabSize=8:noTabs=true:
|
|
|
|
*/
|