wireshark/epan/strutil.c

/* strutil.c
 * String utility routines
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */

#include "config.h"

#include <stdlib.h>
#include <string.h>
#include <glib.h>
#include "strutil.h"

#include <wsutil/str_util.h>
#include <epan/proto.h>

#ifdef _WIN32
#include <windows.h>
#include <tchar.h>
#include <wchar.h>
#endif

/*
 * Upper-case hexadecimal digit characters, indexed by nibble value (0-15).
 * Used by format_uri() to emit percent-escapes.
 */
static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                              '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

/*
 * Given a pointer into a data buffer, and to the end of the buffer,
 * find the end of the (putative) line at that position in the data
 * buffer.
 * Return a pointer to the EOL character(s) in "*eol".
 */
const guchar *
find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
{
    const guchar *lineend;

    lineend = (guchar *)memchr(data, '\n', dataend - data);
    if (lineend == NULL) {
        /*
         * No LF - line is probably continued in next TCP segment.
         */
        lineend = dataend;
        *eol = dataend;
    } else {
        /*
         * Is the LF at the beginning of the line?
         */
        if (lineend > data) {
            /*
             * No - is it preceded by a carriage return?
             * (Perhaps it's supposed to be, but that's not guaranteed....)
             */
            if (*(lineend - 1) == '\r') {
                /*
                 * Yes.  The EOL starts with the CR.
                 */
                *eol = lineend - 1;
            } else {
                /*
                 * No.  The EOL starts with the LF.
                 */
                *eol = lineend;

                /*
                 * I seem to remember that we once saw lines ending with LF-CR
                 * in an HTTP request or response, so check if it's *followed*
                 * by a carriage return.
                 */
                if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
                    /*
                     * It's <non-LF><LF><CR>; say it ends with the CR.
                     */
                    lineend++;
                }
            }
        } else {
            /*
             * Yes - the EOL starts with the LF.
             */
            *eol = lineend;
        }

        /*
         * Point to the character after the last character.
         */
        lineend++;
    }
    return lineend;
}

/*
 * Measure the token that starts at "linep" and find where the following
 * token (if any) begins.
 *
 * A token ends at the first blank, CR or LF, or at "lineend".  Blanks
 * after the token are skipped and "*next_token" is set to the first
 * byte after them.
 *
 * Returns the token length; 0 means there is no token at "linep".
 */
int
get_token_len(const guchar *linep, const guchar *lineend,
        const guchar **next_token)
{
    const guchar *cursor = linep;
    int len;

    /* Advance to the first delimiter or to the end of the buffer. */
    for (; cursor < lineend; cursor++) {
        if (*cursor == ' ' || *cursor == '\r' || *cursor == '\n')
            break;
    }
    len = (int) (cursor - linep);

    /* Step over any blanks that follow the token. */
    while (cursor < lineend && *cursor == ' ')
        cursor++;

    *next_token = cursor;
    return len;
}


#define    INITIAL_FMTBUF_SIZE    128

/*
 * Build a printable copy of the "len" bytes at "string".
 *
 * Printable ASCII is copied through; every other byte becomes a C-style
 * escape: \a \b \f \n \r \t \v where one exists, otherwise a backslash
 * followed by three octal digits.  The result is NUL-terminated and is
 * allocated (and grown as needed) from "allocator".
 */
gchar *
format_text(wmem_allocator_t* allocator, const guchar *string, size_t len)
{
    int capacity = INITIAL_FMTBUF_SIZE;
    gchar *out = (gchar*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE);
    int pos = 0;
    const guchar *src;
    const guchar *const src_end = string + len;

    for (src = string; src < src_end; src++) {
        const guchar ch = *src;
        gchar letter;

        /*
         * One input byte expands to at most four output characters
         * (backslash + 3 octal digits), and a terminating '\0' must
         * still fit afterwards.  Doubling always adds at least 128
         * bytes, which is more than enough.
         */
        if (pos+3+1 >= capacity) {
            capacity *= 2;
            out = (gchar *)wmem_realloc(allocator, out, capacity);
        }

        if (g_ascii_isprint(ch)) {
            out[pos++] = ch;
            continue;
        }

        out[pos++] = '\\';

        /* Pick the single-letter escape, if the byte has one. */
        switch (ch) {
            case '\a': letter = 'a'; break;
            case '\b': letter = 'b'; break; /* BS */
            case '\f': letter = 'f'; break; /* FF */
            case '\n': letter = 'n'; break; /* NL */
            case '\r': letter = 'r'; break; /* CR */
            case '\t': letter = 't'; break; /* tab */
            case '\v': letter = 'v'; break;
            default:   letter = '\0'; break;
        }

        if (letter != '\0') {
            out[pos++] = letter;
        } else {
            /* No short form: emit the byte as three octal digits. */
            out[pos++] = ((ch >> 6) & 03) + '0';
            out[pos++] = ((ch >> 3) & 07) + '0';
            out[pos++] = ((ch >> 0) & 07) + '0';
        }
    }
    out[pos] = '\0';
    return out;
}


/*
 * Build a printable copy of the "len" bytes at "string".
 *
 * Printable ASCII is copied through.  A non-printable byte that
 * g_ascii_isspace() classifies as white space is replaced by a single
 * space.  Any other byte becomes a C-style escape (a single-letter
 * escape where one exists, otherwise a backslash and three octal
 * digits).  The result is NUL-terminated and is allocated (and grown as
 * needed) from "allocator".
 */
gchar *
format_text_wsp(wmem_allocator_t* allocator, const guchar *string, size_t len)
{
    int capacity = INITIAL_FMTBUF_SIZE;
    gchar *out = (gchar*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE);
    int pos = 0;
    const guchar *src;
    const guchar *const src_end = string + len;

    for (src = string; src < src_end; src++) {
        const guchar ch = *src;
        gchar letter;

        /*
         * Worst case is a backslash plus three octal digits, and the
         * terminating '\0' must still fit afterwards.  Doubling always
         * adds at least 128 bytes, which is more than enough.
         */
        if (pos+3+1 >= capacity) {
            capacity *= 2;
            out = (gchar *)wmem_realloc(allocator, out, capacity);
        }

        if (g_ascii_isprint(ch)) {
            out[pos++] = ch;
            continue;
        }
        if (g_ascii_isspace(ch)) {
            out[pos++] = ' ';
            continue;
        }

        out[pos++] = '\\';

        /* Pick the single-letter escape, if the byte has one. */
        switch (ch) {
            case '\a': letter = 'a'; break;
            case '\b': letter = 'b'; break; /* BS */
            case '\f': letter = 'f'; break; /* FF */
            case '\n': letter = 'n'; break; /* NL */
            case '\r': letter = 'r'; break; /* CR */
            case '\t': letter = 't'; break; /* tab */
            case '\v': letter = 'v'; break;
            default:   letter = '\0'; break;
        }

        if (letter != '\0') {
            out[pos++] = letter;
        } else {
            /* No short form: emit the byte as three octal digits. */
            out[pos++] = ((ch >> 6) & 03) + '0';
            out[pos++] = ((ch >> 3) & 07) + '0';
            out[pos++] = ((ch >> 0) & 07) + '0';
        }
    }
    out[pos] = '\0';
    return out;
}

/*
 * Build a printable copy of the "len" bytes at "string".
 *
 * Printable ASCII is copied through.  A non-printable byte that
 * g_ascii_isspace() classifies as white space is replaced by a single
 * space; every other byte is replaced by "chr".  Each input byte yields
 * exactly one output character.  The result is NUL-terminated and is
 * allocated (and grown as needed) from "allocator".
 */
gchar *
format_text_chr(wmem_allocator_t* allocator, const guchar *string, const size_t len, const guchar chr)
{
    int capacity = INITIAL_FMTBUF_SIZE;
    gchar *out = (gchar*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE);
    int pos = 0;
    size_t idx;

    for (idx = 0; idx < len; idx++) {
        const guchar ch = string[idx];

        /*
         * Keep room for this character and for the terminating '\0';
         * doubling always adds at least 128 bytes.
         */
        if (pos+1 >= capacity) {
            capacity *= 2;
            out = (gchar *)wmem_realloc(allocator, out, capacity);
        }

        if (g_ascii_isprint(ch))
            out[pos] = ch;
        else if (g_ascii_isspace(ch))
            out[pos] = ' ';
        else
            out[pos] = chr;
        pos++;
    }
    out[pos] = '\0';
    return out;
}

/* Tell whether "c" is one of the byte separators '-', ':' or '.'. */
static gboolean
is_byte_sep(guint8 c)
{
    switch (c) {
        case '-':
        case ':':
        case '.':
            return TRUE;
        default:
            return FALSE;
    }
}

/* Turn a string of hex digits with optional separators (defined by
 * is_byte_sep()) into a byte array.
 *
 * "bytes" is emptied first and then filled with the decoded bytes.
 * The input is consumed in groups: four hex digits (two bytes), two hex
 * digits (one byte), or a single hex digit (one byte) that is either
 * followed by a separator or is the last character of the string.
 *
 * If "force_separators" is TRUE, a two- or four-digit group that is
 * followed by anything other than a separator or the end of the string
 * makes the conversion fail.
 *
 * Returns TRUE on success.  Returns FALSE if "hex_str" or "bytes" is
 * NULL or the string is malformed; on a malformed string "bytes" may
 * already hold the bytes decoded before the error was detected.
 */
gboolean
hex_str_to_bytes(const char *hex_str, GByteArray *bytes, gboolean force_separators)
{
    guint8        val;
    const gchar    *p, *q, *r, *s, *punct;
    char        four_digits_first_half[3];
    char        four_digits_second_half[3];
    char        two_digits[3];
    char        one_digit[2];

    if (! hex_str || ! bytes) {
        return FALSE;
    }
    g_byte_array_set_size(bytes, 0);
    p = hex_str;
    while (*p) {
        /*
         * Look-ahead pointers.  They may point past the terminating
         * '\0'; each is only dereferenced after the preceding character
         * has been found to be non-NUL (short-circuit evaluation below).
         */
        q = p+1;
        r = p+2;
        s = p+3;

        if (*q && *r && *s
                && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q) &&
                g_ascii_isxdigit(*r) && g_ascii_isxdigit(*s)) {
            four_digits_first_half[0] = *p;
            four_digits_first_half[1] = *q;
            four_digits_first_half[2] = '\0';
            four_digits_second_half[0] = *r;
            four_digits_second_half[1] = *s;
            four_digits_second_half[2] = '\0';

            /*
             * Four or more hex digits in a row.
             */
            val = (guint8) strtoul(four_digits_first_half, NULL, 16);
            g_byte_array_append(bytes, &val, 1);
            val = (guint8) strtoul(four_digits_second_half, NULL, 16);
            g_byte_array_append(bytes, &val, 1);

            punct = s + 1;
            if (*punct) {
                /*
                 * Make sure the character after
                 * the fourth hex digit is a byte
                 * separator, i.e. that we don't have
                 * more than four hex digits, or a
                 * bogus character.
                 */
                if (is_byte_sep(*punct)) {
                    p = punct + 1;
                    continue;
                }
                else if (force_separators) {
                    return FALSE;
                }
            }
            /*
             * End of string, or separators are not mandatory: resume
             * parsing at the character right after the group.
             */
            p = punct;
            continue;
        }
        else if (*q && g_ascii_isxdigit(*p) && g_ascii_isxdigit(*q)) {
            two_digits[0] = *p;
            two_digits[1] = *q;
            two_digits[2] = '\0';

            /*
             * Two hex digits in a row.
             */
            val = (guint8) strtoul(two_digits, NULL, 16);
            g_byte_array_append(bytes, &val, 1);
            punct = q + 1;
            if (*punct) {
                /*
                 * Make sure the character after
                 * the second hex digit is a byte
                 * separator, i.e. that we don't have
                 * more than two hex digits, or a
                 * bogus character.
                 */
                if (is_byte_sep(*punct)) {
                    p = punct + 1;
                    continue;
                }
                else if (force_separators) {
                    return FALSE;
                }
            }
            /*
             * End of string, or separators are not mandatory: resume
             * parsing at the character right after the group.
             */
            p = punct;
            continue;
        }
        else if (*q && g_ascii_isxdigit(*p) && is_byte_sep(*q)) {
            one_digit[0] = *p;
            one_digit[1] = '\0';

            /*
             * Only one hex digit (not at the end of the string)
             */
            val = (guint8) strtoul(one_digit, NULL, 16);
            g_byte_array_append(bytes, &val, 1);
            p = q + 1;
            continue;
        }
        else if (!*q && g_ascii_isxdigit(*p)) {
            one_digit[0] = *p;
            one_digit[1] = '\0';

            /*
             * Only one hex digit (at the end of the string)
             */
            val = (guint8) strtoul(one_digit, NULL, 16);
            g_byte_array_append(bytes, &val, 1);
            p = q;
            continue;
        }
        else {
            /* Neither a hex digit group nor anything else we accept. */
            return FALSE;
        }
    }
    return TRUE;
}

/*
 * Decide which separator, if any, is acceptable after a hex pair, given
 * the character "c" that follows the first pair and the ENC_SEP_* bits
 * in "encoding".
 *
 * Returns:
 *   c   if "c" is ':', '-', '.' or ' ' and the matching ENC_SEP_* bit
 *       is set;
 *   0   if "c" is the end of the string, or is a hex digit and
 *       ENC_SEP_NONE is set;
 *   -1  in every other case (failure).
 */
static inline gchar
get_valid_byte_sep(gchar c, const guint encoding)
{
    guint wanted;

    switch (c) {
        case '\0':
            /* we were given the end of the string, so it's fine */
            return 0;
        case ':':
            wanted = ENC_SEP_COLON;
            break;
        case '-':
            wanted = ENC_SEP_DASH;
            break;
        case '.':
            wanted = ENC_SEP_DOT;
            break;
        case ' ':
            wanted = ENC_SEP_SPACE;
            break;
        default:
            /* a hex digit is fine only if separators may be omitted */
            if (g_ascii_isxdigit(c) && (encoding & ENC_SEP_NONE))
                return 0;
            return -1;
    }

    /* a separator character is valid only if its flag was requested */
    return (encoding & wanted) ? c : -1;
}

/* Turn a string of hex digits with optional separators (selected by the
 * ENC_SEP_* bits in "encoding", see get_valid_byte_sep()) into a byte
 * array. Unlike hex_str_to_bytes(), this will read as many hex-char
 * pairs as possible and not error if it hits a non-hex-char; instead it just ends
 * there. (i.e., like strtol()/atoi()/etc.) Unless fail_if_partial is TRUE.
 *
 * The string must start with two hex digits, otherwise the conversion
 * fails.  Decoded bytes are appended to "bytes" (it is not emptied on
 * entry); on failure "bytes" is truncated to length 0.
 *
 * The **endptr, if not NULL, is set to the position where parsing
 * stopped on success, or to hex_str on failure.
 *
 * "hex_str" is dereferenced without a NULL check whenever "bytes" is
 * non-NULL, so it must be a valid string in that case.
 *
 * Returns TRUE on success, FALSE on failure (including bytes == NULL).
 */
gboolean
hex_str_to_bytes_encoding(const gchar *hex_str, GByteArray *bytes, const gchar **endptr,
                          const guint encoding, const gboolean fail_if_partial)
{
    gint8 c, d;
    guint8 val;
    const gchar *end = hex_str;
    gboolean retval = FALSE;
    /* NOTE(review): -1 is used as a sentinel in a gchar and later tested
     * with "sep > 0"; this presumably assumes gchar is signed - confirm
     * for platforms where plain char is unsigned. */
    gchar sep = -1;

    /* a map from ASCII hex chars to their value */
    static const gint8 str_to_nibble[256] = {
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
         0, 1, 2, 3, 4, 5, 6, 7, 8, 9,-1,-1,-1,-1,-1,-1,
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,10,11,12,13,14,15,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,
        -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1
    };

    /* we must see two hex chars at the beginning, or fail */
    if (bytes && *end && g_ascii_isxdigit(*end) && g_ascii_isxdigit(*(end+1))) {
        retval = TRUE;

        /* set the separator character we'll allow; if this returns a -1, it means something's
         * invalid after the hex, but we'll let the while-loop grab the first hex-pair anyway
         *
         * Reading *(end+2) is safe here: both preceding characters were
         * just verified to be hex digits, hence non-NUL.
         */
        sep = get_valid_byte_sep(*(end+2), encoding);

        while (*end) {
            /* high nibble */
            c = str_to_nibble[(guchar)*end];
            if (c < 0) {
                if (fail_if_partial) retval = FALSE;
                break;
            }
            ++end;

            /* low nibble; the terminating '\0' maps to -1 and ends the loop */
            d = str_to_nibble[(guchar)*end];
            if (d < 0) {
                if (fail_if_partial) retval = FALSE;
                break;
            }
            val = ((guint8)c * 16) + d;
            g_byte_array_append(bytes, &val, 1);
            ++end;

            /* check for separator and peek at next char to make sure we should keep going */
            if (sep > 0 && *end == sep && str_to_nibble[(guchar)*(end+1)] > -1) {
                /* yes, it's the right sep and followed by more hex, so skip the sep */
                ++end;
            } else if (sep != 0 && *end) {
                /* we either need a separator, but we don't see one; or the get_valid_byte_sep()
                   earlier didn't find a valid one to begin with */
                if (fail_if_partial) retval = FALSE;
                break;
            }
            /* otherwise, either no separator allowed, or *end is null, or *end is an invalid
             * sep, or *end is a valid sep but after it is not a hex char - in all those
             * cases, just loop back up and let it fail later naturally.
             */
        }
    }

    if (!retval) {
        /* failure: discard anything decoded and report no progress */
        if (bytes) g_byte_array_set_size(bytes, 0);
        end = hex_str;
    }

    if (endptr) *endptr = end;

    return retval;
}

/*
 * Decode an RFC 3986 percent-encoded string into "bytes".
 *
 * "bytes" is emptied first.  Every "%XX" sequence (two hex digits) is
 * decoded to one byte; every other character is copied unchanged.
 * Returns FALSE if "uri_str" is NULL, contains a non-printable
 * character, or contains a truncated or non-hex percent escape; in the
 * latter cases "bytes" keeps whatever was decoded before the error.
 *
 * XXX - We don't check for reserved characters.
 */
#define HEX_DIGIT_BUF_LEN 3
gboolean
uri_str_to_bytes(const char *uri_str, GByteArray *bytes)
{
    const gchar *cur;
    gchar pair[HEX_DIGIT_BUF_LEN];
    guint8 decoded;

    g_byte_array_set_size(bytes, 0);
    if (uri_str == NULL)
        return FALSE;

    for (cur = uri_str; *cur != '\0'; cur++) {
        if (!g_ascii_isprint(*cur))
            return FALSE;

        if (*cur != '%') {
            /* ordinary character: copy it as is */
            g_byte_array_append(bytes, (const guint8 *) cur, 1);
            continue;
        }

        /* percent escape: exactly two characters must follow */
        if (cur[1] == '\0' || cur[2] == '\0')
            return FALSE;
        pair[0] = cur[1];
        pair[1] = cur[2];
        pair[2] = '\0';
        if (!(g_ascii_isxdigit(pair[0]) && g_ascii_isxdigit(pair[1])))
            return FALSE;

        decoded = (guint8) strtoul(pair, NULL, 16);
        g_byte_array_append(bytes, &decoded, 1);

        /* skip the two hex digits; the loop step skips past the escape */
        cur += 2;
    }
    return TRUE;
}

/*
 * Given a GByteArray, generate a string from it that shows reserved and
 * non-printable characters as percent-style escapes ("%XX", upper-case
 * hex), and return a pointer to it.
 *
 * A byte is escaped if it is not printable ASCII, if it is '%', or if
 * it appears in the reserved set.  The reserved set is "reserved_chars"
 * when that is non-NULL, otherwise the default set
 * ":/?#[]@!$&'()*+,;= ".
 *
 * The result is NUL-terminated and allocated from "allocator".  If
 * "bytes" is NULL an empty string, also allocated from "allocator", is
 * returned, so the caller can treat every return value the same way.
 */
gchar *
format_uri(wmem_allocator_t* allocator, const GByteArray *bytes, const gchar *reserved_chars)
{
    static const gchar reserved_def[] = ":/?#[]@!$&'()*+,;= ";
    const gchar *reserved = reserved_chars ? reserved_chars : reserved_def;
    gchar *fmtbuf = (gchar*)wmem_alloc(allocator, INITIAL_FMTBUF_SIZE);
    guint fmtbuf_len = INITIAL_FMTBUF_SIZE;
    guint byte_index;   /* read position in bytes->data */
    guint column = 0;   /* write position in fmtbuf */
    guint i;

    if (! bytes) {
        fmtbuf[0] = '\0';
        return fmtbuf;
    }

    for (byte_index = 0; byte_index < bytes->len; byte_index++) {
        const guint8 c = bytes->data[byte_index];
        gboolean is_reserved;

        /*
         * Is there enough room for this character, if it expands to
         * a percent plus 2 hex digits (which is the most it can
         * expand to), and also enough room for a terminating '\0'?
         */
        if (column+2+1 >= fmtbuf_len) {
            /*
             * Double the buffer's size if it's not big enough.
             * The size of the buffer starts at 128, so doubling its size
             * adds at least another 128 bytes, which is more than enough
             * for one more character plus a terminating '\0'.
             */
            fmtbuf_len *= 2;
            fmtbuf = (gchar *)wmem_realloc(allocator, fmtbuf, fmtbuf_len);
        }

        /*
         * The decision is made afresh for every byte; a reserved byte
         * must not cause the bytes that follow it to be escaped too.
         */
        is_reserved = (!g_ascii_isprint(c) || c == '%');
        for (i = 0; !is_reserved && reserved[i]; i++) {
            if (c == (guchar)reserved[i])
                is_reserved = TRUE;
        }

        /*
         * The output position advances independently of the input
         * position, because an escaped byte occupies three characters.
         */
        if (!is_reserved) {
            fmtbuf[column++] = c;
        } else {
            fmtbuf[column++] = '%';
            fmtbuf[column++] = hex[c >> 4];
            fmtbuf[column++] = hex[c & 0xF];
        }
    }
    fmtbuf[column] = '\0';
    return fmtbuf;
}

/**
 * Duplicate a GByteArray.
 *
 * @param ba The byte array to copy; may be NULL.
 * @return A newly created GByteArray holding the same bytes as ba, or
 *         NULL if ba is NULL.
 *
 */
GByteArray *
byte_array_dup(const GByteArray *ba)
{
    GByteArray *copy = NULL;

    if (ba != NULL) {
        copy = g_byte_array_new();
        g_byte_array_append(copy, ba->data, ba->len);
    }
    return copy;
}

/*
 * Size, in bytes, of the scratch buffer rel_oid_str_to_bytes() uses to
 * encode one sub-identifier.  Each encoded byte carries 7 bits of the
 * value, so 5 bytes are enough for a 32-bit sub-identifier.
 */
#define SUBID_BUF_LEN 5
/*
 * Convert the dotted-decimal text form of an absolute OID into its
 * encoded byte form, stored in "bytes".
 *
 * This is rel_oid_str_to_bytes() with is_absolute set to TRUE; see that
 * function for the syntax rules and the return value.
 */
gboolean
oid_str_to_bytes(const char *oid_str, GByteArray *bytes)
{
    return rel_oid_str_to_bytes(oid_str, bytes, TRUE);
}
/*
 * Convert the dotted-decimal text form of an OID into its encoded byte
 * form, stored in "bytes" (which is emptied first).
 *
 * Syntax accepted: only digits and '.'; at least one '.'; no trailing
 * '.'; no two consecutive '.'; and, when "is_absolute" is TRUE, no
 * leading '.'.
 *
 * When "is_absolute" is TRUE the first two sub-identifiers are combined
 * into a single value (40 * first + second); the first must be 0..2 and,
 * if it is 0 or 1, the second must be 0..39.  When "is_absolute" is
 * FALSE every sub-identifier is encoded on its own.
 *
 * Each value is written base-128, most significant group first, with
 * the top bit set on every byte except the last.
 *
 * Returns TRUE on success, FALSE if the string is rejected.
 */
gboolean
rel_oid_str_to_bytes(const char *oid_str, GByteArray *bytes, gboolean is_absolute)
{
    guint32 subid0, subid, sicnt, i;
    const char *p, *dot;
    guint8 buf[SUBID_BUF_LEN];

    g_byte_array_set_size(bytes, 0);

    /* check syntax */
    p = oid_str;
    dot = NULL;
    while (*p) {
        if (!g_ascii_isdigit(*p) && (*p != '.')) return FALSE;
        if (*p == '.') {
            /* an absolute OID may not start with a dot */
            if (p == oid_str && is_absolute) return FALSE;
            /* no trailing dot */
            if (!*(p+1)) return FALSE;
            /* no two dots in a row ("dot" is the previous dot, if any) */
            if ((p-1) == dot) return FALSE;
            dot = p;
        }
        p++;
    }
    /* there must be at least one dot */
    if (!dot) return FALSE;

    p = oid_str;
    /*
     * sicnt counts the sub-identifiers seen so far.  Starting at 2 for
     * a relative OID skips the special handling of the first two
     * sub-identifiers below.
     */
    sicnt = is_absolute ? 0 : 2;
    /*
     * NOTE(review): for a relative OID the first character is skipped
     * unconditionally - presumably the leading '.', but nothing here
     * checks that it is one; confirm against callers.
     */
    if (!is_absolute) p++;
    subid0 = 0;    /* squelch GCC complaints */
    while (*p) {
        /*
         * Parse one decimal sub-identifier.
         * NOTE(review): no overflow check on the accumulation is
         * visible here; values above 32 bits would wrap.
         */
        subid = 0;
        while (g_ascii_isdigit(*p)) {
            subid *= 10;
            subid += *p - '0';
            p++;
        }
        if (sicnt == 0) {
            /* first sub-identifier: remember it, emit nothing yet */
            subid0 = subid;
            if (subid0 > 2) return FALSE;
        } else if (sicnt == 1) {
            /* second sub-identifier: fold the first one into it */
            if ((subid0 < 2) && (subid > 39)) return FALSE;
            subid += 40 * subid0;
        }
        if (sicnt) {
            /*
             * Fill buf from its end towards its start with 7-bit
             * groups, all flagged with the continuation bit; then clear
             * that bit on the last byte and append the used tail.
             */
            i = SUBID_BUF_LEN;
            do {
                i--;
                buf[i] = 0x80 | (subid % 0x80);
                subid >>= 7;
            } while (subid && i);
            buf[SUBID_BUF_LEN-1] &= 0x7F;
            g_byte_array_append(bytes, buf + i, SUBID_BUF_LEN - i);
        }
        sicnt++;
        /* step over the '.' that ended this sub-identifier, if any */
        if (*p) p++;
    }

    return TRUE;
}

/**
 * Test two GByteArrays for equality.
 *
 * @param ba1 A byte array
 * @param ba2 A byte array
 * @return TRUE if both arrays are non-NULL, have the same length and
 *         hold the same bytes; FALSE otherwise (in particular when
 *         either array is NULL).
 *
 * XXX - Should this be in strutil.c?
 */
gboolean
byte_array_equal(GByteArray *ba1, GByteArray *ba2)
{
    /* Evaluated left to right, so data is only compared once both
     * arrays are known to exist and to have equal lengths. */
    return (ba1 != NULL && ba2 != NULL &&
            ba1->len == ba2->len &&
            memcmp(ba1->data, ba2->data, ba1->len) == 0) ? TRUE : FALSE;
}


/* Return an XML-escaped copy of "unescaped": '<', '>', '&', '\'' and
 * '"' are replaced by "&lt;", "&gt;", "&amp;", "&#x27;" and "&quot;";
 * all other characters are copied unchanged.
 * The returned string must be freed when no longer in use. */
gchar *
xml_escape(const gchar *unescaped)
{
    GString *out = g_string_sized_new(128);
    const gchar *cur;

    for (cur = unescaped; *cur != '\0'; cur++) {
        const gchar *entity = NULL;

        if (*cur == '<')
            entity = "&lt;";
        else if (*cur == '>')
            entity = "&gt;";
        else if (*cur == '&')
            entity = "&amp;";
        else if (*cur == '\'')
            entity = "&#x27;";
        else if (*cur == '"')
            entity = "&quot;";

        if (entity != NULL)
            g_string_append(out, entity);
        else
            g_string_append_c(out, *cur);
    }

    /* Drop the GString wrapper but keep its character data, which is
     * what gets handed back to the caller. */
    return g_string_free(out, FALSE);
}


/* Return the first occurrence of needle in haystack.
 * If not found, return NULL.
 * If either haystack or needle has 0 length, return NULL.
 * Algorithm copied from GNU's glibc 2.3.2 memmem() under LGPL 2.1+ */
const guint8 *
epan_memmem(const guint8 *haystack, guint haystack_len,
        const guint8 *needle, guint needle_len)
{
    const guint8 *begin;
    const guint8 *last_possible;

    if (needle_len == 0) {
        return NULL;
    }

    if (needle_len > haystack_len) {
        return NULL;
    }

    /*
     * Only compute the last candidate position once we know the needle
     * fits: otherwise the unsigned subtraction would wrap around and the
     * pointer arithmetic would leave the haystack, which is undefined
     * behaviour even if the pointer is never dereferenced.
     */
    last_possible = haystack + (haystack_len - needle_len);

    for (begin = haystack ; begin <= last_possible; ++begin) {
        /* cheap first-byte test before comparing the rest */
        if (begin[0] == needle[0] &&
                !memcmp(&begin[1], needle + 1,
                    needle_len - 1)) {
            return begin;
        }
    }

    return NULL;
}

/*
 * Scan the search string to make sure it's valid hex.  Return the
 * number of bytes in nbytes.
 */
guint8 *
convert_string_to_hex(const char *string, size_t *nbytes)
{
    size_t n_bytes;
    const char *p;
    gchar c;
    guint8 *bytes, *q, byte_val;

    n_bytes = 0;
    p = &string[0];
    for (;;) {
        c = *p++;
        if (c == '\0')
            break;
        if (g_ascii_isspace(c))
            continue;    /* allow white space */
        if (c==':' || c=='.' || c=='-')
            continue; /* skip any ':', '.', or '-' between bytes */
        if (!g_ascii_isxdigit(c)) {
            /* Not a valid hex digit - fail */
            return NULL;
        }

        /*
         * We can only match bytes, not nibbles; we must have a valid
         * hex digit immediately after that hex digit.
         */
        c = *p++;
        if (!g_ascii_isxdigit(c))
            return NULL;

        /* 2 hex digits = 1 byte */
        n_bytes++;
    }

    /*
     * Were we given any hex digits?
     */
    if (n_bytes == 0) {
        /* No. */
        return NULL;
    }

    /*
     * OK, it's valid, and it generates "n_bytes" bytes; generate the
     * raw byte array.
     */
    bytes = (guint8 *)g_malloc(n_bytes);
    p = &string[0];
    q = &bytes[0];
    for (;;) {
        c = *p++;
        if (c == '\0')
            break;
        if (g_ascii_isspace(c))
            continue;    /* allow white space */
        if (c==':' || c=='.' || c=='-')
            continue; /* skip any ':', '.', or '-' between bytes */
        /* From the loop above, we know this is a hex digit */
        byte_val = ws_xton(c);
        byte_val <<= 4;

        /* We also know this is a hex digit */
        c = *p++;
        byte_val |= ws_xton(c);

        *q++ = byte_val;
    }
    *nbytes = n_bytes;
    return bytes;
}

/*
 * Return a newly allocated copy of "string": upper-cased with
 * g_utf8_strup() for a case-insensitive search, or duplicated as is
 * for a case-sensitive one.
 */
char *
convert_string_case(const char *string, gboolean case_insensitive)
{
    return case_insensitive ? g_utf8_strup(string, -1) : g_strdup(string);
}

/*
 * Find the first occurrence of "needle" in "haystack", comparing with
 * g_ascii_strncasecmp() (i.e. ignoring ASCII case).
 * Returns a pointer into "haystack", or NULL if there is no match.
 */
const char *
epan_strcasestr(const char *haystack, const char *needle)
{
    const gsize needle_len = strlen(needle);
    gsize remaining = strlen(haystack);
    const char *candidate = haystack;

    /* Stop as soon as what is left is shorter than the needle. */
    for (; remaining >= needle_len; remaining--, candidate++) {
        if (g_ascii_strncasecmp(candidate, needle, needle_len) == 0)
            return candidate;
    }
    return NULL;
}

/*
 * Return "string" itself, or the placeholder "[NULL]" when "string" is
 * a NULL pointer.
 */
const char *
string_or_null(const char *string)
{
    return (string != NULL) ? string : "[NULL]";
}

int
escape_string_len(const char *string)
{
    const char *p;
    gchar c;
    int repr_len;

    repr_len = 0;
    for (p = string; (c = *p) != '\0'; p++) {
        /* Backslashes and double-quotes must
         * be escaped */
        if (c == '\\' || c == '"') {
            repr_len += 2;
        }
        /* Values that can't nicely be represented
         * in ASCII need to be escaped. */
        else if (!g_ascii_isprint(c)) {
            /* c --> \xNN */
            repr_len += 4;
        }
        /* Other characters are just passed through. */
        else {
            repr_len++;
        }
    }
    return repr_len + 2;    /* string plus leading and trailing quotes */
}

/*
 * Write a double-quoted, escaped representation of "string" into "buf"
 * and return "buf".
 *
 * Backslashes and double-quotes are preceded by a backslash,
 * non-printable characters are written as \xNN (two lower-case hex
 * digits), and everything else is copied unchanged.  No bounds are
 * checked here: "buf" must have room for escape_string_len(string)
 * characters plus the terminating '\0'.
 */
char *
escape_string(char *buf, const char *string)
{
    char *out = buf;
    const gchar *in;
    char digits[3];

    *out++ = '"';
    for (in = string; *in != '\0'; in++) {
        const gchar ch = *in;

        if (ch == '\\' || ch == '"') {
            /* Backslashes and double-quotes must be escaped. */
            out[0] = '\\';
            out[1] = ch;
            out += 2;
        } else if (g_ascii_isprint(ch)) {
            /* Printable characters are just passed through. */
            *out++ = ch;
        } else {
            /* Anything else can't be shown nicely: c --> \xNN */
            g_snprintf(digits, sizeof(digits), "%02x", (unsigned char) ch);
            out[0] = '\\';
            out[1] = 'x';
            out[2] = digits[0];
            out[3] = digits[1];
            out += 4;
        }
    }
    out[0] = '"';
    out[1] = '\0';
    return buf;
}

/* Number of entries in the IA5 decoding table (7-bit code points). */
#define GN_CHAR_ALPHABET_SIZE 128

/*
 * Decoding table indexed by a 7-bit IA5 code point.  Entries 0x00-0x1F
 * and 0x7F hold '?'; every other entry holds the ASCII character with
 * the same code.
 */
static gunichar IA5_default_alphabet[GN_CHAR_ALPHABET_SIZE] = {

    /*ITU-T recommendation T.50 specifies International Reference Alphabet 5 (IA5) */

    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
    '?', '?', '?', '?', '?', '?', '?', '?',
    ' ', '!', '\"','#', '$', '%', '&', '\'',
    '(', ')', '*', '+', ',', '-', '.', '/',
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G',
    'H',  'I',  'J',  'K',  'L',  'M',  'N',  'O',
    'P',  'Q',  'R',  'S',  'T',  'U',  'V',  'W',
    'X',  'Y',  'Z',  '[',  '\\',  ']',  '^',  '_',
    '`', 'a',  'b',  'c',  'd',  'e',  'f',  'g',
    'h',  'i',  'j',  'k',  'l',  'm',  'n',  'o',
    'p',  'q',  'r',  's',  't',  'u',  'v',  'w',
    'x',  'y',  'z',  '{',  '|',  '}',  '~',  '?'
};

static gunichar
char_def_ia5_alphabet_decode(unsigned char value)
{
    if (value < GN_CHAR_ALPHABET_SIZE) {
        return IA5_default_alphabet[value];
    }
    else {
        return '?';
    }
}

/*
 * Decode "len" IA5 characters from "src" into "dest" as a
 * NUL-terminated UTF-8 string.  No bounds are checked here; "dest"
 * must be large enough for the converted text plus the terminator.
 */
void
IA5_7BIT_decode(unsigned char * dest, const unsigned char* src, int len)
{
    int out_pos = 0;
    int in_pos;

    for (in_pos = 0; in_pos < len; in_pos++) {
        gunichar decoded = char_def_ia5_alphabet_decode(src[in_pos]);

        /* g_unichar_to_utf8() reports how many bytes it wrote */
        out_pos += g_unichar_to_utf8(decoded, &(dest[out_pos]));
    }
    dest[out_pos] = 0;
}

/*
 * Return a newly g_malloc()ed copy of "str" in which every occurrence
 * of "chr" is doubled.  Returns NULL if "str" is NULL.
 */
gchar*
ws_strdup_escape_char (const gchar *str, const gchar chr)
{
    const gchar *src;
    gchar *result, *dst;

    if (str == NULL)
        return NULL;

    /* Worst case: every character is 'chr' and gets doubled. */
    result = (gchar *)g_malloc (strlen(str) * 2 + 1);
    dst = result;

    for (src = str; *src != 0; src++) {
        if (*src == chr)
            *dst++ = chr;    /* the extra, escaping copy */
        *dst++ = *src;
    }
    *dst = '\0';

    return result;
}

/*
 * Return a newly g_malloc()ed copy of "str" in which every doubled
 * occurrence of "chr" is collapsed into a single "chr".
 * Returns NULL if "str" is NULL.
 */
gchar*
ws_strdup_unescape_char (const gchar *str, const char chr)
{
    const gchar *src;
    gchar *result, *dst;

    if (str == NULL)
        return NULL;

    /* Worst case: nothing is collapsed and the copy is as long as str. */
    result = (gchar *)g_malloc (strlen(str) + 1);
    dst = result;

    for (src = str; *src != 0; src++) {
        *dst++ = *src;
        if (*src == chr && src[1] == chr)
            src++;    /* skip the second character of the pair */
    }
    *dst = '\0';

    return result;
}

/* Return a newly-allocated copy of "str" in which every occurrence of
 * "old_val" is replaced by "new_val", done by splitting "str" at
 * "old_val" and joining the pieces with "new_val".
 * Returns NULL if "str" or "old_val" is NULL. */
gchar *
string_replace(const gchar* str, const gchar *old_val, const gchar *new_val)
{
    gchar **pieces;
    gchar *result = NULL;

    if (str != NULL && old_val != NULL) {
        pieces = g_strsplit(str, old_val, 0);
        result = g_strjoinv(new_val, pieces);
        g_strfreev(pieces);
    }

    return result;
}

/*
 * Editor modelines  -  http://www.wireshark.org/tools/modelines.html
 *
 * Local variables:
 * c-basic-offset: 4
 * tab-width: 8
 * indent-tabs-mode: nil
 * End:
 *
 * vi: set shiftwidth=4 tabstop=8 expandtab:
 * :indentSize=4:tabSize=8:noTabs=true:
 */