wireshark/epan/strutil.c

/* strutil.c
 * String utility routines
 *
 * $Id: strutil.c,v 1.12 2003/08/27 15:23:02 gram Exp $
 *
 * Ethereal - Network traffic analyzer
 * By Gerald Combs <gerald@ethereal.com>
 * Copyright 1998 Gerald Combs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */

#ifdef HAVE_CONFIG_H
# include "config.h"
#endif

#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <glib.h>
#include "strutil.h"


/*
 * Given a pointer into a data buffer, and to the end of the buffer,
 * find the end of the (putative) line at that position in the data
 * buffer.
 * Return a pointer to the EOL character(s) in "*eol".
 */
const guchar *
find_line_end(const guchar *data, const guchar *dataend, const guchar **eol)
{
  const guchar *lineend;

  lineend = memchr(data, '\n', dataend - data);
  if (lineend == NULL) {
    /*
     * No LF - line is probably continued in next TCP segment.
     */
    lineend = dataend;
    *eol = dataend;
  } else {
    /*
     * Is the LF at the beginning of the line?
     */
    if (lineend > data) {
      /*
       * No - is it preceded by a carriage return?
       * (Perhaps it's supposed to be, but that's not guaranteed....)
       */
      if (*(lineend - 1) == '\r') {
        /*
	 * Yes.  The EOL starts with the CR.
	 */
        *eol = lineend - 1;
      } else {
        /*
         * No.  The EOL starts with the LF.
         */
        *eol = lineend;

        /*
         * I seem to remember that we once saw lines ending with LF-CR
         * in an HTTP request or response, so check if it's *followed*
         * by a carriage return.
         */
        if (lineend < (dataend - 1) && *(lineend + 1) == '\r') {
          /*
           * It's <non-LF><LF><CR>; say it ends with the CR.
           */
          lineend++;
        }
      }
    } else {
      /*
       * Yes - the EOL starts with the LF.
       */
      *eol = lineend;
    }

    /*
     * Point to the character after the last character.
     */
    lineend++;
  }
  return lineend;
}

/*
 * Get the length of the next token in a line, and the beginning of the
 * next token after that (if any).
 * Return 0 if there is no next token.
 */
int
get_token_len(const guchar *linep, const guchar *lineend,
	      const guchar **next_token)
{
  const guchar *tokenp;
  int token_len;

  tokenp = linep;

  /*
   * Search for a blank, a CR or an LF, or the end of the buffer.
   */
  while (linep < lineend && *linep != ' ' && *linep != '\r' && *linep != '\n')
      linep++;
  token_len = linep - tokenp;

  /*
   * Skip trailing blanks.
   */
  while (linep < lineend && *linep == ' ')
    linep++;

  *next_token = linep;

  return token_len;
}


#define	INITIAL_FMTBUF_SIZE	128

#ifdef _WIN32
/*
 * XXX - "isprint()" can return "true" for non-ASCII characters, but
 * those don't work with GTK+ on Windows, as GTK+ on Windows assumes
 * UTF-8 strings.  Until we fix up Ethereal to properly handle
 * non-ASCII characters in all output (both GUI displays and text
 * printouts) on all platforms including Windows, we work around
 * the problem by escaping all characters that aren't printable ASCII.
 */
#undef isprint
#define isprint(c) (c >= 0x20 && c < 0x7f)
#endif

/*
 * Given a string, generate a string from it that shows non-printable
 * characters as C-style escapes, and return a pointer to it.
 */
gchar *
format_text(const guchar *string, int len)
{
  static gchar *fmtbuf;
  static int fmtbuf_len;
  int column;
  const guchar *stringend = string + len;
  guchar c;
  int i;

  /*
   * Allocate the buffer if it's not already allocated.
   */
  if (fmtbuf == NULL) {
    fmtbuf = g_malloc(INITIAL_FMTBUF_SIZE);
    fmtbuf_len = INITIAL_FMTBUF_SIZE;
  }
  column = 0;
  while (string < stringend) {
    /*
     * Is there enough room for this character, if it expands to
     * a backslash plus 3 octal digits (which is the most it can
     * expand to), and also enough room for a terminating '\0'?
     */
    if (column+3+1 >= fmtbuf_len) {
      /*
       * Double the buffer's size if it's not big enough.
       * The size of the buffer starts at 128, so doubling its size
       * adds at least another 128 bytes, which is more than enough
       * for one more character plus a terminating '\0'.
       */
      fmtbuf_len = fmtbuf_len * 2;
      fmtbuf = g_realloc(fmtbuf, fmtbuf_len);
    }
    c = *string++;

    if (isprint(c)) {
      fmtbuf[column] = c;
      column++;
    } else {
      fmtbuf[column] =  '\\';
      column++;
      switch (c) {

      case '\\':
	fmtbuf[column] = '\\';
	column++;
	break;

      case '\a':
	fmtbuf[column] = 'a';
	column++;
	break;

      case '\b':
	fmtbuf[column] = 'b';
	column++;
	break;

      case '\f':
	fmtbuf[column] = 'f';
	column++;
	break;

      case '\n':
	fmtbuf[column] = 'n';
	column++;
	break;

      case '\r':
	fmtbuf[column] = 'r';
	column++;
	break;

      case '\t':
	fmtbuf[column] = 't';
	column++;
	break;

      case '\v':
	fmtbuf[column] = 'v';
	column++;
	break;

      default:
	i = (c>>6)&03;
	fmtbuf[column] = i + '0';
	column++;
	i = (c>>3)&07;
	fmtbuf[column] = i + '0';
	column++;
	i = (c>>0)&07;
	fmtbuf[column] = i + '0';
	column++;
	break;
      }
    }
  }
  fmtbuf[column] = '\0';
  return fmtbuf;
}

/* Max string length for displaying byte string.  */
#define	MAX_BYTE_STR_LEN	32

/* Turn an array of bytes into a string showing the bytes in hex. */
#define	N_BYTES_TO_STR_STRINGS	6
gchar *
bytes_to_str(const guint8 *bd, int bd_len) {
  return bytes_to_str_punct(bd,bd_len,'\0');
}

/* Turn an array of bytes into a string showing the bytes in hex with
 * punct as a bytes separator.
 */
gchar *
bytes_to_str_punct(const guint8 *bd, int bd_len, gchar punct) {
  static gchar  str[N_BYTES_TO_STR_STRINGS][MAX_BYTE_STR_LEN+3+1];
  static int    cur_idx;
  gchar        *cur;
  gchar        *p;
  int           len;
  static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
                                '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

  cur_idx++;
  if (cur_idx >= N_BYTES_TO_STR_STRINGS)
    cur_idx = 0;
  cur = &str[cur_idx][0];
  p = cur;
  len = MAX_BYTE_STR_LEN;
  while (bd_len > 0 && len > 0) {
    *p++ = hex[(*bd) >> 4];
    *p++ = hex[(*bd) & 0xF];
    len -= 2;
    bd++;
    bd_len--;
    if(punct && bd_len > 0){
      *p++ = punct;
      len--;
    }
  }
  if (bd_len != 0) {
    /* Note that we're not showing the full string.  */
    *p++ = '.';
    *p++ = '.';
    *p++ = '.';
  }
  *p = '\0';
  return cur;
}

/* Return the first occurrence of needle in haystack.
 * If not found, return NULL.
 * If either haystack or needle has 0 length, return NULL.
 * Algorithm copied from GNU's glibc 2.3.2 memcmp() */
const guint8 *
epan_memmem(const guint8 *haystack, guint haystack_len,
		const guint8 *needle, guint needle_len)
{
	const guint8 *begin;
	const guint8 *const last_possible
		= haystack + haystack_len - needle_len;

	if (needle_len == 0) {
		return NULL;
	}

	if (needle_len > haystack_len) {
		return NULL;
	}

	for (begin = haystack ; begin <= last_possible; ++begin) {
		if (begin[0] == needle[0] &&
			!memcmp(&begin[1], needle + 1,
				needle_len - 1)) {
			return begin;
		}
	}

	return NULL;
}