an API for "bufferless" parsing of text tvbs
svn path=/trunk/; revision=15726
This commit is contained in:
parent
e0331bbb1c
commit
8a4fd5e3b0
|
@ -0,0 +1,756 @@
|
|||
/* tvbparse.c
|
||||
*
|
||||
* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
|
||||
*
|
||||
* $Id: $
|
||||
*
|
||||
* Ethereal - Network traffic analyzer
|
||||
* By Gerald Combs <gerald@ethereal.com>
|
||||
* Copyright 1998 Gerald Combs
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
#ifdef HAVE_CONFIG_H
|
||||
#include "config.h"
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <glib.h>
|
||||
|
||||
#include <epan/emem.h>
|
||||
#include <epan/proto.h>
|
||||
#include <epan/tvbparse.h>
|
||||
|
||||
/*
 * The kind of match a wanted element describes; tvbparse_get() switches
 * on this value to decide how to try the match.
 */
typedef enum _tvbparse_wanted_type_t {
    /* currently unused */
    TVBPARSE_WANTED_NONE = 0,

    /*
     * simple tokens
     */
    /* just one matching char */
    TVBPARSE_WANTED_SIMPLE_CHAR,
    /* a sequence of matching chars */
    TVBPARSE_WANTED_SIMPLE_CHARS,
    /* one non matching char */
    TVBPARSE_WANTED_SIMPLE_NOT_CHAR,
    /* a sequence of non matching chars */
    TVBPARSE_WANTED_SIMPLE_NOT_CHARS,
    /* a string */
    TVBPARSE_WANTED_SIMPLE_STRING,
    /* a caseless string */
    TVBPARSE_WANTED_SIMPLE_CASESTRING,
    /* all the characters until the first matching token */
    TVBPARSE_WANTED_UNTIL,

    /*
     * composed tokens
     */
    /* one of the given types */
    TVBPARSE_WANTED_SET_ONEOF,
    /* an exact sequence of tokens of the given types */
    TVBPARSE_WANTED_SET_SEQ,
    /* one or more tokens of the given type */
    TVBPARSE_WANTED_CARDINALITY
} tvbparse_type_t;
|
||||
|
||||
struct _tvbparse_t {
|
||||
tvbuff_t* tvb;
|
||||
int offset;
|
||||
int max_len;
|
||||
void* data;
|
||||
const tvbparse_wanted_t* ignore;
|
||||
guint depth;
|
||||
};
|
||||
|
||||
struct _tvbparse_wanted_t {
|
||||
int id;
|
||||
tvbparse_type_t type;
|
||||
|
||||
const gchar* ctl;
|
||||
int len;
|
||||
|
||||
guint min;
|
||||
guint max;
|
||||
|
||||
const void* data;
|
||||
tvbparse_action_t before;
|
||||
tvbparse_action_t after;
|
||||
|
||||
GPtrArray* elems;
|
||||
};
|
||||
|
||||
|
||||
tvbparse_wanted_t* tvbparse_char(int id,
|
||||
const gchar* chr,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_CHAR;
|
||||
w->ctl = chr;
|
||||
w->len = 1;
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_chars(int id,
|
||||
guint min_len,
|
||||
guint max_len,
|
||||
const gchar* chr,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_CHARS;
|
||||
w->ctl = chr;
|
||||
w->len = 0;
|
||||
w->min = min_len ? min_len : 1;
|
||||
w->max = max_len ? max_len : G_MAXINT;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_not_char(int id,
|
||||
const gchar* chr,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_NOT_CHAR;
|
||||
w->ctl = chr;
|
||||
w->len = 0;
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_not_chars(int id,
|
||||
guint min_len,
|
||||
guint max_len,
|
||||
const gchar* chr,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb){
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_NOT_CHARS;
|
||||
w->ctl = chr;
|
||||
w->len = 0;
|
||||
w->min = min_len ? min_len : 1;
|
||||
w->max = max_len ? max_len : G_MAXINT;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
|
||||
tvbparse_wanted_t* tvbparse_string(int id,
|
||||
const gchar* str,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_STRING;
|
||||
w->ctl = str;
|
||||
w->len = strlen(str);
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_casestring(int id,
|
||||
const gchar* str,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SIMPLE_CASESTRING;
|
||||
w->ctl = str;
|
||||
w->len = strlen(str);
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
|
||||
tvbparse_wanted_t* tvbparse_set_oneof(int id,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
...) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
tvbparse_t* el;
|
||||
va_list ap;
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SET_ONEOF;
|
||||
w->ctl = NULL;
|
||||
w->len = 0;
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
va_start(ap,after_cb);
|
||||
|
||||
while(( el = va_arg(ap,tvbparse_t*) )) {
|
||||
g_ptr_array_add(w->elems,el);
|
||||
};
|
||||
|
||||
va_end(ap);
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_set_seq(int id,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
...) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
tvbparse_wanted_t* el = NULL;
|
||||
va_list ap;
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_SET_SEQ;
|
||||
w->ctl = NULL;
|
||||
w->len = 0;
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
va_start(ap,after_cb);
|
||||
|
||||
while(( el = va_arg(ap,tvbparse_wanted_t*) )) {
|
||||
g_ptr_array_add(w->elems,el);
|
||||
};
|
||||
|
||||
va_end(ap);
|
||||
return w;
|
||||
}
|
||||
|
||||
|
||||
tvbparse_wanted_t* tvbparse_some(int id,
|
||||
guint from,
|
||||
guint to,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
const tvbparse_wanted_t* el) {
|
||||
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
g_assert(from > 0 && from < to);
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_CARDINALITY;
|
||||
w->ctl = NULL;
|
||||
w->len = 0;
|
||||
w->min = from;
|
||||
w->max = to;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
g_ptr_array_add(w->elems,(gpointer)el);
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
tvbparse_wanted_t* tvbparse_until(int id,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
const tvbparse_wanted_t* el,
|
||||
gboolean include_term) {
|
||||
tvbparse_wanted_t* w = g_malloc(sizeof(tvbparse_wanted_t));
|
||||
|
||||
w->id = id;
|
||||
w->type = TVBPARSE_WANTED_UNTIL;
|
||||
|
||||
/* XXX this is ugly */
|
||||
w->ctl = include_term ? "include" : "do not include";
|
||||
|
||||
w->len = 0;
|
||||
w->min = 0;
|
||||
w->max = 0;
|
||||
w->data = data;
|
||||
w->before = before_cb;
|
||||
w->after = after_cb;
|
||||
w->elems = g_ptr_array_new();
|
||||
|
||||
g_ptr_array_add(w->elems,(gpointer)el);
|
||||
|
||||
return w;
|
||||
}
|
||||
|
||||
|
||||
tvbparse_wanted_t* tvbparse_quoted(int id,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
char quote,
|
||||
char esc) {
|
||||
|
||||
gchar* esc_quot = g_strdup_printf("%c%c",esc,quote);
|
||||
gchar* quot = g_strdup_printf("%c",quote);
|
||||
tvbparse_wanted_t* want_quot = tvbparse_char(-1,quot,NULL,NULL,NULL);
|
||||
|
||||
return tvbparse_set_oneof(id, data, before_cb, after_cb,
|
||||
tvbparse_set_seq(-1, NULL, NULL, NULL,
|
||||
want_quot,
|
||||
tvbparse_set_seq(-1,NULL,NULL,NULL,
|
||||
tvbparse_set_oneof(-1, NULL, NULL, NULL,
|
||||
tvbparse_string(-1,esc_quot,NULL,NULL,NULL),
|
||||
tvbparse_not_chars(-1,0,0,quot,NULL,NULL,NULL),
|
||||
NULL),
|
||||
NULL),
|
||||
want_quot,
|
||||
NULL),
|
||||
tvbparse_set_seq(-1, NULL, NULL, NULL,
|
||||
want_quot,
|
||||
want_quot,
|
||||
NULL),
|
||||
NULL);
|
||||
|
||||
}
|
||||
|
||||
/*
 * A callback that drops the first and the last character of the token
 * (e.g. the quotes of a quoted string) by moving its start one byte
 * forward and shortening it by two.
 */
void tvbparse_shrink_token_cb(void* tvbparse_data _U_,
                              const void* wanted_data _U_,
                              tvbparse_elem_t* tok) {
    /* one byte less at the end... */
    tok->len = tok->len - 2;
    /* ...and one at the beginning (already accounted for in len) */
    tok->offset = tok->offset + 1;
}
|
||||
|
||||
tvbparse_t* tvbparse_init(tvbuff_t* tvb,
|
||||
int offset,
|
||||
int len,
|
||||
void* data,
|
||||
const tvbparse_wanted_t* ignore) {
|
||||
tvbparse_t* tt = ep_alloc(sizeof(tvbparse_t));
|
||||
|
||||
tt->tvb = tvb;
|
||||
tt->offset = offset;
|
||||
tt->max_len = (len == -1) ? (int) tvb_length(tvb) : len;
|
||||
tt->data = data;
|
||||
tt->ignore = ignore;
|
||||
tt->depth = 0;
|
||||
return tt;
|
||||
}
|
||||
|
||||
/*
 * Repositions an existing parser.
 *
 * tt:     the parser
 * offset: the new position
 * len:    how many bytes to parse from there, or -1 for everything from
 *         offset up to the end of the tvb
 *
 * Returns TRUE if the requested window fits into the tvb, FALSE (leaving
 * position and window untouched) if it does not.  The nesting depth is
 * reset in both cases.
 */
gboolean tvbparse_reset(tvbparse_t* tt,
                        int offset,
                        int len) {
    /* -1 if offset is out of bounds */
    int remaining = tvb_length_remaining(tt->tvb, offset);

    tt->depth = 0;

    if (len == -1) {
        /*
         * Measured from offset: using the whole tvb length made every
         * reset with offset > 0 and len == -1 fail.
         */
        len = remaining;
    }

    if (len < 0 || remaining < len) {
        return FALSE;
    }

    tt->offset = offset;
    tt->max_len = len;

    return TRUE;
}
|
||||
|
||||
static tvbparse_elem_t* new_tok(tvbparse_t* tt,
|
||||
int id,
|
||||
int offset,
|
||||
int len,
|
||||
const tvbparse_wanted_t* wanted) {
|
||||
tvbparse_elem_t* tok = ep_alloc(sizeof(tvbparse_elem_t));
|
||||
|
||||
tok->tvb = tt->tvb;
|
||||
tok->id = id;
|
||||
tok->offset = offset;
|
||||
tok->len = len;
|
||||
tok->data = NULL;
|
||||
tok->sub = NULL;
|
||||
tok->next = NULL;
|
||||
tok->wanted = wanted;
|
||||
tok->last = tok;
|
||||
|
||||
return tok;
|
||||
}
|
||||
|
||||
tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
|
||||
const tvbparse_wanted_t* wanted) {
|
||||
tvbparse_elem_t* tok = NULL;
|
||||
int save_offset = tt->offset;
|
||||
int save_len = tt->max_len;
|
||||
|
||||
tt->depth++;
|
||||
|
||||
if (tt->ignore && tt->ignore != wanted) {
|
||||
tvbparse_wanted_t* save = (void*)tt->ignore;
|
||||
tt->ignore = NULL;
|
||||
while ( tvbparse_get(tt,save) ) {
|
||||
;
|
||||
}
|
||||
tt->ignore = save;
|
||||
}
|
||||
|
||||
switch(wanted->type) {
|
||||
case TVBPARSE_WANTED_NONE:
|
||||
goto reject;
|
||||
case TVBPARSE_WANTED_SIMPLE_NOT_CHAR:
|
||||
{
|
||||
gchar c, t;
|
||||
guint i;
|
||||
gboolean not_matched = FALSE;
|
||||
|
||||
if (! tt->max_len )
|
||||
goto reject;
|
||||
|
||||
t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
|
||||
|
||||
for(i = 0; (c = wanted->ctl[i]) && tt->max_len; i++) {
|
||||
if ( c == t ) {
|
||||
not_matched = TRUE;
|
||||
}
|
||||
}
|
||||
|
||||
if (not_matched) {
|
||||
goto reject;
|
||||
} else {
|
||||
tt->offset++;
|
||||
tt->max_len--;
|
||||
tok = new_tok(tt,wanted->id,tt->offset-1,1,wanted);
|
||||
goto accept;
|
||||
}
|
||||
}
|
||||
case TVBPARSE_WANTED_SIMPLE_CHAR:
|
||||
{
|
||||
gchar c,t;
|
||||
guint i;
|
||||
|
||||
if (! tt->max_len )
|
||||
goto reject;
|
||||
|
||||
t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
|
||||
|
||||
for(i = 0; (c = wanted->ctl[i]) && tt->max_len; i++) {
|
||||
if ( c == t ) {
|
||||
tt->offset++;
|
||||
tt->max_len--;
|
||||
tok = new_tok(tt,wanted->id,tt->offset-1,1,wanted);
|
||||
goto accept;
|
||||
}
|
||||
}
|
||||
goto reject;
|
||||
}
|
||||
case TVBPARSE_WANTED_SIMPLE_NOT_CHARS:
|
||||
{
|
||||
gchar c, t;
|
||||
guint i;
|
||||
guint offset = tt->offset;
|
||||
guint length = 0;
|
||||
|
||||
while( tt->max_len && length < wanted->max) {
|
||||
gboolean not_matched = FALSE;
|
||||
t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
|
||||
i = 0;
|
||||
|
||||
while ( (c = wanted->ctl[i]) && tt->max_len ) {
|
||||
|
||||
if (c == t) {
|
||||
not_matched = TRUE;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if ( not_matched )
|
||||
break;
|
||||
|
||||
length++;
|
||||
tt->offset++;
|
||||
tt->max_len--;
|
||||
};
|
||||
|
||||
if ( length < wanted->min ) {
|
||||
goto reject;
|
||||
} else {
|
||||
tok = new_tok(tt,wanted->id,offset,length,wanted);
|
||||
goto accept;
|
||||
}
|
||||
}
|
||||
case TVBPARSE_WANTED_SIMPLE_CHARS:
|
||||
{
|
||||
gchar c, t;
|
||||
guint i;
|
||||
guint offset = tt->offset;
|
||||
guint length = 0;
|
||||
|
||||
while( tt->max_len && length < wanted->max) {
|
||||
gboolean matched = FALSE;
|
||||
t = (gchar) tvb_get_guint8(tt->tvb,tt->offset);
|
||||
i = 0;
|
||||
|
||||
while ( (c = wanted->ctl[i]) && tt->max_len ) {
|
||||
|
||||
if (c == t) {
|
||||
matched = TRUE;
|
||||
break;
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
|
||||
if (! matched )
|
||||
break;
|
||||
|
||||
length++;
|
||||
tt->offset++;
|
||||
tt->max_len--;
|
||||
};
|
||||
|
||||
if (length < wanted->min) {
|
||||
goto reject;
|
||||
} else {
|
||||
tok = new_tok(tt,wanted->id,offset,length,wanted);
|
||||
goto accept;
|
||||
}
|
||||
}
|
||||
case TVBPARSE_WANTED_SIMPLE_STRING:
|
||||
{
|
||||
if ( tvb_strneql(tt->tvb, tt->offset, wanted->ctl, wanted->len) == 0 ) {
|
||||
int offset = tt->offset;
|
||||
tt->offset += wanted->len;
|
||||
tt->max_len -= wanted->len;
|
||||
tok = new_tok(tt,wanted->id,offset,wanted->len,wanted);
|
||||
goto accept;
|
||||
} else {
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
case TVBPARSE_WANTED_SIMPLE_CASESTRING:
|
||||
{
|
||||
if ( tvb_strncaseeql(tt->tvb, tt->offset, wanted->ctl, wanted->len) == 0 ) {
|
||||
int offset = tt->offset;
|
||||
tt->offset += wanted->len;
|
||||
tt->max_len -= wanted->len;
|
||||
tok = new_tok(tt,wanted->id,offset,wanted->len,wanted);
|
||||
goto accept;
|
||||
} else {
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
case TVBPARSE_WANTED_SET_ONEOF:
|
||||
{
|
||||
guint i;
|
||||
|
||||
for(i=0; i < wanted->elems->len; i++) {
|
||||
tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,i);
|
||||
tvbparse_elem_t* new = tvbparse_get(tt, w);
|
||||
|
||||
if (new) {
|
||||
tok = new_tok(tt, wanted->id, new->offset, new->len, wanted);
|
||||
tok->sub = new;
|
||||
goto accept;
|
||||
}
|
||||
}
|
||||
goto reject;
|
||||
}
|
||||
case TVBPARSE_WANTED_SET_SEQ:
|
||||
{
|
||||
guint i;
|
||||
|
||||
for(i=0; i < wanted->elems->len; i++) {
|
||||
tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,i);
|
||||
tvbparse_elem_t* new = tvbparse_get(tt, w);
|
||||
|
||||
if (new) {
|
||||
if (tok) {
|
||||
tok->len = (new->offset - tok->offset) + new->len;
|
||||
tok->sub->last->next = new;
|
||||
tok->sub->last = new;
|
||||
} else {
|
||||
tok = new_tok(tt, wanted->id, new->offset, new->len, wanted);
|
||||
tok->sub = new;
|
||||
}
|
||||
} else {
|
||||
goto reject;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
goto accept;
|
||||
}
|
||||
case TVBPARSE_WANTED_CARDINALITY:
|
||||
{
|
||||
guint got_so_far = 0;
|
||||
tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,0);
|
||||
|
||||
while (got_so_far < wanted->max) {
|
||||
tvbparse_elem_t* new = tvbparse_get(tt, w);
|
||||
|
||||
if(new) {
|
||||
if (tok) {
|
||||
tok->len = (new->offset - tok->offset) + new->len;
|
||||
tok->sub->last->next = new;
|
||||
tok->sub->last = new;
|
||||
} else {
|
||||
tok = new_tok(tt, wanted->id, new->offset, new->len, wanted);
|
||||
tok->sub = new;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
got_so_far++;
|
||||
}
|
||||
|
||||
if(got_so_far < wanted->min) {
|
||||
goto reject;
|
||||
}
|
||||
|
||||
goto accept;
|
||||
}
|
||||
case TVBPARSE_WANTED_UNTIL:
|
||||
{
|
||||
int offset = tt->offset;
|
||||
tvbparse_wanted_t* w = g_ptr_array_index(wanted->elems,0);
|
||||
tvbparse_elem_t* new = tvbparse_find(tt, w);
|
||||
|
||||
if (new) {
|
||||
tok = new;
|
||||
|
||||
/* XXX this is ugly */
|
||||
if (*(wanted->ctl) == 'i' ) {
|
||||
tok->len = (tok->offset - offset) + tok->len;
|
||||
} else {
|
||||
tok->len = (tok->offset - offset);
|
||||
|
||||
tt->offset = save_offset + tok->len;
|
||||
tt->max_len = save_len - tok->len;
|
||||
}
|
||||
|
||||
tok->offset = offset;
|
||||
tok->id = wanted->id;
|
||||
tok->next = NULL;
|
||||
tok->last = tok;
|
||||
tok->wanted = wanted;
|
||||
|
||||
goto accept;
|
||||
} else {
|
||||
goto reject;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
DISSECTOR_ASSERT_NOT_REACHED();
|
||||
return NULL;
|
||||
|
||||
accept:
|
||||
if (tok) {
|
||||
if( tt->depth == 1 ) {
|
||||
GPtrArray* stack = g_ptr_array_new();
|
||||
tvbparse_elem_t* curr = tok;
|
||||
|
||||
while (curr) {
|
||||
|
||||
if(curr->wanted->before) {
|
||||
curr->wanted->before(tt->data, curr->wanted->data, curr);
|
||||
}
|
||||
|
||||
if(curr->sub) {
|
||||
g_ptr_array_add(stack,curr);
|
||||
curr = curr->sub;
|
||||
continue;
|
||||
} else {
|
||||
if(curr->wanted->after) curr->wanted->after(tt->data, curr->wanted->data, curr);
|
||||
}
|
||||
|
||||
curr = curr->next;
|
||||
|
||||
while( !curr && stack->len ) {
|
||||
curr = g_ptr_array_remove_index_fast(stack,stack->len - 1);
|
||||
if( curr->wanted->after ) curr->wanted->after(tt->data, curr->wanted->data, curr);
|
||||
curr = curr->next;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
g_ptr_array_free(stack,FALSE);
|
||||
}
|
||||
|
||||
tt->depth--;
|
||||
return tok;
|
||||
}
|
||||
|
||||
reject:
|
||||
tt->offset = save_offset;
|
||||
tt->max_len = save_len;
|
||||
tt->depth--;
|
||||
return NULL;
|
||||
|
||||
}
|
||||
|
||||
|
||||
/*
 * Looks for the wanted element at the current position of the parser
 * and, failing that, at every following position of the parse window.
 *
 * On success the parser is left right after the token found, which is
 * returned.  On failure the parser is left where it was and NULL is
 * returned.
 */
tvbparse_elem_t* tvbparse_find(tvbparse_t* tt, const tvbparse_wanted_t* wanted) {
    int save_offset = tt->offset;
    int save_len = tt->max_len;
    tvbparse_elem_t* tok = NULL;

    /*
     * Go on as long as the token could still fit both into what is left
     * of the parse window and into what is left of the tvb; checking the
     * tvb alone let the search run past the end of the window.
     */
    while ( tt->max_len >= wanted->len
            && tvb_length_remaining(tt->tvb,tt->offset) >= wanted->len ) {
        if (( tok = tvbparse_get(tt, wanted) )) {
            return tok;
        }

        /* not here, try one byte further */
        tt->offset++;
        tt->max_len--;
    }

    tt->offset = save_offset;
    tt->max_len = save_len;

    return NULL;
}
|
||||
|
|
@ -0,0 +1,329 @@
|
|||
|
||||
/* tvbparse.h
|
||||
*
|
||||
* an API for text tvb parsers
|
||||
*
|
||||
* Copyright 2005, Luis E. Garcia Ontanon <luis.ontanon@gmail.com>
|
||||
*
|
||||
* $Id: $
|
||||
*
|
||||
* Ethereal - Network traffic analyzer
|
||||
* By Gerald Combs <gerald@ethereal.com>
|
||||
* Copyright 1998 Gerald Combs
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||
*/
|
||||
|
||||
/*
|
||||
The intention behind this is to ease the writing of dissectors that have to
|
||||
parse text without the need of writing into buffers.
|
||||
|
||||
It was originally written to avoid using lex and yacc for the xml dissector.
|
||||
|
||||
the parser is able to look for wanted elements these can be:
|
||||
|
||||
simple tokens:
|
||||
- a char out of a string of needles
|
||||
- a char not belonging to a string of needles
|
||||
- a sequence of chars that belong to a set of chars
|
||||
- a sequence of chars that do not belong to a set of chars
|
||||
- a string
|
||||
- a caseless string
|
||||
- all the characters up to a certain wanted element (included or excluded)
|
||||
|
||||
composed elements:
|
||||
- one of a given group of wanted elements
|
||||
- a sequence of wanted elements
|
||||
- some (at least one) instances of a wanted element
|
||||
|
||||
Once a wanted element is successfully extracted, by either tvbparse_get or
|
||||
tvbparse_find, the parser will invoke a given callback
|
||||
before and another one after every of its component's subelement's callbacks
|
||||
are being called.
|
||||
|
||||
If tvbparse_get or tvbparse_find fail to extract the wanted element the
|
||||
subelements callbacks are not going to be invoked.
|
||||
|
||||
The wanted elements are instantiated once by the proto_register_xxx function.
|
||||
|
||||
The parser is instantiated for every packet and it maintains its state.
|
||||
|
||||
The element's data is destroyed before the next packet is dissected.
|
||||
*/
|
||||
|
||||
#ifndef _TVB_PARSE_H_
|
||||
#define _TVB_PARSE_H_
|
||||
|
||||
#include <epan/tvbuff.h>
|
||||
#include <glib.h>
|
||||
|
||||
/* a definition of something we want to look for */
|
||||
typedef struct _tvbparse_wanted_t tvbparse_wanted_t;
|
||||
|
||||
/* an instance of a per packet parser */
|
||||
typedef struct _tvbparse_t tvbparse_t;
|
||||
|
||||
/* a matching token returned by either tvbparse_get or tvbparse_find */
|
||||
typedef struct _tvbparse_elem_t {
|
||||
int id;
|
||||
|
||||
tvbuff_t* tvb;
|
||||
int offset;
|
||||
int len;
|
||||
|
||||
void* data;
|
||||
|
||||
struct _tvbparse_elem_t* sub;
|
||||
|
||||
struct _tvbparse_elem_t* next;
|
||||
struct _tvbparse_elem_t* last;
|
||||
|
||||
const tvbparse_wanted_t* wanted;
|
||||
} tvbparse_elem_t;
|
||||
|
||||
/*
|
||||
* a callback function to be called before or after an element has been successfully extracted.
|
||||
* Note that callbacks are invoked only once the outermost wanted element has matched; the callbacks of the components of a composed token are then called between its own before and after callbacks.
|
||||
*
|
||||
* tvbparse_data: the private data of the parser
|
||||
* wanted_data: the private data of the wanted element
|
||||
* elem: the extracted element
|
||||
*/
|
||||
typedef void (*tvbparse_action_t)(void* tvbparse_data, const void* wanted_data, struct _tvbparse_elem_t* elem);
|
||||
|
||||
|
||||
/*
|
||||
* definition of wanted token types
|
||||
*
|
||||
* the following functions define the tokens we will be able to look for in a tvb
|
||||
* common parameters are:
|
||||
*
|
||||
* id: an arbitrary id that will be copied to the eventual token (don't use 0)
|
||||
* private_data: persistent data to be passed to the callback action (wanted_data)
|
||||
* before_cb: a callback function to be called before those of the subelements
|
||||
* after_cb: a callback function to be called after those of the subelements
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* a char element.
|
||||
*
|
||||
* When looked for it returns a simple element one character long if the char
|
||||
* at the current offset matches one of the needles.
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_char(int id,
|
||||
const gchar* needles,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* a not_char element.
|
||||
*
|
||||
* When looked for it returns a simple element one character long if the char
|
||||
* at the current offset does not match any of the needles.
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_not_char(int id,
|
||||
const gchar* needle,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* a chars element
|
||||
*
|
||||
* When looked for it returns a simple element one or more characters long if
|
||||
* one or more char(s) starting from the current offset match one of the needles.
|
||||
* An element will be returned if at least min_len chars are given (1 if it's 0)
|
||||
* It will get at most max_len chars or as much as it can if max_len is 0.
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_chars(int id,
|
||||
guint min_len,
|
||||
guint max_len,
|
||||
const gchar* needles,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* a not_chars element
|
||||
*
|
||||
* When looked for it returns a simple element one or more characters long if
|
||||
* one or more char(s) starting from the current offset do not match one of the
|
||||
* needles.
|
||||
* An element will be returned if at least min_len chars are given (1 if it's 0)
|
||||
* It will get at most max_len chars or as much as it can if max_len is 0.
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_not_chars(int id,
|
||||
guint min_len,
|
||||
guint max_len,
|
||||
const gchar* needles,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* a string element
|
||||
*
|
||||
* When looked for it returns a simple element if we have the given string at
|
||||
* the current offset
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_string(int id,
|
||||
const gchar* string,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* casestring
|
||||
*
|
||||
* When looked for it returns a simple element if we have a matching string at
|
||||
* the current offset
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_casestring(int id,
|
||||
const gchar* str,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb);
|
||||
|
||||
/*
|
||||
* until
|
||||
*
|
||||
* When looked for it returns a simple element containing all the characters
|
||||
* found until the first match of the ending element if the ending element is
|
||||
* found.
|
||||
*
|
||||
* It won't have a subelement, the ending's callbacks won't get called.
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_until(int id,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
const tvbparse_wanted_t* ending,
|
||||
gboolean include_ending);
|
||||
|
||||
|
||||
/*
|
||||
* one_of
|
||||
*
|
||||
* When looked for it will try to match to the given candidates and return a
|
||||
* composed element whose subelement is the first match.
|
||||
*
|
||||
* The list of candidates is terminated with a NULL
|
||||
*
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_set_oneof(int id,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
...);
|
||||
|
||||
/*
|
||||
* sequence
|
||||
*
|
||||
* When looked for it will try to match in order all the given candidates. If
|
||||
* every candidate is found in the given order it will return a composed
|
||||
* element whose subelements are the matched elements.
|
||||
*
|
||||
* The list of candidates is terminated with a NULL.
|
||||
*
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_set_seq(int id,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
...);
|
||||
/*
|
||||
* some
|
||||
*
|
||||
* When looked for it will try to match the given candidate at least min times
|
||||
* and at most max times. If the given candidate is matched at least min times
|
||||
* a composed element is returned.
|
||||
*
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_some(int id,
|
||||
guint min,
|
||||
guint max,
|
||||
const void* private_data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
const tvbparse_wanted_t* wanted);
|
||||
|
||||
/* shorthand for tvbparse_some() with no upper limit: one or more of wanted */
#define tvbparse_one_or_more(id, private_data, before_cb, after_cb, wanted) \
    tvbparse_some(id, 1, G_MAXINT, private_data, before_cb, after_cb, wanted)
|
||||
|
||||
/* quoted
|
||||
* this is a composed candidate, that will try to match a quoted string
|
||||
* (included the quotes) including into it every escaped quote.
|
||||
*
|
||||
* C strings are matched with tvbparse_quoted(-1,NULL,NULL,NULL,'"','\\')
|
||||
*/
|
||||
tvbparse_wanted_t* tvbparse_quoted(int id,
|
||||
const void* data,
|
||||
tvbparse_action_t before_cb,
|
||||
tvbparse_action_t after_cb,
|
||||
char quote,
|
||||
char escape);
|
||||
|
||||
/*
|
||||
* a helper callback for quoted strings that will shrink the token to contain
|
||||
* only the string and not the quotes
|
||||
*/
|
||||
void tvbparse_shrink_token_cb(void* tvbparse_data,
|
||||
const void* wanted_data,
|
||||
tvbparse_elem_t* tok);
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/* initialize the parser (at every packet)
|
||||
* tvb: what are we parsing?
|
||||
* offset: from where
|
||||
* len: for how many bytes
|
||||
* private_data: will be passed to the action callbacks
|
||||
* ignore: a wanted token type to be skipped before every match attempt (its callbacks are NOT invoked while it is being skipped)
|
||||
*/
|
||||
tvbparse_t* tvbparse_init(tvbuff_t* tvb,
|
||||
int offset,
|
||||
int len,
|
||||
void* private_data,
|
||||
const tvbparse_wanted_t* ignore);
|
||||
|
||||
/* reset the parser */
|
||||
gboolean tvbparse_reset(tvbparse_t* tt, int offset, int len);
|
||||
|
||||
/* it will look for the wanted token at the current offset or after any given
|
||||
* number of ignored tokens returning NULL if there's no match.
|
||||
* if there is a match it will set the offset of the current parser after
|
||||
* the end of the token
|
||||
*/
|
||||
tvbparse_elem_t* tvbparse_get(tvbparse_t* tt,
|
||||
const tvbparse_wanted_t* wanted);
|
||||
|
||||
/* it will look for a wanted token even beyond the current offset
|
||||
* AVOID USING IT because:
|
||||
* is TOO slow,
|
||||
* if the wanted type is a composite type and is matched partially even more
|
||||
* times while looking for it the callbacks of the matched subtokens WILL be
|
||||
* called every time
|
||||
*/
|
||||
|
||||
tvbparse_elem_t* tvbparse_find(tvbparse_t* tt,
|
||||
const tvbparse_wanted_t* wanted);
|
||||
|
||||
#endif
|
Loading…
Reference in New Issue