Internal PCRE field type for efficient RE parsing in dfilters.

svn path=/trunk/; revision=9224
This commit is contained in:
Olivier Biot 2003-12-09 23:02:40 +00:00
parent 4b890b83fd
commit 0f18533b75
6 changed files with 332 additions and 48 deletions

View File

@ -1,5 +1,5 @@
/*
* $Id: semcheck.c,v 1.20 2003/12/06 16:35:19 gram Exp $
* $Id: semcheck.c,v 1.21 2003/12/09 23:02:40 obiot Exp $
*
* Ethereal - Network traffic analyzer
* By Gerald Combs <gerald@ethereal.com>
@ -34,6 +34,16 @@
#include <epan/exceptions.h>
#include <epan/packet.h>
/* Usage: DebugLog(("Error: string=%s\n", str));
 *
 * The argument is a complete, parenthesised printf() argument list. When
 * DEBUG_dfilter is defined the message is written to stdout, prefixed with
 * the source file and line, and stdout is flushed. Otherwise the macro
 * discards its argument without evaluating it.
 *
 * Both variants expand to exactly one statement that needs the caller's
 * trailing semicolon, so the macro is safe inside an unbraced if/else. */
#ifdef DEBUG_dfilter
#define DebugLog(x) \
	do { \
		printf("%s:%u: ", __FILE__, (unsigned int)__LINE__); \
		printf x; \
		fflush(stdout); \
	} while (0)
#else
#define DebugLog(x) ((void)0)
#endif
static void
semcheck(stnode_t *st_node);
@ -101,6 +111,7 @@ compatible_ftypes(ftenum_t a, ftenum_t b)
return FALSE;
}
case FT_PCRE:
case FT_NUM_TYPES:
g_assert_not_reached();
}
@ -121,7 +132,6 @@ mk_uint32_fvalue(guint32 val)
return fv;
}
/* Try to make an fvalue from a string using a value_string or true_false_string.
* This works only for ftypes that are integers. Returns the created fvalue_t*
* or NULL if impossible. */
@ -151,6 +161,7 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
case FT_UINT_STRING:
case FT_UINT64:
case FT_INT64:
case FT_PCRE:
return FALSE;
case FT_BOOLEAN:
@ -212,7 +223,6 @@ mk_fvalue_from_val_string(header_field_info *hfinfo, char *s)
return FALSE;
}
static gboolean
is_bytes_type(enum ftenum type)
{
@ -246,6 +256,7 @@ is_bytes_type(enum ftenum type)
case FT_INT24:
case FT_INT32:
case FT_INT64:
case FT_PCRE:
return FALSE;
case FT_NUM_TYPES:
@ -284,7 +295,6 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func,
THROW(TypeError);
}
if (type2 == STTYPE_FIELD) {
hfinfo2 = stnode_data(st_arg2);
ftype2 = hfinfo2->type;
@ -304,14 +314,19 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func,
}
else if (type2 == STTYPE_STRING) {
s = stnode_data(st_arg2);
fvalue = fvalue_from_string(ftype1, s, dfilter_fail);
if (!fvalue) {
/* check value_string */
fvalue = mk_fvalue_from_val_string(hfinfo1, s);
if (strcmp(relation_string, "matches") == 0) {
/* Convert to a FT_PCRE */
fvalue = fvalue_from_string(FT_PCRE, s, dfilter_fail);
} else {
fvalue = fvalue_from_string(ftype1, s, dfilter_fail);
if (!fvalue) {
THROW(TypeError);
/* check value_string */
fvalue = mk_fvalue_from_val_string(hfinfo1, s);
}
}
if (!fvalue) {
THROW(TypeError);
}
new_st = stnode_new(STTYPE_FVALUE, fvalue);
sttype_test_set2_args(st_node, st_arg1, new_st);
@ -319,14 +334,19 @@ check_relation_LHS_FIELD(const char *relation_string, FtypeCanFunc can_func,
}
else if (type2 == STTYPE_UNPARSED) {
s = stnode_data(st_arg2);
fvalue = fvalue_from_unparsed(ftype1, s, allow_partial_value, dfilter_fail);
if (!fvalue) {
/* check value_string */
fvalue = mk_fvalue_from_val_string(hfinfo1, s);
if (strcmp(relation_string, "matches") == 0) {
/* Convert to a FT_PCRE */
fvalue = fvalue_from_unparsed(FT_PCRE, s, FALSE, dfilter_fail);
} else {
fvalue = fvalue_from_unparsed(ftype1, s, allow_partial_value, dfilter_fail);
if (!fvalue) {
THROW(TypeError);
/* check value_string */
fvalue = mk_fvalue_from_val_string(hfinfo1, s);
}
}
if (!fvalue) {
THROW(TypeError);
}
new_st = stnode_new(STTYPE_FVALUE, fvalue);
sttype_test_set2_args(st_node, st_arg1, new_st);
@ -618,6 +638,11 @@ check_relation(const char *relation_string, gboolean allow_partial_value,
FtypeCanFunc can_func, stnode_t *st_node,
stnode_t *st_arg1, stnode_t *st_arg2)
{
#ifdef DEBUG_dfilter
static guint i = 0;
#endif
DebugLog((" 4 check_relation(\"%s\") [%u]\n", relation_string, i++));
switch (stnode_type_id(st_arg1)) {
case STTYPE_FIELD:
check_relation_LHS_FIELD(relation_string, can_func,
@ -651,6 +676,11 @@ check_test(stnode_t *st_node)
{
test_op_t st_op;
stnode_t *st_arg1, *st_arg2;
#ifdef DEBUG_dfilter
static guint i = 0;
#endif
DebugLog((" 3 check_test(stnode_t *st_node = %p) [%u]\n", st_node, i));
sttype_test_get(st_node, &st_op, &st_arg1, &st_arg2);
@ -706,6 +736,7 @@ check_test(stnode_t *st_node)
default:
g_assert_not_reached();
}
DebugLog((" 3 check_test(stnode_t *st_node = %p) [%u] - End\n", st_node, i++));
}
@ -713,6 +744,10 @@ check_test(stnode_t *st_node)
static void
semcheck(stnode_t *st_node)
{
#ifdef DEBUG_dfilter
static guint i = 0;
#endif
DebugLog((" 2 semcheck(stnode_t *st_node = %p) [%u]\n", st_node, i++));
/* The parser assures that the top-most syntax-tree
* node will be a TEST node, no matter what. So assert that. */
switch (stnode_type_id(st_node)) {
@ -731,6 +766,11 @@ semcheck(stnode_t *st_node)
gboolean
dfw_semcheck(dfwork_t *dfw)
{
#ifdef DEBUG_dfilter
static guint i = 0;
#endif
DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u]\n", dfw, i));
/* Instead of having to check for errors at every stage of
* the semantic-checking, the semantic-checking code will
* throw an exception if a problem is found. */
@ -738,9 +778,13 @@ dfw_semcheck(dfwork_t *dfw)
semcheck(dfw->st_root);
}
CATCH(TypeError) {
DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n",
dfw, i++));
return FALSE;
}
ENDTRY;
DebugLog(("1 dfw_semcheck(dfwork_t *dfw = %p) [%u] - Returns FALSE\n",
dfw, i++));
return TRUE;
}

View File

@ -1,6 +1,6 @@
# Makefile.am
#
# $Id: Makefile.am,v 1.4 2001/03/05 22:53:40 gram Exp $
# $Id: Makefile.am,v 1.5 2003/12/09 23:02:39 obiot Exp $
#
# Ethereal - Network traffic analyzer
# By Gerald Combs <gerald@zing.org>
@ -42,6 +42,7 @@ libftypes_a_SOURCES = \
ftype-integer.c \
ftype-ipv4.c \
ftype-none.c \
ftype-pcre.c \
ftype-string.c \
ftype-time.c \
ftype-tvbuff.c

235
epan/ftypes/ftype-pcre.c Executable file
View File

@ -0,0 +1,235 @@
/*
* $Id: ftype-pcre.c,v 1.1 2003/12/09 23:02:39 obiot Exp $
*
* Ethereal - Network traffic analyzer
* By Gerald Combs <gerald@ethereal.com>
* Copyright 2001 Gerald Combs
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
*/
/* Perl-Compatible Regular Expression (PCRE) internal field type.
* Used with the "matches" dfilter operator, allowing efficient
* compilation and studying of a PCRE pattern in dfilters.
*
* PCRE is provided with libpcre (http://www.pcre.org/).
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <ftypes-int.h>
#ifdef HAVE_LIBPCRE
#include <pcre.h>
/* Create a pcre_tuple_t object based on the given string pattern.
 *
 * The pattern text is duplicated, compiled with pcre_compile() and, if the
 * compilation succeeded, studied with pcre_study().
 *
 * A tuple is always returned. On failure tuple->error holds a newly
 * allocated message and every member that could not be produced is NULL,
 * so the result can always be handed to pcre_tuple_free(). On success
 * tuple->error is NULL. */
static pcre_tuple_t *
pcre_tuple_new(const char *value)
{
	pcre_tuple_t *tuple;
	const char *pcre_error_text = NULL;
	int pcre_error_offset = 0;

	tuple = g_malloc(sizeof(pcre_tuple_t));
	tuple->string = g_strdup(value); /* The RE as string */
	/* Start from a fully defined state so that an early return never
	 * leaves a member holding an indeterminate pointer. */
	tuple->re = NULL;
	tuple->ex = NULL;
	tuple->error = NULL;

	/* Compile the RE */
	tuple->re = pcre_compile(
			value,              /* pattern */
			0,                  /* PCRE options */
			&pcre_error_text,   /* PCRE constant error string */
			&pcre_error_offset, /* Start offset of error in pattern */
			NULL                /* Default char tables (C locale) */
			);
	/* A NULL return value is the documented failure indication; the
	 * error text and offset are only meaningful in that case. */
	if (tuple->re == NULL) {
		tuple->error = g_strdup_printf("In regular expression \"%s\":\n"
				"%s (character position %u)",
				value,
				pcre_error_text ? pcre_error_text : "unknown error",
				(unsigned int)pcre_error_offset);
		return tuple;
	}

	/* Study the RE. pcre_study() may legitimately return NULL when it has
	 * nothing to add; only a non-NULL error text signals a failure. */
	pcre_error_text = NULL;
	tuple->ex = pcre_study(tuple->re, 0, &pcre_error_text);
	if (pcre_error_text) {
		tuple->error = g_strdup_printf("In regular expression \"%s\":\n"
				"%s",
				value, pcre_error_text);
	}
	return tuple;
}
/* Release a pcre_tuple_t and everything it owns. A NULL tuple is ignored.
 *
 * The compiled pattern and the study data are allocated by libpcre through
 * pcre_malloc, so they are returned through pcre_free rather than g_free().
 * The pattern text and the error message come from GLib and are released
 * with g_free(), which accepts NULL. */
static void
pcre_tuple_free(pcre_tuple_t *tuple)
{
	if (tuple == NULL) {
		return;
	}
	/* pcre_free is a replaceable function pointer; do not assume a
	 * user-installed one tolerates NULL. */
	if (tuple->re) {
		(pcre_free)(tuple->re);
	}
	if (tuple->ex) {
		(pcre_free)(tuple->ex);
	}
	g_free(tuple->string);
	g_free(tuple->error);
	g_free(tuple);
}
/* Initialise a fresh FT_PCRE fvalue: it starts without any pattern tuple. */
static void
pcre_fvalue_new(fvalue_t *fv)
{
	pcre_tuple_t *no_tuple = NULL;

	fv->value.re = no_tuple;
}
/* Release the pattern tuple held by an FT_PCRE fvalue, if there is one.
 * The fvalue itself is not freed and its pointer is left untouched. */
static void
pcre_fvalue_free(fvalue_t *fv)
{
	pcre_tuple_t *held = fv->value.re;

	if (held == NULL) {
		return;
	}
	pcre_tuple_free(held);
}
/* Generate a FT_PCRE from a parsed string pattern.
 * Uses the specified logfunc() to report errors.
 *
 * Any tuple already held by fv is released first. Returns TRUE when the
 * pattern compiled and was studied without error, FALSE otherwise; in the
 * failure case the tuple (with its error text) stays attached to fv. */
static gboolean
val_from_string(fvalue_t *fv, char *pattern, LogFunc logfunc)
{
	/* Free up the old value, if we have one */
	pcre_fvalue_free(fv);

	fv->value.re = pcre_tuple_new(pattern);
	if (fv->value.re->error) {
		/* The message embeds the user's pattern, which may contain '%'.
		 * NOTE(review): LogFunc is assumed to be printf-style
		 * (format, ...) - confirm against ftypes.h. Pass the message as
		 * data, never as the format string. */
		if (logfunc != NULL) {
			logfunc("%s", fv->value.re->error);
		}
		return FALSE;
	}
	return TRUE;
}
/* Generate a FT_PCRE from an unparsed string pattern.
 * Uses the specified logfunc() to report errors.
 *
 * Partial values make no sense for a regular expression, so
 * allow_partial_value must be FALSE (asserted). Any tuple already held by
 * fv is released first. Returns TRUE when the pattern compiled and was
 * studied without error, FALSE otherwise; in the failure case the tuple
 * (with its error text) stays attached to fv. */
static gboolean
val_from_unparsed(fvalue_t *fv, char *pattern, gboolean allow_partial_value _U_, LogFunc logfunc)
{
	/* Free up the old value, if we have one */
	pcre_fvalue_free(fv);
	g_assert(! allow_partial_value);

	fv->value.re = pcre_tuple_new(pattern);
	if (fv->value.re->error) {
		/* The message embeds the user's pattern, which may contain '%'.
		 * NOTE(review): LogFunc is assumed to be printf-style
		 * (format, ...) - confirm against ftypes.h. Pass the message as
		 * data, never as the format string. */
		if (logfunc != NULL) {
			logfunc("%s", fv->value.re->error);
		}
		return FALSE;
	}
	return TRUE;
}
/* Note: the incoming value is the textual form of the regular expression,
 * whereas what gets stored in the fvalue is the tuple built from it by
 * pcre_tuple_new(). The caller must not claim the value was already copied. */
static void
pcre_fvalue_set(fvalue_t *fv, gpointer value, gboolean already_copied)
{
	const char *pattern_text = value;

	g_assert(value != NULL);
	/* Drop whatever tuple the fvalue held before. */
	pcre_fvalue_free(fv);
	g_assert(! already_copied);

	fv->value.re = pcre_tuple_new(pattern_text);
}
/* Hand back the pattern tuple stored in the fvalue (may be NULL). */
static gpointer
pcre_fvalue_get(fvalue_t *fv)
{
	gpointer stored_tuple = fv->value.re;

	return stored_tuple;
}
/* Register the FT_PCRE field type (build with libpcre available).
 *
 * Only the slots needed to create, fill, read and free a pattern tuple are
 * populated; every string-representation, integer, floating, comparison,
 * length and slice slot is NULL. */
void
ftype_register_pcre(void)
{
static ftype_t pcre_type = {
"FT_PCRE",
"Compiled Perl-Compatible Regular Expression object",
0, /* wire_size */
pcre_fvalue_new, /* new_value */
pcre_fvalue_free, /* free_value */
val_from_unparsed, /* val_from_unparsed */
val_from_string, /* val_from_string */
NULL, /* val_to_string_repr */
NULL, /* len_string_repr */
pcre_fvalue_set, /* set_value */
NULL, /* set_value_integer */
NULL, /* set_value_floating */
pcre_fvalue_get, /* get_value */
NULL, /* get_value_integer */
NULL, /* get_value_floating */
NULL, /* cmp_eq */
NULL, /* cmp_ne */
NULL, /* cmp_gt */
NULL, /* cmp_ge */
NULL, /* cmp_lt */
NULL, /* cmp_le */
NULL, /* cmp_contains */
NULL, /* cmp_matches */
NULL, /* len */
NULL, /* slice */
};
/* Make the descriptor known under the FT_PCRE type id. */
ftype_register(FT_PCRE, &pcre_type);
}
#else /* HAVE_LIBPCRE */
/* Register the FT_PCRE field type (build without libpcre).
 *
 * The type id is still registered, under the same name and description,
 * but every operation slot is NULL. */
void
ftype_register_pcre(void)
{
static ftype_t pcre_type = {
"FT_PCRE",
"Compiled Perl-Compatible Regular Expression object",
0, /* wire_size */
NULL, /* new_value */
NULL, /* free_value */
NULL, /* val_from_unparsed */
NULL, /* val_from_string */
NULL, /* val_to_string_repr */
NULL, /* len_string_repr */
NULL, /* set_value */
NULL, /* set_value_integer */
NULL, /* set_value_floating */
NULL, /* get_value */
NULL, /* get_value_integer */
NULL, /* get_value_floating */
NULL, /* cmp_eq */
NULL, /* cmp_ne */
NULL, /* cmp_gt */
NULL, /* cmp_ge */
NULL, /* cmp_lt */
NULL, /* cmp_le */
NULL, /* cmp_contains */
NULL, /* cmp_matches */
NULL, /* len */
NULL, /* slice */
};
/* Make the descriptor known under the FT_PCRE type id. */
ftype_register(FT_PCRE, &pcre_type);
}
#endif /* HAVE_LIBPCRE */

View File

@ -1,5 +1,5 @@
/*
* $Id: ftype-string.c,v 1.16 2003/12/06 16:35:20 gram Exp $
* $Id: ftype-string.c,v 1.17 2003/12/09 23:02:39 obiot Exp $
*
* Ethereal - Network traffic analyzer
* By Gerald Combs <gerald@ethereal.com>
@ -237,46 +237,32 @@ cmp_contains(fvalue_t *fv_a, fvalue_t *fv_b)
static gboolean
cmp_matches(fvalue_t *fv_a, fvalue_t *fv_b)
{
pcre *re;
const char *pcre_error_text;
int pcre_error_offset;
int options = 0;
int rc;
pcre_extra *pe = NULL; /* TODO - pcre_study() */
re = pcre_compile(
fv_b->value.string, /* pattern */
options, /* PCRE options */
&pcre_error_text, /* PCRE constant error string */
&pcre_error_offset, /* Start offset of error in pattern */
NULL /* Default char tables (C locale) */
);
if (re == NULL) {
/* TODO - Do something with pcre_error and pcre_error_offset */
/* fv_b is always a FT_PCRE, otherwise the dfilter semcheck() would have
* warned us. For the same reason (and because we're using g_malloc()),
* fv_b->value.re is not NULL.
*/
if (strcmp(fv_b->ftype->name, "FT_PCRE") != 0) {
return FALSE;
}
/* TODO - Study the RE *if* the compile & study only happens once * /
pe = pcre_study(re, 0, &pcre_error_text);
if (pcre_error != NULL) {
/ * TODO - Do something with pcre_error and pcre_error_offset * /
if (! fv_b->value.re) {
return FALSE;
}
*/
rc = pcre_exec(
re, /* Compiled PCRE */
pe, /* PCRE extra from pcre_study() */
fv_a->value.string, /* The data to check for the pattern */
(int)strlen(fv_a->value.string), /* and its length */
0, /* Start offset within data */
options, /* PCRE options */
NULL, /* We are not interested in the matched string */
0 /* of the pattern; only in success or failure. */
(fv_b->value.re)->re, /* Compiled PCRE */
(fv_b->value.re)->ex, /* PCRE extra from pcre_study() */
fv_a->value.string, /* The data to check for the pattern... */
(int)strlen(fv_a->value.string), /* ... and its length */
0, /* Start offset within data */
options, /* PCRE options */
NULL, /* We are not interested in the matched string */
0 /* of the pattern; only in success or failure. */
);
/* if (pe != NULL)
g_free(pe); */
g_free(re);
if (rc == 0)
if (rc == 0) {
return TRUE;
}
return FALSE;
}
#endif

View File

@ -1,5 +1,5 @@
/*
* $Id: ftypes.c,v 1.19 2003/12/06 16:35:20 gram Exp $
* $Id: ftypes.c,v 1.20 2003/12/09 23:02:39 obiot Exp $
*
* Ethereal - Network traffic analyzer
* By Gerald Combs <gerald@ethereal.com>
@ -46,6 +46,7 @@ void ftype_register_none(void);
void ftype_register_string(void);
void ftype_register_time(void);
void ftype_register_tvbuff(void);
void ftype_register_pcre(void);
/* Initialize the ftype module. */
void
@ -59,6 +60,7 @@ ftypes_initialize(void)
ftype_register_string();
ftype_register_time();
ftype_register_tvbuff();
ftype_register_pcre();
}
/* Each ftype_t is registered via this function */

View File

@ -1,7 +1,7 @@
/* ftypes.h
* Definitions for field types
*
* $Id: ftypes.h,v 1.26 2003/12/06 16:35:20 gram Exp $
* $Id: ftypes.h,v 1.27 2003/12/09 23:02:39 obiot Exp $
*
* Ethereal - Network traffic analyzer
* By Gerald Combs <gerald@ethereal.com>
@ -26,6 +26,10 @@
#ifndef FTYPES_H
#define FTYPES_H
#ifdef HAVE_LIBPCRE
#include <pcre.h>
#endif /* HAVE_LIBPCRE */
#include <glib.h>
#include "../slab.h"
@ -59,6 +63,7 @@ enum ftenum {
FT_IPv6,
FT_IPXNET,
FT_FRAMENUM, /* a UINT32, but if selected lets you go to frame with that numbe */
FT_PCRE, /* a compiled Perl-Compatible Regular Expression object */
FT_NUM_TYPES /* last item number plus one */
};
@ -73,6 +78,14 @@ enum ftrepr {
typedef enum ftrepr ftrepr_t;
#ifdef HAVE_LIBPCRE
/* Everything the FT_PCRE field type keeps for one regular expression,
 * as filled in by pcre_tuple_new() in ftype-pcre.c. */
typedef struct _pcre_tuple_t {
/* Copy of the pattern text the tuple was built from. */
char *string;
/* Result of pcre_compile() on the pattern. */
pcre *re;
/* Result of pcre_study() on the compiled pattern. */
pcre_extra *ex;
/* Message describing a compile or study failure; NULL when there was none. */
char *error;
} pcre_tuple_t;
#endif /* HAVE_LIBPCRE */
/* Initialize the ftypes subsystem. Called once. */
void
@ -141,6 +154,9 @@ typedef struct _fvalue_t {
ipv4_addr ipv4;
nstime_t time;
tvbuff_t *tvb;
#ifdef HAVE_LIBPCRE
pcre_tuple_t *re;
#endif /* HAVE_LIBPCRE */
} value;
/* The following is provided for private use