Move regex code to wsutil

This commit is contained in:
João Valverde 2021-11-12 15:55:14 +00:00
parent b9f2e4b7fa
commit 274531820a
17 changed files with 188 additions and 129 deletions

View File

@ -404,6 +404,10 @@ libwsutil.so.0 libwsutil0 #MINVER#
ws_pipe_spawn_async@Base 2.5.1
ws_pipe_spawn_sync@Base 2.5.1
ws_read_string_from_pipe@Base 2.5.0
ws_regex_compile@Base 3.7.0
ws_regex_free@Base 3.7.0
ws_regex_matches@Base 3.7.0
ws_regex_pattern@Base 3.7.0
ws_socket_ptoa@Base 3.1.1
ws_strtoi16@Base 2.3.0
ws_strtoi32@Base 2.3.0

View File

@ -338,7 +338,6 @@ target_link_libraries(epan
${LZ4_LIBRARIES}
${M_LIBRARIES}
${NGHTTP2_LIBRARIES}
${PCRE2_LIBRARIES}
${SMI_LIBRARIES}
${SNAPPY_LIBRARIES}
${WIN_PSAPI_LIBRARY}
@ -370,7 +369,6 @@ target_include_directories(epan
${LUA_INCLUDE_DIRS}
${LZ4_INCLUDE_DIRS}
${NGHTTP2_INCLUDE_DIRS}
${PCRE2_INCLUDE_DIRS}
${SMI_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS}
${ZSTD_INCLUDE_DIRS}

View File

@ -38,7 +38,7 @@ dfvm_value_free(dfvm_value_t *v)
drange_free(v->value.drange);
break;
case PCRE:
fvalue_regex_free(v->value.pcre);
ws_regex_free(v->value.pcre);
break;
default:
/* nothing */
@ -112,7 +112,7 @@ dfvm_dump(FILE *f, dfilter_t *df)
case PUT_PCRE:
fprintf(f, "%05d PUT_PCRE \t%s <GRegex> -> reg#%u\n",
id,
fvalue_regex_pattern(arg1->value.pcre),
ws_regex_pattern(arg1->value.pcre),
arg2->value.numeric);
break;
case CHECK_EXISTS:
@ -371,7 +371,7 @@ put_fvalue(dfilter_t *df, fvalue_t *fv, int reg)
/* Put a constant PCRE in a register. These will not be cleared by
* free_register_overhead. */
static gboolean
put_pcre(dfilter_t *df, fvalue_regex_t *pcre, int reg)
put_pcre(dfilter_t *df, ws_regex_t *pcre, int reg)
{
df->registers[reg] = g_list_append(NULL, pcre);
df->owns_memory[reg] = FALSE;

View File

@ -9,6 +9,7 @@
#ifndef DFVM_H
#define DFVM_H
#include <wsutil/regex.h>
#include <epan/proto.h>
#include "dfilter-int.h"
#include "syntax-tree.h"
@ -36,7 +37,7 @@ typedef struct {
drange_t *drange;
header_field_info *hfinfo;
df_func_def_t *funcdef;
fvalue_regex_t *pcre;
ws_regex_t *pcre;
} value;
} dfvm_value_t;

View File

@ -234,7 +234,7 @@ dfw_append_function(dfwork_t *dfw, stnode_t *node, dfvm_value_t **p_jmp)
/* returns register number */
static int
dfw_append_put_pcre(dfwork_t *dfw, fvalue_regex_t *pcre)
dfw_append_put_pcre(dfwork_t *dfw, ws_regex_t *pcre)
{
dfvm_insn_t *insn;
dfvm_value_t *val1, *val2;

View File

@ -1030,7 +1030,7 @@ check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
stnode_t *st_arg1, stnode_t *st_arg2)
{
static guint i = 0;
fvalue_regex_t *pcre;
ws_regex_t *pcre;
char *errmsg = NULL;
const char *patt;
@ -1045,7 +1045,7 @@ check_relation_matches(dfwork_t *dfw, stnode_t *st_node,
patt = stnode_data(st_arg2);
ws_debug("Compile regex pattern: %s", patt);
pcre = fvalue_regex_compile(patt, &errmsg);
pcre = ws_regex_compile(patt, &errmsg);
if (errmsg) {
dfilter_fail(dfw, "%s", errmsg);
g_free(errmsg);

View File

@ -27,7 +27,7 @@ sttype_fvalue_free(gpointer value)
static void
pcre_free(gpointer value)
{
fvalue_regex_t *pcre = value;
ws_regex_t *pcre = value;
/* If the data was not claimed with stnode_steal_data(), free it. */
if (pcre) {
@ -36,7 +36,7 @@ pcre_free(gpointer value)
* count; it'll get freed when the reference count drops
* to 0.
*/
fvalue_regex_free(pcre);
ws_regex_free(pcre);
}
}
@ -67,9 +67,7 @@ field_tostr(const void *data, gboolean pretty _U_)
static char *
pcre_tostr(const void *data, gboolean pretty _U_)
{
const fvalue_regex_t *pcre = data;
return g_strdup(fvalue_regex_pattern(pcre));
return g_strdup(ws_regex_pattern(data));
}
void

View File

@ -50,7 +50,6 @@ target_include_directories(ftypes
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_SOURCE_DIR}/epan
${PCRE2_INCLUDE_DIR}
)
set_target_properties(ftypes PROPERTIES

View File

@ -535,11 +535,11 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
cmp_matches(const fvalue_t *fv, const ws_regex_t *regex)
{
GByteArray *a = fv->value.bytes;
return fvalue_regex_matches(regex, a->data, a->len);
return ws_regex_matches(regex, a->data, a->len);
}
void

View File

@ -239,7 +239,7 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
cmp_matches(const fvalue_t *fv, const ws_regex_t *regex)
{
const protocol_value_t *a = (const protocol_value_t *)&fv->value.protocol;
volatile gboolean rc = FALSE;
@ -253,9 +253,9 @@ cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
if (a->tvb != NULL) {
tvb_len = tvb_captured_length(a->tvb);
data = (const char *)tvb_get_ptr(a->tvb, 0, tvb_len);
rc = fvalue_regex_matches(regex, data, tvb_len);
rc = ws_regex_matches(regex, data, tvb_len);
} else {
rc = fvalue_regex_matches(regex, a->proto_string, -1);
rc = ws_regex_matches(regex, a->proto_string, -1);
}
}
CATCH_ALL {

View File

@ -124,14 +124,14 @@ cmp_contains(const fvalue_t *fv_a, const fvalue_t *fv_b)
}
static gboolean
cmp_matches(const fvalue_t *fv, const fvalue_regex_t *regex)
cmp_matches(const fvalue_t *fv, const ws_regex_t *regex)
{
char *str = fv->value.string;
if (! regex) {
return FALSE;
}
return fvalue_regex_matches(regex, str, -1);
return ws_regex_matches(regex, str, -1);
}
void

View File

@ -41,7 +41,7 @@ typedef double (*FvalueGetFloatingFunc)(fvalue_t*);
typedef int (*FvalueCmp)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueBitwiseAnd)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueContains)(const fvalue_t*, const fvalue_t*);
typedef gboolean (*FvalueMatches)(const fvalue_t*, const fvalue_regex_t*);
typedef gboolean (*FvalueMatches)(const fvalue_t*, const ws_regex_t*);
typedef guint (*FvalueLen)(fvalue_t*);
typedef void (*FvalueSlice)(fvalue_t*, GByteArray *, guint offset, guint length);

View File

@ -11,13 +11,6 @@
#include "ftypes-int.h"
#include <wsutil/ws_assert.h>
#include <wsutil/ws_return.h>
#include <pcre2.h>
struct _fvalue_regex_t {
pcre2_code *code;
char *pattern;
};
/* Keep track of ftype_t's via their ftenum number */
static ftype_t* type_list[FT_NUM_TYPES];
@ -714,89 +707,10 @@ fvalue_contains(const fvalue_t *a, const fvalue_t *b)
}
gboolean
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
fvalue_matches(const fvalue_t *a, const ws_regex_t *re)
{
/* XXX - check compatibility of a and b */
ws_assert(a->ftype->cmp_matches);
return a->ftype->cmp_matches(a, b);
}
static pcre2_code *
_pcre2_compile(const char *patt, char **errmsg)
{
pcre2_code *code;
int errorcode;
PCRE2_SIZE erroroffset;
/* By default UTF-8 is off. */
code = pcre2_compile_8((PCRE2_SPTR)patt,
PCRE2_ZERO_TERMINATED,
PCRE2_NEVER_UTF,
&errorcode,
&erroroffset,
NULL);
if (code == NULL) {
*errmsg = g_malloc0(128);
pcre2_get_error_message(errorcode, *errmsg, 128);
return NULL;
}
return code;
}
static gboolean
_pcre2_matches(pcre2_code *code, const char *subj, gssize subj_size)
{
PCRE2_SIZE length;
pcre2_match_data *match_data;
int rc;
length = subj_size < 0 ? PCRE2_ZERO_TERMINATED : (PCRE2_SIZE)subj_size;
match_data = pcre2_match_data_create_from_pattern(code, NULL);
rc = pcre2_match(code, subj, length, 0, 0, match_data, NULL);
pcre2_match_data_free(match_data);
return rc < 0 ? FALSE : TRUE;
}
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg)
{
ws_return_val_if_null(patt, NULL);
pcre2_code *code = _pcre2_compile(patt, errmsg);
if (code == NULL)
return NULL;
fvalue_regex_t *re = g_new(fvalue_regex_t, 1);
re->code = code;
re->pattern = g_strdup(patt);
return re;
}
gboolean
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
{
ws_return_val_if_null(regex, FALSE);
ws_return_val_if_null(subj, FALSE);
return _pcre2_matches(regex->code, subj, subj_size);
}
void
fvalue_regex_free(fvalue_regex_t *regex)
{
pcre2_code_free(regex->code);
g_free(regex->pattern);
g_free(regex);
}
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex)
{
return regex->pattern;
return a->ftype->cmp_matches(a, re);
}
/*

View File

@ -12,9 +12,10 @@
#ifndef __FTYPES_H__
#define __FTYPES_H__
#include <glib.h>
#include <wireshark.h>
#include <wsutil/regex.h>
#include <epan/wmem_scopes.h>
#include "ws_symbol_export.h"
#ifdef __cplusplus
extern "C" {
@ -131,9 +132,6 @@ typedef enum ft_framenum_type ft_framenum_type_t;
struct _ftype_t;
typedef struct _ftype_t ftype_t;
struct _fvalue_regex_t;
typedef struct _fvalue_regex_t fvalue_regex_t;
/* String representation types. */
enum ftrepr {
FTREPR_DISPLAY,
@ -350,19 +348,7 @@ gboolean
fvalue_contains(const fvalue_t *a, const fvalue_t *b);
gboolean
fvalue_matches(const fvalue_t *a, const fvalue_regex_t *re);
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg);
gboolean
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size);
void
fvalue_regex_free(fvalue_regex_t *regex);
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex);
fvalue_matches(const fvalue_t *a, const ws_regex_t *re);
guint
fvalue_length(fvalue_t *fv);

View File

@ -56,6 +56,7 @@ set(WSUTIL_PUBLIC_HEADERS
pow2.h
privileges.h
processes.h
regex.h
report_message.h
sign_ext.h
sober128.h
@ -117,6 +118,7 @@ set(WSUTIL_COMMON_FILES
os_version_info.c
please_report_bug.c
privileges.c
regex.c
rsa.c
sober128.c
socket.c
@ -280,6 +282,7 @@ target_link_libraries(wsutil
${CMAKE_DL_LIBS}
${GCRYPT_LIBRARIES}
${GNUTLS_LIBRARIES}
${PCRE2_LIBRARIES}
${M_LIBRARIES}
${WIN_IPHLPAPI_LIBRARY}
${WIN_WS2_32_LIBRARY}
@ -289,6 +292,7 @@ target_include_directories(wsutil
SYSTEM PRIVATE
${GCRYPT_INCLUDE_DIRS}
${GNUTLS_INCLUDE_DIRS}
${PCRE2_INCLUDE_DIRS}
)
install(TARGETS wsutil

117
wsutil/regex.c Normal file
View File

@ -0,0 +1,117 @@
/*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include "config.h"
#include "regex.h"
#include <wsutil/ws_return.h>
#include <pcre2.h>
/*
 * Private representation behind the opaque ws_regex_t handle.
 * Both members are owned by the object and released in ws_regex_free().
 */
struct _ws_regex {
/* Compiled pattern as returned by pcre2_compile_8(). */
pcre2_code *code;
/* g_strdup() copy of the pattern text the regex was compiled from. */
char *pattern;
};
/*
 * Size of the buffer handed to pcre2_get_error_message(), counted in
 * code units (one code unit is one byte here).
 */
#define ERROR_MAXLEN_IN_CODE_UNITS 128
static pcre2_code *
_pcre2_compile(const char *patt, char **errmsg)
{
pcre2_code *code;
int errorcode;
PCRE2_SIZE erroroffset;
char *error_buffer;
/* By default UTF-8 is off. */
code = pcre2_compile_8((PCRE2_SPTR)patt,
PCRE2_ZERO_TERMINATED,
PCRE2_NEVER_UTF,
&errorcode,
&erroroffset,
NULL);
if (code == NULL) {
/*
* We have to provide a buffer and we don't know how long the
* error message is or even the maximum size. From pcre2api(3):
* "None of the messages are very long; a
* buffer size of 120 code units is ample."
*/
/* Code unit = one byte */
error_buffer = g_malloc(ERROR_MAXLEN_IN_CODE_UNITS);
/* Message is returned with a trailing zero. */
pcre2_get_error_message(errorcode, error_buffer, ERROR_MAXLEN_IN_CODE_UNITS);
/* One more at the end for good luck. */
error_buffer[ERROR_MAXLEN_IN_CODE_UNITS-1] = '\0';
*errmsg = error_buffer;
return NULL;
}
return code;
}
/*
 * Compile a pattern into a newly allocated ws_regex_t.
 *
 * patt    NUL-terminated pattern text; a NULL pattern is rejected.
 * errmsg  Optional. If not NULL and compilation fails, *errmsg receives an
 *         allocated error message that the caller must g_free(). It is left
 *         untouched on success.
 *
 * Returns the new regex (release it with ws_regex_free()), or NULL if patt
 * is NULL or the pattern does not compile.
 */
ws_regex_t *
ws_regex_compile(const char *patt, char **errmsg)
{
    char *compile_err = NULL;

    ws_return_val_if_null(patt, NULL);

    /*
     * Always collect the message locally so a caller passing errmsg == NULL
     * cannot make the helper write through a NULL pointer.
     */
    pcre2_code *code = _pcre2_compile(patt, &compile_err);
    if (code == NULL) {
        if (errmsg != NULL)
            *errmsg = compile_err;
        else
            g_free(compile_err);
        return NULL;
    }

    ws_regex_t *re = g_new(ws_regex_t, 1);
    re->code = code;
    /* Keep our own copy of the text so ws_regex_pattern() can return it. */
    re->pattern = g_strdup(patt);
    return re;
}
/*
 * Run a compiled pattern against a subject buffer.
 *
 * subj_size is the subject length in bytes; a negative value means subj is
 * NUL-terminated. Returns true when pcre2_match() reports a match (a
 * non-negative return code) and false for "no match", for any matching
 * error, or when the match data block cannot be allocated.
 */
static bool
_pcre2_matches(pcre2_code *code, const char *subj, gssize subj_size)
{
    PCRE2_SIZE length;
    pcre2_match_data *match_data;
    int rc;

    length = subj_size < 0 ? PCRE2_ZERO_TERMINATED : (PCRE2_SIZE)subj_size;

    match_data = pcre2_match_data_create_from_pattern(code, NULL);
    if (match_data == NULL) {
        /* Out of memory: treat as "does not match" rather than go on. */
        return FALSE;
    }

    /* PCRE2 subjects are unsigned code units; cast as done for the pattern. */
    rc = pcre2_match(code, (PCRE2_SPTR)subj, length, 0, 0, match_data, NULL);
    pcre2_match_data_free(match_data);

    /* Negative codes are PCRE2_ERROR_NOMATCH or a real error. */
    return rc >= 0;
}
/*
 * Test whether a subject matches a compiled regex.
 *
 * subj_size is the subject length in bytes, or negative when subj is
 * NUL-terminated. A NULL regex or NULL subject is reported through
 * ws_return_val_if_null() and yields FALSE.
 */
bool
ws_regex_matches(const ws_regex_t *re, const char *subj, gssize subj_size)
{
    /* Argument validation first; both pointers are required. */
    ws_return_val_if_null(re, FALSE);
    ws_return_val_if_null(subj, FALSE);

    /* Delegate the actual matching to the PCRE2 helper. */
    pcre2_code *compiled = re->code;
    bool matched = _pcre2_matches(compiled, subj, subj_size);

    return matched;
}
/*
 * Release a regex created by ws_regex_compile(): the compiled PCRE2 code,
 * the stored copy of the pattern text and the object itself.
 *
 * Passing NULL is a no-op, mirroring g_free().
 */
void
ws_regex_free(ws_regex_t *re)
{
    if (re == NULL)
        return;

    pcre2_code_free(re->code);
    g_free(re->pattern);
    g_free(re);
}
/*
 * Return the pattern text a regex was compiled from.
 *
 * The string is owned by the regex and stays valid until ws_regex_free();
 * the caller must not free it. Returns NULL when re is NULL.
 */
const char *
ws_regex_pattern(const ws_regex_t *re)
{
    if (re == NULL)
        return NULL;

    return re->pattern;
}

38
wsutil/regex.h Normal file
View File

@ -0,0 +1,38 @@
/* wsutil/regex.h
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#ifndef __WSUTIL_REGEX_H__
#define __WSUTIL_REGEX_H__
#include <wireshark.h>
#ifdef __cplusplus
extern "C" {
#endif
/*
 * Opaque handle for a compiled regular expression. The structure layout is
 * private to the implementation; use the ws_regex_*() functions below.
 */
struct _ws_regex;
typedef struct _ws_regex ws_regex_t;
/*
 * Compile a NUL-terminated pattern (PCRE2 syntax, UTF mode disabled).
 *
 * On success returns a new regex that must be released with
 * ws_regex_free(); *errmsg is not modified. Returns NULL if patt is NULL or
 * if the pattern fails to compile; in the latter case *errmsg receives an
 * allocated error message that the caller must g_free().
 */
WS_DLL_PUBLIC ws_regex_t *
ws_regex_compile(const char *patt, char **errmsg);
/*
 * Test whether subj matches the compiled regex.
 *
 * subj_size is the length of subj in bytes; pass a negative value when subj
 * is NUL-terminated. Returns false when there is no match, when matching
 * fails, or when re or subj is NULL.
 */
WS_DLL_PUBLIC bool
ws_regex_matches(const ws_regex_t *re, const char *subj, gssize subj_size);
/*
 * Release a regex obtained from ws_regex_compile(), including its compiled
 * code and its stored copy of the pattern text.
 */
WS_DLL_PUBLIC void
ws_regex_free(ws_regex_t *re);
/*
 * Return the pattern text the regex was compiled from. The string belongs
 * to the regex object and is released by ws_regex_free(); do not free it.
 */
WS_DLL_PUBLIC const char *
ws_regex_pattern(const ws_regex_t *re);
#ifdef __cplusplus
}
#endif
#endif /* __WSUTIL_REGEX_H__ */