dfilter: Add support for PCRE2

PCRE2 is the future of PCRE. The only advantage of GRegex is that
it comes bundled with GLib, which is not an advantage at all.
PCRE2 is widely available; the GRegex abstraction layer is not a
good fit and abstracts things that don't need abstracting or that we
could handle better ourselves; there are open bugs (#12997) and
maintenance is spotty at best.

GRegex comes with many of the problems of bundled code, aggravated by
the fact that it completely falls outside of our control.
This commit is contained in:
João Valverde 2021-10-15 18:34:33 +01:00
parent 6630fd5260
commit ed8a02af17
12 changed files with 237 additions and 10 deletions

View File

@ -1288,6 +1288,9 @@ ws_find_package(SMI ENABLE_SMI HAVE_LIBSMI)
# Support for TLS decryption using RSA private keys.
ws_find_package(GNUTLS ENABLE_GNUTLS HAVE_LIBGNUTLS "3.3.0")
# PCRE2
ws_find_package(PCRE2 ENABLE_PCRE2 HAVE_PCRE2)
# Kerberos
ws_find_package(KERBEROS ENABLE_KERBEROS HAVE_KERBEROS)
@ -1836,6 +1839,11 @@ set_package_properties(SMI PROPERTIES
DESCRIPTION "Library to access SMI management information"
PURPOSE "Support MIB and PIB parsing and OID resolution"
)
set_package_properties(PCRE2 PROPERTIES
URL "https://www.pcre.org"
DESCRIPTION "Regular expression pattern matching using the same syntax and semantics as Perl 5"
PURPOSE "Support for regular expressions"
)
string(TOUPPER "${CMAKE_BUILD_TYPE}" _build_type)
message(STATUS "C-Flags: ${CMAKE_C_FLAGS} ${CMAKE_C_FLAGS_${_build_type}}")
@ -2068,6 +2076,10 @@ if(USE_REPOSITORY)
list (APPEND THIRD_PARTY_DLLS "${NGHTTP2_DLL_DIR}/${NGHTTP2_DLL}")
list (APPEND THIRD_PARTY_PDBS "${NGHTTP2_DLL_DIR}/${NGHTTP2_PDB}")
endif(NGHTTP2_FOUND)
if (PCRE2_FOUND)
list (APPEND THIRD_PARTY_DLLS "${PCRE2_DLL_DIR}/${PCRE2_DLL}")
list (APPEND THIRD_PARTY_PDBS "${PCRE2_DLL_DIR}/${PCRE2_PDB}")
endif(PCRE2_FOUND)
if (SBC_FOUND)
list (APPEND THIRD_PARTY_DLLS "${SBC_DLL_DIR}/${SBC_DLL}")
endif(SBC_FOUND)

View File

@ -97,6 +97,7 @@ option(ENABLE_NGHTTP2 "Build with HTTP/2 header decompression support" ON)
option(ENABLE_LUA "Build with Lua dissector support" ON)
option(ENABLE_SMI "Build with libsmi snmp support" ON)
option(ENABLE_GNUTLS "Build with RSA decryption support" ON)
option(ENABLE_PCRE2 "Build with regular expression support" ON)
if(WIN32)
option(ENABLE_WINSPARKLE "Enable automatic updates using WinSparkle" ON)
endif()

View File

@ -0,0 +1,70 @@
#
# - Find PCRE2 libraries
#
#  PCRE2_INCLUDE_DIRS - where to find PCRE2 headers.
#  PCRE2_LIBRARIES    - List of libraries when using PCRE2.
#  PCRE2_FOUND        - True if PCRE2 is found.
#  PCRE2_DLL_DIR      - (Windows) Path to the PCRE2 DLL
#  PCRE2_DLL          - (Windows) Name of the PCRE2 DLL
#  PCRE2_PDB          - (Windows) Name of the PCRE2 PDB file
#
# Note that the "8" in "libpcre2-8" refers to "PCRE library version 2 with
# support for 8-bit code units".

# PCRE2_HINTS is consumed below as the root of the include/, lib/ and bin/
# directories of the Windows package.
include( FindWSWinLibs )
FindWSWinLibs( "pcre2-.*" "PCRE2_HINTS" )

# pkg-config is only consulted on non-Windows platforms.
if( NOT WIN32)
	find_package(PkgConfig QUIET)
	pkg_search_module(PC_PCRE2 QUIET "libpcre2-8")
endif()

find_path(PCRE2_INCLUDE_DIR
	NAMES
		pcre2.h
	HINTS
		${PC_PCRE2_INCLUDE_DIRS}
		${PCRE2_HINTS}/include
)

find_library(PCRE2_LIBRARY
	NAMES
		"pcre2-8"
	HINTS
		${PC_PCRE2_LIBRARY_DIRS}
		${PCRE2_HINTS}/lib
)

include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(PCRE2
	REQUIRED_VARS   PCRE2_LIBRARY PCRE2_INCLUDE_DIR
	VERSION_VAR     PC_PCRE2_VERSION
)

if(PCRE2_FOUND)
	set(PCRE2_LIBRARIES ${PCRE2_LIBRARY})
	set(PCRE2_INCLUDE_DIRS ${PCRE2_INCLUDE_DIR})
	if (WIN32)
		set (PCRE2_DLL_DIR "${PCRE2_HINTS}/bin"
			CACHE PATH "Path to PCRE2 DLL"
		)
		# The globs are RELATIVE to PCRE2_DLL_DIR, so only file names
		# (not full paths) end up in PCRE2_DLL and PCRE2_PDB.
		file(GLOB _pcre2_dll RELATIVE "${PCRE2_DLL_DIR}"
			"${PCRE2_DLL_DIR}/pcre2-8*.dll"
		)
		# We're storing filenames only. Should we use STRING instead?
		set (PCRE2_DLL ${_pcre2_dll}
			CACHE FILEPATH "PCRE2 DLL file name"
		)
		file(GLOB _pcre2_pdb RELATIVE "${PCRE2_DLL_DIR}"
			"${PCRE2_DLL_DIR}/pcre2-8*.pdb"
		)
		set (PCRE2_PDB ${_pcre2_pdb}
			CACHE FILEPATH "PCRE2 PDB file name"
		)
		mark_as_advanced(PCRE2_DLL_DIR PCRE2_DLL PCRE2_PDB)
	endif()
else()
	# Leave the result variables unset so consumers expand them to nothing.
	set(PCRE2_LIBRARIES)
	set(PCRE2_INCLUDE_DIRS)
endif()

# Hide the cache entries created by find_path()/find_library() above.
# PCRE2_LIBRARIES and PCRE2_INCLUDE_DIRS are normal (non-cache) variables,
# so marking those as advanced would not hide anything.
mark_as_advanced(PCRE2_INCLUDE_DIR PCRE2_LIBRARY)

View File

@ -91,6 +91,9 @@
/* Define to use kerberos */
#cmakedefine HAVE_KERBEROS 1
/* Define to use PCRE2 library */
#cmakedefine HAVE_PCRE2 1
/* Define to use nghttp2 */
#cmakedefine HAVE_NGHTTP2 1
@ -316,6 +319,10 @@
# endif
#endif
#ifdef HAVE_PCRE2
#define PCRE2_CODE_UNIT_WIDTH 8
#endif
#include <ws_log_defs.h>
#endif /* __CONFIG_H__ */

View File

@ -119,6 +119,9 @@ The following features are new (or have been significantly updated) since versio
** Adds support for some additional character escape sequences in double quoted strings. Besides octal and hex byte specification the following C escape
sequences are now supported with the same meaning: \a, \b, \f, \n, \r, \t, \v. Previously they were only supported with character constants. Note that
unrecognized escape sequences are treated as a literal character. This has not changed from previous versions.
** The display filter engine now supports PCRE2 instead of GRegex (GLib bindings to the older end-of-life PCRE library). The PCRE2 library (https://www.pcre.org/)
is recommended to support pattern matching using regular expressions. PCRE2 is compatible with PCRE so the user-visible changes should be minimal. Some
exotic patterns may now be invalid and require rewriting.
* Corrected calculation of mean jitter in RTP Stream Analysis dialog and IAX2 Stream Analysis dialog

View File

@ -338,6 +338,7 @@ target_link_libraries(epan
${LZ4_LIBRARIES}
${M_LIBRARIES}
${NGHTTP2_LIBRARIES}
${PCRE2_LIBRARIES}
${SMI_LIBRARIES}
${SNAPPY_LIBRARIES}
${WIN_PSAPI_LIBRARY}
@ -369,6 +370,7 @@ target_include_directories(epan
${LUA_INCLUDE_DIRS}
${LZ4_INCLUDE_DIRS}
${NGHTTP2_INCLUDE_DIRS}
${PCRE2_INCLUDE_DIRS}
${SMI_INCLUDE_DIRS}
${ZLIB_INCLUDE_DIRS}
${ZSTD_INCLUDE_DIRS}

View File

@ -65,11 +65,11 @@ field_tostr(const void *data, gboolean pretty _U_)
}
static char *
pcre_tostr(const void *data, gboolean pretty _U_)
pcre_tostr(const void *data, gboolean pretty)
{
const fvalue_regex_t *pcre = data;
return g_strdup(fvalue_regex_pattern(pcre));
return g_strdup(fvalue_regex_tostr(pcre, pretty));
}
void

View File

@ -97,6 +97,10 @@
#include <libxml/parser.h>
#endif
#ifdef HAVE_PCRE2
#include <pcre2.h>
#endif
#ifndef _WIN32
#include <signal.h>
#endif
@ -805,6 +809,13 @@ epan_get_compiled_version_info(GString *str)
g_string_append(str, ", without MaxMind DB resolver");
#endif /* HAVE_MAXMINDDB */
/* PCRE2 */
#ifdef HAVE_PCRE2
g_string_append(str, ", with PCRE2");
#else
g_string_append(str, ", without PCRE2");
#endif /* HAVE_PCRE2 */
/* nghttp2 */
#ifdef HAVE_NGHTTP2
g_string_append(str, ", with nghttp2 " NGHTTP2_VERSION);
@ -872,6 +883,17 @@ epan_get_runtime_version_info(GString *str)
/* Gcrypt */
g_string_append_printf(str, ", with Gcrypt %s", gcry_check_version(NULL));
/* PCRE2 */
#ifdef HAVE_PCRE2
int pcre2_size = pcre2_config(PCRE2_CONFIG_VERSION, NULL);
if (pcre2_size > 0 && pcre2_size <= 255) {
char *pcre2_str = g_malloc0(pcre2_size + 1);
pcre2_config(PCRE2_CONFIG_VERSION, pcre2_str);
g_string_append_printf(str, ", with PCRE2 %s", pcre2_str);
g_free(pcre2_str);
}
#endif /* HAVE_PCRE2 */
/* nghttp2 */
#if NGHTTP2_VERSION_AGE >= 1
nghttp2_info *nghttp2_ptr = nghttp2_version(0);

View File

@ -50,6 +50,7 @@ target_include_directories(ftypes
${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_SOURCE_DIR}/epan
${PCRE2_INCLUDE_DIRS}
)
set_target_properties(ftypes PROPERTIES

View File

@ -11,9 +11,18 @@
#include "ftypes-int.h"
#include <wsutil/ws_assert.h>
#ifdef HAVE_PCRE2
#include <pcre2.h>
#endif
struct _fvalue_regex_t {
#ifdef HAVE_PCRE2
pcre2_code *code;
#else
GRegex *code;
#endif
char *pattern;
char *repr_debug;
};
/* Keep track of ftype_t's via their ftenum number */
@ -718,8 +727,51 @@ fvalue_matches(const fvalue_t *a, const fvalue_regex_t *b)
return a->ftype->cmp_matches(a, b);
}
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg)
#ifdef HAVE_PCRE2
static pcre2_code *
_pcre2_compile(const char *patt, char **errmsg)
{
pcre2_code *code;
int errorcode;
PCRE2_SIZE erroroffset;
/* By default UTF-8 is off. */
code = pcre2_compile_8((PCRE2_SPTR)patt,
PCRE2_ZERO_TERMINATED,
PCRE2_NEVER_UTF,
&errorcode,
&erroroffset,
NULL);
if (code == NULL) {
*errmsg = g_malloc0(128);
pcre2_get_error_message(errorcode, *errmsg, 128);
return NULL;
}
return code;
}
/*
 * Run the compiled pattern 'code' against 'subj'.
 *
 * 'subj_size' is the subject length in bytes; a negative value means that
 * 'subj' is zero-terminated. Returns TRUE when pcre2_match() reports a
 * non-negative result, FALSE otherwise (no match or any error).
 */
static gboolean
_pcre2_matches(pcre2_code *code, const char *subj, gssize subj_size)
{
	PCRE2_SIZE length;
	pcre2_match_data *match_data;
	int rc;

	length = subj_size < 0 ? PCRE2_ZERO_TERMINATED : (PCRE2_SIZE)subj_size;
	match_data = pcre2_match_data_create_from_pattern(code, NULL);
	if (match_data == NULL) {
		/* Could not allocate the match block; report "no match". */
		return FALSE;
	}

	/* PCRE2 takes the subject as PCRE2_SPTR, so cast from char
	 * explicitly, as is done for the pattern when compiling. */
	rc = pcre2_match(code, (PCRE2_SPTR)subj, length, 0, 0, match_data, NULL);
	pcre2_match_data_free(match_data);

	return rc < 0 ? FALSE : TRUE;
}
#else /* HAVE_PCRE2 */
static GRegex *
_gregex_compile(const char *patt, char **errmsg)
{
GError *regex_error = NULL;
GRegex *pcre;
@ -744,29 +796,81 @@ fvalue_regex_compile(const char *patt, char **errmsg)
return NULL;
}
struct _fvalue_regex_t *re = g_new(struct _fvalue_regex_t, 1);
re->code = pcre;
return pcre;
}
/*
 * GRegex fallback: match 'subj' (of 'subj_size' bytes) against 'code',
 * starting at offset 0 with no match options. Neither match info nor
 * error details are requested.
 */
static gboolean
_gregex_matches(GRegex *code, const char *subj, gssize subj_size)
{
	gboolean matched;

	matched = g_regex_match_full(code, subj, subj_size, 0, 0, NULL, NULL);
	return matched;
}
#endif /* !HAVE_PCRE2 */
/*
 * Compile 'patt' with whichever regex backend was selected at build time
 * and wrap the result in a newly allocated fvalue_regex_t.
 *
 * Returns NULL when the backend fails to compile the pattern; 'errmsg' is
 * passed through to the backend compile helper.
 */
fvalue_regex_t *
fvalue_regex_compile(const char *patt, char **errmsg)
{
	fvalue_regex_t *regex;
	void *compiled;

#ifdef HAVE_PCRE2
	compiled = _pcre2_compile(patt, errmsg);
#else
	compiled = _gregex_compile(patt, errmsg);
#endif
	if (!compiled) {
		return NULL;
	}

	regex = g_new(fvalue_regex_t, 1);
	regex->code = compiled;
	/* Keep a private copy of the pattern text. */
	regex->pattern = g_strdup(patt);
	/* The debug representation is built lazily on first request. */
	regex->repr_debug = NULL;

	return regex;
}
gboolean
fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_size)
{
return g_regex_match_full(regex->code, subj, subj_size, 0, 0, NULL, NULL);
#ifdef HAVE_PCRE2
return _pcre2_matches(regex->code, subj, subj_size);
#else
return _gregex_matches(regex->code, subj, subj_size);
#endif
}
/*
 * Release a regex created by fvalue_regex_compile(): the backend's compiled
 * code, the stored pattern copy, the cached debug representation and the
 * wrapper itself. Passing NULL is a no-op.
 */
void
fvalue_regex_free(fvalue_regex_t *regex)
{
	if (regex == NULL)
		return;

#ifdef HAVE_PCRE2
	pcre2_code_free(regex->code);
#else
	g_regex_unref(regex->code);
#endif
	g_free(regex->pattern);
	g_free(regex->repr_debug);
	g_free(regex);
}
/*
 * Return a string representation of 'regex'.
 *
 * With 'pretty' set this is the pattern text as given at compile time.
 * Otherwise it is "(<backend>)<pattern>", built on first use and cached in
 * the regex. The returned string is owned by the regex in both cases.
 */
const char *
fvalue_regex_tostr(const fvalue_regex_t *regex, gboolean pretty)
{
#ifdef HAVE_PCRE2
	const char *backend = "PCRE2";
#else
	const char *backend = "GRegex";
#endif
	fvalue_regex_t *writable;

	if (pretty) {
		return regex->pattern;
	}

	if (regex->repr_debug != NULL) {
		return regex->repr_debug;
	}

	/* The cache field is filled in through a non-const alias. */
	writable = (fvalue_regex_t *)regex;
	writable->repr_debug = g_strdup_printf("(%s)%s", backend, regex->pattern);

	return writable->repr_debug;
}
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex)
{
return g_regex_get_pattern(regex->code);
return regex->pattern;
}
/*

View File

@ -361,6 +361,9 @@ fvalue_regex_matches(const fvalue_regex_t *regex, const char *subj, gssize subj_
void
fvalue_regex_free(fvalue_regex_t *regex);
const char *
fvalue_regex_tostr(const fvalue_regex_t *regex, gboolean pretty);
const char *
fvalue_regex_pattern(const fvalue_regex_t *regex);

View File

@ -69,8 +69,8 @@ Param(
# trouble instead of trying to catch exceptions everywhere.
$ErrorActionPreference = "Stop"
$Win64CurrentTag = "2021-09-29"
$Win32CurrentTag = "2021-09-29"
$Win64CurrentTag = "2021-11-11"
$Win32CurrentTag = "2021-11-11"
# Archive file / SHA256
$Win64Archives = @{
@ -91,6 +91,7 @@ $Win64Archives = @{
"minizip-1.2.11-4-win64ws.zip" = "dd6bf24e2d946465ad19aa4f8c38e0db91da6585887935de68011982cd6fb2cb";
"nghttp2-1.44.0-1-win64ws.zip" = "30e4925d48bbd401b03ce6502e8df01f81f114366f28682206e08423486cf161";
"opus-1.3.1-3-win64ws.zip" = "1f7a55a6d2d7215dffa4a43bca8ca05024bd4ba1ac3d0d0c405fd38b09cc2205";
"pcre2-10.39-1-win64ws.zip" = "d8b381515c4bae97a28a6b0a3faf66a4f890e85ef75f8ef9501ffa091db6ff8a";
"sbc-1.3-1-win64ws.zip" = "08cef6898c421277a6582ef3225d8820f74a037cbd5b6e673a4d8f4593ce80a1";
"snappy-1.1.9-1-win64ws.zip" = "fa907724be019bcc55d27ebe88257ba8898b5c38b719099b8164ac78600d81cc";
"spandsp-0.0.6-2-win64ws.zip" = "2eb8278633037f60f44815ea1606486ab5dcdf3bddc500b20c9fe356856236b2";
@ -117,6 +118,7 @@ $Win32Archives = @{
"minizip-1.2.11-4-win32ws.zip" = "41e113930902c2519c4644e8307a0cc51c5855e001e1e69768c48deb376142d0";
"nghttp2-1.44.0-1-win32ws.zip" = "3a19e076523ef263f6900749f345725b4e8bf2c4027e0f349404ad81c4613bde";
"opus-1.3.1-3-win32ws.zip" = "9700b14c8945fcfed2188b806a2ee7e8628922c22569a4c5183075f3dc133177";
"pcre2-10.39-1-win32ws.zip" = "6ad963036bd913fba680e867bbcf8bf9c2995c80dd0401bfa6bb35328c640c70";
"sbc-1.3-1-win32ws.zip" = "ad37825e9ace4b849a5442c08f1ed7e30634e6b774bba4307fb86f35f82e71ba";
"snappy-1.1.9-1-win32ws.zip" = "28bae646f1dff80ceb1b1756b1fdec0ebc47580a412a8a4980f3d61c63cb0858";
"spandsp-0.0.6-2-win32ws.zip" = "31a4b5ca228c719ab4190e1b46801f1483efb8756f1e33d10ecc915244612fca";