Convert capture file regex search to PCRE2.

Replace the use of the obsolete GRegex with PCRE2.

Fixes a crash reported in issue #17500.
This commit is contained in:
João Valverde 2022-07-27 07:12:27 +01:00 committed by A Wireshark GitLab Utility
parent 2cc887e80d
commit e7f439bc2f
7 changed files with 77 additions and 36 deletions

View File

@ -104,7 +104,7 @@ typedef struct _capture_file {
guint32 search_pos; /* Byte position of last byte found in a hex search */ guint32 search_pos; /* Byte position of last byte found in a hex search */
guint32 search_len; /* Length of bytes matching the search */ guint32 search_len; /* Length of bytes matching the search */
gboolean case_type; /* TRUE if case-insensitive text search */ gboolean case_type; /* TRUE if case-insensitive text search */
GRegex *regex; /* Set if regular expression search */ ws_regex_t *regex; /* Set if regular expression search */
search_charset_t scs_type; /* Character set for text search */ search_charset_t scs_type; /* Character set for text search */
search_direction dir; /* Direction in which to do searches */ search_direction dir; /* Direction in which to do searches */
gboolean search_in_progress; /* TRUE if user just clicked OK in the Find dialog or hit <control>N/B */ gboolean search_in_progress; /* TRUE if user just clicked OK in the Find dialog or hit <control>N/B */

20
file.c
View File

@ -3146,7 +3146,7 @@ match_subtree_text(proto_node *node, gpointer data)
} }
if (cf->regex) { if (cf->regex) {
if (g_regex_match(cf->regex, label_ptr, (GRegexMatchFlags) 0, NULL)) { if (ws_regex_matches(cf->regex, label_ptr)) {
mdata->frame_matched = TRUE; mdata->frame_matched = TRUE;
mdata->finfo = fi; mdata->finfo = fi;
return; return;
@ -3223,7 +3223,7 @@ match_summary_line(capture_file *cf, frame_data *fdata,
info_column = get_column_text(edt.pi.cinfo, colx); info_column = get_column_text(edt.pi.cinfo, colx);
info_column_len = strlen(info_column); info_column_len = strlen(info_column);
if (cf->regex) { if (cf->regex) {
if (g_regex_match(cf->regex, info_column, (GRegexMatchFlags) 0, NULL)) { if (ws_regex_matches(cf->regex, info_column)) {
result = MR_MATCHED; result = MR_MATCHED;
break; break;
} }
@ -3747,7 +3747,7 @@ match_regex(capture_file *cf, frame_data *fdata,
wtap_rec *rec, Buffer *buf, void *criterion _U_) wtap_rec *rec, Buffer *buf, void *criterion _U_)
{ {
match_result result = MR_NOTMATCHED; match_result result = MR_NOTMATCHED;
GMatchInfo *match_info = NULL; size_t result_pos[2] = {0, 0};
/* Load the frame's data. */ /* Load the frame's data. */
if (!cf_read_record(cf, fdata, rec, buf)) { if (!cf_read_record(cf, fdata, rec, buf)) {
@ -3755,13 +3755,13 @@ match_regex(capture_file *cf, frame_data *fdata,
return MR_ERROR; return MR_ERROR;
} }
if (g_regex_match_full(cf->regex, (const gchar *)ws_buffer_start_ptr(buf), fdata->cap_len, if (ws_regex_matches_pos(cf->regex,
0, (GRegexMatchFlags) 0, &match_info, NULL)) (const gchar *)ws_buffer_start_ptr(buf),
{ fdata->cap_len,
gint start_pos = 0, end_pos = 0; result_pos)) {
g_match_info_fetch_pos (match_info, 0, &start_pos, &end_pos); //TODO: Fix cast.
cf->search_pos = end_pos - 1; cf->search_pos = (guint32)(result_pos[1] - 1); /* last byte = end position - 1 */
cf->search_len = end_pos - start_pos; cf->search_len = (guint32)(result_pos[1] - result_pos[0]);
result = MR_MATCHED; result = MR_MATCHED;
} }
return result; return result;

View File

@ -439,6 +439,7 @@ libwsutil.so.0 libwsutil0 #MINVER#
ws_regex_free@Base 3.7.0 ws_regex_free@Base 3.7.0
ws_regex_matches@Base 3.7.0 ws_regex_matches@Base 3.7.0
ws_regex_matches_length@Base 3.7.0 ws_regex_matches_length@Base 3.7.0
ws_regex_matches_pos@Base 3.7.2
ws_regex_pattern@Base 3.7.0 ws_regex_pattern@Base 3.7.0
ws_socket_ptoa@Base 3.1.1 ws_socket_ptoa@Base 3.1.1
ws_strcasestr@Base 3.7.0 ws_strcasestr@Base 3.7.0

View File

@ -17,6 +17,7 @@
#include <epan/strutil.h> #include <epan/strutil.h>
#include <wsutil/utf8_entities.h> #include <wsutil/utf8_entities.h>
#include <wsutil/regex.h>
#include "main_application.h" #include "main_application.h"
#include <QKeyEvent> #include <QKeyEvent>
@ -63,7 +64,7 @@ SearchFrame::SearchFrame(QWidget *parent) :
SearchFrame::~SearchFrame() SearchFrame::~SearchFrame()
{ {
if (regex_) { if (regex_) {
g_regex_unref(regex_); ws_regex_free(regex_);
} }
delete sf_ui_; delete sf_ui_;
} }
@ -140,13 +141,13 @@ void SearchFrame::keyPressEvent(QKeyEvent *event)
bool SearchFrame::regexCompile() bool SearchFrame::regexCompile()
{ {
int flags = (G_REGEX_OPTIMIZE); unsigned flags = 0;
if (!sf_ui_->caseCheckBox->isChecked()) { if (!sf_ui_->caseCheckBox->isChecked()) {
flags |= G_REGEX_CASELESS; flags |= WS_REGEX_CASELESS;
} }
if (regex_) { if (regex_) {
g_regex_unref(regex_); ws_regex_free(regex_);
} }
if (sf_ui_->searchLineEdit->text().isEmpty()) { if (sf_ui_->searchLineEdit->text().isEmpty()) {
@ -154,12 +155,12 @@ bool SearchFrame::regexCompile()
return false; return false;
} }
GError *error = nullptr; char *errmsg = nullptr;
regex_ = g_regex_new(sf_ui_->searchLineEdit->text().toUtf8().constData(), regex_ = ws_regex_compile_ex(sf_ui_->searchLineEdit->text().toUtf8().constData(), -1,
(GRegexCompileFlags)flags, (GRegexMatchFlags) 0, &error); &errmsg, flags);
if (error) {
regex_error_ = error->message; if (errmsg != nullptr) {
g_error_free(error); regex_error_ = errmsg;
} }
return regex_ ? true : false; return regex_ ? true : false;

View File

@ -47,7 +47,7 @@ private:
Ui::SearchFrame *sf_ui_; Ui::SearchFrame *sf_ui_;
capture_file *cap_file_; capture_file *cap_file_;
GRegex *regex_; ws_regex_t *regex_;
QString regex_error_; QString regex_error_;
private slots: private slots:

View File

@ -103,14 +103,16 @@ ws_regex_compile(const char *patt, char **errmsg)
static bool static bool
match_pcre2(pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length) match_pcre2(pcre2_code *code, const char *subject, ssize_t subj_length,
pcre2_match_data *match_data)
{ {
pcre2_match_data *match_data; PCRE2_SIZE length;
int rc; int rc;
/* We don't use the matched substring but pcre2_match requires if (subj_length < 0)
* at least one pair of offsets. */ length = PCRE2_ZERO_TERMINATED;
match_data = pcre2_match_data_create(1, NULL); else
length = (PCRE2_SIZE)subj_length;
rc = pcre2_match(code, rc = pcre2_match(code,
subject, subject,
@ -120,8 +122,6 @@ match_pcre2(pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length)
match_data, match_data,
NULL); NULL);
pcre2_match_data_free(match_data);
if (rc < 0) { if (rc < 0) {
/* No match */ /* No match */
if (rc != PCRE2_ERROR_NOMATCH) { if (rc != PCRE2_ERROR_NOMATCH) {
@ -142,21 +142,49 @@ match_pcre2(pcre2_code *code, PCRE2_SPTR subject, PCRE2_SIZE length)
bool bool
ws_regex_matches(const ws_regex_t *re, const char *subj) ws_regex_matches(const ws_regex_t *re, const char *subj)
{ {
ws_return_val_if_null(re, FALSE); return ws_regex_matches_length(re, subj, -1);
ws_return_val_if_null(subj, FALSE);
return match_pcre2(re->code, (PCRE2_SPTR)subj, PCRE2_ZERO_TERMINATED);
} }
bool bool
ws_regex_matches_length(const ws_regex_t *re, ws_regex_matches_length(const ws_regex_t *re,
const char *subj, size_t subj_length) const char *subj, ssize_t subj_length)
{ {
bool matched;
pcre2_match_data *match_data;
ws_return_val_if_null(re, FALSE); ws_return_val_if_null(re, FALSE);
ws_return_val_if_null(subj, FALSE); ws_return_val_if_null(subj, FALSE);
return match_pcre2(re->code, (PCRE2_SPTR)subj, (PCRE2_SIZE)subj_length); /* We don't use the matched substring but pcre2_match requires
* at least one pair of offsets. */
match_data = pcre2_match_data_create(1, NULL);
matched = match_pcre2(re->code, subj, subj_length, match_data);
pcre2_match_data_free(match_data);
return matched;
}
/*
 * Match subj against the compiled pattern re and report where the match
 * lies inside subj.
 *
 * subj_length is handed unchanged to match_pcre2(), which treats a negative
 * value as "subject is zero-terminated" and any other value as the subject
 * length.
 *
 * On a match, and only if pos_vect is non-NULL, pos_vect[0] and pos_vect[1]
 * receive the first offset pair of the PCRE2 output vector: the start offset
 * of the match and the offset just past its end. pos_vect is not written
 * when nothing matched.
 *
 * Returns whatever match_pcre2() reports; the ws_return_val_if_null() guards
 * yield FALSE early when re or subj is NULL.
 */
bool
ws_regex_matches_pos(const ws_regex_t *re,
const char *subj, ssize_t subj_length,
size_t pos_vect[2])
{
bool matched;
pcre2_match_data *match_data;
ws_return_val_if_null(re, FALSE);
ws_return_val_if_null(subj, FALSE);
/* A single offset pair is enough: only the overall match position is
 * reported, not any capture groups.
 * NOTE(review): the returned match_data is not checked for NULL before
 * use - confirm allocation failure is handled elsewhere. */
match_data = pcre2_match_data_create(1, NULL);
matched = match_pcre2(re->code, subj, subj_length, match_data);
if (matched && pos_vect) {
/* The ovector is owned by match_data, so copy the pair out before
 * match_data is freed below. */
PCRE2_SIZE *ovect = pcre2_get_ovector_pointer(match_data);
pos_vect[0] = ovect[0];
pos_vect[1] = ovect[1];
}
pcre2_match_data_free(match_data);
return matched;
} }

View File

@ -37,7 +37,18 @@ ws_regex_matches(const ws_regex_t *re, const char *subj);
/** Matches a subject string length in 8 bit code units. */ /** Matches a subject string length in 8 bit code units. */
WS_DLL_PUBLIC bool WS_DLL_PUBLIC bool
ws_regex_matches_length(const ws_regex_t *re, ws_regex_matches_length(const ws_regex_t *re,
const char *subj, size_t subj_length); const char *subj, ssize_t subj_length);
/** Returns start and end position of the matched substring.
*
* pos_vect[0] is the offset of the first code unit of the matched substring.
* pos_vect[1] is the offset one past the last code unit of the matched substring.
* pos_vect[1] - pos_vect[0] is the matched substring length.
*/
WS_DLL_PUBLIC bool
ws_regex_matches_pos(const ws_regex_t *re,
const char *subj, ssize_t subj_length,
size_t pos_vect[2]);
WS_DLL_PUBLIC void WS_DLL_PUBLIC void
ws_regex_free(ws_regex_t *re); ws_regex_free(ws_regex_t *re);