File sets: Match both formats, compressed files

For file sets produced by multiple file captures, match both
the number before time and the newer time before number format.
Distinguish them in the return value, since files of different
formats are not part of the same set.

Also handle files with a compression suffix, since captures can
produce them. In multi file captures a file is compressed only when
switching to the next file, so allow a file set to contain a mixture of
compressed and uncompressed files: when doing a multi file capture with
compression, the last file is not compressed.

Add information to the user guide and release notes

Related to #12371
This commit is contained in:
John Thacker 2024-01-26 20:54:21 -05:00
parent a8aebe5cf4
commit 2b5fd572f7
5 changed files with 148 additions and 83 deletions

View File

@ -122,6 +122,13 @@ The following features are new (or have been significantly updated) since versio
* Truncated fields in the detail view are now displayed as "Field name […]: data" instead of "Field name [truncated]: data"
* When capturing files in multiple file mode, a pattern that places the date and time
before the index number can be used (e.g., foo_20240714110102_00001.pcap instead of
foo_00001_20240714110102.pcap). This causes filenames to sort in chronological order
across file sets from different captures. The File Set dialog has been updated to
handle the new pattern, which tshark has been able to produce since
version 3.6.0.
=== Removed Features and Support
* The tshark `-G` option with no argument is deprecated and will be removed in

View File

@ -703,10 +703,13 @@ some features to handle these file sets in a convenient way.
.How does Wireshark detect the files of a file set?
****
A filename in a file set uses the format Prefix_Number_DateTimeSuffix which
might look something like `test_00001_20240714183910.pcap`. All files of a file
A filename in a file set uses the format Prefix_Number_DateTimeSuffix (or,
in Wireshark 4.4.0 and later, Prefix_DateTime_NumberSuffix) which might
look something like `test_00001_20240714183910.pcap` (or, in the newer
format, `test_20240714183910_00001.pcap`). All files of a file
set share the same prefix (e.g., “test”) and suffix (e.g., “.pcap”) and a
varying middle part.
varying middle part. Files are also allowed to have a second compression
suffix of types that Wireshark can open; the compression suffix does not
have to match for all files in a set.
To find the files of a file set, Wireshark scans the directory where the
currently loaded file resides and checks for files matching the filename pattern

187
fileset.c
View File

@ -23,6 +23,8 @@
#include <wsutil/filesystem.h>
#include <wsutil/ws_assert.h>
#include <wiretap/wtap.h>
#include <epan/strutil.h>
#include "fileset.h"
@ -58,104 +60,141 @@ static fileset set = { NULL, NULL};
#endif /* _WIN32 */
/* is this a probable file of a file set (does the naming pattern match)? */
gboolean
fileset_filename_match_pattern(const char *fname)
fileset_match_t
fileset_filename_match_pattern(const char *fname, char **prefix, char **suffix, char **time)
{
char *pfx;
size_t baselen;
size_t minlen = strlen("_00001_20050418010750");
char *sfx;
char *filename;
fileset_match_t ret = FILESET_NO_MATCH;
static char *pattern = "(?P<prefix>.*)_\\d{5}_(?P<time>\\d{14})$";
static char *pattern2 = "(?P<prefix>.*)_(?P<time>\\d{14})_\\d{5}$";
static GRegex *regex = NULL;
static GRegex *regex2 = NULL;
if (regex == NULL) {
GError *gerr = NULL;
regex = g_regex_new(pattern,
(GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_ANCHORED),
G_REGEX_MATCH_ANCHORED, NULL);
if (gerr) {
ws_warning("failed to compile regex: %s", gerr->message);
g_error_free(gerr);
regex = NULL;
return ret;
}
}
if (regex2 == NULL) {
GError *gerr = NULL;
regex2 = g_regex_new(pattern2,
(GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_ANCHORED),
G_REGEX_MATCH_ANCHORED, NULL);
if (gerr) {
ws_warning("failed to compile regex: %s", gerr->message);
g_error_free(gerr);
regex2 = NULL;
return ret;
}
}
/* d:\dir1\test_00001_20050418010750.cap */
filename = g_strdup(get_basename(fname));
filename = g_path_get_basename(fname);
/* test_00001_20050418010750.cap */
pfx = strrchr(filename, '.');
if(pfx == NULL) { /* suffix is optional */
pfx = filename + strlen(filename);
}
/* test_00001_20050418010750 */
*pfx = '\0';
/* filename long enough? */
baselen = strlen(filename);
if(baselen < minlen) {
g_free(filename);
return FALSE;
}
/* there must be two underscores at special places */
if(filename[baselen-minlen] != '_' || filename[baselen-minlen+6] != '_') {
g_free(filename);
return FALSE;
}
/* replace the two underscores by digits */
filename[baselen-minlen] = '0';
filename[baselen-minlen+6] = '0';
/* we should have only digits now */
while(minlen--) {
baselen--;
if(!g_ascii_isdigit( filename[baselen])) {
g_free(filename);
return FALSE;
sfx = strrchr(filename, '.');
if (sfx != NULL) {
*sfx = '\0';
GSList *compression_type_extensions = wtap_get_all_compression_type_extensions_list();
char *ext = g_ascii_strdown(sfx + 1, -1);
for (GSList *compression_extension = compression_type_extensions;
compression_extension != NULL;
compression_extension = g_slist_next(compression_extension)) {
if (g_strcmp0(ext, (const char*)compression_extension->data) == 0) {
sfx = strrchr(filename, '.');
if (sfx != NULL) {
*sfx = '\0';
}
break;
}
}
g_free(ext);
g_slist_free(compression_type_extensions);
} else { /* suffix is optional */
sfx = filename + strlen(filename);
}
/* test_00001_20050418010750 */
GMatchInfo *match_info;
g_regex_match(regex, filename, 0, &match_info);
if (g_match_info_matches(match_info)) {
if (prefix) {
*prefix = g_match_info_fetch_named(match_info, "prefix");
}
if (time) {
*time = g_match_info_fetch_named(match_info, "time");
}
if (suffix) {
*suffix = g_strdup(sfx);
}
ret = FILESET_NUM_TIME;
}
g_match_info_free(match_info);
if (ret == FILESET_NO_MATCH) {
g_regex_match(regex2, filename, 0, &match_info);
if (g_match_info_matches(match_info)) {
if (prefix) {
*prefix = g_match_info_fetch_named(match_info, "prefix");
}
if (time) {
*time = g_match_info_fetch_named(match_info, "time");
}
if (suffix) {
*suffix = g_strdup(sfx);
}
ret = FILESET_TIME_NUM;
}
g_match_info_free(match_info);
}
g_free(filename);
/* ok, seems to be good */
return TRUE;
return ret;
}
/* test if both files could be in the same file set */
/* (fname2 must already be in correct shape) */
/*
 * Two names belong to the same set when they match the same naming pattern
 * and have equal prefix and equal suffix as reported by
 * fileset_filename_match_pattern().  Returns FALSE if fname1 does not match
 * any pattern.
 */
static gboolean
fileset_is_file_in_set(const char *fname1, const char *fname2)
{
    /* Start from NULL so the g_free() calls below are safe even if a match
     * call leaves its out parameters untouched. */
    char *pfx1 = NULL;
    char *pfx2 = NULL;
    char *sfx1 = NULL;
    char *sfx2 = NULL;
    fileset_match_t match1;
    fileset_match_t match2;
    gboolean ret = FALSE;

    match1 = fileset_filename_match_pattern(fname1, &pfx1, &sfx1, NULL);
    if (match1 == FILESET_NO_MATCH) {
        return FALSE;
    }

    match2 = fileset_filename_match_pattern(fname2, &pfx2, &sfx2, NULL);
    /* just to be sure ... */
    /* NOTE(review): if ws_assert() is compiled out in some builds, a non-matching
     * fname2 falls through to the comparison below and yields FALSE. */
    ws_assert(match2 != FILESET_NO_MATCH);

    if (match1 == match2 && g_strcmp0(pfx1, pfx2) == 0 && g_strcmp0(sfx1, sfx2) == 0) {
        ret = TRUE;
    }

    g_free(pfx1);
    g_free(pfx2);
    g_free(sfx1);
    g_free(sfx2);

    return ret;
}
/* GCompareFunc helper for g_list_find_custom() */
@ -292,12 +331,12 @@ fileset_add_dir(const char *fname, void *window)
dirname = g_string_append_c(dirname, G_DIR_SEPARATOR);
/* is the current file probably a part of any fileset? */
if(fileset_filename_match_pattern(fname)) {
if(fileset_filename_match_pattern(fname, NULL, NULL, NULL)) {
/* yes, go through the files in the directory and check if the file in question is part of the current file set */
if ((dir = ws_dir_open(dirname->str, 0, NULL)) != NULL) {
while ((file = ws_dir_read_name(dir)) != NULL) {
name = ws_dir_get_name(file);
if(fileset_filename_match_pattern(name) && fileset_is_file_in_set(name, get_basename(fname))) {
if(fileset_is_file_in_set(name, get_basename(fname))) {
fileset_add_file(dirname->str, name, strcmp(name, get_basename(fname))== 0 /* current */);
}
} /* while */

View File

@ -25,9 +25,26 @@ typedef struct _fileset_entry {
gboolean current; /* is this the currently loaded file? */
} fileset_entry;
/* Result of matching a filename against the file set naming patterns. */
typedef enum {
    FILESET_NO_MATCH = 0, /* the name matches neither pattern */
    FILESET_TIME_NUM = 1, /* prefix_YYYYMMDDHHMMSS_NNNNN */
    FILESET_NUM_TIME = 2  /* prefix_NNNNN_YYYYMMDDHHMMSS */
} fileset_match_t;
/* helper: is this a probable file of a file set (does the naming pattern match)? */
extern gboolean fileset_filename_match_pattern(const char *fname);
/* helper: is this a probable file of a file set (does the naming pattern match)?
* Possible naming patterns are prefix_NNNNN_YYYYMMDDHHMMSS.ext[.gz] and
* prefix_YYYYMMDDHHMMSS_NNNNN.ext[.gz], where any compression suffix
* supported by libwiretap is allowed. The validation is minimal; e.g., the
* time is only checked to see if all 14 characters are digits.
*
* @param[in] fname The filename to check for a naming pattern.
* @param[out] prefix If not NULL and the filename matches, the prefix,
* as a newly allocated string that the caller must g_free()
* @param[out] suffix If not NULL and the filename matches, the suffix
* (file extension) not including the compression suffix; free with g_free()
* @param[out] time If not NULL and the filename matches, the time component,
* as a newly allocated string that the caller must g_free()
* @return The type of pattern match, or FILESET_NO_MATCH.
* */
extern fileset_match_t fileset_filename_match_pattern(const char *fname, char **prefix, char **suffix, char **time);
extern void fileset_add_dir(const char *fname, void *window);

View File

@ -121,14 +121,13 @@ void FilesetEntryModel::clear()
}
QString FilesetEntryModel::nameToDate(const char *name) const {
char *date;
QString dn;
if (!fileset_filename_match_pattern(name))
if (fileset_filename_match_pattern(name, NULL, NULL, &date) == FILESET_NO_MATCH)
return NULL;
dn = name;
dn.remove(QRegularExpression(".*_"));
dn.truncate(14);
dn = gchar_free_to_qstring(date);
dn.insert(4, '-');
dn.insert(7, '-');
dn.insert(10, ' ');