File sets: Match both formats, compressed files

For file sets produced by multiple file captures, match both the older number-before-time format and the newer time-before-number format. Distinguish them in the return value, since files of different formats are not part of the same set. Also handle files with a compression suffix, as we can produce those in captures. Since compression in multi file captures is done when switching files, the last file of a compressed multi file capture is not compressed; therefore allow file sets to contain a mixture of compressed and uncompressed files. Add information to the user guide and release notes. Related to #12371
2024-01-26 20:54:21 -05:00 · 2024-01-26 20:54:21 -05:00 · 2b5fd572f7
parent a8aebe5cf4
commit 2b5fd572f7
5 changed files with 148 additions and 83 deletions
--- a/doc/release-notes.adoc
+++ b/doc/release-notes.adoc
@ -122,6 +122,13 @@ The following features are new (or have been significantly updated) since versio

 * Truncated fields in the detail view are now displayed as "Field name […]: data" instead of "Field name [truncated]: data"

+* When capturing files in multiple file mode, a pattern that places the date and time
+  before the index number can be used (e.g., foo_20240714110102_00001.pcap instead of
+  foo_00001_20240714110102.pcap). This causes filenames to sort in chronological order
+  across file sets from different captures. The File Set dialog has been updated to
+  handle the new pattern, which tshark has been able to produce since
+  version 3.6.0.
+
 === Removed Features and Support

 * The tshark `-G` option with no argument is deprecated and will be removed in
--- a/docbook/wsug_src/wsug_io.adoc
+++ b/docbook/wsug_src/wsug_io.adoc
@ -703,10 +703,13 @@ some features to handle these file sets in a convenient way.

 .How does Wireshark detect the files of a file set?
 ****
-A filename in a file set uses the format Prefix_Number_DateTimeSuffix which
-might look something like `test_00001_20240714183910.pcap`. All files of a file
+A filename in a file set uses the format Prefix_Number_DateTimeSuffix (or,
+in Wireshark 4.4.0 and later, Prefix_DateTime_NumberSuffix), which might look
+like `test_00001_20240714183910.pcap` or `test_20240714183910_00001.pcap`. All files of a file
 set share the same prefix (e.g., “test”) and suffix (e.g., “.pcap”) and a
-varying middle part.
+varying middle part. A file may also carry an additional compression suffix
+of a type that Wireshark can open; the compression suffix does not have to
+match for all files in a set.

 To find the files of a file set, Wireshark scans the directory where the
 currently loaded file resides and checks for files matching the filename pattern
--- a/fileset.c
+++ b/fileset.c
@ -23,6 +23,8 @@
 #include <wsutil/filesystem.h>
 #include <wsutil/ws_assert.h>

+#include <wiretap/wtap.h>
+
 #include <epan/strutil.h>

 #include "fileset.h"
@ -58,104 +60,141 @@ static fileset set = { NULL, NULL};
 #endif /* _WIN32 */

 /* is this a probable file of a file set (does the naming pattern match)? */
-gboolean
-fileset_filename_match_pattern(const char *fname)
+/*
+ * Only the basename of fname is examined. On a match the prefix, the suffix
+ * (file extension including its leading '.', without any compression suffix;
+ * empty if there is none) and the 14-digit time are handed back as newly
+ * allocated strings through the non-NULL output pointers, and the caller
+ * frees them with g_free(). In every other case the outputs are set to NULL.
+ */
+fileset_match_t
+fileset_filename_match_pattern(const char *fname, char **prefix, char **suffix, char **time)
 {
-    char        *pfx;
-    size_t       baselen;
-    size_t      minlen = strlen("_00001_20050418010750");
+    char        *sfx;
+    char        *sfx_copy;
    char        *filename;
+    fileset_match_t ret = FILESET_NO_MATCH;
+    static char *pattern = "(?P<prefix>.*)_\\d{5}_(?P<time>\\d{14})$";
+    static char *pattern2 = "(?P<prefix>.*)_(?P<time>\\d{14})_\\d{5}$";
+    static GRegex *regex = NULL;
+    static GRegex *regex2 = NULL;

+    /* Give the outputs a defined value on every path, so that callers can
+     * g_free() them unconditionally even when nothing matched. */
+    if (prefix) {
+        *prefix = NULL;
+    }
+    if (suffix) {
+        *suffix = NULL;
+    }
+    if (time) {
+        *time = NULL;
+    }
+
+    if (regex == NULL) {
+        GError *gerr = NULL;
+        /* pass &gerr, otherwise a compile failure can never be detected */
+        regex = g_regex_new(pattern,
+                        (GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_ANCHORED),
+                        G_REGEX_MATCH_ANCHORED, &gerr);
+        if (gerr) {
+                ws_warning("failed to compile regex: %s", gerr->message);
+                g_error_free(gerr);
+                regex = NULL;
+                return ret;
+        }
+    }
+
+    if (regex2 == NULL) {
+        GError *gerr = NULL;
+        regex2 = g_regex_new(pattern2,
+                        (GRegexCompileFlags)(G_REGEX_OPTIMIZE | G_REGEX_ANCHORED),
+                        G_REGEX_MATCH_ANCHORED, &gerr);
+        if (gerr) {
+                ws_warning("failed to compile regex: %s", gerr->message);
+                g_error_free(gerr);
+                regex2 = NULL;
+                return ret;
+        }
+    }

    /* d:\dir1\test_00001_20050418010750.cap */
-    filename = g_strdup(get_basename(fname));
+    filename = g_path_get_basename(fname);

    /* test_00001_20050418010750.cap */
-    pfx = strrchr(filename, '.');
-    if(pfx == NULL) {  /* suffix is optional */
-        pfx = filename + strlen(filename);
-    }
-    /* test_00001_20050418010750 */
-    *pfx = '\0';
-
-    /* filename long enough? */
-    baselen = strlen(filename);
-    if(baselen < minlen) {
-        g_free(filename);
-        return FALSE;
-    }
-
-    /* there must be two underscores at special places */
-    if(filename[baselen-minlen] != '_' || filename[baselen-minlen+6] != '_') {
-        g_free(filename);
-        return FALSE;
-    }
-
-    /* replace the two underscores by digits */
-    filename[baselen-minlen] = '0';
-    filename[baselen-minlen+6] = '0';
-
-    /* we should have only digits now */
-    while(minlen--) {
-        baselen--;
-
-        if(!g_ascii_isdigit( filename[baselen])) {
-            g_free(filename);
-            return FALSE;
+    sfx = strrchr(filename, '.');
+    if (sfx != NULL) {
+        GSList *compression_type_extensions = wtap_get_all_compression_type_extensions_list();
+        char *ext = g_ascii_strdown(sfx + 1, -1);
+        for (GSList *compression_extension = compression_type_extensions;
+                compression_extension != NULL;
+                compression_extension = g_slist_next(compression_extension)) {
+            if (g_strcmp0(ext, (const char*)compression_extension->data) == 0) {
+                /* the last extension is a compression suffix: cut it off
+                 * and look for the real suffix in front of it */
+                *sfx = '\0';
+                sfx = strrchr(filename, '.');
+                break;
+            }
        }
+        g_free(ext);
+        g_slist_free(compression_type_extensions);
+    }
+    if (sfx == NULL) { /* suffix is optional */
+        sfx = filename + strlen(filename);
+    }
+
+    /* Copy the suffix before truncating: once the '.' has been overwritten,
+     * sfx only points at an empty string. */
+    sfx_copy = g_strdup(sfx);
+    *sfx = '\0';
+
+    /* test_00001_20050418010750 */
+
+    GMatchInfo *match_info;
+    g_regex_match(regex, filename, 0, &match_info);
+    if (g_match_info_matches(match_info)) {
+        if (prefix) {
+            *prefix = g_match_info_fetch_named(match_info, "prefix");
+        }
+        if (time) {
+            *time = g_match_info_fetch_named(match_info, "time");
+        }
+        if (suffix) {
+            *suffix = g_strdup(sfx_copy);
+        }
+        ret = FILESET_NUM_TIME;
+    }
+    g_match_info_free(match_info);
+
+    if (ret == FILESET_NO_MATCH) {
+        g_regex_match(regex2, filename, 0, &match_info);
+        if (g_match_info_matches(match_info)) {
+            if (prefix) {
+                *prefix = g_match_info_fetch_named(match_info, "prefix");
+            }
+            if (time) {
+                *time = g_match_info_fetch_named(match_info, "time");
+            }
+            if (suffix) {
+                *suffix = g_strdup(sfx_copy);
+            }
+            ret = FILESET_TIME_NUM;
+        }
+        g_match_info_free(match_info);
    }

+    g_free(sfx_copy);
    g_free(filename);

-    /* ok, seems to be good */
-    return TRUE;
+    return ret;
 }


-/* test, if both files could be in the same file set */
-/* (the filenames must already be in correct shape) */
+/* test if both files could be in the same file set */
+/* (fname2 must already be in correct shape) */
+/* fname1 may be any filename: FALSE is returned if it matches no file set
+ * naming pattern. Otherwise the result is TRUE only when both names report
+ * the same pattern type, the same prefix and the same suffix. */
 static gboolean
 fileset_is_file_in_set(const char *fname1, const char *fname2)
 {
    char        *pfx1;
    char        *pfx2;
-    char        *dup_f1;
-    char        *dup_f2;
-    size_t       minlen = strlen("_00001_20050418010750");
+    char        *sfx1;
+    char        *sfx2;
+    fileset_match_t match1;
+    fileset_match_t match2;
+    gboolean    ret = FALSE;

+    /* the time component is not requested: it differs between the files of a set */
+    match1 = fileset_filename_match_pattern(fname1, &pfx1, &sfx1, NULL);
+    if (match1 == FILESET_NO_MATCH) {
+        return FALSE;
+    }

+    match2 = fileset_filename_match_pattern(fname2, &pfx2, &sfx2, NULL);
    /* just to be sure ... */
-    ws_assert(fileset_filename_match_pattern(fname1));
-    ws_assert(fileset_filename_match_pattern(fname2));
-
-    dup_f1 = g_strdup(fname1);
-    dup_f2 = g_strdup(fname2);
-
-    pfx1 = strrchr(dup_f1, '.');
-    pfx2 = strrchr(dup_f2, '.');
-    /* suffix is optional */
-    if (!pfx1) pfx1 = dup_f1 + strlen(dup_f1);
-    if (!pfx2) pfx2 = dup_f2 + strlen(dup_f2);
-
-    /* the optional suffix (file extension) must be equal */
-    if(strcmp(pfx1, pfx2) != 0) {
-        g_free(dup_f1);
-        g_free(dup_f2);
-        return FALSE;
+    ws_assert(match2 != FILESET_NO_MATCH);
+    /* files of different naming patterns are never part of the same set */
+    if (match1 == match2 && g_strcmp0(pfx1, pfx2) == 0 && g_strcmp0(sfx1, sfx2) == 0) {
+        ret = TRUE;
    }

-    *(pfx1-minlen) = '\0';
-    *(pfx2-minlen) = '\0';
+    /* the strings handed back by fileset_filename_match_pattern() are freed here */
+    g_free(pfx1);
+    g_free(pfx2);
+    g_free(sfx1);
+    g_free(sfx2);

-    if(strcmp(dup_f1, dup_f2) != 0) {
-        g_free(dup_f1);
-        g_free(dup_f2);
-        return FALSE;
-    }
-
-    g_free(dup_f1);
-    g_free(dup_f2);
-    return TRUE;
+    return ret;
 }

 /* GCompareFunc helper for g_list_find_custom() */
@ -292,12 +331,12 @@ fileset_add_dir(const char *fname, void *window)
    dirname = g_string_append_c(dirname, G_DIR_SEPARATOR);

    /* is the current file probably a part of any fileset? */
-    if(fileset_filename_match_pattern(fname)) {
+    if(fileset_filename_match_pattern(fname, NULL, NULL, NULL)) {
        /* yes, go through the files in the directory and check if the file in question is part of the current file set */
        if ((dir = ws_dir_open(dirname->str, 0, NULL)) != NULL) {
            while ((file = ws_dir_read_name(dir)) != NULL) {
                name = ws_dir_get_name(file);
-                if(fileset_filename_match_pattern(name) && fileset_is_file_in_set(name, get_basename(fname))) {
+                if(fileset_is_file_in_set(name, get_basename(fname))) {
                    fileset_add_file(dirname->str, name, strcmp(name, get_basename(fname))== 0 /* current */);
                }
            } /* while */
--- a/fileset.h
+++ b/fileset.h
@ -25,9 +25,26 @@ typedef struct _fileset_entry {
    gboolean current;        /* is this the currently loaded file? */
 } fileset_entry;

+/* Result of matching a filename against the file set naming patterns.
+ * FILESET_NO_MATCH has to remain the first (zero) value: fileset_add_dir()
+ * tests the result of fileset_filename_match_pattern() as a boolean. */
+typedef enum {
+    FILESET_NO_MATCH,   /* the name follows neither naming pattern */
+    FILESET_TIME_NUM,   /* prefix_YYYYMMDDHHMMSS_NNNNN */
+    FILESET_NUM_TIME    /* prefix_NNNNN_YYYYMMDDHHMMSS */
+} fileset_match_t;

-/* helper: is this a probable file of a file set (does the naming pattern match)? */
-extern gboolean fileset_filename_match_pattern(const char *fname);
+/* helper: is this a probable file of a file set (does the naming pattern match)?
+ * Possible naming patterns are prefix_NNNNN_YYYYMMDDHHMMSS.ext[.gz] and
+ * prefix_YYYYMMDDHHMMSS_NNNNN.ext[.gz], where any compression suffix
+ * supported by libwiretap is allowed. The validation is minimal; e.g., the
+ * time is only checked to see if all 14 characters are digits.
+ *
+ * Only the basename of fname is examined. The output strings are newly
+ * allocated and are released by the caller with g_free(); they are only
+ * meaningful when the return value is not FILESET_NO_MATCH.
+ *
+ * @param[in] fname The filename to check for a naming pattern.
+ * @param[out] prefix If not NULL and the filename matches, the prefix
+ * @param[out] suffix If not NULL and the filename matches, the suffix
+ * (file extension) not including the compression suffix
+ * @param[out] time If not NULL and the filename matches, the time component
+ * (the 14 digits)
+ * @return The type of pattern match, or FILESET_NO_MATCH.
+ */
+extern fileset_match_t fileset_filename_match_pattern(const char *fname, char **prefix, char **suffix, char **time);

 extern void fileset_add_dir(const char *fname, void *window);

--- a/ui/qt/models/fileset_entry_model.cpp
+++ b/ui/qt/models/fileset_entry_model.cpp
@ -121,14 +121,13 @@ void FilesetEntryModel::clear()
 }

 QString FilesetEntryModel::nameToDate(const char *name) const {
+    char *date;
    QString dn;

-    if (!fileset_filename_match_pattern(name))
+    if (fileset_filename_match_pattern(name, NULL, NULL, &date) == FILESET_NO_MATCH)
        return NULL;

-    dn = name;
-    dn.remove(QRegularExpression(".*_"));
-    dn.truncate(14);
+    dn = gchar_free_to_qstring(date);
    dn.insert(4, '-');
    dn.insert(7, '-');
    dn.insert(10, ' ');