From 5844c469f0486bd1e360282dc292e85b8aa9bbec Mon Sep 17 00:00:00 2001
From: Erik Rigtorp <erik@rigtorp.se>
Date: Thu, 19 Aug 2021 14:48:52 +0200
Subject: [PATCH] Add support for ZSTD and LZ4 decompression

- Describe zstd and lz4 support in man pages
- Update AUTHORS
- Update mime types to recognize zstd and lz4
---
 AUTHORS.src                      |   4 +
 README.md                        |  13 ++-
 doc/capinfos.pod                 |   2 +-
 doc/captype.pod                  |   2 +-
 doc/editcap.pod                  |   2 +-
 doc/mergecap.pod                 |   2 +-
 doc/reordercap.pod               |   2 +-
 doc/tshark.pod                   |   4 +-
 org.wireshark.Wireshark-mime.xml |  38 ++++++++
 wiretap/CMakeLists.txt           |   4 +
 wiretap/file_wrappers.c          | 162 ++++++++++++++++++++++++++++---
 11 files changed, 213 insertions(+), 22 deletions(-)
diff --git a/AUTHORS.src b/AUTHORS.src
index cd93d1f90a..b87e2b89aa 100644
--- a/AUTHORS.src
+++ b/AUTHORS.src
@@ -3738,6 +3738,10 @@ Jeffrey Nichols		<jsnichols[AT]suprocktech.com> {
 	Asphodel dissector
 }
 
+Erik Rigtorp		<erik[AT]rigtorp.se> {
+	Support for reading ZSTD and LZ4 compressed files
+}
+
 and by:
 
 Georgi Guninski		<guninski[AT]guninski.com>
diff --git a/README.md b/README.md
index eec9bf59a2..7c30528242 100644
--- a/README.md
+++ b/README.md
@@ -82,10 +82,15 @@ Wireshark can read packets from a number of different file types.  See
 the Wireshark man page or the Wireshark User's Guide for a list of
 supported file formats.
 
-Wireshark can transparently read gzipped versions of any of those files if
-zlib was available when Wireshark was compiled.  CMake will automatically
-use zlib if it is found on your system.  You can disable zlib support by
-running `cmake -DENABLE_ZLIB=OFF`.
+Wireshark can transparently read compressed versions of any of those files if
+the required compression library was available when Wireshark was compiled.
+Currently supported compression formats are:
+
+- GZIP
+- ZSTD
+- LZ4
+
+GZIP support is provided by zlib; you can disable it by running `cmake -DENABLE_ZLIB=OFF`.
 
 Although Wireshark can read AIX iptrace files, the documentation on
 AIX's iptrace packet-trace command is sparse.  The `iptrace` command
diff --git a/doc/capinfos.pod b/doc/capinfos.pod
index 0ae118bbef..762f1423aa 100644
--- a/doc/capinfos.pod
+++ b/doc/capinfos.pod
@@ -72,7 +72,7 @@ superseding or adding to earlier options.
 B<Capinfos> is able to detect and read the same capture files that are
 supported by B<Wireshark>.
 The input files don't need a specific filename extension; the file
-format and an optional gzip compression will be automatically detected.
+format and an optional gzip, zstd or lz4 compression will be automatically detected.
 Near the beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html>
 is a detailed description of the way B<Wireshark> handles this, which is
diff --git a/doc/captype.pod b/doc/captype.pod
index ba1c086956..1ea31d2558 100644
--- a/doc/captype.pod
+++ b/doc/captype.pod
@@ -22,7 +22,7 @@ prints the capture file type of each E<lt>I<infile>E<gt>.
 B<Captype> is able to detect and read the same capture files that are
 supported by B<Wireshark>.
 The input files don't need a specific filename extension; the file
-format and an optional gzip compression will be automatically detected.
+format and an optional gzip, zstd or lz4 compression will be automatically detected.
 Near the beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html>
 is a detailed description of the way B<Wireshark> handles this, which is
diff --git a/doc/editcap.pod b/doc/editcap.pod
index 7ce02115f9..edd4be7998 100644
--- a/doc/editcap.pod
+++ b/doc/editcap.pod
@@ -78,7 +78,7 @@ B<Editcap> can be used to assign comment strings to frame numbers.
 B<Editcap> is able to detect, read and write the same capture files that
 are supported by B<Wireshark>.
 The input file doesn't need a specific filename extension; the file
-format and an optional gzip compression will be automatically detected.
+format and an optional gzip, zstd or lz4 compression will be automatically detected.
 Near the beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html>
 is a detailed description of the way B<Wireshark> handles this, which is
diff --git a/doc/mergecap.pod b/doc/mergecap.pod
index 8ed8b1090a..12e5e40755 100644
--- a/doc/mergecap.pod
+++ b/doc/mergecap.pod
@@ -35,7 +35,7 @@ writes all of the packets from the input capture files to the output file.
 B<Mergecap> is able to detect, read and write the same capture files that
 are supported by B<Wireshark>.
 The input files don't need a specific filename extension; the file
-format and an optional gzip compression will be automatically detected.
+format and an optional gzip, zstd or lz4 compression will be automatically detected.
 Near the beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html>
 is a detailed description of the way B<Wireshark> handles this, which is
diff --git a/doc/reordercap.pod b/doc/reordercap.pod
index 32b983af5e..5135acbc48 100644
--- a/doc/reordercap.pod
+++ b/doc/reordercap.pod
@@ -31,7 +31,7 @@ capture file.
 B<Reordercap> is able to detect, read and write the same capture files that
 are supported by B<Wireshark>.
 The input file doesn't need a specific filename extension; the file
-format and an optional gzip compression will be detected automatically.
+format and an optional gzip, zstd or lz4 compression will be automatically detected.
 Near the beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html>
 is a detailed description of the way B<Wireshark> handles this, which is
diff --git a/doc/tshark.pod b/doc/tshark.pod
index e0059d43cf..74fcbb8ae2 100644
--- a/doc/tshark.pod
+++ b/doc/tshark.pod
@@ -42,7 +42,7 @@ from the file and displaying a summary line on the standard output for
 each packet read.  B<TShark> is able to detect, read and write the same
 capture files that are supported by B<Wireshark>.  The input file
 doesn't need a specific filename extension; the file format and an
-optional gzip compression will be automatically detected.  Near the
+optional gzip, zstd or lz4 compression will be automatically detected.  Near the
 beginning of the DESCRIPTION section of wireshark(1) or
 L<https://www.wireshark.org/docs/man-pages/wireshark.html> is a detailed
 description of the way B<Wireshark> handles this, which is the same way
@@ -2086,7 +2086,7 @@ active.
 
 Sets the fatal log level. Fatal log levels cause the progeam to abort.
 The fatal log level be set to C<critical> or C<warning>. C<Error> is
-always fatal. 
+always fatal.
 
 =item WIRESHARK_LOG_DOMAINS
 
diff --git a/org.wireshark.Wireshark-mime.xml b/org.wireshark.Wireshark-mime.xml
index 0b21080619..69e3e4bf2d 100644
--- a/org.wireshark.Wireshark-mime.xml
+++ b/org.wireshark.Wireshark-mime.xml
@@ -43,6 +43,8 @@
     </magic>
     <glob pattern="*.pcap"/>
     <glob pattern="*.pcap.gz"/>
+    <glob pattern="*.pcap.zst"/>
+    <glob pattern="*.pcap.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-pcapng">
@@ -60,6 +62,10 @@
     <glob pattern="*.ntar"/>
     <glob pattern="*.pcapng.gz"/>
     <glob pattern="*.ntar.gz"/>
+    <glob pattern="*.pcapng.zst"/>
+    <glob pattern="*.ntar.zst"/>
+    <glob pattern="*.pcapng.lz4"/>
+    <glob pattern="*.ntar.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-snoop">
@@ -70,6 +76,8 @@
     </magic>
     <glob pattern="*.snoop"/>
     <glob pattern="*.snoop.gz"/>
+    <glob pattern="*.snoop.zst"/>
+    <glob pattern="*.snoop.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-iptrace">
@@ -90,6 +98,8 @@
     </magic>
     <glob pattern="*.tr1"/>
     <glob pattern="*.tr1.gz"/>
+    <glob pattern="*.tr1.zst"/>
+    <glob pattern="*.tr1.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-nettl">
@@ -102,6 +112,10 @@
     <glob pattern="*.trc1"/>
     <glob pattern="*.trc0.gz"/>
     <glob pattern="*.trc1.gz"/>
+    <glob pattern="*.trc0.zst"/>
+    <glob pattern="*.trc1.zst"/>
+    <glob pattern="*.trc0.lz4"/>
+    <glob pattern="*.trc1.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-radcom">
@@ -126,6 +140,14 @@
     <glob pattern="*.pkt.gz"/>
     <glob pattern="*.tpc.gz"/>
     <glob pattern="*.wpz.gz"/>
+    <glob pattern="*.apc.zst"/>
+    <glob pattern="*.pkt.zst"/>
+    <glob pattern="*.tpc.zst"/>
+    <glob pattern="*.wpz.zst"/>
+    <glob pattern="*.apc.lz4"/>
+    <glob pattern="*.pkt.lz4"/>
+    <glob pattern="*.tpc.lz4"/>
+    <glob pattern="*.wpz.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-visualnetworks">
@@ -144,6 +166,8 @@
     </magic>
     <glob pattern="*.bfr"/>
     <glob pattern="*.bfr.gz"/>
+    <glob pattern="*.bfr.zst"/>
+    <glob pattern="*.bfr.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-5view">
@@ -154,6 +178,8 @@
     </magic>
     <glob pattern="*.5vw"/>
     <glob pattern="*.5vw.gz"/>
+    <glob pattern="*.5vw.zst"/>
+    <glob pattern="*.5vw.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-tektronix-rf5">
@@ -164,6 +190,8 @@
     </magic>
     <glob pattern="*.rf5"/>
     <glob pattern="*.rf5.gz"/>
+    <glob pattern="*.rf5.zst"/>
+    <glob pattern="*.rf5.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-micropross-mplog">
@@ -174,6 +202,8 @@
     </magic>
     <glob pattern="*.mplog"/>
     <glob pattern="*.mplog.gz"/>
+    <glob pattern="*.mplog.zst"/>
+    <glob pattern="*.mplog.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-apple-packetlogger">
@@ -181,6 +211,8 @@
     <generic-icon name="org.wireshark.Wireshark-mimetype"/>
     <glob pattern="*.pklg"/>
     <glob pattern="*.pklg.gz"/>
+    <glob pattern="*.pklg.zst"/>
+    <glob pattern="*.pklg.lz4"/>
   </mime-type>
 
   <mime-type type="application/x-endace-erf">
@@ -188,6 +220,8 @@
     <generic-icon name="org.wireshark.Wireshark-mimetype"/>
     <glob pattern="*.erf"/>
     <glob pattern="*.erf.gz"/>
+    <glob pattern="*.erf.zst"/>
+    <glob pattern="*.erf.lz4"/>
   </mime-type>
 
   <mime-type type="application/ipfix">
@@ -195,6 +229,8 @@
     <generic-icon name="org.wireshark.Wireshark-mimetype"/>
     <glob pattern="*.ipfix"/>
     <glob pattern="*.ipfix.gz"/>
+    <glob pattern="*.ipfix.zst"/>
+    <glob pattern="*.ipfix.lz4"/>
     <!-- Don't register for .pfx: that extension has another (more common) use -->
   </mime-type>
 
@@ -203,5 +239,7 @@
     <generic-icon name="org.wireshark.Wireshark-mimetype"/>
     <glob pattern="*.vwr"/>
     <glob pattern="*.vwr.gz"/>
+    <glob pattern="*.vwr.zst"/>
+    <glob pattern="*.vwr.lz4"/>
   </mime-type>
 </mime-info>
diff --git a/wiretap/CMakeLists.txt b/wiretap/CMakeLists.txt
index f5054e773a..06dfb36831 100644
--- a/wiretap/CMakeLists.txt
+++ b/wiretap/CMakeLists.txt
@@ -183,11 +183,15 @@ target_link_libraries(wiretap
 		wsutil
 	PRIVATE
 		${ZLIB_LIBRARIES}
+		${ZSTD_LIBRARIES}
+		${LZ4_LIBRARIES}
 )
 
 target_include_directories(wiretap SYSTEM
 	PRIVATE
 		${ZLIB_INCLUDE_DIRS}
+		${ZSTD_INCLUDE_DIRS}
+		${LZ4_INCLUDE_DIRS}
 )
 
 target_include_directories(wiretap PUBLIC
diff --git a/wiretap/file_wrappers.c b/wiretap/file_wrappers.c
index 0f5bb89318..8bf8062bb3 100644
--- a/wiretap/file_wrappers.c
+++ b/wiretap/file_wrappers.c
@@ -16,6 +16,7 @@
 
 #include <config.h>
 
+#include <assert.h>
 #include <errno.h>
 #include <string.h>
 #include "wtap-int.h"
@@ -28,6 +29,14 @@
 #include <zlib.h>
 #endif /* HAVE_ZLIB */
 
+#ifdef HAVE_ZSTD
+#include <zstd.h>
+#endif
+
+#ifdef HAVE_LZ4
+#include <lz4frame.h>
+#endif
+
 /*
  * See RFC 1952:
  *
@@ -112,7 +121,13 @@ typedef enum {
     UNCOMPRESSED,  /* uncompressed - copy input directly */
 #ifdef HAVE_ZLIB
     ZLIB,          /* decompress a zlib stream */
-    GZIP_AFTER_HEADER
+    GZIP_AFTER_HEADER,
+#endif
+#ifdef HAVE_ZSTD
+    ZSTD,
+#endif
+#ifdef HAVE_LZ4
+    LZ4,
 #endif
 } compression_t;
 
@@ -153,6 +168,12 @@ struct wtap_reader {
     /* fast seeking */
     GPtrArray *fast_seek;
     void *fast_seek_cur;
+#ifdef HAVE_ZSTD
+    ZSTD_DCtx *zstd_dctx;
+#endif
+#ifdef HAVE_LZ4
+    LZ4F_dctx *lz4_dctx;
+#endif
 };
 
 /* Current read offset within a buffer. */
@@ -810,6 +831,42 @@ gz_head(FILE_T state)
     /* FD 37 7A 58 5A 00 */
 #endif
 
+    if (state->in.avail >= 4
+        && state->in.buf[0] == 0x28 && state->in.buf[1] == 0xb5
+        && state->in.buf[2] == 0x2f && state->in.buf[3] == 0xfd) {
+#ifdef HAVE_ZSTD
+        const size_t ret = ZSTD_DCtx_reset(state->zstd_dctx, ZSTD_reset_session_and_parameters);
+        if (ZSTD_isError(ret)) {
+            state->err = WTAP_ERR_DECOMPRESS;
+            state->err_info = ZSTD_getErrorName(ret);
+            return -1;
+        }
+
+        state->compression = ZSTD;
+        state->is_compressed = TRUE;
+        return 0;
+#else
+        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
+        state->err_info = "reading zstd-compressed files isn't supported";
+        return -1;
+#endif
+    }
+
+    if (state->in.avail >= 4
+        && state->in.buf[0] == 0x04 && state->in.buf[1] == 0x22
+        && state->in.buf[2] == 0x4d && state->in.buf[3] == 0x18) {
+#ifdef HAVE_LZ4
+        LZ4F_resetDecompressionContext(state->lz4_dctx);
+        state->compression = LZ4;
+        state->is_compressed = TRUE;
+        return 0;
+#else
+        state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED;
+        state->err_info = "reading lz4-compressed files isn't supported";
+        return -1;
+#endif
+    }
+
     if (state->fast_seek)
         fast_seek_header(state, state->raw_pos - state->in.avail - state->out.avail, state->pos, UNCOMPRESSED);
 
@@ -849,6 +906,60 @@ fill_out_buffer(FILE_T state)
     else if (state->compression == ZLIB) {      /* decompress */
         zlib_read(state, state->out.buf, state->size << 1);
     }
+#endif
+#ifdef HAVE_ZSTD
+    else if (state->compression == ZSTD) {
+        assert(state->out.avail == 0);
+
+        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
+            return -1;
+
+        ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0};
+        ZSTD_inBuffer input = {state->in.next, state->in.avail, 0};
+        const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input);
+        if (ZSTD_isError(ret)) {
+            state->err = WTAP_ERR_DECOMPRESS;
+            state->err_info = ZSTD_getErrorName(ret);
+            return -1;
+        }
+
+        state->in.next = state->in.next + input.pos;
+        state->in.avail -= input.pos;
+
+        state->out.next = output.dst;
+        state->out.avail = output.pos;
+
+        if (ret == 0) {
+            state->compression = UNKNOWN;
+        }
+    }
+#endif
+#ifdef HAVE_LZ4
+    else if (state->compression == LZ4) {
+        assert(state->out.avail == 0);
+
+        if (state->in.avail == 0 && fill_in_buffer(state) == -1)
+            return -1;
+
+        size_t outBufSize = state->size << 1;
+        size_t inBufSize = state->in.avail;
+        const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL);
+        if (LZ4F_isError(ret)) {
+            state->err = WTAP_ERR_DECOMPRESS;
+            state->err_info = LZ4F_getErrorName(ret);
+            return -1;
+        }
+
+        state->in.next = state->in.next + inBufSize;
+        state->in.avail -= inBufSize;
+
+        state->out.next = state->out.buf;
+        state->out.avail = outBufSize;
+
+        if (ret == 0) {
+            state->compression = UNKNOWN;
+        }
+    }
 #endif
     return 0;
 }
@@ -929,6 +1040,7 @@ file_fdopen(int fd)
 #endif
     int want = GZBUFSIZE;
     FILE_T state;
+    size_t ret;
 
     if (fd == -1)
         return NULL;
@@ -969,6 +1081,11 @@ file_fdopen(int fd)
         /* XXX, verify result? */
     }
 #endif
+#ifdef HAVE_ZSTD
+    /* XXX - one size ("want") drives both buffers; ideally in/out would be sized separately */
+    want = MAX(want, ZSTD_DStreamInSize());
+    want = MAX(want, ZSTD_DStreamOutSize());
+#endif
 
     /* allocate buffers */
     state->in.buf = (unsigned char *)g_try_malloc((gsize)want);
@@ -979,11 +1096,7 @@ file_fdopen(int fd)
     state->out.avail = 0;
     state->size = want;
     if (state->in.buf == NULL || state->out.buf == NULL) {
-        g_free(state->out.buf);
-        g_free(state->in.buf);
-        g_free(state);
-        errno = ENOMEM;
-        return NULL;
+       goto err;
     }
 
 #ifdef HAVE_ZLIB
@@ -994,18 +1107,45 @@ file_fdopen(int fd)
     state->strm.avail_in = 0;
     state->strm.next_in = Z_NULL;
     if (inflateInit2(&(state->strm), -15) != Z_OK) {    /* raw inflate */
-        g_free(state->out.buf);
-        g_free(state->in.buf);
-        g_free(state);
-        errno = ENOMEM;
-        return NULL;
+        goto err;
     }
 
     /* for now, assume we should check the crc */
     state->dont_check_crc = FALSE;
 #endif
+
+#ifdef HAVE_ZSTD
+    state->zstd_dctx = ZSTD_createDCtx();
+    if (state->zstd_dctx == NULL) {
+        goto err;
+    }
+#endif
+
+#ifdef HAVE_LZ4
+    ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION);
+    if (LZ4F_isError(ret)) {
+        goto err;
+    }
+#endif
+
     /* return stream */
     return state;
+
+err:
+#ifdef HAVE_ZLIB
+    inflateEnd(&state->strm);
+#endif
+#ifdef HAVE_ZSTD
+    ZSTD_freeDCtx(state->zstd_dctx);
+#endif
+#ifdef HAVE_LZ4
+    LZ4F_freeDecompressionContext(state->lz4_dctx);
+#endif
+    g_free(state->out.buf);
+    g_free(state->in.buf);
+    g_free(state);
+    errno = ENOMEM;
+    return NULL;
 }
 
 FILE_T