From 5844c469f0486bd1e360282dc292e85b8aa9bbec Mon Sep 17 00:00:00 2001 From: Erik Rigtorp Date: Thu, 19 Aug 2021 14:48:52 +0200 Subject: [PATCH] Add support for ZSTD and LZ4 decompression - Describe zstd and lz4 support in man pages - Update AUTHORS - Update mime types to recognize zstd and lz4 --- AUTHORS.src | 4 + README.md | 13 ++- doc/capinfos.pod | 2 +- doc/captype.pod | 2 +- doc/editcap.pod | 2 +- doc/mergecap.pod | 2 +- doc/reordercap.pod | 2 +- doc/tshark.pod | 4 +- org.wireshark.Wireshark-mime.xml | 38 ++++++++ wiretap/CMakeLists.txt | 4 + wiretap/file_wrappers.c | 162 ++++++++++++++++++++++++++++--- 11 files changed, 213 insertions(+), 22 deletions(-) diff --git a/AUTHORS.src b/AUTHORS.src index cd93d1f90a..b87e2b89aa 100644 --- a/AUTHORS.src +++ b/AUTHORS.src @@ -3738,6 +3738,10 @@ Jeffrey Nichols { Asphodel dissector } +Erik Rigtorp { + Support for reading ZSTD and LZ4 compressed files +} + and by: Georgi Guninski diff --git a/README.md b/README.md index eec9bf59a2..7c30528242 100644 --- a/README.md +++ b/README.md @@ -82,10 +82,15 @@ Wireshark can read packets from a number of different file types. See the Wireshark man page or the Wireshark User's Guide for a list of supported file formats. -Wireshark can transparently read gzipped versions of any of those files if -zlib was available when Wireshark was compiled. CMake will automatically -use zlib if it is found on your system. You can disable zlib support by -running `cmake -DENABLE_ZLIB=OFF`. +Wireshark can transparently read compressed versions of any of those files if +the required compression library was available when Wireshark was compiled. +Currently supported compression formats are: + +- GZIP +- ZSTD +- LZ4 + +You can disable zlib support by running `cmake -DENABLE_ZLIB=OFF`. Although Wireshark can read AIX iptrace files, the documentation on AIX's iptrace packet-trace command is sparse. The `iptrace` command diff --git a/doc/capinfos.pod b/doc/capinfos.pod index 0ae118bbef..762f1423aa 100644 --- a/doc/capinfos.pod +++ b/doc/capinfos.pod @@ -72,7 +72,7 @@ superseding or adding to earlier options. B is able to detect and read the same capture files that are supported by B. The input files don't need a specific filename extension; the file -format and an optional gzip compression will be automatically detected. +format and an optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is diff --git a/doc/captype.pod b/doc/captype.pod index ba1c086956..1ea31d2558 100644 --- a/doc/captype.pod +++ b/doc/captype.pod @@ -22,7 +22,7 @@ prints the capture file type of each EIE. B is able to detect and read the same capture files that are supported by B. The input files don't need a specific filename extension; the file -format and an optional gzip compression will be automatically detected. +format and an optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is diff --git a/doc/editcap.pod b/doc/editcap.pod index 7ce02115f9..edd4be7998 100644 --- a/doc/editcap.pod +++ b/doc/editcap.pod @@ -78,7 +78,7 @@ B can be used to assign comment strings to frame numbers. B is able to detect, read and write the same capture files that are supported by B. The input file doesn't need a specific filename extension; the file -format and an optional gzip compression will be automatically detected. +format and an optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is diff --git a/doc/mergecap.pod b/doc/mergecap.pod index 8ed8b1090a..12e5e40755 100644 --- a/doc/mergecap.pod +++ b/doc/mergecap.pod @@ -35,7 +35,7 @@ writes all of the packets from the input capture files to the output file. B is able to detect, read and write the same capture files that are supported by B. The input files don't need a specific filename extension; the file -format and an optional gzip compression will be automatically detected. +format and an optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is diff --git a/doc/reordercap.pod b/doc/reordercap.pod index 32b983af5e..5135acbc48 100644 --- a/doc/reordercap.pod +++ b/doc/reordercap.pod @@ -31,7 +31,7 @@ capture file. B is able to detect, read and write the same capture files that are supported by B. The input file doesn't need a specific filename extension; the file -format and an optional gzip compression will be detected automatically. +format and an optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is diff --git a/doc/tshark.pod b/doc/tshark.pod index e0059d43cf..74fcbb8ae2 100644 --- a/doc/tshark.pod +++ b/doc/tshark.pod @@ -42,7 +42,7 @@ from the file and displaying a summary line on the standard output for each packet read. B is able to detect, read and write the same capture files that are supported by B. The input file doesn't need a specific filename extension; the file format and an -optional gzip compression will be automatically detected. Near the +optional gzip, zstd or lz4 compression will be automatically detected. Near the beginning of the DESCRIPTION section of wireshark(1) or L is a detailed description of the way B handles this, which is the same way @@ -2086,7 +2086,7 @@ active. Sets the fatal log level. Fatal log levels cause the progeam to abort. The fatal log level be set to C or C. C is -always fatal. +always fatal. =item WIRESHARK_LOG_DOMAINS diff --git a/org.wireshark.Wireshark-mime.xml b/org.wireshark.Wireshark-mime.xml index 0b21080619..69e3e4bf2d 100644 --- a/org.wireshark.Wireshark-mime.xml +++ b/org.wireshark.Wireshark-mime.xml @@ -43,6 +43,8 @@ + + @@ -60,6 +62,10 @@ + + + + @@ -70,6 +76,8 @@ + + @@ -90,6 +98,8 @@ + + @@ -102,6 +112,10 @@ + + + + @@ -126,6 +140,14 @@ + + + + + + + + @@ -144,6 +166,8 @@ + + @@ -154,6 +178,8 @@ + + @@ -164,6 +190,8 @@ + + @@ -174,6 +202,8 @@ + + @@ -181,6 +211,8 @@ + + @@ -188,6 +220,8 @@ + + @@ -195,6 +229,8 @@ + + @@ -203,5 +239,7 @@ + + diff --git a/wiretap/CMakeLists.txt b/wiretap/CMakeLists.txt index f5054e773a..06dfb36831 100644 --- a/wiretap/CMakeLists.txt +++ b/wiretap/CMakeLists.txt @@ -183,11 +183,15 @@ target_link_libraries(wiretap wsutil PRIVATE ${ZLIB_LIBRARIES} + ${ZSTD_LIBRARIES} + ${LZ4_LIBRARIES} ) target_include_directories(wiretap SYSTEM PRIVATE ${ZLIB_INCLUDE_DIRS} + ${ZSTD_INCLUDE_DIRS} + ${LZ4_INCLUDE_DIRS} ) target_include_directories(wiretap PUBLIC diff --git a/wiretap/file_wrappers.c b/wiretap/file_wrappers.c index 0f5bb89318..8bf8062bb3 100644 --- a/wiretap/file_wrappers.c +++ b/wiretap/file_wrappers.c @@ -16,6 +16,7 @@ #include +#include #include #include #include "wtap-int.h" @@ -28,6 +29,14 @@ #include #endif /* HAVE_ZLIB */ +#ifdef HAVE_ZSTD +#include +#endif + +#ifdef HAVE_LZ4 +#include +#endif + /* * See RFC 1952: * @@ -112,7 +121,13 @@ typedef enum { UNCOMPRESSED, /* uncompressed - copy input directly */ #ifdef HAVE_ZLIB ZLIB, /* decompress a zlib stream */ - GZIP_AFTER_HEADER + GZIP_AFTER_HEADER, +#endif +#ifdef HAVE_ZSTD + ZSTD, +#endif +#ifdef HAVE_LZ4 + LZ4, #endif } compression_t; @@ -153,6 +168,12 @@ struct wtap_reader { /* fast seeking */ GPtrArray *fast_seek; void *fast_seek_cur; +#ifdef HAVE_ZSTD + ZSTD_DCtx *zstd_dctx; +#endif +#ifdef HAVE_LZ4 + LZ4F_dctx *lz4_dctx; +#endif }; /* Current read offset within a buffer. */ @@ -810,6 +831,42 @@ gz_head(FILE_T state) /* FD 37 7A 58 5A 00 */ #endif + if (state->in.avail >= 4 + && state->in.buf[0] == 0x28 && state->in.buf[1] == 0xb5 + && state->in.buf[2] == 0x2f && state->in.buf[3] == 0xfd) { +#ifdef HAVE_ZSTD + const size_t ret = ZSTD_DCtx_reset(state->zstd_dctx, ZSTD_reset_session_and_parameters); + if (ZSTD_isError(ret)) { + state->err = WTAP_ERR_DECOMPRESS; + state->err_info = ZSTD_getErrorName(ret); + return -1; + } + + state->compression = ZSTD; + state->is_compressed = TRUE; + return 0; +#else + state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; + state->err_info = "reading zstd-compressed files isn't supported"; + return -1; +#endif + } + + if (state->in.avail >= 4 + && state->in.buf[0] == 0x04 && state->in.buf[1] == 0x22 + && state->in.buf[2] == 0x4d && state->in.buf[3] == 0x18) { +#ifdef HAVE_LZ4 + LZ4F_resetDecompressionContext(state->lz4_dctx); + state->compression = LZ4; + state->is_compressed = TRUE; + return 0; +#else + state->err = WTAP_ERR_DECOMPRESSION_NOT_SUPPORTED; + state->err_info = "reading lz4-compressed files isn't supported"; + return -1; +#endif + } + if (state->fast_seek) fast_seek_header(state, state->raw_pos - state->in.avail - state->out.avail, state->pos, UNCOMPRESSED); @@ -849,6 +906,60 @@ fill_out_buffer(FILE_T state) else if (state->compression == ZLIB) { /* decompress */ zlib_read(state, state->out.buf, state->size << 1); } +#endif +#ifdef HAVE_ZSTD + else if (state->compression == ZSTD) { + assert(state->out.avail == 0); + + if (state->in.avail == 0 && fill_in_buffer(state) == -1) + return -1; + + ZSTD_outBuffer output = {state->out.buf, state->size << 1, 0}; + ZSTD_inBuffer input = {state->in.next, state->in.avail, 0}; + const size_t ret = ZSTD_decompressStream(state->zstd_dctx, &output, &input); + if (ZSTD_isError(ret)) { + state->err = WTAP_ERR_DECOMPRESS; + state->err_info = ZSTD_getErrorName(ret); + return -1; + } + + state->in.next = state->in.next + input.pos; + state->in.avail -= input.pos; + + state->out.next = output.dst; + state->out.avail = output.pos; + + if (ret == 0) { + state->compression = UNKNOWN; + } + } +#endif +#ifdef HAVE_LZ4 + else if (state->compression == LZ4) { + assert(state->out.avail == 0); + + if (state->in.avail == 0 && fill_in_buffer(state) == -1) + return -1; + + size_t outBufSize = state->size << 1; + size_t inBufSize = state->in.avail; + const size_t ret = LZ4F_decompress(state->lz4_dctx, state->out.buf, &outBufSize, state->in.next, &inBufSize, NULL); + if (LZ4F_isError(ret)) { + state->err = WTAP_ERR_DECOMPRESS; + state->err_info = LZ4F_getErrorName(ret); + return -1; + } + + state->in.next = state->in.next + inBufSize; + state->in.avail -= inBufSize; + + state->out.next = state->out.buf; + state->out.avail = outBufSize; + + if (ret == 0) { + state->compression = UNKNOWN; + } + } #endif return 0; } @@ -929,6 +1040,7 @@ file_fdopen(int fd) #endif int want = GZBUFSIZE; FILE_T state; + size_t ret; if (fd == -1) return NULL; @@ -969,6 +1081,11 @@ file_fdopen(int fd) /* XXX, verify result? */ } #endif +#ifdef HAVE_ZSTD + /* we should have separate input and output buf sizes */ + want = MAX(want, ZSTD_DStreamInSize()); + want = MAX(want, ZSTD_DStreamOutSize()); +#endif /* allocate buffers */ state->in.buf = (unsigned char *)g_try_malloc((gsize)want); @@ -979,11 +1096,7 @@ file_fdopen(int fd) state->out.avail = 0; state->size = want; if (state->in.buf == NULL || state->out.buf == NULL) { - g_free(state->out.buf); - g_free(state->in.buf); - g_free(state); - errno = ENOMEM; - return NULL; + goto err; } #ifdef HAVE_ZLIB @@ -994,18 +1107,45 @@ file_fdopen(int fd) state->strm.avail_in = 0; state->strm.next_in = Z_NULL; if (inflateInit2(&(state->strm), -15) != Z_OK) { /* raw inflate */ - g_free(state->out.buf); - g_free(state->in.buf); - g_free(state); - errno = ENOMEM; - return NULL; + goto err; } /* for now, assume we should check the crc */ state->dont_check_crc = FALSE; #endif + +#ifdef HAVE_ZSTD + state->zstd_dctx = ZSTD_createDCtx(); + if (state->zstd_dctx == NULL) { + goto err; + } +#endif + +#ifdef HAVE_LZ4 + ret = LZ4F_createDecompressionContext(&state->lz4_dctx, LZ4F_VERSION); + if (LZ4F_isError(ret)) { + goto err; + } +#endif + /* return stream */ return state; + +err: +#ifdef HAVE_ZLIB + inflateEnd(&state->strm); +#endif +#ifdef HAVE_ZSTD + ZSTD_freeDCtx(state->zstd_dctx); +#endif +#ifdef HAVE_LZ4 + LZ4F_freeDecompressionContext(state->lz4_dctx); +#endif + g_free(state->out.buf); + g_free(state->in.buf); + g_free(state); + errno = ENOMEM; + return NULL; } FILE_T