/*
 * wireshark/epan/tvbuff_lz77.c
 *
 * Revision history note (2019-07-05 14:08:18 +00:00):
 *
 * smb2: add support for decompression
 *
 * The latest iteration of Microsoft updates to SMB3 added compression to
 * the protocol. This commit implements decompressing and dissecting
 * compressed payloads.
 *
 * The compression algorithms that can be used are "Plain LZ77",
 * "LZ77+Huffman" and "LZNT1", which you can read more about in the
 * [MS-XCA] documentation. This set of algorithms is sometimes referred
 * to as XPRESS.
 *
 * This commit reuses the existing uncompression API scheme already in
 * place with zlib and brotli and adds 3 tvb_uncompress_*() functions
 * implemented in:
 *   - epan/tvbuff_lz77.c
 *   - epan/tvbuff_lz77huff.c
 *   - epan/tvbuff_lznt1.c
 *
 * A new function wmem_array_try_index() was added to the wmem_array API
 * to make bound-checked reads that fail gracefully. New tests for it
 * have been added as well.
 *
 * Since both reads (tvb) and writes (wmem_array) are bound checked, the
 * risk of buffer overruns is drastically reduced. LZ77+Huffman has
 * decoding tables and special care was taken to bound check these.
 *
 * Simplified versions of the implementations were successfully tested
 * against AFL (American Fuzzy Lop) for ~150 million executions each.
 *
 * The SMB2/3 dissector was changed to deal with the new transform header
 * for compressed packets (new protocol_id value) and READ request flags
 * (COMPRESSED). Badly compressed or encrypted packets are now reported
 * as such, and the decryption test suite was changed to reflect that.
 *
 * This commit also adds a test capture with 1 packet compressed with
 * each algorithm as returned by Windows Server 2019, along with 3
 * matching tests in test/suite_dissection.py
 *
 * Change-Id: I2b84f56541f2f4ee7d886152794b993987dd10e7
 * Reviewed-on: https://code.wireshark.org/review/33855
 * Petri-Dish: Anders Broman <a.broman58@gmail.com>
 * Tested-by: Petri Dish Buildbot
 * Reviewed-by: Peter Wu <peter@lekensteyn.nl>
 */
/*
* Decompression code for Plain LZ77. This encoding is used by
* Microsoft in various file formats and protocols including SMB3.
*
* See MS-XCA.
*
* Copyright (C) 2019 Aurélien Aptel
*
* SPDX-License-Identifier: GPL-2.0-or-later
*/
#include <glib.h>
#include <epan/exceptions.h>
#include <epan/tvbuff.h>
#include <epan/wmem/wmem.h>
#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */

/*
 * Largest decompressed size we are willing to produce. Keeping the
 * output length within the positive range of a 32-bit signed int
 * guarantees that none of the unsigned length arithmetic below can wrap.
 * NOTE(review): the value was also picked so the result still fits the
 * signed "reported length" taken by tvb_new_real_data() - confirm against
 * the tvbuff API in use.
 */
#define MAX_OUTPUT_SIZE 0x7fffffffu

/*
 * Decode a Plain LZ77 stream (see MS-XCA) into obuf.
 *
 * tvb      buffer holding the compressed data
 * offset   offset in tvb of the first compressed byte
 * in_size  length of the compressed data in bytes; must be in the range
 *          1..MAX_INPUT_SIZE
 * obuf     array of 1-byte elements that receives the decoded bytes
 *
 * Returns TRUE once the end-of-stream condition is met (a "match" flag
 * is found with the read cursor exactly at in_size) and FALSE if the
 * arguments are invalid or the stream is malformed. On FALSE, obuf may
 * contain partial output.
 *
 * Reads are done through the tvb accessors without an explicit length
 * check here; the caller is expected to catch the exception they raise
 * when the stream runs past the end of tvb.
 */
static gboolean do_uncompress(tvbuff_t *tvb, int offset, int in_size,
			      wmem_array_t *obuf)
{
	guint buf_flags = 0, buf_flag_count = 0;
	int in_off = 0;
	/* Offset of a length byte whose high nibble is still unused (0 = none) */
	int last_length_half_byte = 0;
	guint match_bytes, match_len, match_off;
	guint out_count;
	guint i;

	if (!tvb)
		return FALSE;

	/*
	 * in_off is at least 4 by the time the end-of-stream test runs, so
	 * a zero or negative size could never terminate the loop cleanly.
	 */
	if (in_size <= 0 || in_size > MAX_INPUT_SIZE)
		return FALSE;

	while (1) {
		/* Refill the 32 flag bits; they are consumed MSB first */
		if (buf_flag_count == 0) {
			buf_flags = tvb_get_letohl(tvb, offset+in_off);
			in_off += 4;
			buf_flag_count = 32;
		}
		buf_flag_count--;

		/* Unsigned constant: shifting a signed 1 by 31 is undefined */
		if ((buf_flags & (1u << buf_flag_count)) == 0) {
			/* Flag clear: copy one literal byte */
			guint8 v;

			if (wmem_array_get_count(obuf) >= MAX_OUTPUT_SIZE)
				return FALSE;
			v = tvb_get_guint8(tvb, offset+in_off);
			wmem_array_append_one(obuf, v);
			in_off++;
			continue;
		}

		/* Flag set: either end of stream or a back-reference */
		if (in_off == in_size)
			return TRUE;

		/* 16-bit token: low 3 bits = length, high 13 bits = distance-1 */
		match_bytes = tvb_get_letohs(tvb, offset+in_off);
		in_off += 2;
		match_len = match_bytes % 8;
		match_off = (match_bytes/8) + 1;

		if (match_len == 7) {
			/*
			 * Length continues in a 4-bit nibble. Two consecutive
			 * long matches share one byte: the first uses the low
			 * nibble, the second the high nibble.
			 */
			if (last_length_half_byte == 0) {
				match_len = tvb_get_guint8(tvb, offset+in_off);
				match_len = match_len % 16;
				last_length_half_byte = in_off;
				in_off++;
			} else {
				match_len = tvb_get_guint8(tvb, offset+last_length_half_byte);
				match_len = match_len / 16;
				last_length_half_byte = 0;
			}

			if (match_len == 15) {
				/* Length continues in a full byte */
				match_len = tvb_get_guint8(tvb, offset+in_off);
				in_off++;

				if (match_len == 255) {
					/* Absolute 16-bit length follows */
					match_len = tvb_get_letohs(tvb, offset+in_off);
					in_off += 2;
					if (match_len == 0) {
						/*
						 * A zero 16-bit length escapes to a
						 * 32-bit length: read all 4 bytes that
						 * are being consumed.
						 */
						match_len = tvb_get_letohl(tvb, offset+in_off);
						in_off += 4;
					}
					if (match_len < 15+7 || match_len > MAX_OUTPUT_SIZE)
						return FALSE;
					match_len -= (15 + 7);
				}
				match_len += 15;
			}
			match_len += 7;
		}
		match_len += 3;

		out_count = wmem_array_get_count(obuf);

		/* The back-reference must start inside the data produced so far */
		if (match_off > out_count)
			return FALSE;

		/* Refuse matches that would grow the output beyond the cap */
		if (out_count > MAX_OUTPUT_SIZE ||
		    match_len > MAX_OUTPUT_SIZE - out_count)
			return FALSE;

		/*
		 * Copy one byte at a time: source and destination overlap
		 * whenever match_off < match_len, in which case bytes
		 * appended by this very loop are read back.
		 */
		for (i = 0; i < match_len; i++) {
			guint8 byte;

			if (wmem_array_try_index(obuf, out_count+i-match_off, &byte))
				return FALSE;
			wmem_array_append_one(obuf, byte);
		}
	}

	/* Not reached: the loop above only exits through return */
	return TRUE;
}
/*
 * Decompress in_size bytes of Plain LZ77 data found at offset in tvb.
 *
 * Returns a new tvbuff containing the decompressed bytes, or NULL when
 * tvb is NULL, in_size is outside 1..MAX_INPUT_SIZE, or decoding fails
 * (including any exception raised while reading from tvb). The returned
 * tvbuff is backed by a g_malloc()ed buffer that is released with
 * g_free() through the tvb free callback.
 */
tvbuff_t *
tvb_uncompress_lz77(tvbuff_t *tvb, const int offset, int in_size)
{
	volatile gboolean ok = FALSE;
	wmem_allocator_t *pool;
	wmem_array_t *obuf;
	tvbuff_t *out;

	/*
	 * Validate before in_size is used to dimension the output array:
	 * a negative value would turn into a huge unsigned count and a very
	 * large one would overflow the signed multiplication.
	 */
	if (tvb == NULL || in_size <= 0 || in_size > MAX_INPUT_SIZE)
		return NULL;

	pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
	/* Initial capacity is only a guess; the array grows as needed */
	obuf = wmem_array_sized_new(pool, 1, (guint)in_size*2);

	TRY {
		ok = do_uncompress(tvb, offset, in_size, obuf);
	} CATCH_ALL {
		/* Any exception while decoding counts as a failure */
		ok = FALSE;
	}
	ENDTRY;

	if (ok) {
		/*
		 * Cannot pass a tvb free callback that frees the wmem
		 * pool, so we make an extra copy that uses bare
		 * pointers. This could be optimized if tvb API had a
		 * free pool callback of some sort.
		 */
		guint size = wmem_array_get_count(obuf);
		guint8 *p = (guint8 *)g_malloc(size);

		/* g_malloc(0) returns NULL; do not hand that to memcpy() */
		if (size > 0)
			memcpy(p, wmem_array_get_raw(obuf), size);
		out = tvb_new_real_data(p, size, size);
		tvb_set_free_cb(out, g_free);
	} else {
		out = NULL;
	}

	/* The scratch array lives in the pool and goes away with it */
	wmem_destroy_allocator(pool);

	return out;
}
/*
 * Decompress Plain LZ77 data with tvb_uncompress_lz77() and, when that
 * succeeds, hand the resulting tvbuff to
 * tvb_set_child_real_data_tvbuff() together with parent.
 *
 * Returns the new tvbuff, or NULL if decompression failed (in which
 * case parent is left untouched).
 */
tvbuff_t *
tvb_child_uncompress_lz77(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
{
	tvbuff_t *uncompressed;

	uncompressed = tvb_uncompress_lz77(tvb, offset, in_size);
	if (uncompressed == NULL)
		return NULL;

	tvb_set_child_real_data_tvbuff(parent, uncompressed);
	return uncompressed;
}
/*
* Editor modelines - https://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 8
* tab-width: 8
* indent-tabs-mode: t
* End:
*
* vi: set shiftwidth=8 tabstop=8 noexpandtab:
* :indentSize=8:tabSize=8:noTabs=false:
*/