2019-07-05 14:08:18 +00:00
|
|
|
|
/*
 * Decompression code for LZ77+Huffman. This encoding is used by
 * Microsoft in various file formats and protocols including SMB3.
 *
 * See MS-XCA.
 *
 * Initial code from Samba re-licensed with Samuel's permission.
 * Copyright (C) Samuel Cabrero 2017
 *
 * Glib-ification, extra error-checking and WS integration
 * Copyright (C) Aurélien Aptel 2019
 *
 * Wireshark - Network traffic analyzer
 * By Gerald Combs <gerald@wireshark.org>
 * Copyright 1998 Gerald Combs
 *
 * SPDX-License-Identifier: GPL-2.0-or-later
 */
|
|
|
|
|
|
|
|
|
|
#include <glib.h>
|
|
|
|
|
#include <stdlib.h> /* qsort */
|
|
|
|
|
#include <epan/exceptions.h>
|
|
|
|
|
#include <epan/tvbuff.h>
|
2021-07-26 00:31:17 +00:00
|
|
|
|
#include <epan/wmem_scopes.h>
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
/* Largest compressed input that do_uncompress() agrees to process */
#define MAX_INPUT_SIZE (16*1024*1024) /* 16MB */

/* Number of slots in the prefix code tree node array */
#define TREE_SIZE 1024
/* Bytes of encoded code lengths found at the start of the input */
#define ENCODED_TREE_SIZE 256
/* Every encoded byte packs two 4-bit code lengths, i.e. two symbols */
#define SYMBOL_INFO_SIZE (2*ENCODED_TREE_SIZE)
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
struct input {
|
|
|
|
|
tvbuff_t *tvb;
|
|
|
|
|
int offset;
|
|
|
|
|
gsize size;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Represents a node in a Huffman prefix code tree
|
|
|
|
|
*/
|
|
|
|
|
struct prefix_code_node {
|
|
|
|
|
/* Stores the symbol encoded by this node in the prefix code tree */
|
|
|
|
|
guint16 symbol;
|
|
|
|
|
|
|
|
|
|
/* Indicates whether this node is a leaf in the tree */
|
2019-07-16 09:08:13 +00:00
|
|
|
|
guint8 leaf;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Points to the node's two children. Values are indexes in
|
|
|
|
|
* the tree node array. The value -1 is used to indicate that
|
|
|
|
|
* a particular child does not exist
|
|
|
|
|
*/
|
|
|
|
|
gint16 child[2];
|
2019-07-05 14:08:18 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Represent information about a Huffman-encoded symbol
|
|
|
|
|
*/
|
|
|
|
|
struct prefix_code_symbol {
|
|
|
|
|
/* Stores the symbol */
|
|
|
|
|
guint16 symbol;
|
|
|
|
|
|
|
|
|
|
/* Stores the symbol’s Huffman prefix code length */
|
|
|
|
|
guint16 length;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Represent a byte array as a bit string from which individual bits can
|
|
|
|
|
* be read
|
|
|
|
|
*/
|
|
|
|
|
struct bitstring {
|
|
|
|
|
/* The byte array */
|
|
|
|
|
const struct input *input;
|
|
|
|
|
|
|
|
|
|
/* The index in source from which the next set of bits will be pulled
|
|
|
|
|
* when the bits in mask have been consumed */
|
2019-07-30 06:54:14 +00:00
|
|
|
|
guint32 bitstring_index;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
/* Stores the next bits to be consumed in the bit string */
|
|
|
|
|
guint32 mask;
|
|
|
|
|
|
|
|
|
|
/* Stores the number of bits in mask that remain to be consumed */
|
|
|
|
|
gint32 bits;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
struct hf_tree {
|
|
|
|
|
struct prefix_code_node *root;
|
2019-07-15 21:25:29 +00:00
|
|
|
|
struct prefix_code_node nodes[TREE_SIZE];
|
2019-07-05 14:08:18 +00:00
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
static gboolean is_node_valid(struct hf_tree *tree, struct prefix_code_node *node)
|
|
|
|
|
{
|
|
|
|
|
return (node && node >= tree->nodes && node < tree->nodes + TREE_SIZE);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Links a symbol's prefix_code_node into its correct position in a Huffman
|
|
|
|
|
* prefix code tree
|
|
|
|
|
*/
|
|
|
|
|
static int prefix_code_tree_add_leaf(struct hf_tree *tree,
|
|
|
|
|
guint32 leaf_index,
|
|
|
|
|
guint32 mask,
|
|
|
|
|
guint32 bits,
|
|
|
|
|
guint32 *out_index)
|
|
|
|
|
{
|
|
|
|
|
struct prefix_code_node *node = &tree->nodes[0];
|
|
|
|
|
guint32 i = leaf_index + 1;
|
|
|
|
|
guint32 child_index;
|
|
|
|
|
|
|
|
|
|
if (leaf_index >= TREE_SIZE)
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
|
while (bits > 1) {
|
|
|
|
|
bits = bits - 1;
|
|
|
|
|
child_index = (mask >> bits) & 1;
|
2019-07-16 09:08:13 +00:00
|
|
|
|
if (node->child[child_index] < 0) {
|
2019-07-05 14:08:18 +00:00
|
|
|
|
if (i >= TREE_SIZE)
|
|
|
|
|
return -1;
|
2019-07-16 09:08:13 +00:00
|
|
|
|
node->child[child_index] = i;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
tree->nodes[i].leaf = FALSE;
|
|
|
|
|
i = i + 1;
|
|
|
|
|
}
|
2019-07-16 09:08:13 +00:00
|
|
|
|
node = tree->nodes + node->child[child_index];
|
2019-07-05 14:08:18 +00:00
|
|
|
|
if (!is_node_valid(tree, node))
|
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-16 09:08:13 +00:00
|
|
|
|
node->child[mask & 1] = leaf_index;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
*out_index = i;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Determines the sort order of one prefix_code_symbol relative to another
|
|
|
|
|
*/
|
|
|
|
|
static int compare_symbols(const void *ve1, const void *ve2)
|
|
|
|
|
{
|
|
|
|
|
const struct prefix_code_symbol *e1 = (const struct prefix_code_symbol *)ve1;
|
|
|
|
|
const struct prefix_code_symbol *e2 = (const struct prefix_code_symbol *)ve2;
|
|
|
|
|
|
|
|
|
|
if (e1->length < e2->length)
|
|
|
|
|
return -1;
|
|
|
|
|
else if (e1->length > e2->length)
|
|
|
|
|
return 1;
|
|
|
|
|
else if (e1->symbol < e2->symbol)
|
|
|
|
|
return -1;
|
|
|
|
|
else if (e1->symbol > e2->symbol)
|
|
|
|
|
return 1;
|
|
|
|
|
else
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Rebuilds the Huffman prefix code tree that will be used to decode symbols
|
|
|
|
|
* during decompression
|
|
|
|
|
*/
|
|
|
|
|
static int PrefixCodeTreeRebuild( struct hf_tree *tree,
|
|
|
|
|
const struct input *input)
|
|
|
|
|
{
|
2019-07-16 09:08:13 +00:00
|
|
|
|
struct prefix_code_symbol symbolInfo[SYMBOL_INFO_SIZE];
|
2019-07-05 14:08:18 +00:00
|
|
|
|
guint32 i, j, mask, bits;
|
|
|
|
|
int rc;
|
|
|
|
|
|
|
|
|
|
for (i = 0; i < TREE_SIZE; i++) {
|
|
|
|
|
tree->nodes[i].symbol = 0;
|
|
|
|
|
tree->nodes[i].leaf = FALSE;
|
2019-07-16 09:08:13 +00:00
|
|
|
|
tree->nodes[i].child[0] = -1;
|
|
|
|
|
tree->nodes[i].child[1] = -1;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (input->size < ENCODED_TREE_SIZE)
|
2019-10-20 20:13:54 +00:00
|
|
|
|
return -1;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
for (i = 0; i < ENCODED_TREE_SIZE; i++) {
|
|
|
|
|
symbolInfo[2*i].symbol = 2*i;
|
|
|
|
|
symbolInfo[2*i].length = tvb_get_guint8(input->tvb, input->offset+i) & 15;
|
|
|
|
|
symbolInfo[2*i+1].symbol = 2*i+1;
|
|
|
|
|
symbolInfo[2*i+1].length = tvb_get_guint8(input->tvb, input->offset+i) >> 4;
|
|
|
|
|
}
|
|
|
|
|
|
2019-07-16 09:08:13 +00:00
|
|
|
|
qsort(symbolInfo, SYMBOL_INFO_SIZE, sizeof(symbolInfo[0]), compare_symbols);
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
i = 0;
|
2019-07-16 09:08:13 +00:00
|
|
|
|
while (i < SYMBOL_INFO_SIZE && symbolInfo[i].length == 0) {
|
2019-07-05 14:08:18 +00:00
|
|
|
|
i = i + 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
mask = 0;
|
|
|
|
|
bits = 1;
|
|
|
|
|
|
|
|
|
|
tree->root = &tree->nodes[0];
|
|
|
|
|
tree->root->leaf = FALSE;
|
|
|
|
|
|
|
|
|
|
j = 1;
|
|
|
|
|
for (; i < 512; i++) {
|
|
|
|
|
tree->nodes[j].symbol = symbolInfo[i].symbol;
|
|
|
|
|
tree->nodes[j].leaf = TRUE;
|
|
|
|
|
mask <<= symbolInfo[i].length - bits;
|
|
|
|
|
bits = symbolInfo[i].length;
|
|
|
|
|
rc = prefix_code_tree_add_leaf(tree, j, mask, bits, &j);
|
|
|
|
|
if (rc)
|
|
|
|
|
return rc;
|
|
|
|
|
mask += 1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Initializes a bitstream data structure
|
|
|
|
|
*/
|
|
|
|
|
static void bitstring_init(struct bitstring *bstr,
|
|
|
|
|
const struct input *input,
|
2019-07-30 06:54:14 +00:00
|
|
|
|
guint32 bitstring_index)
|
2019-07-05 14:08:18 +00:00
|
|
|
|
{
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr->mask = tvb_get_letohs(input->tvb, input->offset+bitstring_index);
|
2019-07-05 14:08:18 +00:00
|
|
|
|
bstr->mask <<= sizeof(bstr->mask) * 8 - 16;
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bitstring_index += 2;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr->mask += tvb_get_letohs(input->tvb, input->offset+bitstring_index);
|
|
|
|
|
bitstring_index += 2;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
bstr->bits = 32;
|
|
|
|
|
bstr->input = input;
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr->bitstring_index = bitstring_index;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the next n bits from the front of a bit string.
|
|
|
|
|
*/
|
|
|
|
|
static guint32 bitstring_lookup(struct bitstring *bstr, guint32 n)
|
|
|
|
|
{
|
|
|
|
|
if (n == 0 || bstr->bits < 0 || n > (guint32)bstr->bits) {
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
return bstr->mask >> (sizeof(bstr->mask) * 8 - n);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Advances the bit string's cursor by n bits.
|
|
|
|
|
*/
|
|
|
|
|
static void bitstring_skip(struct bitstring *bstr, guint32 n)
|
|
|
|
|
{
|
|
|
|
|
bstr->mask = bstr->mask << n;
|
|
|
|
|
bstr->bits = bstr->bits - n;
|
|
|
|
|
|
|
|
|
|
if (bstr->bits < 16) {
|
|
|
|
|
bstr->mask += tvb_get_letohs(bstr->input->tvb,
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr->input->offset + bstr->bitstring_index)
|
2019-07-05 14:08:18 +00:00
|
|
|
|
<< (16 - bstr->bits);
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr->bitstring_index = bstr->bitstring_index + 2;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
bstr->bits = bstr->bits + 16;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Returns the symbol encoded by the next prefix code in a bit string.
|
|
|
|
|
*/
|
|
|
|
|
static int prefix_code_tree_decode_symbol(struct hf_tree *tree,
|
|
|
|
|
struct bitstring *bstr,
|
|
|
|
|
guint32 *out_symbol)
|
|
|
|
|
{
|
|
|
|
|
guint32 bit;
|
|
|
|
|
struct prefix_code_node *node = tree->root;
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
bit = bitstring_lookup(bstr, 1);
|
|
|
|
|
bitstring_skip(bstr, 1);
|
2019-07-16 09:08:13 +00:00
|
|
|
|
node = tree->nodes + node->child[bit];
|
2019-07-05 14:08:18 +00:00
|
|
|
|
if (!is_node_valid(tree, node))
|
|
|
|
|
return -1;
|
|
|
|
|
} while (node->leaf == FALSE);
|
|
|
|
|
|
|
|
|
|
*out_symbol = node->symbol;
|
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static gboolean do_uncompress(struct input *input,
|
|
|
|
|
wmem_array_t *obuf)
|
|
|
|
|
{
|
|
|
|
|
guint32 symbol;
|
|
|
|
|
guint32 length;
|
|
|
|
|
gint32 match_offset;
|
|
|
|
|
int rc;
|
|
|
|
|
struct hf_tree tree = {0};
|
|
|
|
|
struct bitstring bstr = {0};
|
|
|
|
|
|
|
|
|
|
if (!input->tvb)
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
if (input->size > MAX_INPUT_SIZE)
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
rc = PrefixCodeTreeRebuild(&tree, input);
|
|
|
|
|
if (rc)
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
bitstring_init(&bstr, input, ENCODED_TREE_SIZE);
|
|
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
|
rc = prefix_code_tree_decode_symbol(&tree, &bstr, &symbol);
|
|
|
|
|
if (rc < 0)
|
|
|
|
|
return FALSE;
|
|
|
|
|
|
|
|
|
|
if (symbol < 256) {
|
|
|
|
|
guint8 v = symbol & 0xFF;
|
|
|
|
|
wmem_array_append_one(obuf, v);
|
|
|
|
|
} else {
|
|
|
|
|
if (symbol == 256) {
|
|
|
|
|
/* EOF symbol */
|
2019-07-30 06:54:14 +00:00
|
|
|
|
return bstr.bitstring_index == bstr.input->size;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
}
|
|
|
|
|
symbol = symbol - 256;
|
|
|
|
|
length = symbol & 0xF;
|
|
|
|
|
symbol = symbol >> 4;
|
|
|
|
|
|
|
|
|
|
match_offset = (1U << symbol) + bitstring_lookup(&bstr, symbol);
|
|
|
|
|
match_offset *= -1;
|
|
|
|
|
|
|
|
|
|
if (length == 15) {
|
2019-07-30 06:54:14 +00:00
|
|
|
|
if (bstr.bitstring_index >= bstr.input->size)
|
2019-07-05 14:08:18 +00:00
|
|
|
|
return FALSE;
|
|
|
|
|
length = tvb_get_guint8(bstr.input->tvb,
|
2019-07-30 06:54:14 +00:00
|
|
|
|
bstr.input->offset+bstr.bitstring_index) + 15;
|
|
|
|
|
bstr.bitstring_index += 1;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
|
|
|
|
if (length == 270) {
|
2019-07-30 06:54:14 +00:00
|
|
|
|
if (bstr.bitstring_index+1 >= bstr.input->size)
|
2019-07-05 14:08:18 +00:00
|
|
|
|
return FALSE;
|
2019-07-30 06:54:14 +00:00
|
|
|
|
length = tvb_get_letohs(bstr.input->tvb, bstr.input->offset+bstr.bitstring_index);
|
|
|
|
|
bstr.bitstring_index += 2;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
bitstring_skip(&bstr, symbol);
|
|
|
|
|
|
|
|
|
|
length += 3;
|
|
|
|
|
do {
|
|
|
|
|
guint8 byte;
|
2019-07-30 06:54:14 +00:00
|
|
|
|
guint elem_count = wmem_array_get_count(obuf)+match_offset;
|
2019-07-05 14:08:18 +00:00
|
|
|
|
|
2019-07-30 06:54:14 +00:00
|
|
|
|
if (wmem_array_try_index(obuf, elem_count, &byte))
|
2019-07-05 14:08:18 +00:00
|
|
|
|
return FALSE;
|
|
|
|
|
wmem_array_append_one(obuf, byte);
|
|
|
|
|
length--;
|
|
|
|
|
} while (length != 0);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
return TRUE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
 * Uncompresses input_size bytes of LZ77+Huffman data found at offset
 * in tvb.
 *
 * Returns a new tvbuff holding the uncompressed bytes (its data is
 * released with g_free), or NULL when input_size is negative, larger
 * than MAX_INPUT_SIZE, or when the data cannot be decoded.
 */
tvbuff_t *
tvb_uncompress_lz77huff(tvbuff_t *tvb,
			const int offset,
			int input_size)
{
	volatile gboolean ok = FALSE;
	wmem_allocator_t *pool;
	wmem_array_t *obuf;
	tvbuff_t *out = NULL;
	struct input input = {
		.tvb = tvb,
		.offset = offset,
		.size = 0
	};

	/*
	 * Validate the size before it is used to dimension the output
	 * array: a negative or oversized value would otherwise turn
	 * input_size*2 into a huge (or overflowed) allocation request.
	 */
	if (input_size < 0 || input_size > MAX_INPUT_SIZE)
		return NULL;
	input.size = (gsize)input_size;

	pool = wmem_allocator_new(WMEM_ALLOCATOR_SIMPLE);
	obuf = wmem_array_sized_new(pool, 1, (guint)input_size*2);

	/* Reads past the end of tvb raise an exception: treat it as failure */
	TRY {
		ok = do_uncompress(&input, obuf);
	} CATCH_ALL {
		ok = FALSE;
	}
	ENDTRY;

	if (ok) {
		/*
		 * Cannot pass a tvb free callback that frees the wmem
		 * pool, so we make an extra copy that uses bare
		 * pointers. This could be optimized if tvb API had a
		 * free pool callback of some sort.
		 */
		guint size = wmem_array_get_count(obuf);
		guint8 *p = (guint8 *)g_malloc(size);

		/* g_malloc(0) yields NULL; do not hand that to memcpy */
		if (size > 0)
			memcpy(p, wmem_array_get_raw(obuf), size);
		out = tvb_new_real_data(p, size, size);
		tvb_set_free_cb(out, g_free);
	}

	wmem_destroy_allocator(pool);

	return out;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Same as tvb_uncompress_lz77huff(), and on success attaches the
 * resulting tvbuff to parent as a child. Returns NULL on failure.
 */
tvbuff_t *
tvb_child_uncompress_lz77huff(tvbuff_t *parent, tvbuff_t *tvb, const int offset, int in_size)
{
	tvbuff_t *uncompressed = tvb_uncompress_lz77huff(tvb, offset, in_size);

	if (uncompressed == NULL)
		return NULL;

	tvb_set_child_real_data_tvbuff(parent, uncompressed);
	return uncompressed;
}
|
|
|
|
|
|
|
|
|
|
/*
 * Editor modelines  -  https://www.wireshark.org/tools/modelines.html
 *
 * Local variables:
 * c-basic-offset: 8
 * tab-width: 8
 * indent-tabs-mode: t
 * End:
 *
 * vi: set shiftwidth=8 tabstop=8 noexpandtab:
 * :indentSize=8:tabSize=8:noTabs=false:
 */
|