Hash map implementation for wmem.

This has two expected uses:
- Many current users of wmem_tree don't actually need the predecessor lookup
  it provides (the lookup_le function family). A hash map provides straight
  insertion and lookup much more efficiently than a wmem_tree when predecessor
  lookup isn't needed.

- Many current users of glib's hash table and hash functions use untrusted data
  for keys, making them vulnerable to algorithmic complexity attacks. Care has
  been taken to make this implementation secure against such attacks, so it
  should be used whenever data is untrusted.

In my benchmarks it is measurably slower than GHashTable, but not excessively
so. Given the additional security it provides this seems like a reasonable
trade-off (and it is still faster than a wmem_tree).

Change-Id: I2d67a0d06029f14c153eaa42d5cfc774aefd9918
Reviewed-on: https://code.wireshark.org/review/1272
Reviewed-by: Evan Huus <eapache@gmail.com>
This commit is contained in:
Evan Huus 2014-04-21 20:30:06 -04:00
parent 268104a1dd
commit 9ba4c6e091
8 changed files with 563 additions and 2 deletions

View File

@ -1464,6 +1464,7 @@ set(WMEM_FILES
wmem/wmem_allocator_simple.c
wmem/wmem_allocator_strict.c
wmem/wmem_list.c
wmem/wmem_map.c
wmem/wmem_miscutl.c
wmem/wmem_scopes.c
wmem/wmem_stack.c

View File

@ -28,6 +28,7 @@ LIBWMEM_SRC = \
wmem_allocator_simple.c \
wmem_allocator_strict.c \
wmem_list.c \
wmem_map.c \
wmem_miscutl.c \
wmem_scopes.c \
wmem_stack.c \
@ -45,6 +46,8 @@ LIBWMEM_INCLUDES = \
wmem_allocator_simple.h \
wmem_allocator_strict.h \
wmem_list.h \
wmem_map.h \
wmem_map_int.h \
wmem_miscutl.h \
wmem_queue.h \
wmem_scopes.h \

View File

@ -27,6 +27,7 @@
#include "wmem_array.h"
#include "wmem_core.h"
#include "wmem_list.h"
#include "wmem_map.h"
#include "wmem_miscutl.h"
#include "wmem_queue.h"
#include "wmem_scopes.h"

View File

@ -27,6 +27,7 @@
#include "wmem_core.h"
#include "wmem_scopes.h"
#include "wmem_map_int.h"
#include "wmem_user_cb_int.h"
#include "wmem_allocator.h"
#include "wmem_allocator_simple.h"
@ -206,6 +207,7 @@ wmem_init(void)
}
wmem_init_scopes();
wmem_init_hashing();
}
void

284
epan/wmem/wmem_map.c Normal file
View File

@ -0,0 +1,284 @@
/* wmem_map.c
* Wireshark Memory Manager Hash Map
* Copyright 2014, Evan Huus <eapache@gmail.com>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#include <glib.h>
#include "wmem_core.h"
#include "wmem_map.h"
/* Multiplier used for universal integer hashing (see the HASH macro).
 * Chosen at random by wmem_init_hashing(). */
static guint32 x;

/* Seeds mixed in by wmem_strong_hash() before (preseed) and after (postseed)
 * the input bytes. Chosen at random by wmem_init_hashing(). */
static guint32 preseed;
static guint32 postseed;

/* Picks the random parameters used by the map's hashing. Called once from
 * wmem_init(). */
void
wmem_init_hashing(void)
{
    /* The multiply-shift scheme implemented by HASH needs an odd multiplier.
     * A multiplier of zero would send every key to bucket 0, and any even
     * multiplier throws away high-order bits of the key's hash, so force the
     * low bit on. */
    x = g_random_int() | 1;

    preseed  = g_random_int();
    postseed = g_random_int();
}
/* A single key/value entry. Entries that land in the same table slot are
 * chained together through 'next'. */
typedef struct _wmem_map_item_t {
    /* the key pointer exactly as given to wmem_map_insert(); never copied */
    const void *key;
    /* the value pointer most recently stored under this key */
    void *value;
    /* the next entry in the same slot, or NULL at the end of the chain */
    struct _wmem_map_item_t *next;
} wmem_map_item_t;
/* The map itself: a table of chains plus the callbacks and allocator it was
 * created with. */
struct _wmem_map_t {
    guint count; /* number of items stored */
    /* The base-2 logarithm of the actual size of the table. We store this
     * value for efficiency in hashing, since finding the actual capacity
     * becomes just a left-shift (see the CAPACITY macro) whereas taking
     * logarithms is expensive. */
    guint capacity;
    /* array of CAPACITY(map) chain heads; an empty slot holds NULL */
    wmem_map_item_t **table;
    /* caller-supplied function that turns a key into an integer hash */
    GHashFunc hash_func;
    /* caller-supplied function that decides whether two keys are equal */
    GEqualFunc eql_func;
    /* allocator used for the table and for every item in it */
    wmem_allocator_t *allocator;
};
/* Initial value of the 'capacity' member of the wmem_map_t struct. That member
 * holds the base-2 logarithm of the table size, so a freshly created map has
 * 2^5 = 32 slots. */
#define WMEM_MAP_DEFAULT_CAPACITY 5
/* Macro for calculating the real capacity of the map by using a left-shift to
 * do the 2^x operation. The constant is converted to guint *before* the shift:
 * shifting a signed 1 into the sign bit (capacity 31 with a 32-bit int) is
 * undefined behaviour, whereas the unsigned shift is well defined for every
 * capacity smaller than the width of guint. */
#define CAPACITY(MAP) (((guint)1) << (MAP)->capacity)
/* Efficient universal integer hashing:
 * https://en.wikipedia.org/wiki/Universal_hashing#Avoiding_modular_arithmetic
 *
 * Computes the table slot for KEY: the key's hash (from the map's hash_func)
 * is multiplied by the file-level random multiplier 'x' and the top
 * 'capacity' bits of the product are kept, giving a value in
 * [0, CAPACITY(MAP)).
 *
 * NOTE(review): this relies on the product being 32 bits wide (i.e. guint
 * being a 32-bit type) and on capacity being between 1 and 31, since a shift
 * by 32 or more would be undefined - confirm if the table is ever allowed to
 * shrink or to grow that far.
 */
#define HASH(MAP, KEY) \
    ((guint32)(((MAP)->hash_func(KEY) * x) >> (32 - (MAP)->capacity)))
/* Allocates an empty map from the given allocator. The table starts out with
 * the default capacity and every slot zeroed (i.e. an empty chain). */
wmem_map_t *
wmem_map_new(wmem_allocator_t *allocator,
             GHashFunc hash_func, GEqualFunc eql_func)
{
    wmem_map_t *result = wmem_new(allocator, wmem_map_t);

    /* remember who we belong to and how to hash/compare keys */
    result->allocator = allocator;
    result->hash_func = hash_func;
    result->eql_func  = eql_func;

    /* nothing stored yet */
    result->count = 0;

    /* the capacity has to be in place before CAPACITY() is evaluated */
    result->capacity = WMEM_MAP_DEFAULT_CAPACITY;
    result->table    = wmem_alloc0_array(allocator, wmem_map_item_t*, CAPACITY(result));

    return result;
}
/* Doubles the number of slots in the map and redistributes every existing
 * item into the new table. Items are re-linked in place; only the table array
 * itself is reallocated. */
static inline void
wmem_map_grow(wmem_map_t *map)
{
    wmem_map_item_t **prev_table = map->table;
    const guint       prev_size  = CAPACITY(map);
    guint             bucket;

    /* capacity is a base-2 logarithm, so adding one doubles the real size */
    map->capacity++;
    map->table = wmem_alloc0_array(map->allocator, wmem_map_item_t*, CAPACITY(map));

    /* walk every chain of the old table, pushing each item onto the front of
     * the chain it now hashes to */
    for (bucket = 0; bucket < prev_size; bucket++) {
        wmem_map_item_t *node = prev_table[bucket];

        while (node != NULL) {
            wmem_map_item_t *rest = node->next;
            guint            dest = HASH(map, node->key);

            node->next       = map->table[dest];
            map->table[dest] = node;

            node = rest;
        }
    }

    /* the old slot array is no longer referenced */
    wmem_free(map->allocator, prev_table);
}
/* Stores 'value' under 'key'. If an equal key is already present its value is
 * overwritten (the originally stored key pointer is kept) and the old value
 * is returned; otherwise a new item is appended to the slot's chain and NULL
 * is returned. The table is grown once the item count reaches the number of
 * slots. */
void *
wmem_map_insert(wmem_map_t *map, const void *key, void *value)
{
    wmem_map_item_t **link;
    wmem_map_item_t  *entry;

    /* follow the chain for this key's slot; 'link' always points at the
     * pointer that refers to the entry being examined */
    for (link = &(map->table[HASH(map, key)]); *link != NULL; link = &((*link)->next)) {
        entry = *link;
        if (map->eql_func(key, entry->key)) {
            /* key already present: swap the value and hand back the old one */
            void *previous = entry->value;
            entry->value = value;
            return previous;
        }
    }

    /* 'link' now points at the NULL terminating the chain: hang a new item
     * there */
    entry = wmem_new(map->allocator, wmem_map_item_t);
    entry->key   = key;
    entry->value = value;
    entry->next  = NULL;
    *link = entry;

    map->count++;

    /* keep the average chain length below one */
    if (map->count >= CAPACITY(map)) {
        wmem_map_grow(map);
    }

    /* there was nothing stored under this key before */
    return NULL;
}
/* Returns the value stored under 'key', or NULL when no equal key is present
 * in the map. */
void *
wmem_map_lookup(wmem_map_t *map, const void *key)
{
    wmem_map_item_t *entry;

    /* only the chain belonging to the key's slot can contain it */
    for (entry = map->table[HASH(map, key)]; entry != NULL; entry = entry->next) {
        if (map->eql_func(key, entry->key)) {
            return entry->value;
        }
    }

    return NULL;
}
/* Unlinks and frees the item stored under 'key' and returns the value it held.
 * Returns NULL, leaving the map untouched, when no equal key is present. Only
 * the map's own item node is freed, not the key or the value. */
void *
wmem_map_remove(wmem_map_t *map, const void *key)
{
    /* 'link' points at whichever pointer currently refers to the entry under
     * inspection, so unlinking is a single store through it */
    wmem_map_item_t **link = &(map->table[HASH(map, key)]);

    for (; *link != NULL; link = &((*link)->next)) {
        wmem_map_item_t *victim = *link;
        void            *removed;

        if (!map->eql_func(key, victim->key)) {
            continue;
        }

        /* match: splice it out of the chain and release the node */
        removed = victim->value;
        *link   = victim->next;
        wmem_free(map->allocator, victim);
        map->count--;
        return removed;
    }

    /* no such key */
    return NULL;
}
/* Borrowed from Perl 5.18. This is based on Bob Jenkin's one-at-a-time
* algorithm with some additional randomness seeded in. It is believed to be
* generally secure against collision attacks. See
* http://blog.booking.com/hardening-perls-hash-function.html
*/
guint32
wmem_strong_hash(const guint8 *buf, const size_t len)
{
const guint8 * const end = (const guint8 *)buf + len;
guint32 hash = preseed + len;
while (buf < end) {
hash += (hash << 10);
hash ^= (hash >> 6);
hash += *buf++;
}
hash += (hash << 10);
hash ^= (hash >> 6);
hash += ((guint8*)&postseed)[0];
hash += (hash << 10);
hash ^= (hash >> 6);
hash += ((guint8*)&postseed)[1];
hash += (hash << 10);
hash ^= (hash >> 6);
hash += ((guint8*)&postseed)[2];
hash += (hash << 10);
hash ^= (hash >> 6);
hash += ((guint8*)&postseed)[3];
hash += (hash << 10);
hash ^= (hash >> 6);
hash += (hash << 3);
hash ^= (hash >> 11);
return (hash + (hash << 15));
}
/* GHashFunc for NUL-terminated strings: feeds every byte of the string (not
 * including the terminator) to wmem_strong_hash(). */
guint
wmem_str_hash(gconstpointer key)
{
    const char   *str     = (const char *)key;
    const size_t  str_len = strlen(str);

    return wmem_strong_hash((const guint8 *)str, str_len);
}
/* GHashFunc for keys that point at a 64-bit integer: feeds the
 * sizeof(guint64) bytes found at 'key' to wmem_strong_hash(). */
guint
wmem_int64_hash(gconstpointer key)
{
    const guint8 *raw = (const guint8 *)key;

    return wmem_strong_hash(raw, sizeof(guint64));
}
/* GHashFunc for keys that point at a double: feeds the sizeof(double) bytes
 * found at 'key' to wmem_strong_hash(). */
guint
wmem_double_hash(gconstpointer key)
{
    const guint8 *raw = (const guint8 *)key;

    return wmem_strong_hash(raw, sizeof(double));
}
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 4
* tab-width: 8
* indent-tabs-mode: nil
* End:
*
* vi: set shiftwidth=4 tabstop=8 expandtab:
* :indentSize=4:tabSize=8:noTabs=true:
*/

161
epan/wmem/wmem_map.h Normal file
View File

@ -0,0 +1,161 @@
/* wmem_map.h
* Definitions for the Wireshark Memory Manager Hash Map
* Copyright 2014, Evan Huus <eapache@gmail.com>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef __WMEM_MAP_H__
#define __WMEM_MAP_H__
#include <glib.h>
#include "wmem_core.h"
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/** @addtogroup wmem
* @{
* @defgroup wmem-map Hash Map
*
* A hash map implementation on top of wmem. Provides insertion, deletion and
* lookup in expected amortized constant time. Uses universal hashing to map
* keys into buckets, and provides a generic strong hash function that makes
* it secure against algorithmic complexity attacks, and suitable for use
* even with untrusted data.
*
* @{
*/
struct _wmem_map_t;
typedef struct _wmem_map_t wmem_map_t;
/** Creates a map with the given allocator scope. When the scope is emptied,
 * the map is fully destroyed. Items stored in it will not be freed unless they
 * were allocated from the same scope. For details on the GHashFunc and
 * GEqualFunc parameters, see the glib documentation at:
 * https://developer.gnome.org/glib/unstable/glib-Hash-Tables.html
 *
 * If the keys are coming from untrusted data, do *not* use glib's default hash
 * functions for strings, int64s or doubles. Wmem provides stronger equivalents
 * below. Feel free to use the g_direct_hash, g_int_hash, and any of the
 * g_*_equal functions though, as they should be safe.
 *
 * The map stores the key and value pointers it is given; it never copies what
 * they point to, so a key must stay valid for as long as it is in the map.
 *
 * @param allocator The allocator scope with which to create the map.
 * @param hash_func The hash function used to place inserted keys.
 * @param eql_func  The equality function used to compare inserted keys.
 * @return The newly-allocated map.
 */
WS_DLL_PUBLIC
wmem_map_t *
wmem_map_new(wmem_allocator_t *allocator,
        GHashFunc hash_func, GEqualFunc eql_func)
G_GNUC_MALLOC;
/** Inserts a value into the map. If an equal key is already present, its
 * value is replaced and the key pointer that was stored first is kept; the
 * key passed to this call is then not retained.
 *
 * @param map The map to insert into.
 * @param key The key to insert by.
 * @param value The value to insert.
 * @return The previous value stored at this key if any, or NULL.
 */
WS_DLL_PUBLIC
void *
wmem_map_insert(wmem_map_t *map, const void *key, void *value);
/** Looks up a value in the map. Because NULL is also what is returned for a
 * missing key, a stored NULL value cannot be told apart from "not present".
 *
 * @param map The map to search in.
 * @param key The key to lookup.
 * @return The value stored at the key if any, or NULL.
 */
WS_DLL_PUBLIC
void *
wmem_map_lookup(wmem_map_t *map, const void *key);
/** Removes a value from the map. If no value is stored at that key, nothing
 * happens. Only the map's internal bookkeeping for the entry is released; the
 * key and value themselves are not freed.
 *
 * @param map The map to remove from.
 * @param key The key of the value to remove.
 * @return The (removed) value stored at the key if any, or NULL.
 */
WS_DLL_PUBLIC
void *
wmem_map_remove(wmem_map_t *map, const void *key);
/** Computes a strong hash value for an arbitrary sequence of bytes. Use of
 * this hash value should be secure against algorithmic complexity attacks,
 * even for short keys. The computation uses a random seed which is generated
 * on wmem initialization, so the same key will hash to different values on
 * different runs of the application. The length itself is mixed into the
 * hash, and a length of zero is accepted (no bytes of buf are read).
 *
 * @param buf The buffer of bytes (does not have to be aligned).
 * @param len The length of buf to use for the hash computation.
 * @return The hash value.
 */
WS_DLL_PUBLIC
guint32
wmem_strong_hash(const guint8 *buf, const size_t len);
/** An implementation of GHashFunc using wmem_strong_hash. Prefer this over
 * g_str_hash when the data comes from an untrusted source.
 *
 * @param key A NUL-terminated string; its bytes up to (not including) the
 *            terminator are hashed.
 * @return The hash value.
 */
WS_DLL_PUBLIC
guint
wmem_str_hash(gconstpointer key);
/** An implementation of GHashFunc using wmem_strong_hash. Prefer this over
 * g_int64_hash when the data comes from an untrusted source.
 *
 * @param key Pointer to the 64-bit integer to hash; sizeof(guint64) bytes are
 *            read from it.
 * @return The hash value.
 */
WS_DLL_PUBLIC
guint
wmem_int64_hash(gconstpointer key);
/** An implementation of GHashFunc using wmem_strong_hash. Prefer this over
 * g_double_hash when the data comes from an untrusted source.
 *
 * @param key Pointer to the double to hash; sizeof(double) bytes are read
 *            from it.
 * @return The hash value.
 */
WS_DLL_PUBLIC
guint
wmem_double_hash(gconstpointer key);
/** @}
* @} */
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __WMEM_MAP_H__ */
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 4
* tab-width: 8
* indent-tabs-mode: nil
* End:
*
* vi: set shiftwidth=4 tabstop=8 expandtab:
* :indentSize=4:tabSize=8:noTabs=true:
*/

52
epan/wmem/wmem_map_int.h Normal file
View File

@ -0,0 +1,52 @@
/* wmem_map_int.h
* Definitions for the Wireshark Memory Manager Hash Map Internals
* Copyright 2014, Evan Huus <eapache@gmail.com>
*
* Wireshark - Network traffic analyzer
* By Gerald Combs <gerald@wireshark.org>
* Copyright 1998 Gerald Combs
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along
* with this program; if not, write to the Free Software Foundation, Inc.,
* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
*/
#ifndef __WMEM_MAP_INT_H__
#define __WMEM_MAP_INT_H__
#ifdef __cplusplus
extern "C" {
#endif /* __cplusplus */
/* Generates the random values used by the hash map's slot hashing and by
 * wmem_strong_hash(). Internal to wmem: it is called from wmem_init() and is
 * not part of the public API. */
WS_DLL_LOCAL
void
wmem_init_hashing(void);
#ifdef __cplusplus
}
#endif /* __cplusplus */
#endif /* __WMEM_MAP_INT_H__ */
/*
* Editor modelines - http://www.wireshark.org/tools/modelines.html
*
* Local variables:
* c-basic-offset: 4
* tab-width: 8
* indent-tabs-mode: nil
* End:
*
* vi: set shiftwidth=4 tabstop=8 expandtab:
* :indentSize=4:tabSize=8:noTabs=true:
*/

View File

@ -35,7 +35,6 @@
#define MAX_ALLOC_SIZE (1024*64)
#define MAX_SIMULTANEOUS_ALLOCS 1024
#define CONTAINER_ITERS 10000
#define MANY_ITERS (CONTAINER_ITERS * 100)
typedef void (*wmem_verify_func)(wmem_allocator_t *allocator);
@ -606,6 +605,55 @@ wmem_test_list(void)
wmem_destroy_list(list);
}
/* Unit test for wmem_map: exercises insert/lookup/remove with integer keys
 * stored directly in the key pointer, then insert/lookup with randomly
 * generated string keys hashed by wmem_str_hash. */
static void
wmem_test_map(void)
{
    wmem_allocator_t *allocator;
    wmem_map_t *map;
    gchar *str_key;
    unsigned int i;
    void *ret;

    allocator = wmem_allocator_new(WMEM_ALLOCATOR_STRICT);

    /* insertion, lookup and removal of simple integer keys */
    map = wmem_map_new(allocator, g_direct_hash, g_direct_equal);
    g_assert(map);

    for (i=0; i<CONTAINER_ITERS; i++) {
        /* first insert of a key reports no previous value */
        ret = wmem_map_insert(map, GINT_TO_POINTER(i), GINT_TO_POINTER(777777));
        g_assert(ret == NULL);
        /* re-inserting replaces the value and returns the one it replaced */
        ret = wmem_map_insert(map, GINT_TO_POINTER(i), GINT_TO_POINTER(i));
        g_assert(ret == GINT_TO_POINTER(777777));
        ret = wmem_map_insert(map, GINT_TO_POINTER(i), GINT_TO_POINTER(i));
        g_assert(ret == GINT_TO_POINTER(i));
    }
    for (i=0; i<CONTAINER_ITERS; i++) {
        ret = wmem_map_lookup(map, GINT_TO_POINTER(i));
        g_assert(ret == GINT_TO_POINTER(i));
        /* removal hands back the stored value... */
        ret = wmem_map_remove(map, GINT_TO_POINTER(i));
        g_assert(ret == GINT_TO_POINTER(i));
        /* ...after which the key is gone for both lookup and remove.
         * Note that key 0 stores GINT_TO_POINTER(0) as its value, so for that
         * key the checks cannot tell a stored value from a missing one. */
        ret = wmem_map_lookup(map, GINT_TO_POINTER(i));
        g_assert(ret == NULL);
        ret = wmem_map_remove(map, GINT_TO_POINTER(i));
        g_assert(ret == NULL);
    }
    /* release everything from the first phase before building a new map */
    wmem_free_all(allocator);

    map = wmem_map_new(allocator, wmem_str_hash, g_str_equal);
    g_assert(map);

    /* string keys: every key must be retrievable right after it is inserted
     * (no iteration over the map is exercised here) */
    for (i=0; i<CONTAINER_ITERS; i++) {
        str_key = wmem_test_rand_string(allocator, 1, 64);
        wmem_map_insert(map, str_key, GINT_TO_POINTER(i));
        ret = wmem_map_lookup(map, str_key);
        g_assert(ret == GINT_TO_POINTER(i));
    }

    wmem_destroy_allocator(allocator);
}
static void
wmem_test_queue(void)
{
@ -917,6 +965,10 @@ wmem_test_tree(void)
int
main(int argc, char **argv)
{
int ret;
wmem_init();
g_test_init(&argc, &argv, NULL);
g_test_add_func("/wmem/allocator/block", wmem_test_allocator_block);
@ -929,6 +981,7 @@ main(int argc, char **argv)
g_test_add_func("/wmem/datastruct/array", wmem_test_array);
g_test_add_func("/wmem/datastruct/list", wmem_test_list);
g_test_add_func("/wmem/datastruct/map", wmem_test_map);
g_test_add_func("/wmem/datastruct/queue", wmem_test_queue);
g_test_add_func("/wmem/datastruct/stack", wmem_test_stack);
g_test_add_func("/wmem/datastruct/strbuf", wmem_test_strbuf);
@ -936,7 +989,11 @@ main(int argc, char **argv)
g_test_add_func("/wmem/timing/allocators", wmem_time_allocators);
return g_test_run();
ret = g_test_run();
wmem_cleanup();
return ret;
}
/*