/* * Copyright (c) 2002-2005 Lev Walkin . All rights reserved. * Copyright (c) 2001-2004 Netli, Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $Id: genhash.c 447 2005-06-07 06:51:10Z vlm $ */ /* * Implementation of a hash data structure. * This particular implementation is supposed to be space-efficient * particularly in the case of tiny number of hash elements. * It also has an aggressive hash buckets expanding technique, which allows * to deal with increasing number of elements without a loss of search speed. * * Generally, one structure of type genhash_t is allocated per hash set. * This structure is supposed to hold all information related to the current * set, and also holds a tiny number of hash elements, when hash hasn't yet * grown up. When the number of elements reaches some point, part of the * genhash_t structure is reused to contain the pointers to the actual * hash buckets and LRU (Least Recently Used) list's head and tail. * Elements which were held inside genhash_t will be moved to the hash buckets. * * Said above effectively means two modes of operation: TINY and NORMAL. * They can be distinguished by examining the h->numbuckets value, which * is 0 for TINY and greater for NORMAL mode. * * In the TINY mode we use a lower part of the genhash_t structure * (lower 32 bytes from 64 bytes of genhash_t) to hold up to IH_VALUE (4) * key/value pairs. * * In the NORMAL mode we use the lower part of the genhash_t structure * to hold a set of pointers, including a pointer to the hash buckets. * We agressively expand hash buckets size when adding new elements * to lower the number of key comparisons. */ #include #include #include #include #include #include "genhash.h" /* 1M entries, 4M RAM */ #define DEFAULT_MAXIMUM_HASH_BUCKETS_NUMBER (1024 * 1024) static int maximum_hash_buckets_number = DEFAULT_MAXIMUM_HASH_BUCKETS_NUMBER; /* * A single hash element structure which binds a value to its key. */ typedef struct genhash_el_s { unsigned int key_hash; /* Saved hash of the key */ void *key; void *value; struct genhash_el_s *hash_next; /* Collision list inside the bucket */ struct genhash_el_s *hash_prev; struct genhash_el_s *lru_prev; /* Per-hash LRU list */ struct genhash_el_s *lru_next; } genhash_el; /* * A hash structure with buckets etc. */ struct genhash_s { int (*keycmpf) (const void *lkey1, const void *rkey2); unsigned int (*keyhashf) (const void *key); /* hash function */ void (*keydestroyf) (void *key); /* key destructor */ void (*valuedestroyf) (void *value); /* value destructor */ int numelements; /* Total number of hash elements */ int numbuckets; /* 0 means "use _TINY" */ int lru_limit; /* Must be initialized explicitly */ genhash_iter_t *iters; /* Active iterators */ /* 32-byte boundary here */ union { #define IH_VALUES 4 /* Internally held key/value pairs for TINY mode */ struct _internal_tiny_s { void *keys[IH_VALUES]; void *values[IH_VALUES]; } _TINY; /* 32-byte structure */ struct _internal_normal_s { genhash_el *lru_head; /* LRU list head */ genhash_el *lru_tail; /* LRU list tail */ genhash_el **buckets; /* Hash buckets */ /* void *unused; */ } _NORMAL; } un; #define tiny_keys un._TINY.keys #define tiny_values un._TINY.values #define lru_head un._NORMAL.lru_head #define lru_tail un._NORMAL.lru_tail #define buckets un._NORMAL.buckets }; static int _genhash_normal_add(genhash_t *h, genhash_el *el, void *key, void *value); genhash_t * genhash_new( int (*keycmpf) (const void *key1, const void *key2), unsigned int (*keyhashf) (const void *key), void (*keydestroyf) (void *key), void (*valuedestroyf) (void *value) ) { genhash_t *h; h = (genhash_t *)malloc(sizeof(genhash_t)); if (!h) return NULL; memset(h, 0, sizeof(genhash_t)); genhash_reinit(h, keycmpf, keyhashf, keydestroyf, valuedestroyf); return h; } int genhash_reinit( genhash_t *h, int (*keycmpf) (const void *key1, const void *key2), unsigned int (*keyhashf) (const void *key), void (*keydestroyf) (void *key), void (*valuedestroyf) (void *value) ) { assert(keycmpf && keyhashf); h->keycmpf = keycmpf; h->keyhashf = keyhashf; h->keydestroyf = keydestroyf; h->valuedestroyf = valuedestroyf; return 0; } int genhash_count(genhash_t *h) { if(h) { return h->numelements; } else { return 0; } } static void _remove_normal_hash_el(genhash_t *h, genhash_el *el) { genhash_iter_t *iter; void *kd_arg; void *vd_arg; /* Remove from the collision list */ if (el->hash_prev) { if((el->hash_prev->hash_next = el->hash_next)) el->hash_next->hash_prev = el->hash_prev; } else { if((h->buckets[el->key_hash % h->numbuckets] = el->hash_next)) el->hash_next->hash_prev = NULL; } /* Remove from LRU list */ if(el->lru_prev) { if((el->lru_prev->lru_next = el->lru_next)) el->lru_next->lru_prev = el->lru_prev; else h->lru_tail = el->lru_prev; } else { if(h->lru_head == el) { if((h->lru_head = el->lru_next) == NULL) h->lru_tail = NULL; else h->lru_head->lru_prev = NULL; } } /* Remember key and value */ kd_arg = el->key; vd_arg = el->value; /* Move iterators off the element being deleted */ for(iter = h->iters; iter; iter = iter->iter_next) { assert(iter->hash_ptr == h); if(iter->un.location == el) { iter->un.location = iter->order_lru_first ? el->lru_prev : el->lru_next; } } free(el); h->numelements--; /* Remove key and value */ if (h->keydestroyf) h->keydestroyf(kd_arg); if (h->valuedestroyf) h->valuedestroyf(vd_arg); } static inline void _genhash_normal_el_move2top(genhash_t *h, genhash_el *el) { /* Disable sorting if iterators are running */ if(h->iters) return; /* Move to the top of the hash bucket */ if(el->hash_prev) { int bucket = el->key_hash % h->numbuckets; /* Remove from the current location */ if((el->hash_prev->hash_next = el->hash_next)) el->hash_next->hash_prev = el->hash_prev; /* Move to the top of the hash bucket */ if((el->hash_next = h->buckets[bucket])) el->hash_next->hash_prev = el; h->buckets[bucket] = el; el->hash_prev = NULL; } /* Move to the top of LRU list */ if(h->lru_limit && el->lru_prev) { /* Remove from current location */ if((el->lru_prev->lru_next = el->lru_next)) el->lru_next->lru_prev = el->lru_prev; else h->lru_tail = el->lru_prev; /* Append to the head */ el->lru_prev = NULL; h->lru_head->lru_prev = el; el->lru_next = h->lru_head; h->lru_head = el; } } static int _expand_hash(genhash_t *h) { int newbuckets_count; genhash_el **newbuckets; /* * Compute a new number of buckets value. */ if(h->numbuckets) { newbuckets_count = h->numbuckets << 2; /* Too big hash table */ if(newbuckets_count > maximum_hash_buckets_number) { if(h->numbuckets < maximum_hash_buckets_number) { newbuckets_count = maximum_hash_buckets_number; } else { /* No need to set errno here. */ return -1; } } } else { /* 8 buckets -> 32 bytes of memory */ newbuckets_count = IH_VALUES << 1; if(newbuckets_count > maximum_hash_buckets_number) { if(maximum_hash_buckets_number) { newbuckets_count = maximum_hash_buckets_number; } else { /* Allowed to store only IH_VALUES elements */ errno = EPERM; return -1; } } } /* * Allocate a new storage for buckets. */ newbuckets = malloc(newbuckets_count * sizeof(*newbuckets)); if(newbuckets) { memset(newbuckets, 0, newbuckets_count * sizeof(*newbuckets)); } else { return -1; } if(h->numbuckets) { genhash_el *el; int bucket; /* * Rehash elements from old h->buckets to newbuckets. * No need to touch LRU pointers and other stuff - it is okay. */ for(el = h->lru_tail; el; el = el->lru_prev) { bucket = el->key_hash % newbuckets_count; el->hash_prev = NULL; if((el->hash_next = newbuckets[bucket])) el->hash_next->hash_prev = el; newbuckets[bucket] = el; } free(h->buckets); h->buckets = newbuckets; h->numbuckets = newbuckets_count; } else { /* * Moving from inline tiny storage into buckets. */ genhash_el *els[IH_VALUES] = { NULL }; struct _internal_tiny_s tiny_substruct; int i; int saved_numelements; int saved_lru_limit; genhash_iter_t *iter; /* Pre-allocate hash elements (for "undo") */ for(i = 0; i < h->numelements; i++) { els[i] = (genhash_el *)malloc(sizeof(genhash_el)); if(els[i] == NULL) { for(i = 0; i < h->numelements; i++) if(els[i]) free(els[i]); free(newbuckets); return -1; } } /* Save part of the union */ tiny_substruct = h->un._TINY; /* Re-initialize this part in NORMAL model */ memset(&h->un._NORMAL, 0, sizeof(h->un._NORMAL)); /* There was no allocated buckets, when in tiny hash mode. */ h->buckets = newbuckets; h->numbuckets = newbuckets_count; saved_numelements = h->numelements; saved_lru_limit = h->lru_limit; h->numelements = 0; h->lru_limit = 0; /* Disable LRU expiration for a while */ for(i = saved_numelements - 1; i >= 0; --i) { /* * genhash_normal_add won't fail, if we supply * an already allocated genhash_el *. */ (void)_genhash_normal_add(h, els[i], tiny_substruct.keys[i], tiny_substruct.values[i]); } /* Now, scan through iterators and convert them TINY->NORMAL */ for(iter = h->iters; iter; iter = iter->iter_next) { assert(iter->hash_ptr == h); if(iter->un.item_number < 0 || iter->un.item_number >= saved_numelements) { iter->un.location = 0; } else { iter->un.location = els[iter->un.item_number]; } } h->lru_limit = saved_lru_limit; } return 0; } /* * Won't return with error if el is provided. */ static int _genhash_normal_add(genhash_t *h, genhash_el *el, void *key, void *value) { genhash_el **bucket; if(el == NULL) { el = malloc(sizeof (*el)); if(el == NULL) { /* Errno will be set by malloc() */ return -1; } } /* Maintain maximum number of entries */ if(h->lru_limit) { while(h->numelements >= h->lru_limit) _remove_normal_hash_el(h, h->lru_tail); } memset(el, 0, sizeof(genhash_el)); /* Compute the index of the collision list */ el->key_hash = h->keyhashf(key); bucket = &h->buckets[el->key_hash % h->numbuckets]; el->key = key; el->value = value; /* * Add to the collision list */ el->hash_prev = NULL; if((el->hash_next = *bucket)) (*bucket)->hash_prev = el; *bucket = el; /* * Add to the LRU list. */ if(h->lru_head) { el->lru_next = h->lru_head; el->lru_next->lru_prev = el; h->lru_head = el; } else { h->lru_head = el; h->lru_tail = el; } h->numelements++; return 0; } int genhash_add(genhash_t *h, void *key, void *value) { if(key == NULL) { errno = EINVAL; return -1; } if(h->numbuckets == 0) { /* We have a tiny internally-held set of elements */ if(h->numelements < IH_VALUES) { h->tiny_keys[h->numelements] = key; h->tiny_values[h->numelements] = value; h->numelements++; return 0; } if(_expand_hash(h) == -1) return -1; } else { if((h->numelements / h->numbuckets) > 2) (void)_expand_hash(h); } return _genhash_normal_add(h, NULL, key, value); } int genhash_addunique(genhash_t *h, void *key, void *value) { if(genhash_get(h, key)) { errno = EEXIST; return -1; } return genhash_add(h, key, value); } void * genhash_get(genhash_t *h, const void *key) { if(h->numbuckets) { genhash_el *walk; int bucket = h->keyhashf(key) % h->numbuckets; for(walk = h->buckets[bucket]; walk; walk = walk->hash_next) { if (h->keycmpf(walk->key, key) == 0) { _genhash_normal_el_move2top(h, walk); return walk->value; } } } else { /* TINY mode */ int i; assert(h->numelements <= IH_VALUES); for(i = 0; i < h->numelements; i++) { if(h->keycmpf(h->tiny_keys[i], key) == 0) /* Don't reorder in TINY mode */ return h->tiny_values[i]; } } errno = ESRCH; return NULL; } int genhash_del(genhash_t *h, void *key) { if(h->numbuckets) { /* NORMAL mode */ genhash_el *walk; int bucket; if(h->numelements == 0) { errno = ESRCH; return -1; /* not found */ } bucket = h->keyhashf(key) % h->numbuckets; for(walk = h->buckets[bucket]; walk; walk = walk->hash_next) if(h->keycmpf(walk->key, key) == 0) break; if(walk) { _remove_normal_hash_el(h, walk); return 0; } } else { /* TINY mode */ int i; /* Look for matching key */ for(i = 0; i < h->numelements; i++) if(h->keycmpf(h->tiny_keys[i], key) == 0) break; if(i < h->numelements) { /* Remember values */ void *kd_arg = h->tiny_keys[i]; void *vd_arg = h->tiny_values[i]; h->numelements--; if(h->iters) { /* If iterators are involved, we have to * shift elements to maintain iteration order * and avoid duplications */ genhash_iter_t *iter; memmove(&h->tiny_keys[i], &h->tiny_keys[i+1], (h->numelements - i) * sizeof(h->tiny_keys[0])); memmove(&h->tiny_values[i], &h->tiny_values[i+1], (h->numelements - i) * sizeof(h->tiny_values[0])); /* Shift the iterator's indexes */ for(iter = h->iters; iter; iter = iter->iter_next) { int in = iter->un.item_number; if(iter->order_lru_first) { if(in > i) iter->un.item_number--; } else { if(in >= i) iter->un.item_number--; } } } else { /* Substitute it with the last one */ /* No harm if overwriting itself */ h->tiny_keys[i] = h->tiny_keys[h->numelements]; h->tiny_values[i] = h->tiny_values[h->numelements]; } h->tiny_keys[h->numelements] = 0; h->tiny_values[h->numelements] = 0; /* Delete for real */ if(h->keydestroyf) h->keydestroyf(kd_arg); if(h->valuedestroyf) h->valuedestroyf(vd_arg); return 0; } } errno = ESRCH; return -1; } /* * Initialize a hash iterator. */ int genhash_iter_init(genhash_iter_t *iter, genhash_t *h, int reverse_order) { iter->hash_ptr = h; iter->iter_prev = 0; /* Add itself to the iterators list */ iter->iter_next = h->iters; h->iters = iter; iter->order_lru_first = reverse_order; if(h->numbuckets) { /* NORMAL mode */ if(reverse_order) { /* Least recent first order */ iter->un.location = h->lru_tail; } else { /* Most recent first order */ iter->un.location = h->lru_head; } } else { /* TINY mode */ if(reverse_order) { iter->un.item_number = 0; } else { iter->un.item_number = h->numelements - 1; } } return h->numelements; } int genhash_iter(genhash_iter_t *iter, void *key_p, void *val_p) { void **key = key_p; void **val = val_p; genhash_t *h = iter->hash_ptr; if(h->numbuckets) { /* NORMAL mode */ genhash_el *cur_el = iter->un.location; if(!cur_el) /* Already finished */ return 0; if(key) *key = cur_el->key; if(val) *val = cur_el->value; /* Move pointer to the next hash element */ iter->un.location = iter->order_lru_first ? cur_el->lru_prev : cur_el->lru_next; } else { /* TINY mode */ if(iter->un.item_number < 0 || iter->un.item_number >= h->numelements || h->tiny_keys[iter->un.item_number] == 0) return 0; if(key) *key = h->tiny_keys[iter->un.item_number]; if(val) *val = h->tiny_values[iter->un.item_number]; /* Advance to the next element */ if(iter->order_lru_first) iter->un.item_number++; else iter->un.item_number--; } return 1; } void genhash_iter_done(genhash_iter_t *iter) { assert(iter->hash_ptr->iters); /* Remove itself from the iterators list */ if(iter->iter_next) iter->iter_next->iter_prev = iter->iter_prev; if(iter->iter_prev) iter->iter_prev->iter_next = iter->iter_next; else iter->hash_ptr->iters = iter->iter_next; /* Shift the head */ iter->hash_ptr = (void *)0xdeadbeef; } int genhash_set_lru_limit(genhash_t *h, int value) { if(h) { int prev_limit = h->lru_limit; if(value >= 0) h->lru_limit = value; return prev_limit; } else { errno = EINVAL; return -1; } } int genhash_set_buckets_limit(int value) { int prev_limit = maximum_hash_buckets_number; if(value > 0) { maximum_hash_buckets_number = value; } return prev_limit; } void genhash_destroy(genhash_t *h) { if(h) { assert(h->iters == 0); /* All iterators MUST be _done(). */ genhash_empty(h, 1, 1); free(h); } } void genhash_empty(genhash_t *h, int freekeys, int freevalues) { genhash_iter_t *iter; if(h == NULL) return; /* * Don't free what could not be freed. */ if(h->keydestroyf == NULL) freekeys = 0; if(h->valuedestroyf == NULL) freevalues = 0; if(h->numbuckets == 0) { while(h->numelements > 0) { int n = --h->numelements; void *kd_arg = h->tiny_keys[n]; void *vd_arg = h->tiny_values[n]; if (freekeys) h->keydestroyf(kd_arg); if (freevalues) h->valuedestroyf(vd_arg); } } else { genhash_el *el, *el_next; for(el = h->lru_head; el; el = el_next) { void *kd_arg = el->key; void *vd_arg = el->value; el_next = el->lru_next; free(el); h->numelements --; if (freekeys) h->keydestroyf(kd_arg); if (freevalues) h->valuedestroyf(vd_arg); } free(h->buckets); h->numbuckets = 0; /* Move back to TINY model */ } memset(&h->un, 0, sizeof(h->un)); /* Invalidate iterators in TINY model */ for(iter = h->iters; iter; iter = iter->iter_next) { assert(iter->hash_ptr == h); iter->un.item_number = -1; } assert(h->numelements == 0); } /*----- Simple hash and compare functions for common data types ------*/ unsigned int hashf_int (const void *key) { return (*(const int *)key ^ (*(const int *)key >> 16)); } int cmpf_int (const void *key1, const void *key2) { return (*(const int *)key1 != *(const int *)key2); } unsigned int hashf_void (const void *key) { return ((int)key ^ ((int)key >> 16)); } int cmpf_void (const void *key1, const void *key2) { return (key1 != key2); } /* * Phong's linear congruential hash */ #define dcharhash(h, c) ((h) = 0x63c63cd9*(h) + 0x9c39c33d + (c)) unsigned int hashf_string(const void *keyarg) { register const unsigned char *key; register unsigned int h; register unsigned char c; key = keyarg; for (h = 0; (c = *key++);) dcharhash(h, c); return (h); } int cmpf_string(const void *key1, const void *key2) { return strcmp((const char *)key1, (const char *)key2); }