From 6b28d81fea017382e098c24514ae229fcb0a4a7d Mon Sep 17 00:00:00 2001
From: Lennart Poettering <lennart@poettering.net>
Date: Tue, 1 Oct 2013 00:13:18 +0200
Subject: [PATCH] hashmap: size hashmap bucket array dynamically
Instead of fixing the hashmap bucket array at 127 entries, size it
dynamically, starting with a smaller one of 31. As soon as a fill level of
75% is reached, grow it to about four times the number of entries, and so on.
This should significantly optimize the lookup time in large tables
(from O(n) back to O(1)), and save memory on smaller tables (which most
are).
---
src/shared/hashmap.c | 152 ++++++++++++++++++++++++++++++++++++------------
src/shared/hashmap.h | 1 +
src/test/test-hashmap.c | 28 ++++++++-
3 files changed, 143 insertions(+), 38 deletions(-)
diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c
index 4ea1a0f..6330792 100644
--- a/src/shared/hashmap.c
+++ b/src/shared/hashmap.c
@@ -28,7 +28,7 @@
#include "hashmap.h"
#include "macro.h"
-#define NBUCKETS 127
+#define INITIAL_N_BUCKETS 31
struct hashmap_entry {
const void *key;
@@ -42,13 +42,13 @@ struct Hashmap {
compare_func_t compare_func;
struct hashmap_entry *iterate_list_head, *iterate_list_tail;
- unsigned n_entries;
+
+ struct hashmap_entry ** buckets;
+ unsigned n_buckets, n_entries;
bool from_pool;
};
-#define BY_HASH(h) ((struct hashmap_entry**) ((uint8_t*) (h) + ALIGN(sizeof(Hashmap))))
-
struct pool {
struct pool *next;
unsigned n_tiles;
@@ -64,6 +64,11 @@ static void *first_entry_tile = NULL;
static void* allocate_tile(struct pool **first_pool, void **first_tile, size_t tile_size) {
unsigned i;
+ /* When a tile is released we add it to the list and simply
+ * place the next pointer at its offset 0. */
+
+ assert(tile_size >= sizeof(void*));
+
if (*first_tile) {
void *r;
@@ -173,7 +178,7 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
b = is_main_thread();
- size = ALIGN(sizeof(Hashmap)) + NBUCKETS * sizeof(struct hashmap_entry*);
+ size = ALIGN(sizeof(Hashmap)) + INITIAL_N_BUCKETS * sizeof(struct hashmap_entry*);
if (b) {
h = allocate_tile(&first_hashmap_pool, &first_hashmap_tile, size);
@@ -191,23 +196,30 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
h->hash_func = hash_func ? hash_func : trivial_hash_func;
h->compare_func = compare_func ? compare_func : trivial_compare_func;
+ h->n_buckets = INITIAL_N_BUCKETS;
h->n_entries = 0;
h->iterate_list_head = h->iterate_list_tail = NULL;
+ h->buckets = (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap)));
+
h->from_pool = b;
return h;
}
int hashmap_ensure_allocated(Hashmap **h, hash_func_t hash_func, compare_func_t compare_func) {
+ Hashmap *q;
+
assert(h);
if (*h)
return 0;
- if (!(*h = hashmap_new(hash_func, compare_func)))
+ q = hashmap_new(hash_func, compare_func);
+ if (!q)
return -ENOMEM;
+ *h = q;
return 0;
}
@@ -216,11 +228,11 @@ static void link_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
assert(e);
/* Insert into hash table */
- e->bucket_next = BY_HASH(h)[hash];
+ e->bucket_next = h->buckets[hash];
e->bucket_previous = NULL;
- if (BY_HASH(h)[hash])
- BY_HASH(h)[hash]->bucket_previous = e;
- BY_HASH(h)[hash] = e;
+ if (h->buckets[hash])
+ h->buckets[hash]->bucket_previous = e;
+ h->buckets[hash] = e;
/* Insert into iteration list */
e->iterate_previous = h->iterate_list_tail;
@@ -260,7 +272,7 @@ static void unlink_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
if (e->bucket_previous)
e->bucket_previous->bucket_next = e->bucket_next;
else
- BY_HASH(h)[hash] = e->bucket_next;
+ h->buckets[hash] = e->bucket_next;
assert(h->n_entries >= 1);
h->n_entries--;
@@ -272,7 +284,7 @@ static void remove_entry(Hashmap *h, struct hashmap_entry *e) {
assert(h);
assert(e);
- hash = h->hash_func(e->key) % NBUCKETS;
+ hash = h->hash_func(e->key) % h->n_buckets;
unlink_entry(h, e, hash);
@@ -291,6 +303,9 @@ void hashmap_free(Hashmap*h) {
hashmap_clear(h);
+ if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
+ free(h->buckets);
+
if (h->from_pool)
deallocate_tile(&first_hashmap_tile, h);
else
@@ -357,22 +372,72 @@ void hashmap_clear_free_free(Hashmap *h) {
static struct hashmap_entry *hash_scan(Hashmap *h, unsigned hash, const void *key) {
struct hashmap_entry *e;
assert(h);
- assert(hash < NBUCKETS);
+ assert(hash < h->n_buckets);
- for (e = BY_HASH(h)[hash]; e; e = e->bucket_next)
+ for (e = h->buckets[hash]; e; e = e->bucket_next)
if (h->compare_func(e->key, key) == 0)
return e;
return NULL;
}
+static bool resize_buckets(Hashmap *h) {
+ unsigned m;
+ struct hashmap_entry **n, *i;
+ /* Rehashes all entries into a larger bucket array once the map is at least 75% full. Returns true if the array was replaced (bucket indexes computed earlier are then stale), false otherwise. */
+ assert(h);
+
+ if (_likely_(h->n_entries*4 < h->n_buckets*3))
+ return false;
+
+ /* New bucket count: four times (n_entries + 1), minus one */
+ m = (h->n_entries+1)*4-1;
+
+ /* On allocation failure keep the old table and report "not resized"; the map stays usable, its bucket chains just grow longer */
+ n = new0(struct hashmap_entry*, m);
+ if (!n)
+ return false;
+
+ for (i = h->iterate_list_head; i; i = i->iterate_next) {
+ unsigned hash, x;
+
+ hash = h->hash_func(i->key);
+
+ /* First, unlink the entry from its chain in the old bucket table */
+ if (i->bucket_next)
+ i->bucket_next->bucket_previous = i->bucket_previous;
+
+ if (i->bucket_previous)
+ i->bucket_previous->bucket_next = i->bucket_next;
+ else
+ h->buckets[hash % h->n_buckets] = i->bucket_next;
+
+ /* Then, push it onto the head of its chain in the new bucket table */
+ x = hash % m;
+
+ i->bucket_next = n[x];
+ i->bucket_previous = NULL;
+ if (n[x])
+ n[x]->bucket_previous = i;
+ n[x] = i;
+ }
+ /* The initial bucket array is placed directly behind the Hashmap in the same allocation, so only a separately allocated array may be freed */
+ if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
+ free(h->buckets);
+
+ h->buckets = n;
+ h->n_buckets = m;
+
+ return true;
+}
+
int hashmap_put(Hashmap *h, const void *key, void *value) {
struct hashmap_entry *e;
unsigned hash;
assert(h);
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (e) {
if (e->value == value)
@@ -380,6 +445,9 @@ int hashmap_put(Hashmap *h, const void *key, void *value) {
return -EEXIST;
}
+ if (resize_buckets(h))
+ hash = h->hash_func(key) % h->n_buckets;
+
if (h->from_pool)
e = allocate_tile(&first_entry_pool, &first_entry_tile, sizeof(struct hashmap_entry));
else
@@ -402,7 +470,7 @@ int hashmap_replace(Hashmap *h, const void *key, void *value) {
assert(h);
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (e) {
e->key = key;
@@ -419,7 +487,7 @@ int hashmap_update(Hashmap *h, const void *key, void *value) {
assert(h);
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (!e)
return -ENOENT;
@@ -435,7 +503,7 @@ void* hashmap_get(Hashmap *h, const void *key) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (!e)
return NULL;
@@ -450,7 +518,7 @@ void* hashmap_get2(Hashmap *h, const void *key, void **key2) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (!e)
return NULL;
@@ -467,7 +535,7 @@ bool hashmap_contains(Hashmap *h, const void *key) {
if (!h)
return false;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
if (!hash_scan(h, hash, key))
return false;
@@ -483,7 +551,7 @@ void* hashmap_remove(Hashmap *h, const void *key) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
if (!(e = hash_scan(h, hash, key)))
return NULL;
@@ -501,11 +569,11 @@ int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key,
if (!h)
return -ENOENT;
- old_hash = h->hash_func(old_key) % NBUCKETS;
+ old_hash = h->hash_func(old_key) % h->n_buckets;
if (!(e = hash_scan(h, old_hash, old_key)))
return -ENOENT;
- new_hash = h->hash_func(new_key) % NBUCKETS;
+ new_hash = h->hash_func(new_key) % h->n_buckets;
if (hash_scan(h, new_hash, new_key))
return -EEXIST;
@@ -526,11 +594,11 @@ int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_
if (!h)
return -ENOENT;
- old_hash = h->hash_func(old_key) % NBUCKETS;
+ old_hash = h->hash_func(old_key) % h->n_buckets;
if (!(e = hash_scan(h, old_hash, old_key)))
return -ENOENT;
- new_hash = h->hash_func(new_key) % NBUCKETS;
+ new_hash = h->hash_func(new_key) % h->n_buckets;
if ((k = hash_scan(h, new_hash, new_key)))
if (e != k)
remove_entry(h, k);
@@ -552,9 +620,10 @@ void* hashmap_remove_value(Hashmap *h, const void *key, void *value) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
- if (!(e = hash_scan(h, hash, key)))
+ e = hash_scan(h, hash, key);
+ if (!e)
return NULL;
if (e->value != value)
@@ -642,9 +711,10 @@ void *hashmap_iterate_skip(Hashmap *h, const void *key, Iterator *i) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
- if (!(e = hash_scan(h, hash, key)))
+ e = hash_scan(h, hash, key);
+ if (!e)
return NULL;
*i = (Iterator) e;
@@ -723,6 +793,14 @@ unsigned hashmap_size(Hashmap *h) {
return h->n_entries;
}
+unsigned hashmap_buckets(Hashmap *h) {
+ /* Returns the current number of buckets of h; a NULL map reports 0 */
+ if (!h)
+ return 0;
+
+ return h->n_buckets;
+}
+
bool hashmap_isempty(Hashmap *h) {
if (!h)
@@ -766,12 +844,12 @@ void hashmap_move(Hashmap *h, Hashmap *other) {
n = e->iterate_next;
- h_hash = h->hash_func(e->key) % NBUCKETS;
+ h_hash = h->hash_func(e->key) % h->n_buckets;
if (hash_scan(h, h_hash, e->key))
continue;
- other_hash = other->hash_func(e->key) % NBUCKETS;
+ other_hash = other->hash_func(e->key) % other->n_buckets;
unlink_entry(other, e, other_hash);
link_entry(h, e, h_hash);
@@ -787,12 +865,13 @@ int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) {
assert(h);
- h_hash = h->hash_func(key) % NBUCKETS;
+ h_hash = h->hash_func(key) % h->n_buckets;
if (hash_scan(h, h_hash, key))
return -EEXIST;
- other_hash = other->hash_func(key) % NBUCKETS;
- if (!(e = hash_scan(other, other_hash, key)))
+ other_hash = other->hash_func(key) % other->n_buckets;
+ e = hash_scan(other, other_hash, key);
+ if (!e)
return -ENOENT;
unlink_entry(other, e, other_hash);
@@ -806,7 +885,8 @@ Hashmap *hashmap_copy(Hashmap *h) {
assert(h);
- if (!(copy = hashmap_new(h->hash_func, h->compare_func)))
+ copy = hashmap_new(h->hash_func, h->compare_func);
+ if (!copy)
return NULL;
if (hashmap_merge(copy, h) < 0) {
@@ -845,7 +925,7 @@ void *hashmap_next(Hashmap *h, const void *key) {
if (!h)
return NULL;
- hash = h->hash_func(key) % NBUCKETS;
+ hash = h->hash_func(key) % h->n_buckets;
e = hash_scan(h, hash, key);
if (!e)
return NULL;
diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h
index 15b7e27..3d4f672 100644
--- a/src/shared/hashmap.h
+++ b/src/shared/hashmap.h
@@ -76,6 +76,7 @@ int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key);
unsigned hashmap_size(Hashmap *h) _pure_;
bool hashmap_isempty(Hashmap *h) _pure_;
+unsigned hashmap_buckets(Hashmap *h) _pure_;
void *hashmap_iterate(Hashmap *h, Iterator *i, const void **key);
void *hashmap_iterate_backwards(Hashmap *h, Iterator *i, const void **key);
diff --git a/src/test/test-hashmap.c b/src/test/test-hashmap.c
index 2aead79..349e8e5 100644
--- a/src/test/test-hashmap.c
+++ b/src/test/test-hashmap.c
@@ -467,6 +467,30 @@ static void test_hashmap_get(void) {
hashmap_free_free(m);
}
+static void test_hashmap_many(void) {
+ Hashmap *h;
+ unsigned i;
+ /* Fills a map with N_ENTRIES keys and checks that lookups, the entry count and the reported fill level are as expected afterwards */
+#define N_ENTRIES 100000
+
+ assert_se(h = hashmap_new(NULL, NULL));
+ /* Insert keys 1, 4, 7, ... (N_ENTRIES of them, value == key) and read each one back right away */
+ for (i = 1; i < N_ENTRIES*3; i+=3) {
+ assert_se(hashmap_put(h, UINT_TO_PTR(i), UINT_TO_PTR(i)) >= 0);
+ assert_se(PTR_TO_UINT(hashmap_get(h, UINT_TO_PTR(i))) == i);
+ }
+ /* Every key in the range must be present exactly when it is congruent to 1 modulo 3 */
+ for (i = 1; i < N_ENTRIES*3; i++)
+ assert_se(hashmap_contains(h, UINT_TO_PTR(i)) == (i % 3 == 1));
+
+ log_info("%u <= %u * 0.75 = %g", hashmap_size(h), hashmap_buckets(h), hashmap_buckets(h) * 0.75);
+ /* After the last insertion the map must be at most 75% full and hold exactly N_ENTRIES entries */
+ assert_se(hashmap_size(h) <= hashmap_buckets(h) * 0.75);
+ assert_se(hashmap_size(h) == N_ENTRIES);
+
+ hashmap_free(h);
+}
+
static void test_uint64_compare_func(void) {
assert_se(uint64_compare_func("a", "a") == 0);
assert_se(uint64_compare_func("a", "b") == -1);
@@ -484,8 +508,7 @@ static void test_string_compare_func(void) {
assert_se(string_compare_func("fred", "fred") == 0);
}
-int main(int argc, const char *argv[])
-{
+int main(int argc, const char *argv[]) {
test_hashmap_copy();
test_hashmap_get_strv();
test_hashmap_move_one();
@@ -502,6 +525,7 @@ int main(int argc, const char *argv[])
test_hashmap_isempty();
test_hashmap_get();
test_hashmap_size();
+ test_hashmap_many();
test_uint64_compare_func();
test_trivial_compare_func();
test_string_compare_func();