a4b143
From 6b28d81fea017382e098c24514ae229fcb0a4a7d Mon Sep 17 00:00:00 2001
a4b143
From: Lennart Poettering <lennart@poettering.net>
a4b143
Date: Tue, 1 Oct 2013 00:13:18 +0200
a4b143
Subject: [PATCH] hashmap: size hashmap bucket array dynamically
a4b143
a4b143
Instead of fixing the hashmap bucket array to 127 entries, dynamically
a4b143
size it, starting with a smaller one of 31. As soon as a fill level of
a4b143
75% is reached, grow it to about four times the entry count, and so on.
a4b143
a4b143
This should significantly optimize the lookup time in large tables
a4b143
(from O(n) back to O(1)), and save memory on smaller tables (which most
a4b143
are).
a4b143
---
a4b143
 src/shared/hashmap.c    | 152 ++++++++++++++++++++++++++++++++++++------------
a4b143
 src/shared/hashmap.h    |   1 +
a4b143
 src/test/test-hashmap.c |  28 ++++++++-
a4b143
 3 files changed, 143 insertions(+), 38 deletions(-)
a4b143
a4b143
diff --git a/src/shared/hashmap.c b/src/shared/hashmap.c
a4b143
index 4ea1a0f..6330792 100644
a4b143
--- a/src/shared/hashmap.c
a4b143
+++ b/src/shared/hashmap.c
a4b143
@@ -28,7 +28,7 @@
a4b143
 #include "hashmap.h"
a4b143
 #include "macro.h"
a4b143
 
a4b143
-#define NBUCKETS 127
a4b143
+#define INITIAL_N_BUCKETS 31
a4b143
 
a4b143
 struct hashmap_entry {
a4b143
         const void *key;
a4b143
@@ -42,13 +42,13 @@ struct Hashmap {
a4b143
         compare_func_t compare_func;
a4b143
 
a4b143
         struct hashmap_entry *iterate_list_head, *iterate_list_tail;
a4b143
-        unsigned n_entries;
a4b143
+
a4b143
+        struct hashmap_entry ** buckets;
a4b143
+        unsigned n_buckets, n_entries;
a4b143
 
a4b143
         bool from_pool;
a4b143
 };
a4b143
 
a4b143
-#define BY_HASH(h) ((struct hashmap_entry**) ((uint8_t*) (h) + ALIGN(sizeof(Hashmap))))
a4b143
-
a4b143
 struct pool {
a4b143
         struct pool *next;
a4b143
         unsigned n_tiles;
a4b143
@@ -64,6 +64,11 @@ static void *first_entry_tile = NULL;
a4b143
 static void* allocate_tile(struct pool **first_pool, void **first_tile, size_t tile_size) {
a4b143
         unsigned i;
a4b143
 
a4b143
+        /* When a tile is released we add it to the list and simply
a4b143
+         * place the next pointer at its offset 0. */
a4b143
+
a4b143
+        assert(tile_size >= sizeof(void*));
a4b143
+
a4b143
         if (*first_tile) {
a4b143
                 void *r;
a4b143
 
a4b143
@@ -173,7 +178,7 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
a4b143
 
a4b143
         b = is_main_thread();
a4b143
 
a4b143
-        size = ALIGN(sizeof(Hashmap)) + NBUCKETS * sizeof(struct hashmap_entry*);
a4b143
+        size = ALIGN(sizeof(Hashmap)) + INITIAL_N_BUCKETS * sizeof(struct hashmap_entry*);
a4b143
 
a4b143
         if (b) {
a4b143
                 h = allocate_tile(&first_hashmap_pool, &first_hashmap_tile, size);
a4b143
@@ -191,23 +196,30 @@ Hashmap *hashmap_new(hash_func_t hash_func, compare_func_t compare_func) {
a4b143
         h->hash_func = hash_func ? hash_func : trivial_hash_func;
a4b143
         h->compare_func = compare_func ? compare_func : trivial_compare_func;
a4b143
 
a4b143
+        h->n_buckets = INITIAL_N_BUCKETS;
a4b143
         h->n_entries = 0;
a4b143
         h->iterate_list_head = h->iterate_list_tail = NULL;
a4b143
 
a4b143
+        h->buckets = (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap)));
a4b143
+
a4b143
         h->from_pool = b;
a4b143
 
a4b143
         return h;
a4b143
 }
a4b143
 
a4b143
 int hashmap_ensure_allocated(Hashmap **h, hash_func_t hash_func, compare_func_t compare_func) {
a4b143
+        Hashmap *q;
a4b143
+
a4b143
         assert(h);
a4b143
 
a4b143
         if (*h)
a4b143
                 return 0;
a4b143
 
a4b143
-        if (!(*h = hashmap_new(hash_func, compare_func)))
a4b143
+        q = hashmap_new(hash_func, compare_func);
a4b143
+        if (!q)
a4b143
                 return -ENOMEM;
a4b143
 
a4b143
+        *h = q;
a4b143
         return 0;
a4b143
 }
a4b143
 
a4b143
@@ -216,11 +228,11 @@ static void link_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
a4b143
         assert(e);
a4b143
 
a4b143
         /* Insert into hash table */
a4b143
-        e->bucket_next = BY_HASH(h)[hash];
a4b143
+        e->bucket_next = h->buckets[hash];
a4b143
         e->bucket_previous = NULL;
a4b143
-        if (BY_HASH(h)[hash])
a4b143
-                BY_HASH(h)[hash]->bucket_previous = e;
a4b143
-        BY_HASH(h)[hash] = e;
a4b143
+        if (h->buckets[hash])
a4b143
+                h->buckets[hash]->bucket_previous = e;
a4b143
+        h->buckets[hash] = e;
a4b143
 
a4b143
         /* Insert into iteration list */
a4b143
         e->iterate_previous = h->iterate_list_tail;
a4b143
@@ -260,7 +272,7 @@ static void unlink_entry(Hashmap *h, struct hashmap_entry *e, unsigned hash) {
a4b143
         if (e->bucket_previous)
a4b143
                 e->bucket_previous->bucket_next = e->bucket_next;
a4b143
         else
a4b143
-                BY_HASH(h)[hash] = e->bucket_next;
a4b143
+                h->buckets[hash] = e->bucket_next;
a4b143
 
a4b143
         assert(h->n_entries >= 1);
a4b143
         h->n_entries--;
a4b143
@@ -272,7 +284,7 @@ static void remove_entry(Hashmap *h, struct hashmap_entry *e) {
a4b143
         assert(h);
a4b143
         assert(e);
a4b143
 
a4b143
-        hash = h->hash_func(e->key) % NBUCKETS;
a4b143
+        hash = h->hash_func(e->key) % h->n_buckets;
a4b143
 
a4b143
         unlink_entry(h, e, hash);
a4b143
 
a4b143
@@ -291,6 +303,9 @@ void hashmap_free(Hashmap*h) {
a4b143
 
a4b143
         hashmap_clear(h);
a4b143
 
a4b143
+        if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
a4b143
+                free(h->buckets);
a4b143
+
a4b143
         if (h->from_pool)
a4b143
                 deallocate_tile(&first_hashmap_tile, h);
a4b143
         else
a4b143
@@ -357,22 +372,72 @@ void hashmap_clear_free_free(Hashmap *h) {
a4b143
 static struct hashmap_entry *hash_scan(Hashmap *h, unsigned hash, const void *key) {
a4b143
         struct hashmap_entry *e;
a4b143
         assert(h);
a4b143
-        assert(hash < NBUCKETS);
a4b143
+        assert(hash < h->n_buckets);
a4b143
 
a4b143
-        for (e = BY_HASH(h)[hash]; e; e = e->bucket_next)
a4b143
+        for (e = h->buckets[hash]; e; e = e->bucket_next)
a4b143
                 if (h->compare_func(e->key, key) == 0)
a4b143
                         return e;
a4b143
 
a4b143
         return NULL;
a4b143
 }
a4b143
 
a4b143
+static bool resize_buckets(Hashmap *h) {
a4b143
+        unsigned m;
a4b143
+        struct hashmap_entry **n, *i;
a4b143
+
a4b143
+        assert(h);
a4b143
+
a4b143
+        if (_likely_(h->n_entries*4 < h->n_buckets*3))
a4b143
+                return false;
a4b143
+
a4b143
+        /* Grow to roughly four times the number of entries */
a4b143
+        m = (h->n_entries+1)*4-1;
a4b143
+
a4b143
+        /* If we hit OOM we simply risk packed hashmaps... */
a4b143
+        n = new0(struct hashmap_entry*, m);
a4b143
+        if (!n)
a4b143
+                return false;
a4b143
+
a4b143
+        for (i = h->iterate_list_head; i; i = i->iterate_next) {
a4b143
+                unsigned hash, x;
a4b143
+
a4b143
+                hash = h->hash_func(i->key);
a4b143
+
a4b143
+                /* First, drop from old bucket table */
a4b143
+                if (i->bucket_next)
a4b143
+                        i->bucket_next->bucket_previous = i->bucket_previous;
a4b143
+
a4b143
+                if (i->bucket_previous)
a4b143
+                        i->bucket_previous->bucket_next = i->bucket_next;
a4b143
+                else
a4b143
+                        h->buckets[hash % h->n_buckets] = i->bucket_next;
a4b143
+
a4b143
+                /* Then, add to new bucket table */
a4b143
+                x = hash % m;
a4b143
+
a4b143
+                i->bucket_next = n[x];
a4b143
+                i->bucket_previous = NULL;
a4b143
+                if (n[x])
a4b143
+                        n[x]->bucket_previous = i;
a4b143
+                n[x] = i;
a4b143
+        }
a4b143
+
a4b143
+        if (h->buckets != (struct hashmap_entry**) ((uint8_t*) h + ALIGN(sizeof(Hashmap))))
a4b143
+                free(h->buckets);
a4b143
+
a4b143
+        h->buckets = n;
a4b143
+        h->n_buckets = m;
a4b143
+
a4b143
+        return true;
a4b143
+}
a4b143
+
a4b143
 int hashmap_put(Hashmap *h, const void *key, void *value) {
a4b143
         struct hashmap_entry *e;
a4b143
         unsigned hash;
a4b143
 
a4b143
         assert(h);
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (e) {
a4b143
                 if (e->value == value)
a4b143
@@ -380,6 +445,9 @@ int hashmap_put(Hashmap *h, const void *key, void *value) {
a4b143
                 return -EEXIST;
a4b143
         }
a4b143
 
a4b143
+        if (resize_buckets(h))
a4b143
+                hash = h->hash_func(key) % h->n_buckets;
a4b143
+
a4b143
         if (h->from_pool)
a4b143
                 e = allocate_tile(&first_entry_pool, &first_entry_tile, sizeof(struct hashmap_entry));
a4b143
         else
a4b143
@@ -402,7 +470,7 @@ int hashmap_replace(Hashmap *h, const void *key, void *value) {
a4b143
 
a4b143
         assert(h);
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (e) {
a4b143
                 e->key = key;
a4b143
@@ -419,7 +487,7 @@ int hashmap_update(Hashmap *h, const void *key, void *value) {
a4b143
 
a4b143
         assert(h);
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (!e)
a4b143
                 return -ENOENT;
a4b143
@@ -435,7 +503,7 @@ void* hashmap_get(Hashmap *h, const void *key) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (!e)
a4b143
                 return NULL;
a4b143
@@ -450,7 +518,7 @@ void* hashmap_get2(Hashmap *h, const void *key, void **key2) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (!e)
a4b143
                 return NULL;
a4b143
@@ -467,7 +535,7 @@ bool hashmap_contains(Hashmap *h, const void *key) {
a4b143
         if (!h)
a4b143
                 return false;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
 
a4b143
         if (!hash_scan(h, hash, key))
a4b143
                 return false;
a4b143
@@ -483,7 +551,7 @@ void* hashmap_remove(Hashmap *h, const void *key) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
 
a4b143
         if (!(e = hash_scan(h, hash, key)))
a4b143
                 return NULL;
a4b143
@@ -501,11 +569,11 @@ int hashmap_remove_and_put(Hashmap *h, const void *old_key, const void *new_key,
a4b143
         if (!h)
a4b143
                 return -ENOENT;
a4b143
 
a4b143
-        old_hash = h->hash_func(old_key) % NBUCKETS;
a4b143
+        old_hash = h->hash_func(old_key) % h->n_buckets;
a4b143
         if (!(e = hash_scan(h, old_hash, old_key)))
a4b143
                 return -ENOENT;
a4b143
 
a4b143
-        new_hash = h->hash_func(new_key) % NBUCKETS;
a4b143
+        new_hash = h->hash_func(new_key) % h->n_buckets;
a4b143
         if (hash_scan(h, new_hash, new_key))
a4b143
                 return -EEXIST;
a4b143
 
a4b143
@@ -526,11 +594,11 @@ int hashmap_remove_and_replace(Hashmap *h, const void *old_key, const void *new_
a4b143
         if (!h)
a4b143
                 return -ENOENT;
a4b143
 
a4b143
-        old_hash = h->hash_func(old_key) % NBUCKETS;
a4b143
+        old_hash = h->hash_func(old_key) % h->n_buckets;
a4b143
         if (!(e = hash_scan(h, old_hash, old_key)))
a4b143
                 return -ENOENT;
a4b143
 
a4b143
-        new_hash = h->hash_func(new_key) % NBUCKETS;
a4b143
+        new_hash = h->hash_func(new_key) % h->n_buckets;
a4b143
         if ((k = hash_scan(h, new_hash, new_key)))
a4b143
                 if (e != k)
a4b143
                         remove_entry(h, k);
a4b143
@@ -552,9 +620,10 @@ void* hashmap_remove_value(Hashmap *h, const void *key, void *value) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
 
a4b143
-        if (!(e = hash_scan(h, hash, key)))
a4b143
+        e = hash_scan(h, hash, key);
a4b143
+        if (!e)
a4b143
                 return NULL;
a4b143
 
a4b143
         if (e->value != value)
a4b143
@@ -642,9 +711,10 @@ void *hashmap_iterate_skip(Hashmap *h, const void *key, Iterator *i) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
 
a4b143
-        if (!(e = hash_scan(h, hash, key)))
a4b143
+        e = hash_scan(h, hash, key);
a4b143
+        if (!e)
a4b143
                 return NULL;
a4b143
 
a4b143
         *i = (Iterator) e;
a4b143
@@ -723,6 +793,14 @@ unsigned hashmap_size(Hashmap *h) {
a4b143
         return h->n_entries;
a4b143
 }
a4b143
 
a4b143
+unsigned hashmap_buckets(Hashmap *h) {
a4b143
+
a4b143
+        if (!h)
a4b143
+                return 0;
a4b143
+
a4b143
+        return h->n_buckets;
a4b143
+}
a4b143
+
a4b143
 bool hashmap_isempty(Hashmap *h) {
a4b143
 
a4b143
         if (!h)
a4b143
@@ -766,12 +844,12 @@ void hashmap_move(Hashmap *h, Hashmap *other) {
a4b143
 
a4b143
                 n = e->iterate_next;
a4b143
 
a4b143
-                h_hash = h->hash_func(e->key) % NBUCKETS;
a4b143
+                h_hash = h->hash_func(e->key) % h->n_buckets;
a4b143
 
a4b143
                 if (hash_scan(h, h_hash, e->key))
a4b143
                         continue;
a4b143
 
a4b143
-                other_hash = other->hash_func(e->key) % NBUCKETS;
a4b143
+                other_hash = other->hash_func(e->key) % other->n_buckets;
a4b143
 
a4b143
                 unlink_entry(other, e, other_hash);
a4b143
                 link_entry(h, e, h_hash);
a4b143
@@ -787,12 +865,13 @@ int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key) {
a4b143
 
a4b143
         assert(h);
a4b143
 
a4b143
-        h_hash = h->hash_func(key) % NBUCKETS;
a4b143
+        h_hash = h->hash_func(key) % h->n_buckets;
a4b143
         if (hash_scan(h, h_hash, key))
a4b143
                 return -EEXIST;
a4b143
 
a4b143
-        other_hash = other->hash_func(key) % NBUCKETS;
a4b143
-        if (!(e = hash_scan(other, other_hash, key)))
a4b143
+        other_hash = other->hash_func(key) % other->n_buckets;
a4b143
+        e = hash_scan(other, other_hash, key);
a4b143
+        if (!e)
a4b143
                 return -ENOENT;
a4b143
 
a4b143
         unlink_entry(other, e, other_hash);
a4b143
@@ -806,7 +885,8 @@ Hashmap *hashmap_copy(Hashmap *h) {
a4b143
 
a4b143
         assert(h);
a4b143
 
a4b143
-        if (!(copy = hashmap_new(h->hash_func, h->compare_func)))
a4b143
+        copy = hashmap_new(h->hash_func, h->compare_func);
a4b143
+        if (!copy)
a4b143
                 return NULL;
a4b143
 
a4b143
         if (hashmap_merge(copy, h) < 0) {
a4b143
@@ -845,7 +925,7 @@ void *hashmap_next(Hashmap *h, const void *key) {
a4b143
         if (!h)
a4b143
                 return NULL;
a4b143
 
a4b143
-        hash = h->hash_func(key) % NBUCKETS;
a4b143
+        hash = h->hash_func(key) % h->n_buckets;
a4b143
         e = hash_scan(h, hash, key);
a4b143
         if (!e)
a4b143
                 return NULL;
a4b143
diff --git a/src/shared/hashmap.h b/src/shared/hashmap.h
a4b143
index 15b7e27..3d4f672 100644
a4b143
--- a/src/shared/hashmap.h
a4b143
+++ b/src/shared/hashmap.h
a4b143
@@ -76,6 +76,7 @@ int hashmap_move_one(Hashmap *h, Hashmap *other, const void *key);
a4b143
 
a4b143
 unsigned hashmap_size(Hashmap *h) _pure_;
a4b143
 bool hashmap_isempty(Hashmap *h) _pure_;
a4b143
+unsigned hashmap_buckets(Hashmap *h) _pure_;
a4b143
 
a4b143
 void *hashmap_iterate(Hashmap *h, Iterator *i, const void **key);
a4b143
 void *hashmap_iterate_backwards(Hashmap *h, Iterator *i, const void **key);
a4b143
diff --git a/src/test/test-hashmap.c b/src/test/test-hashmap.c
a4b143
index 2aead79..349e8e5 100644
a4b143
--- a/src/test/test-hashmap.c
a4b143
+++ b/src/test/test-hashmap.c
a4b143
@@ -467,6 +467,30 @@ static void test_hashmap_get(void) {
a4b143
         hashmap_free_free(m);
a4b143
 }
a4b143
 
a4b143
+static void test_hashmap_many(void) {
a4b143
+        Hashmap *h;
a4b143
+        unsigned i;
a4b143
+
a4b143
+#define N_ENTRIES 100000
a4b143
+
a4b143
+        assert_se(h = hashmap_new(NULL, NULL));
a4b143
+
a4b143
+        for (i = 1; i < N_ENTRIES*3; i+=3) {
a4b143
+                assert_se(hashmap_put(h, UINT_TO_PTR(i), UINT_TO_PTR(i)) >= 0);
a4b143
+                assert_se(PTR_TO_UINT(hashmap_get(h, UINT_TO_PTR(i))) == i);
a4b143
+        }
a4b143
+
a4b143
+        for (i = 1; i < N_ENTRIES*3; i++)
a4b143
+                assert_se(hashmap_contains(h, UINT_TO_PTR(i)) == (i % 3 == 1));
a4b143
+
a4b143
+        log_info("%u <= %u * 0.75 = %g", hashmap_size(h), hashmap_buckets(h), hashmap_buckets(h) * 0.75);
a4b143
+
a4b143
+        assert_se(hashmap_size(h) <= hashmap_buckets(h) * 0.75);
a4b143
+        assert_se(hashmap_size(h) == N_ENTRIES);
a4b143
+
a4b143
+        hashmap_free(h);
a4b143
+}
a4b143
+
a4b143
 static void test_uint64_compare_func(void) {
a4b143
         assert_se(uint64_compare_func("a", "a") == 0);
a4b143
         assert_se(uint64_compare_func("a", "b") == -1);
a4b143
@@ -484,8 +508,7 @@ static void test_string_compare_func(void) {
a4b143
         assert_se(string_compare_func("fred", "fred") == 0);
a4b143
 }
a4b143
 
a4b143
-int main(int argc, const char *argv[])
a4b143
-{
a4b143
+int main(int argc, const char *argv[]) {
a4b143
         test_hashmap_copy();
a4b143
         test_hashmap_get_strv();
a4b143
         test_hashmap_move_one();
a4b143
@@ -502,6 +525,7 @@ int main(int argc, const char *argv[])
a4b143
         test_hashmap_isempty();
a4b143
         test_hashmap_get();
a4b143
         test_hashmap_size();
a4b143
+        test_hashmap_many();
a4b143
         test_uint64_compare_func();
a4b143
         test_trivial_compare_func();
a4b143
         test_string_compare_func();