e3c68b
From 0bf728030e0ad7a49e6e1737ea06ae74da9279d3 Mon Sep 17 00:00:00 2001
e3c68b
From: Xavi Hernandez <xhernandez@redhat.com>
e3c68b
Date: Fri, 21 Jun 2019 11:28:08 +0200
e3c68b
Subject: [PATCH 212/221] core: fix memory allocation issues
e3c68b
e3c68b
Two problems have been identified that caused that gluster's memory
e3c68b
usage were twice higher than required.
e3c68b
e3c68b
1. An off by 1 error caused that all objects allocated from the memory
e3c68b
   pools were taken from a pool bigger than required. Since each pool
e3c68b
   corresponds to a size equal to a power of two, this was wasting half
e3c68b
   of the available memory.
e3c68b
e3c68b
2. The header information used for accounting on each memory object was
e3c68b
   not taken into consideration when searching for a suitable memory
e3c68b
   pool. It was added later when each individual block was allocated.
e3c68b
   This made this space "invisible" to memory accounting.
e3c68b
e3c68b
Credits: Thanks to Nithya Balachandran for identifying this problem and
e3c68b
         testing this patch.
e3c68b
e3c68b
Upstream patch:
e3c68b
> BUG: 1722802
e3c68b
> Upstream patch link: https://review.gluster.org/c/glusterfs/+/22921
e3c68b
> Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c
e3c68b
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
e3c68b
e3c68b
Fixes: bz#1722801
e3c68b
Change-Id: I90e27ad795fe51ca11c13080f62207451f6c138c
e3c68b
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
e3c68b
Reviewed-on: https://code.engineering.redhat.com/gerrit/174714
e3c68b
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e3c68b
Reviewed-by: Atin Mukherjee <amukherj@redhat.com>
e3c68b
---
e3c68b
 libglusterfs/src/glusterfs/mem-pool.h |  5 ++-
e3c68b
 libglusterfs/src/mem-pool.c           | 57 +++++++++++++++++++----------------
e3c68b
 2 files changed, 35 insertions(+), 27 deletions(-)
e3c68b
e3c68b
diff --git a/libglusterfs/src/glusterfs/mem-pool.h b/libglusterfs/src/glusterfs/mem-pool.h
e3c68b
index c5a486b..be0a26d 100644
e3c68b
--- a/libglusterfs/src/glusterfs/mem-pool.h
e3c68b
+++ b/libglusterfs/src/glusterfs/mem-pool.h
e3c68b
@@ -231,7 +231,10 @@ typedef struct pooled_obj_hdr {
e3c68b
     struct mem_pool *pool;
e3c68b
 } pooled_obj_hdr_t;
e3c68b
 
e3c68b
-#define AVAILABLE_SIZE(p2) (1 << (p2))
e3c68b
+/* Each memory block inside a pool has a fixed size that is a power of two.
e3c68b
+ * However each object will have a header that will reduce the available
e3c68b
+ * space. */
e3c68b
+#define AVAILABLE_SIZE(p2) ((1UL << (p2)) - sizeof(pooled_obj_hdr_t))
e3c68b
 
e3c68b
 typedef struct per_thread_pool {
e3c68b
     /* the pool that was used to request this allocation */
e3c68b
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
e3c68b
index df167b6..d88041d 100644
e3c68b
--- a/libglusterfs/src/mem-pool.c
e3c68b
+++ b/libglusterfs/src/mem-pool.c
e3c68b
@@ -627,6 +627,7 @@ struct mem_pool *
e3c68b
 mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
e3c68b
                 unsigned long count, char *name)
e3c68b
 {
e3c68b
+    unsigned long extra_size, size;
e3c68b
     unsigned int power;
e3c68b
     struct mem_pool *new = NULL;
e3c68b
     struct mem_pool_shared *pool = NULL;
e3c68b
@@ -637,10 +638,25 @@ mem_pool_new_fn(glusterfs_ctx_t *ctx, unsigned long sizeof_type,
e3c68b
         return NULL;
e3c68b
     }
e3c68b
 
e3c68b
-    /* We ensure sizeof_type > 1 and the next power of two will be, at least,
e3c68b
-     * 2^POOL_SMALLEST */
e3c68b
-    sizeof_type |= (1 << POOL_SMALLEST) - 1;
e3c68b
-    power = sizeof(sizeof_type) * 8 - __builtin_clzl(sizeof_type - 1) + 1;
e3c68b
+    /* This is the overhead we'll have because of memory accounting for each
e3c68b
+     * memory block. */
e3c68b
+    extra_size = sizeof(pooled_obj_hdr_t);
e3c68b
+
e3c68b
+    /* We need to compute the total space needed to hold the data type and
e3c68b
+     * the header. Given that the smallest block size we have in the pools
e3c68b
+     * is 2^POOL_SMALLEST, we need to take the MAX(size, 2^POOL_SMALLEST).
e3c68b
+     * However, since this value is only needed to compute its rounded
e3c68b
+     * logarithm in base 2, and this only depends on the highest bit set,
e3c68b
+     * we can simply do a bitwise or with the minimum size. We need to
e3c68b
+     * subtract 1 for correct handling of sizes that are exactly a power
e3c68b
+     * of 2. */
e3c68b
+    size = (sizeof_type + extra_size - 1UL) | ((1UL << POOL_SMALLEST) - 1UL);
e3c68b
+
e3c68b
+    /* We compute the logarithm in base 2 rounded up of the resulting size.
e3c68b
+     * This value will identify which pool we need to use from the pools of
e3c68b
+     * powers of 2. This is equivalent to finding the position of the highest
e3c68b
+     * bit set. */
e3c68b
+    power = sizeof(size) * 8 - __builtin_clzl(size);
e3c68b
     if (power > POOL_LARGEST) {
e3c68b
         gf_msg_callingfn("mem-pool", GF_LOG_ERROR, EINVAL, LG_MSG_INVALID_ARG,
e3c68b
                          "invalid argument");
e3c68b
@@ -732,8 +748,8 @@ mem_get_pool_list(void)
e3c68b
     return pool_list;
e3c68b
 }
e3c68b
 
e3c68b
-pooled_obj_hdr_t *
e3c68b
-mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
e3c68b
+static pooled_obj_hdr_t *
e3c68b
+mem_get_from_pool(struct mem_pool *mem_pool)
e3c68b
 {
e3c68b
     per_thread_pool_list_t *pool_list;
e3c68b
     per_thread_pool_t *pt_pool;
e3c68b
@@ -747,12 +763,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
e3c68b
         return NULL;
e3c68b
     }
e3c68b
 
e3c68b
-    if (mem_pool) {
e3c68b
-        pt_pool = &pool_list
e3c68b
-                       ->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
e3c68b
-    } else {
e3c68b
-        pt_pool = &pool_list->pools[pool->power_of_two - POOL_SMALLEST];
e3c68b
-    }
e3c68b
+    pt_pool = &pool_list->pools[mem_pool->pool->power_of_two - POOL_SMALLEST];
e3c68b
 
e3c68b
     (void)pthread_spin_lock(&pool_list->lock);
e3c68b
 
e3c68b
@@ -770,8 +781,7 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
e3c68b
         } else {
e3c68b
             (void)pthread_spin_unlock(&pool_list->lock);
e3c68b
             GF_ATOMIC_INC(pt_pool->parent->allocs_stdc);
e3c68b
-            retval = malloc((1 << pt_pool->parent->power_of_two) +
e3c68b
-                            sizeof(pooled_obj_hdr_t));
e3c68b
+            retval = malloc(1 << pt_pool->parent->power_of_two);
e3c68b
 #ifdef DEBUG
e3c68b
             hit = _gf_false;
e3c68b
 #endif
e3c68b
@@ -779,19 +789,14 @@ mem_get_from_pool(struct mem_pool *mem_pool, struct mem_pool_shared *pool)
e3c68b
     }
e3c68b
 
e3c68b
     if (retval != NULL) {
e3c68b
-        if (mem_pool) {
e3c68b
-            retval->pool = mem_pool;
e3c68b
-            retval->power_of_two = mem_pool->pool->power_of_two;
e3c68b
+        retval->pool = mem_pool;
e3c68b
+        retval->power_of_two = mem_pool->pool->power_of_two;
e3c68b
 #ifdef DEBUG
e3c68b
-            if (hit == _gf_true)
e3c68b
-                GF_ATOMIC_INC(mem_pool->hit);
e3c68b
-            else
e3c68b
-                GF_ATOMIC_INC(mem_pool->miss);
e3c68b
+        if (hit == _gf_true)
e3c68b
+            GF_ATOMIC_INC(mem_pool->hit);
e3c68b
+        else
e3c68b
+            GF_ATOMIC_INC(mem_pool->miss);
e3c68b
 #endif
e3c68b
-        } else {
e3c68b
-            retval->power_of_two = pool->power_of_two;
e3c68b
-            retval->pool = NULL;
e3c68b
-        }
e3c68b
         retval->magic = GF_MEM_HEADER_MAGIC;
e3c68b
         retval->pool_list = pool_list;
e3c68b
     }
e3c68b
@@ -811,7 +816,7 @@ mem_get(struct mem_pool *mem_pool)
e3c68b
 #if defined(GF_DISABLE_MEMPOOL)
e3c68b
     return GF_MALLOC(mem_pool->sizeof_type, gf_common_mt_mem_pool);
e3c68b
 #else
e3c68b
-    pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool, NULL);
e3c68b
+    pooled_obj_hdr_t *retval = mem_get_from_pool(mem_pool);
e3c68b
     if (!retval) {
e3c68b
         return NULL;
e3c68b
     }
e3c68b
-- 
e3c68b
1.8.3.1
e3c68b