887953
From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001
887953
From: Xavi Hernandez <xhernandez@redhat.com>
887953
Date: Fri, 14 Dec 2018 11:26:36 +0100
887953
Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by
887953
 per-thread mem pools
887953
887953
There was a race in the per-thread memory pool management that could lead
887953
to memory corruption. The race appeared when the following sequence of
887953
events happened:
887953
887953
1. Thread T1 allocated a memory object O1 from its own private pool P1
887953
2. T1 terminates and P1 is marked to be destroyed
887953
3. The mem-sweeper thread is woken up and scans all private pools
887953
4. It detects that P1 needs to be destroyed and starts releasing the
887953
   objects from hot and cold lists.
887953
5. Thread T2 releases O1
887953
6. O1 is added to the hot list of P1
887953
887953
The problem happens because steps 4 and 6 are protected by diferent locks,
887953
so they can run concurrently. This means that both T1 and T2 are modifying
887953
the same list at the same time, potentially causing corruption.
887953
887953
This patch fixes the problem using the following approach:
887953
887953
1. When an object is released, it's only returned to the hot list of the
887953
   corresponding memory pool if it's not marked to be destroyed. Otherwise
887953
   the memory is released to the system.
887953
2. Object release and mem-sweeper thread synchronize access to the deletion
887953
   mark of the memory pool to prevent simultaneous access to the list.
887953
887953
Some other minor adjustments are made to reduce the lengths of the locked
887953
regions.
887953
887953
This patch is not 100% identical to upstream version because changes
887953
coming from https://github.com/gluster/glusterfs/issues/307 are not
887953
backported.
887953
887953
Upstream patch: https://review.gluster.org/c/glusterfs/+/21583
887953
> Fixes: bz#1651165
887953
> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
887953
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
887953
887953
Upstream patch: https://review.gluster.org/c/glusterfs/+/21727
887953
> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090
887953
> updates: bz#1193929
887953
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
887953
887953
Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
887953
BUG: 1647499
887953
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
887953
Reviewed-on: https://code.engineering.redhat.com/gerrit/158658
887953
Tested-by: RHGS Build Bot <nigelb@redhat.com>
887953
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
887953
---
887953
 libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------
887953
 1 file changed, 81 insertions(+), 56 deletions(-)
887953
887953
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
887953
index ba29137..8ff261c 100644
887953
--- a/libglusterfs/src/mem-pool.c
887953
+++ b/libglusterfs/src/mem-pool.c
887953
@@ -411,37 +411,34 @@ static unsigned int     init_count      = 0;
887953
 static pthread_t        sweeper_tid;
887953
 
887953
 
887953
-void
887953
+gf_boolean_t
887953
 collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list)
887953
 {
887953
         unsigned int            i;
887953
         per_thread_pool_t       *pt_pool;
887953
-
887953
-        if (pool_list->poison) {
887953
-                list_del (&pool_list->thr_list);
887953
-                list_add (&pool_list->thr_list, &state->death_row);
887953
-                return;
887953
-        }
887953
-
887953
-        if (state->n_cold_lists >= N_COLD_LISTS) {
887953
-                return;
887953
-        }
887953
+        gf_boolean_t            poisoned;
887953
 
887953
         (void) pthread_spin_lock (&pool_list->lock);
887953
-        for (i = 0; i < NPOOLS; ++i) {
887953
-                pt_pool = &pool_list->pools[i];
887953
-                if (pt_pool->cold_list) {
887953
-                        state->cold_lists[state->n_cold_lists++]
887953
-                                = pt_pool->cold_list;
887953
-                }
887953
-                pt_pool->cold_list = pt_pool->hot_list;
887953
-                pt_pool->hot_list = NULL;
887953
-                if (state->n_cold_lists >= N_COLD_LISTS) {
887953
-                        /* We'll just catch up on a future pass. */
887953
-                        break;
887953
+
887953
+        poisoned = pool_list->poison != 0;
887953
+        if (!poisoned) {
887953
+                for (i = 0; i < NPOOLS; ++i) {
887953
+                        pt_pool = &pool_list->pools[i];
887953
+                        if (pt_pool->cold_list) {
887953
+                                if (state->n_cold_lists >= N_COLD_LISTS) {
887953
+                                        break;
887953
+                                }
887953
+                                state->cold_lists[state->n_cold_lists++]
887953
+                                        = pt_pool->cold_list;
887953
+                        }
887953
+                        pt_pool->cold_list = pt_pool->hot_list;
887953
+                        pt_pool->hot_list = NULL;
887953
                 }
887953
         }
887953
+
887953
         (void) pthread_spin_unlock (&pool_list->lock);
887953
+
887953
+        return poisoned;
887953
 }
887953
 
887953
 
887953
@@ -469,6 +466,7 @@ pool_sweeper (void *arg)
887953
         struct timeval          begin_time;
887953
         struct timeval          end_time;
887953
         struct timeval          elapsed;
887953
+        gf_boolean_t            poisoned;
887953
 
887953
         /*
887953
          * This is all a bit inelegant, but the point is to avoid doing
887953
@@ -488,7 +486,13 @@ pool_sweeper (void *arg)
887953
                 (void) pthread_mutex_lock (&pool_lock);
887953
                 list_for_each_entry_safe (pool_list, next_pl,
887953
                                           &pool_threads, thr_list) {
887953
-                        collect_garbage (&state, pool_list);
887953
+                        (void) pthread_mutex_unlock (&pool_lock);
887953
+                        poisoned = collect_garbage (&state, pool_list);
887953
+                        (void) pthread_mutex_lock (&pool_lock);
887953
+                        if (poisoned) {
887953
+                                list_move(&pool_list->thr_list,
887953
+                                          &state.death_row);
887953
+                        }
887953
                 }
887953
                 (void) pthread_mutex_unlock (&pool_lock);
887953
                 (void) gettimeofday (&end_time, NULL);
887953
@@ -525,7 +529,15 @@ pool_destructor (void *arg)
887953
 {
887953
         per_thread_pool_list_t  *pool_list      = arg;
887953
 
887953
-        /* The pool-sweeper thread will take it from here. */
887953
+        /* The pool-sweeper thread will take it from here.
887953
+         *
887953
+         * We can change 'poison' here without taking locks because the change
887953
+         * itself doesn't interact with other parts of the code and a simple
887953
+         * write is already atomic from the point of view of the processor.
887953
+         *
887953
+         * This change can modify what mem_put() does, but both possibilities
887953
+         * are fine until the sweeper thread kicks in. The real synchronization
887953
+         * must be between mem_put() and the sweeper thread. */
887953
         pool_list->poison = 1;
887953
 }
887953
 
887953
@@ -736,7 +748,7 @@ mem_get_pool_list (void)
887953
         (void) pthread_mutex_unlock (&pool_free_lock);
887953
 
887953
         if (!pool_list) {
887953
-                pool_list = CALLOC (pool_list_size, 1);
887953
+                pool_list = MALLOC (pool_list_size);
887953
                 if (!pool_list) {
887953
                         return NULL;
887953
                 }
887953
@@ -761,26 +773,47 @@ mem_get_pool_list (void)
887953
 }
887953
 
887953
 pooled_obj_hdr_t *
887953
-mem_get_from_pool (per_thread_pool_t *pt_pool)
887953
+mem_get_from_pool (struct mem_pool *mem_pool)
887953
 {
887953
+        per_thread_pool_list_t  *pool_list;
887953
+        per_thread_pool_t       *pt_pool;
887953
         pooled_obj_hdr_t        *retval;
887953
 
887953
+        pool_list = mem_get_pool_list ();
887953
+        if (!pool_list || pool_list->poison) {
887953
+                return NULL;
887953
+        }
887953
+
887953
+        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
887953
+
887953
+        (void) pthread_spin_lock (&pool_list->lock);
887953
+
887953
         retval = pt_pool->hot_list;
887953
         if (retval) {
887953
-                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
887953
                 pt_pool->hot_list = retval->next;
887953
-                return retval;
887953
+                (void) pthread_spin_unlock (&pool_list->lock);
887953
+                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
887953
+        } else {
887953
+                retval = pt_pool->cold_list;
887953
+                if (retval) {
887953
+                        pt_pool->cold_list = retval->next;
887953
+                        (void) pthread_spin_unlock (&pool_list->lock);
887953
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
887953
+                } else {
887953
+                        (void) pthread_spin_unlock (&pool_list->lock);
887953
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
887953
+                        retval = malloc (1 << mem_pool->power_of_two);
887953
+                }
887953
         }
887953
 
887953
-        retval = pt_pool->cold_list;
887953
         if (retval) {
887953
-                GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
887953
-                pt_pool->cold_list = retval->next;
887953
-                return retval;
887953
+                retval->magic = GF_MEM_HEADER_MAGIC;
887953
+                retval->next = NULL;
887953
+                retval->pool_list = pool_list;
887953
+                retval->power_of_two = mem_pool->power_of_two;
887953
         }
887953
 
887953
-        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
887953
-        return malloc (1 << pt_pool->parent->power_of_two);
887953
+        return retval;
887953
 }
887953
 
887953
 
887953
@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool)
887953
         return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two),
887953
                           gf_common_mt_mem_pool);
887953
 #else
887953
-        per_thread_pool_list_t  *pool_list;
887953
-        per_thread_pool_t       *pt_pool;
887953
         pooled_obj_hdr_t        *retval;
887953
 
887953
         if (!mem_pool) {
887953
@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool)
887953
                 return NULL;
887953
         }
887953
 
887953
-        pool_list = mem_get_pool_list ();
887953
-        if (!pool_list || pool_list->poison) {
887953
-                return NULL;
887953
-        }
887953
-
887953
-        (void) pthread_spin_lock (&pool_list->lock);
887953
-        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
887953
-        retval = mem_get_from_pool (pt_pool);
887953
-        (void) pthread_spin_unlock (&pool_list->lock);
887953
-
887953
+        retval = mem_get_from_pool (mem_pool);
887953
         if (!retval) {
887953
                 return NULL;
887953
         }
887953
 
887953
-        retval->magic = GF_MEM_HEADER_MAGIC;
887953
-        retval->next = NULL;
887953
-        retval->pool_list = pool_list;;
887953
-        retval->power_of_two = mem_pool->power_of_two;
887953
-
887953
         return retval + 1;
887953
 #endif /* GF_DISABLE_MEMPOOL */
887953
 }
887953
@@ -849,12 +866,20 @@ mem_put (void *ptr)
887953
         pool_list = hdr->pool_list;
887953
         pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST];
887953
 
887953
-        (void) pthread_spin_lock (&pool_list->lock);
887953
         hdr->magic = GF_MEM_INVALID_MAGIC;
887953
-        hdr->next = pt_pool->hot_list;
887953
-        pt_pool->hot_list = hdr;
887953
-        GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
887953
-        (void) pthread_spin_unlock (&pool_list->lock);
887953
+
887953
+        (void) pthread_spin_lock (&pool_list->lock);
887953
+        if (!pool_list->poison) {
887953
+                hdr->next = pt_pool->hot_list;
887953
+                pt_pool->hot_list = hdr;
887953
+                (void) pthread_spin_unlock (&pool_list->lock);
887953
+                GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
887953
+        } else {
887953
+                (void) pthread_spin_unlock (&pool_list->lock);
887953
+                /* If the owner thread of this element has terminated, we
887953
+                 * simply release its memory. */
887953
+                free(hdr);
887953
+        }
887953
 #endif /* GF_DISABLE_MEMPOOL */
887953
 }
887953
 
887953
-- 
887953
1.8.3.1
887953