e7a346
From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001
e7a346
From: Xavi Hernandez <xhernandez@redhat.com>
e7a346
Date: Fri, 14 Dec 2018 11:26:36 +0100
e7a346
Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by
e7a346
 per-thread mem pools
e7a346
e7a346
There was a race in the per-thread memory pool management that could lead
e7a346
to memory corruption. The race appeared when the following sequence of
e7a346
events happened:
e7a346
e7a346
1. Thread T1 allocated a memory object O1 from its own private pool P1
e7a346
2. T1 terminates and P1 is marked to be destroyed
e7a346
3. The mem-sweeper thread is woken up and scans all private pools
e7a346
4. It detects that P1 needs to be destroyed and starts releasing the
e7a346
   objects from hot and cold lists.
e7a346
5. Thread T2 releases O1
e7a346
6. O1 is added to the hot list of P1
e7a346
e7a346
The problem happens because steps 4 and 6 are protected by diferent locks,
e7a346
so they can run concurrently. This means that both T1 and T2 are modifying
e7a346
the same list at the same time, potentially causing corruption.
e7a346
e7a346
This patch fixes the problem using the following approach:
e7a346
e7a346
1. When an object is released, it's only returned to the hot list of the
e7a346
   corresponding memory pool if it's not marked to be destroyed. Otherwise
e7a346
   the memory is released to the system.
e7a346
2. Object release and mem-sweeper thread synchronize access to the deletion
e7a346
   mark of the memory pool to prevent simultaneous access to the list.
e7a346
e7a346
Some other minor adjustments are made to reduce the lengths of the locked
e7a346
regions.
e7a346
e7a346
This patch is not 100% identical to upstream version because changes
e7a346
coming from https://github.com/gluster/glusterfs/issues/307 are not
e7a346
backported.
e7a346
e7a346
Upstream patch: https://review.gluster.org/c/glusterfs/+/21583
e7a346
> Fixes: bz#1651165
e7a346
> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
e7a346
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
e7a346
e7a346
Upstream patch: https://review.gluster.org/c/glusterfs/+/21727
e7a346
> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090
e7a346
> updates: bz#1193929
e7a346
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
e7a346
e7a346
Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
e7a346
BUG: 1647499
e7a346
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
e7a346
Reviewed-on: https://code.engineering.redhat.com/gerrit/158658
e7a346
Tested-by: RHGS Build Bot <nigelb@redhat.com>
e7a346
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
e7a346
---
e7a346
 libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------
e7a346
 1 file changed, 81 insertions(+), 56 deletions(-)
e7a346
e7a346
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
e7a346
index ba29137..8ff261c 100644
e7a346
--- a/libglusterfs/src/mem-pool.c
e7a346
+++ b/libglusterfs/src/mem-pool.c
e7a346
@@ -411,37 +411,34 @@ static unsigned int     init_count      = 0;
e7a346
 static pthread_t        sweeper_tid;
e7a346
 
e7a346
 
e7a346
-void
e7a346
+gf_boolean_t
e7a346
 collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list)
e7a346
 {
e7a346
         unsigned int            i;
e7a346
         per_thread_pool_t       *pt_pool;
e7a346
-
e7a346
-        if (pool_list->poison) {
e7a346
-                list_del (&pool_list->thr_list);
e7a346
-                list_add (&pool_list->thr_list, &state->death_row);
e7a346
-                return;
e7a346
-        }
e7a346
-
e7a346
-        if (state->n_cold_lists >= N_COLD_LISTS) {
e7a346
-                return;
e7a346
-        }
e7a346
+        gf_boolean_t            poisoned;
e7a346
 
e7a346
         (void) pthread_spin_lock (&pool_list->lock);
e7a346
-        for (i = 0; i < NPOOLS; ++i) {
e7a346
-                pt_pool = &pool_list->pools[i];
e7a346
-                if (pt_pool->cold_list) {
e7a346
-                        state->cold_lists[state->n_cold_lists++]
e7a346
-                                = pt_pool->cold_list;
e7a346
-                }
e7a346
-                pt_pool->cold_list = pt_pool->hot_list;
e7a346
-                pt_pool->hot_list = NULL;
e7a346
-                if (state->n_cold_lists >= N_COLD_LISTS) {
e7a346
-                        /* We'll just catch up on a future pass. */
e7a346
-                        break;
e7a346
+
e7a346
+        poisoned = pool_list->poison != 0;
e7a346
+        if (!poisoned) {
e7a346
+                for (i = 0; i < NPOOLS; ++i) {
e7a346
+                        pt_pool = &pool_list->pools[i];
e7a346
+                        if (pt_pool->cold_list) {
e7a346
+                                if (state->n_cold_lists >= N_COLD_LISTS) {
e7a346
+                                        break;
e7a346
+                                }
e7a346
+                                state->cold_lists[state->n_cold_lists++]
e7a346
+                                        = pt_pool->cold_list;
e7a346
+                        }
e7a346
+                        pt_pool->cold_list = pt_pool->hot_list;
e7a346
+                        pt_pool->hot_list = NULL;
e7a346
                 }
e7a346
         }
e7a346
+
e7a346
         (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+
e7a346
+        return poisoned;
e7a346
 }
e7a346
 
e7a346
 
e7a346
@@ -469,6 +466,7 @@ pool_sweeper (void *arg)
e7a346
         struct timeval          begin_time;
e7a346
         struct timeval          end_time;
e7a346
         struct timeval          elapsed;
e7a346
+        gf_boolean_t            poisoned;
e7a346
 
e7a346
         /*
e7a346
          * This is all a bit inelegant, but the point is to avoid doing
e7a346
@@ -488,7 +486,13 @@ pool_sweeper (void *arg)
e7a346
                 (void) pthread_mutex_lock (&pool_lock);
e7a346
                 list_for_each_entry_safe (pool_list, next_pl,
e7a346
                                           &pool_threads, thr_list) {
e7a346
-                        collect_garbage (&state, pool_list);
e7a346
+                        (void) pthread_mutex_unlock (&pool_lock);
e7a346
+                        poisoned = collect_garbage (&state, pool_list);
e7a346
+                        (void) pthread_mutex_lock (&pool_lock);
e7a346
+                        if (poisoned) {
e7a346
+                                list_move(&pool_list->thr_list,
e7a346
+                                          &state.death_row);
e7a346
+                        }
e7a346
                 }
e7a346
                 (void) pthread_mutex_unlock (&pool_lock);
e7a346
                 (void) gettimeofday (&end_time, NULL);
e7a346
@@ -525,7 +529,15 @@ pool_destructor (void *arg)
e7a346
 {
e7a346
         per_thread_pool_list_t  *pool_list      = arg;
e7a346
 
e7a346
-        /* The pool-sweeper thread will take it from here. */
e7a346
+        /* The pool-sweeper thread will take it from here.
e7a346
+         *
e7a346
+         * We can change 'poison' here without taking locks because the change
e7a346
+         * itself doesn't interact with other parts of the code and a simple
e7a346
+         * write is already atomic from the point of view of the processor.
e7a346
+         *
e7a346
+         * This change can modify what mem_put() does, but both possibilities
e7a346
+         * are fine until the sweeper thread kicks in. The real synchronization
e7a346
+         * must be between mem_put() and the sweeper thread. */
e7a346
         pool_list->poison = 1;
e7a346
 }
e7a346
 
e7a346
@@ -736,7 +748,7 @@ mem_get_pool_list (void)
e7a346
         (void) pthread_mutex_unlock (&pool_free_lock);
e7a346
 
e7a346
         if (!pool_list) {
e7a346
-                pool_list = CALLOC (pool_list_size, 1);
e7a346
+                pool_list = MALLOC (pool_list_size);
e7a346
                 if (!pool_list) {
e7a346
                         return NULL;
e7a346
                 }
e7a346
@@ -761,26 +773,47 @@ mem_get_pool_list (void)
e7a346
 }
e7a346
 
e7a346
 pooled_obj_hdr_t *
e7a346
-mem_get_from_pool (per_thread_pool_t *pt_pool)
e7a346
+mem_get_from_pool (struct mem_pool *mem_pool)
e7a346
 {
e7a346
+        per_thread_pool_list_t  *pool_list;
e7a346
+        per_thread_pool_t       *pt_pool;
e7a346
         pooled_obj_hdr_t        *retval;
e7a346
 
e7a346
+        pool_list = mem_get_pool_list ();
e7a346
+        if (!pool_list || pool_list->poison) {
e7a346
+                return NULL;
e7a346
+        }
e7a346
+
e7a346
+        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
e7a346
+
e7a346
+        (void) pthread_spin_lock (&pool_list->lock);
e7a346
+
e7a346
         retval = pt_pool->hot_list;
e7a346
         if (retval) {
e7a346
-                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
e7a346
                 pt_pool->hot_list = retval->next;
e7a346
-                return retval;
e7a346
+                (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
e7a346
+        } else {
e7a346
+                retval = pt_pool->cold_list;
e7a346
+                if (retval) {
e7a346
+                        pt_pool->cold_list = retval->next;
e7a346
+                        (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
e7a346
+                } else {
e7a346
+                        (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
e7a346
+                        retval = malloc (1 << mem_pool->power_of_two);
e7a346
+                }
e7a346
         }
e7a346
 
e7a346
-        retval = pt_pool->cold_list;
e7a346
         if (retval) {
e7a346
-                GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
e7a346
-                pt_pool->cold_list = retval->next;
e7a346
-                return retval;
e7a346
+                retval->magic = GF_MEM_HEADER_MAGIC;
e7a346
+                retval->next = NULL;
e7a346
+                retval->pool_list = pool_list;
e7a346
+                retval->power_of_two = mem_pool->power_of_two;
e7a346
         }
e7a346
 
e7a346
-        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
e7a346
-        return malloc (1 << pt_pool->parent->power_of_two);
e7a346
+        return retval;
e7a346
 }
e7a346
 
e7a346
 
e7a346
@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool)
e7a346
         return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two),
e7a346
                           gf_common_mt_mem_pool);
e7a346
 #else
e7a346
-        per_thread_pool_list_t  *pool_list;
e7a346
-        per_thread_pool_t       *pt_pool;
e7a346
         pooled_obj_hdr_t        *retval;
e7a346
 
e7a346
         if (!mem_pool) {
e7a346
@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool)
e7a346
                 return NULL;
e7a346
         }
e7a346
 
e7a346
-        pool_list = mem_get_pool_list ();
e7a346
-        if (!pool_list || pool_list->poison) {
e7a346
-                return NULL;
e7a346
-        }
e7a346
-
e7a346
-        (void) pthread_spin_lock (&pool_list->lock);
e7a346
-        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
e7a346
-        retval = mem_get_from_pool (pt_pool);
e7a346
-        (void) pthread_spin_unlock (&pool_list->lock);
e7a346
-
e7a346
+        retval = mem_get_from_pool (mem_pool);
e7a346
         if (!retval) {
e7a346
                 return NULL;
e7a346
         }
e7a346
 
e7a346
-        retval->magic = GF_MEM_HEADER_MAGIC;
e7a346
-        retval->next = NULL;
e7a346
-        retval->pool_list = pool_list;;
e7a346
-        retval->power_of_two = mem_pool->power_of_two;
e7a346
-
e7a346
         return retval + 1;
e7a346
 #endif /* GF_DISABLE_MEMPOOL */
e7a346
 }
e7a346
@@ -849,12 +866,20 @@ mem_put (void *ptr)
e7a346
         pool_list = hdr->pool_list;
e7a346
         pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST];
e7a346
 
e7a346
-        (void) pthread_spin_lock (&pool_list->lock);
e7a346
         hdr->magic = GF_MEM_INVALID_MAGIC;
e7a346
-        hdr->next = pt_pool->hot_list;
e7a346
-        pt_pool->hot_list = hdr;
e7a346
-        GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
e7a346
-        (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+
e7a346
+        (void) pthread_spin_lock (&pool_list->lock);
e7a346
+        if (!pool_list->poison) {
e7a346
+                hdr->next = pt_pool->hot_list;
e7a346
+                pt_pool->hot_list = hdr;
e7a346
+                (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+                GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
e7a346
+        } else {
e7a346
+                (void) pthread_spin_unlock (&pool_list->lock);
e7a346
+                /* If the owner thread of this element has terminated, we
e7a346
+                 * simply release its memory. */
e7a346
+                free(hdr);
e7a346
+        }
e7a346
 #endif /* GF_DISABLE_MEMPOOL */
e7a346
 }
e7a346
 
e7a346
-- 
e7a346
1.8.3.1
e7a346