From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001
From: Xavi Hernandez <xhernandez@redhat.com>
Date: Fri, 14 Dec 2018 11:26:36 +0100
Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by
 per-thread mem pools

There was a race in the per-thread memory pool management that could lead
to memory corruption. The race appeared when the following sequence of
events happened:

1. Thread T1 allocates a memory object O1 from its own private pool P1.
2. T1 terminates and P1 is marked to be destroyed.
3. The mem-sweeper thread is woken up and scans all private pools.
4. It detects that P1 needs to be destroyed and starts releasing the
   objects from its hot and cold lists.
5. Thread T2 releases O1.
6. O1 is added to the hot list of P1.

The problem happens because steps 4 and 6 are protected by different locks,
so they can run concurrently. This means that the sweeper thread and T2 are
modifying the same list at the same time, potentially causing corruption.
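
To make the race easier to see, here is a minimal, self-contained sketch
of the two code paths involved (simplified types and names, not the actual
GlusterFS code): the release path takes the pool lock, but the old sweeper
path did not, so nothing serialized steps 4 and 6.

    /* Compile with: cc -o race race.c -lpthread */
    #include <pthread.h>
    #include <stdlib.h>

    typedef struct obj { struct obj *next; } obj_t;

    static obj_t *hot_list;             /* stands in for pt_pool->hot_list */
    static pthread_spinlock_t lock;     /* stands in for pool_list->lock */

    /* Step 6: T2 returns O1 to the hot list, under the lock. */
    static void release_obj (obj_t *o)
    {
            pthread_spin_lock (&lock);
            o->next = hot_list;
            hot_list = o;
            pthread_spin_unlock (&lock);
    }

    /* Step 4: the old sweeper released a poisoned pool's objects WITHOUT
     * taking that lock, so it could run concurrently with release_obj()
     * on the same list. */
    static void sweep_poisoned (void)
    {
            obj_t *o = hot_list;        /* unsynchronized read */

            while (o) {
                    obj_t *next = o->next;
                    free (o);           /* may free the object that
                                         * release_obj() is linking in */
                    o = next;
            }
            hot_list = NULL;
    }

    int main (void)
    {
            pthread_spin_init (&lock, PTHREAD_PROCESS_PRIVATE);
            release_obj (malloc (sizeof (obj_t)));
            sweep_poisoned ();  /* safe sequentially; racy if concurrent */
            return 0;
    }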

This patch fixes the problem using the following approach:

1. When an object is released, it's only returned to the hot list of the
   corresponding memory pool if that pool is not marked to be destroyed.
   Otherwise the memory is released to the system.
2. The object release path and the mem-sweeper thread synchronize access to
   the deletion mark of the memory pool, preventing simultaneous access to
   the lists.

Some other minor adjustments are made to shorten the locked regions.
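
For reference, here is a matching sketch of the fixed synchronization
(again simplified; release_obj_fixed() and sweep_pool() are illustrative
names, not functions from this patch). Both paths now test the poison flag
under the same spinlock, mirroring what mem_put() and collect_garbage() do
in the diff below:

    /* Compile with: cc -o fixed fixed.c -lpthread */
    #include <pthread.h>
    #include <stdlib.h>

    typedef struct obj { struct obj *next; } obj_t;

    static obj_t *hot_list;
    static int poison;                  /* set when the owner terminates */
    static pthread_spinlock_t lock;

    /* Release path, as in mem_put() below. */
    static void release_obj_fixed (obj_t *o)
    {
            pthread_spin_lock (&lock);
            if (!poison) {
                    o->next = hot_list; /* owner thread still alive */
                    hot_list = o;
                    pthread_spin_unlock (&lock);
            } else {
                    pthread_spin_unlock (&lock);
                    free (o);           /* owner terminated: give the
                                         * memory back to the system */
            }
    }

    /* Sweeper path, as in collect_garbage() below: the poison flag is
     * read under the same lock, and the caller uses the result to move
     * the pool to death_row outside the locked region. */
    static int sweep_pool (void)
    {
            obj_t *list = NULL, *next;
            int poisoned;

            pthread_spin_lock (&lock);
            poisoned = poison;
            if (!poisoned) {
                    list = hot_list;    /* the real code demotes this to a
                                         * cold list for a later pass */
                    hot_list = NULL;
            }
            pthread_spin_unlock (&lock);

            while (list) {
                    next = list->next;
                    free (list);
                    list = next;
            }
            return poisoned;
    }

    int main (void)
    {
            pthread_spin_init (&lock, PTHREAD_PROCESS_PRIVATE);
            release_obj_fixed (malloc (sizeof (obj_t))); /* to hot list */
            (void) sweep_pool ();                        /* drains it */
            poison = 1;                  /* owner thread "terminates" */
            release_obj_fixed (malloc (sizeof (obj_t))); /* freed directly */
            return 0;
    }

Freeing poisoned objects straight to the system means the sweeper never has
to chase a list that is still being appended to.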

This patch is not 100% identical to the upstream version because changes
coming from https://github.com/gluster/glusterfs/issues/307 are not
backported.

Upstream patch: https://review.gluster.org/c/glusterfs/+/21583
> Fixes: bz#1651165
> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>

Upstream patch: https://review.gluster.org/c/glusterfs/+/21727
> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090
> updates: bz#1193929
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>

Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
BUG: 1647499
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
Reviewed-on: https://code.engineering.redhat.com/gerrit/158658
Tested-by: RHGS Build Bot <nigelb@redhat.com>
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
---
 libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------
 1 file changed, 81 insertions(+), 56 deletions(-)

diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
index ba29137..8ff261c 100644
--- a/libglusterfs/src/mem-pool.c
+++ b/libglusterfs/src/mem-pool.c
@@ -411,37 +411,34 @@ static unsigned int     init_count      = 0;
 static pthread_t        sweeper_tid;
 
 
-void
+gf_boolean_t
 collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list)
 {
         unsigned int            i;
         per_thread_pool_t       *pt_pool;
-
-        if (pool_list->poison) {
-                list_del (&pool_list->thr_list);
-                list_add (&pool_list->thr_list, &state->death_row);
-                return;
-        }
-
-        if (state->n_cold_lists >= N_COLD_LISTS) {
-                return;
-        }
+        gf_boolean_t            poisoned;
 
         (void) pthread_spin_lock (&pool_list->lock);
-        for (i = 0; i < NPOOLS; ++i) {
-                pt_pool = &pool_list->pools[i];
-                if (pt_pool->cold_list) {
-                        state->cold_lists[state->n_cold_lists++]
-                                = pt_pool->cold_list;
-                }
-                pt_pool->cold_list = pt_pool->hot_list;
-                pt_pool->hot_list = NULL;
-                if (state->n_cold_lists >= N_COLD_LISTS) {
-                        /* We'll just catch up on a future pass. */
-                        break;
+
+        poisoned = pool_list->poison != 0;
+        if (!poisoned) {
+                for (i = 0; i < NPOOLS; ++i) {
+                        pt_pool = &pool_list->pools[i];
+                        if (pt_pool->cold_list) {
+                                if (state->n_cold_lists >= N_COLD_LISTS) {
+                                        break;
+                                }
+                                state->cold_lists[state->n_cold_lists++]
+                                        = pt_pool->cold_list;
+                        }
+                        pt_pool->cold_list = pt_pool->hot_list;
+                        pt_pool->hot_list = NULL;
                 }
         }
+
         (void) pthread_spin_unlock (&pool_list->lock);
+
+        return poisoned;
 }
 
 
@@ -469,6 +466,7 @@ pool_sweeper (void *arg)
         struct timeval          begin_time;
         struct timeval          end_time;
         struct timeval          elapsed;
+        gf_boolean_t            poisoned;
 
         /*
          * This is all a bit inelegant, but the point is to avoid doing
@@ -488,7 +486,13 @@ pool_sweeper (void *arg)
                 (void) pthread_mutex_lock (&pool_lock);
                 list_for_each_entry_safe (pool_list, next_pl,
                                           &pool_threads, thr_list) {
-                        collect_garbage (&state, pool_list);
+                        (void) pthread_mutex_unlock (&pool_lock);
+                        poisoned = collect_garbage (&state, pool_list);
+                        (void) pthread_mutex_lock (&pool_lock);
+                        if (poisoned) {
+                                list_move(&pool_list->thr_list,
+                                          &state.death_row);
+                        }
                 }
                 (void) pthread_mutex_unlock (&pool_lock);
                 (void) gettimeofday (&end_time, NULL);
@@ -525,7 +529,15 @@ pool_destructor (void *arg)
 {
         per_thread_pool_list_t  *pool_list      = arg;
 
-        /* The pool-sweeper thread will take it from here. */
+        /* The pool-sweeper thread will take it from here.
+         *
+         * We can change 'poison' here without taking locks because the change
+         * itself doesn't interact with other parts of the code and a simple
+         * write is already atomic from the point of view of the processor.
+         *
+         * This change can modify what mem_put() does, but both possibilities
+         * are fine until the sweeper thread kicks in. The real synchronization
+         * must be between mem_put() and the sweeper thread. */
         pool_list->poison = 1;
 }
 
@@ -736,7 +748,7 @@ mem_get_pool_list (void)
         (void) pthread_mutex_unlock (&pool_free_lock);
 
         if (!pool_list) {
-                pool_list = CALLOC (pool_list_size, 1);
+                pool_list = MALLOC (pool_list_size);
                 if (!pool_list) {
                         return NULL;
                 }
@@ -761,26 +773,47 @@ mem_get_pool_list (void)
 }
 
 pooled_obj_hdr_t *
-mem_get_from_pool (per_thread_pool_t *pt_pool)
+mem_get_from_pool (struct mem_pool *mem_pool)
 {
+        per_thread_pool_list_t  *pool_list;
+        per_thread_pool_t       *pt_pool;
         pooled_obj_hdr_t        *retval;
 
+        pool_list = mem_get_pool_list ();
+        if (!pool_list || pool_list->poison) {
+                return NULL;
+        }
+
+        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
+
+        (void) pthread_spin_lock (&pool_list->lock);
+
         retval = pt_pool->hot_list;
         if (retval) {
-                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
                 pt_pool->hot_list = retval->next;
-                return retval;
+                (void) pthread_spin_unlock (&pool_list->lock);
+                GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
+        } else {
+                retval = pt_pool->cold_list;
+                if (retval) {
+                        pt_pool->cold_list = retval->next;
+                        (void) pthread_spin_unlock (&pool_list->lock);
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
+                } else {
+                        (void) pthread_spin_unlock (&pool_list->lock);
+                        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
+                        retval = malloc (1 << mem_pool->power_of_two);
+                }
         }
 
-        retval = pt_pool->cold_list;
         if (retval) {
-                GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
-                pt_pool->cold_list = retval->next;
-                return retval;
+                retval->magic = GF_MEM_HEADER_MAGIC;
+                retval->next = NULL;
+                retval->pool_list = pool_list;
+                retval->power_of_two = mem_pool->power_of_two;
         }
 
-        GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
-        return malloc (1 << pt_pool->parent->power_of_two);
+        return retval;
 }
 
 
@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool)
         return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two),
                           gf_common_mt_mem_pool);
 #else
-        per_thread_pool_list_t  *pool_list;
-        per_thread_pool_t       *pt_pool;
         pooled_obj_hdr_t        *retval;
 
         if (!mem_pool) {
@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool)
                 return NULL;
         }
 
-        pool_list = mem_get_pool_list ();
-        if (!pool_list || pool_list->poison) {
-                return NULL;
-        }
-
-        (void) pthread_spin_lock (&pool_list->lock);
-        pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
-        retval = mem_get_from_pool (pt_pool);
-        (void) pthread_spin_unlock (&pool_list->lock);
-
+        retval = mem_get_from_pool (mem_pool);
         if (!retval) {
                 return NULL;
         }
 
-        retval->magic = GF_MEM_HEADER_MAGIC;
-        retval->next = NULL;
-        retval->pool_list = pool_list;;
-        retval->power_of_two = mem_pool->power_of_two;
-
         return retval + 1;
 #endif /* GF_DISABLE_MEMPOOL */
 }
@@ -849,12 +866,20 @@ mem_put (void *ptr)
         pool_list = hdr->pool_list;
         pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST];
 
-        (void) pthread_spin_lock (&pool_list->lock);
         hdr->magic = GF_MEM_INVALID_MAGIC;
-        hdr->next = pt_pool->hot_list;
-        pt_pool->hot_list = hdr;
-        GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
-        (void) pthread_spin_unlock (&pool_list->lock);
+
+        (void) pthread_spin_lock (&pool_list->lock);
+        if (!pool_list->poison) {
+                hdr->next = pt_pool->hot_list;
+                pt_pool->hot_list = hdr;
+                (void) pthread_spin_unlock (&pool_list->lock);
+                GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
+        } else {
+                (void) pthread_spin_unlock (&pool_list->lock);
+                /* If the owner thread of this element has terminated, we
+                 * simply release its memory. */
+                free(hdr);
+        }
 #endif /* GF_DISABLE_MEMPOOL */
 }
 
-- 
1.8.3.1