From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001 From: Xavi Hernandez Date: Fri, 14 Dec 2018 11:26:36 +0100 Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by per-thread mem pools There was a race in the per-thread memory pool management that could lead to memory corruption. The race appeared when the following sequence of events happened: 1. Thread T1 allocated a memory object O1 from its own private pool P1 2. T1 terminates and P1 is marked to be destroyed 3. The mem-sweeper thread is woken up and scans all private pools 4. It detects that P1 needs to be destroyed and starts releasing the objects from hot and cold lists. 5. Thread T2 releases O1 6. O1 is added to the hot list of P1 The problem happens because steps 4 and 6 are protected by diferent locks, so they can run concurrently. This means that both T1 and T2 are modifying the same list at the same time, potentially causing corruption. This patch fixes the problem using the following approach: 1. When an object is released, it's only returned to the hot list of the corresponding memory pool if it's not marked to be destroyed. Otherwise the memory is released to the system. 2. Object release and mem-sweeper thread synchronize access to the deletion mark of the memory pool to prevent simultaneous access to the list. Some other minor adjustments are made to reduce the lengths of the locked regions. This patch is not 100% identical to upstream version because changes coming from https://github.com/gluster/glusterfs/issues/307 are not backported. Upstream patch: https://review.gluster.org/c/glusterfs/+/21583 > Fixes: bz#1651165 > Change-Id: I63be3893f92096e57f54a6150e0461340084ddde > Signed-off-by: Xavi Hernandez Upstream patch: https://review.gluster.org/c/glusterfs/+/21727 > Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090 > updates: bz#1193929 > Signed-off-by: Xavi Hernandez Change-Id: I63be3893f92096e57f54a6150e0461340084ddde BUG: 1647499 Signed-off-by: Xavi Hernandez Reviewed-on: https://code.engineering.redhat.com/gerrit/158658 Tested-by: RHGS Build Bot Reviewed-by: Pranith Kumar Karampuri --- libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 56 deletions(-) diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c index ba29137..8ff261c 100644 --- a/libglusterfs/src/mem-pool.c +++ b/libglusterfs/src/mem-pool.c @@ -411,37 +411,34 @@ static unsigned int init_count = 0; static pthread_t sweeper_tid; -void +gf_boolean_t collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list) { unsigned int i; per_thread_pool_t *pt_pool; - - if (pool_list->poison) { - list_del (&pool_list->thr_list); - list_add (&pool_list->thr_list, &state->death_row); - return; - } - - if (state->n_cold_lists >= N_COLD_LISTS) { - return; - } + gf_boolean_t poisoned; (void) pthread_spin_lock (&pool_list->lock); - for (i = 0; i < NPOOLS; ++i) { - pt_pool = &pool_list->pools[i]; - if (pt_pool->cold_list) { - state->cold_lists[state->n_cold_lists++] - = pt_pool->cold_list; - } - pt_pool->cold_list = pt_pool->hot_list; - pt_pool->hot_list = NULL; - if (state->n_cold_lists >= N_COLD_LISTS) { - /* We'll just catch up on a future pass. */ - break; + + poisoned = pool_list->poison != 0; + if (!poisoned) { + for (i = 0; i < NPOOLS; ++i) { + pt_pool = &pool_list->pools[i]; + if (pt_pool->cold_list) { + if (state->n_cold_lists >= N_COLD_LISTS) { + break; + } + state->cold_lists[state->n_cold_lists++] + = pt_pool->cold_list; + } + pt_pool->cold_list = pt_pool->hot_list; + pt_pool->hot_list = NULL; } } + (void) pthread_spin_unlock (&pool_list->lock); + + return poisoned; } @@ -469,6 +466,7 @@ pool_sweeper (void *arg) struct timeval begin_time; struct timeval end_time; struct timeval elapsed; + gf_boolean_t poisoned; /* * This is all a bit inelegant, but the point is to avoid doing @@ -488,7 +486,13 @@ pool_sweeper (void *arg) (void) pthread_mutex_lock (&pool_lock); list_for_each_entry_safe (pool_list, next_pl, &pool_threads, thr_list) { - collect_garbage (&state, pool_list); + (void) pthread_mutex_unlock (&pool_lock); + poisoned = collect_garbage (&state, pool_list); + (void) pthread_mutex_lock (&pool_lock); + if (poisoned) { + list_move(&pool_list->thr_list, + &state.death_row); + } } (void) pthread_mutex_unlock (&pool_lock); (void) gettimeofday (&end_time, NULL); @@ -525,7 +529,15 @@ pool_destructor (void *arg) { per_thread_pool_list_t *pool_list = arg; - /* The pool-sweeper thread will take it from here. */ + /* The pool-sweeper thread will take it from here. + * + * We can change 'poison' here without taking locks because the change + * itself doesn't interact with other parts of the code and a simple + * write is already atomic from the point of view of the processor. + * + * This change can modify what mem_put() does, but both possibilities + * are fine until the sweeper thread kicks in. The real synchronization + * must be between mem_put() and the sweeper thread. */ pool_list->poison = 1; } @@ -736,7 +748,7 @@ mem_get_pool_list (void) (void) pthread_mutex_unlock (&pool_free_lock); if (!pool_list) { - pool_list = CALLOC (pool_list_size, 1); + pool_list = MALLOC (pool_list_size); if (!pool_list) { return NULL; } @@ -761,26 +773,47 @@ mem_get_pool_list (void) } pooled_obj_hdr_t * -mem_get_from_pool (per_thread_pool_t *pt_pool) +mem_get_from_pool (struct mem_pool *mem_pool) { + per_thread_pool_list_t *pool_list; + per_thread_pool_t *pt_pool; pooled_obj_hdr_t *retval; + pool_list = mem_get_pool_list (); + if (!pool_list || pool_list->poison) { + return NULL; + } + + pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST]; + + (void) pthread_spin_lock (&pool_list->lock); + retval = pt_pool->hot_list; if (retval) { - GF_ATOMIC_INC (pt_pool->parent->allocs_hot); pt_pool->hot_list = retval->next; - return retval; + (void) pthread_spin_unlock (&pool_list->lock); + GF_ATOMIC_INC (pt_pool->parent->allocs_hot); + } else { + retval = pt_pool->cold_list; + if (retval) { + pt_pool->cold_list = retval->next; + (void) pthread_spin_unlock (&pool_list->lock); + GF_ATOMIC_INC (pt_pool->parent->allocs_cold); + } else { + (void) pthread_spin_unlock (&pool_list->lock); + GF_ATOMIC_INC (pt_pool->parent->allocs_stdc); + retval = malloc (1 << mem_pool->power_of_two); + } } - retval = pt_pool->cold_list; if (retval) { - GF_ATOMIC_INC (pt_pool->parent->allocs_cold); - pt_pool->cold_list = retval->next; - return retval; + retval->magic = GF_MEM_HEADER_MAGIC; + retval->next = NULL; + retval->pool_list = pool_list; + retval->power_of_two = mem_pool->power_of_two; } - GF_ATOMIC_INC (pt_pool->parent->allocs_stdc); - return malloc (1 << pt_pool->parent->power_of_two); + return retval; } @@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool) return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two), gf_common_mt_mem_pool); #else - per_thread_pool_list_t *pool_list; - per_thread_pool_t *pt_pool; pooled_obj_hdr_t *retval; if (!mem_pool) { @@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool) return NULL; } - pool_list = mem_get_pool_list (); - if (!pool_list || pool_list->poison) { - return NULL; - } - - (void) pthread_spin_lock (&pool_list->lock); - pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST]; - retval = mem_get_from_pool (pt_pool); - (void) pthread_spin_unlock (&pool_list->lock); - + retval = mem_get_from_pool (mem_pool); if (!retval) { return NULL; } - retval->magic = GF_MEM_HEADER_MAGIC; - retval->next = NULL; - retval->pool_list = pool_list;; - retval->power_of_two = mem_pool->power_of_two; - return retval + 1; #endif /* GF_DISABLE_MEMPOOL */ } @@ -849,12 +866,20 @@ mem_put (void *ptr) pool_list = hdr->pool_list; pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST]; - (void) pthread_spin_lock (&pool_list->lock); hdr->magic = GF_MEM_INVALID_MAGIC; - hdr->next = pt_pool->hot_list; - pt_pool->hot_list = hdr; - GF_ATOMIC_INC (pt_pool->parent->frees_to_list); - (void) pthread_spin_unlock (&pool_list->lock); + + (void) pthread_spin_lock (&pool_list->lock); + if (!pool_list->poison) { + hdr->next = pt_pool->hot_list; + pt_pool->hot_list = hdr; + (void) pthread_spin_unlock (&pool_list->lock); + GF_ATOMIC_INC (pt_pool->parent->frees_to_list); + } else { + (void) pthread_spin_unlock (&pool_list->lock); + /* If the owner thread of this element has terminated, we + * simply release its memory. */ + free(hdr); + } #endif /* GF_DISABLE_MEMPOOL */ } -- 1.8.3.1