|
|
887953 |
From a5471a84069631ab0d0605cf7b68f16285f5079f Mon Sep 17 00:00:00 2001
|
|
|
887953 |
From: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
Date: Fri, 14 Dec 2018 11:26:36 +0100
|
|
|
887953 |
Subject: [PATCH 478/493] libglusterfs: fix memory corruption caused by
|
|
|
887953 |
per-thread mem pools
|
|
|
887953 |
|
|
|
887953 |
There was a race in the per-thread memory pool management that could lead
|
|
|
887953 |
to memory corruption. The race appeared when the following sequence of
|
|
|
887953 |
events happened:
|
|
|
887953 |
|
|
|
887953 |
1. Thread T1 allocated a memory object O1 from its own private pool P1
|
|
|
887953 |
2. T1 terminates and P1 is marked to be destroyed
|
|
|
887953 |
3. The mem-sweeper thread is woken up and scans all private pools
|
|
|
887953 |
4. It detects that P1 needs to be destroyed and starts releasing the
|
|
|
887953 |
objects from hot and cold lists.
|
|
|
887953 |
5. Thread T2 releases O1
|
|
|
887953 |
6. O1 is added to the hot list of P1
|
|
|
887953 |
|
|
|
887953 |
The problem happens because steps 4 and 6 are protected by diferent locks,
|
|
|
887953 |
so they can run concurrently. This means that both T1 and T2 are modifying
|
|
|
887953 |
the same list at the same time, potentially causing corruption.
|
|
|
887953 |
|
|
|
887953 |
This patch fixes the problem using the following approach:
|
|
|
887953 |
|
|
|
887953 |
1. When an object is released, it's only returned to the hot list of the
|
|
|
887953 |
corresponding memory pool if it's not marked to be destroyed. Otherwise
|
|
|
887953 |
the memory is released to the system.
|
|
|
887953 |
2. Object release and mem-sweeper thread synchronize access to the deletion
|
|
|
887953 |
mark of the memory pool to prevent simultaneous access to the list.
|
|
|
887953 |
|
|
|
887953 |
Some other minor adjustments are made to reduce the lengths of the locked
|
|
|
887953 |
regions.
|
|
|
887953 |
|
|
|
887953 |
This patch is not 100% identical to upstream version because changes
|
|
|
887953 |
coming from https://github.com/gluster/glusterfs/issues/307 are not
|
|
|
887953 |
backported.
|
|
|
887953 |
|
|
|
887953 |
Upstream patch: https://review.gluster.org/c/glusterfs/+/21583
|
|
|
887953 |
> Fixes: bz#1651165
|
|
|
887953 |
> Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
|
|
|
887953 |
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
|
|
|
887953 |
Upstream patch: https://review.gluster.org/c/glusterfs/+/21727
|
|
|
887953 |
> Change-Id: Idbf23bda7f9228d60c644a1bea4b6c2cfc582090
|
|
|
887953 |
> updates: bz#1193929
|
|
|
887953 |
> Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
|
|
|
887953 |
Change-Id: I63be3893f92096e57f54a6150e0461340084ddde
|
|
|
887953 |
BUG: 1647499
|
|
|
887953 |
Signed-off-by: Xavi Hernandez <xhernandez@redhat.com>
|
|
|
887953 |
Reviewed-on: https://code.engineering.redhat.com/gerrit/158658
|
|
|
887953 |
Tested-by: RHGS Build Bot <nigelb@redhat.com>
|
|
|
887953 |
Reviewed-by: Pranith Kumar Karampuri <pkarampu@redhat.com>
|
|
|
887953 |
---
|
|
|
887953 |
libglusterfs/src/mem-pool.c | 137 ++++++++++++++++++++++++++------------------
|
|
|
887953 |
1 file changed, 81 insertions(+), 56 deletions(-)
|
|
|
887953 |
|
|
|
887953 |
diff --git a/libglusterfs/src/mem-pool.c b/libglusterfs/src/mem-pool.c
|
|
|
887953 |
index ba29137..8ff261c 100644
|
|
|
887953 |
--- a/libglusterfs/src/mem-pool.c
|
|
|
887953 |
+++ b/libglusterfs/src/mem-pool.c
|
|
|
887953 |
@@ -411,37 +411,34 @@ static unsigned int init_count = 0;
|
|
|
887953 |
static pthread_t sweeper_tid;
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
-void
|
|
|
887953 |
+gf_boolean_t
|
|
|
887953 |
collect_garbage (sweep_state_t *state, per_thread_pool_list_t *pool_list)
|
|
|
887953 |
{
|
|
|
887953 |
unsigned int i;
|
|
|
887953 |
per_thread_pool_t *pt_pool;
|
|
|
887953 |
-
|
|
|
887953 |
- if (pool_list->poison) {
|
|
|
887953 |
- list_del (&pool_list->thr_list);
|
|
|
887953 |
- list_add (&pool_list->thr_list, &state->death_row);
|
|
|
887953 |
- return;
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
- if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
|
887953 |
- return;
|
|
|
887953 |
- }
|
|
|
887953 |
+ gf_boolean_t poisoned;
|
|
|
887953 |
|
|
|
887953 |
(void) pthread_spin_lock (&pool_list->lock);
|
|
|
887953 |
- for (i = 0; i < NPOOLS; ++i) {
|
|
|
887953 |
- pt_pool = &pool_list->pools[i];
|
|
|
887953 |
- if (pt_pool->cold_list) {
|
|
|
887953 |
- state->cold_lists[state->n_cold_lists++]
|
|
|
887953 |
- = pt_pool->cold_list;
|
|
|
887953 |
- }
|
|
|
887953 |
- pt_pool->cold_list = pt_pool->hot_list;
|
|
|
887953 |
- pt_pool->hot_list = NULL;
|
|
|
887953 |
- if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
|
887953 |
- /* We'll just catch up on a future pass. */
|
|
|
887953 |
- break;
|
|
|
887953 |
+
|
|
|
887953 |
+ poisoned = pool_list->poison != 0;
|
|
|
887953 |
+ if (!poisoned) {
|
|
|
887953 |
+ for (i = 0; i < NPOOLS; ++i) {
|
|
|
887953 |
+ pt_pool = &pool_list->pools[i];
|
|
|
887953 |
+ if (pt_pool->cold_list) {
|
|
|
887953 |
+ if (state->n_cold_lists >= N_COLD_LISTS) {
|
|
|
887953 |
+ break;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ state->cold_lists[state->n_cold_lists++]
|
|
|
887953 |
+ = pt_pool->cold_list;
|
|
|
887953 |
+ }
|
|
|
887953 |
+ pt_pool->cold_list = pt_pool->hot_list;
|
|
|
887953 |
+ pt_pool->hot_list = NULL;
|
|
|
887953 |
}
|
|
|
887953 |
}
|
|
|
887953 |
+
|
|
|
887953 |
(void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+
|
|
|
887953 |
+ return poisoned;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
@@ -469,6 +466,7 @@ pool_sweeper (void *arg)
|
|
|
887953 |
struct timeval begin_time;
|
|
|
887953 |
struct timeval end_time;
|
|
|
887953 |
struct timeval elapsed;
|
|
|
887953 |
+ gf_boolean_t poisoned;
|
|
|
887953 |
|
|
|
887953 |
/*
|
|
|
887953 |
* This is all a bit inelegant, but the point is to avoid doing
|
|
|
887953 |
@@ -488,7 +486,13 @@ pool_sweeper (void *arg)
|
|
|
887953 |
(void) pthread_mutex_lock (&pool_lock);
|
|
|
887953 |
list_for_each_entry_safe (pool_list, next_pl,
|
|
|
887953 |
&pool_threads, thr_list) {
|
|
|
887953 |
- collect_garbage (&state, pool_list);
|
|
|
887953 |
+ (void) pthread_mutex_unlock (&pool_lock);
|
|
|
887953 |
+ poisoned = collect_garbage (&state, pool_list);
|
|
|
887953 |
+ (void) pthread_mutex_lock (&pool_lock);
|
|
|
887953 |
+ if (poisoned) {
|
|
|
887953 |
+ list_move(&pool_list->thr_list,
|
|
|
887953 |
+ &state.death_row);
|
|
|
887953 |
+ }
|
|
|
887953 |
}
|
|
|
887953 |
(void) pthread_mutex_unlock (&pool_lock);
|
|
|
887953 |
(void) gettimeofday (&end_time, NULL);
|
|
|
887953 |
@@ -525,7 +529,15 @@ pool_destructor (void *arg)
|
|
|
887953 |
{
|
|
|
887953 |
per_thread_pool_list_t *pool_list = arg;
|
|
|
887953 |
|
|
|
887953 |
- /* The pool-sweeper thread will take it from here. */
|
|
|
887953 |
+ /* The pool-sweeper thread will take it from here.
|
|
|
887953 |
+ *
|
|
|
887953 |
+ * We can change 'poison' here without taking locks because the change
|
|
|
887953 |
+ * itself doesn't interact with other parts of the code and a simple
|
|
|
887953 |
+ * write is already atomic from the point of view of the processor.
|
|
|
887953 |
+ *
|
|
|
887953 |
+ * This change can modify what mem_put() does, but both possibilities
|
|
|
887953 |
+ * are fine until the sweeper thread kicks in. The real synchronization
|
|
|
887953 |
+ * must be between mem_put() and the sweeper thread. */
|
|
|
887953 |
pool_list->poison = 1;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
@@ -736,7 +748,7 @@ mem_get_pool_list (void)
|
|
|
887953 |
(void) pthread_mutex_unlock (&pool_free_lock);
|
|
|
887953 |
|
|
|
887953 |
if (!pool_list) {
|
|
|
887953 |
- pool_list = CALLOC (pool_list_size, 1);
|
|
|
887953 |
+ pool_list = MALLOC (pool_list_size);
|
|
|
887953 |
if (!pool_list) {
|
|
|
887953 |
return NULL;
|
|
|
887953 |
}
|
|
|
887953 |
@@ -761,26 +773,47 @@ mem_get_pool_list (void)
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
pooled_obj_hdr_t *
|
|
|
887953 |
-mem_get_from_pool (per_thread_pool_t *pt_pool)
|
|
|
887953 |
+mem_get_from_pool (struct mem_pool *mem_pool)
|
|
|
887953 |
{
|
|
|
887953 |
+ per_thread_pool_list_t *pool_list;
|
|
|
887953 |
+ per_thread_pool_t *pt_pool;
|
|
|
887953 |
pooled_obj_hdr_t *retval;
|
|
|
887953 |
|
|
|
887953 |
+ pool_list = mem_get_pool_list ();
|
|
|
887953 |
+ if (!pool_list || pool_list->poison) {
|
|
|
887953 |
+ return NULL;
|
|
|
887953 |
+ }
|
|
|
887953 |
+
|
|
|
887953 |
+ pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
|
|
|
887953 |
+
|
|
|
887953 |
+ (void) pthread_spin_lock (&pool_list->lock);
|
|
|
887953 |
+
|
|
|
887953 |
retval = pt_pool->hot_list;
|
|
|
887953 |
if (retval) {
|
|
|
887953 |
- GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
|
|
|
887953 |
pt_pool->hot_list = retval->next;
|
|
|
887953 |
- return retval;
|
|
|
887953 |
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+ GF_ATOMIC_INC (pt_pool->parent->allocs_hot);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ retval = pt_pool->cold_list;
|
|
|
887953 |
+ if (retval) {
|
|
|
887953 |
+ pt_pool->cold_list = retval->next;
|
|
|
887953 |
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+ GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+ GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
|
|
|
887953 |
+ retval = malloc (1 << mem_pool->power_of_two);
|
|
|
887953 |
+ }
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- retval = pt_pool->cold_list;
|
|
|
887953 |
if (retval) {
|
|
|
887953 |
- GF_ATOMIC_INC (pt_pool->parent->allocs_cold);
|
|
|
887953 |
- pt_pool->cold_list = retval->next;
|
|
|
887953 |
- return retval;
|
|
|
887953 |
+ retval->magic = GF_MEM_HEADER_MAGIC;
|
|
|
887953 |
+ retval->next = NULL;
|
|
|
887953 |
+ retval->pool_list = pool_list;
|
|
|
887953 |
+ retval->power_of_two = mem_pool->power_of_two;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- GF_ATOMIC_INC (pt_pool->parent->allocs_stdc);
|
|
|
887953 |
- return malloc (1 << pt_pool->parent->power_of_two);
|
|
|
887953 |
+ return retval;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
|
|
|
887953 |
@@ -791,8 +824,6 @@ mem_get (struct mem_pool *mem_pool)
|
|
|
887953 |
return GF_CALLOC (1, AVAILABLE_SIZE (mem_pool->power_of_two),
|
|
|
887953 |
gf_common_mt_mem_pool);
|
|
|
887953 |
#else
|
|
|
887953 |
- per_thread_pool_list_t *pool_list;
|
|
|
887953 |
- per_thread_pool_t *pt_pool;
|
|
|
887953 |
pooled_obj_hdr_t *retval;
|
|
|
887953 |
|
|
|
887953 |
if (!mem_pool) {
|
|
|
887953 |
@@ -801,25 +832,11 @@ mem_get (struct mem_pool *mem_pool)
|
|
|
887953 |
return NULL;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- pool_list = mem_get_pool_list ();
|
|
|
887953 |
- if (!pool_list || pool_list->poison) {
|
|
|
887953 |
- return NULL;
|
|
|
887953 |
- }
|
|
|
887953 |
-
|
|
|
887953 |
- (void) pthread_spin_lock (&pool_list->lock);
|
|
|
887953 |
- pt_pool = &pool_list->pools[mem_pool->power_of_two-POOL_SMALLEST];
|
|
|
887953 |
- retval = mem_get_from_pool (pt_pool);
|
|
|
887953 |
- (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
-
|
|
|
887953 |
+ retval = mem_get_from_pool (mem_pool);
|
|
|
887953 |
if (!retval) {
|
|
|
887953 |
return NULL;
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
- retval->magic = GF_MEM_HEADER_MAGIC;
|
|
|
887953 |
- retval->next = NULL;
|
|
|
887953 |
- retval->pool_list = pool_list;;
|
|
|
887953 |
- retval->power_of_two = mem_pool->power_of_two;
|
|
|
887953 |
-
|
|
|
887953 |
return retval + 1;
|
|
|
887953 |
#endif /* GF_DISABLE_MEMPOOL */
|
|
|
887953 |
}
|
|
|
887953 |
@@ -849,12 +866,20 @@ mem_put (void *ptr)
|
|
|
887953 |
pool_list = hdr->pool_list;
|
|
|
887953 |
pt_pool = &pool_list->pools[hdr->power_of_two-POOL_SMALLEST];
|
|
|
887953 |
|
|
|
887953 |
- (void) pthread_spin_lock (&pool_list->lock);
|
|
|
887953 |
hdr->magic = GF_MEM_INVALID_MAGIC;
|
|
|
887953 |
- hdr->next = pt_pool->hot_list;
|
|
|
887953 |
- pt_pool->hot_list = hdr;
|
|
|
887953 |
- GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
|
|
|
887953 |
- (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+
|
|
|
887953 |
+ (void) pthread_spin_lock (&pool_list->lock);
|
|
|
887953 |
+ if (!pool_list->poison) {
|
|
|
887953 |
+ hdr->next = pt_pool->hot_list;
|
|
|
887953 |
+ pt_pool->hot_list = hdr;
|
|
|
887953 |
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+ GF_ATOMIC_INC (pt_pool->parent->frees_to_list);
|
|
|
887953 |
+ } else {
|
|
|
887953 |
+ (void) pthread_spin_unlock (&pool_list->lock);
|
|
|
887953 |
+ /* If the owner thread of this element has terminated, we
|
|
|
887953 |
+ * simply release its memory. */
|
|
|
887953 |
+ free(hdr);
|
|
|
887953 |
+ }
|
|
|
887953 |
#endif /* GF_DISABLE_MEMPOOL */
|
|
|
887953 |
}
|
|
|
887953 |
|
|
|
887953 |
--
|
|
|
887953 |
1.8.3.1
|
|
|
887953 |
|