Upstream commits:
commit a62719ba90e2fa1728890ae7dc8df9e32a622e7b
Author: Florian Weimer <fweimer@redhat.com>
Date: Wed Oct 28 19:32:46 2015 +0100
malloc: Prevent arena free_list from turning cyclic [BZ #19048]
commit 3da825ce483903e3a881a016113b3e59fd4041de
Author: Florian Weimer <fweimer@redhat.com>
Date: Wed Dec 16 12:39:48 2015 +0100
malloc: Fix attached thread reference count handling [BZ #19243]
commit 90c400bd4904b0240a148f0b357a5cbc36179239
Author: Florian Weimer <fweimer@redhat.com>
Date: Mon Dec 21 16:42:46 2015 +0100
malloc: Fix list_lock/arena lock deadlock [BZ #19182]
commit 7962541a32eff5597bc4207e781cfac8d1bb0d87
Author: Florian Weimer <fweimer@redhat.com>
Date: Wed Dec 23 17:23:33 2015 +0100
malloc: Update comment for list_lock
commit 2a38688932243b5b16fb12d84c7ac1138ce50363
Author: Florian Weimer <fweimer@redhat.com>
Date: Fri Feb 19 14:11:32 2016 +0100
tst-malloc-thread-exit: Use fewer system resources
Also included is the following change, which has not yet been
committed upstream:
malloc: Preserve arena free list/thread count invariant [BZ #20370]
It is necessary to preserve the invariant that if an arena is
on the free list, it has thread attach count zero. Otherwise,
when arena_thread_freeres sees the zero attach count, it will
add it, and without the invariant, an arena could get pushed
to the list twice, resulting in a cycle.
One possible execution trace looks like this:
Thread 1 examines free list and observes it as empty.
Thread 2 exits and adds its arena to the free list,
with attached_threads == 0).
Thread 1 selects this arena in reused_arena (not from the free list).
Thread 1 increments attached_threads and attaches itself.
(The arena remains on the free list.)
Thread 1 exits, decrements attached_threads,
and adds the arena to the free list.
The final step creates a cycle in the usual way (by overwriting the
next_free member with the former list head, while there is another
list item pointing to the arena structure).
tst-malloc-thread-exit exhibits this issue, but it was only visible
with a debugger because the incorrect fix in bug 19243 removed
the assert from get_free_list.
Index: b/malloc/arena.c
===================================================================
--- a/malloc/arena.c
+++ b/malloc/arena.c
@@ -77,10 +77,30 @@ extern int sanity_check_heap_info_alignm
/* Thread specific data */
static tsd_key_t arena_key;
-static mutex_t list_lock = MUTEX_INITIALIZER;
+
+/* Arena free list. free_list_lock synchronizes access to the
+ free_list variable below, and the next_free and attached_threads
+ members of struct malloc_state objects. No other locks must be
+ acquired after free_list_lock has been acquired. */
+
+static mutex_t free_list_lock = MUTEX_INITIALIZER;
static size_t narenas = 1;
static mstate free_list;
+/* list_lock prevents concurrent writes to the next member of struct
+ malloc_state objects.
+
+ Read access to the next member is supposed to synchronize with the
+ atomic_write_barrier and the write to the next member in
+ _int_new_arena. This suffers from data races; see the FIXME
+ comments in _int_new_arena and reused_arena.
+
+ list_lock also prevents concurrent forks. At the time list_lock is
+ acquired, no arena lock must have been acquired, but it is
+ permitted to acquire arena locks subsequently, while list_lock is
+ acquired. */
+static mutex_t list_lock = MUTEX_INITIALIZER;
+
#if THREAD_STATS
static int stat_n_heaps;
#define THREAD_STAT(x) x
@@ -221,6 +241,10 @@ ptmalloc_lock_all (void)
if(__malloc_initialized < 1)
return;
+
+ /* We do not acquire free_list_lock here because we completely
+ reconstruct free_list in ptmalloc_unlock_all2. */
+
if (mutex_trylock(&list_lock))
{
void *my_arena;
@@ -242,7 +266,10 @@ ptmalloc_lock_all (void)
save_free_hook = __free_hook;
__malloc_hook = malloc_atfork;
__free_hook = free_atfork;
- /* Only the current thread may perform malloc/free calls now. */
+ /* Only the current thread may perform malloc/free calls now.
+ save_arena will be reattached to the current thread, in
+ ptmalloc_lock_all, so save_arena->attached_threads is not
+ updated. */
tsd_getspecific(arena_key, save_arena);
tsd_setspecific(arena_key, ATFORK_ARENA_PTR);
out:
@@ -258,6 +285,9 @@ ptmalloc_unlock_all (void)
return;
if (--atfork_recursive_cntr != 0)
return;
+ /* Replace ATFORK_ARENA_PTR with save_arena.
+ save_arena->attached_threads was not changed in ptmalloc_lock_all
+ and is still correct. */
tsd_setspecific(arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
@@ -286,16 +316,24 @@ ptmalloc_unlock_all2 (void)
tsd_setspecific(arena_key, save_arena);
__malloc_hook = save_malloc_hook;
__free_hook = save_free_hook;
+ /* Push all arenas to the free list, except save_arena, which is
+ attached to the current thread. */
+ mutex_init (&free_list_lock);
+ if (save_arena != NULL)
+ ((mstate) save_arena)->attached_threads = 1;
free_list = NULL;
for(ar_ptr = &main_arena;;) {
mutex_init(&ar_ptr->mutex);
if (ar_ptr != save_arena) {
+ /* This arena is no longer attached to any thread. */
+ ar_ptr->attached_threads = 0;
ar_ptr->next_free = free_list;
free_list = ar_ptr;
}
ar_ptr = ar_ptr->next;
if(ar_ptr == &main_arena) break;
}
+
mutex_init(&list_lock);
atfork_recursive_cntr = 0;
}
@@ -692,8 +730,25 @@ heap_trim(heap_info *heap, size_t pad)
return 1;
}
-/* Create a new arena with initial size "size". */
+/* If REPLACED_ARENA is not NULL, detach it from this thread. Must be
+ called while free_list_lock is held. */
+static void
+detach_arena (mstate replaced_arena)
+{
+ if (replaced_arena != NULL)
+ {
+ assert (replaced_arena->attached_threads > 0);
+ /* The current implementation only detaches from main_arena in
+ case of allocation failure. This means that it is likely not
+ beneficial to put the arena on free_list even if the
+ reference count reaches zero. */
+ --replaced_arena->attached_threads;
+ }
+}
+
+
+/* Create a new arena with initial size "size". */
static mstate
_int_new_arena(size_t size)
{
@@ -714,6 +769,7 @@ _int_new_arena(size_t size)
}
a = h->ar_ptr = (mstate)(h+1);
malloc_init_state(a);
+ a->attached_threads = 1;
/*a->next = NULL;*/
a->system_mem = a->max_system_mem = h->size;
arena_mem += h->size;
@@ -727,36 +783,68 @@ _int_new_arena(size_t size)
set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE);
LIBC_PROBE (memory_arena_new, 2, a, size);
+ mstate replaced_arena;
+ {
+ void *vptr = NULL;
+ replaced_arena = tsd_getspecific (arena_key, vptr);
+ }
tsd_setspecific(arena_key, (void *)a);
mutex_init(&a->mutex);
- (void)mutex_lock(&a->mutex);
(void)mutex_lock(&list_lock);
/* Add the new arena to the global list. */
a->next = main_arena.next;
+ /* FIXME: The barrier is an attempt to synchronize with read access
+ in reused_arena, which does not acquire list_lock while
+ traversing the list. */
atomic_write_barrier ();
main_arena.next = a;
(void)mutex_unlock(&list_lock);
+ (void) mutex_lock (&free_list_lock);
+ detach_arena (replaced_arena);
+ (void) mutex_unlock (&free_list_lock);
+
+ /* Lock this arena. NB: Another thread may have been attached to
+ this arena because the arena is now accessible from the
+ main_arena.next list and could have been picked by reused_arena.
+ This can only happen for the last arena created (before the arena
+ limit is reached). At this point, some arena has to be attached
+ to two threads. We could acquire the arena lock before list_lock
+ to make it less likely that reused_arena picks this new arena,
+ but this could result in a deadlock with ptmalloc_lock_all. */
+
+ (void) mutex_lock (&a->mutex);
+
THREAD_STAT(++(a->stat_lock_loop));
return a;
}
-
+/* Remove an arena from free_list. */
static mstate
get_free_list (void)
{
+ void *vptr = NULL;
+ mstate replaced_arena = tsd_getspecific (arena_key, vptr);
mstate result = free_list;
if (result != NULL)
{
- (void)mutex_lock(&list_lock);
+ (void)mutex_lock(&free_list_lock);
result = free_list;
if (result != NULL)
- free_list = result->next_free;
- (void)mutex_unlock(&list_lock);
+ {
+ free_list = result->next_free;
+
+ /* The arena will be attached to this thread. */
+ assert (result->attached_threads == 0);
+ result->attached_threads = 1;
+
+ detach_arena (replaced_arena);
+ }
+ (void)mutex_unlock(&free_list_lock);
if (result != NULL)
{
@@ -770,6 +858,26 @@ get_free_list (void)
return result;
}
+/* Remove the arena from the free list (if it is present).
+ free_list_lock must have been acquired by the caller. */
+static void
+remove_from_free_list (mstate arena)
+{
+ mstate *previous = &free_list;
+ for (mstate p = free_list; p != NULL; p = p->next_free)
+ {
+ assert (p->attached_threads == 0);
+ if (p == arena)
+ {
+ /* Remove the requested arena from the list. */
+ *previous = p->next_free;
+ break;
+ }
+ else
+ previous = &p->next_free;
+ }
+}
+
/* Lock and return an arena that can be reused for memory allocation.
Avoid AVOID_ARENA as we have already failed to allocate memory in
it and it is currently locked. */
@@ -777,16 +885,20 @@ static mstate
reused_arena (mstate avoid_arena)
{
mstate result;
+ /* FIXME: Access to next_to_use suffers from data races. */
static mstate next_to_use;
if (next_to_use == NULL)
next_to_use = &main_arena;
+ /* Iterate over all arenas (including those linked from
+ free_list). */
result = next_to_use;
do
{
if (!arena_is_corrupt (result) && !mutex_trylock(&result->mutex))
goto out;
+ /* FIXME: This is a data race, see _int_new_arena. */
result = result->next;
}
while (result != next_to_use);
@@ -815,6 +927,27 @@ reused_arena (mstate avoid_arena)
(void)mutex_lock(&result->mutex);
out:
+ /* Attach the arena to the current thread. */
+ {
+ /* Update the arena thread attachment counters. */
+ void *vptr = NULL;
+ mstate replaced_arena = tsd_getspecific (arena_key, vptr);
+ (void) mutex_lock (&free_list_lock);
+ detach_arena (replaced_arena);
+
+ /* We may have picked up an arena on the free list. We need to
+ preserve the invariant that no arena on the free list has a
+ positive attached_threads counter (otherwise,
+ arena_thread_freeres cannot use the counter to determine if the
+ arena needs to be put on the free list). We unconditionally
+ remove the selected arena from the free list. The caller of
+ reused_arena checked the free list and observed it to be empty,
+ so the list is very short. */
+ remove_from_free_list (result);
+
+ ++result->attached_threads;
+ (void) mutex_unlock (&free_list_lock);
+ }
LIBC_PROBE (memory_arena_reuse, 2, result, avoid_arena);
tsd_setspecific(arena_key, (void *)result);
THREAD_STAT(++(result->stat_lock_loop));
@@ -905,10 +1038,16 @@ arena_thread_freeres (void)
if (a != NULL)
{
- (void)mutex_lock(&list_lock);
- a->next_free = free_list;
- free_list = a;
- (void)mutex_unlock(&list_lock);
+ (void)mutex_lock(&free_list_lock);
+ /* If this was the last attached thread for this arena, put the
+ arena on the free list. */
+ assert (a->attached_threads > 0);
+ if (--a->attached_threads == 0)
+ {
+ a->next_free = free_list;
+ free_list = a;
+ }
+ (void)mutex_unlock(&free_list_lock);
}
}
text_set_element (__libc_thread_subfreeres, arena_thread_freeres);
Index: b/malloc/malloc.c
===================================================================
--- a/malloc/malloc.c
+++ b/malloc/malloc.c
@@ -1727,8 +1727,13 @@ struct malloc_state {
/* Linked list */
struct malloc_state *next;
- /* Linked list for free arenas. */
+ /* Linked list for free arenas. Access to this field is serialized
+ by free_list_lock in arena.c. */
struct malloc_state *next_free;
+ /* Number of threads attached to this arena. 0 if the arena is on
+ the free list. Access to this field is serialized by
+ free_list_lock in arena.c. */
+ INTERNAL_SIZE_T attached_threads;
/* Memory allocated from the system in this arena. */
INTERNAL_SIZE_T system_mem;
@@ -1772,7 +1777,8 @@ struct malloc_par {
static struct malloc_state main_arena =
{
.mutex = MUTEX_INITIALIZER,
- .next = &main_arena
+ .next = &main_arena,
+ .attached_threads = 1,
};
/* There is only one instance of the malloc parameters. */
Index: b/malloc/Makefile
===================================================================
--- a/malloc/Makefile
+++ b/malloc/Makefile
@@ -20,13 +20,14 @@
#
subdir := malloc
-all:
+include ../Makeconfig
dist-headers := malloc.h
headers := $(dist-headers) obstack.h mcheck.h
tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \
- tst-malloc-usable tst-malloc-backtrace
+ tst-malloc-usable \
+ tst-malloc-backtrace tst-malloc-thread-exit
test-srcs = tst-mtrace
routines = malloc morecore mcheck mtrace obstack
@@ -43,6 +44,8 @@ libmemusage-inhibit-o = $(filter-out .os
$(objpfx)tst-malloc-backtrace: $(common-objpfx)nptl/libpthread.so \
$(common-objpfx)nptl/libpthread_nonshared.a
+$(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \
+ $(common-objpfx)nptl/libpthread_nonshared.a
# These should be removed by `make clean'.
extra-objs = mcheck-init.o libmcheck.a
@@ -50,8 +53,6 @@ extra-objs = mcheck-init.o libmcheck.a
# Include the cleanup handler.
aux := set-freeres thread-freeres
-include ../Makeconfig
-
CPPFLAGS-memusagestat = -DNOT_IN_libc
# The Perl script to analyze the output of the mtrace functions.
Index: b/malloc/tst-malloc-thread-exit.c
===================================================================
--- /dev/null
+++ b/malloc/tst-malloc-thread-exit.c
@@ -0,0 +1,218 @@
+/* Test malloc with concurrent thread termination.
+ Copyright (C) 2015-2016 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* This thread spawns a number of outer threads, equal to the arena
+ limit. The outer threads run a loop which start and join two
+ different kinds of threads: the first kind allocates (attaching an
+ arena to the thread; malloc_first_thread) and waits, the second
+ kind waits and allocates (wait_first_threads). Both kinds of
+ threads exit immediately after waiting. The hope is that this will
+ exhibit races in thread termination and arena management,
+ particularly related to the arena free list. */
+
+#include <errno.h>
+#include <malloc.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static int do_test (void);
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
+
+static bool termination_requested;
+static int inner_thread_count = 4;
+static size_t malloc_size = 32;
+
+static void
+__attribute__ ((noinline, noclone))
+unoptimized_free (void *ptr)
+{
+ free (ptr);
+}
+
+static void *
+malloc_first_thread (void * closure)
+{
+ pthread_barrier_t *barrier = closure;
+ void *ptr = malloc (malloc_size);
+ if (ptr == NULL)
+ {
+ printf ("error: malloc: %m\n");
+ abort ();
+ }
+ int ret = pthread_barrier_wait (barrier);
+ if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ {
+ errno = ret;
+ printf ("error: pthread_barrier_wait: %m\n");
+ abort ();
+ }
+ unoptimized_free (ptr);
+ return NULL;
+}
+
+static void *
+wait_first_thread (void * closure)
+{
+ pthread_barrier_t *barrier = closure;
+ int ret = pthread_barrier_wait (barrier);
+ if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ {
+ errno = ret;
+ printf ("error: pthread_barrier_wait: %m\n");
+ abort ();
+ }
+ void *ptr = malloc (malloc_size);
+ if (ptr == NULL)
+ {
+ printf ("error: malloc: %m\n");
+ abort ();
+ }
+ unoptimized_free (ptr);
+ return NULL;
+}
+
+static void *
+outer_thread (void *closure)
+{
+ pthread_t *threads = calloc (sizeof (*threads), inner_thread_count);
+ if (threads == NULL)
+ {
+ printf ("error: calloc: %m\n");
+ abort ();
+ }
+
+ while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED))
+ {
+ pthread_barrier_t barrier;
+ int ret = pthread_barrier_init (&barrier, NULL, inner_thread_count + 1);
+ if (ret != 0)
+ {
+ errno = ret;
+ printf ("pthread_barrier_init: %m\n");
+ abort ();
+ }
+ for (int i = 0; i < inner_thread_count; ++i)
+ {
+ void *(*func) (void *);
+ if ((i % 2) == 0)
+ func = malloc_first_thread;
+ else
+ func = wait_first_thread;
+ ret = pthread_create (threads + i, NULL, func, &barrier);
+ if (ret != 0)
+ {
+ errno = ret;
+ printf ("error: pthread_create: %m\n");
+ abort ();
+ }
+ }
+ ret = pthread_barrier_wait (&barrier);
+ if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ {
+ errno = ret;
+ printf ("pthread_wait: %m\n");
+ abort ();
+ }
+ for (int i = 0; i < inner_thread_count; ++i)
+ {
+ ret = pthread_join (threads[i], NULL);
+ if (ret != 0)
+ {
+ ret = errno;
+ printf ("error: pthread_join: %m\n");
+ abort ();
+ }
+ }
+ ret = pthread_barrier_destroy (&barrier);
+ if (ret != 0)
+ {
+ ret = errno;
+ printf ("pthread_barrier_destroy: %m\n");
+ abort ();
+ }
+ }
+
+ free (threads);
+
+ return NULL;
+}
+
+static int
+do_test (void)
+{
+ /* The number of threads should be smaller than the number of
+ arenas, so that there will be some free arenas to add to the
+ arena free list. */
+ enum { outer_thread_count = 2 };
+ if (mallopt (M_ARENA_MAX, 8) == 0)
+ {
+ printf ("error: mallopt (M_ARENA_MAX) failed\n");
+ return 1;
+ }
+
+ /* Leave some room for shutting down all threads gracefully. */
+ int timeout = 3;
+ if (timeout > TIMEOUT)
+ timeout = TIMEOUT - 1;
+
+ pthread_t *threads = calloc (sizeof (*threads), outer_thread_count);
+ if (threads == NULL)
+ {
+ printf ("error: calloc: %m\n");
+ abort ();
+ }
+
+ for (long i = 0; i < outer_thread_count; ++i)
+ {
+ int ret = pthread_create (threads + i, NULL, outer_thread, NULL);
+ if (ret != 0)
+ {
+ errno = ret;
+ printf ("error: pthread_create: %m\n");
+ abort ();
+ }
+ }
+
+ struct timespec ts = {timeout, 0};
+ if (nanosleep (&ts, NULL))
+ {
+ printf ("error: error: nanosleep: %m\n");
+ abort ();
+ }
+
+ __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED);
+
+ for (long i = 0; i < outer_thread_count; ++i)
+ {
+ int ret = pthread_join (threads[i], NULL);
+ if (ret != 0)
+ {
+ errno = ret;
+ printf ("error: pthread_join: %m\n");
+ abort ();
+ }
+ }
+ free (threads);
+
+ return 0;
+}