| Upstream commits: |
| |
| commit a62719ba90e2fa1728890ae7dc8df9e32a622e7b |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Wed Oct 28 19:32:46 2015 +0100 |
| |
| malloc: Prevent arena free_list from turning cyclic [BZ #19048] |
| |
| commit 3da825ce483903e3a881a016113b3e59fd4041de |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Wed Dec 16 12:39:48 2015 +0100 |
| |
| malloc: Fix attached thread reference count handling [BZ #19243] |
| |
| commit 90c400bd4904b0240a148f0b357a5cbc36179239 |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Mon Dec 21 16:42:46 2015 +0100 |
| |
| malloc: Fix list_lock/arena lock deadlock [BZ #19182] |
| |
| commit 7962541a32eff5597bc4207e781cfac8d1bb0d87 |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Wed Dec 23 17:23:33 2015 +0100 |
| |
| malloc: Update comment for list_lock |
| |
| commit 2a38688932243b5b16fb12d84c7ac1138ce50363 |
| Author: Florian Weimer <fweimer@redhat.com> |
| Date: Fri Feb 19 14:11:32 2016 +0100 |
| |
| tst-malloc-thread-exit: Use fewer system resources |
| |
| Also included is the following change, which has not yet been |
| committed upstream: |
| |
| malloc: Preserve arena free list/thread count invariant [BZ #20370] |
| |
| It is necessary to preserve the invariant that if an arena is |
| on the free list, it has a thread attach count of zero. Otherwise, |
| when arena_thread_freeres sees the zero attach count, it will |
| add the arena to the free list; without the invariant, an arena |
| could get pushed onto the list twice, resulting in a cycle. |
| |
| One possible execution trace looks like this: |
| |
| Thread 1 examines free list and observes it as empty. |
| Thread 2 exits and adds its arena to the free list, |
| with attached_threads == 0. |
| Thread 1 selects this arena in reused_arena (not from the free list). |
| Thread 1 increments attached_threads and attaches itself. |
| (The arena remains on the free list.) |
| Thread 1 exits, decrements attached_threads, |
| and adds the arena to the free list. |
| |
| The final step creates a cycle in the usual way (by overwriting the |
| next_free member with the former list head, while there is another |
| list item pointing to the arena structure). |
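| |
| For illustration, here is a minimal stand-alone C sketch of the |
| double push (hypothetical names; this is not the glibc code, only a |
| model of the list corruption): |
| |
| #include <assert.h> |
| #include <stddef.h> |
| #include <stdio.h> |
| |
| /* Hypothetical model of an arena on the free list. */ |
| struct arena { struct arena *next_free; }; |
| |
| static struct arena *free_list; |
| |
| static void |
| push (struct arena *a) |
| { |
|   a->next_free = free_list;   /* Overwrites any existing link. */ |
|   free_list = a; |
| } |
| |
| int |
| main (void) |
| { |
|   struct arena a1 = { NULL }, a2 = { NULL }; |
|   push (&a1);   /* free_list: a1 */ |
|   push (&a2);   /* free_list: a2 -> a1 */ |
|   push (&a1);   /* a1 pushed again: a1 -> a2 -> a1 -> ... */ |
|   /* a2 still points at a1, and a1 now points back at a2, so any |
|      traversal of the free list loops forever. */ |
|   assert (a1.next_free == &a2 && a2.next_free == &a1); |
|   puts ("free list is now cyclic"); |
|   return 0; |
| } |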
| |
| tst-malloc-thread-exit exhibits this issue, but it was only visible |
| with a debugger because the incorrect fix in bug 19243 removed |
| the assert from get_free_list. |
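| |
| The fix (in the arena.c hunks below) ties every push to and pop from |
| the free list to the attached_threads count, under a dedicated lock. |
| As a rough sketch of the protocol (a stand-alone model with |
| hypothetical names, not the actual arena.c code): |
| |
| #include <assert.h> |
| #include <pthread.h> |
| #include <stddef.h> |
| |
| /* Hypothetical model of the reference-counting scheme. */ |
| struct arena { struct arena *next_free; size_t attached_threads; }; |
| |
| static pthread_mutex_t free_list_lock = PTHREAD_MUTEX_INITIALIZER; |
| static struct arena *free_list; |
| |
| /* Thread exit: push the arena only when the last attached thread |
|    detaches, so the same arena can never be pushed twice. */ |
| static void |
| thread_detach (struct arena *a) |
| { |
|   pthread_mutex_lock (&free_list_lock); |
|   assert (a->attached_threads > 0); |
|   if (--a->attached_threads == 0) |
|     { |
|       a->next_free = free_list; |
|       free_list = a; |
|     } |
|   pthread_mutex_unlock (&free_list_lock); |
| } |
| |
| /* Arena selection: an arena taken off the list must have a zero |
|    count (the invariant); the count is raised only after unlinking. */ |
| static struct arena * |
| free_list_pop (void) |
| { |
|   pthread_mutex_lock (&free_list_lock); |
|   struct arena *a = free_list; |
|   if (a != NULL) |
|     { |
|       assert (a->attached_threads == 0); |
|       free_list = a->next_free; |
|       a->attached_threads = 1; |
|     } |
|   pthread_mutex_unlock (&free_list_lock); |
|   return a; |
| } |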
| |
| --- a/malloc/arena.c |
| +++ b/malloc/arena.c |
| @@ -77,10 +77,30 @@ extern int sanity_check_heap_info_alignm |
| /* Thread specific data */ |
| |
| static tsd_key_t arena_key; |
| -static mutex_t list_lock = MUTEX_INITIALIZER; |
| + |
| +/* Arena free list. free_list_lock synchronizes access to the |
| + free_list variable below, and the next_free and attached_threads |
| + members of struct malloc_state objects. No other locks must be |
| + acquired after free_list_lock has been acquired. */ |
| + |
| +static mutex_t free_list_lock = MUTEX_INITIALIZER; |
| static size_t narenas = 1; |
| static mstate free_list; |
| |
| +/* list_lock prevents concurrent writes to the next member of struct |
| + malloc_state objects. |
| + |
| + Read access to the next member is supposed to synchronize with the |
| + atomic_write_barrier and the write to the next member in |
| + _int_new_arena. This suffers from data races; see the FIXME |
| + comments in _int_new_arena and reused_arena. |
| + |
| + list_lock also prevents concurrent forks. At the time list_lock is |
| + acquired, no arena lock must have been acquired, but it is |
| + permitted to acquire arena locks subsequently, while list_lock is |
| + acquired. */ |
| +static mutex_t list_lock = MUTEX_INITIALIZER; |
| + |
| #if THREAD_STATS |
| static int stat_n_heaps; |
| #define THREAD_STAT(x) x |
| @@ -221,6 +241,10 @@ ptmalloc_lock_all (void) |
| |
| if(__malloc_initialized < 1) |
| return; |
| + |
| + /* We do not acquire free_list_lock here because we completely |
| + reconstruct free_list in ptmalloc_unlock_all2. */ |
| + |
| if (mutex_trylock(&list_lock)) |
| { |
| void *my_arena; |
| @@ -242,7 +266,10 @@ ptmalloc_lock_all (void) |
| save_free_hook = __free_hook; |
| __malloc_hook = malloc_atfork; |
| __free_hook = free_atfork; |
| - /* Only the current thread may perform malloc/free calls now. */ |
| + /* Only the current thread may perform malloc/free calls now. |
| + save_arena will be reattached to the current thread, in |
| + ptmalloc_unlock_all, so save_arena->attached_threads is not |
| + updated. */ |
| tsd_getspecific(arena_key, save_arena); |
| tsd_setspecific(arena_key, ATFORK_ARENA_PTR); |
| out: |
| @@ -258,6 +285,9 @@ ptmalloc_unlock_all (void) |
| return; |
| if (--atfork_recursive_cntr != 0) |
| return; |
| + /* Replace ATFORK_ARENA_PTR with save_arena. |
| + save_arena->attached_threads was not changed in ptmalloc_lock_all |
| + and is still correct. */ |
| tsd_setspecific(arena_key, save_arena); |
| __malloc_hook = save_malloc_hook; |
| __free_hook = save_free_hook; |
| @@ -286,16 +316,24 @@ ptmalloc_unlock_all2 (void) |
| tsd_setspecific(arena_key, save_arena); |
| __malloc_hook = save_malloc_hook; |
| __free_hook = save_free_hook; |
| + /* Push all arenas to the free list, except save_arena, which is |
| + attached to the current thread. */ |
| + mutex_init (&free_list_lock); |
| + if (save_arena != NULL) |
| + ((mstate) save_arena)->attached_threads = 1; |
| free_list = NULL; |
| for(ar_ptr = &main_arena;;) { |
| mutex_init(&ar_ptr->mutex); |
| if (ar_ptr != save_arena) { |
| + /* This arena is no longer attached to any thread. */ |
| + ar_ptr->attached_threads = 0; |
| ar_ptr->next_free = free_list; |
| free_list = ar_ptr; |
| } |
| ar_ptr = ar_ptr->next; |
| if(ar_ptr == &main_arena) break; |
| } |
| + |
| mutex_init(&list_lock); |
| atfork_recursive_cntr = 0; |
| } |
| @@ -692,8 +730,25 @@ heap_trim(heap_info *heap, size_t pad) |
| return 1; |
| } |
| |
| -/* Create a new arena with initial size "size". */ |
| |
| +/* If REPLACED_ARENA is not NULL, detach it from this thread. Must be |
| + called while free_list_lock is held. */ |
| +static void |
| +detach_arena (mstate replaced_arena) |
| +{ |
| + if (replaced_arena != NULL) |
| + { |
| + assert (replaced_arena->attached_threads > 0); |
| + /* The current implementation only detaches from main_arena in |
| + case of allocation failure. This means that it is likely not |
| + beneficial to put the main arena on the free list, even if |
| + the reference count reaches zero. */ |
| + --replaced_arena->attached_threads; |
| + } |
| +} |
| + |
| + |
| +/* Create a new arena with initial size "size". */ |
| static mstate |
| _int_new_arena(size_t size) |
| { |
| @@ -714,6 +769,7 @@ _int_new_arena(size_t size) |
| } |
| a = h->ar_ptr = (mstate)(h+1); |
| malloc_init_state(a); |
| + a->attached_threads = 1; |
| /*a->next = NULL;*/ |
| a->system_mem = a->max_system_mem = h->size; |
| arena_mem += h->size; |
| @@ -727,36 +783,68 @@ _int_new_arena(size_t size) |
| set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE); |
| |
| LIBC_PROBE (memory_arena_new, 2, a, size); |
| + mstate replaced_arena; |
| + { |
| + void *vptr = NULL; |
| + replaced_arena = tsd_getspecific (arena_key, vptr); |
| + } |
| tsd_setspecific(arena_key, (void *)a); |
| mutex_init(&a->mutex); |
| - (void)mutex_lock(&a->mutex); |
| |
| (void)mutex_lock(&list_lock); |
| |
| /* Add the new arena to the global list. */ |
| a->next = main_arena.next; |
| + /* FIXME: The barrier is an attempt to synchronize with read access |
| + in reused_arena, which does not acquire list_lock while |
| + traversing the list. */ |
| atomic_write_barrier (); |
| main_arena.next = a; |
| |
| (void)mutex_unlock(&list_lock); |
| |
| + (void) mutex_lock (&free_list_lock); |
| + detach_arena (replaced_arena); |
| + (void) mutex_unlock (&free_list_lock); |
| + |
| + /* Lock this arena. NB: Another thread may have been attached to |
| + this arena because the arena is now accessible from the |
| + main_arena.next list and could have been picked by reused_arena. |
| + This can only happen for the last arena created (before the arena |
| + limit is reached). At this point, some arena has to be attached |
| + to two threads. We could acquire the arena lock before list_lock |
| + to make it less likely that reused_arena picks this new arena, |
| + but this could result in a deadlock with ptmalloc_lock_all. */ |
| + |
| + (void) mutex_lock (&a->mutex); |
| + |
| THREAD_STAT(++(a->stat_lock_loop)); |
| |
| return a; |
| } |
| |
| - |
| +/* Remove an arena from free_list. */ |
| static mstate |
| get_free_list (void) |
| { |
| + void *vptr = NULL; |
| + mstate replaced_arena = tsd_getspecific (arena_key, vptr); |
| mstate result = free_list; |
| if (result != NULL) |
| { |
| - (void)mutex_lock(&list_lock); |
| + (void)mutex_lock(&free_list_lock); |
| result = free_list; |
| if (result != NULL) |
| - free_list = result->next_free; |
| - (void)mutex_unlock(&list_lock); |
| + { |
| + free_list = result->next_free; |
| + |
| + /* The arena will be attached to this thread. */ |
| + assert (result->attached_threads == 0); |
| + result->attached_threads = 1; |
| + |
| + detach_arena (replaced_arena); |
| + } |
| + (void)mutex_unlock(&free_list_lock); |
| |
| if (result != NULL) |
| { |
| @@ -770,6 +858,26 @@ get_free_list (void) |
| return result; |
| } |
| |
| +/* Remove the arena from the free list (if it is present). |
| + free_list_lock must have been acquired by the caller. */ |
| +static void |
| +remove_from_free_list (mstate arena) |
| +{ |
| + mstate *previous = &free_list; |
| + for (mstate p = free_list; p != NULL; p = p->next_free) |
| + { |
| + assert (p->attached_threads == 0); |
| + if (p == arena) |
| + { |
| + /* Remove the requested arena from the list. */ |
| + *previous = p->next_free; |
| + break; |
| + } |
| + else |
| + previous = &p->next_free; |
| + } |
| +} |
| + |
| /* Lock and return an arena that can be reused for memory allocation. |
| Avoid AVOID_ARENA as we have already failed to allocate memory in |
| it and it is currently locked. */ |
| @@ -777,16 +885,20 @@ static mstate |
| reused_arena (mstate avoid_arena) |
| { |
| mstate result; |
| + /* FIXME: Access to next_to_use suffers from data races. */ |
| static mstate next_to_use; |
| if (next_to_use == NULL) |
| next_to_use = &main_arena; |
| |
| + /* Iterate over all arenas (including those linked from |
| + free_list). */ |
| result = next_to_use; |
| do |
| { |
| if (!arena_is_corrupt (result) && !mutex_trylock(&result->mutex)) |
| goto out; |
| |
| + /* FIXME: This is a data race, see _int_new_arena. */ |
| result = result->next; |
| } |
| while (result != next_to_use); |
| @@ -815,6 +927,27 @@ reused_arena (mstate avoid_arena) |
| (void)mutex_lock(&result->mutex); |
| |
| out: |
| + /* Attach the arena to the current thread. */ |
| + { |
| + /* Update the arena thread attachment counters. */ |
| + void *vptr = NULL; |
| + mstate replaced_arena = tsd_getspecific (arena_key, vptr); |
| + (void) mutex_lock (&free_list_lock); |
| + detach_arena (replaced_arena); |
| + |
| + /* We may have picked up an arena on the free list. We need to |
| + preserve the invariant that no arena on the free list has a |
| + positive attached_threads counter (otherwise, |
| + arena_thread_freeres cannot use the counter to determine if the |
| + arena needs to be put on the free list). We unconditionally |
| + remove the selected arena from the free list. The caller of |
| + reused_arena checked the free list and observed it to be empty, |
| + so the list is very short. */ |
| + remove_from_free_list (result); |
| + |
| + ++result->attached_threads; |
| + (void) mutex_unlock (&free_list_lock); |
| + } |
| LIBC_PROBE (memory_arena_reuse, 2, result, avoid_arena); |
| tsd_setspecific(arena_key, (void *)result); |
| THREAD_STAT(++(result->stat_lock_loop)); |
| @@ -905,10 +1038,16 @@ arena_thread_freeres (void) |
| |
| if (a != NULL) |
| { |
| - (void)mutex_lock(&list_lock); |
| - a->next_free = free_list; |
| - free_list = a; |
| - (void)mutex_unlock(&list_lock); |
| + (void)mutex_lock(&free_list_lock); |
| + /* If this was the last attached thread for this arena, put the |
| + arena on the free list. */ |
| + assert (a->attached_threads > 0); |
| + if (--a->attached_threads == 0) |
| + { |
| + a->next_free = free_list; |
| + free_list = a; |
| + } |
| + (void)mutex_unlock(&free_list_lock); |
| } |
| } |
| text_set_element (__libc_thread_subfreeres, arena_thread_freeres); |
| |
| --- a/malloc/malloc.c |
| +++ b/malloc/malloc.c |
| @@ -1727,8 +1727,13 @@ struct malloc_state { |
| /* Linked list */ |
| struct malloc_state *next; |
| |
| - /* Linked list for free arenas. */ |
| + /* Linked list for free arenas. Access to this field is serialized |
| + by free_list_lock in arena.c. */ |
| struct malloc_state *next_free; |
| + /* Number of threads attached to this arena. 0 if the arena is on |
| + the free list. Access to this field is serialized by |
| + free_list_lock in arena.c. */ |
| + INTERNAL_SIZE_T attached_threads; |
| |
| /* Memory allocated from the system in this arena. */ |
| INTERNAL_SIZE_T system_mem; |
| @@ -1772,7 +1777,8 @@ struct malloc_par { |
| static struct malloc_state main_arena = |
| { |
| .mutex = MUTEX_INITIALIZER, |
| - .next = &main_arena |
| + .next = &main_arena, |
| + .attached_threads = 1, |
| }; |
| |
| /* There is only one instance of the malloc parameters. */ |
| |
| --- a/malloc/Makefile |
| +++ b/malloc/Makefile |
| @@ -20,13 +20,14 @@ |
| # |
| subdir := malloc |
| |
| -all: |
| +include ../Makeconfig |
| |
| dist-headers := malloc.h |
| headers := $(dist-headers) obstack.h mcheck.h |
| tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \ |
| tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \ |
| - tst-malloc-usable tst-malloc-backtrace |
| + tst-malloc-usable \ |
| + tst-malloc-backtrace tst-malloc-thread-exit |
| test-srcs = tst-mtrace |
| |
| routines = malloc morecore mcheck mtrace obstack |
| @@ -43,6 +44,8 @@ libmemusage-inhibit-o = $(filter-out .os |
| |
| $(objpfx)tst-malloc-backtrace: $(common-objpfx)nptl/libpthread.so \ |
| $(common-objpfx)nptl/libpthread_nonshared.a |
| +$(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \ |
| + $(common-objpfx)nptl/libpthread_nonshared.a |
| |
| # These should be removed by `make clean'. |
| extra-objs = mcheck-init.o libmcheck.a |
| @@ -50,8 +53,6 @@ extra-objs = mcheck-init.o libmcheck.a |
| # Include the cleanup handler. |
| aux := set-freeres thread-freeres |
| |
| -include ../Makeconfig |
| - |
| CPPFLAGS-memusagestat = -DNOT_IN_libc |
| |
| # The Perl script to analyze the output of the mtrace functions. |
| |
| |
| --- /dev/null |
| +++ b/malloc/tst-malloc-thread-exit.c |
| @@ -0,0 +1,218 @@ |
| +/* Test malloc with concurrent thread termination. |
| + Copyright (C) 2015-2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +/* This test spawns a number of outer threads (no more than the arena |
| + limit). The outer threads run a loop which starts and joins two |
| + different kinds of threads: the first kind allocates (attaching an |
| + arena to the thread; malloc_first_thread) and waits, the second |
| + kind waits and allocates (wait_first_thread). Both kinds of |
| + threads exit immediately after waiting. The hope is that this will |
| + exhibit races in thread termination and arena management, |
| + particularly related to the arena free list. */ |
| + |
| +#include <errno.h> |
| +#include <malloc.h> |
| +#include <pthread.h> |
| +#include <stdbool.h> |
| +#include <stdio.h> |
| +#include <stdlib.h> |
| +#include <unistd.h> |
| + |
| +static int do_test (void); |
| + |
| +#define TEST_FUNCTION do_test () |
| +#include "../test-skeleton.c" |
| + |
| +static bool termination_requested; |
| +static int inner_thread_count = 4; |
| +static size_t malloc_size = 32; |
| + |
| +static void |
| +__attribute__ ((noinline, noclone)) |
| +unoptimized_free (void *ptr) |
| +{ |
| + free (ptr); |
| +} |
| + |
| +static void * |
| +malloc_first_thread (void *closure) |
| +{ |
| + pthread_barrier_t *barrier = closure; |
| + void *ptr = malloc (malloc_size); |
| + if (ptr == NULL) |
| + { |
| + printf ("error: malloc: %m\n"); |
| + abort (); |
| + } |
| + int ret = pthread_barrier_wait (barrier); |
| + if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_barrier_wait: %m\n"); |
| + abort (); |
| + } |
| + unoptimized_free (ptr); |
| + return NULL; |
| +} |
| + |
| +static void * |
| +wait_first_thread (void *closure) |
| +{ |
| + pthread_barrier_t *barrier = closure; |
| + int ret = pthread_barrier_wait (barrier); |
| + if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_barrier_wait: %m\n"); |
| + abort (); |
| + } |
| + void *ptr = malloc (malloc_size); |
| + if (ptr == NULL) |
| + { |
| + printf ("error: malloc: %m\n"); |
| + abort (); |
| + } |
| + unoptimized_free (ptr); |
| + return NULL; |
| +} |
| + |
| +static void * |
| +outer_thread (void *closure) |
| +{ |
| + pthread_t *threads = calloc (inner_thread_count, sizeof (*threads)); |
| + if (threads == NULL) |
| + { |
| + printf ("error: calloc: %m\n"); |
| + abort (); |
| + } |
| + |
| + while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED)) |
| + { |
| + pthread_barrier_t barrier; |
| + int ret = pthread_barrier_init (&barrier, NULL, inner_thread_count + 1); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("pthread_barrier_init: %m\n"); |
| + abort (); |
| + } |
| + for (int i = 0; i < inner_thread_count; ++i) |
| + { |
| + void *(*func) (void *); |
| + if ((i % 2) == 0) |
| + func = malloc_first_thread; |
| + else |
| + func = wait_first_thread; |
| + ret = pthread_create (threads + i, NULL, func, &barrier); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_create: %m\n"); |
| + abort (); |
| + } |
| + } |
| + ret = pthread_barrier_wait (&barrier); |
| + if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD) |
| + { |
| + errno = ret; |
| + printf ("pthread_wait: %m\n"); |
| + abort (); |
| + } |
| + for (int i = 0; i < inner_thread_count; ++i) |
| + { |
| + ret = pthread_join (threads[i], NULL); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_join: %m\n"); |
| + abort (); |
| + } |
| + } |
| + ret = pthread_barrier_destroy (&barrier); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_barrier_destroy: %m\n"); |
| + abort (); |
| + } |
| + } |
| + |
| + free (threads); |
| + |
| + return NULL; |
| +} |
| + |
| +static int |
| +do_test (void) |
| +{ |
| + /* The number of threads should be smaller than the number of |
| + arenas, so that there will be some free arenas to add to the |
| + arena free list. */ |
| + enum { outer_thread_count = 2 }; |
| + if (mallopt (M_ARENA_MAX, 8) == 0) |
| + { |
| + printf ("error: mallopt (M_ARENA_MAX) failed\n"); |
| + return 1; |
| + } |
| + |
| + /* Leave some room for shutting down all threads gracefully. */ |
| + int timeout = 3; |
| + if (timeout > TIMEOUT) |
| + timeout = TIMEOUT - 1; |
| + |
| + pthread_t *threads = calloc (outer_thread_count, sizeof (*threads)); |
| + if (threads == NULL) |
| + { |
| + printf ("error: calloc: %m\n"); |
| + abort (); |
| + } |
| + |
| + for (long i = 0; i < outer_thread_count; ++i) |
| + { |
| + int ret = pthread_create (threads + i, NULL, outer_thread, NULL); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_create: %m\n"); |
| + abort (); |
| + } |
| + } |
| + |
| + struct timespec ts = {timeout, 0}; |
| + if (nanosleep (&ts, NULL)) |
| + { |
| + printf ("error: error: nanosleep: %m\n"); |
| + abort (); |
| + } |
| + |
| + __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED); |
| + |
| + for (long i = 0; i < outer_thread_count; ++i) |
| + { |
| + int ret = pthread_join (threads[i], NULL); |
| + if (ret != 0) |
| + { |
| + errno = ret; |
| + printf ("error: pthread_join: %m\n"); |
| + abort (); |
| + } |
| + } |
| + free (threads); |
| + |
| + return 0; |
| +} |