olga / rpms / glibc

Forked from rpms/glibc 5 years ago
Clone
00db10
Upstream commits:
00db10
00db10
commit a62719ba90e2fa1728890ae7dc8df9e32a622e7b
00db10
Author: Florian Weimer <fweimer@redhat.com>
00db10
Date:   Wed Oct 28 19:32:46 2015 +0100
00db10
00db10
    malloc: Prevent arena free_list from turning cyclic [BZ #19048]
00db10
00db10
commit 3da825ce483903e3a881a016113b3e59fd4041de
00db10
Author: Florian Weimer <fweimer@redhat.com>
00db10
Date:   Wed Dec 16 12:39:48 2015 +0100
00db10
00db10
    malloc: Fix attached thread reference count handling [BZ #19243]
00db10
00db10
commit 90c400bd4904b0240a148f0b357a5cbc36179239
00db10
Author: Florian Weimer <fweimer@redhat.com>
00db10
Date:   Mon Dec 21 16:42:46 2015 +0100
00db10
00db10
    malloc: Fix list_lock/arena lock deadlock [BZ #19182]
00db10
00db10
commit 7962541a32eff5597bc4207e781cfac8d1bb0d87
00db10
Author: Florian Weimer <fweimer@redhat.com>
00db10
Date:   Wed Dec 23 17:23:33 2015 +0100
00db10
00db10
    malloc: Update comment for list_lock
00db10
00db10
commit 2a38688932243b5b16fb12d84c7ac1138ce50363
00db10
Author: Florian Weimer <fweimer@redhat.com>
00db10
Date:   Fri Feb 19 14:11:32 2016 +0100
00db10
00db10
    tst-malloc-thread-exit: Use fewer system resources
00db10
00db10
Also included is the following change, which has not yet been
00db10
committed upstream:
00db10
00db10
    malloc: Preserve arena free list/thread count invariant [BZ #20370]
00db10
    
00db10
    It is necessary to preserve the invariant that if an arena is
00db10
    on the free list, it has thread attach count zero.  Otherwise,
00db10
    when arena_thread_freeres sees the zero attach count, it will
00db10
    add it, and without the invariant, an arena could get pushed
00db10
    to the list twice, resulting in a cycle.
00db10
    
00db10
    One possible execution trace looks like this:
00db10
    
00db10
    Thread 1 examines free list and observes it as empty.
00db10
    Thread 2 exits and adds its arena to the free list,
00db10
      (with attached_threads == 0).
00db10
    Thread 1 selects this arena in reused_arena (not from the free list).
00db10
    Thread 1 increments attached_threads and attaches itself.
00db10
      (The arena remains on the free list.)
00db10
    Thread 1 exits, decrements attached_threads,
00db10
      and adds the arena to the free list.
00db10
    
00db10
    The final step creates a cycle in the usual way (by overwriting the
00db10
    next_free member with the former list head, while there is another
00db10
    list item pointing to the arena structure).
00db10
    
00db10
    tst-malloc-thread-exit exhibits this issue, but it was only visible
00db10
    with a debugger because the incorrect fix in bug 19243 removed
00db10
    the assert from get_free_list.
00db10
00db10
00db10
Index: b/malloc/arena.c
00db10
===================================================================
00db10
--- a/malloc/arena.c
00db10
+++ b/malloc/arena.c
00db10
@@ -77,10 +77,30 @@ extern int sanity_check_heap_info_alignm
00db10
 /* Thread specific data */
00db10
 
00db10
 static tsd_key_t arena_key;
00db10
-static mutex_t list_lock = MUTEX_INITIALIZER;
00db10
+
00db10
+/* Arena free list.  free_list_lock synchronizes access to the
00db10
+   free_list variable below, and the next_free and attached_threads
00db10
+   members of struct malloc_state objects.  No other locks must be
00db10
+   acquired after free_list_lock has been acquired.  */
00db10
+
00db10
+static mutex_t free_list_lock = MUTEX_INITIALIZER;
00db10
 static size_t narenas = 1;
00db10
 static mstate free_list;
00db10
 
00db10
+/* list_lock prevents concurrent writes to the next member of struct
00db10
+   malloc_state objects.
00db10
+
00db10
+   Read access to the next member is supposed to synchronize with the
00db10
+   atomic_write_barrier and the write to the next member in
00db10
+   _int_new_arena.  This suffers from data races; see the FIXME
00db10
+   comments in _int_new_arena and reused_arena.
00db10
+
00db10
+   list_lock also prevents concurrent forks.  At the time list_lock is
00db10
+   acquired, no arena lock must have been acquired, but it is
00db10
+   permitted to acquire arena locks subsequently, while list_lock is
00db10
+   acquired.  */
00db10
+static mutex_t list_lock = MUTEX_INITIALIZER;
00db10
+
00db10
 #if THREAD_STATS
00db10
 static int stat_n_heaps;
00db10
 #define THREAD_STAT(x) x
00db10
@@ -221,6 +241,10 @@ ptmalloc_lock_all (void)
00db10
 
00db10
   if(__malloc_initialized < 1)
00db10
     return;
00db10
+
00db10
+  /* We do not acquire free_list_lock here because we completely
00db10
+     reconstruct free_list in ptmalloc_unlock_all2.  */
00db10
+
00db10
   if (mutex_trylock(&list_lock))
00db10
     {
00db10
       void *my_arena;
00db10
@@ -242,7 +266,10 @@ ptmalloc_lock_all (void)
00db10
   save_free_hook = __free_hook;
00db10
   __malloc_hook = malloc_atfork;
00db10
   __free_hook = free_atfork;
00db10
-  /* Only the current thread may perform malloc/free calls now. */
00db10
+  /* Only the current thread may perform malloc/free calls now.
00db10
+     save_arena will be reattached to the current thread, in
00db10
+     ptmalloc_unlock_all, so save_arena->attached_threads is not
00db10
+     updated.  */
00db10
   tsd_getspecific(arena_key, save_arena);
00db10
   tsd_setspecific(arena_key, ATFORK_ARENA_PTR);
00db10
  out:
00db10
@@ -258,6 +285,9 @@ ptmalloc_unlock_all (void)
00db10
     return;
00db10
   if (--atfork_recursive_cntr != 0)
00db10
     return;
00db10
+  /* Replace ATFORK_ARENA_PTR with save_arena.
00db10
+     save_arena->attached_threads was not changed in ptmalloc_lock_all
00db10
+     and is still correct.  */
00db10
   tsd_setspecific(arena_key, save_arena);
00db10
   __malloc_hook = save_malloc_hook;
00db10
   __free_hook = save_free_hook;
00db10
@@ -286,16 +316,24 @@ ptmalloc_unlock_all2 (void)
00db10
   tsd_setspecific(arena_key, save_arena);
00db10
   __malloc_hook = save_malloc_hook;
00db10
   __free_hook = save_free_hook;
00db10
+  /* Push all arenas to the free list, except save_arena, which is
00db10
+     attached to the current thread.  */
00db10
+  mutex_init (&free_list_lock);
00db10
+  if (save_arena != NULL)
00db10
+    ((mstate) save_arena)->attached_threads = 1;
00db10
   free_list = NULL;
00db10
   for(ar_ptr = &main_arena;;) {
00db10
     mutex_init(&ar_ptr->mutex);
00db10
     if (ar_ptr != save_arena) {
00db10
+      /* This arena is no longer attached to any thread.  */
00db10
+      ar_ptr->attached_threads = 0;
00db10
       ar_ptr->next_free = free_list;
00db10
       free_list = ar_ptr;
00db10
     }
00db10
     ar_ptr = ar_ptr->next;
00db10
     if(ar_ptr == &main_arena) break;
00db10
   }
00db10
+
00db10
   mutex_init(&list_lock);
00db10
   atfork_recursive_cntr = 0;
00db10
 }
00db10
@@ -692,8 +730,25 @@ heap_trim(heap_info *heap, size_t pad)
00db10
   return 1;
00db10
 }
00db10
 
00db10
-/* Create a new arena with initial size "size".  */
00db10
 
00db10
+/* If REPLACED_ARENA is not NULL, detach it from this thread.  Must be
00db10
+   called while free_list_lock is held.  */
00db10
+static void
00db10
+detach_arena (mstate replaced_arena)
00db10
+{
00db10
+  if (replaced_arena != NULL)
00db10
+    {
00db10
+      assert (replaced_arena->attached_threads > 0);
00db10
+      /* The current implementation only detaches from main_arena in
00db10
+	 case of allocation failure.  This means that it is likely not
00db10
+	 beneficial to put the arena on free_list even if the
00db10
+	 reference count reaches zero.  */
00db10
+      --replaced_arena->attached_threads;
00db10
+    }
00db10
+}
00db10
+
00db10
+
00db10
+/* Create a new arena with initial size "size".  */
00db10
 static mstate
00db10
 _int_new_arena(size_t size)
00db10
 {
00db10
@@ -714,6 +769,7 @@ _int_new_arena(size_t size)
00db10
   }
00db10
   a = h->ar_ptr = (mstate)(h+1);
00db10
   malloc_init_state(a);
00db10
+  a->attached_threads = 1;
00db10
   /*a->next = NULL;*/
00db10
   a->system_mem = a->max_system_mem = h->size;
00db10
   arena_mem += h->size;
00db10
@@ -727,36 +783,68 @@ _int_new_arena(size_t size)
00db10
   set_head(top(a), (((char*)h + h->size) - ptr) | PREV_INUSE);
00db10
 
00db10
   LIBC_PROBE (memory_arena_new, 2, a, size);
00db10
+  mstate replaced_arena;
00db10
+  {
00db10
+    void *vptr = NULL;
00db10
+    replaced_arena = tsd_getspecific (arena_key, vptr);
00db10
+  }
00db10
   tsd_setspecific(arena_key, (void *)a);
00db10
   mutex_init(&a->mutex);
00db10
-  (void)mutex_lock(&a->mutex);
00db10
 
00db10
   (void)mutex_lock(&list_lock);
00db10
 
00db10
   /* Add the new arena to the global list.  */
00db10
   a->next = main_arena.next;
00db10
+  /* FIXME: The barrier is an attempt to synchronize with read access
00db10
+     in reused_arena, which does not acquire list_lock while
00db10
+     traversing the list.  */
00db10
   atomic_write_barrier ();
00db10
   main_arena.next = a;
00db10
 
00db10
   (void)mutex_unlock(&list_lock);
00db10
 
00db10
+  (void) mutex_lock (&free_list_lock);
00db10
+  detach_arena (replaced_arena);
00db10
+  (void) mutex_unlock (&free_list_lock);
00db10
+
00db10
+  /* Lock this arena.  NB: Another thread may have been attached to
00db10
+     this arena because the arena is now accessible from the
00db10
+     main_arena.next list and could have been picked by reused_arena.
00db10
+     This can only happen for the last arena created (before the arena
00db10
+     limit is reached).  At this point, some arena has to be attached
00db10
+     to two threads.  We could acquire the arena lock before list_lock
00db10
+     to make it less likely that reused_arena picks this new arena,
00db10
+     but this could result in a deadlock with ptmalloc_lock_all.  */
00db10
+
00db10
+  (void) mutex_lock (&a->mutex);
00db10
+
00db10
   THREAD_STAT(++(a->stat_lock_loop));
00db10
 
00db10
   return a;
00db10
 }
00db10
 
00db10
-
00db10
+/* Remove an arena from free_list.  */
00db10
 static mstate
00db10
 get_free_list (void)
00db10
 {
00db10
+  void *vptr = NULL;
00db10
+  mstate replaced_arena = tsd_getspecific (arena_key, vptr);
00db10
   mstate result = free_list;
00db10
   if (result != NULL)
00db10
     {
00db10
-      (void)mutex_lock(&list_lock);
00db10
+      (void)mutex_lock(&free_list_lock);
00db10
       result = free_list;
00db10
       if (result != NULL)
00db10
-	free_list = result->next_free;
00db10
-      (void)mutex_unlock(&list_lock);
00db10
+	{
00db10
+	  free_list = result->next_free;
00db10
+
00db10
+	  /* The arena will be attached to this thread.  */
00db10
+	  assert (result->attached_threads == 0);
00db10
+	  result->attached_threads = 1;
00db10
+
00db10
+	  detach_arena (replaced_arena);
00db10
+	}
00db10
+      (void)mutex_unlock(&free_list_lock);
00db10
 
00db10
       if (result != NULL)
00db10
 	{
00db10
@@ -770,6 +858,26 @@ get_free_list (void)
00db10
   return result;
00db10
 }
00db10
 
00db10
+/* Remove the arena from the free list (if it is present).
00db10
+   free_list_lock must have been acquired by the caller.  */
00db10
+static void
00db10
+remove_from_free_list (mstate arena)
00db10
+{
00db10
+  mstate *previous = &free_list;
00db10
+  for (mstate p = free_list; p != NULL; p = p->next_free)
00db10
+    {
00db10
+      assert (p->attached_threads == 0);
00db10
+      if (p == arena)
00db10
+	{
00db10
+	  /* Remove the requested arena from the list.  */
00db10
+	  *previous = p->next_free;
00db10
+	  break;
00db10
+	}
00db10
+      else
00db10
+	previous = &p->next_free;
00db10
+    }
00db10
+}
00db10
+
00db10
 /* Lock and return an arena that can be reused for memory allocation.
00db10
    Avoid AVOID_ARENA as we have already failed to allocate memory in
00db10
    it and it is currently locked.  */
00db10
@@ -777,16 +885,20 @@ static mstate
00db10
 reused_arena (mstate avoid_arena)
00db10
 {
00db10
   mstate result;
00db10
+  /* FIXME: Access to next_to_use suffers from data races.  */
00db10
   static mstate next_to_use;
00db10
   if (next_to_use == NULL)
00db10
     next_to_use = &main_arena;
00db10
 
00db10
+  /* Iterate over all arenas (including those linked from
00db10
+     free_list).  */
00db10
   result = next_to_use;
00db10
   do
00db10
     {
00db10
       if (!arena_is_corrupt (result) && !mutex_trylock(&result->mutex))
00db10
 	goto out;
00db10
 
00db10
+      /* FIXME: This is a data race, see _int_new_arena.  */
00db10
       result = result->next;
00db10
     }
00db10
   while (result != next_to_use);
00db10
@@ -815,6 +927,27 @@ reused_arena (mstate avoid_arena)
00db10
   (void)mutex_lock(&result->mutex);
00db10
 
00db10
  out:
00db10
+  /* Attach the arena to the current thread.  */
00db10
+  {
00db10
+    /* Update the arena thread attachment counters.   */
00db10
+    void *vptr = NULL;
00db10
+    mstate replaced_arena = tsd_getspecific (arena_key, vptr);
00db10
+    (void) mutex_lock (&free_list_lock);
00db10
+    detach_arena (replaced_arena);
00db10
+
00db10
+    /* We may have picked up an arena on the free list.  We need to
00db10
+       preserve the invariant that no arena on the free list has a
00db10
+       positive attached_threads counter (otherwise,
00db10
+       arena_thread_freeres cannot use the counter to determine if the
00db10
+       arena needs to be put on the free list).  We unconditionally
00db10
+       remove the selected arena from the free list.  The caller of
00db10
+       reused_arena checked the free list and observed it to be empty,
00db10
+       so the list is very short.  */
00db10
+    remove_from_free_list (result);
00db10
+
00db10
+    ++result->attached_threads;
00db10
+    (void) mutex_unlock (&free_list_lock);
00db10
+  }
00db10
   LIBC_PROBE (memory_arena_reuse, 2, result, avoid_arena);
00db10
   tsd_setspecific(arena_key, (void *)result);
00db10
   THREAD_STAT(++(result->stat_lock_loop));
00db10
@@ -905,10 +1038,16 @@ arena_thread_freeres (void)
00db10
 
00db10
   if (a != NULL)
00db10
     {
00db10
-      (void)mutex_lock(&list_lock);
00db10
-      a->next_free = free_list;
00db10
-      free_list = a;
00db10
-      (void)mutex_unlock(&list_lock);
00db10
+      (void)mutex_lock(&free_list_lock);
00db10
+      /* If this was the last attached thread for this arena, put the
00db10
+	 arena on the free list.  */
00db10
+      assert (a->attached_threads > 0);
00db10
+      if (--a->attached_threads == 0)
00db10
+	{
00db10
+	  a->next_free = free_list;
00db10
+	  free_list = a;
00db10
+	}
00db10
+      (void)mutex_unlock(&free_list_lock);
00db10
     }
00db10
 }
00db10
 text_set_element (__libc_thread_subfreeres, arena_thread_freeres);
00db10
Index: b/malloc/malloc.c
00db10
===================================================================
00db10
--- a/malloc/malloc.c
00db10
+++ b/malloc/malloc.c
00db10
@@ -1727,8 +1727,13 @@ struct malloc_state {
00db10
   /* Linked list */
00db10
   struct malloc_state *next;
00db10
 
00db10
-  /* Linked list for free arenas.  */
00db10
+  /* Linked list for free arenas.  Access to this field is serialized
00db10
+     by free_list_lock in arena.c. */
00db10
   struct malloc_state *next_free;
00db10
+  /* Number of threads attached to this arena.  0 if the arena is on
00db10
+     the free list.  Access to this field is serialized by
00db10
+     free_list_lock in arena.c.  */
00db10
+  INTERNAL_SIZE_T attached_threads;
00db10
 
00db10
   /* Memory allocated from the system in this arena.  */
00db10
   INTERNAL_SIZE_T system_mem;
00db10
@@ -1772,7 +1777,8 @@ struct malloc_par {
00db10
 static struct malloc_state main_arena =
00db10
   {
00db10
     .mutex = MUTEX_INITIALIZER,
00db10
-    .next = &main_arena
00db10
+    .next = &main_arena,
00db10
+    .attached_threads = 1,
00db10
   };
00db10
 
00db10
 /* There is only one instance of the malloc parameters.  */
00db10
Index: b/malloc/Makefile
00db10
===================================================================
00db10
--- a/malloc/Makefile
00db10
+++ b/malloc/Makefile
00db10
@@ -20,13 +20,14 @@
00db10
 #
00db10
 subdir	:= malloc
00db10
 
00db10
-all:
00db10
+include ../Makeconfig
00db10
 
00db10
 dist-headers := malloc.h
00db10
 headers := $(dist-headers) obstack.h mcheck.h
00db10
 tests := mallocbug tst-malloc tst-valloc tst-calloc tst-obstack \
00db10
 	 tst-mallocstate tst-mcheck tst-mallocfork tst-trim1 \
00db10
-	 tst-malloc-usable tst-malloc-backtrace
00db10
+	 tst-malloc-usable \
00db10
+	 tst-malloc-backtrace tst-malloc-thread-exit
00db10
 test-srcs = tst-mtrace
00db10
 
00db10
 routines = malloc morecore mcheck mtrace obstack
00db10
@@ -43,6 +44,8 @@ libmemusage-inhibit-o = $(filter-out .os
00db10
 
00db10
 $(objpfx)tst-malloc-backtrace: $(common-objpfx)nptl/libpthread.so \
00db10
 			       $(common-objpfx)nptl/libpthread_nonshared.a
00db10
+$(objpfx)tst-malloc-thread-exit: $(common-objpfx)nptl/libpthread.so \
00db10
+			       $(common-objpfx)nptl/libpthread_nonshared.a
00db10
 
00db10
 # These should be removed by `make clean'.
00db10
 extra-objs = mcheck-init.o libmcheck.a
00db10
@@ -50,8 +53,6 @@ extra-objs = mcheck-init.o libmcheck.a
00db10
 # Include the cleanup handler.
00db10
 aux := set-freeres thread-freeres
00db10
 
00db10
-include ../Makeconfig
00db10
-
00db10
 CPPFLAGS-memusagestat = -DNOT_IN_libc
00db10
 
00db10
 # The Perl script to analyze the output of the mtrace functions.
00db10
Index: b/malloc/tst-malloc-thread-exit.c
00db10
===================================================================
00db10
--- /dev/null
00db10
+++ b/malloc/tst-malloc-thread-exit.c
00db10
@@ -0,0 +1,218 @@
00db10
+/* Test malloc with concurrent thread termination.
00db10
+   Copyright (C) 2015-2016 Free Software Foundation, Inc.
00db10
+   This file is part of the GNU C Library.
00db10
+
00db10
+   The GNU C Library is free software; you can redistribute it and/or
00db10
+   modify it under the terms of the GNU Lesser General Public
00db10
+   License as published by the Free Software Foundation; either
00db10
+   version 2.1 of the License, or (at your option) any later version.
00db10
+
00db10
+   The GNU C Library is distributed in the hope that it will be useful,
00db10
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
00db10
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00db10
+   Lesser General Public License for more details.
00db10
+
00db10
+   You should have received a copy of the GNU Lesser General Public
00db10
+   License along with the GNU C Library; if not, see
00db10
+   <http://www.gnu.org/licenses/>.  */
00db10
+
00db10
+/* This thread spawns a number of outer threads, fewer than the arena
00db10
+   limit.  The outer threads run a loop which starts and joins two
00db10
+   different kinds of threads: the first kind allocates (attaching an
00db10
+   arena to the thread; malloc_first_thread) and waits, the second
00db10
+   kind waits and allocates (wait_first_thread).  Both kinds of
00db10
+   threads exit immediately after waiting.  The hope is that this will
00db10
+   exhibit races in thread termination and arena management,
00db10
+   particularly related to the arena free list.  */
00db10
+
00db10
+#include <errno.h>
00db10
+#include <malloc.h>
00db10
+#include <pthread.h>
00db10
+#include <stdbool.h>
00db10
+#include <stdio.h>
00db10
+#include <stdlib.h>
00db10
+#include <unistd.h>
00db10
+
00db10
+static int do_test (void);
00db10
+
00db10
+#define TEST_FUNCTION do_test ()
00db10
+#include "../test-skeleton.c"
00db10
+
00db10
+static bool termination_requested;
00db10
+static int inner_thread_count = 4;
00db10
+static size_t malloc_size = 32;
00db10
+
00db10
+static void
00db10
+__attribute__ ((noinline, noclone))
00db10
+unoptimized_free (void *ptr)
00db10
+{
00db10
+  free (ptr);
00db10
+}
00db10
+
00db10
+static void *
00db10
+malloc_first_thread (void * closure)
00db10
+{
00db10
+  pthread_barrier_t *barrier = closure;
00db10
+  void *ptr = malloc (malloc_size);
00db10
+  if (ptr == NULL)
00db10
+    {
00db10
+      printf ("error: malloc: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+  int ret = pthread_barrier_wait (barrier);
00db10
+  if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
00db10
+    {
00db10
+      errno = ret;
00db10
+      printf ("error: pthread_barrier_wait: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+  unoptimized_free (ptr);
00db10
+  return NULL;
00db10
+}
00db10
+
00db10
+static void *
00db10
+wait_first_thread (void * closure)
00db10
+{
00db10
+  pthread_barrier_t *barrier = closure;
00db10
+  int ret = pthread_barrier_wait (barrier);
00db10
+  if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
00db10
+    {
00db10
+      errno = ret;
00db10
+      printf ("error: pthread_barrier_wait: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+  void *ptr = malloc (malloc_size);
00db10
+  if (ptr == NULL)
00db10
+    {
00db10
+      printf ("error: malloc: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+  unoptimized_free (ptr);
00db10
+  return NULL;
00db10
+}
00db10
+
00db10
+static void *
00db10
+outer_thread (void *closure)
00db10
+{
00db10
+  pthread_t *threads = calloc (sizeof (*threads), inner_thread_count);
00db10
+  if (threads == NULL)
00db10
+    {
00db10
+      printf ("error: calloc: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+  /* Each iteration starts and joins one batch of inner threads.  */
00db10
+  while (!__atomic_load_n (&termination_requested, __ATOMIC_RELAXED))
00db10
+    {
00db10
+      pthread_barrier_t barrier;  /* Inner threads plus this thread.  */
00db10
+      int ret = pthread_barrier_init (&barrier, NULL, inner_thread_count + 1);
00db10
+      if (ret != 0)
00db10
+        {
00db10
+          errno = ret;
00db10
+          printf ("pthread_barrier_init: %m\n");
00db10
+          abort ();
00db10
+        }
00db10
+      for (int i = 0; i < inner_thread_count; ++i)
00db10
+        {
00db10
+          void *(*func) (void *);
00db10
+          if ((i  % 2) == 0)
00db10
+            func = malloc_first_thread;
00db10
+          else
00db10
+            func = wait_first_thread;
00db10
+          ret = pthread_create (threads + i, NULL, func, &barrier);
00db10
+          if (ret != 0)
00db10
+            {
00db10
+              errno = ret;
00db10
+              printf ("error: pthread_create: %m\n");
00db10
+              abort ();
00db10
+            }
00db10
+        }
00db10
+      ret = pthread_barrier_wait (&barrier);
00db10
+      if (ret != 0 && ret != PTHREAD_BARRIER_SERIAL_THREAD)
00db10
+        {
00db10
+          errno = ret;
00db10
+          printf ("pthread_barrier_wait: %m\n");
00db10
+          abort ();
00db10
+        }
00db10
+      for (int i = 0; i < inner_thread_count; ++i)
00db10
+        {
00db10
+          ret = pthread_join (threads[i], NULL);
00db10
+          if (ret != 0)
00db10
+            {
00db10
+              errno = ret;  /* The error is returned; %m reads errno.  */
00db10
+              printf ("error: pthread_join: %m\n");
00db10
+              abort ();
00db10
+            }
00db10
+        }
00db10
+      ret = pthread_barrier_destroy (&barrier);
00db10
+      if (ret != 0)
00db10
+        {
00db10
+          errno = ret;  /* The error is returned; %m reads errno.  */
00db10
+          printf ("pthread_barrier_destroy: %m\n");
00db10
+          abort ();
00db10
+        }
00db10
+    }
00db10
+
00db10
+  free (threads);
00db10
+
00db10
+  return NULL;
00db10
+}
00db10
+
00db10
+static int
00db10
+do_test (void)
00db10
+{
00db10
+  /* The number of threads should be smaller than the number of
00db10
+     arenas, so that there will be some free arenas to add to the
00db10
+     arena free list.  */
00db10
+  enum { outer_thread_count = 2 };
00db10
+  if (mallopt (M_ARENA_MAX, 8) == 0)
00db10
+    {
00db10
+      printf ("error: mallopt (M_ARENA_MAX) failed\n");
00db10
+      return 1;
00db10
+    }
00db10
+
00db10
+  /* Leave some room for shutting down all threads gracefully.  */
00db10
+  int timeout = 3;
00db10
+  if (timeout > TIMEOUT)
00db10
+    timeout = TIMEOUT - 1;
00db10
+
00db10
+  pthread_t *threads = calloc (sizeof (*threads), outer_thread_count);
00db10
+  if (threads == NULL)
00db10
+    {
00db10
+      printf ("error: calloc: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+
00db10
+  for (long i = 0; i < outer_thread_count; ++i)
00db10
+    {
00db10
+      int ret = pthread_create (threads + i, NULL, outer_thread, NULL);
00db10
+      if (ret != 0)
00db10
+        {
00db10
+          errno = ret;
00db10
+          printf ("error: pthread_create: %m\n");
00db10
+          abort ();
00db10
+        }
00db10
+    }
00db10
+
00db10
+  struct timespec ts = {timeout, 0};
00db10
+  if (nanosleep (&ts, NULL))
00db10
+    {
00db10
+      printf ("error: error: nanosleep: %m\n");
00db10
+      abort ();
00db10
+    }
00db10
+
00db10
+  __atomic_store_n (&termination_requested, true, __ATOMIC_RELAXED);
00db10
+
00db10
+  for (long i = 0; i < outer_thread_count; ++i)
00db10
+    {
00db10
+      int ret = pthread_join (threads[i], NULL);
00db10
+      if (ret != 0)
00db10
+        {
00db10
+          errno = ret;
00db10
+          printf ("error: pthread_join: %m\n");
00db10
+          abort ();
00db10
+        }
00db10
+    }
00db10
+  free (threads);
00db10
+
00db10
+  return 0;
00db10
+}