179894
commit 83b5323261bb72313bffcf37476c1b8f0847c736
179894
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
179894
Date:   Wed Sep 15 15:16:19 2021 +0100
179894
179894
    elf: Avoid deadlock between pthread_create and ctors [BZ #28357]
179894
179894
    The fix for bug 19329 caused a regression such that pthread_create can
179894
    deadlock when concurrent ctors from dlopen are waiting for it to finish.
179894
    Use a new GL(dl_load_tls_lock) in pthread_create that is not taken
179894
    around ctors in dlopen.
179894
179894
    The new lock is also used in __tls_get_addr instead of GL(dl_load_lock).
179894
179894
    The new lock is held in _dl_open_worker and _dl_close_worker around
179894
    most of the logic before/after the init/fini routines.  When init/fini
179894
    routines are running then TLS is in a consistent, usable state.
179894
    In _dl_open_worker the new lock requires catching and reraising dlopen
179894
    failures that happen in the critical section.
179894
179894
    The new lock is reinitialized in a fork child, to keep the existing
179894
    behaviour and it is kept recursive in case malloc interposition or TLS
179894
    access from signal handlers can retake it.  It is not obvious if this
179894
    is necessary or helps, but avoids changing the preexisting behaviour.
179894
179894
    The new lock may be more appropriate for dl_iterate_phdr too than
179894
    GL(dl_load_write_lock), since TLS state of an incompletely loaded
179894
    module may be accessed.  If the new lock can replace the old one,
179894
    that can be a separate change.
179894
179894
    Fixes bug 28357.
179894
179894
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
179894
179894
Conflicts:
179894
	posix/fork.c
179894
	  (reworked due to file rename upstream and libpthread integration)
179894
	sysdeps/pthread/Makefile
179894
	  (htl testing support was missing downstream, reconstituted here;
179894
	  added $(libdl) required downstream)
179894
179894
diff --git a/elf/dl-close.c b/elf/dl-close.c
179894
index 18227fe992029364..7fe91bdd9aaf694e 100644
179894
--- a/elf/dl-close.c
179894
+++ b/elf/dl-close.c
179894
@@ -549,6 +549,9 @@ _dl_close_worker (struct link_map *map, bool force)
179894
   size_t tls_free_end;
179894
   tls_free_start = tls_free_end = NO_TLS_OFFSET;
179894
 
179894
+  /* Protects global and module specitic TLS state.  */
179894
+  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
179894
+
179894
   /* We modify the list of loaded objects.  */
179894
   __rtld_lock_lock_recursive (GL(dl_load_write_lock));
179894
 
179894
@@ -784,6 +787,9 @@ _dl_close_worker (struct link_map *map, bool force)
179894
 	GL(dl_tls_static_used) = tls_free_start;
179894
     }
179894
 
179894
+  /* TLS is cleaned up for the unloaded modules.  */
179894
+  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
+
179894
 #ifdef SHARED
179894
   /* Auditing checkpoint: we have deleted all objects.  */
179894
   if (__glibc_unlikely (do_audit))
179894
diff --git a/elf/dl-open.c b/elf/dl-open.c
179894
index 54727402750f4c0c..736df62ce6e46d34 100644
179894
--- a/elf/dl-open.c
179894
+++ b/elf/dl-open.c
179894
@@ -65,6 +65,9 @@ struct dl_open_args
179894
      libc_map value in the namespace in case of a dlopen failure.  */
179894
   bool libc_already_loaded;
179894
 
179894
+  /* Set to true if the end of dl_open_worker_begin was reached.  */
179894
+  bool worker_continue;
179894
+
179894
   /* Original parameters to the program and the current environment.  */
179894
   int argc;
179894
   char **argv;
179894
@@ -481,7 +484,7 @@ call_dl_init (void *closure)
179894
 }
179894
 
179894
 static void
179894
-dl_open_worker (void *a)
179894
+dl_open_worker_begin (void *a)
179894
 {
179894
   struct dl_open_args *args = a;
179894
   const char *file = args->file;
179894
@@ -772,6 +775,36 @@ dl_open_worker (void *a)
179894
   DL_STATIC_INIT (new);
179894
 #endif
179894
 
179894
+  args->worker_continue = true;
179894
+}
179894
+
179894
+static void
179894
+dl_open_worker (void *a)
179894
+{
179894
+  struct dl_open_args *args = a;
179894
+
179894
+  args->worker_continue = false;
179894
+
179894
+  {
179894
+    /* Protects global and module specific TLS state.  */
179894
+    __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
179894
+
179894
+    struct dl_exception ex;
179894
+    int err = _dl_catch_exception (&ex, dl_open_worker_begin, args);
179894
+
179894
+    __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
+
179894
+    if (__glibc_unlikely (ex.errstring != NULL))
179894
+      /* Reraise the error.  */
179894
+      _dl_signal_exception (err, &ex, NULL);
179894
+  }
179894
+
179894
+  if (!args->worker_continue)
179894
+    return;
179894
+
179894
+  int mode = args->mode;
179894
+  struct link_map *new = args->map;
179894
+
179894
   /* Run the initializer functions of new objects.  Temporarily
179894
      disable the exception handler, so that lazy binding failures are
179894
      fatal.  */
179894
diff --git a/elf/dl-support.c b/elf/dl-support.c
179894
index 34be8e5babfb6af3..3e5531138eaa18f8 100644
179894
--- a/elf/dl-support.c
179894
+++ b/elf/dl-support.c
179894
@@ -212,6 +212,13 @@ __rtld_lock_define_initialized_recursive (, _dl_load_lock)
179894
    list of loaded objects while an object is added to or removed from
179894
    that list.  */
179894
 __rtld_lock_define_initialized_recursive (, _dl_load_write_lock)
179894
+  /* This lock protects global and module specific TLS related data.
179894
+     E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
179894
+     GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
179894
+     accessed and when TLS related relocations are processed for a
179894
+     module.  It was introduced to keep pthread_create accessing TLS
179894
+     state that is being set up.  */
179894
+__rtld_lock_define_initialized_recursive (, _dl_load_tls_lock)
179894
 
179894
 
179894
 #ifdef HAVE_AUX_VECTOR
179894
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
179894
index 8c0f9e972d7a0eac..7865fc390c3f3f0a 100644
179894
--- a/elf/dl-tls.c
179894
+++ b/elf/dl-tls.c
179894
@@ -527,7 +527,7 @@ _dl_allocate_tls_init (void *result)
179894
   size_t maxgen = 0;
179894
 
179894
   /* Protects global dynamic TLS related state.  */
179894
-  __rtld_lock_lock_recursive (GL(dl_load_lock));
179894
+  __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
179894
 
179894
   /* Check if the current dtv is big enough.   */
179894
   if (dtv[-1].counter < GL(dl_tls_max_dtv_idx))
179894
@@ -601,7 +601,7 @@ _dl_allocate_tls_init (void *result)
179894
       listp = listp->next;
179894
       assert (listp != NULL);
179894
     }
179894
-  __rtld_lock_unlock_recursive (GL(dl_load_lock));
179894
+  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
 
179894
   /* The DTV version is up-to-date now.  */
179894
   dtv[0].counter = maxgen;
179894
@@ -740,7 +740,7 @@ _dl_update_slotinfo (unsigned long int req_modid)
179894
 
179894
 	 Here the dtv needs to be updated to new_gen generation count.
179894
 
179894
-	 This code may be called during TLS access when GL(dl_load_lock)
179894
+	 This code may be called during TLS access when GL(dl_load_tls_lock)
179894
 	 is not held.  In that case the user code has to synchronize with
179894
 	 dlopen and dlclose calls of relevant modules.  A module m is
179894
 	 relevant if the generation of m <= new_gen and dlclose of m is
179894
@@ -862,11 +862,11 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
179894
   if (__glibc_unlikely (the_map->l_tls_offset
179894
 			!= FORCED_DYNAMIC_TLS_OFFSET))
179894
     {
179894
-      __rtld_lock_lock_recursive (GL(dl_load_lock));
179894
+      __rtld_lock_lock_recursive (GL(dl_load_tls_lock));
179894
       if (__glibc_likely (the_map->l_tls_offset == NO_TLS_OFFSET))
179894
 	{
179894
 	  the_map->l_tls_offset = FORCED_DYNAMIC_TLS_OFFSET;
179894
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
179894
+	  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
 	}
179894
       else if (__glibc_likely (the_map->l_tls_offset
179894
 			       != FORCED_DYNAMIC_TLS_OFFSET))
179894
@@ -878,7 +878,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
179894
 #else
179894
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
179894
 #endif
179894
-	  __rtld_lock_unlock_recursive (GL(dl_load_lock));
179894
+	  __rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
 
179894
 	  dtv[GET_ADDR_MODULE].pointer.to_free = NULL;
179894
 	  dtv[GET_ADDR_MODULE].pointer.val = p;
179894
@@ -886,7 +886,7 @@ tls_get_addr_tail (GET_ADDR_ARGS, dtv_t *dtv, struct link_map *the_map)
179894
 	  return (char *) p + GET_ADDR_OFFSET;
179894
 	}
179894
       else
179894
-	__rtld_lock_unlock_recursive (GL(dl_load_lock));
179894
+	__rtld_lock_unlock_recursive (GL(dl_load_tls_lock));
179894
     }
179894
   struct dtv_pointer result = allocate_and_init (the_map);
179894
   dtv[GET_ADDR_MODULE].pointer = result;
179894
@@ -957,7 +957,7 @@ _dl_tls_get_addr_soft (struct link_map *l)
179894
     return NULL;
179894
 
179894
   dtv_t *dtv = THREAD_DTV ();
179894
-  /* This may be called without holding the GL(dl_load_lock).  Reading
179894
+  /* This may be called without holding the GL(dl_load_tls_lock).  Reading
179894
      arbitrary gen value is fine since this is best effort code.  */
179894
   size_t gen = atomic_load_relaxed (&GL(dl_tls_generation));
179894
   if (__glibc_unlikely (dtv[0].counter != gen))
179894
diff --git a/elf/rtld.c b/elf/rtld.c
179894
index 118c454a2329573f..9e09896da078274d 100644
179894
--- a/elf/rtld.c
179894
+++ b/elf/rtld.c
179894
@@ -317,6 +317,7 @@ struct rtld_global _rtld_global =
179894
 #ifdef _LIBC_REENTRANT
179894
     ._dl_load_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
179894
     ._dl_load_write_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
179894
+    ._dl_load_tls_lock = _RTLD_LOCK_RECURSIVE_INITIALIZER,
179894
 #endif
179894
     ._dl_nns = 1,
179894
     ._dl_ns =
179894
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
179894
index 0138353ccb41c5f1..7b0a667629ddc06a 100644
179894
--- a/sysdeps/generic/ldsodefs.h
179894
+++ b/sysdeps/generic/ldsodefs.h
179894
@@ -373,6 +373,13 @@ struct rtld_global
179894
      list of loaded objects while an object is added to or removed
179894
      from that list.  */
179894
   __rtld_lock_define_recursive (EXTERN, _dl_load_write_lock)
179894
+  /* This lock protects global and module specific TLS related data.
179894
+     E.g. it is held in dlopen and dlclose when GL(dl_tls_generation),
179894
+     GL(dl_tls_max_dtv_idx) or GL(dl_tls_dtv_slotinfo_list) are
179894
+     accessed and when TLS related relocations are processed for a
179894
+     module.  It was introduced to keep pthread_create accessing TLS
179894
+     state that is being set up.  */
179894
+  __rtld_lock_define_recursive (EXTERN, _dl_load_tls_lock)
179894
 
179894
   /* Incremented whenever something may have been added to dl_loaded.  */
179894
   EXTERN unsigned long long _dl_load_adds;
179894
@@ -1192,7 +1199,7 @@ extern int _dl_scope_free (void *) attribute_hidden;
179894
 
179894
 /* Add module to slot information data.  If DO_ADD is false, only the
179894
    required memory is allocated.  Must be called with GL
179894
-   (dl_load_lock) acquired.  If the function has already been called
179894
+   (dl_load_tls_lock) acquired.  If the function has already been called
179894
    for the link map L with !do_add, then this function will not raise
179894
    an exception, otherwise it is possible that it encounters a memory
179894
    allocation failure.  */
179894
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
179894
index 37db30f3d1e846b6..b4d20fa652f4ba3b 100644
179894
--- a/sysdeps/nptl/fork.c
179894
+++ b/sysdeps/nptl/fork.c
179894
@@ -125,6 +125,9 @@ __libc_fork (void)
179894
       /* Reset the lock the dynamic loader uses to protect its data.  */
179894
       __rtld_lock_initialize (GL(dl_load_lock));
179894
 
179894
+      /* Reset the lock protecting dynamic TLS related data.  */
179894
+      __rtld_lock_initialize (GL(dl_load_tls_lock));
179894
+
179894
       /* Run the handlers registered for the child.  */
179894
       __run_fork_handlers (atfork_run_child, multiple_threads);
179894
     }
179894
diff --git a/sysdeps/pthread/Makefile b/sysdeps/pthread/Makefile
179894
index ea4f8894891b2636..98a92f8d6bb119ba 100644
179894
--- a/sysdeps/pthread/Makefile
179894
+++ b/sysdeps/pthread/Makefile
179894
@@ -25,3 +25,24 @@ $(objpfx)tst-timer: $(objpfx)librt.a $(static-thread-library)
179894
 endif
179894
 
179894
 endif
179894
+
179894
+ifneq (,$(filter $(subdir),htl nptl))
179894
+ifeq ($(build-shared),yes)
179894
+tests += tst-create1
179894
+endif
179894
+
179894
+tst-create1mod.so-no-z-defs = yes
179894
+
179894
+ifeq ($(build-shared),yes)
179894
+# Build all the modules even when not actually running test programs.
179894
+tests: $(test-modules)
179894
+endif
179894
+
179894
+modules-names += tst-create1mod
179894
+test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
179894
+
179894
+LDFLAGS-tst-create1 = -Wl,-export-dynamic
179894
+$(objpfx)tst-create1: $(libdl) $(shared-thread-library)
179894
+$(objpfx)tst-create1.out: $(objpfx)tst-create1mod.so
179894
+
179894
+endif
179894
diff --git a/sysdeps/pthread/tst-create1.c b/sysdeps/pthread/tst-create1.c
179894
new file mode 100644
179894
index 0000000000000000..932586c30990d1d4
179894
--- /dev/null
179894
+++ b/sysdeps/pthread/tst-create1.c
179894
@@ -0,0 +1,119 @@
179894
+/* Verify that pthread_create does not deadlock when ctors take locks.
179894
+   Copyright (C) 2021 Free Software Foundation, Inc.
179894
+   This file is part of the GNU C Library.
179894
+
179894
+   The GNU C Library is free software; you can redistribute it and/or
179894
+   modify it under the terms of the GNU Lesser General Public
179894
+   License as published by the Free Software Foundation; either
179894
+   version 2.1 of the License, or (at your option) any later version.
179894
+
179894
+   The GNU C Library is distributed in the hope that it will be useful,
179894
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
179894
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
179894
+   Lesser General Public License for more details.
179894
+
179894
+   You should have received a copy of the GNU Lesser General Public
179894
+   License along with the GNU C Library; if not, see
179894
+   <https://www.gnu.org/licenses/>.  */
179894
+
179894
+#include <stdio.h>
179894
+#include <support/xdlfcn.h>
179894
+#include <support/xthread.h>
179894
+
179894
+/*
179894
+Check if ctor and pthread_create deadlocks in
179894
+
179894
+thread 1: dlopen -> ctor -> lock(user_lock)
179894
+thread 2: lock(user_lock) -> pthread_create
179894
+
179894
+or in
179894
+
179894
+thread 1: dlclose -> dtor -> lock(user_lock)
179894
+thread 2: lock(user_lock) -> pthread_create
179894
+*/
179894
+
179894
+static pthread_barrier_t bar_ctor;
179894
+static pthread_barrier_t bar_dtor;
179894
+static pthread_mutex_t user_lock = PTHREAD_MUTEX_INITIALIZER;
179894
+
179894
+void
179894
+ctor (void)
179894
+{
179894
+  xpthread_barrier_wait (&bar_ctor);
179894
+  dprintf (1, "thread 1: in ctor: started.\n");
179894
+  xpthread_mutex_lock (&user_lock);
179894
+  dprintf (1, "thread 1: in ctor: locked user_lock.\n");
179894
+  xpthread_mutex_unlock (&user_lock);
179894
+  dprintf (1, "thread 1: in ctor: unlocked user_lock.\n");
179894
+  dprintf (1, "thread 1: in ctor: done.\n");
179894
+}
179894
+
179894
+void
179894
+dtor (void)
179894
+{
179894
+  xpthread_barrier_wait (&bar_dtor);
179894
+  dprintf (1, "thread 1: in dtor: started.\n");
179894
+  xpthread_mutex_lock (&user_lock);
179894
+  dprintf (1, "thread 1: in dtor: locked user_lock.\n");
179894
+  xpthread_mutex_unlock (&user_lock);
179894
+  dprintf (1, "thread 1: in dtor: unlocked user_lock.\n");
179894
+  dprintf (1, "thread 1: in dtor: done.\n");
179894
+}
179894
+
179894
+static void *
179894
+thread3 (void *a)
179894
+{
179894
+  dprintf (1, "thread 3: started.\n");
179894
+  dprintf (1, "thread 3: done.\n");
179894
+  return 0;
179894
+}
179894
+
179894
+static void *
179894
+thread2 (void *a)
179894
+{
179894
+  pthread_t t3;
179894
+  dprintf (1, "thread 2: started.\n");
179894
+
179894
+  xpthread_mutex_lock (&user_lock);
179894
+  dprintf (1, "thread 2: locked user_lock.\n");
179894
+  xpthread_barrier_wait (&bar_ctor);
179894
+  t3 = xpthread_create (0, thread3, 0);
179894
+  xpthread_mutex_unlock (&user_lock);
179894
+  dprintf (1, "thread 2: unlocked user_lock.\n");
179894
+  xpthread_join (t3);
179894
+
179894
+  xpthread_mutex_lock (&user_lock);
179894
+  dprintf (1, "thread 2: locked user_lock.\n");
179894
+  xpthread_barrier_wait (&bar_dtor);
179894
+  t3 = xpthread_create (0, thread3, 0);
179894
+  xpthread_mutex_unlock (&user_lock);
179894
+  dprintf (1, "thread 2: unlocked user_lock.\n");
179894
+  xpthread_join (t3);
179894
+
179894
+  dprintf (1, "thread 2: done.\n");
179894
+  return 0;
179894
+}
179894
+
179894
+static void
179894
+thread1 (void)
179894
+{
179894
+  dprintf (1, "thread 1: started.\n");
179894
+  xpthread_barrier_init (&bar_ctor, NULL, 2);
179894
+  xpthread_barrier_init (&bar_dtor, NULL, 2);
179894
+  pthread_t t2 = xpthread_create (0, thread2, 0);
179894
+  void *p = xdlopen ("tst-create1mod.so", RTLD_NOW | RTLD_GLOBAL);
179894
+  dprintf (1, "thread 1: dlopen done.\n");
179894
+  xdlclose (p);
179894
+  dprintf (1, "thread 1: dlclose done.\n");
179894
+  xpthread_join (t2);
179894
+  dprintf (1, "thread 1: done.\n");
179894
+}
179894
+
179894
+static int
179894
+do_test (void)
179894
+{
179894
+  thread1 ();
179894
+  return 0;
179894
+}
179894
+
179894
+#include <support/test-driver.c>
179894
diff --git a/sysdeps/pthread/tst-create1mod.c b/sysdeps/pthread/tst-create1mod.c
179894
new file mode 100644
179894
index 0000000000000000..62c9006961683177
179894
--- /dev/null
179894
+++ b/sysdeps/pthread/tst-create1mod.c
179894
@@ -0,0 +1,41 @@
179894
+/* Verify that pthread_create does not deadlock when ctors take locks.
179894
+   Copyright (C) 2021 Free Software Foundation, Inc.
179894
+   This file is part of the GNU C Library.
179894
+
179894
+   The GNU C Library is free software; you can redistribute it and/or
179894
+   modify it under the terms of the GNU Lesser General Public
179894
+   License as published by the Free Software Foundation; either
179894
+   version 2.1 of the License, or (at your option) any later version.
179894
+
179894
+   The GNU C Library is distributed in the hope that it will be useful,
179894
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
179894
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
179894
+   Lesser General Public License for more details.
179894
+
179894
+   You should have received a copy of the GNU Lesser General Public
179894
+   License along with the GNU C Library; if not, see
179894
+   <https://www.gnu.org/licenses/>.  */
179894
+
179894
+#include <stdio.h>
179894
+
179894
+/* Require TLS setup for the module.  */
179894
+__thread int tlsvar;
179894
+
179894
+void ctor (void);
179894
+void dtor (void);
179894
+
179894
+static void __attribute__ ((constructor))
179894
+do_init (void)
179894
+{
179894
+  dprintf (1, "constructor started: %d.\n", tlsvar++);
179894
+  ctor ();
179894
+  dprintf (1, "constructor done: %d.\n", tlsvar++);
179894
+}
179894
+
179894
+static void __attribute__ ((destructor))
179894
+do_end (void)
179894
+{
179894
+  dprintf (1, "destructor started: %d.\n", tlsvar++);
179894
+  dtor ();
179894
+  dprintf (1, "destructor done: %d.\n", tlsvar++);
179894
+}