f31a89
This patch is a RHEL-8.7 backport of the following upstream commit:
f31a89
f31a89
commit 52a103e237329b9f88a28513fe7506ffc3bd8ced
f31a89
Author: Arjun Shankar <arjun@redhat.com>
f31a89
Date:   Tue May 24 17:57:36 2022 +0200
f31a89
f31a89
    Fix deadlock when pthread_atfork handler calls pthread_atfork or dlclose
f31a89
    
f31a89
    In multi-threaded programs, registering via pthread_atfork,
f31a89
    de-registering implicitly via dlclose, or running pthread_atfork
f31a89
    handlers during fork was protected by an internal lock.  This meant
f31a89
    that a pthread_atfork handler attempting to register another handler or
f31a89
    dlclose a dynamically loaded library would lead to a deadlock.
f31a89
    
f31a89
    This commit fixes the deadlock in the following way:
f31a89
    
f31a89
    During the execution of handlers at fork time, the atfork lock is
f31a89
    released prior to the execution of each handler and taken again upon its
f31a89
    return.  Any handler registrations or de-registrations that occurred
f31a89
    during the execution of the handler are accounted for before proceeding
f31a89
    with further handler execution.
f31a89
    
f31a89
    If a handler that hasn't been executed yet gets de-registered by another
f31a89
    handler during fork, it will not be executed.   If a handler gets
f31a89
    registered by another handler during fork, it will not be executed
f31a89
    during that particular fork.
f31a89
    
f31a89
    The possibility that handlers may now be registered or deregistered
f31a89
    during handler execution means that identifying the next handler to be
f31a89
    run after a given handler may register/de-register others requires some
f31a89
    bookkeeping.  The fork_handler struct has an additional field, 'id',
f31a89
    which is assigned sequentially during registration.  Thus, handlers are
f31a89
    executed in ascending order of 'id' during 'prepare', and descending
f31a89
    order of 'id' during parent/child handler execution after the fork.
f31a89
    
f31a89
    Two tests are included:
f31a89
    
f31a89
    * tst-atfork3: Adhemerval Zanella <adhemerval.zanella@linaro.org>
f31a89
      This test exercises calling dlclose from prepare, parent, and child
f31a89
      handlers.
f31a89
    
f31a89
    * tst-atfork4: This test exercises calling pthread_atfork and dlclose
f31a89
      from the prepare handler.
f31a89
    
f31a89
    [BZ #24595, BZ #27054]
f31a89
    
f31a89
    Co-authored-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
f31a89
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
f31a89
f31a89
diff --git a/nptl/Makefile b/nptl/Makefile
f31a89
index 70a3be23ecfcd9c9..76c914e23e8873f2 100644
f31a89
--- a/nptl/Makefile
f31a89
+++ b/nptl/Makefile
f31a89
@@ -382,8 +382,17 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
f31a89
 	 tst-cancelx16 tst-cancelx17 tst-cancelx18 tst-cancelx20 tst-cancelx21 \
f31a89
 	 tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4
f31a89
 ifeq ($(build-shared),yes)
f31a89
-tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder \
f31a89
-	 tst-audit-threads
f31a89
+tests += \
f31a89
+  tst-atfork2 \
f31a89
+  tst-tls4 \
f31a89
+  tst-_res1 \
f31a89
+  tst-fini1 \
f31a89
+  tst-compat-forwarder \
f31a89
+  tst-audit-threads \
f31a89
+  tst-atfork3 \
f31a89
+  tst-atfork4 \
f31a89
+# tests
f31a89
+
f31a89
 tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1
f31a89
 tests-nolibpthread += tst-fini1
f31a89
 ifeq ($(have-z-execstack),yes)
f31a89
@@ -391,18 +400,39 @@ tests += tst-execstack
f31a89
 endif
f31a89
 endif
f31a89
 
f31a89
-modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
f31a89
-		tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
f31a89
-		tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
f31a89
-		tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
f31a89
-		tst-join7mod tst-compat-forwarder-mod tst-audit-threads-mod1 \
f31a89
-		tst-audit-threads-mod2
f31a89
+modules-names = \
f31a89
+  tst-atfork2mod \
f31a89
+  tst-tls3mod \
f31a89
+  tst-tls4moda \
f31a89
+  tst-tls4modb \
f31a89
+  tst-tls5mod \
f31a89
+  tst-tls5moda \
f31a89
+  tst-tls5modb \
f31a89
+  tst-tls5modc \
f31a89
+  tst-tls5modd \
f31a89
+  tst-tls5mode \
f31a89
+  tst-tls5modf \
f31a89
+  tst-stack4mod \
f31a89
+  tst-_res1mod1 \
f31a89
+  tst-_res1mod2 \
f31a89
+  tst-execstack-mod \
f31a89
+  tst-fini1mod \
f31a89
+  tst-join7mod \
f31a89
+  tst-compat-forwarder-mod \
f31a89
+  tst-audit-threads-mod1 \
f31a89
+  tst-audit-threads-mod2 \
f31a89
+  tst-atfork3mod \
f31a89
+  tst-atfork4mod \
f31a89
+# module-names
f31a89
+
f31a89
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) \
f31a89
 		   tst-cleanup4aux.o tst-cleanupx4aux.o
f31a89
 test-extras += tst-cleanup4aux tst-cleanupx4aux
f31a89
 test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
f31a89
 
f31a89
 tst-atfork2mod.so-no-z-defs = yes
f31a89
+tst-atfork3mod.so-no-z-defs = yes
f31a89
+tst-atfork4mod.so-no-z-defs = yes
f31a89
 tst-tls3mod.so-no-z-defs = yes
f31a89
 tst-tls5mod.so-no-z-defs = yes
f31a89
 tst-tls5moda.so-no-z-defs = yes
f31a89
@@ -541,6 +571,14 @@ LDFLAGS-tst-atfork2 = -rdynamic
f31a89
 tst-atfork2-ENV = MALLOC_TRACE=$(objpfx)tst-atfork2.mtrace
f31a89
 $(objpfx)tst-atfork2mod.so: $(shared-thread-library)
f31a89
 
f31a89
+$(objpfx)tst-atfork3: $(libdl) $(shared-thread-library)
f31a89
+LDFLAGS-tst-atfork3 = -rdynamic
f31a89
+$(objpfx)tst-atfork3mod.so: $(shared-thread-library)
f31a89
+
f31a89
+$(objpfx)tst-atfork4: $(libdl) $(shared-thread-library)
f31a89
+LDFLAGS-tst-atfork4 = -rdynamic
f31a89
+$(objpfx)tst-atfork4mod.so: $(shared-thread-library)
f31a89
+
f31a89
 tst-stack3-ENV = MALLOC_TRACE=$(objpfx)tst-stack3.mtrace
f31a89
 $(objpfx)tst-stack3-mem.out: $(objpfx)tst-stack3.out
f31a89
 	$(common-objpfx)malloc/mtrace $(objpfx)tst-stack3.mtrace > $@; \
f31a89
@@ -640,6 +678,8 @@ $(objpfx)../libc.so: $(common-objpfx)libc.so ;
f31a89
 $(addprefix $(objpfx),$(tests-static) $(xtests-static)): $(objpfx)libpthread.a
f31a89
 
f31a89
 $(objpfx)tst-atfork2.out: $(objpfx)tst-atfork2mod.so
f31a89
+$(objpfx)tst-atfork3.out: $(objpfx)tst-atfork3mod.so
f31a89
+$(objpfx)tst-atfork4.out: $(objpfx)tst-atfork4mod.so
f31a89
 else
f31a89
 $(addprefix $(objpfx),$(tests) $(test-srcs)): $(objpfx)libpthread.a
f31a89
 endif
f31a89
diff --git a/nptl/register-atfork.c b/nptl/register-atfork.c
f31a89
index 9edb7d4bbb49fbed..4c1e20ae8cab005f 100644
f31a89
--- a/nptl/register-atfork.c
f31a89
+++ b/nptl/register-atfork.c
f31a89
@@ -21,6 +21,8 @@
f31a89
 #include <string.h>
f31a89
 #include <fork.h>
f31a89
 #include <atomic.h>
f31a89
+#include <intprops.h>
f31a89
+#include <stdio.h>
f31a89
 
f31a89
 #define DYNARRAY_ELEMENT           struct fork_handler
f31a89
 #define DYNARRAY_STRUCT            fork_handler_list
f31a89
@@ -29,7 +31,7 @@
f31a89
 #include <malloc/dynarray-skeleton.c>
f31a89
 
f31a89
 static struct fork_handler_list fork_handlers;
f31a89
-static bool fork_handler_init = false;
f31a89
+static uint64_t fork_handler_counter;
f31a89
 
f31a89
 static int atfork_lock = LLL_LOCK_INITIALIZER;
f31a89
 
f31a89
@@ -39,11 +41,8 @@ __register_atfork (void (*prepare) (void), void (*parent) (void),
f31a89
 {
f31a89
   lll_lock (atfork_lock, LLL_PRIVATE);
f31a89
 
f31a89
-  if (!fork_handler_init)
f31a89
-    {
f31a89
-      fork_handler_list_init (&fork_handlers);
f31a89
-      fork_handler_init = true;
f31a89
-    }
f31a89
+  if (fork_handler_counter == 0)
f31a89
+    fork_handler_list_init (&fork_handlers);
f31a89
 
f31a89
   struct fork_handler *newp = fork_handler_list_emplace (&fork_handlers);
f31a89
   if (newp != NULL)
f31a89
@@ -52,6 +51,13 @@ __register_atfork (void (*prepare) (void), void (*parent) (void),
f31a89
       newp->parent_handler = parent;
f31a89
       newp->child_handler = child;
f31a89
       newp->dso_handle = dso_handle;
f31a89
+
f31a89
+      /* IDs assigned to handlers start at 1 and increment with handler
f31a89
+         registration.  Un-registering a handler discards the corresponding
f31a89
+         ID.  It is not reused in future registrations.  */
f31a89
+      if (INT_ADD_OVERFLOW (fork_handler_counter, 1))
f31a89
+        __libc_fatal ("fork handler counter overflow");
f31a89
+      newp->id = ++fork_handler_counter;
f31a89
     }
f31a89
 
f31a89
   /* Release the lock.  */
f31a89
@@ -106,37 +112,111 @@ __unregister_atfork (void *dso_handle)
f31a89
   lll_unlock (atfork_lock, LLL_PRIVATE);
f31a89
 }
f31a89
 
f31a89
-void
f31a89
-__run_fork_handlers (enum __run_fork_handler_type who, _Bool do_locking)
f31a89
+uint64_t
f31a89
+__run_prefork_handlers (_Bool do_locking)
f31a89
 {
f31a89
-  struct fork_handler *runp;
f31a89
+  uint64_t lastrun;
f31a89
 
f31a89
-  if (who == atfork_run_prepare)
f31a89
+  if (do_locking)
f31a89
+    lll_lock (atfork_lock, LLL_PRIVATE);
f31a89
+
f31a89
+  /* We run prepare handlers from last to first.  After fork, only
f31a89
+     handlers up to the last handler found here (pre-fork) will be run.
f31a89
+     Handlers registered during __run_prefork_handlers or
f31a89
+     __run_postfork_handlers will be positioned after this last handler, and
f31a89
+     since their prepare handlers won't be run now, their parent/child
f31a89
+     handlers should also be ignored.  */
f31a89
+  lastrun = fork_handler_counter;
f31a89
+
f31a89
+  size_t sl = fork_handler_list_size (&fork_handlers);
f31a89
+  for (size_t i = sl; i > 0;)
f31a89
     {
f31a89
-      if (do_locking)
f31a89
-	lll_lock (atfork_lock, LLL_PRIVATE);
f31a89
-      size_t sl = fork_handler_list_size (&fork_handlers);
f31a89
-      for (size_t i = sl; i > 0; i--)
f31a89
-	{
f31a89
-	  runp = fork_handler_list_at (&fork_handlers, i - 1);
f31a89
-	  if (runp->prepare_handler != NULL)
f31a89
-	    runp->prepare_handler ();
f31a89
-	}
f31a89
+      struct fork_handler *runp
f31a89
+        = fork_handler_list_at (&fork_handlers, i - 1);
f31a89
+
f31a89
+      uint64_t id = runp->id;
f31a89
+
f31a89
+      if (runp->prepare_handler != NULL)
f31a89
+        {
f31a89
+          if (do_locking)
f31a89
+            lll_unlock (atfork_lock, LLL_PRIVATE);
f31a89
+
f31a89
+          runp->prepare_handler ();
f31a89
+
f31a89
+          if (do_locking)
f31a89
+            lll_lock (atfork_lock, LLL_PRIVATE);
f31a89
+        }
f31a89
+
f31a89
+      /* We unlocked, ran the handler, and locked again.  In the
f31a89
+         meanwhile, one or more deregistrations could have occurred leading
f31a89
+         to the current (just run) handler being moved up the list or even
f31a89
+         removed from the list itself.  Since handler IDs are guaranteed to
f31a89
+         be in increasing order, the next handler has to have:  */
f31a89
+
f31a89
+      /* A. An earlier position than the current one has.  */
f31a89
+      i--;
f31a89
+
f31a89
+      /* B. A lower ID than the current one does.  The code below skips
f31a89
+         any newly added handlers with higher IDs.  */
f31a89
+      while (i > 0
f31a89
+             && fork_handler_list_at (&fork_handlers, i - 1)->id >= id)
f31a89
+        i--;
f31a89
     }
f31a89
-  else
f31a89
+
f31a89
+  return lastrun;
f31a89
+}
f31a89
+
f31a89
+void
f31a89
+__run_postfork_handlers (enum __run_fork_handler_type who, _Bool do_locking,
f31a89
+                         uint64_t lastrun)
f31a89
+{
f31a89
+  size_t sl = fork_handler_list_size (&fork_handlers);
f31a89
+  for (size_t i = 0; i < sl;)
f31a89
     {
f31a89
-      size_t sl = fork_handler_list_size (&fork_handlers);
f31a89
-      for (size_t i = 0; i < sl; i++)
f31a89
-	{
f31a89
-	  runp = fork_handler_list_at (&fork_handlers, i);
f31a89
-	  if (who == atfork_run_child && runp->child_handler)
f31a89
-	    runp->child_handler ();
f31a89
-	  else if (who == atfork_run_parent && runp->parent_handler)
f31a89
-	    runp->parent_handler ();
f31a89
-	}
f31a89
+      struct fork_handler *runp = fork_handler_list_at (&fork_handlers, i);
f31a89
+      uint64_t id = runp->id;
f31a89
+
f31a89
+      /* prepare handlers were not run for handlers with ID > LASTRUN.
f31a89
+         Thus, parent/child handlers will also not be run.  */
f31a89
+      if (id > lastrun)
f31a89
+        break;
f31a89
+
f31a89
       if (do_locking)
f31a89
-	lll_unlock (atfork_lock, LLL_PRIVATE);
f31a89
+        lll_unlock (atfork_lock, LLL_PRIVATE);
f31a89
+
f31a89
+      if (who == atfork_run_child && runp->child_handler)
f31a89
+        runp->child_handler ();
f31a89
+      else if (who == atfork_run_parent && runp->parent_handler)
f31a89
+        runp->parent_handler ();
f31a89
+
f31a89
+      if (do_locking)
f31a89
+        lll_lock (atfork_lock, LLL_PRIVATE);
f31a89
+
f31a89
+      /* We unlocked, ran the handler, and locked again.  In the meanwhile,
f31a89
+         one or more [de]registrations could have occurred.  Due to this,
f31a89
+         the list size must be updated.  */
f31a89
+      sl = fork_handler_list_size (&fork_handlers);
f31a89
+
f31a89
+      /* The just-run handler could also have moved up the list. */
f31a89
+
f31a89
+      if (sl > i && fork_handler_list_at (&fork_handlers, i)->id == id)
f31a89
+        /* The position of the recently run handler hasn't changed.  The
f31a89
+           next handler to be run is an easy increment away.  */
f31a89
+        i++;
f31a89
+      else
f31a89
+        {
f31a89
+          /* The next handler to be run is the first handler in the list
f31a89
+             to have an ID higher than the current one.  */
f31a89
+          for (i = 0; i < sl; i++)
f31a89
+            {
f31a89
+              if (fork_handler_list_at (&fork_handlers, i)->id > id)
f31a89
+                break;
f31a89
+            }
f31a89
+        }
f31a89
     }
f31a89
+
f31a89
+  if (do_locking)
f31a89
+    lll_unlock (atfork_lock, LLL_PRIVATE);
f31a89
 }
f31a89
 
f31a89
 
f31a89
diff --git a/nptl/tst-atfork3.c b/nptl/tst-atfork3.c
f31a89
new file mode 100644
f31a89
index 0000000000000000..bb2250e432ab79ad
f31a89
--- /dev/null
f31a89
+++ b/nptl/tst-atfork3.c
f31a89
@@ -0,0 +1,118 @@
f31a89
+/* Check if pthread_atfork handler can call dlclose (BZ#24595).
f31a89
+   Copyright (C) 2022 Free Software Foundation, Inc.
f31a89
+   This file is part of the GNU C Library.
f31a89
+
f31a89
+   The GNU C Library is free software; you can redistribute it and/or
f31a89
+   modify it under the terms of the GNU Lesser General Public
f31a89
+   License as published by the Free Software Foundation; either
f31a89
+   version 2.1 of the License, or (at your option) any later version.
f31a89
+
f31a89
+   The GNU C Library is distributed in the hope that it will be useful,
f31a89
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
f31a89
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
f31a89
+   Lesser General Public License for more details.
f31a89
+
f31a89
+   You should have received a copy of the GNU Lesser General Public
f31a89
+   License along with the GNU C Library; if not, see
f31a89
+   <http://www.gnu.org/licenses/>.  */
f31a89
+
f31a89
+#include <stdio.h>
f31a89
+#include <pthread.h>
f31a89
+#include <unistd.h>
f31a89
+#include <stdlib.h>
f31a89
+#include <stdbool.h>
f31a89
+
f31a89
+#include <support/check.h>
f31a89
+#include <support/xthread.h>
f31a89
+#include <support/capture_subprocess.h>
f31a89
+#include <support/xdlfcn.h>
f31a89
+
f31a89
+/* Check if pthread_atfork handlers do not deadlock when calling a function
f31a89
+   that might alter the internal fork handle list, such as dlclose.
f31a89
+
f31a89
+   The test registers a callback set with pthread_atfork(), dlopen() a shared
f31a89
+   library (nptl/tst-atfork3mod.c), calls an exported symbol from the library
f31a89
+   (which in turn also registers atfork handlers), and calls fork to trigger
f31a89
+   the callbacks.  */
f31a89
+
f31a89
+static void *handler;
f31a89
+static bool run_dlclose_prepare;
f31a89
+static bool run_dlclose_parent;
f31a89
+static bool run_dlclose_child;
f31a89
+
f31a89
+static void
f31a89
+prepare (void)
f31a89
+{
f31a89
+  if (run_dlclose_prepare)
f31a89
+    xdlclose (handler);
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+parent (void)
f31a89
+{
f31a89
+  if (run_dlclose_parent)
f31a89
+    xdlclose (handler);
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+child (void)
f31a89
+{
f31a89
+  if (run_dlclose_child)
f31a89
+    xdlclose (handler);
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+proc_func (void *closure)
f31a89
+{
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+do_test_generic (bool dlclose_prepare, bool dlclose_parent, bool dlclose_child)
f31a89
+{
f31a89
+  run_dlclose_prepare = dlclose_prepare;
f31a89
+  run_dlclose_parent = dlclose_parent;
f31a89
+  run_dlclose_child = dlclose_child;
f31a89
+  /* The flags above select which of this test's handlers calls xdlclose.  */
f31a89
+  handler = xdlopen ("tst-atfork3mod.so", RTLD_NOW);
f31a89
+
f31a89
+  int (*atfork3mod_func)(void);
f31a89
+  atfork3mod_func = xdlsym (handler, "atfork3mod_func");
f31a89
+  /* The module's entry point registers the module's own atfork handlers.  */
f31a89
+  atfork3mod_func ();
f31a89
+  /* Fork so the callbacks run; the subprocess must exit with status 0.  */
f31a89
+  struct support_capture_subprocess proc
f31a89
+    = support_capture_subprocess (proc_func, NULL);
f31a89
+  support_capture_subprocess_check (&proc, "tst-atfork3", 0, sc_allow_none);
f31a89
+
f31a89
+  handler = atfork3mod_func = NULL;
f31a89
+
f31a89
+  support_capture_subprocess_free (&proc);
f31a89
+}
f31a89
+
f31a89
+static void *
f31a89
+thread_func (void *closure)
f31a89
+{
f31a89
+  return NULL;
f31a89
+}
f31a89
+
f31a89
+static int
f31a89
+do_test (void)
f31a89
+{
f31a89
+  {
f31a89
+    /* Make the process act as multi-threaded.  */
f31a89
+    pthread_attr_t attr;
f31a89
+    xpthread_attr_init (&attr);
f31a89
+    xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
f31a89
+    xpthread_create (&attr, thread_func, NULL);
f31a89
+  }
f31a89
+
f31a89
+  TEST_COMPARE (pthread_atfork (prepare, parent, child), 0);
f31a89
+
f31a89
+  do_test_generic (true  /* prepare */, false /* parent */, false /* child */);
f31a89
+  do_test_generic (false /* prepare */, true  /* parent */, false /* child */);
f31a89
+  do_test_generic (false /* prepare */, false /* parent */, true  /* child */);
f31a89
+
f31a89
+  return 0;
f31a89
+}
f31a89
+
f31a89
+#include <support/test-driver.c>
f31a89
diff --git a/nptl/tst-atfork3mod.c b/nptl/tst-atfork3mod.c
f31a89
new file mode 100644
f31a89
index 0000000000000000..6d0658cb9efdecbc
f31a89
--- /dev/null
f31a89
+++ b/nptl/tst-atfork3mod.c
f31a89
@@ -0,0 +1,44 @@
f31a89
+/* Copyright (C) 2022 Free Software Foundation, Inc.
f31a89
+   This file is part of the GNU C Library.
f31a89
+
f31a89
+   The GNU C Library is free software; you can redistribute it and/or
f31a89
+   modify it under the terms of the GNU Lesser General Public
f31a89
+   License as published by the Free Software Foundation; either
f31a89
+   version 2.1 of the License, or (at your option) any later version.
f31a89
+
f31a89
+   The GNU C Library is distributed in the hope that it will be useful,
f31a89
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
f31a89
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
f31a89
+   Lesser General Public License for more details.
f31a89
+
f31a89
+   You should have received a copy of the GNU Lesser General Public
f31a89
+   License along with the GNU C Library; if not, see
f31a89
+   <http://www.gnu.org/licenses/>.  */
f31a89
+
f31a89
+#include <unistd.h>
f31a89
+#include <stdlib.h>
f31a89
+#include <pthread.h>
f31a89
+
f31a89
+#include <support/check.h>
f31a89
+
f31a89
+static void
f31a89
+mod_prepare (void)
f31a89
+{
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+mod_parent (void)
f31a89
+{
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+mod_child (void)
f31a89
+{
f31a89
+}
f31a89
+
f31a89
+int atfork3mod_func (void)
f31a89
+{
f31a89
+  TEST_COMPARE (pthread_atfork (mod_prepare, mod_parent, mod_child), 0);
f31a89
+
f31a89
+  return 0;
f31a89
+}
f31a89
diff --git a/nptl/tst-atfork4.c b/nptl/tst-atfork4.c
f31a89
new file mode 100644
f31a89
index 0000000000000000..52dc87e73b846ab9
f31a89
--- /dev/null
f31a89
+++ b/nptl/tst-atfork4.c
f31a89
@@ -0,0 +1,128 @@
f31a89
+/* pthread_atfork supports handlers that call pthread_atfork or dlclose.
f31a89
+   Copyright (C) 2022 Free Software Foundation, Inc.
f31a89
+   This file is part of the GNU C Library.
f31a89
+
f31a89
+   The GNU C Library is free software; you can redistribute it and/or
f31a89
+   modify it under the terms of the GNU Lesser General Public
f31a89
+   License as published by the Free Software Foundation; either
f31a89
+   version 2.1 of the License, or (at your option) any later version.
f31a89
+
f31a89
+   The GNU C Library is distributed in the hope that it will be useful,
f31a89
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
f31a89
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
f31a89
+   Lesser General Public License for more details.
f31a89
+
f31a89
+   You should have received a copy of the GNU Lesser General Public
f31a89
+   License along with the GNU C Library; if not, see
f31a89
+   <https://www.gnu.org/licenses/>.  */
f31a89
+
f31a89
+#include <support/xdlfcn.h>
f31a89
+#include <stdio.h>
f31a89
+#include <support/xthread.h>
f31a89
+#include <sys/types.h>
f31a89
+#include <sys/wait.h>
f31a89
+#include <support/xunistd.h>
f31a89
+#include <support/check.h>
f31a89
+#include <stdlib.h>
f31a89
+
f31a89
+static void *
f31a89
+thread_func (void *x)
f31a89
+{
f31a89
+  return NULL;
f31a89
+}
f31a89
+
f31a89
+static unsigned int second_atfork_handler_runcount = 0;
f31a89
+
f31a89
+static void
f31a89
+second_atfork_handler (void)
f31a89
+{
f31a89
+  second_atfork_handler_runcount++;
f31a89
+}
f31a89
+
f31a89
+static void *h = NULL;
f31a89
+
f31a89
+static unsigned int atfork_handler_runcount = 0;
f31a89
+
f31a89
+static void
f31a89
+prepare (void)
f31a89
+{
f31a89
+  /* These atfork handlers are registered while atfork handlers are being
f31a89
+     executed and thus will not be executed during the corresponding
f31a89
+     fork.  */
f31a89
+  TEST_VERIFY_EXIT (pthread_atfork (second_atfork_handler,
f31a89
+                                    second_atfork_handler,
f31a89
+                                    second_atfork_handler) == 0);
f31a89
+
f31a89
+  /* This will de-register the atfork handlers registered by the dlopen'd
f31a89
+     library and so they will not be executed.  */
f31a89
+  if (h != NULL)
f31a89
+    {
f31a89
+      xdlclose (h);
f31a89
+      h = NULL;
f31a89
+    }
f31a89
+
f31a89
+  atfork_handler_runcount++;
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+after (void)
f31a89
+{
f31a89
+  atfork_handler_runcount++;
f31a89
+}
f31a89
+
f31a89
+static int
f31a89
+do_test (void)
f31a89
+{
f31a89
+  /* Make sure __libc_single_threaded is 0.  */
f31a89
+  pthread_attr_t attr;
f31a89
+  xpthread_attr_init (&attr);
f31a89
+  xpthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
f31a89
+  xpthread_create (&attr, thread_func, NULL);
f31a89
+
f31a89
+  void (*reg_atfork_handlers) (void);
f31a89
+
f31a89
+  h = xdlopen ("tst-atfork4mod.so", RTLD_LAZY);
f31a89
+
f31a89
+  reg_atfork_handlers = xdlsym (h, "reg_atfork_handlers");
f31a89
+
f31a89
+  reg_atfork_handlers ();
f31a89
+
f31a89
+  /* We register our atfork handlers *after* loading the module so that our
f31a89
+     prepare handler is called first at fork, where we then dlclose the
f31a89
+     module before its prepare handler has a chance to be called.  */
f31a89
+  TEST_VERIFY_EXIT (pthread_atfork (prepare, after, after) == 0);
f31a89
+
f31a89
+  pid_t pid = xfork ();
f31a89
+
f31a89
+  /* Both the parent and the child processes should observe this.  */
f31a89
+  TEST_VERIFY_EXIT (atfork_handler_runcount == 2);
f31a89
+  TEST_VERIFY_EXIT (second_atfork_handler_runcount == 0);
f31a89
+
f31a89
+  if (pid > 0)
f31a89
+    {
f31a89
+      int childstat;
f31a89
+
f31a89
+      xwaitpid (-1, &childstat, 0);
f31a89
+      TEST_VERIFY_EXIT (WIFEXITED (childstat)
f31a89
+                        && WEXITSTATUS (childstat) == 0);
f31a89
+
f31a89
+      /* This time, the second set of atfork handlers should also be called
f31a89
+         since the handlers are already in place before fork is called.  */
f31a89
+
f31a89
+      pid = xfork ();
f31a89
+
f31a89
+      TEST_VERIFY_EXIT (atfork_handler_runcount == 4);
f31a89
+      TEST_VERIFY_EXIT (second_atfork_handler_runcount == 2);
f31a89
+
f31a89
+      if (pid > 0)
f31a89
+        {
f31a89
+          xwaitpid (-1, &childstat, 0);
f31a89
+          TEST_VERIFY_EXIT (WIFEXITED (childstat)
f31a89
+                            && WEXITSTATUS (childstat) == 0);
f31a89
+        }
f31a89
+    }
f31a89
+
f31a89
+  return 0;
f31a89
+}
f31a89
+
f31a89
+#include <support/test-driver.c>
f31a89
diff --git a/nptl/tst-atfork4mod.c b/nptl/tst-atfork4mod.c
f31a89
new file mode 100644
f31a89
index 0000000000000000..e111efeb185916e0
f31a89
--- /dev/null
f31a89
+++ b/nptl/tst-atfork4mod.c
f31a89
@@ -0,0 +1,48 @@
f31a89
+/* pthread_atfork supports handlers that call pthread_atfork or dlclose.
f31a89
+   Copyright (C) 2022 Free Software Foundation, Inc.
f31a89
+   This file is part of the GNU C Library.
f31a89
+
f31a89
+   The GNU C Library is free software; you can redistribute it and/or
f31a89
+   modify it under the terms of the GNU Lesser General Public
f31a89
+   License as published by the Free Software Foundation; either
f31a89
+   version 2.1 of the License, or (at your option) any later version.
f31a89
+
f31a89
+   The GNU C Library is distributed in the hope that it will be useful,
f31a89
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
f31a89
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
f31a89
+   Lesser General Public License for more details.
f31a89
+
f31a89
+   You should have received a copy of the GNU Lesser General Public
f31a89
+   License along with the GNU C Library; if not, see
f31a89
+   <https://www.gnu.org/licenses/>.  */
f31a89
+
f31a89
+#include <pthread.h>
f31a89
+#include <stdlib.h>
f31a89
+
f31a89
+/* This dynamically loaded library simply registers its atfork handlers when
f31a89
+   asked to.  The atfork handlers should never be executed because the
f31a89
+   library is unloaded by the test's prepare handler during fork.  */
f31a89
+
f31a89
+static void
f31a89
+prepare (void)
f31a89
+{
f31a89
+  abort ();
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+parent (void)
f31a89
+{
f31a89
+  abort ();
f31a89
+}
f31a89
+
f31a89
+static void
f31a89
+child (void)
f31a89
+{
f31a89
+  abort ();
f31a89
+}
f31a89
+
f31a89
+void
f31a89
+reg_atfork_handlers (void)
f31a89
+{
f31a89
+  pthread_atfork (prepare, parent, child);
f31a89
+}
f31a89
diff --git a/sysdeps/nptl/fork.c b/sysdeps/nptl/fork.c
f31a89
index b4d20fa652f4ba3b..1324b813136764fc 100644
f31a89
--- a/sysdeps/nptl/fork.c
f31a89
+++ b/sysdeps/nptl/fork.c
f31a89
@@ -54,8 +54,9 @@ __libc_fork (void)
f31a89
      signal handlers.  POSIX requires that fork is async-signal-safe,
f31a89
      but our current fork implementation is not.  */
f31a89
   bool multiple_threads = THREAD_GETMEM (THREAD_SELF, header.multiple_threads);
f31a89
+  uint64_t lastrun;
f31a89
 
f31a89
-  __run_fork_handlers (atfork_run_prepare, multiple_threads);
f31a89
+  lastrun = __run_prefork_handlers (multiple_threads);
f31a89
 
f31a89
   /* If we are not running multiple threads, we do not have to
f31a89
      preserve lock state.  If fork runs from a signal handler, only
f31a89
@@ -129,7 +130,7 @@ __libc_fork (void)
f31a89
       __rtld_lock_initialize (GL(dl_load_tls_lock));
f31a89
 
f31a89
       /* Run the handlers registered for the child.  */
f31a89
-      __run_fork_handlers (atfork_run_child, multiple_threads);
f31a89
+      __run_postfork_handlers (atfork_run_child, multiple_threads, lastrun);
f31a89
     }
f31a89
   else
f31a89
     {
f31a89
@@ -144,7 +145,7 @@ __libc_fork (void)
f31a89
 	}
f31a89
 
f31a89
       /* Run the handlers registered for the parent.  */
f31a89
-      __run_fork_handlers (atfork_run_parent, multiple_threads);
f31a89
+      __run_postfork_handlers (atfork_run_parent, multiple_threads, lastrun);
f31a89
     }
f31a89
 
f31a89
   return pid;
f31a89
diff --git a/sysdeps/nptl/fork.h b/sysdeps/nptl/fork.h
f31a89
index bef2b7a8a6af8635..222c4f618970a455 100644
f31a89
--- a/sysdeps/nptl/fork.h
f31a89
+++ b/sysdeps/nptl/fork.h
f31a89
@@ -31,6 +31,7 @@ struct fork_handler
f31a89
   void (*parent_handler) (void);
f31a89
   void (*child_handler) (void);
f31a89
   void *dso_handle;
f31a89
+  uint64_t id;
f31a89
 };
f31a89
 
f31a89
 /* Function to call to unregister fork handlers.  */
f31a89
@@ -44,19 +45,18 @@ enum __run_fork_handler_type
f31a89
   atfork_run_parent
f31a89
 };
f31a89
 
f31a89
-/* Run the atfork handlers and lock/unlock the internal lock depending
f31a89
-   of the WHO argument:
f31a89
+/* Run the atfork prepare handlers in the reverse order of registration and
f31a89
+   return the ID of the last registered handler.  If DO_LOCKING is true, the
f31a89
+   internal lock is held locked upon return.  */
f31a89
+extern uint64_t __run_prefork_handlers (_Bool do_locking) attribute_hidden;
f31a89
 
f31a89
-   - atfork_run_prepare: run all the PREPARE_HANDLER in reverse order of
f31a89
-			 insertion and locks the internal lock.
f31a89
-   - atfork_run_child: run all the CHILD_HANDLER and unlocks the internal
f31a89
-		       lock.
f31a89
-   - atfork_run_parent: run all the PARENT_HANDLER and unlocks the internal
f31a89
-			lock.
f31a89
-
f31a89
-   Perform locking only if DO_LOCKING.  */
f31a89
-extern void __run_fork_handlers (enum __run_fork_handler_type who,
f31a89
-				 _Bool do_locking) attribute_hidden;
f31a89
+/* Given a handler type (parent or child), run all the atfork handlers in
f31a89
+   the order of registration up to and including the handler with id equal
f31a89
+   to LASTRUN.  If DO_LOCKING is true, the internal lock is unlocked prior
f31a89
+   to return.  */
f31a89
+extern void __run_postfork_handlers (enum __run_fork_handler_type who,
f31a89
+                                     _Bool do_locking,
f31a89
+                                     uint64_t lastrun) attribute_hidden;
f31a89
 
f31a89
 /* C library side function to register new fork handlers.  */
f31a89
 extern int __register_atfork (void (*__prepare) (void),