83f30e
commit 15a0c5730d1d5aeb95f50c9ec7470640084feae8
83f30e
Author: Chung-Lin Tang <cltang@codesourcery.com>
83f30e
Date:   Thu Oct 21 21:41:22 2021 +0800
83f30e
83f30e
    elf: Fix slow DSO sorting behavior in dynamic loader (BZ #17645)
83f30e
    
83f30e
    This second patch contains the actual implementation of a new sorting algorithm
83f30e
    for shared objects in the dynamic loader, which solves the slow behavior that
83f30e
    the current "old" algorithm falls into when the DSO set contains circular
83f30e
    dependencies.
83f30e
    
83f30e
    The new algorithm implemented here is simply depth-first search (DFS) to obtain
83f30e
    the Reverse-Post Order (RPO) sequence, a topological sort. A new l_visited:1
83f30e
    bitfield is added to struct link_map to more elegantly facilitate such a search.
83f30e
    
83f30e
    The DFS algorithm is applied to the input maps[nmap-1] backwards towards
83f30e
    maps[0]. This has the effect of a more "shallow" recursion depth in general
83f30e
    since the input is in BFS. Also, when combined with the natural order of
83f30e
    processing l_initfini[] at each node, this creates a resulting output sorting
83f30e
    closer to the intuitive "left-to-right" order in most cases.
83f30e
    
83f30e
    Another notable implementation adjustment related to this _dl_sort_maps change
83f30e
    is the removal of two char arrays 'used' and 'done' in _dl_close_worker to
83f30e
    represent two per-map attributes. This has been changed to simply use two new
83f30e
    bit-fields l_map_used:1, l_map_done:1 added to struct link_map. This also allows
83f30e
    discarding the clunky 'used' array sorting that _dl_sort_maps had to sometimes
83f30e
    do along the way.
83f30e
    
83f30e
    Tunable support for switching between different sorting algorithms at runtime is
83f30e
    also added. A new tunable 'glibc.rtld.dynamic_sort' with current valid values 1
83f30e
    (old algorithm) and 2 (new DFS algorithm) has been added. At time of commit
83f30e
    of this patch, the default setting is 1 (old algorithm).
83f30e
    
83f30e
    Signed-off-by: Chung-Lin Tang  <cltang@codesourcery.com>
83f30e
    Reviewed-by: Adhemerval Zanella  <adhemerval.zanella@linaro.org>
83f30e
83f30e
Conflicts:
83f30e
	elf/dl-tunables.list
83f30e
	  (No mem.tagging tunable downstream.)
83f30e
83f30e
diff --git a/elf/dl-close.c b/elf/dl-close.c
83f30e
index 74ca9a85dd309780..22225efb3226c3e1 100644
83f30e
--- a/elf/dl-close.c
83f30e
+++ b/elf/dl-close.c
83f30e
@@ -167,8 +167,6 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 
83f30e
   bool any_tls = false;
83f30e
   const unsigned int nloaded = ns->_ns_nloaded;
83f30e
-  char used[nloaded];
83f30e
-  char done[nloaded];
83f30e
   struct link_map *maps[nloaded];
83f30e
 
83f30e
   /* Run over the list and assign indexes to the link maps and enter
83f30e
@@ -176,24 +174,21 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
   int idx = 0;
83f30e
   for (struct link_map *l = ns->_ns_loaded; l != NULL; l = l->l_next)
83f30e
     {
83f30e
+      l->l_map_used = 0;
83f30e
+      l->l_map_done = 0;
83f30e
       l->l_idx = idx;
83f30e
       maps[idx] = l;
83f30e
       ++idx;
83f30e
-
83f30e
     }
83f30e
   assert (idx == nloaded);
83f30e
 
83f30e
-  /* Prepare the bitmaps.  */
83f30e
-  memset (used, '\0', sizeof (used));
83f30e
-  memset (done, '\0', sizeof (done));
83f30e
-
83f30e
   /* Keep track of the lowest index link map we have covered already.  */
83f30e
   int done_index = -1;
83f30e
   while (++done_index < nloaded)
83f30e
     {
83f30e
       struct link_map *l = maps[done_index];
83f30e
 
83f30e
-      if (done[done_index])
83f30e
+      if (l->l_map_done)
83f30e
 	/* Already handled.  */
83f30e
 	continue;
83f30e
 
83f30e
@@ -204,12 +199,12 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 	  /* See CONCURRENCY NOTES in cxa_thread_atexit_impl.c to know why
83f30e
 	     acquire is sufficient and correct.  */
83f30e
 	  && atomic_load_acquire (&l->l_tls_dtor_count) == 0
83f30e
-	  && !used[done_index])
83f30e
+	  && !l->l_map_used)
83f30e
 	continue;
83f30e
 
83f30e
       /* We need this object and we handle it now.  */
83f30e
-      done[done_index] = 1;
83f30e
-      used[done_index] = 1;
83f30e
+      l->l_map_used = 1;
83f30e
+      l->l_map_done = 1;
83f30e
       /* Signal the object is still needed.  */
83f30e
       l->l_idx = IDX_STILL_USED;
83f30e
 
83f30e
@@ -225,9 +220,9 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 		{
83f30e
 		  assert ((*lp)->l_idx >= 0 && (*lp)->l_idx < nloaded);
83f30e
 
83f30e
-		  if (!used[(*lp)->l_idx])
83f30e
+		  if (!(*lp)->l_map_used)
83f30e
 		    {
83f30e
-		      used[(*lp)->l_idx] = 1;
83f30e
+		      (*lp)->l_map_used = 1;
83f30e
 		      /* If we marked a new object as used, and we've
83f30e
 			 already processed it, then we need to go back
83f30e
 			 and process again from that point forward to
83f30e
@@ -250,9 +245,9 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 	      {
83f30e
 		assert (jmap->l_idx >= 0 && jmap->l_idx < nloaded);
83f30e
 
83f30e
-		if (!used[jmap->l_idx])
83f30e
+		if (!jmap->l_map_used)
83f30e
 		  {
83f30e
-		    used[jmap->l_idx] = 1;
83f30e
+		    jmap->l_map_used = 1;
83f30e
 		    if (jmap->l_idx - 1 < done_index)
83f30e
 		      done_index = jmap->l_idx - 1;
83f30e
 		  }
83f30e
@@ -262,8 +257,7 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 
83f30e
   /* Sort the entries.  We can skip looking for the binary itself which is
83f30e
      at the front of the search list for the main namespace.  */
83f30e
-  _dl_sort_maps (maps + (nsid == LM_ID_BASE), nloaded - (nsid == LM_ID_BASE),
83f30e
-		 used + (nsid == LM_ID_BASE), true);
83f30e
+  _dl_sort_maps (maps, nloaded, (nsid == LM_ID_BASE), true);
83f30e
 
83f30e
   /* Call all termination functions at once.  */
83f30e
   bool unload_any = false;
83f30e
@@ -277,7 +271,7 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
       /* All elements must be in the same namespace.  */
83f30e
       assert (imap->l_ns == nsid);
83f30e
 
83f30e
-      if (!used[i])
83f30e
+      if (!imap->l_map_used)
83f30e
 	{
83f30e
 	  assert (imap->l_type == lt_loaded && !imap->l_nodelete_active);
83f30e
 
83f30e
@@ -315,7 +309,7 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
 	  if (i < first_loaded)
83f30e
 	    first_loaded = i;
83f30e
 	}
83f30e
-      /* Else used[i].  */
83f30e
+      /* Else imap->l_map_used.  */
83f30e
       else if (imap->l_type == lt_loaded)
83f30e
 	{
83f30e
 	  struct r_scope_elem *new_list = NULL;
83f30e
@@ -524,7 +518,7 @@ _dl_close_worker (struct link_map *map, bool force)
83f30e
   for (unsigned int i = first_loaded; i < nloaded; ++i)
83f30e
     {
83f30e
       struct link_map *imap = maps[i];
83f30e
-      if (!used[i])
83f30e
+      if (!imap->l_map_used)
83f30e
 	{
83f30e
 	  assert (imap->l_type == lt_loaded);
83f30e
 
83f30e
diff --git a/elf/dl-deps.c b/elf/dl-deps.c
83f30e
index 007069f670eced95..9365d54c8e03e5f4 100644
83f30e
--- a/elf/dl-deps.c
83f30e
+++ b/elf/dl-deps.c
83f30e
@@ -612,10 +612,9 @@ Filters not supported with LD_TRACE_PRELINKING"));
83f30e
 
83f30e
   /* If libc.so.6 is the main map, it participates in the sort, so
83f30e
      that the relocation order is correct regarding libc.so.6.  */
83f30e
-  if (l_initfini[0] == GL (dl_ns)[l_initfini[0]->l_ns].libc_map)
83f30e
-    _dl_sort_maps (l_initfini, nlist, NULL, false);
83f30e
-  else
83f30e
-    _dl_sort_maps (&l_initfini[1], nlist - 1, NULL, false);
83f30e
+  _dl_sort_maps (l_initfini, nlist,
83f30e
+		 (l_initfini[0] != GL (dl_ns)[l_initfini[0]->l_ns].libc_map),
83f30e
+		 false);
83f30e
 
83f30e
   /* Terminate the list of dependencies.  */
83f30e
   l_initfini[nlist] = NULL;
83f30e
diff --git a/elf/dl-fini.c b/elf/dl-fini.c
83f30e
index eea9d8aad736a99e..e14259a3c8806e0d 100644
83f30e
--- a/elf/dl-fini.c
83f30e
+++ b/elf/dl-fini.c
83f30e
@@ -95,8 +95,7 @@ _dl_fini (void)
83f30e
 	  /* Now we have to do the sorting.  We can skip looking for the
83f30e
 	     binary itself which is at the front of the search list for
83f30e
 	     the main namespace.  */
83f30e
-	  _dl_sort_maps (maps + (ns == LM_ID_BASE), nmaps - (ns == LM_ID_BASE),
83f30e
-			 NULL, true);
83f30e
+	  _dl_sort_maps (maps, nmaps, (ns == LM_ID_BASE), true);
83f30e
 
83f30e
 	  /* We do not rely on the linked list of loaded object anymore
83f30e
 	     from this point on.  We have our own list here (maps).  The
83f30e
diff --git a/elf/dl-sort-maps.c b/elf/dl-sort-maps.c
83f30e
index b2a01ede627be1e9..398a08f28c4d9ff1 100644
83f30e
--- a/elf/dl-sort-maps.c
83f30e
+++ b/elf/dl-sort-maps.c
83f30e
@@ -16,16 +16,24 @@
83f30e
    License along with the GNU C Library; if not, see
83f30e
    <http://www.gnu.org/licenses/>.  */
83f30e
 
83f30e
+#include <assert.h>
83f30e
 #include <ldsodefs.h>
83f30e
+#include <elf/dl-tunables.h>
83f30e
 
83f30e
+/* Note: this is the older, "original" sorting algorithm, being used as
83f30e
+   default up to 2.35.
83f30e
 
83f30e
-/* Sort array MAPS according to dependencies of the contained objects.
83f30e
-   Array USED, if non-NULL, is permutated along MAPS.  If FOR_FINI this is
83f30e
-   called for finishing an object.  */
83f30e
-void
83f30e
-_dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
83f30e
-	       bool for_fini)
83f30e
+   Sort array MAPS according to dependencies of the contained objects.
83f30e
+   If FOR_FINI is true, this is called for finishing an object.  */
83f30e
+static void
83f30e
+_dl_sort_maps_original (struct link_map **maps, unsigned int nmaps,
83f30e
+			unsigned int skip, bool for_fini)
83f30e
 {
83f30e
+  /* Allows caller to do the common optimization of skipping the first map,
83f30e
+     usually the main binary.  */
83f30e
+  maps += skip;
83f30e
+  nmaps -= skip;
83f30e
+
83f30e
   /* A list of one element need not be sorted.  */
83f30e
   if (nmaps <= 1)
83f30e
     return;
83f30e
@@ -66,14 +74,6 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
83f30e
 			   (k - i) * sizeof (maps[0]));
83f30e
 		  maps[k] = thisp;
83f30e
 
83f30e
-		  if (used != NULL)
83f30e
-		    {
83f30e
-		      char here_used = used[i];
83f30e
-		      memmove (&used[i], &used[i + 1],
83f30e
-			       (k - i) * sizeof (used[0]));
83f30e
-		      used[k] = here_used;
83f30e
-		    }
83f30e
-
83f30e
 		  if (seen[i + 1] > nmaps - i)
83f30e
 		    {
83f30e
 		      ++i;
83f30e
@@ -120,3 +120,183 @@ _dl_sort_maps (struct link_map **maps, unsigned int nmaps, char *used,
83f30e
     next:;
83f30e
     }
83f30e
 }
83f30e
+
83f30e
+#if !HAVE_TUNABLES
83f30e
+/* In this case, just default to the original algorithm.  */
83f30e
+strong_alias (_dl_sort_maps_original, _dl_sort_maps);
83f30e
+#else
83f30e
+
83f30e
+/* We use a recursive function due to its better clarity and ease of
83f30e
+   implementation, as well as faster execution speed. We already use
83f30e
+   alloca() for list allocation during the breadth-first search of
83f30e
+   dependencies in _dl_map_object_deps(), and this should be on the
83f30e
+   same order of worst-case stack usage.
83f30e
+
83f30e
+   Note: the '*rpo' parameter is supposed to point to one past the
83f30e
+   last element of the array where we save the sort results, and is
83f30e
+   decremented before storing the current map at each level.  */
83f30e
+
83f30e
+static void
83f30e
+dfs_traversal (struct link_map ***rpo, struct link_map *map,
83f30e
+	       bool *do_reldeps)
83f30e
+{
83f30e
+  if (map->l_visited)
83f30e
+    return;
83f30e
+
83f30e
+  map->l_visited = 1;
83f30e
+
83f30e
+  if (map->l_initfini)
83f30e
+    {
83f30e
+      for (int i = 0; map->l_initfini[i] != NULL; i++)
83f30e
+	{
83f30e
+	  struct link_map *dep = map->l_initfini[i];
83f30e
+	  if (dep->l_visited == 0
83f30e
+	      && dep->l_main_map == 0)
83f30e
+	    dfs_traversal (rpo, dep, do_reldeps);
83f30e
+	}
83f30e
+    }
83f30e
+
83f30e
+  if (__glibc_unlikely (do_reldeps != NULL && map->l_reldeps != NULL))
83f30e
+    {
83f30e
+      /* Indicate that we encountered relocation dependencies during
83f30e
+	 traversal.  */
83f30e
+      *do_reldeps = true;
83f30e
+
83f30e
+      for (int m = map->l_reldeps->act - 1; m >= 0; m--)
83f30e
+	{
83f30e
+	  struct link_map *dep = map->l_reldeps->list[m];
83f30e
+	  if (dep->l_visited == 0
83f30e
+	      && dep->l_main_map == 0)
83f30e
+	    dfs_traversal (rpo, dep, do_reldeps);
83f30e
+	}
83f30e
+    }
83f30e
+
83f30e
+  *rpo -= 1;
83f30e
+  **rpo = map;
83f30e
+}
83f30e
+
83f30e
+/* Topologically sort array MAPS according to dependencies of the contained
83f30e
+   objects.  */
83f30e
+
83f30e
+static void
83f30e
+_dl_sort_maps_dfs (struct link_map **maps, unsigned int nmaps,
83f30e
+		   unsigned int skip __attribute__ ((unused)), bool for_fini)
83f30e
+{
83f30e
+  for (int i = nmaps - 1; i >= 0; i--)
83f30e
+    maps[i]->l_visited = 0;
83f30e
+
83f30e
+  /* We apply DFS traversal for each of maps[i] until the whole total order
83f30e
+     is found and we're at the start of the Reverse-Postorder (RPO) sequence,
83f30e
+     which is a topological sort.
83f30e
+
83f30e
+     We go from maps[nmaps - 1] backwards towards maps[0] at this level.
83f30e
+     Due to the breadth-first search (BFS) ordering we receive, going
83f30e
+     backwards usually gives a more shallow depth-first recursion depth,
83f30e
+     adding more stack usage safety. Also, combined with the natural
83f30e
+     processing order of l_initfini[] at each node during DFS, this maintains
83f30e
+     an ordering closer to the original link ordering in the sorting results
83f30e
+     under most simpler cases.
83f30e
+
83f30e
+     Another reason we order the top level backwards, is that maps[0] is
83f30e
+     usually exactly the main object of which we're in the midst of
83f30e
+     _dl_map_object_deps() processing, and maps[0]->l_initfini[] is still
83f30e
+     blank. If we start the traversal from maps[0], since having no
83f30e
+     dependencies yet filled in, maps[0] will always be immediately
83f30e
+     incorrectly placed at the last place in the order (first in reverse).
83f30e
+     Adjusting the order so that maps[0] is last traversed naturally avoids
83f30e
+     this problem.
83f30e
+
83f30e
+     Further, the old "optimization" of skipping the main object at maps[0]
83f30e
+     from the call-site (i.e. _dl_sort_maps(maps+1,nmaps-1)) is in general
83f30e
+     no longer valid, since traversing along object dependency-links
83f30e
+     may "find" the main object even when it is not included in the initial
83f30e
+     order (e.g. a dlopen()'ed shared object can have circular dependencies
83f30e
+     linked back to itself). In such a case, traversing N-1 objects will
83f30e
+     create a N-object result, and raise problems.
83f30e
+
83f30e
+     To summarize, just passing in the full list, and iterating from back
83f30e
+     to front makes things much more straightforward.  */
83f30e
+
83f30e
+  /* Array to hold RPO sorting results, before we copy back to maps[].  */
83f30e
+  struct link_map *rpo[nmaps];
83f30e
+
83f30e
+  /* The 'head' position during each DFS iteration. Note that we start at
83f30e
+     one past the last element due to first-decrement-then-store (see the
83f30e
+     bottom of above dfs_traversal() routine).  */
83f30e
+  struct link_map **rpo_head = &rpo[nmaps];
83f30e
+
83f30e
+  bool do_reldeps = false;
83f30e
+  bool *do_reldeps_ref = (for_fini ? &do_reldeps : NULL);
83f30e
+
83f30e
+  for (int i = nmaps - 1; i >= 0; i--)
83f30e
+    {
83f30e
+      dfs_traversal (&rpo_head, maps[i], do_reldeps_ref);
83f30e
+
83f30e
+      /* We can break early if all objects are already placed.  */
83f30e
+      if (rpo_head == rpo)
83f30e
+	goto end;
83f30e
+    }
83f30e
+  assert (rpo_head == rpo);
83f30e
+
83f30e
+ end:
83f30e
+  /* Here we may do a second pass of sorting, using only l_initfini[]
83f30e
+     static dependency links. This is avoided if !FOR_FINI or if we didn't
83f30e
+     find any reldeps in the first DFS traversal.
83f30e
+
83f30e
+     The reason we do this is: while it is unspecified how circular
83f30e
+     dependencies should be handled, the presumed reasonable behavior is to
83f30e
+     have destructors to respect static dependency links as much as possible,
83f30e
+     overriding reldeps if needed. And the first sorting pass, which takes
83f30e
+     l_initfini/l_reldeps links equally, may not preserve this priority.
83f30e
+
83f30e
+     Hence we do a 2nd sorting pass, taking only DT_NEEDED links into account
83f30e
+     (see how the do_reldeps argument to dfs_traversal() is NULL below).  */
83f30e
+  if (do_reldeps)
83f30e
+    {
83f30e
+      for (int i = nmaps - 1; i >= 0; i--)
83f30e
+	rpo[i]->l_visited = 0;
83f30e
+
83f30e
+      struct link_map **maps_head = &maps[nmaps];
83f30e
+      for (int i = nmaps - 1; i >= 0; i--)
83f30e
+	{
83f30e
+	  dfs_traversal (&maps_head, rpo[i], NULL);
83f30e
+
83f30e
+	  /* We can break early if all objects are already placed.
83f30e
+	     The below memcpy is not needed in the do_reldeps case here,
83f30e
+	     since we wrote back to maps[] during DFS traversal.  */
83f30e
+	  if (maps_head == maps)
83f30e
+	    return;
83f30e
+	}
83f30e
+      assert (maps_head == maps);
83f30e
+      return;
83f30e
+    }
83f30e
+
83f30e
+  memcpy (maps, rpo, sizeof (struct link_map *) * nmaps);
83f30e
+}
83f30e
+
83f30e
+void
83f30e
+_dl_sort_maps_init (void)
83f30e
+{
83f30e
+  int32_t algorithm = TUNABLE_GET (glibc, rtld, dynamic_sort, int32_t, NULL);
83f30e
+  GLRO(dl_dso_sort_algo) = algorithm == 1 ? dso_sort_algorithm_original
83f30e
+					  : dso_sort_algorithm_dfs;
83f30e
+}
83f30e
+
83f30e
+void
83f30e
+_dl_sort_maps (struct link_map **maps, unsigned int nmaps,
83f30e
+	       unsigned int skip, bool for_fini)
83f30e
+{
83f30e
+  /* It can be tempting to use a static function pointer to store and call
83f30e
+     the current selected sorting algorithm routine, but experimentation
83f30e
+     shows that current processors still do not handle indirect branches
83f30e
+     that efficiently, plus a static function pointer will involve
83f30e
+     PTR_MANGLE/DEMANGLE, further impairing performance of small, common
83f30e
+     input cases. A simple if-case with direct function calls appears to
83f30e
+     be the fastest.  */
83f30e
+  if (__glibc_likely (GLRO(dl_dso_sort_algo) == dso_sort_algorithm_original))
83f30e
+    _dl_sort_maps_original (maps, nmaps, skip, for_fini);
83f30e
+  else
83f30e
+    _dl_sort_maps_dfs (maps, nmaps, skip, for_fini);
83f30e
+}
83f30e
+
83f30e
+#endif /* HAVE_TUNABLES.  */
83f30e
diff --git a/elf/dl-support.c b/elf/dl-support.c
83f30e
index e9943e889ef447ad..ae03aec9764e29d3 100644
83f30e
--- a/elf/dl-support.c
83f30e
+++ b/elf/dl-support.c
83f30e
@@ -155,6 +155,8 @@ size_t _dl_phnum;
83f30e
 uint64_t _dl_hwcap __attribute__ ((nocommon));
83f30e
 uint64_t _dl_hwcap2 __attribute__ ((nocommon));
83f30e
 
83f30e
+enum dso_sort_algorithm _dl_dso_sort_algo;
83f30e
+
83f30e
 /* The value of the FPU control word the kernel will preset in hardware.  */
83f30e
 fpu_control_t _dl_fpu_control = _FPU_DEFAULT;
83f30e
 
83f30e
diff --git a/elf/dl-sysdep.c b/elf/dl-sysdep.c
83f30e
index 998c5d52bcab8193..4e8a986541fc4c09 100644
83f30e
--- a/elf/dl-sysdep.c
83f30e
+++ b/elf/dl-sysdep.c
83f30e
@@ -223,6 +223,9 @@ _dl_sysdep_start (void **start_argptr,
83f30e
 
83f30e
   __tunables_init (_environ);
83f30e
 
83f30e
+  /* Initialize DSO sorting algorithm after tunables.  */
83f30e
+  _dl_sort_maps_init ();
83f30e
+
83f30e
 #ifdef DL_SYSDEP_INIT
83f30e
   DL_SYSDEP_INIT;
83f30e
 #endif
83f30e
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
83f30e
index 6408a8e5ae92d2c6..54ef2a921310b229 100644
83f30e
--- a/elf/dl-tunables.list
83f30e
+++ b/elf/dl-tunables.list
83f30e
@@ -140,4 +140,13 @@ glibc {
83f30e
       default: 512
83f30e
     }
83f30e
   }
83f30e
+
83f30e
+  rtld {
83f30e
+    dynamic_sort {
83f30e
+      type: INT_32
83f30e
+      minval: 1
83f30e
+      maxval: 2
83f30e
+      default: 1
83f30e
+    }
83f30e
+  }
83f30e
 }
83f30e
diff --git a/elf/dso-sort-tests-1.def b/elf/dso-sort-tests-1.def
83f30e
index 873ddf55d91155c6..5f7f18ef270bc12d 100644
83f30e
--- a/elf/dso-sort-tests-1.def
83f30e
+++ b/elf/dso-sort-tests-1.def
83f30e
@@ -62,5 +62,5 @@ output: b>a>{}
83f30e
 # The below expected outputs are what the two algorithms currently produce
83f30e
 # respectively, for regression testing purposes.
83f30e
 tst-bz15311: {+a;+e;+f;+g;+d;%d;-d;-g;-f;-e;-a};a->b->c->d;d=>[ba];c=>a;b=>e=>a;c=>f=>b;d=>g=>c
83f30e
-xfail_output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[
83f30e
+output(glibc.rtld.dynamic_sort=1): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[
83f30e
 output(glibc.rtld.dynamic_sort=2): {+a[d>c>b>a>];+e[e>];+f[f>];+g[g>];+d[];%d(b(e(a()))a()g(c(a()f(b(e(a()))))));-d[];-g[];-f[];-e[];-a[
83f30e
diff --git a/elf/rtld.c b/elf/rtld.c
83f30e
index b47e84ca2fb6f03c..cd2cc4024a3581c2 100644
83f30e
--- a/elf/rtld.c
83f30e
+++ b/elf/rtld.c
83f30e
@@ -1453,6 +1453,9 @@ dl_main (const ElfW(Phdr) *phdr,
83f30e
       main_map->l_name = (char *) "";
83f30e
       *user_entry = main_map->l_entry;
83f30e
 
83f30e
+      /* Set bit indicating this is the main program map.  */
83f30e
+      main_map->l_main_map = 1;
83f30e
+
83f30e
 #ifdef HAVE_AUX_VECTOR
83f30e
       /* Adjust the on-stack auxiliary vector so that it looks like the
83f30e
 	 binary was executed directly.  */
83f30e
diff --git a/elf/tst-rtld-list-tunables.exp b/elf/tst-rtld-list-tunables.exp
83f30e
index 4f3f7ee4e30a2b42..118afc271057afd4 100644
83f30e
--- a/elf/tst-rtld-list-tunables.exp
83f30e
+++ b/elf/tst-rtld-list-tunables.exp
83f30e
@@ -10,5 +10,6 @@ glibc.malloc.tcache_max: 0x0 (min: 0x0, max: 0x[f]+)
83f30e
 glibc.malloc.tcache_unsorted_limit: 0x0 (min: 0x0, max: 0x[f]+)
83f30e
 glibc.malloc.top_pad: 0x0 (min: 0x0, max: 0x[f]+)
83f30e
 glibc.malloc.trim_threshold: 0x0 (min: 0x0, max: 0x[f]+)
83f30e
+glibc.rtld.dynamic_sort: 1 (min: 1, max: 2)
83f30e
 glibc.rtld.nns: 0x4 (min: 0x1, max: 0x10)
83f30e
 glibc.rtld.optional_static_tls: 0x200 (min: 0x0, max: 0x[f]+)
83f30e
diff --git a/include/link.h b/include/link.h
83f30e
index dd491989beb41353..041ff5f753a9ee11 100644
83f30e
--- a/include/link.h
83f30e
+++ b/include/link.h
83f30e
@@ -181,6 +181,11 @@ struct link_map
83f30e
     unsigned int l_init_called:1; /* Nonzero if DT_INIT function called.  */
83f30e
     unsigned int l_global:1;	/* Nonzero if object in _dl_global_scope.  */
83f30e
     unsigned int l_reserved:2;	/* Reserved for internal use.  */
83f30e
+    unsigned int l_main_map:1;  /* Nonzero for the map of the main program.  */
83f30e
+    unsigned int l_visited:1;   /* Used internally for map dependency
83f30e
+				   graph traversal.  */
83f30e
+    unsigned int l_map_used:1;  /* These two bits are used during traversal */
83f30e
+    unsigned int l_map_done:1;  /* of maps in _dl_close_worker. */
83f30e
     unsigned int l_phdr_allocated:1; /* Nonzero if the data structure pointed
83f30e
 					to by `l_phdr' is allocated.  */
83f30e
     unsigned int l_soname_added:1; /* Nonzero if the SONAME is for sure in
83f30e
diff --git a/manual/tunables.texi b/manual/tunables.texi
83f30e
index 43272cf885d1e3e6..c3f96cdc85208926 100644
83f30e
--- a/manual/tunables.texi
83f30e
+++ b/manual/tunables.texi
83f30e
@@ -303,6 +303,17 @@ changed once allocated at process startup.  The default allocation of
83f30e
 optional static TLS is 512 bytes and is allocated in every thread.
83f30e
 @end deftp
83f30e
 
83f30e
+@deftp Tunable glibc.rtld.dynamic_sort
83f30e
+Sets the algorithm to use for DSO sorting, valid values are @samp{1} and
83f30e
+@samp{2}.  For value of @samp{1}, an older O(n^3) algorithm is used, which is
83f30e
+long time tested, but may have performance issues when dependencies between
83f30e
+shared objects contain cycles due to circular dependencies.  When set to the
83f30e
+value of @samp{2}, a different algorithm is used, which implements a
83f30e
+topological sort through depth-first search, and does not exhibit the
83f30e
+performance issues of @samp{1}.
83f30e
+
83f30e
+The default value of this tunable is @samp{1}.
83f30e
+@end deftp
83f30e
 
83f30e
 @node Elision Tunables
83f30e
 @section Elision Tunables
83f30e
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
83f30e
index 5e56550a4d556fa7..9f09a4a280396659 100644
83f30e
--- a/sysdeps/generic/ldsodefs.h
83f30e
+++ b/sysdeps/generic/ldsodefs.h
83f30e
@@ -240,6 +240,13 @@ enum allowmask
83f30e
   };
83f30e
 
83f30e
 
83f30e
+/* DSO sort algorithm to use (check dl-sort-maps.c).  */
83f30e
+enum dso_sort_algorithm
83f30e
+  {
83f30e
+    dso_sort_algorithm_original,
83f30e
+    dso_sort_algorithm_dfs
83f30e
+  };
83f30e
+
83f30e
 struct audit_ifaces
83f30e
 {
83f30e
   void (*activity) (uintptr_t *, unsigned int);
83f30e
@@ -633,6 +640,8 @@ struct rtld_global_ro
83f30e
      platforms.  */
83f30e
   EXTERN uint64_t _dl_hwcap2;
83f30e
 
83f30e
+  EXTERN enum dso_sort_algorithm _dl_dso_sort_algo;
83f30e
+
83f30e
 #ifdef SHARED
83f30e
   /* We add a function table to _rtld_global which is then used to
83f30e
      call the function instead of going through the PLT.  The result
83f30e
@@ -1049,7 +1058,7 @@ extern void _dl_fini (void) attribute_hidden;
83f30e
 
83f30e
 /* Sort array MAPS according to dependencies of the contained objects.  */
83f30e
 extern void _dl_sort_maps (struct link_map **maps, unsigned int nmaps,
83f30e
-			   char *used, bool for_fini) attribute_hidden;
83f30e
+			   unsigned int skip, bool for_fini) attribute_hidden;
83f30e
 
83f30e
 /* The dynamic linker calls this function before and having changing
83f30e
    any shared object mappings.  The `r_state' member of `struct r_debug'
83f30e
@@ -1167,6 +1176,9 @@ extern struct link_map * _dl_get_dl_main_map (void)
83f30e
 # endif
83f30e
 #endif
83f30e
 
83f30e
+/* Initialize the DSO sort algorithm to use.  */
83f30e
+extern void _dl_sort_maps_init (void) attribute_hidden;
83f30e
+
83f30e
 /* Initialization of libpthread for statically linked applications.
83f30e
    If libpthread is not linked in, this is an empty function.  */
83f30e
 void __pthread_initialize_minimal (void) weak_function;