e354a5
commit 0c7b002fac12dcb2f53ba83ee56bb3b5d2439447
e354a5
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
e354a5
Date:   Tue Jun 9 09:57:28 2020 +0100
e354a5
e354a5
    rtld: Add rtld.nns tunable for the number of supported namespaces
e354a5
    
e354a5
    TLS_STATIC_SURPLUS is 1664 bytes currently which is not enough to
e354a5
    support DL_NNS (== 16) dynamic link namespaces, if we
e354a5
    assume 192 bytes of TLS are reserved for libc use and 144 bytes
e354a5
    are reserved for other system libraries that use IE TLS.
e354a5
    
e354a5
    A new tunable is introduced to control the number of supported
e354a5
    namespaces and to adjust the surplus static TLS size as follows:
e354a5
    
e354a5
    surplus_tls = 192 * (rtld.nns-1) + 144 * rtld.nns + 512
e354a5
    
e354a5
    The default is rtld.nns == 4 and then the surplus TLS size is the
e354a5
    same as before, so the behaviour is unchanged by default. If an
e354a5
    application creates more namespaces than the rtld.nns setting
e354a5
    allows, then it is not guaranteed to work, but the limit is not
e354a5
    checked. So existing usage will continue to work, but in the
e354a5
    future if an application creates more than 4 dynamic link
e354a5
    namespaces then the tunable will need to be set.
e354a5
    
e354a5
    In this patch DL_NNS is a fixed value and provides a maximum to
e354a5
    the rtld.nns setting.
e354a5
    
e354a5
    Static linking used a fixed 2048-byte surplus TLS; this is changed
e354a5
    so the same contract is used as for dynamic linking.  With static
e354a5
    linking DL_NNS == 1 so rtld.nns tunable is forced to 1, so by
e354a5
    default the surplus TLS is reduced to 144 + 512 = 656 bytes. This
e354a5
    change is not expected to cause problems.
e354a5
    
e354a5
    Tested on aarch64-linux-gnu and x86_64-linux-gnu.
e354a5
    
e354a5
    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
e354a5
e354a5
Conflicts:
e354a5
	elf/dl-tls.c
e354a5
	  (Different per-namespace TLS reservation defaults before
e354a5
	  this backport.)
e354a5
e354a5
diff --git a/csu/libc-tls.c b/csu/libc-tls.c
e354a5
index 28a79441cde379f7..08ed2b988b58ac6c 100644
e354a5
--- a/csu/libc-tls.c
e354a5
+++ b/csu/libc-tls.c
e354a5
@@ -52,13 +52,16 @@ bool _dl_tls_dtv_gaps;
e354a5
 struct dtv_slotinfo_list *_dl_tls_dtv_slotinfo_list;
e354a5
 /* Number of modules in the static TLS block.  */
e354a5
 size_t _dl_tls_static_nelem;
e354a5
-/* Size of the static TLS block.  Giving this initialized value
e354a5
-   preallocates some surplus bytes in the static TLS area.  */
e354a5
-size_t _dl_tls_static_size = 2048;
e354a5
+/* Size of the static TLS block.  */
e354a5
+size_t _dl_tls_static_size;
e354a5
 /* Size actually allocated in the static TLS block.  */
e354a5
 size_t _dl_tls_static_used;
e354a5
 /* Alignment requirement of the static TLS block.  */
e354a5
 size_t _dl_tls_static_align;
e354a5
+/* Size of surplus space in the static TLS area for dynamically
e354a5
+   loaded modules with IE-model TLS or for TLSDESC optimization.
e354a5
+   See comments in elf/dl-tls.c where it is initialized.  */
e354a5
+size_t _dl_tls_static_surplus;
e354a5
 
e354a5
 /* Generation counter for the dtv.  */
e354a5
 size_t _dl_tls_generation;
e354a5
@@ -87,10 +90,8 @@ init_slotinfo (void)
e354a5
 static void
e354a5
 init_static_tls (size_t memsz, size_t align)
e354a5
 {
e354a5
-  /* That is the size of the TLS memory for this object.  The initialized
e354a5
-     value of _dl_tls_static_size is provided by dl-open.c to request some
e354a5
-     surplus that permits dynamic loading of modules with IE-model TLS.  */
e354a5
-  GL(dl_tls_static_size) = roundup (memsz + GL(dl_tls_static_size),
e354a5
+  /* That is the size of the TLS memory for this object.  */
e354a5
+  GL(dl_tls_static_size) = roundup (memsz + GLRO(dl_tls_static_surplus),
e354a5
 				    TLS_TCB_ALIGN);
e354a5
 #if TLS_TCB_AT_TP
e354a5
   GL(dl_tls_static_size) += TLS_TCB_SIZE;
e354a5
@@ -131,25 +132,24 @@ __libc_setup_tls (void)
e354a5
 	  break;
e354a5
 	}
e354a5
 
e354a5
+  /* Calculate the size of the static TLS surplus.  */
e354a5
+  _dl_tls_static_surplus_init ();
e354a5
+
e354a5
   /* We have to set up the TCB block which also (possibly) contains
e354a5
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
e354a5
      Instead we use 'sbrk' which would only uses 'errno' if it fails.
e354a5
      In this case we are right away out of memory and the user gets
e354a5
-     what she/he deserves.
e354a5
-
e354a5
-     The initialized value of _dl_tls_static_size is provided by dl-open.c
e354a5
-     to request some surplus that permits dynamic loading of modules with
e354a5
-     IE-model TLS.  */
e354a5
+     what she/he deserves.  */
e354a5
 #if TLS_TCB_AT_TP
e354a5
   /* Align the TCB offset to the maximum alignment, as
e354a5
      _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
e354a5
      and dl_tls_static_align.  */
e354a5
-  tcb_offset = roundup (memsz + GL(dl_tls_static_size), max_align);
e354a5
+  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
e354a5
   tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
e354a5
 #elif TLS_DTV_AT_TP
e354a5
   tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
e354a5
   tlsblock = __sbrk (tcb_offset + memsz + max_align
e354a5
-		     + TLS_PRE_TCB_SIZE + GL(dl_tls_static_size));
e354a5
+		     + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus));
e354a5
   tlsblock += TLS_PRE_TCB_SIZE;
e354a5
 #else
e354a5
   /* In case a model with a different layout for the TCB and DTV
e354a5
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
e354a5
index a2def280b7096960..ef57a21391bb36fa 100644
e354a5
--- a/elf/dl-tls.c
e354a5
+++ b/elf/dl-tls.c
e354a5
@@ -29,10 +29,54 @@
e354a5
 #include <dl-tls.h>
e354a5
 #include <ldsodefs.h>
e354a5
 
e354a5
-/* Amount of excess space to allocate in the static TLS area
e354a5
-   to allow dynamic loading of modules defining IE-model TLS data.  */
e354a5
-#define TLS_STATIC_SURPLUS	64 + DL_NNS * 100
e354a5
+#define TUNABLE_NAMESPACE rtld
e354a5
+#include <dl-tunables.h>
e354a5
+
e354a5
+/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
e354a5
+
e354a5
+   - IE TLS in libc.so for all dlmopen namespaces except in the initial
e354a5
+     one where libc.so is not loaded dynamically but at startup time,
e354a5
+   - IE TLS in other libraries which may be dynamically loaded even in the
e354a5
+     initial namespace,
e354a5
+   - and optionally for optimizing dynamic TLS access.
e354a5
+
e354a5
+   The maximum number of namespaces is DL_NNS, but to support that many
e354a5
+   namespaces correctly the static TLS allocation should be significantly
e354a5
+   increased, which may cause problems with small thread stacks due to the
e354a5
+   way static TLS is accounted (bug 11787).
e354a5
+
e354a5
+   So there is a rtld.nns tunable limit on the number of supported namespaces
e354a5
+   that affects the size of the static TLS and by default it's small enough
e354a5
+   not to cause problems with existing applications. The limit is not
e354a5
+   enforced or checked: it is the user's responsibility to increase rtld.nns
e354a5
+   if more dlmopen namespaces are used.  */
e354a5
+
e354a5
+/* Size of initial-exec TLS in libc.so.  */
e354a5
+#define LIBC_IE_TLS 192
e354a5
+/* Size of initial-exec TLS in libraries other than libc.so.
e354a5
+   This should be large enough to cover runtime libraries of the
e354a5
+   compiler such as libgomp and libraries in libc other than libc.so.  */
e354a5
+#define OTHER_IE_TLS 144
e354a5
+/* Size of additional surplus TLS, placeholder for TLS optimizations.  */
e354a5
+#define OPT_SURPLUS_TLS 512
e354a5
 
e354a5
+void
e354a5
+_dl_tls_static_surplus_init (void)
e354a5
+{
e354a5
+  size_t nns;
e354a5
+
e354a5
+#if HAVE_TUNABLES
e354a5
+  nns = TUNABLE_GET (nns, size_t, NULL);
e354a5
+#else
e354a5
+  /* Default value of the rtld.nns tunable.  */
e354a5
+  nns = 4;
e354a5
+#endif
e354a5
+  if (nns > DL_NNS)
e354a5
+    nns = DL_NNS;
e354a5
+  GLRO(dl_tls_static_surplus) = ((nns - 1) * LIBC_IE_TLS
e354a5
+				 + nns * OTHER_IE_TLS
e354a5
+				 + OPT_SURPLUS_TLS);
e354a5
+}
e354a5
 
e354a5
 /* Out-of-memory handler.  */
e354a5
 static void
e354a5
@@ -218,7 +262,8 @@ _dl_determine_tlsoffset (void)
e354a5
     }
e354a5
 
e354a5
   GL(dl_tls_static_used) = offset;
e354a5
-  GL(dl_tls_static_size) = (roundup (offset + TLS_STATIC_SURPLUS, max_align)
e354a5
+  GL(dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
e354a5
+				     max_align)
e354a5
 			    + TLS_TCB_SIZE);
e354a5
 #elif TLS_DTV_AT_TP
e354a5
   /* The TLS blocks start right after the TCB.  */
e354a5
@@ -262,7 +307,7 @@ _dl_determine_tlsoffset (void)
e354a5
     }
e354a5
 
e354a5
   GL(dl_tls_static_used) = offset;
e354a5
-  GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS,
e354a5
+  GL(dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
e354a5
 				    TLS_TCB_ALIGN);
e354a5
 #else
e354a5
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
e354a5
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
e354a5
index b7cc79f8bfe0a7c6..7337fb85062c91a7 100644
e354a5
--- a/elf/dl-tunables.list
e354a5
+++ b/elf/dl-tunables.list
e354a5
@@ -126,4 +126,13 @@ glibc {
e354a5
       default: 3
e354a5
     }
e354a5
   }
e354a5
+
e354a5
+  rtld {
e354a5
+    nns {
e354a5
+      type: SIZE_T
e354a5
+      minval: 1
e354a5
+      maxval: 16
e354a5
+      default: 4
e354a5
+    }
e354a5
+  }
e354a5
 }
e354a5
diff --git a/elf/rtld.c b/elf/rtld.c
e354a5
index 772aff5160359b7b..a440741f4c1b3c91 100644
e354a5
--- a/elf/rtld.c
e354a5
+++ b/elf/rtld.c
e354a5
@@ -776,6 +776,9 @@ init_tls (void)
e354a5
       }
e354a5
   assert (i == GL(dl_tls_max_dtv_idx));
e354a5
 
e354a5
+  /* Calculate the size of the static TLS surplus.  */
e354a5
+  _dl_tls_static_surplus_init ();
e354a5
+
e354a5
   /* Compute the TLS offsets for the various blocks.  */
e354a5
   _dl_determine_tlsoffset ();
e354a5
 
e354a5
diff --git a/manual/tunables.texi b/manual/tunables.texi
e354a5
index 55d5dfb14db4dfb8..e092b8e81a18d739 100644
e354a5
--- a/manual/tunables.texi
e354a5
+++ b/manual/tunables.texi
e354a5
@@ -31,6 +31,7 @@ their own namespace.
e354a5
 @menu
e354a5
 * Tunable names::  The structure of a tunable name
e354a5
 * Memory Allocation Tunables::  Tunables in the memory allocation subsystem
e354a5
+* Dynamic Linking Tunables:: Tunables in the dynamic linking subsystem
e354a5
 * Elision Tunables::  Tunables in elision subsystem
e354a5
 * Hardware Capability Tunables::  Tunables that modify the hardware
e354a5
 				  capabilities seen by @theglibc{}
e354a5
@@ -225,6 +226,26 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
e354a5
 passed to @code{malloc} for the largest bin size to enable.
e354a5
 @end deftp
e354a5
 
e354a5
+@node Dynamic Linking Tunables
e354a5
+@section Dynamic Linking Tunables
e354a5
+@cindex dynamic linking tunables
e354a5
+@cindex rtld tunables
e354a5
+
e354a5
+@deftp {Tunable namespace} glibc.rtld
e354a5
+Dynamic linker behavior can be modified by setting the
e354a5
+following tunables in the @code{rtld} namespace:
e354a5
+@end deftp
e354a5
+
e354a5
+@deftp Tunable glibc.rtld.nns
e354a5
+Sets the number of supported dynamic link namespaces (see @code{dlmopen}).
e354a5
+Currently this limit can be set between 1 and 16 inclusive, the default is 4.
e354a5
+Each link namespace consumes some memory in all threads, and thus raising the
e354a5
+limit will increase the amount of memory each thread uses. Raising the limit
e354a5
+is useful when your application uses more than 4 dynamic linker audit modules
e354a5
+e.g. @env{LD_AUDIT}, or will use more than 4 dynamic link namespaces as created
e354a5
+by @code{dlmopen} with an lmid argument of @code{LM_ID_NEWLM}.
e354a5
+@end deftp
e354a5
+
e354a5
 @node Elision Tunables
e354a5
 @section Elision Tunables
e354a5
 @cindex elision tunables
e354a5
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
e354a5
index ccec08929e4ad4e7..e54105848c3cb7d1 100644
e354a5
--- a/sysdeps/generic/ldsodefs.h
e354a5
+++ b/sysdeps/generic/ldsodefs.h
e354a5
@@ -582,6 +582,11 @@ struct rtld_global_ro
e354a5
      binaries, don't honor for PIEs).  */
e354a5
   EXTERN ElfW(Addr) _dl_use_load_bias;
e354a5
 
e354a5
+  /* Size of surplus space in the static TLS area for dynamically
e354a5
+     loaded modules with IE-model TLS or for TLSDESC optimization.
e354a5
+     See comments in elf/dl-tls.c where it is initialized.  */
e354a5
+  EXTERN size_t _dl_tls_static_surplus;
e354a5
+
e354a5
   /* Name of the shared object to be profiled (if any).  */
e354a5
   EXTERN const char *_dl_profile;
e354a5
   /* Filename of the output file.  */
e354a5
@@ -1099,6 +1104,9 @@ extern size_t _dl_count_modids (void) attribute_hidden;
e354a5
 /* Calculate offset of the TLS blocks in the static TLS block.  */
e354a5
 extern void _dl_determine_tlsoffset (void) attribute_hidden;
e354a5
 
e354a5
+/* Calculate the size of the static TLS surplus.  */
e354a5
+void _dl_tls_static_surplus_init (void) attribute_hidden;
e354a5
+
e354a5
 #ifndef SHARED
e354a5
 /* Set up the TCB for statically linked applications.  This is called
e354a5
    early during startup because we always use TLS (for errno and the