446cf2
commit 0c7b002fac12dcb2f53ba83ee56bb3b5d2439447
446cf2
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
446cf2
Date:   Tue Jun 9 09:57:28 2020 +0100
446cf2
446cf2
    rtld: Add rtld.nns tunable for the number of supported namespaces
446cf2
    
446cf2
    TLS_STATIC_SURPLUS is 1664 bytes currently which is not enough to
446cf2
    support DL_NNS (== 16) number of dynamic link namespaces, if we
446cf2
    assume 192 bytes of TLS are reserved for libc use and 144 bytes
446cf2
    are reserved for other system libraries that use IE TLS.
446cf2
    
446cf2
    A new tunable is introduced to control the number of supported
446cf2
    namespaces and to adjust the surplus static TLS size as follows:
446cf2
    
446cf2
    surplus_tls = 192 * (rtld.nns-1) + 144 * rtld.nns + 512
446cf2
    
446cf2
    The default is rtld.nns == 4 and then the surplus TLS size is the
446cf2
    same as before, so the behaviour is unchanged by default. If an
446cf2
    application creates more namespaces than the rtld.nns setting
446cf2
    allows, then it is not guaranteed to work, but the limit is not
446cf2
    checked. So existing usage will continue to work, but in the
446cf2
    future if an application creates more than 4 dynamic link
446cf2
    namespaces then the tunable will need to be set.
446cf2
    
446cf2
    In this patch DL_NNS is a fixed value and provides a maximum to
446cf2
    the rtld.nns setting.
446cf2
    
446cf2
    Static linking used fixed 2048 bytes surplus TLS, this is changed
446cf2
    so the same contract is used as for dynamic linking.  With static
446cf2
    linking DL_NNS == 1 so rtld.nns tunable is forced to 1, so by
446cf2
    default the surplus TLS is reduced to 144 + 512 = 656 bytes. This
446cf2
    change is not expected to cause problems.
446cf2
    
446cf2
    Tested on aarch64-linux-gnu and x86_64-linux-gnu.
446cf2
    
446cf2
    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
446cf2
446cf2
Conflicts:
446cf2
	elf/dl-tls.c
446cf2
	  (Different per-namespace TLS reservation defaults before
446cf2
	  this backport.)
446cf2
446cf2
diff --git a/csu/libc-tls.c b/csu/libc-tls.c
446cf2
index 28a79441cde379f7..08ed2b988b58ac6c 100644
446cf2
--- a/csu/libc-tls.c
446cf2
+++ b/csu/libc-tls.c
446cf2
@@ -52,13 +52,16 @@ bool _dl_tls_dtv_gaps;
446cf2
 struct dtv_slotinfo_list *_dl_tls_dtv_slotinfo_list;
446cf2
 /* Number of modules in the static TLS block.  */
446cf2
 size_t _dl_tls_static_nelem;
446cf2
-/* Size of the static TLS block.  Giving this initialized value
446cf2
-   preallocates some surplus bytes in the static TLS area.  */
446cf2
-size_t _dl_tls_static_size = 2048;
446cf2
+/* Size of the static TLS block.  */
446cf2
+size_t _dl_tls_static_size;
446cf2
 /* Size actually allocated in the static TLS block.  */
446cf2
 size_t _dl_tls_static_used;
446cf2
 /* Alignment requirement of the static TLS block.  */
446cf2
 size_t _dl_tls_static_align;
446cf2
+/* Size of surplus space in the static TLS area for dynamically
446cf2
+   loaded modules with IE-model TLS or for TLSDESC optimization.
446cf2
+   See comments in elf/dl-tls.c where it is initialized.  */
446cf2
+size_t _dl_tls_static_surplus;
446cf2
 
446cf2
 /* Generation counter for the dtv.  */
446cf2
 size_t _dl_tls_generation;
446cf2
@@ -87,10 +90,8 @@ init_slotinfo (void)
446cf2
 static void
446cf2
 init_static_tls (size_t memsz, size_t align)
446cf2
 {
446cf2
-  /* That is the size of the TLS memory for this object.  The initialized
446cf2
-     value of _dl_tls_static_size is provided by dl-open.c to request some
446cf2
-     surplus that permits dynamic loading of modules with IE-model TLS.  */
446cf2
-  GL(dl_tls_static_size) = roundup (memsz + GL(dl_tls_static_size),
446cf2
+  /* That is the size of the TLS memory for this object.  */
446cf2
+  GL(dl_tls_static_size) = roundup (memsz + GLRO(dl_tls_static_surplus),
446cf2
 				    TLS_TCB_ALIGN);
446cf2
 #if TLS_TCB_AT_TP
446cf2
   GL(dl_tls_static_size) += TLS_TCB_SIZE;
446cf2
@@ -131,25 +132,24 @@ __libc_setup_tls (void)
446cf2
 	  break;
446cf2
 	}
446cf2
 
446cf2
+  /* Calculate the size of the static TLS surplus.  */
446cf2
+  _dl_tls_static_surplus_init ();
446cf2
+
446cf2
   /* We have to set up the TCB block which also (possibly) contains
446cf2
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
446cf2
      Instead we use 'sbrk' which would only uses 'errno' if it fails.
446cf2
      In this case we are right away out of memory and the user gets
446cf2
-     what she/he deserves.
446cf2
-
446cf2
-     The initialized value of _dl_tls_static_size is provided by dl-open.c
446cf2
-     to request some surplus that permits dynamic loading of modules with
446cf2
-     IE-model TLS.  */
446cf2
+     what she/he deserves.  */
446cf2
 #if TLS_TCB_AT_TP
446cf2
   /* Align the TCB offset to the maximum alignment, as
446cf2
      _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
446cf2
      and dl_tls_static_align.  */
446cf2
-  tcb_offset = roundup (memsz + GL(dl_tls_static_size), max_align);
446cf2
+  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
446cf2
   tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
446cf2
 #elif TLS_DTV_AT_TP
446cf2
   tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
446cf2
   tlsblock = __sbrk (tcb_offset + memsz + max_align
446cf2
-		     + TLS_PRE_TCB_SIZE + GL(dl_tls_static_size));
446cf2
+		     + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus));
446cf2
   tlsblock += TLS_PRE_TCB_SIZE;
446cf2
 #else
446cf2
   /* In case a model with a different layout for the TCB and DTV
446cf2
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
446cf2
index a2def280b7096960..ef57a21391bb36fa 100644
446cf2
--- a/elf/dl-tls.c
446cf2
+++ b/elf/dl-tls.c
446cf2
@@ -29,10 +29,54 @@
446cf2
 #include <dl-tls.h>
446cf2
 #include <ldsodefs.h>
446cf2
 
446cf2
-/* Amount of excess space to allocate in the static TLS area
446cf2
-   to allow dynamic loading of modules defining IE-model TLS data.  */
446cf2
-#define TLS_STATIC_SURPLUS	64 + DL_NNS * 100
446cf2
+#define TUNABLE_NAMESPACE rtld
446cf2
+#include <dl-tunables.h>
446cf2
+
446cf2
+/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
446cf2
+
446cf2
+   - IE TLS in libc.so for all dlmopen namespaces except in the initial
446cf2
+     one where libc.so is not loaded dynamically but at startup time,
446cf2
+   - IE TLS in other libraries which may be dynamically loaded even in the
446cf2
+     initial namespace,
446cf2
+   - and optionally for optimizing dynamic TLS access.
446cf2
+
446cf2
+   The maximum number of namespaces is DL_NNS, but to support that many
446cf2
+   namespaces correctly the static TLS allocation should be significantly
446cf2
+   increased, which may cause problems with small thread stacks due to the
446cf2
+   way static TLS is accounted (bug 11787).
446cf2
+
446cf2
+   So there is a rtld.nns tunable limit on the number of supported namespaces
446cf2
+   that affects the size of the static TLS and by default it's small enough
446cf2
+   not to cause problems with existing applications. The limit is not
446cf2
+   enforced or checked: it is the user's responsibility to increase rtld.nns
446cf2
+   if more dlmopen namespaces are used.  */
446cf2
+
446cf2
+/* Size of initial-exec TLS in libc.so.  */
446cf2
+#define LIBC_IE_TLS 192
446cf2
+/* Size of initial-exec TLS in libraries other than libc.so.
446cf2
+   This should be large enough to cover runtime libraries of the
446cf2
+   compiler such as libgomp and libraries in libc other than libc.so.  */
446cf2
+#define OTHER_IE_TLS 144
446cf2
+/* Size of additional surplus TLS, placeholder for TLS optimizations.  */
446cf2
+#define OPT_SURPLUS_TLS 512
446cf2
 
446cf2
+void
446cf2
+_dl_tls_static_surplus_init (void)
446cf2
+{
446cf2
+  size_t nns;
446cf2
+
446cf2
+#if HAVE_TUNABLES
446cf2
+  nns = TUNABLE_GET (nns, size_t, NULL);
446cf2
+#else
446cf2
+  /* Default values of the tunables.  */
446cf2
+  nns = 4;
446cf2
+#endif
446cf2
+  if (nns > DL_NNS)
446cf2
+    nns = DL_NNS;
446cf2
+  GLRO(dl_tls_static_surplus) = ((nns - 1) * LIBC_IE_TLS
446cf2
+				 + nns * OTHER_IE_TLS
446cf2
+				 + OPT_SURPLUS_TLS);
446cf2
+}
446cf2
 
446cf2
 /* Out-of-memory handler.  */
446cf2
 static void
446cf2
@@ -218,7 +262,8 @@ _dl_determine_tlsoffset (void)
446cf2
     }
446cf2
 
446cf2
   GL(dl_tls_static_used) = offset;
446cf2
-  GL(dl_tls_static_size) = (roundup (offset + TLS_STATIC_SURPLUS, max_align)
446cf2
+  GL(dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
446cf2
+				     max_align)
446cf2
 			    + TLS_TCB_SIZE);
446cf2
 #elif TLS_DTV_AT_TP
446cf2
   /* The TLS blocks start right after the TCB.  */
446cf2
@@ -262,7 +307,7 @@ _dl_determine_tlsoffset (void)
446cf2
     }
446cf2
 
446cf2
   GL(dl_tls_static_used) = offset;
446cf2
-  GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS,
446cf2
+  GL(dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
446cf2
 				    TLS_TCB_ALIGN);
446cf2
 #else
446cf2
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
446cf2
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
446cf2
index b7cc79f8bfe0a7c6..7337fb85062c91a7 100644
446cf2
--- a/elf/dl-tunables.list
446cf2
+++ b/elf/dl-tunables.list
446cf2
@@ -126,4 +126,13 @@ glibc {
446cf2
       default: 3
446cf2
     }
446cf2
   }
446cf2
+
446cf2
+  rtld {
446cf2
+    nns {
446cf2
+      type: SIZE_T
446cf2
+      minval: 1
446cf2
+      maxval: 16
446cf2
+      default: 4
446cf2
+    }
446cf2
+  }
446cf2
 }
446cf2
diff --git a/elf/rtld.c b/elf/rtld.c
446cf2
index 772aff5160359b7b..a440741f4c1b3c91 100644
446cf2
--- a/elf/rtld.c
446cf2
+++ b/elf/rtld.c
446cf2
@@ -776,6 +776,9 @@ init_tls (void)
446cf2
       }
446cf2
   assert (i == GL(dl_tls_max_dtv_idx));
446cf2
 
446cf2
+  /* Calculate the size of the static TLS surplus.  */
446cf2
+  _dl_tls_static_surplus_init ();
446cf2
+
446cf2
   /* Compute the TLS offsets for the various blocks.  */
446cf2
   _dl_determine_tlsoffset ();
446cf2
 
446cf2
diff --git a/manual/tunables.texi b/manual/tunables.texi
446cf2
index 55d5dfb14db4dfb8..e092b8e81a18d739 100644
446cf2
--- a/manual/tunables.texi
446cf2
+++ b/manual/tunables.texi
446cf2
@@ -31,6 +31,7 @@ their own namespace.
446cf2
 @menu
446cf2
 * Tunable names::  The structure of a tunable name
446cf2
 * Memory Allocation Tunables::  Tunables in the memory allocation subsystem
446cf2
+* Dynamic Linking Tunables:: Tunables in the dynamic linking subsystem
446cf2
 * Elision Tunables::  Tunables in elision subsystem
446cf2
 * Hardware Capability Tunables::  Tunables that modify the hardware
446cf2
 				  capabilities seen by @theglibc{}
446cf2
@@ -225,6 +226,26 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
446cf2
 passed to @code{malloc} for the largest bin size to enable.
446cf2
 @end deftp
446cf2
 
446cf2
+@node Dynamic Linking Tunables
446cf2
+@section Dynamic Linking Tunables
446cf2
+@cindex dynamic linking tunables
446cf2
+@cindex rtld tunables
446cf2
+
446cf2
+@deftp {Tunable namespace} glibc.rtld
446cf2
+Dynamic linker behavior can be modified by setting the
446cf2
+following tunables in the @code{rtld} namespace:
446cf2
+@end deftp
446cf2
+
446cf2
+@deftp Tunable glibc.rtld.nns
446cf2
+Sets the number of supported dynamic link namespaces (see @code{dlmopen}).
446cf2
+Currently this limit can be set between 1 and 16 inclusive, the default is 4.
446cf2
+Each link namespace consumes some memory in all threads, and thus raising the
446cf2
+limit will increase the amount of memory each thread uses. Raising the limit
446cf2
+is useful when your application uses more than 4 dynamic linker audit modules
446cf2
+e.g. @env{LD_AUDIT}, or will use more than 4 dynamic link namespaces as created
446cf2
+by @code{dlmopen} with an lmid argument of @code{LM_ID_NEWLM}.
446cf2
+@end deftp
446cf2
+
446cf2
 @node Elision Tunables
446cf2
 @section Elision Tunables
446cf2
 @cindex elision tunables
446cf2
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
446cf2
index ccec08929e4ad4e7..e54105848c3cb7d1 100644
446cf2
--- a/sysdeps/generic/ldsodefs.h
446cf2
+++ b/sysdeps/generic/ldsodefs.h
446cf2
@@ -582,6 +582,11 @@ struct rtld_global_ro
446cf2
      binaries, don't honor for PIEs).  */
446cf2
   EXTERN ElfW(Addr) _dl_use_load_bias;
446cf2
 
446cf2
+  /* Size of surplus space in the static TLS area for dynamically
446cf2
+     loaded modules with IE-model TLS or for TLSDESC optimization.
446cf2
+     See comments in elf/dl-tls.c where it is initialized.  */
446cf2
+  EXTERN size_t _dl_tls_static_surplus;
446cf2
+
446cf2
   /* Name of the shared object to be profiled (if any).  */
446cf2
   EXTERN const char *_dl_profile;
446cf2
   /* Filename of the output file.  */
446cf2
@@ -1099,6 +1104,9 @@ extern size_t _dl_count_modids (void) attribute_hidden;
446cf2
 /* Calculate offset of the TLS blocks in the static TLS block.  */
446cf2
 extern void _dl_determine_tlsoffset (void) attribute_hidden;
446cf2
 
446cf2
+/* Calculate the size of the static TLS surplus.  */
446cf2
+void _dl_tls_static_surplus_init (void) attribute_hidden;
446cf2
+
446cf2
 #ifndef SHARED
446cf2
 /* Set up the TCB for statically linked applications.  This is called
446cf2
    early during startup because we always use TLS (for errno and the