b1dca6
commit 0c7b002fac12dcb2f53ba83ee56bb3b5d2439447
b1dca6
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
b1dca6
Date:   Tue Jun 9 09:57:28 2020 +0100
b1dca6
b1dca6
    rtld: Add rtld.nns tunable for the number of supported namespaces
b1dca6
    
b1dca6
    TLS_STATIC_SURPLUS is 1664 bytes currently which is not enough to
b1dca6
    support DL_NNS (== 16) number of dynamic link namespaces, if we
b1dca6
    assume 192 bytes of TLS are reserved for libc use and 144 bytes
b1dca6
    are reserved for other system libraries that use IE TLS.
b1dca6
    
b1dca6
    A new tunable is introduced to control the number of supported
b1dca6
    namespaces and to adjust the surplus static TLS size as follows:
b1dca6
    
b1dca6
    surplus_tls = 192 * (rtld.nns-1) + 144 * rtld.nns + 512
b1dca6
    
b1dca6
    The default is rtld.nns == 4 and then the surplus TLS size is the
b1dca6
    same as before, so the behaviour is unchanged by default. If an
b1dca6
    application creates more namespaces than the rtld.nns setting
b1dca6
    allows, then it is not guaranteed to work, but the limit is not
b1dca6
    checked. So existing usage will continue to work, but in the
b1dca6
    future if an application creates more than 4 dynamic link
b1dca6
    namespaces then the tunable will need to be set.
b1dca6
    
b1dca6
    In this patch DL_NNS is a fixed value and provides a maximum to
b1dca6
    the rtld.nns setting.
b1dca6
    
b1dca6
    Static linking used fixed 2048 bytes surplus TLS, this is changed
b1dca6
    so the same contract is used as for dynamic linking.  With static
b1dca6
    linking DL_NNS == 1 so rtld.nns tunable is forced to 1, so by
b1dca6
    default the surplus TLS is reduced to 144 + 512 = 656 bytes. This
b1dca6
    change is not expected to cause problems.
b1dca6
    
b1dca6
    Tested on aarch64-linux-gnu and x86_64-linux-gnu.
b1dca6
    
b1dca6
    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
b1dca6
b1dca6
Conflicts:
b1dca6
	elf/dl-tls.c
b1dca6
	  (Different per-namespace TLS reservation defaults before
b1dca6
	  this backport.)
b1dca6
b1dca6
diff --git a/csu/libc-tls.c b/csu/libc-tls.c
b1dca6
index 28a79441cde379f7..08ed2b988b58ac6c 100644
b1dca6
--- a/csu/libc-tls.c
b1dca6
+++ b/csu/libc-tls.c
b1dca6
@@ -52,13 +52,16 @@ bool _dl_tls_dtv_gaps;
b1dca6
 struct dtv_slotinfo_list *_dl_tls_dtv_slotinfo_list;
b1dca6
 /* Number of modules in the static TLS block.  */
b1dca6
 size_t _dl_tls_static_nelem;
b1dca6
-/* Size of the static TLS block.  Giving this initialized value
b1dca6
-   preallocates some surplus bytes in the static TLS area.  */
b1dca6
-size_t _dl_tls_static_size = 2048;
b1dca6
+/* Size of the static TLS block.  */
b1dca6
+size_t _dl_tls_static_size;
b1dca6
 /* Size actually allocated in the static TLS block.  */
b1dca6
 size_t _dl_tls_static_used;
b1dca6
 /* Alignment requirement of the static TLS block.  */
b1dca6
 size_t _dl_tls_static_align;
b1dca6
+/* Size of surplus space in the static TLS area for dynamically
b1dca6
+   loaded modules with IE-model TLS or for TLSDESC optimization.
b1dca6
+   See comments in elf/dl-tls.c where it is initialized.  */
b1dca6
+size_t _dl_tls_static_surplus;
b1dca6
 
b1dca6
 /* Generation counter for the dtv.  */
b1dca6
 size_t _dl_tls_generation;
b1dca6
@@ -87,10 +90,8 @@ init_slotinfo (void)
b1dca6
 static void
b1dca6
 init_static_tls (size_t memsz, size_t align)
b1dca6
 {
b1dca6
-  /* That is the size of the TLS memory for this object.  The initialized
b1dca6
-     value of _dl_tls_static_size is provided by dl-open.c to request some
b1dca6
-     surplus that permits dynamic loading of modules with IE-model TLS.  */
b1dca6
-  GL(dl_tls_static_size) = roundup (memsz + GL(dl_tls_static_size),
b1dca6
+  /* That is the size of the TLS memory for this object.  */
b1dca6
+  GL(dl_tls_static_size) = roundup (memsz + GLRO(dl_tls_static_surplus),
b1dca6
 				    TLS_TCB_ALIGN);
b1dca6
 #if TLS_TCB_AT_TP
b1dca6
   GL(dl_tls_static_size) += TLS_TCB_SIZE;
b1dca6
@@ -131,25 +132,24 @@ __libc_setup_tls (void)
b1dca6
 	  break;
b1dca6
 	}
b1dca6
 
b1dca6
+  /* Calculate the size of the static TLS surplus.  */
b1dca6
+  _dl_tls_static_surplus_init ();
b1dca6
+
b1dca6
   /* We have to set up the TCB block which also (possibly) contains
b1dca6
      'errno'.  Therefore we avoid 'malloc' which might touch 'errno'.
b1dca6
      Instead we use 'sbrk' which would only uses 'errno' if it fails.
b1dca6
      In this case we are right away out of memory and the user gets
b1dca6
-     what she/he deserves.
b1dca6
-
b1dca6
-     The initialized value of _dl_tls_static_size is provided by dl-open.c
b1dca6
-     to request some surplus that permits dynamic loading of modules with
b1dca6
-     IE-model TLS.  */
b1dca6
+     what she/he deserves.  */
b1dca6
 #if TLS_TCB_AT_TP
b1dca6
   /* Align the TCB offset to the maximum alignment, as
b1dca6
      _dl_allocate_tls_storage (in elf/dl-tls.c) does using __libc_memalign
b1dca6
      and dl_tls_static_align.  */
b1dca6
-  tcb_offset = roundup (memsz + GL(dl_tls_static_size), max_align);
b1dca6
+  tcb_offset = roundup (memsz + GLRO(dl_tls_static_surplus), max_align);
b1dca6
   tlsblock = __sbrk (tcb_offset + TLS_INIT_TCB_SIZE + max_align);
b1dca6
 #elif TLS_DTV_AT_TP
b1dca6
   tcb_offset = roundup (TLS_INIT_TCB_SIZE, align ?: 1);
b1dca6
   tlsblock = __sbrk (tcb_offset + memsz + max_align
b1dca6
-		     + TLS_PRE_TCB_SIZE + GL(dl_tls_static_size));
b1dca6
+		     + TLS_PRE_TCB_SIZE + GLRO(dl_tls_static_surplus));
b1dca6
   tlsblock += TLS_PRE_TCB_SIZE;
b1dca6
 #else
b1dca6
   /* In case a model with a different layout for the TCB and DTV
b1dca6
diff --git a/elf/dl-tls.c b/elf/dl-tls.c
b1dca6
index a2def280b7096960..ef57a21391bb36fa 100644
b1dca6
--- a/elf/dl-tls.c
b1dca6
+++ b/elf/dl-tls.c
b1dca6
@@ -29,10 +29,54 @@
b1dca6
 #include <dl-tls.h>
b1dca6
 #include <ldsodefs.h>
b1dca6
 
b1dca6
-/* Amount of excess space to allocate in the static TLS area
b1dca6
-   to allow dynamic loading of modules defining IE-model TLS data.  */
b1dca6
-#define TLS_STATIC_SURPLUS	64 + DL_NNS * 100
b1dca6
+#define TUNABLE_NAMESPACE rtld
b1dca6
+#include <dl-tunables.h>
b1dca6
+
b1dca6
+/* Surplus static TLS, GLRO(dl_tls_static_surplus), is used for
b1dca6
+
b1dca6
+   - IE TLS in libc.so for all dlmopen namespaces except in the initial
b1dca6
+     one where libc.so is not loaded dynamically but at startup time,
b1dca6
+   - IE TLS in other libraries which may be dynamically loaded even in the
b1dca6
+     initial namespace,
b1dca6
+   - and optionally for optimizing dynamic TLS access.
b1dca6
+
b1dca6
+   The maximum number of namespaces is DL_NNS, but to support that many
b1dca6
+   namespaces correctly the static TLS allocation should be significantly
b1dca6
+   increased, which may cause problems with small thread stacks due to the
b1dca6
+   way static TLS is accounted (bug 11787).
b1dca6
+
b1dca6
+   So there is a rtld.nns tunable limit on the number of supported namespaces
b1dca6
+   that affects the size of the static TLS and by default it's small enough
b1dca6
+   not to cause problems with existing applications. The limit is not
b1dca6
+   enforced or checked: it is the user's responsibility to increase rtld.nns
b1dca6
+   if more dlmopen namespaces are used.  */
b1dca6
+
b1dca6
+/* Size of initial-exec TLS in libc.so.  */
b1dca6
+#define LIBC_IE_TLS 192
b1dca6
+/* Size of initial-exec TLS in libraries other than libc.so.
b1dca6
+   This should be large enough to cover runtime libraries of the
b1dca6
+   compiler such as libgomp and libraries in libc other than libc.so.  */
b1dca6
+#define OTHER_IE_TLS 144
b1dca6
+/* Size of additional surplus TLS, placeholder for TLS optimizations.  */
b1dca6
+#define OPT_SURPLUS_TLS 512
b1dca6
 
b1dca6
+void
b1dca6
+_dl_tls_static_surplus_init (void)
b1dca6
+{
b1dca6
+  size_t nns;
b1dca6
+
b1dca6
+#if HAVE_TUNABLES
b1dca6
+  nns = TUNABLE_GET (nns, size_t, NULL);
b1dca6
+#else
b1dca6
+  /* Default values of the tunables.  */
b1dca6
+  nns = 4;
b1dca6
+#endif
b1dca6
+  if (nns > DL_NNS)
b1dca6
+    nns = DL_NNS;
b1dca6
+  GLRO(dl_tls_static_surplus) = ((nns - 1) * LIBC_IE_TLS
b1dca6
+				 + nns * OTHER_IE_TLS
b1dca6
+				 + OPT_SURPLUS_TLS);
b1dca6
+}
b1dca6
 
b1dca6
 /* Out-of-memory handler.  */
b1dca6
 static void
b1dca6
@@ -218,7 +262,8 @@ _dl_determine_tlsoffset (void)
b1dca6
     }
b1dca6
 
b1dca6
   GL(dl_tls_static_used) = offset;
b1dca6
-  GL(dl_tls_static_size) = (roundup (offset + TLS_STATIC_SURPLUS, max_align)
b1dca6
+  GL(dl_tls_static_size) = (roundup (offset + GLRO(dl_tls_static_surplus),
b1dca6
+				     max_align)
b1dca6
 			    + TLS_TCB_SIZE);
b1dca6
 #elif TLS_DTV_AT_TP
b1dca6
   /* The TLS blocks start right after the TCB.  */
b1dca6
@@ -262,7 +307,7 @@ _dl_determine_tlsoffset (void)
b1dca6
     }
b1dca6
 
b1dca6
   GL(dl_tls_static_used) = offset;
b1dca6
-  GL(dl_tls_static_size) = roundup (offset + TLS_STATIC_SURPLUS,
b1dca6
+  GL(dl_tls_static_size) = roundup (offset + GLRO(dl_tls_static_surplus),
b1dca6
 				    TLS_TCB_ALIGN);
b1dca6
 #else
b1dca6
 # error "Either TLS_TCB_AT_TP or TLS_DTV_AT_TP must be defined"
b1dca6
diff --git a/elf/dl-tunables.list b/elf/dl-tunables.list
b1dca6
index b7cc79f8bfe0a7c6..7337fb85062c91a7 100644
b1dca6
--- a/elf/dl-tunables.list
b1dca6
+++ b/elf/dl-tunables.list
b1dca6
@@ -126,4 +126,13 @@ glibc {
b1dca6
       default: 3
b1dca6
     }
b1dca6
   }
b1dca6
+
b1dca6
+  rtld {
b1dca6
+    nns {
b1dca6
+      type: SIZE_T
b1dca6
+      minval: 1
b1dca6
+      maxval: 16
b1dca6
+      default: 4
b1dca6
+    }
b1dca6
+  }
b1dca6
 }
b1dca6
diff --git a/elf/rtld.c b/elf/rtld.c
b1dca6
index 772aff5160359b7b..a440741f4c1b3c91 100644
b1dca6
--- a/elf/rtld.c
b1dca6
+++ b/elf/rtld.c
b1dca6
@@ -776,6 +776,9 @@ init_tls (void)
b1dca6
       }
b1dca6
   assert (i == GL(dl_tls_max_dtv_idx));
b1dca6
 
b1dca6
+  /* Calculate the size of the static TLS surplus.  */
b1dca6
+  _dl_tls_static_surplus_init ();
b1dca6
+
b1dca6
   /* Compute the TLS offsets for the various blocks.  */
b1dca6
   _dl_determine_tlsoffset ();
b1dca6
 
b1dca6
diff --git a/manual/tunables.texi b/manual/tunables.texi
b1dca6
index 55d5dfb14db4dfb8..e092b8e81a18d739 100644
b1dca6
--- a/manual/tunables.texi
b1dca6
+++ b/manual/tunables.texi
b1dca6
@@ -31,6 +31,7 @@ their own namespace.
b1dca6
 @menu
b1dca6
 * Tunable names::  The structure of a tunable name
b1dca6
 * Memory Allocation Tunables::  Tunables in the memory allocation subsystem
b1dca6
+* Dynamic Linking Tunables:: Tunables in the dynamic linking subsystem
b1dca6
 * Elision Tunables::  Tunables in elision subsystem
b1dca6
 * Hardware Capability Tunables::  Tunables that modify the hardware
b1dca6
 				  capabilities seen by @theglibc{}
b1dca6
@@ -225,6 +226,26 @@ pointer, so add 4 on 32-bit systems or 8 on 64-bit systems to the size
b1dca6
 passed to @code{malloc} for the largest bin size to enable.
b1dca6
 @end deftp
b1dca6
 
b1dca6
+@node Dynamic Linking Tunables
b1dca6
+@section Dynamic Linking Tunables
b1dca6
+@cindex dynamic linking tunables
b1dca6
+@cindex rtld tunables
b1dca6
+
b1dca6
+@deftp {Tunable namespace} glibc.rtld
b1dca6
+Dynamic linker behavior can be modified by setting the
b1dca6
+following tunables in the @code{rtld} namespace:
b1dca6
+@end deftp
b1dca6
+
b1dca6
+@deftp Tunable glibc.rtld.nns
b1dca6
+Sets the number of supported dynamic link namespaces (see @code{dlmopen}).
b1dca6
+Currently this limit can be set between 1 and 16 inclusive; the default is 4.
b1dca6
+Each link namespace consumes some memory in all threads, and thus raising the
b1dca6
+limit will increase the amount of memory each thread uses. Raising the limit
b1dca6
+is useful when your application uses more than 4 dynamic linker audit modules
b1dca6
+e.g. @env{LD_AUDIT}, or will use more than 4 dynamic link namespaces as created
b1dca6
+by @code{dlmopen} with an lmid argument of @code{LM_ID_NEWLM}.
b1dca6
+@end deftp
b1dca6
+
b1dca6
 @node Elision Tunables
b1dca6
 @section Elision Tunables
b1dca6
 @cindex elision tunables
b1dca6
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
b1dca6
index ccec08929e4ad4e7..e54105848c3cb7d1 100644
b1dca6
--- a/sysdeps/generic/ldsodefs.h
b1dca6
+++ b/sysdeps/generic/ldsodefs.h
b1dca6
@@ -582,6 +582,11 @@ struct rtld_global_ro
b1dca6
      binaries, don't honor for PIEs).  */
b1dca6
   EXTERN ElfW(Addr) _dl_use_load_bias;
b1dca6
 
b1dca6
+  /* Size of surplus space in the static TLS area for dynamically
b1dca6
+     loaded modules with IE-model TLS or for TLSDESC optimization.
b1dca6
+     See comments in elf/dl-tls.c where it is initialized.  */
b1dca6
+  EXTERN size_t _dl_tls_static_surplus;
b1dca6
+
b1dca6
   /* Name of the shared object to be profiled (if any).  */
b1dca6
   EXTERN const char *_dl_profile;
b1dca6
   /* Filename of the output file.  */
b1dca6
@@ -1099,6 +1104,9 @@ extern size_t _dl_count_modids (void) attribute_hidden;
b1dca6
 /* Calculate offset of the TLS blocks in the static TLS block.  */
b1dca6
 extern void _dl_determine_tlsoffset (void) attribute_hidden;
b1dca6
 
b1dca6
+/* Calculate the size of the static TLS surplus.  */
b1dca6
+void _dl_tls_static_surplus_init (void) attribute_hidden;
b1dca6
+
b1dca6
 #ifndef SHARED
b1dca6
 /* Set up the TCB for statically linked applications.  This is called
b1dca6
    early during startup because we always use TLS (for errno and the