olga / rpms / glibc

Forked from rpms/glibc 5 years ago
Clone
00db10
commit e5d262effe3a87164308a3f37e61b32d0348692a
00db10
Author: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
00db10
Date:   Fri Nov 30 18:05:32 2018 -0200
00db10
00db10
    Fix _dl_profile_fixup data-dependency issue (Bug 23690)
00db10
    
00db10
    There is a data-dependency between the fields of struct l_reloc_result
00db10
    and the field used as the initialization guard. Users of the guard
00db10
    expect writes to the structure to be observable when they also observe
00db10
    the guard initialized. The solution for this problem is to use an acquire
00db10
    and release load and store to ensure previous writes to the structure are
00db10
    observable if the guard is initialized.
00db10
    
00db10
    The previous implementation used DL_FIXUP_VALUE_ADDR (l_reloc_result->addr)
00db10
    as the initialization guard, making it impossible for some architectures
00db10
    to load and store it atomically, i.e. hppa and ia64, due to its larger size.
00db10
    
00db10
    This commit adds an unsigned int to l_reloc_result to be used as the new
00db10
    initialization guard of the struct, making it possible to load and store
00db10
    it atomically in all architectures. The fix ensures that the values
00db10
    observed in l_reloc_result are consistent and do not lead to crashes.
00db10
    The algorithm is documented in the code in elf/dl-runtime.c
00db10
    (_dl_profile_fixup). Not all data races have been eliminated.
00db10
    
00db10
    Tested with build-many-glibcs and on powerpc, powerpc64, and powerpc64le.
00db10
    
00db10
            [BZ #23690]
00db10
            * elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory
00db10
            modification order when accessing reloc_result->addr.
00db10
            * include/link.h (reloc_result): Add field init.
00db10
            * nptl/Makefile (tests): Add tst-audit-threads.
00db10
            (modules-names): Add tst-audit-threads-mod1 and
00db10
            tst-audit-threads-mod2.
00db10
            Add rules to build tst-audit-threads.
00db10
            * nptl/tst-audit-threads-mod1.c: New file.
00db10
            * nptl/tst-audit-threads-mod2.c: Likewise.
00db10
            * nptl/tst-audit-threads.c: Likewise.
00db10
            * nptl/tst-audit-threads.h: Likewise.
00db10
    
00db10
    Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com>
00db10
    Reviewed-by: Carlos O'Donell <carlos@redhat.com>
00db10
00db10
(elf/dl-runtime.c adjusted here for lack of __builtin_expect cleanup,
00db10
nptl/Makefile for the usual test-related conflicts.)
00db10
00db10
diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c
00db10
index a42e3c4924e067ba..3678a98c98d726f3 100644
00db10
--- a/elf/dl-runtime.c
00db10
+++ b/elf/dl-runtime.c
00db10
@@ -183,10 +183,36 @@ _dl_profile_fixup (
00db10
   /* This is the address in the array where we store the result of previous
00db10
      relocations.  */
00db10
   struct reloc_result *reloc_result = &l->l_reloc_result[reloc_index];
00db10
-  DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr;
00db10
 
00db10
-  DL_FIXUP_VALUE_TYPE value = *resultp;
00db10
-  if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0)
00db10
+ /* CONCURRENCY NOTES:
00db10
+
00db10
+  Multiple threads may be calling the same PLT sequence and with
00db10
+  LD_AUDIT enabled they will be calling into _dl_profile_fixup to
00db10
+  update the reloc_result with the result of the lazy resolution.
00db10
+  The reloc_result guard variable is init, and we use
00db10
+  acquire/release loads and stores to it to ensure that the results of
00db10
+  the structure are consistent with the loaded value of the guard.
00db10
+  This does not fix all of the data races that occur when two or more
00db10
+  threads read reloc_result->init with a value of zero and read
00db10
+  and write to that reloc_result concurrently.  The expectation is
00db10
+  generally that while this is a data race it works because the
00db10
+  threads write the same values.  Until the data races are fixed
00db10
+  there is a potential for problems to arise from these data races.
00db10
+  The reloc result updates should happen in parallel but there should
00db10
+  be an atomic RMW which does the final update to the real result
00db10
+  entry (see bug 23790).
00db10
+
00db10
+  The following code uses reloc_result->init set to 0 to indicate if it is
00db10
+  the first time this object is being relocated, otherwise 1 which
00db10
+  indicates the object has already been relocated.
00db10
+
00db10
+  Reading/Writing from/to reloc_result->init must not happen
00db10
+  before previous writes to reloc_result complete as they could
00db10
+  end up with an incomplete struct.  */
00db10
+  DL_FIXUP_VALUE_TYPE value;
00db10
+  unsigned int init = atomic_load_acquire (&reloc_result->init);
00db10
+
00db10
+  if (init == 0)
00db10
     {
00db10
       /* This is the first time we have to relocate this object.  */
00db10
       const ElfW(Sym) *const symtab
00db10
@@ -347,20 +373,32 @@ _dl_profile_fixup (
00db10
 #endif
00db10
 
00db10
       /* Store the result for later runs.  */
00db10
-      if (__builtin_expect (! GLRO(dl_bind_not), 1))
00db10
-	*resultp = value;
00db10
+      if (__glibc_likely (! GLRO(dl_bind_not)))
00db10
+	{
00db10
+	  reloc_result->addr = value;
00db10
+	  /* Guarantee all previous writes complete before
00db10
+	     init is updated.  See CONCURRENCY NOTES earlier.  */
00db10
+	  atomic_store_release (&reloc_result->init, 1);
00db10
+	}
00db10
+      init = 1;
00db10
     }
00db10
+  else
00db10
+    value = reloc_result->addr;
00db10
 
00db10
   /* By default we do not call the pltexit function.  */
00db10
   long int framesize = -1;
00db10
 
00db10
+
00db10
 #ifdef SHARED
00db10
   /* Auditing checkpoint: report the PLT entering and allow the
00db10
      auditors to change the value.  */
00db10
-  if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0
00db10
+  if (GLRO(dl_naudit) > 0
00db10
       /* Don't do anything if no auditor wants to intercept this call.  */
00db10
       && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0)
00db10
     {
00db10
+      /* Sanity check:  DL_FIXUP_VALUE_CODE_ADDR (value) should have been
00db10
+	 initialized earlier in this function or in another thread.  */
00db10
+      assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0);
00db10
       ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound,
00db10
 						l_info[DT_SYMTAB])
00db10
 			   + reloc_result->boundndx);
00db10
diff --git a/include/link.h b/include/link.h
00db10
index d7590640aa9285e5..22d020d833ae3a7c 100644
00db10
--- a/include/link.h
00db10
+++ b/include/link.h
00db10
@@ -206,6 +206,10 @@ struct link_map
00db10
       unsigned int boundndx;
00db10
       uint32_t enterexit;
00db10
       unsigned int flags;
00db10
+      /* CONCURRENCY NOTE: This is used to guard the concurrent initialization
00db10
+	 of the relocation result across multiple threads.  See the more
00db10
+	 detailed notes in elf/dl-runtime.c.  */
00db10
+      unsigned int init;
00db10
     } *l_reloc_result;
00db10
 
00db10
     /* Pointer to the version information if available.  */
00db10
diff --git a/nptl/Makefile b/nptl/Makefile
00db10
index cf47a6f097916766..1b9639f3566a63fd 100644
00db10
--- a/nptl/Makefile
00db10
+++ b/nptl/Makefile
00db10
@@ -298,7 +298,7 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \
00db10
 endif
00db10
 ifeq ($(build-shared),yes)
00db10
 tests += tst-atfork2 tst-tls3 tst-tls4 tst-tls5 tst-_res1 tst-fini1 \
00db10
-	 tst-stackguard1
00db10
+	 tst-stackguard1 tst-audit-threads
00db10
 tests-nolibpthread += tst-fini1
00db10
 ifeq ($(have-z-execstack),yes)
00db10
 tests += tst-execstack
00db10
@@ -309,7 +309,7 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \
00db10
 		tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \
00db10
 		tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \
00db10
 		tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \
00db10
-		tst-join7mod
00db10
+		tst-join7mod tst-audit-threads-mod1 tst-audit-threads-mod2
00db10
 extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) tst-cleanup4aux.o
00db10
 test-extras += $(modules-names) tst-cleanup4aux
00db10
 test-modules = $(addprefix $(objpfx),$(addsuffix .so,$(modules-names)))
00db10
@@ -627,6 +627,14 @@ $(objpfx)tst-oddstacklimit.out: $(objpfx)tst-oddstacklimit $(objpfx)tst-basic1
00db10
 	$(run-program-prefix) $< --command '$(host-built-program-cmd)' > $@
00db10
 endif
00db10
 
00db10
+# Protect against a build using -Wl,-z,now.
00db10
+LDFLAGS-tst-audit-threads-mod1.so = -Wl,-z,lazy
00db10
+LDFLAGS-tst-audit-threads-mod2.so = -Wl,-z,lazy
00db10
+LDFLAGS-tst-audit-threads = -Wl,-z,lazy
00db10
+$(objpfx)tst-audit-threads: $(objpfx)tst-audit-threads-mod2.so
00db10
+$(objpfx)tst-audit-threads.out: $(objpfx)tst-audit-threads-mod1.so
00db10
+tst-audit-threads-ENV = LD_AUDIT=$(objpfx)tst-audit-threads-mod1.so
00db10
+
00db10
 # The tests here better do not run in parallel
00db10
 ifneq ($(filter %tests,$(MAKECMDGOALS)),)
00db10
 .NOTPARALLEL:
00db10
diff --git a/nptl/tst-audit-threads-mod1.c b/nptl/tst-audit-threads-mod1.c
00db10
new file mode 100644
00db10
index 0000000000000000..615d5ee5121962df
00db10
--- /dev/null
00db10
+++ b/nptl/tst-audit-threads-mod1.c
00db10
@@ -0,0 +1,74 @@
00db10
+/* Dummy audit library for tst-audit-threads.
00db10
+
00db10
+   Copyright (C) 2018 Free Software Foundation, Inc.
00db10
+   This file is part of the GNU C Library.
00db10
+
00db10
+   The GNU C Library is free software; you can redistribute it and/or
00db10
+   modify it under the terms of the GNU Lesser General Public
00db10
+   License as published by the Free Software Foundation; either
00db10
+   version 2.1 of the License, or (at your option) any later version.
00db10
+
00db10
+   The GNU C Library is distributed in the hope that it will be useful,
00db10
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
00db10
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00db10
+   Lesser General Public License for more details.
00db10
+
00db10
+   You should have received a copy of the GNU Lesser General Public
00db10
+   License along with the GNU C Library; if not, see
00db10
+   <http://www.gnu.org/licenses/>.  */
00db10
+
00db10
+#include <elf.h>
00db10
+#include <link.h>
00db10
+#include <stdio.h>
00db10
+#include <assert.h>
00db10
+#include <string.h>
00db10
+
00db10
+/* We must use a dummy LD_AUDIT module to force the dynamic loader to
00db10
+   *not* update the real PLT, and instead use a cached value for the
00db10
+   lazy resolution result.  It is the update of that cached value that
00db10
+   we are testing for correctness by doing this.  */
00db10
+
00db10
+/* Library to be audited.  */
00db10
+#define LIB "tst-audit-threads-mod2.so"
00db10
+/* CALLNUM is the highest N used in the retNumN function names.  */
00db10
+#define CALLNUM 7999
00db10
+
00db10
+#define CONCATX(a, b) __CONCAT (a, b)
00db10
+
00db10
+static int previous = 0;
00db10
+
00db10
+unsigned int
00db10
+la_version (unsigned int ver)
00db10
+{
00db10
+  return 1;
00db10
+}
00db10
+
00db10
+unsigned int
00db10
+la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie)
00db10
+{
00db10
+  return LA_FLG_BINDTO | LA_FLG_BINDFROM;
00db10
+}
00db10
+
00db10
+uintptr_t
00db10
+CONCATX(la_symbind, __ELF_NATIVE_CLASS) (ElfW(Sym) *sym,
00db10
+					unsigned int ndx,
00db10
+					uintptr_t *refcook,
00db10
+					uintptr_t *defcook,
00db10
+					unsigned int *flags,
00db10
+					const char *symname)
00db10
+{
00db10
+  const char * retnum = "retNum";
00db10
+  char * num = strstr (symname, retnum);
00db10
+  int n;
00db10
+  /* Validate if the symbols are getting called in the correct order.
00db10
+     This code is here to verify binutils does not optimize out the PLT
00db10
+     entries that require the symbol binding.  */
00db10
+  if (num != NULL)
00db10
+    {
00db10
+      n = atoi (num);
00db10
+      assert (n >= previous);
00db10
+      assert (n <= CALLNUM);
00db10
+      previous = n;
00db10
+    }
00db10
+  return sym->st_value;
00db10
+}
00db10
diff --git a/nptl/tst-audit-threads-mod2.c b/nptl/tst-audit-threads-mod2.c
00db10
new file mode 100644
00db10
index 0000000000000000..f9817dd3dc7f4910
00db10
--- /dev/null
00db10
+++ b/nptl/tst-audit-threads-mod2.c
00db10
@@ -0,0 +1,22 @@
00db10
+/* Shared object with a huge number of functions for tst-audit-threads.
00db10
+
00db10
+   Copyright (C) 2018 Free Software Foundation, Inc.
00db10
+   This file is part of the GNU C Library.
00db10
+
00db10
+   The GNU C Library is free software; you can redistribute it and/or
00db10
+   modify it under the terms of the GNU Lesser General Public
00db10
+   License as published by the Free Software Foundation; either
00db10
+   version 2.1 of the License, or (at your option) any later version.
00db10
+
00db10
+   The GNU C Library is distributed in the hope that it will be useful,
00db10
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
00db10
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00db10
+   Lesser General Public License for more details.
00db10
+
00db10
+   You should have received a copy of the GNU Lesser General Public
00db10
+   License along with the GNU C Library; if not, see
00db10
+   <http://www.gnu.org/licenses/>.  */
00db10
+
00db10
+/* Define all the retNumN functions in a library.  */
00db10
+#define definenum
00db10
+#include "tst-audit-threads.h"
00db10
diff --git a/nptl/tst-audit-threads.c b/nptl/tst-audit-threads.c
00db10
new file mode 100644
00db10
index 0000000000000000..e4bf433bd85f3715
00db10
--- /dev/null
00db10
+++ b/nptl/tst-audit-threads.c
00db10
@@ -0,0 +1,97 @@
00db10
+/* Test multi-threading using LD_AUDIT.
00db10
+
00db10
+   Copyright (C) 2018 Free Software Foundation, Inc.
00db10
+   This file is part of the GNU C Library.
00db10
+
00db10
+   The GNU C Library is free software; you can redistribute it and/or
00db10
+   modify it under the terms of the GNU Lesser General Public
00db10
+   License as published by the Free Software Foundation; either
00db10
+   version 2.1 of the License, or (at your option) any later version.
00db10
+
00db10
+   The GNU C Library is distributed in the hope that it will be useful,
00db10
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
00db10
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00db10
+   Lesser General Public License for more details.
00db10
+
00db10
+   You should have received a copy of the GNU Lesser General Public
00db10
+   License along with the GNU C Library; if not, see
00db10
+   <http://www.gnu.org/licenses/>.  */
00db10
+
00db10
+/* This test uses a dummy LD_AUDIT library (tst-audit-threads-mod1) and a
00db10
+   library with a huge number of functions in order to validate lazy symbol
00db10
+   binding with an audit library.  We use one thread per CPU to test that
00db10
+   concurrent lazy resolution does not have any defects which would cause
00db10
+   the process to fail.  We use an LD_AUDIT library to force the testing of
00db10
+   the relocation resolution caching code in the dynamic loader i.e.
00db10
+   _dl_runtime_profile and _dl_profile_fixup.  */
00db10
+
00db10
+#include <support/xthread.h>
00db10
+#include <strings.h>
00db10
+#include <stdlib.h>
00db10
+#include <sys/sysinfo.h>
00db10
+
00db10
+static int do_test (void);
00db10
+
00db10
+/* This test usually takes less than 3s to run.  However, there are cases that
00db10
+   take up to 30s.  */
00db10
+#define TIMEOUT 60
00db10
+#define TEST_FUNCTION do_test ()
00db10
+#include "../test-skeleton.c"
00db10
+
00db10
+/* Declare the functions we are going to call.  */
00db10
+#define externnum
00db10
+#include "tst-audit-threads.h"
00db10
+#undef externnum
00db10
+
00db10
+int num_threads;
00db10
+pthread_barrier_t barrier;
00db10
+
00db10
+void
00db10
+sync_all (int num)
00db10
+{
00db10
+  pthread_barrier_wait (&barrier);
00db10
+}
00db10
+
00db10
+void
00db10
+call_all_ret_nums (void)
00db10
+{
00db10
+  /* Call each function one at a time from all threads.  */
00db10
+#define callnum
00db10
+#include "tst-audit-threads.h"
00db10
+#undef callnum
00db10
+}
00db10
+
00db10
+void *
00db10
+thread_main (void *unused)
00db10
+{
00db10
+  call_all_ret_nums ();
00db10
+  return NULL;
00db10
+}
00db10
+
00db10
+#define STR2(X) #X
00db10
+#define STR(X) STR2(X)
00db10
+
00db10
+static int
00db10
+do_test (void)
00db10
+{
00db10
+  int i;
00db10
+  pthread_t *threads;
00db10
+
00db10
+  num_threads = get_nprocs ();
00db10
+  if (num_threads <= 1)
00db10
+    num_threads = 2;
00db10
+
00db10
+  /* Used to synchronize all the threads after calling each retNumN.  */
00db10
+  xpthread_barrier_init (&barrier, NULL, num_threads);
00db10
+
00db10
+  threads = (pthread_t *) xcalloc (num_threads, sizeof(pthread_t));
00db10
+  for (i = 0; i < num_threads; i++)
00db10
+    threads[i] = xpthread_create(NULL, thread_main, NULL);
00db10
+
00db10
+  for (i = 0; i < num_threads; i++)
00db10
+    xpthread_join(threads[i]);
00db10
+
00db10
+  free (threads);
00db10
+
00db10
+  return 0;
00db10
+}
00db10
diff --git a/nptl/tst-audit-threads.h b/nptl/tst-audit-threads.h
00db10
new file mode 100644
00db10
index 0000000000000000..1c9ecc08dfcd3e65
00db10
--- /dev/null
00db10
+++ b/nptl/tst-audit-threads.h
00db10
@@ -0,0 +1,92 @@
00db10
+/* Helper header for tst-audit-threads.
00db10
+
00db10
+   Copyright (C) 2018 Free Software Foundation, Inc.
00db10
+   This file is part of the GNU C Library.
00db10
+
00db10
+   The GNU C Library is free software; you can redistribute it and/or
00db10
+   modify it under the terms of the GNU Lesser General Public
00db10
+   License as published by the Free Software Foundation; either
00db10
+   version 2.1 of the License, or (at your option) any later version.
00db10
+
00db10
+   The GNU C Library is distributed in the hope that it will be useful,
00db10
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
00db10
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00db10
+   Lesser General Public License for more details.
00db10
+
00db10
+   You should have received a copy of the GNU Lesser General Public
00db10
+   License along with the GNU C Library; if not, see
00db10
+   <http://www.gnu.org/licenses/>.  */
00db10
+
00db10
+/* We use this helper to create a large number of functions, all of
00db10
+   which will be resolved lazily and thus have their PLT updated.
00db10
+   This is done to provide enough functions that we can statistically
00db10
+   observe a thread vs. PLT resolution failure if one exists.  */
00db10
+
00db10
+#define CONCAT(a, b) a ## b
00db10
+#define NUM(x, y) CONCAT (x, y)
00db10
+
00db10
+#define FUNC10(x)	\
00db10
+  FUNC (NUM (x, 0));	\
00db10
+  FUNC (NUM (x, 1));	\
00db10
+  FUNC (NUM (x, 2));	\
00db10
+  FUNC (NUM (x, 3));	\
00db10
+  FUNC (NUM (x, 4));	\
00db10
+  FUNC (NUM (x, 5));	\
00db10
+  FUNC (NUM (x, 6));	\
00db10
+  FUNC (NUM (x, 7));	\
00db10
+  FUNC (NUM (x, 8));	\
00db10
+  FUNC (NUM (x, 9))
00db10
+
00db10
+#define FUNC100(x)	\
00db10
+  FUNC10 (NUM (x, 0));	\
00db10
+  FUNC10 (NUM (x, 1));	\
00db10
+  FUNC10 (NUM (x, 2));	\
00db10
+  FUNC10 (NUM (x, 3));	\
00db10
+  FUNC10 (NUM (x, 4));	\
00db10
+  FUNC10 (NUM (x, 5));	\
00db10
+  FUNC10 (NUM (x, 6));	\
00db10
+  FUNC10 (NUM (x, 7));	\
00db10
+  FUNC10 (NUM (x, 8));	\
00db10
+  FUNC10 (NUM (x, 9))
00db10
+
00db10
+#define FUNC1000(x)		\
00db10
+  FUNC100 (NUM (x, 0));		\
00db10
+  FUNC100 (NUM (x, 1));		\
00db10
+  FUNC100 (NUM (x, 2));		\
00db10
+  FUNC100 (NUM (x, 3));		\
00db10
+  FUNC100 (NUM (x, 4));		\
00db10
+  FUNC100 (NUM (x, 5));		\
00db10
+  FUNC100 (NUM (x, 6));		\
00db10
+  FUNC100 (NUM (x, 7));		\
00db10
+  FUNC100 (NUM (x, 8));		\
00db10
+  FUNC100 (NUM (x, 9))
00db10
+
00db10
+#define FUNC7000()	\
00db10
+  FUNC1000 (1);		\
00db10
+  FUNC1000 (2);		\
00db10
+  FUNC1000 (3);		\
00db10
+  FUNC1000 (4);		\
00db10
+  FUNC1000 (5);		\
00db10
+  FUNC1000 (6);		\
00db10
+  FUNC1000 (7);
00db10
+
00db10
+#ifdef FUNC
00db10
+# undef FUNC
00db10
+#endif
00db10
+
00db10
+#ifdef externnum
00db10
+# define FUNC(x) extern int CONCAT (retNum, x) (void)
00db10
+#endif
00db10
+
00db10
+#ifdef definenum
00db10
+# define FUNC(x) int CONCAT (retNum, x) (void) { return x; }
00db10
+#endif
00db10
+
00db10
+#ifdef callnum
00db10
+# define FUNC(x) CONCAT (retNum, x) (); sync_all (x)
00db10
+#endif
00db10
+
00db10
+/* A value of 7000 functions is chosen as an arbitrarily large
00db10
+   number of functions that will allow us enough attempts to
00db10
+   verify lazy resolution operation.  */
00db10
+FUNC7000 ();