| commit e5d262effe3a87164308a3f37e61b32d0348692a |
| Author: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Date: Fri Nov 30 18:05:32 2018 -0200 |
| |
| Fix _dl_profile_fixup data-dependency issue (Bug 23690) |
| |
| There is a data-dependency between the fields of struct l_reloc_result |
| and the field used as the initialization guard. Users of the guard |
| expect writes to the structure to be observable when they also observe |
| the guard initialized. The solution for this problem is to use an acquire |
| and release load and store to ensure previous writes to the structure are |
| observable if the guard is initialized. |
| |
| The previous implementation used DL_FIXUP_VALUE_ADDR (l_reloc_result->addr) |
| as the initialization guard, making it impossible for some architectures |
| to load and store it atomically, i.e. hppa and ia64, due to its larger size. |
| |
| This commit adds an unsigned int to l_reloc_result to be used as the new |
| initialization guard of the struct, making it possible to load and store |
| it atomically in all architectures. The fix ensures that the values |
| observed in l_reloc_result are consistent and do not lead to crashes. |
| The algorithm is documented in the code in elf/dl-runtime.c |
| (_dl_profile_fixup). Not all data races have been eliminated. |
| |
| Tested with build-many-glibcs and on powerpc, powerpc64, and powerpc64le. |
| |
| [BZ #23690] |
| * elf/dl-runtime.c (_dl_profile_fixup): Guarantee memory |
| modification order when accessing reloc_result->addr. |
| * include/link.h (reloc_result): Add field init. |
| * nptl/Makefile (tests): Add tst-audit-threads. |
| (modules-names): Add tst-audit-threads-mod1 and |
| tst-audit-threads-mod2. |
| Add rules to build tst-audit-threads. |
| * nptl/tst-audit-threads-mod1.c: New file. |
| * nptl/tst-audit-threads-mod2.c: Likewise. |
| * nptl/tst-audit-threads.c: Likewise. |
| * nptl/tst-audit-threads.h: Likewise. |
| |
| Signed-off-by: Tulio Magno Quites Machado Filho <tuliom@linux.ibm.com> |
| Reviewed-by: Carlos O'Donell <carlos@redhat.com> |
| |
| diff --git a/elf/dl-runtime.c b/elf/dl-runtime.c |
| index 63bbc89776..3d2f4a7a76 100644 |
| |
| |
| @@ -183,10 +183,36 @@ _dl_profile_fixup ( |
| /* This is the address in the array where we store the result of previous |
| relocations. */ |
| struct reloc_result *reloc_result = &l->l_reloc_result[reloc_index]; |
| - DL_FIXUP_VALUE_TYPE *resultp = &reloc_result->addr; |
| |
| - DL_FIXUP_VALUE_TYPE value = *resultp; |
| - if (DL_FIXUP_VALUE_CODE_ADDR (value) == 0) |
| + /* CONCURRENCY NOTES: |
| + |
| + Multiple threads may be calling the same PLT sequence and with |
| + LD_AUDIT enabled they will be calling into _dl_profile_fixup to |
| + update the reloc_result with the result of the lazy resolution. |
| + The reloc_result guard variable is reloc_init, and we use |
| + acquire/release loads and store to it to ensure that the results of |
| + the structure are consistent with the loaded value of the guard. |
| + This does not fix all of the data races that occur when two or more |
| + threads read reloc_result->reloc_init with a value of zero and read |
| + and write to that reloc_result concurrently. The expectation is |
| + generally that while this is a data race it works because the |
| + threads write the same values. Until the data races are fixed |
| + there is a potential for problems to arise from these data races. |
| + The reloc result updates should happen in parallel but there should |
| + be an atomic RMW which does the final update to the real result |
| + entry (see bug 23790). |
| + |
| + The following code uses reloc_result->init set to 0 to indicate if it is |
| + the first time this object is being relocated, otherwise 1 which |
| + indicates the object has already been relocated. |
| + |
| + Reading/Writing from/to reloc_result->reloc_init must not happen |
| + before previous writes to reloc_result complete as they could |
| + end-up with an incomplete struct. */ |
| + DL_FIXUP_VALUE_TYPE value; |
| + unsigned int init = atomic_load_acquire (&reloc_result->init); |
| + |
| + if (init == 0) |
| { |
| /* This is the first time we have to relocate this object. */ |
| const ElfW(Sym) *const symtab |
| @@ -346,19 +372,31 @@ _dl_profile_fixup ( |
| |
| /* Store the result for later runs. */ |
| if (__glibc_likely (! GLRO(dl_bind_not))) |
| - *resultp = value; |
| + { |
| + reloc_result->addr = value; |
| + /* Guarantee all previous writes complete before |
| + init is updated. See CONCURRENCY NOTES earlier */ |
| + atomic_store_release (&reloc_result->init, 1); |
| + } |
| + init = 1; |
| } |
| + else |
| + value = reloc_result->addr; |
| |
| /* By default we do not call the pltexit function. */ |
| long int framesize = -1; |
| |
| + |
| #ifdef SHARED |
| /* Auditing checkpoint: report the PLT entering and allow the |
| auditors to change the value. */ |
| - if (DL_FIXUP_VALUE_CODE_ADDR (value) != 0 && GLRO(dl_naudit) > 0 |
| + if (GLRO(dl_naudit) > 0 |
| /* Don't do anything if no auditor wants to intercept this call. */ |
| && (reloc_result->enterexit & LA_SYMB_NOPLTENTER) == 0) |
| { |
| + /* Sanity check: DL_FIXUP_VALUE_CODE_ADDR (value) should have been |
| + initialized earlier in this function or in another thread. */ |
| + assert (DL_FIXUP_VALUE_CODE_ADDR (value) != 0); |
| ElfW(Sym) *defsym = ((ElfW(Sym) *) D_PTR (reloc_result->bound, |
| l_info[DT_SYMTAB]) |
| + reloc_result->boundndx); |
| diff --git a/include/link.h b/include/link.h |
| index 5924594548..83b1c34b7b 100644 |
| |
| |
| @@ -216,6 +216,10 @@ struct link_map |
| unsigned int boundndx; |
| uint32_t enterexit; |
| unsigned int flags; |
| + /* CONCURRENCY NOTE: This is used to guard the concurrent initialization |
| + of the relocation result across multiple threads. See the more |
| + detailed notes in elf/dl-runtime.c. */ |
| + unsigned int init; |
| } *l_reloc_result; |
| |
| /* Pointer to the version information if available. */ |
| diff --git a/nptl/Makefile b/nptl/Makefile |
| index 982e43adfa..98b0aa01c7 100644 |
| |
| |
| @@ -382,7 +382,8 @@ tests += tst-cancelx2 tst-cancelx3 tst-cancelx4 tst-cancelx5 \ |
| tst-cleanupx0 tst-cleanupx1 tst-cleanupx2 tst-cleanupx3 tst-cleanupx4 \ |
| tst-oncex3 tst-oncex4 |
| ifeq ($(build-shared),yes) |
| -tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder |
| +tests += tst-atfork2 tst-tls4 tst-_res1 tst-fini1 tst-compat-forwarder \ |
| + tst-audit-threads |
| tests-internal += tst-tls3 tst-tls3-malloc tst-tls5 tst-stackguard1 |
| tests-nolibpthread += tst-fini1 |
| ifeq ($(have-z-execstack),yes) |
| @@ -394,7 +395,8 @@ modules-names = tst-atfork2mod tst-tls3mod tst-tls4moda tst-tls4modb \ |
| tst-tls5mod tst-tls5moda tst-tls5modb tst-tls5modc \ |
| tst-tls5modd tst-tls5mode tst-tls5modf tst-stack4mod \ |
| tst-_res1mod1 tst-_res1mod2 tst-execstack-mod tst-fini1mod \ |
| - tst-join7mod tst-compat-forwarder-mod |
| + tst-join7mod tst-compat-forwarder-mod tst-audit-threads-mod1 \ |
| + tst-audit-threads-mod2 |
| extra-test-objs += $(addsuffix .os,$(strip $(modules-names))) \ |
| tst-cleanup4aux.o tst-cleanupx4aux.o |
| test-extras += tst-cleanup4aux tst-cleanupx4aux |
| @@ -712,6 +714,14 @@ $(objpfx)tst-compat-forwarder: $(objpfx)tst-compat-forwarder-mod.so |
| |
| tst-mutex10-ENV = GLIBC_TUNABLES=glibc.elision.enable=1 |
| |
| +# Protect against a build using -Wl,-z,now. |
| +LDFLAGS-tst-audit-threads-mod1.so = -Wl,-z,lazy |
| +LDFLAGS-tst-audit-threads-mod2.so = -Wl,-z,lazy |
| +LDFLAGS-tst-audit-threads = -Wl,-z,lazy |
| +$(objpfx)tst-audit-threads: $(objpfx)tst-audit-threads-mod2.so |
| +$(objpfx)tst-audit-threads.out: $(objpfx)tst-audit-threads-mod1.so |
| +tst-audit-threads-ENV = LD_AUDIT=$(objpfx)tst-audit-threads-mod1.so |
| + |
| # The tests here better do not run in parallel |
| ifneq ($(filter %tests,$(MAKECMDGOALS)),) |
| .NOTPARALLEL: |
| diff --git a/nptl/tst-audit-threads-mod1.c b/nptl/tst-audit-threads-mod1.c |
| new file mode 100644 |
| index 0000000000..615d5ee512 |
| |
| |
| @@ -0,0 +1,74 @@ |
| +/* Dummy audit library for test-audit-threads. |
| + |
| + Copyright (C) 2018 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#include <elf.h> |
| +#include <link.h> |
| +#include <stdio.h> |
| +#include <assert.h> |
| +#include <string.h> |
| + |
| +/* We must use a dummy LD_AUDIT module to force the dynamic loader to |
| + *not* update the real PLT, and instead use a cached value for the |
| + lazy resolution result. It is the update of that cached value that |
| + we are testing for correctness by doing this. */ |
| + |
| +/* Library to be audited. */ |
| +#define LIB "tst-audit-threads-mod2.so" |
| +/* CALLNUM is the number of retNum functions. */ |
| +#define CALLNUM 7999 |
| + |
| +#define CONCATX(a, b) __CONCAT (a, b) |
| + |
| +static int previous = 0; |
| + |
| +unsigned int |
| +la_version (unsigned int ver) |
| +{ |
| + return 1; |
| +} |
| + |
| +unsigned int |
| +la_objopen (struct link_map *map, Lmid_t lmid, uintptr_t *cookie) |
| +{ |
| + return LA_FLG_BINDTO | LA_FLG_BINDFROM; |
| +} |
| + |
| +uintptr_t |
| +CONCATX(la_symbind, __ELF_NATIVE_CLASS) (ElfW(Sym) *sym, |
| + unsigned int ndx, |
| + uintptr_t *refcook, |
| + uintptr_t *defcook, |
| + unsigned int *flags, |
| + const char *symname) |
| +{ |
| + const char * retnum = "retNum"; |
| + char * num = strstr (symname, retnum); |
| + int n; |
| + /* Validate if the symbols are getting called in the correct order. |
| + This code is here to verify binutils does not optimize out the PLT |
| + entries that require the symbol binding. */ |
| + if (num != NULL) |
| + { |
| + n = atoi (num); |
| + assert (n >= previous); |
| + assert (n <= CALLNUM); |
| + previous = n; |
| + } |
| + return sym->st_value; |
| +} |
| diff --git a/nptl/tst-audit-threads-mod2.c b/nptl/tst-audit-threads-mod2.c |
| new file mode 100644 |
| index 0000000000..f9817dd3dc |
| |
| |
| @@ -0,0 +1,22 @@ |
| +/* Shared object with a huge number of functions for test-audit-threads. |
| + |
| + Copyright (C) 2018 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +/* Define all the retNumN functions in a library. */ |
| +#define definenum |
| +#include "tst-audit-threads.h" |
| diff --git a/nptl/tst-audit-threads.c b/nptl/tst-audit-threads.c |
| new file mode 100644 |
| index 0000000000..e4bf433bd8 |
| |
| |
| @@ -0,0 +1,97 @@ |
| +/* Test multi-threading using LD_AUDIT. |
| + |
| + Copyright (C) 2018 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +/* This test uses a dummy LD_AUDIT library (test-audit-threads-mod1) and a |
| + library with a huge number of functions in order to validate lazy symbol |
| + binding with an audit library. We use one thread per CPU to test that |
| + concurrent lazy resolution does not have any defects which would cause |
| + the process to fail. We use an LD_AUDIT library to force the testing of |
| + the relocation resolution caching code in the dynamic loader i.e. |
| + _dl_runtime_profile and _dl_profile_fixup. */ |
| + |
| +#include <support/xthread.h> |
| +#include <strings.h> |
| +#include <stdlib.h> |
| +#include <sys/sysinfo.h> |
| + |
| +static int do_test (void); |
| + |
| +/* This test usually takes less than 3s to run. However, there are cases that |
| + take up to 30s. */ |
| +#define TIMEOUT 60 |
| +#define TEST_FUNCTION do_test () |
| +#include "../test-skeleton.c" |
| + |
| +/* Declare the functions we are going to call. */ |
| +#define externnum |
| +#include "tst-audit-threads.h" |
| +#undef externnum |
| + |
| +int num_threads; |
| +pthread_barrier_t barrier; |
| + |
| +void |
| +sync_all (int num) |
| +{ |
| + pthread_barrier_wait (&barrier); |
| +} |
| + |
| +void |
| +call_all_ret_nums (void) |
| +{ |
| + /* Call each function one at a time from all threads. */ |
| +#define callnum |
| +#include "tst-audit-threads.h" |
| +#undef callnum |
| +} |
| + |
| +void * |
| +thread_main (void *unused) |
| +{ |
| + call_all_ret_nums (); |
| + return NULL; |
| +} |
| + |
| +#define STR2(X) #X |
| +#define STR(X) STR2(X) |
| + |
| +static int |
| +do_test (void) |
| +{ |
| + int i; |
| + pthread_t *threads; |
| + |
| + num_threads = get_nprocs (); |
| + if (num_threads <= 1) |
| + num_threads = 2; |
| + |
| + /* Used to synchronize all the threads after calling each retNumN. */ |
| + xpthread_barrier_init (&barrier, NULL, num_threads); |
| + |
| + threads = (pthread_t *) xcalloc (num_threads, sizeof(pthread_t)); |
| + for (i = 0; i < num_threads; i++) |
| + threads[i] = xpthread_create(NULL, thread_main, NULL); |
| + |
| + for (i = 0; i < num_threads; i++) |
| + xpthread_join(threads[i]); |
| + |
| + free (threads); |
| + |
| + return 0; |
| +} |
| diff --git a/nptl/tst-audit-threads.h b/nptl/tst-audit-threads.h |
| new file mode 100644 |
| index 0000000000..1c9ecc08df |
| |
| |
| @@ -0,0 +1,92 @@ |
| +/* Helper header for test-audit-threads. |
| + |
| + Copyright (C) 2018 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +/* We use this helper to create a large number of functions, all of |
| + which will be resolved lazily and thus have their PLT updated. |
| + This is done to provide enough functions that we can statistically |
| + observe a thread vs. PLT resolution failure if one exists. */ |
| + |
| +#define CONCAT(a, b) a ## b |
| +#define NUM(x, y) CONCAT (x, y) |
| + |
| +#define FUNC10(x) \ |
| + FUNC (NUM (x, 0)); \ |
| + FUNC (NUM (x, 1)); \ |
| + FUNC (NUM (x, 2)); \ |
| + FUNC (NUM (x, 3)); \ |
| + FUNC (NUM (x, 4)); \ |
| + FUNC (NUM (x, 5)); \ |
| + FUNC (NUM (x, 6)); \ |
| + FUNC (NUM (x, 7)); \ |
| + FUNC (NUM (x, 8)); \ |
| + FUNC (NUM (x, 9)) |
| + |
| +#define FUNC100(x) \ |
| + FUNC10 (NUM (x, 0)); \ |
| + FUNC10 (NUM (x, 1)); \ |
| + FUNC10 (NUM (x, 2)); \ |
| + FUNC10 (NUM (x, 3)); \ |
| + FUNC10 (NUM (x, 4)); \ |
| + FUNC10 (NUM (x, 5)); \ |
| + FUNC10 (NUM (x, 6)); \ |
| + FUNC10 (NUM (x, 7)); \ |
| + FUNC10 (NUM (x, 8)); \ |
| + FUNC10 (NUM (x, 9)) |
| + |
| +#define FUNC1000(x) \ |
| + FUNC100 (NUM (x, 0)); \ |
| + FUNC100 (NUM (x, 1)); \ |
| + FUNC100 (NUM (x, 2)); \ |
| + FUNC100 (NUM (x, 3)); \ |
| + FUNC100 (NUM (x, 4)); \ |
| + FUNC100 (NUM (x, 5)); \ |
| + FUNC100 (NUM (x, 6)); \ |
| + FUNC100 (NUM (x, 7)); \ |
| + FUNC100 (NUM (x, 8)); \ |
| + FUNC100 (NUM (x, 9)) |
| + |
| +#define FUNC7000() \ |
| + FUNC1000 (1); \ |
| + FUNC1000 (2); \ |
| + FUNC1000 (3); \ |
| + FUNC1000 (4); \ |
| + FUNC1000 (5); \ |
| + FUNC1000 (6); \ |
| + FUNC1000 (7); |
| + |
| +#ifdef FUNC |
| +# undef FUNC |
| +#endif |
| + |
| +#ifdef externnum |
| +# define FUNC(x) extern int CONCAT (retNum, x) (void) |
| +#endif |
| + |
| +#ifdef definenum |
| +# define FUNC(x) int CONCAT (retNum, x) (void) { return x; } |
| +#endif |
| + |
| +#ifdef callnum |
| +# define FUNC(x) CONCAT (retNum, x) (); sync_all (x) |
| +#endif |
| + |
| +/* A value of 7000 functions is chosen as an arbitrarily large |
| + number of functions that will allow us enough attempts to |
| + verify lazy resolution operation. */ |
| +FUNC7000 (); |