|
|
00db10 |
Note: the context of this patch differs from upstream slightly,
|
|
|
00db10 |
to accommodate the lack of ILP32 in RHEL.
|
|
|
00db10 |
|
|
|
00db10 |
commit b7cf203b5c17dd6d9878537d41e0c7cc3d270a67
|
|
|
00db10 |
Author: Szabolcs Nagy <szabolcs.nagy@arm.com>
|
|
|
00db10 |
Date: Wed Sep 27 16:55:14 2017 +0100
|
|
|
00db10 |
|
|
|
00db10 |
aarch64: Disable lazy symbol binding of TLSDESC
|
|
|
00db10 |
|
|
|
00db10 |
Always do TLS descriptor initialization at load time during relocation
|
|
|
00db10 |
processing to avoid barriers at every TLS access. In non-dlopened shared
|
|
|
00db10 |
libraries the overhead of tls access vs static global access is > 3x
|
|
|
00db10 |
bigger when lazy initialization is used (_dl_tlsdesc_return_lazy)
|
|
|
00db10 |
compared to bind-now (_dl_tlsdesc_return) so the barriers dominate tls
|
|
|
00db10 |
access performance.
|
|
|
00db10 |
|
|
|
00db10 |
TLSDESC relocs are in DT_JMPREL which are processed at load time using
|
|
|
00db10 |
elf_machine_lazy_rel which is only supposed to do lightweight
|
|
|
00db10 |
initialization using the DT_TLSDESC_PLT trampoline (the trampoline code
|
|
|
00db10 |
jumps to the entry point in DT_TLSDESC_GOT which does the lazy tlsdesc
|
|
|
00db10 |
initialization at runtime). This patch changes elf_machine_lazy_rel
|
|
|
00db10 |
in aarch64 to do the symbol binding and initialization as if DF_BIND_NOW
|
|
|
00db10 |
was set, so the non-lazy code path of elf/do-rel.h was replicated.
|
|
|
00db10 |
|
|
|
00db10 |
The static linker could be changed to emit TLSDESC relocs in DT_REL*,
|
|
|
00db10 |
which are processed non-lazily, but the goal of this patch is to always
|
|
|
00db10 |
guarantee bind-now semantics, even if the binary was produced with an
|
|
|
00db10 |
old linker, so the barriers can be dropped in tls descriptor functions.
|
|
|
00db10 |
|
|
|
00db10 |
After this change the synchronizing ldar instructions can be dropped
|
|
|
00db10 |
as well as the lazy initialization machinery including the DT_TLSDESC_GOT
|
|
|
00db10 |
setup.
|
|
|
00db10 |
|
|
|
00db10 |
I believe this should be done on all targets, including ones where no
|
|
|
00db10 |
barrier is needed for lazy initialization. There is very little gain in
|
|
|
00db10 |
optimizing for large number of symbolic tlsdesc relocations which is an
|
|
|
00db10 |
extremely uncommon case. And currently the tlsdesc entries are only
|
|
|
00db10 |
readonly protected with -z now and some hardenings against writable
|
|
|
00db10 |
JUMPSLOT relocs don't work for TLSDESC so they are a security hazard.
|
|
|
00db10 |
(But to fix that the static linker has to be changed.)
|
|
|
00db10 |
|
|
|
00db10 |
* sysdeps/aarch64/dl-machine.h (elf_machine_lazy_rel): Do symbol
|
|
|
00db10 |
binding and initialization non-lazily for R_AARCH64_TLSDESC.
|
|
|
00db10 |
|
|
|
00db10 |
diff -rup a/sysdeps/aarch64/dl-machine.h b/sysdeps/aarch64/dl-machine.h
|
|
|
00db10 |
--- a/sysdeps/aarch64/dl-machine.h 2018-10-16 12:07:31.588149003 -0400
|
|
|
00db10 |
+++ b/sysdeps/aarch64/dl-machine.h 2018-10-16 12:18:46.214078837 -0400
|
|
|
00db10 |
@@ -376,12 +376,21 @@ elf_machine_lazy_rel (struct link_map *m
|
|
|
00db10 |
}
|
|
|
00db10 |
else if (__builtin_expect (r_type == R_AARCH64_TLSDESC, 1))
|
|
|
00db10 |
{
|
|
|
00db10 |
- struct tlsdesc volatile *td =
|
|
|
00db10 |
- (struct tlsdesc volatile *)reloc_addr;
|
|
|
00db10 |
+ const Elf_Symndx symndx = ELFW (R_SYM) (reloc->r_info);
|
|
|
00db10 |
+ const ElfW (Sym) *symtab = (const void *)D_PTR (map, l_info[DT_SYMTAB]);
|
|
|
00db10 |
+ const ElfW (Sym) *sym = &symtab[symndx];
|
|
|
00db10 |
+ const struct r_found_version *version = NULL;
|
|
|
00db10 |
|
|
|
00db10 |
- td->arg = (void*)reloc;
|
|
|
00db10 |
- td->entry = (void*)(D_PTR (map, l_info[ADDRIDX (DT_TLSDESC_PLT)])
|
|
|
00db10 |
- + map->l_addr);
|
|
|
00db10 |
+ if (map->l_info[VERSYMIDX (DT_VERSYM)] != NULL)
|
|
|
00db10 |
+ {
|
|
|
00db10 |
+ const ElfW (Half) *vernum =
|
|
|
00db10 |
+ (const void *)D_PTR (map, l_info[VERSYMIDX (DT_VERSYM)]);
|
|
|
00db10 |
+ version = &map->l_versions[vernum[symndx] & 0x7fff];
|
|
|
00db10 |
+ }
|
|
|
00db10 |
+
|
|
|
00db10 |
+ /* Always initialize TLS descriptors completely, because lazy
|
|
|
00db10 |
+ initialization requires synchronization at every TLS access. */
|
|
|
00db10 |
+ elf_machine_rela (map, reloc, sym, version, reloc_addr, skip_ifunc);
|
|
|
00db10 |
}
|
|
|
00db10 |
else if (__glibc_unlikely (r_type == R_AARCH64_IRELATIVE))
|
|
|
00db10 |
{
|