5de29b
# commit 122b66defdb9e4ded3ccc5c2b290f0520c6fa3cd
5de29b
# Author: Ulrich Weigand <Ulrich.Weigand@de.ibm.com>
5de29b
# Date:   Wed Dec 4 06:52:40 2013 -0600
5de29b
# 
5de29b
#     PowerPC64 ELFv2 ABI 3/6: PLT local entry point optimization
5de29b
#     
5de29b
#     This is a follow-on to the previous patch to support the ELFv2 ABI in the
5de29b
#     dynamic loader, split off into its own patch since it is just an optional
5de29b
#     optimization.
5de29b
#     
5de29b
#     In the ELFv2 ABI, most functions define both a global and a local entry
5de29b
#     point; the local entry requires r2 to be already set up by the caller
5de29b
#     to point to the callee's TOC; while the global entry does not require
5de29b
#     the caller to know about the callee's TOC, but it needs to set up r12
5de29b
#     to the callee's entry point address.
5de29b
#     
5de29b
#     Now, when setting up a PLT slot, the dynamic linker will usually need
5de29b
#     to enter the target function's global entry point.  However, if the
5de29b
#     linker can prove that the target function is in the same DSO as the
5de29b
#     PLT slot itself, and the whole DSO only uses a single TOC (which the
5de29b
#     linker will let ld.so know via a DT_PPC64_OPT entry), then it is
5de29b
#     possible to actually enter the local entry point address into the
5de29b
#     PLT slot, for a slight improvement in performance.
5de29b
#     
5de29b
#     Note that this uncovered a problem on the first call via _dl_runtime_resolve,
5de29b
#     because that routine neglected to restore the caller's TOC before calling
5de29b
#     the target function for the first time, since it assumed that function
5de29b
#     would always reload its own TOC anyway ...
5de29b
# 
12745e
diff -urN glibc-2.17-c758a686/elf/elf.h glibc-2.17-c758a686/elf/elf.h
12745e
--- glibc-2.17-c758a686/elf/elf.h	2014-05-29 14:08:44.000000000 -0500
12745e
+++ glibc-2.17-c758a686/elf/elf.h	2014-05-29 14:08:44.000000000 -0500
5de29b
@@ -2273,8 +2273,19 @@
5de29b
 #define DT_PPC64_GLINK  (DT_LOPROC + 0)
5de29b
 #define DT_PPC64_OPD	(DT_LOPROC + 1)
5de29b
 #define DT_PPC64_OPDSZ	(DT_LOPROC + 2)
5de29b
+#define DT_PPC64_OPT	(DT_LOPROC + 3)
5de29b
 #define DT_PPC64_NUM    3
5de29b
 
5de29b
+/* PowerPC64 specific values for the DT_PPC64_OPT Dyn entry.  */
5de29b
+#define PPC64_OPT_TLS		1
5de29b
+#define PPC64_OPT_MULTI_TOC	2
5de29b
+
5de29b
+/* PowerPC64 specific values for the Elf64_Sym st_other field.  */
5de29b
+#define STO_PPC64_LOCAL_BIT	5
5de29b
+#define STO_PPC64_LOCAL_MASK	(7 << STO_PPC64_LOCAL_BIT)
5de29b
+#define PPC64_LOCAL_ENTRY_OFFSET(other)				\
5de29b
+ (((1 << (((other) & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT)) >> 2) << 2)
5de29b
+
5de29b
 
5de29b
 /* ARM specific declarations */
5de29b
 
12745e
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h
12745e
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h	2014-05-29 14:08:40.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-machine.h	2014-05-29 14:08:44.000000000 -0500
5de29b
@@ -425,6 +425,42 @@
5de29b
   return lazy;
5de29b
 }
5de29b
 
5de29b
+#if _CALL_ELF == 2
5de29b
+/* If the PLT entry whose reloc is 'reloc' resolves to a function in
5de29b
+   the same object, return the target function's local entry point
5de29b
+   offset if usable.  */
5de29b
+static inline Elf64_Addr __attribute__ ((always_inline))
5de29b
+ppc64_local_entry_offset (struct link_map *map, lookup_t sym_map,
5de29b
+			  const Elf64_Rela *reloc)
5de29b
+{
5de29b
+  const Elf64_Sym *symtab;
5de29b
+  const Elf64_Sym *sym;
5de29b
+
5de29b
+  /* If the target function is in a different object, we cannot
5de29b
+     use the local entry point.  */
5de29b
+  if (sym_map != map)
5de29b
+    return 0;
5de29b
+
5de29b
+  /* If the linker inserted multiple TOCs, we cannot use the
5de29b
+     local entry point.  */
5de29b
+  if (map->l_info[DT_PPC64(OPT)]
5de29b
+      && (map->l_info[DT_PPC64(OPT)]->d_un.d_val & PPC64_OPT_MULTI_TOC))
5de29b
+    return 0;
5de29b
+
5de29b
+  /* Otherwise, we can use the local entry point.  Retrieve its offset
5de29b
+     from the symbol's ELF st_other field.  */
5de29b
+  symtab = (const void *) D_PTR (map, l_info[DT_SYMTAB]);
5de29b
+  sym = &symtab[ELFW(R_SYM) (reloc->r_info)];
5de29b
+
5de29b
+  /* If the target function is an ifunc then the local entry offset is
5de29b
+     for the resolver, not the final destination.  */
5de29b
+  if (__builtin_expect (ELFW(ST_TYPE) (sym->st_info) == STT_GNU_IFUNC, 0))
5de29b
+    return 0;
5de29b
+
5de29b
+  return PPC64_LOCAL_ENTRY_OFFSET (sym->st_other);
5de29b
+}
5de29b
+#endif
5de29b
+
5de29b
 /* Change the PLT entry whose reloc is 'reloc' to call the actual
5de29b
    routine.  */
5de29b
 static inline Elf64_Addr __attribute__ ((always_inline))
5de29b
@@ -471,6 +507,7 @@
5de29b
   PPC_DCBST (&plt->fd_func);
5de29b
   PPC_ISYNC;
5de29b
 #else
5de29b
+  finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
5de29b
   *reloc_addr = finaladdr;
5de29b
 #endif
5de29b
 
5de29b
@@ -478,7 +515,9 @@
5de29b
 }
5de29b
 
5de29b
 static inline void __attribute__ ((always_inline))
5de29b
-elf_machine_plt_conflict (Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
5de29b
+elf_machine_plt_conflict (struct link_map *map, lookup_t sym_map,
5de29b
+			  const Elf64_Rela *reloc,
5de29b
+			  Elf64_Addr *reloc_addr, Elf64_Addr finaladdr)
5de29b
 {
5de29b
 #if _CALL_ELF != 2
5de29b
   Elf64_FuncDesc *plt = (Elf64_FuncDesc *) reloc_addr;
5de29b
@@ -492,6 +531,7 @@
5de29b
   PPC_DCBST (&plt->fd_toc);
5de29b
   PPC_SYNC;
5de29b
 #else
5de29b
+  finaladdr += ppc64_local_entry_offset (map, sym_map, reloc);
5de29b
   *reloc_addr = finaladdr;
5de29b
 #endif
5de29b
 }
5de29b
@@ -641,7 +681,7 @@
5de29b
       /* Fall thru */
5de29b
     case R_PPC64_JMP_SLOT:
5de29b
 #ifdef RESOLVE_CONFLICT_FIND_MAP
5de29b
-      elf_machine_plt_conflict (reloc_addr, value);
5de29b
+      elf_machine_plt_conflict (map, sym_map, reloc, reloc_addr, value);
5de29b
 #else
5de29b
       elf_machine_fixup_plt (map, sym_map, reloc, reloc_addr, value);
5de29b
 #endif
12745e
diff -urN glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S
12745e
--- glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S	2014-05-29 14:08:40.000000000 -0500
12745e
+++ glibc-2.17-c758a686/sysdeps/powerpc/powerpc64/dl-trampoline.S	2014-05-29 14:08:44.000000000 -0500
5de29b
@@ -74,6 +74,10 @@
5de29b
 /* Prepare for calling the function returned by fixup.  */
5de29b
 	PPC64_LOAD_FUNCPTR r3
5de29b
 	ld	r3,INT_PARMS+0(r1)
5de29b
+#if _CALL_ELF == 2
5de29b
+/* Restore the caller's TOC in case we jump to a local entry point.  */
5de29b
+	ld	r2,FRAME_SIZE+40(r1)
5de29b
+#endif
5de29b
 /* Unwind the stack frame, and jump.  */
5de29b
 	addi	r1,r1,FRAME_SIZE
5de29b
 	bctr
5de29b
@@ -321,6 +325,10 @@
5de29b
 /* Prepare for calling the function returned by fixup.  */
5de29b
 	PPC64_LOAD_FUNCPTR r3
5de29b
 	ld	r3,INT_PARMS+0(r1)
5de29b
+#if _CALL_ELF == 2
5de29b
+/* Restore the caller's TOC in case we jump to a local entry point.  */
5de29b
+	ld	r2,FRAME_SIZE+40(r1)
5de29b
+#endif
5de29b
 /* Load the floating point registers.  */
5de29b
 	lfd	fp1,FPR_PARMS+0(r1)
5de29b
 	lfd	fp2,FPR_PARMS+8(r1)