08c3a6
commit f610d2935f041c5f41ddcb96924ea42ca2fb5ea5
08c3a6
Author: John David Anglin <danglin@gcc.gnu.org>
08c3a6
Date:   Tue Feb 22 17:28:46 2022 +0000
08c3a6
08c3a6
    hppa: Revise getcontext trampoline design
08c3a6
    
08c3a6
    The current getcontext return trampoline is overly complex and it
08c3a6
    unnecessarily clobbers several registers.  By saving the context
08c3a6
    pointer (r26) in the context, __getcontext_ret can restore any
08c3a6
    registers not restored by setcontext.  This allows getcontext to
08c3a6
    save and restore the entire register context present when getcontext
08c3a6
    is entered.  We use the unused oR0 context slot for the return
08c3a6
    from __getcontext_ret.
08c3a6
    
08c3a6
    While this is not directly useful in C, it can be exploited in
08c3a6
    assembly code.  Registers r20, r23, r24 and r25 are not clobbered
08c3a6
    in the call path to getcontext.  This allows a small simplification
08c3a6
    of swapcontext.
08c3a6
    
08c3a6
    It also allows saving and restoring the 6-bit SAR register in the
08c3a6
    LSB of the oSAR context slot.  The getcontext flag value can be
08c3a6
    stored in the MSB of the oSAR slot.
08c3a6
    
08c3a6
    (cherry picked from commit 9e7e5fda38471e00d1190479ea91d7b08ae3e304)
08c3a6
08c3a6
diff --git a/sysdeps/unix/sysv/linux/hppa/getcontext.S b/sysdeps/unix/sysv/linux/hppa/getcontext.S
08c3a6
index c8b690aab8ecc47c..4f2e2587d60effc8 100644
08c3a6
--- a/sysdeps/unix/sysv/linux/hppa/getcontext.S
08c3a6
+++ b/sysdeps/unix/sysv/linux/hppa/getcontext.S
08c3a6
@@ -22,22 +22,28 @@
08c3a6
 #include "ucontext_i.h"
08c3a6
 
08c3a6
 
08c3a6
-	/* Trampoline function. Non-standard calling ABI.  */
08c3a6
+	/* Trampoline function.  Non-standard calling ABI.  */
08c3a6
 	/* Can not use ENTRY(__getcontext_ret) here.  */
08c3a6
 	.type	__getcontext_ret, @function
08c3a6
 	.hidden	__getcontext_ret
08c3a6
 __getcontext_ret:
08c3a6
 	.proc
08c3a6
 	.callinfo FRAME=0,NO_CALLS
08c3a6
-	/* r26-r23 contain original r3-r6, but because setcontext
08c3a6
-	   does not reload r3-r6 (it's using them as temporaries)
08c3a6
-	   we must save them elsewhere and swap them back in.  */
08c3a6
-	copy	%r23, %r3
08c3a6
-	copy	%r24, %r4
08c3a6
-	copy	%r25, %r5
08c3a6
-	copy	%r26, %r6
08c3a6
-	/* r20 contains original return pointer.  */
08c3a6
-	bv	0(%r20)
08c3a6
+	/* Because setcontext does not reload r3-r6 (it's using them
08c3a6
+	   as temporaries), we must load them ourselves.  */
08c3a6
+	ldw	oR3(%r26), %r3
08c3a6
+	ldw	oR4(%r26), %r4
08c3a6
+	ldw	oR5(%r26), %r5
08c3a6
+	ldw	oR6(%r26), %r6
08c3a6
+
08c3a6
+	/* Also reload registers clobbered by $$dyncall.  */
08c3a6
+	ldw	oR21(%r26), %r21
08c3a6
+	ldw	oR22(%r26), %r22
08c3a6
+	ldw	oR31(%r26), %r31
08c3a6
+
08c3a6
+	/* oR0 contains original return pointer.  */
08c3a6
+	ldw	oR0(%r26), %rp
08c3a6
+	bv	0(%rp)
08c3a6
 	copy	%r0, %ret0
08c3a6
 	.procend
08c3a6
 	.size	__getcontext_ret, .-__getcontext_ret
08c3a6
@@ -65,13 +71,13 @@ ENTRY(__getcontext)
08c3a6
 	stw	%r17, oR17(%r26)
08c3a6
 	stw	%r18, oR18(%r26)
08c3a6
 	stw	%r19, oR19(%r26)
08c3a6
-	/* stw	%r20, oR20(%r26) - used for trampoline.  */
08c3a6
+	stw	%r20, oR20(%r26)
08c3a6
 	stw	%r21, oR21(%r26)
08c3a6
 	stw	%r22, oR22(%r26)
08c3a6
-	/* stw	%r23, oR23(%r26) - used for trampoline.  */
08c3a6
-	/* stw	%r24, oR24(%r26) - used for trampoline.  */
08c3a6
-	/* stw	%r25, oR25(%r26) - used for trampoline.  */
08c3a6
-	/* stw	%r26, oR26(%r26) - used for trampoline.  */
08c3a6
+	stw	%r23, oR23(%r26)
08c3a6
+	stw	%r24, oR24(%r26)
08c3a6
+	stw	%r25, oR25(%r26)
08c3a6
+	stw	%r26, oR26(%r26)
08c3a6
 	stw	%r27, oR27(%r26)
08c3a6
 	stw	%r28, oR28(%r26)
08c3a6
 	stw	%r29, oR29(%r26)
08c3a6
@@ -90,7 +96,10 @@ ENTRY(__getcontext)
08c3a6
 	stw	%r0, oIASQ1(%r26)
08c3a6
 	stw	%r0, oIAOQ0(%r26)
08c3a6
 	stw	%r0, oIAOQ1(%r26)
08c3a6
-	stw	%r0, oSAR(%r26) /* used as flag in swapcontext().  */
08c3a6
+
08c3a6
+	/* Save SAR register.  */
08c3a6
+	mfctl	%sar, %r1
08c3a6
+	stw	%r1, oSAR(%r26) /* MSB used as flag in swapcontext().  */
08c3a6
 
08c3a6
 
08c3a6
 	/* Store floating-point regs.  */
08c3a6
@@ -142,13 +151,8 @@ ENTRY(__getcontext)
08c3a6
 	.cfi_offset 29, 4
08c3a6
 
08c3a6
 	/* Set up the trampoline registers.
08c3a6
-	   r20, r23, r24, r25, r26 and r2 are clobbered
08c3a6
-	   by call to getcontext() anyway. Reuse them.  */
08c3a6
-	stw	%r2, oR20(%r26)
08c3a6
-	stw	%r3, oR23(%r26)
08c3a6
-	stw	%r4, oR24(%r26)
08c3a6
-	stw	%r5, oR25(%r26)
08c3a6
-	stw	%r6, oR26(%r26)
08c3a6
+	   Use oR0 context slot to save return pointer.  */
08c3a6
+	stw	%r2, oR0(%r26)
08c3a6
 #ifdef PIC
08c3a6
 	addil	LT%__getcontext_ret, %r19
08c3a6
 	ldw     RT%__getcontext_ret(%r1), %r1
08c3a6
diff --git a/sysdeps/unix/sysv/linux/hppa/setcontext.S b/sysdeps/unix/sysv/linux/hppa/setcontext.S
08c3a6
index e1ae3aefcaac198d..616405b80c61d531 100644
08c3a6
--- a/sysdeps/unix/sysv/linux/hppa/setcontext.S
08c3a6
+++ b/sysdeps/unix/sysv/linux/hppa/setcontext.S
08c3a6
@@ -76,7 +76,7 @@ ENTRY(__setcontext)
08c3a6
 	ldw	oR18(%r3), %r18
08c3a6
 	ldw	oR19(%r3), %r19
08c3a6
 	ldw	oR20(%r3), %r20
08c3a6
-	ldw	oR21(%r3), %r21
08c3a6
+	ldw	oR21(%r3), %r21 /* maybe clobbered by dyncall */
08c3a6
 	/* ldw	oR22(%r3), %r22 - dyncall arg.  */
08c3a6
 	ldw	oR23(%r3), %r23
08c3a6
 	ldw	oR24(%r3), %r24
08c3a6
@@ -88,6 +88,10 @@ ENTRY(__setcontext)
08c3a6
 	ldw	oR30(%r3), %sp
08c3a6
 	/* ldw	oR31(%r3), %r31 - dyncall scratch register */
08c3a6
 
08c3a6
+	/* Restore SAR register.  */
08c3a6
+	ldw	oSAR(%r3), %r22
08c3a6
+	mtsar	%r22
08c3a6
+
08c3a6
 	/* Restore floating-point registers.  */
08c3a6
 	ldo	 oFPREGS31(%r3), %r22
08c3a6
 	fldds	  0(%r22), %fr31
08c3a6
diff --git a/sysdeps/unix/sysv/linux/hppa/swapcontext.c b/sysdeps/unix/sysv/linux/hppa/swapcontext.c
08c3a6
index 562f00ff0546177d..1664f68c7b9982e8 100644
08c3a6
--- a/sysdeps/unix/sysv/linux/hppa/swapcontext.c
08c3a6
+++ b/sysdeps/unix/sysv/linux/hppa/swapcontext.c
08c3a6
@@ -26,10 +26,6 @@ extern int __setcontext (const ucontext_t *ucp);
08c3a6
 int
08c3a6
 __swapcontext (ucontext_t *oucp, const ucontext_t *ucp)
08c3a6
 {
08c3a6
-  /* Save ucp in stack argument slot.  */
08c3a6
-  asm ("stw %r25,-40(%sp)");
08c3a6
-  asm (".cfi_offset 25, -40");
08c3a6
-
08c3a6
   /* Save rp for debugger.  */
08c3a6
   asm ("stw %rp,-20(%sp)");
08c3a6
   asm (".cfi_offset 2, -20");
08c3a6
@@ -60,7 +56,7 @@ __swapcontext (ucontext_t *oucp, const ucontext_t *ucp)
08c3a6
   asm ("bv,n %r0(%rp)");
08c3a6
 
08c3a6
   /* Load sc_sar flag.  */
08c3a6
-  asm ("ldw %0(%%ret1),%%r20" : : "i" (oSAR));
08c3a6
+  asm ("ldb %0(%%ret1),%%r20" : : "i" (oSAR));
08c3a6
 
08c3a6
   /* Return if oucp context has been reactivated.  */
08c3a6
   asm ("or,= %r0,%r20,%r0");
08c3a6
@@ -68,11 +64,11 @@ __swapcontext (ucontext_t *oucp, const ucontext_t *ucp)
08c3a6
 
08c3a6
   /* Mark sc_sar flag.  */
08c3a6
   asm ("1: ldi 1,%r20");
08c3a6
-  asm ("stw %%r20,%0(%%ret1)" : : "i" (oSAR));
08c3a6
+  asm ("stb %%r20,%0(%%ret1)" : : "i" (oSAR));
08c3a6
 
08c3a6
   /* Activate the machine context in ucp.  */
08c3a6
   asm ("bl __setcontext,%rp");
08c3a6
-  asm ("ldw -40(%sp),%r26");
08c3a6
+  asm ("ldw %0(%%ret1),%%r26" : : "i" (oR25));
08c3a6
 
08c3a6
   /* Load return pointer.  */
08c3a6
   asm ("ldw %0(%%ret1),%%rp" : : "i" (oR28));