bca718
commit 5cdd1989d1d2f135d02e66250f37ba8e767f9772
bca718
Author: Stefan Liebler <stli@linux.vnet.ibm.com>
bca718
Date:   Thu Mar 31 17:37:16 2016 +0200
bca718
bca718
    S390: Extend structs La_s390_regs / La_s390_retval with vector-registers.
bca718
    
bca718
    Starting with z13, vector registers can also occur as argument registers.
bca718
    Thus the passed input/output register structs for
bca718
    la_s390_[32|64]_gnu_plt[enter|exit] functions should reflect those new
bca718
    registers. This patch extends these structs La_s390_regs and La_s390_retval
bca718
    and adjusts _dl_runtime_profile() to handle those fields in case of
bca718
    running on a z13 machine.
bca718
    
bca718
    ChangeLog:
bca718
    
bca718
    	* sysdeps/s390/bits/link.h (La_s390_vr): New typedef.
bca718
    	(La_s390_32_regs): Append vector register lr_v24-lr_v31.
bca718
    	(La_s390_64_regs): Likewise.
bca718
    	(La_s390_32_retval): Append vector register lrv_v24.
bca718
    	(La_s390_64_retval): Likewise.
bca718
    	* sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_profile):
bca718
    	Handle extended structs La_s390_32_regs and La_s390_32_retval.
bca718
    	* sysdeps/s390/s390-64/dl-trampoline.h (_dl_runtime_profile):
bca718
    	Handle extended structs La_s390_64_regs and La_s390_64_retval.
bca718
bca718
diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h
bca718
index 2ef7f44..e27ed67 100644
bca718
--- a/sysdeps/s390/bits/link.h
bca718
+++ b/sysdeps/s390/bits/link.h
bca718
@@ -19,6 +19,9 @@
bca718
 # error "Never include <bits/link.h> directly; use <link.h> instead."
bca718
 #endif
bca718
 
bca718
+#if defined HAVE_S390_VX_ASM_SUPPORT
bca718
+typedef char La_s390_vr[16];
bca718
+#endif
bca718
 
bca718
 #if __ELF_NATIVE_CLASS == 32
bca718
 
bca718
@@ -32,6 +35,16 @@ typedef struct La_s390_32_regs
bca718
   uint32_t lr_r6;
bca718
   double lr_fp0;
bca718
   double lr_fp2;
bca718
+# if defined HAVE_S390_VX_ASM_SUPPORT
bca718
+  La_s390_vr lr_v24;
bca718
+  La_s390_vr lr_v25;
bca718
+  La_s390_vr lr_v26;
bca718
+  La_s390_vr lr_v27;
bca718
+  La_s390_vr lr_v28;
bca718
+  La_s390_vr lr_v29;
bca718
+  La_s390_vr lr_v30;
bca718
+  La_s390_vr lr_v31;
bca718
+# endif
bca718
 } La_s390_32_regs;
bca718
 
bca718
 /* Return values for calls from PLT on s390-32.  */
bca718
@@ -40,6 +53,9 @@ typedef struct La_s390_32_retval
bca718
   uint32_t lrv_r2;
bca718
   uint32_t lrv_r3;
bca718
   double lrv_fp0;
bca718
+# if defined HAVE_S390_VX_ASM_SUPPORT
bca718
+  La_s390_vr lrv_v24;
bca718
+# endif
bca718
 } La_s390_32_retval;
bca718
 
bca718
 
bca718
@@ -77,6 +93,16 @@ typedef struct La_s390_64_regs
bca718
   double lr_fp2;
bca718
   double lr_fp4;
bca718
   double lr_fp6;
bca718
+# if defined HAVE_S390_VX_ASM_SUPPORT
bca718
+  La_s390_vr lr_v24;
bca718
+  La_s390_vr lr_v25;
bca718
+  La_s390_vr lr_v26;
bca718
+  La_s390_vr lr_v27;
bca718
+  La_s390_vr lr_v28;
bca718
+  La_s390_vr lr_v29;
bca718
+  La_s390_vr lr_v30;
bca718
+  La_s390_vr lr_v31;
bca718
+# endif
bca718
 } La_s390_64_regs;
bca718
 
bca718
 /* Return values for calls from PLT on s390-64.  */
bca718
@@ -84,6 +110,9 @@ typedef struct La_s390_64_retval
bca718
 {
bca718
   uint64_t lrv_r2;
bca718
   double lrv_fp0;
bca718
+# if defined HAVE_S390_VX_ASM_SUPPORT
bca718
+  La_s390_vr lrv_v24;
bca718
+# endif
bca718
 } La_s390_64_retval;
bca718
 
bca718
 
bca718
diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h
bca718
index a152a7b..bb74d27 100644
bca718
--- a/sysdeps/s390/s390-32/dl-trampoline.h
bca718
+++ b/sysdeps/s390/s390-32/dl-trampoline.h
bca718
@@ -112,28 +112,31 @@ _dl_runtime_resolve:
bca718
 	cfi_startproc
bca718
 	.align 16
bca718
 _dl_runtime_profile:
bca718
-	stm    %r2,%r6,32(%r15)		# save registers
bca718
-	cfi_offset (r2, -64)		# + r6 needed as arg for
bca718
-	cfi_offset (r3, -60)		#  _dl_profile_fixup
bca718
-	cfi_offset (r4, -56)
bca718
-	cfi_offset (r5, -52)
bca718
-	cfi_offset (r6, -48)
bca718
-	std    %f0,56(%r15)
bca718
-	cfi_offset (f0, -40)
bca718
-	std    %f2,64(%r15)
bca718
-	cfi_offset (f2, -32)
bca718
 	st     %r12,12(%r15)		# r12 is used as backup of r15
bca718
 	cfi_offset (r12, -84)
bca718
 	st     %r14,16(%r15)
bca718
 	cfi_offset (r14, -80)
bca718
 	lr     %r12,%r15		# backup stack pointer
bca718
 	cfi_def_cfa_register (12)
bca718
+	ahi    %r15,-264		# create stack frame:
bca718
+					# 96 + sizeof(La_s390_32_regs)
bca718
+	st     %r12,0(%r15)		# save backchain
bca718
+
bca718
+	stm    %r2,%r6,96(%r15)		# save registers
bca718
+	cfi_offset (r2, -264)		# + r6 needed as arg for
bca718
+	cfi_offset (r3, -260)		#  _dl_profile_fixup
bca718
+	cfi_offset (r4, -256)
bca718
+	cfi_offset (r5, -252)
bca718
+	cfi_offset (r6, -248)
bca718
+	std    %f0,120(%r15)
bca718
+	cfi_offset (f0, -240)
bca718
+	std    %f2,128(%r15)
bca718
+	cfi_offset (f2, -232)
bca718
 #ifdef RESTORE_VRS
bca718
-	ahi    %r15,-224		# create stack frame
bca718
 	.machine push
bca718
 	.machine "z13"
bca718
 	.machinemode "zarch_nohighgprs"
bca718
-	vstm   %v24,%v31,96(%r15)	# store call-clobbered vr arguments
bca718
+	vstm   %v24,%v31,136(%r15)	# store call-clobbered vr arguments
bca718
 	cfi_offset (v24, -224)
bca718
 	cfi_offset (v25, -208)
bca718
 	cfi_offset (v26, -192)
bca718
@@ -143,31 +146,31 @@ _dl_runtime_profile:
bca718
 	cfi_offset (v30, -128)
bca718
 	cfi_offset (v31, -112)
bca718
 	.machine pop
bca718
-#else
bca718
-	ahi    %r15,-96			# create stack frame
bca718
 #endif
bca718
-	st     %r12,0(%r15)		# save backchain
bca718
+
bca718
 	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
bca718
 	lr     %r4,%r14			# return address as third parameter
bca718
 	basr   %r1,0
bca718
 0:	l      %r14,6f-0b(%r1)
bca718
-	la     %r5,32(%r12)		# pointer to struct La_s390_32_regs
bca718
+	la     %r5,96(%r15)		# pointer to struct La_s390_32_regs
bca718
 	la     %r6,20(%r12)		# long int * framesize
bca718
 	bas    %r14,0(%r14,%r1)		# call resolver
bca718
 	lr     %r1,%r2			# function addr returned in r2
bca718
-	ld     %f0,56(%r12)		# restore call-clobbered arg fprs
bca718
-	ld     %f2,64(%r12)
bca718
+	ld     %f0,120(%r15)		# restore call-clobbered arg fprs
bca718
+	ld     %f2,128(%r15)
bca718
 #ifdef RESTORE_VRS
bca718
 	.machine push
bca718
 	.machine "z13"
bca718
 	.machinemode "zarch_nohighgprs"
bca718
-	vlm    %v24,%v31,96(%r15)	# restore call-clobbered arg vrs
bca718
+	vlm    %v24,%v31,136(%r15)	# restore call-clobbered arg vrs
bca718
 	.machine pop
bca718
 #endif
bca718
 	icm    %r0,15,20(%r12)		# load & test framesize
bca718
 	jnm    2f
bca718
 
bca718
-	lm     %r2,%r6,32(%r12)
bca718
+	lm     %r2,%r6,96(%r15)		# framesize < 0 means no pltexit call
bca718
+					# so we can do a tail call without
bca718
+					# copying the arg overflow area
bca718
 	lr     %r15,%r12		# remove stack frame
bca718
 	cfi_def_cfa_register (15)
bca718
 	l      %r14,16(%r15)		# restore registers
bca718
@@ -175,7 +178,9 @@ _dl_runtime_profile:
bca718
 	br     %r1			# tail-call to the resolved function
bca718
 
bca718
 	cfi_def_cfa_register (12)
bca718
-2:	jz     4f			# framesize == 0 ?
bca718
+2:	la     %r4,96(%r15)		# pointer to struct La_s390_32_regs
bca718
+	st     %r4,32(%r12)
bca718
+	jz     4f			# framesize == 0 ?
bca718
 	ahi    %r0,7			# align framesize to 8
bca718
 	lhi    %r2,-8
bca718
 	nr     %r0,%r2
bca718
@@ -188,24 +193,35 @@ _dl_runtime_profile:
bca718
 	la     %r2,8(%r2)
bca718
 	la     %r3,8(%r3)
bca718
 	brct   %r0,3b
bca718
-4:	lm     %r2,%r6,32(%r12)		# load register parameters
bca718
+4:	lm     %r2,%r6,0(%r4)		# load register parameters
bca718
 	basr   %r14,%r1			# call resolved function
bca718
-	stm    %r2,%r3,72(%r12)		# store return values r2, r3, f0
bca718
-	std    %f0,80(%r12)		# to struct La_s390_32_retval
bca718
-	lm     %r2,%r3,24(%r12)		# load arguments saved by PLT
bca718
+	stm    %r2,%r3,40(%r12)		# store return values r2, r3, f0
bca718
+	std    %f0,48(%r12)		# to struct La_s390_32_retval
bca718
+#ifdef RESTORE_VRS
bca718
+	.machine push
bca718
+	.machine "z13"
bca718
+	vst    %v24,56(%r12)		# store return value v24
bca718
+	.machine pop
bca718
+#endif
bca718
+	lm     %r2,%r4,24(%r12)		# r2, r3: load arguments saved by PLT
bca718
+					# r4: pointer to struct La_s390_32_regs
bca718
 	basr   %r1,0
bca718
 5:	l      %r14,7f-5b(%r1)
bca718
-	la     %r4,32(%r12)		# pointer to struct La_s390_32_regs
bca718
-	la     %r5,72(%r12)		# pointer to struct La_s390_32_retval
bca718
+	la     %r5,40(%r12)		# pointer to struct La_s390_32_retval
bca718
 	bas    %r14,0(%r14,%r1)		# call _dl_call_pltexit
bca718
 
bca718
 	lr     %r15,%r12		# remove stack frame
bca718
 	cfi_def_cfa_register (15)
bca718
 	l      %r14,16(%r15)		# restore registers
bca718
 	l      %r12,12(%r15)
bca718
-	l      %r2,72(%r15)		# restore return values
bca718
-	l      %r3,76(%r15)
bca718
-	ld     %f0,80(%r15)
bca718
+	lm     %r2,%r3,40(%r15)		# restore return values
bca718
+	ld     %f0,48(%r15)
bca718
+#ifdef RESTORE_VRS
bca718
+	.machine push
bca718
+	.machine "z13"
bca718
+	vl    %v24,56(%r15)		# restore return value v24
bca718
+	.machine pop
bca718
+#endif
bca718
 	br     %r14
bca718
 
bca718
 6:	.long  _dl_profile_fixup - 0b
bca718
diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h
bca718
index 658e3a3..33ea3de 100644
bca718
--- a/sysdeps/s390/s390-64/dl-trampoline.h
bca718
+++ b/sysdeps/s390/s390-64/dl-trampoline.h
bca718
@@ -109,31 +109,34 @@ _dl_runtime_resolve:
bca718
 	cfi_startproc
bca718
 	.align 16
bca718
 _dl_runtime_profile:
bca718
-	stmg   %r2,%r6,64(%r15)		# save call-clobbered arg regs
bca718
-	cfi_offset (r2, -96)		# + r6 needed as arg for
bca718
-	cfi_offset (r3, -88)		#  _dl_profile_fixup
bca718
-	cfi_offset (r4, -80)
bca718
-	cfi_offset (r5, -72)
bca718
-	cfi_offset (r6, -64)
bca718
-	std    %f0,104(%r15)
bca718
-	cfi_offset (f0, -56)
bca718
-	std    %f2,112(%r15)
bca718
-	cfi_offset (f2, -48)
bca718
-	std    %f4,120(%r15)
bca718
-	cfi_offset (f4, -40)
bca718
-	std    %f6,128(%r15)
bca718
-	cfi_offset (f6, -32)
bca718
 	stg    %r12,24(%r15)		# r12 is used as backup of r15
bca718
 	cfi_offset (r12, -136)
bca718
 	stg    %r14,32(%r15)
bca718
 	cfi_offset (r14, -128)
bca718
 	lgr    %r12,%r15		# backup stack pointer
bca718
 	cfi_def_cfa_register (12)
bca718
+	aghi   %r15,-360		# create stack frame:
bca718
+					# 160 + sizeof(La_s390_64_regs)
bca718
+	stg    %r12,0(%r15)		# save backchain
bca718
+
bca718
+	stmg   %r2,%r6,160(%r15)	# save call-clobbered arg regs
bca718
+	cfi_offset (r2, -360)		# + r6 needed as arg for
bca718
+	cfi_offset (r3, -352)		#  _dl_profile_fixup
bca718
+	cfi_offset (r4, -344)
bca718
+	cfi_offset (r5, -336)
bca718
+	cfi_offset (r6, -328)
bca718
+	std    %f0,200(%r15)
bca718
+	cfi_offset (f0, -320)
bca718
+	std    %f2,208(%r15)
bca718
+	cfi_offset (f2, -312)
bca718
+	std    %f4,216(%r15)
bca718
+	cfi_offset (f4, -304)
bca718
+	std    %f6,224(%r15)
bca718
+	cfi_offset (f6, -296)
bca718
 #ifdef RESTORE_VRS
bca718
-	aghi   %r15,-288		# create stack frame
bca718
 	.machine push
bca718
 	.machine "z13"
bca718
-	vstm   %v24,%v31,160(%r15)# store call-clobbered vector argument registers
bca718
+	vstm   %v24,%v31,232(%r15)      # store call-clobbered vector arguments
bca718
 	cfi_offset (v24, -288)
bca718
 	cfi_offset (v25, -272)
bca718
 	cfi_offset (v26, -256)
bca718
@@ -143,31 +146,28 @@ _dl_runtime_profile:
bca718
 	cfi_offset (v30, -192)
bca718
 	cfi_offset (v31, -176)
bca718
 	.machine pop
bca718
-#else
bca718
-	aghi   %r15,-160		# create stack frame
bca718
 #endif
bca718
-	stg    %r12,0(%r15)		# save backchain
bca718
 	lmg    %r2,%r3,48(%r12)		# load arguments saved by PLT
bca718
 	lgr    %r4,%r14			# return address as third parameter
bca718
-	la     %r5,64(%r12)		# pointer to struct La_s390_64_regs
bca718
+	la     %r5,160(%r15)		# pointer to struct La_s390_64_regs
bca718
 	la     %r6,40(%r12)		# long int * framesize
bca718
 	brasl  %r14,_dl_profile_fixup	# call resolver
bca718
 	lgr    %r1,%r2			# function addr returned in r2
bca718
-	ld     %f0,104(%r12)		# restore call-clobbered arg fprs
bca718
-	ld     %f2,112(%r12)
bca718
-	ld     %f4,120(%r12)
bca718
-	ld     %f6,128(%r12)
bca718
+	ld     %f0,200(%r15)		# restore call-clobbered arg fprs
bca718
+	ld     %f2,208(%r15)
bca718
+	ld     %f4,216(%r15)
bca718
+	ld     %f6,224(%r15)
bca718
 #ifdef RESTORE_VRS
bca718
 	.machine push
bca718
 	.machine "z13"
bca718
-	vlm    %v24,%v31,160(%r15)	# restore call-clobbered arg vrs
bca718
+	vlm    %v24,%v31,232(%r15)	# restore call-clobbered arg vrs
bca718
 	.machine pop
bca718
 #endif
bca718
 	lg     %r0,40(%r12)		# load framesize
bca718
 	ltgr   %r0,%r0
bca718
 	jnm    1f
bca718
 
bca718
-	lmg    %r2,%r6,64(%r12)		# framesize < 0 means no pltexit call
bca718
+	lmg    %r2,%r6,160(%r15)	# framesize < 0 means no pltexit call
bca718
 					# so we can do a tail call without
bca718
 					# copying the arg overflow area
bca718
 	lgr    %r15,%r12		# remove stack frame
bca718
@@ -177,7 +177,9 @@ _dl_runtime_profile:
bca718
 	br     %r1			# tail-call to resolved function
bca718
 
bca718
 	cfi_def_cfa_register (12)
bca718
-1:	jz     4f			# framesize == 0 ?
bca718
+1:	la     %r4,160(%r15)		# pointer to struct La_s390_64_regs
bca718
+	stg    %r4,64(%r12)
bca718
+	jz     4f			# framesize == 0 ?
bca718
 	aghi   %r0,7			# align framesize to 8
bca718
 	nill   %r0,0xfff8
bca718
 	slgr   %r15,%r0			# make room for framesize bytes
bca718
@@ -189,21 +191,33 @@ _dl_runtime_profile:
bca718
 	la     %r2,8(%r2)		# depending on framesize
bca718
 	la     %r3,8(%r3)
bca718
 	brctg  %r0,3b
bca718
-4:	lmg    %r2,%r6,64(%r12)		# restore call-clobbered arg gprs
bca718
+4:	lmg    %r2,%r6,0(%r4)		# restore call-clobbered arg gprs
bca718
 	basr   %r14,%r1			# call resolved function
bca718
-	stg    %r2,136(%r12)		# store return values r2, f0
bca718
-	std    %f0,144(%r12)		# to struct La_s390_64_retval
bca718
-	lmg    %r2,%r3,48(%r12)		# load arguments saved by PLT
bca718
-	la     %r4,64(%r12)		# pointer to struct La_s390_64_regs
bca718
-	la     %r5,136(%r12)		# pointer to struct La_s390_64_retval
bca718
+	stg    %r2,72(%r12)		# store return values r2, f0
bca718
+	std    %f0,80(%r12)		# to struct La_s390_64_retval
bca718
+#ifdef RESTORE_VRS
bca718
+	.machine push
bca718
+	.machine "z13"
bca718
+	vst    %v24,88(%r12)		# store return value v24
bca718
+	.machine pop
bca718
+#endif
bca718
+	lmg    %r2,%r4,48(%r12)		# r2, r3: load arguments saved by PLT
bca718
+					# r4: pointer to struct La_s390_64_regs
bca718
+	la     %r5,72(%r12)		# pointer to struct La_s390_64_retval
bca718
 	brasl  %r14,_dl_call_pltexit
bca718
 
bca718
 	lgr    %r15,%r12		# remove stack frame
bca718
 	cfi_def_cfa_register (15)
bca718
 	lg     %r14,32(%r15)		# restore registers
bca718
 	lg     %r12,24(%r15)
bca718
-	lg     %r2,136(%r15)		# restore return values
bca718
-	ld     %f0,144(%r15)
bca718
+	lg     %r2,72(%r15)		# restore return values
bca718
+	ld     %f0,80(%r15)
bca718
+#ifdef RESTORE_VRS
bca718
+	.machine push
bca718
+	.machine "z13"
bca718
+	vl    %v24,88(%r15)		# restore return value v24
bca718
+	.machine pop
bca718
+#endif
bca718
 	br     %r14			# Jump back to caller
bca718
 
bca718
 	cfi_endproc