| commit 5cdd1989d1d2f135d02e66250f37ba8e767f9772 |
| Author: Stefan Liebler <stli@linux.vnet.ibm.com> |
| Date: Thu Mar 31 17:37:16 2016 +0200 |
| |
| S390: Extend structs La_s390_regs / La_s390_retval with vector-registers. |
| |
| Starting with z13, vector registers can also occur as argument registers. |
| Thus the passed input/output register structs for |
| la_s390_[32|64]_gnu_plt[enter|exit] functions should reflect those new |
| registers. This patch extends these structs La_s390_regs and La_s390_retval |
| and adjusts _dl_runtime_profile() to handle those fields in case of |
| running on a z13 machine. |
| |
| ChangeLog: |
| |
| * sysdeps/s390/bits/link.h: (La_s390_vr) New typedef. |
| (La_s390_32_regs): Append vector register lr_v24-lr_v31. |
| (La_s390_64_regs): Likewise. |
| (La_s390_32_retval): Append vector register lrv_v24. |
| (La_s390_64_retval): Likeweise. |
| * sysdeps/s390/s390-32/dl-trampoline.h (_dl_runtime_profile): |
| Handle extended structs La_s390_32_regs and La_s390_32_retval. |
| * sysdeps/s390/s390-64/dl-trampoline.h (_dl_runtime_profile): |
| Handle extended structs La_s390_64_regs and La_s390_64_retval. |
| |
| diff --git a/sysdeps/s390/bits/link.h b/sysdeps/s390/bits/link.h |
| index 2ef7f44..e27ed67 100644 |
| |
| |
| @@ -19,6 +19,9 @@ |
| # error "Never include <bits/link.h> directly; use <link.h> instead." |
| #endif |
| |
| +#if defined HAVE_S390_VX_ASM_SUPPORT |
| +typedef char La_s390_vr[16]; |
| +#endif |
| |
| #if __ELF_NATIVE_CLASS == 32 |
| |
| @@ -32,6 +35,16 @@ typedef struct La_s390_32_regs |
| uint32_t lr_r6; |
| double lr_fp0; |
| double lr_fp2; |
| +# if defined HAVE_S390_VX_ASM_SUPPORT |
| + La_s390_vr lr_v24; |
| + La_s390_vr lr_v25; |
| + La_s390_vr lr_v26; |
| + La_s390_vr lr_v27; |
| + La_s390_vr lr_v28; |
| + La_s390_vr lr_v29; |
| + La_s390_vr lr_v30; |
| + La_s390_vr lr_v31; |
| +# endif |
| } La_s390_32_regs; |
| |
| /* Return values for calls from PLT on s390-32. */ |
| @@ -40,6 +53,9 @@ typedef struct La_s390_32_retval |
| uint32_t lrv_r2; |
| uint32_t lrv_r3; |
| double lrv_fp0; |
| +# if defined HAVE_S390_VX_ASM_SUPPORT |
| + La_s390_vr lrv_v24; |
| +# endif |
| } La_s390_32_retval; |
| |
| |
| @@ -77,6 +93,16 @@ typedef struct La_s390_64_regs |
| double lr_fp2; |
| double lr_fp4; |
| double lr_fp6; |
| +# if defined HAVE_S390_VX_ASM_SUPPORT |
| + La_s390_vr lr_v24; |
| + La_s390_vr lr_v25; |
| + La_s390_vr lr_v26; |
| + La_s390_vr lr_v27; |
| + La_s390_vr lr_v28; |
| + La_s390_vr lr_v29; |
| + La_s390_vr lr_v30; |
| + La_s390_vr lr_v31; |
| +# endif |
| } La_s390_64_regs; |
| |
| /* Return values for calls from PLT on s390-64. */ |
| @@ -84,6 +110,9 @@ typedef struct La_s390_64_retval |
| { |
| uint64_t lrv_r2; |
| double lrv_fp0; |
| +# if defined HAVE_S390_VX_ASM_SUPPORT |
| + La_s390_vr lrv_v24; |
| +# endif |
| } La_s390_64_retval; |
| |
| |
| diff --git a/sysdeps/s390/s390-32/dl-trampoline.h b/sysdeps/s390/s390-32/dl-trampoline.h |
| index a152a7b..bb74d27 100644 |
| |
| |
| @@ -112,28 +112,31 @@ _dl_runtime_resolve: |
| cfi_startproc |
| .align 16 |
| _dl_runtime_profile: |
| - stm %r2,%r6,32(%r15) # save registers |
| - cfi_offset (r2, -64) # + r6 needed as arg for |
| - cfi_offset (r3, -60) # _dl_profile_fixup |
| - cfi_offset (r4, -56) |
| - cfi_offset (r5, -52) |
| - cfi_offset (r6, -48) |
| - std %f0,56(%r15) |
| - cfi_offset (f0, -40) |
| - std %f2,64(%r15) |
| - cfi_offset (f2, -32) |
| st %r12,12(%r15) # r12 is used as backup of r15 |
| cfi_offset (r12, -84) |
| st %r14,16(%r15) |
| cfi_offset (r14, -80) |
| lr %r12,%r15 # backup stack pointer |
| cfi_def_cfa_register (12) |
| + ahi %r15,-264 # create stack frame: |
| + # 96 + sizeof(La_s390_32_regs) |
| + st %r12,0(%r15) # save backchain |
| + |
| + stm %r2,%r6,96(%r15) # save registers |
| + cfi_offset (r2, -264) # + r6 needed as arg for |
| + cfi_offset (r3, -260) # _dl_profile_fixup |
| + cfi_offset (r4, -256) |
| + cfi_offset (r5, -252) |
| + cfi_offset (r6, -248) |
| + std %f0,120(%r15) |
| + cfi_offset (f0, -240) |
| + std %f2,128(%r15) |
| + cfi_offset (f2, -232) |
| #ifdef RESTORE_VRS |
| - ahi %r15,-224 # create stack frame |
| .machine push |
| .machine "z13" |
| .machinemode "zarch_nohighgprs" |
| - vstm %v24,%v31,96(%r15) # store call-clobbered vr arguments |
| + vstm %v24,%v31,136(%r15) # store call-clobbered vr arguments |
| cfi_offset (v24, -224) |
| cfi_offset (v25, -208) |
| cfi_offset (v26, -192) |
| @@ -143,31 +146,31 @@ _dl_runtime_profile: |
| cfi_offset (v30, -128) |
| cfi_offset (v31, -112) |
| .machine pop |
| -#else |
| - ahi %r15,-96 # create stack frame |
| #endif |
| - st %r12,0(%r15) # save backchain |
| + |
| lm %r2,%r3,24(%r12) # load arguments saved by PLT |
| lr %r4,%r14 # return address as third parameter |
| basr %r1,0 |
| 0: l %r14,6f-0b(%r1) |
| - la %r5,32(%r12) # pointer to struct La_s390_32_regs |
| + la %r5,96(%r15) # pointer to struct La_s390_32_regs |
| la %r6,20(%r12) # long int * framesize |
| bas %r14,0(%r14,%r1) # call resolver |
| lr %r1,%r2 # function addr returned in r2 |
| - ld %f0,56(%r12) # restore call-clobbered arg fprs |
| - ld %f2,64(%r12) |
| + ld %f0,120(%r15) # restore call-clobbered arg fprs |
| + ld %f2,128(%r15) |
| #ifdef RESTORE_VRS |
| .machine push |
| .machine "z13" |
| .machinemode "zarch_nohighgprs" |
| - vlm %v24,%v31,96(%r15) # restore call-clobbered arg vrs |
| + vlm %v24,%v31,136(%r15) # restore call-clobbered arg vrs |
| .machine pop |
| #endif |
| icm %r0,15,20(%r12) # load & test framesize |
| jnm 2f |
| |
| - lm %r2,%r6,32(%r12) |
| + lm %r2,%r6,96(%r15) # framesize < 0 means no pltexit call |
| + # so we can do a tail call without |
| + # copying the arg overflow area |
| lr %r15,%r12 # remove stack frame |
| cfi_def_cfa_register (15) |
| l %r14,16(%r15) # restore registers |
| @@ -175,7 +178,9 @@ _dl_runtime_profile: |
| br %r1 # tail-call to the resolved function |
| |
| cfi_def_cfa_register (12) |
| -2: jz 4f # framesize == 0 ? |
| +2: la %r4,96(%r15) # pointer to struct La_s390_32_regs |
| + st %r4,32(%r12) |
| + jz 4f # framesize == 0 ? |
| ahi %r0,7 # align framesize to 8 |
| lhi %r2,-8 |
| nr %r0,%r2 |
| @@ -188,24 +193,35 @@ _dl_runtime_profile: |
| la %r2,8(%r2) |
| la %r3,8(%r3) |
| brct %r0,3b |
| -4: lm %r2,%r6,32(%r12) # load register parameters |
| +4: lm %r2,%r6,0(%r4) # load register parameters |
| basr %r14,%r1 # call resolved function |
| - stm %r2,%r3,72(%r12) # store return values r2, r3, f0 |
| - std %f0,80(%r12) # to struct La_s390_32_retval |
| - lm %r2,%r3,24(%r12) # load arguments saved by PLT |
| + stm %r2,%r3,40(%r12) # store return values r2, r3, f0 |
| + std %f0,48(%r12) # to struct La_s390_32_retval |
| +#ifdef RESTORE_VRS |
| + .machine push |
| + .machine "z13" |
| + vst %v24,56(%r12) # store return value v24 |
| + .machine pop |
| +#endif |
| + lm %r2,%r4,24(%r12) # r2, r3: load arguments saved by PLT |
| + # r4: pointer to struct La_s390_32_regs |
| basr %r1,0 |
| 5: l %r14,7f-5b(%r1) |
| - la %r4,32(%r12) # pointer to struct La_s390_32_regs |
| - la %r5,72(%r12) # pointer to struct La_s390_32_retval |
| + la %r5,40(%r12) # pointer to struct La_s390_32_retval |
| bas %r14,0(%r14,%r1) # call _dl_call_pltexit |
| |
| lr %r15,%r12 # remove stack frame |
| cfi_def_cfa_register (15) |
| l %r14,16(%r15) # restore registers |
| l %r12,12(%r15) |
| - l %r2,72(%r15) # restore return values |
| - l %r3,76(%r15) |
| - ld %f0,80(%r15) |
| + lm %r2,%r3,40(%r15) # restore return values |
| + ld %f0,48(%r15) |
| +#ifdef RESTORE_VRS |
| + .machine push |
| + .machine "z13" |
| + vl %v24,56(%r15) # restore return value v24 |
| + .machine pop |
| +#endif |
| br %r14 |
| |
| 6: .long _dl_profile_fixup - 0b |
| diff --git a/sysdeps/s390/s390-64/dl-trampoline.h b/sysdeps/s390/s390-64/dl-trampoline.h |
| index 658e3a3..33ea3de 100644 |
| |
| |
| @@ -109,31 +109,34 @@ _dl_runtime_resolve: |
| cfi_startproc |
| .align 16 |
| _dl_runtime_profile: |
| - stmg %r2,%r6,64(%r15) # save call-clobbered arg regs |
| - cfi_offset (r2, -96) # + r6 needed as arg for |
| - cfi_offset (r3, -88) # _dl_profile_fixup |
| - cfi_offset (r4, -80) |
| - cfi_offset (r5, -72) |
| - cfi_offset (r6, -64) |
| - std %f0,104(%r15) |
| - cfi_offset (f0, -56) |
| - std %f2,112(%r15) |
| - cfi_offset (f2, -48) |
| - std %f4,120(%r15) |
| - cfi_offset (f4, -40) |
| - std %f6,128(%r15) |
| - cfi_offset (f6, -32) |
| stg %r12,24(%r15) # r12 is used as backup of r15 |
| cfi_offset (r12, -136) |
| stg %r14,32(%r15) |
| cfi_offset (r14, -128) |
| lgr %r12,%r15 # backup stack pointer |
| cfi_def_cfa_register (12) |
| + aghi %r15,-360 # create stack frame: |
| + # 160 + sizeof(La_s390_64_regs) |
| + stg %r12,0(%r15) # save backchain |
| + |
| + stmg %r2,%r6,160(%r15) # save call-clobbered arg regs |
| + cfi_offset (r2, -360) # + r6 needed as arg for |
| + cfi_offset (r3, -352) # _dl_profile_fixup |
| + cfi_offset (r4, -344) |
| + cfi_offset (r5, -336) |
| + cfi_offset (r6, -328) |
| + std %f0,200(%r15) |
| + cfi_offset (f0, -320) |
| + std %f2,208(%r15) |
| + cfi_offset (f2, -312) |
| + std %f4,216(%r15) |
| + cfi_offset (f4, -304) |
| + std %f6,224(%r15) |
| + cfi_offset (f6, -296) |
| #ifdef RESTORE_VRS |
| - aghi %r15,-288 # create stack frame |
| .machine push |
| .machine "z13" |
| - vstm %v24,%v31,160(%r15)# store call-clobbered vector argument registers |
| + vstm %v24,%v31,232(%r15) # store call-clobbered vector arguments |
| cfi_offset (v24, -288) |
| cfi_offset (v25, -272) |
| cfi_offset (v26, -256) |
| @@ -143,31 +146,28 @@ _dl_runtime_profile: |
| cfi_offset (v30, -192) |
| cfi_offset (v31, -176) |
| .machine pop |
| -#else |
| - aghi %r15,-160 # create stack frame |
| #endif |
| - stg %r12,0(%r15) # save backchain |
| lmg %r2,%r3,48(%r12) # load arguments saved by PLT |
| lgr %r4,%r14 # return address as third parameter |
| - la %r5,64(%r12) # pointer to struct La_s390_64_regs |
| + la %r5,160(%r15) # pointer to struct La_s390_64_regs |
| la %r6,40(%r12) # long int * framesize |
| brasl %r14,_dl_profile_fixup # call resolver |
| lgr %r1,%r2 # function addr returned in r2 |
| - ld %f0,104(%r12) # restore call-clobbered arg fprs |
| - ld %f2,112(%r12) |
| - ld %f4,120(%r12) |
| - ld %f6,128(%r12) |
| + ld %f0,200(%r15) # restore call-clobbered arg fprs |
| + ld %f2,208(%r15) |
| + ld %f4,216(%r15) |
| + ld %f6,224(%r15) |
| #ifdef RESTORE_VRS |
| .machine push |
| .machine "z13" |
| - vlm %v24,%v31,160(%r15) # restore call-clobbered arg vrs |
| + vlm %v24,%v31,232(%r15) # restore call-clobbered arg vrs |
| .machine pop |
| #endif |
| lg %r0,40(%r12) # load framesize |
| ltgr %r0,%r0 |
| jnm 1f |
| |
| - lmg %r2,%r6,64(%r12) # framesize < 0 means no pltexit call |
| + lmg %r2,%r6,160(%r15) # framesize < 0 means no pltexit call |
| # so we can do a tail call without |
| # copying the arg overflow area |
| lgr %r15,%r12 # remove stack frame |
| @@ -177,7 +177,9 @@ _dl_runtime_profile: |
| br %r1 # tail-call to resolved function |
| |
| cfi_def_cfa_register (12) |
| -1: jz 4f # framesize == 0 ? |
| +1: la %r4,160(%r15) # pointer to struct La_s390_64_regs |
| + stg %r4,64(%r12) |
| + jz 4f # framesize == 0 ? |
| aghi %r0,7 # align framesize to 8 |
| nill %r0,0xfff8 |
| slgr %r15,%r0 # make room for framesize bytes |
| @@ -189,21 +191,33 @@ _dl_runtime_profile: |
| la %r2,8(%r2) # depending on framesize |
| la %r3,8(%r3) |
| brctg %r0,3b |
| -4: lmg %r2,%r6,64(%r12) # restore call-clobbered arg gprs |
| +4: lmg %r2,%r6,0(%r4) # restore call-clobbered arg gprs |
| basr %r14,%r1 # call resolved function |
| - stg %r2,136(%r12) # store return values r2, f0 |
| - std %f0,144(%r12) # to struct La_s390_64_retval |
| - lmg %r2,%r3,48(%r12) # load arguments saved by PLT |
| - la %r4,64(%r12) # pointer to struct La_s390_64_regs |
| - la %r5,136(%r12) # pointer to struct La_s390_64_retval |
| + stg %r2,72(%r12) # store return values r2, f0 |
| + std %f0,80(%r12) # to struct La_s390_64_retval |
| +#ifdef RESTORE_VRS |
| + .machine push |
| + .machine "z13" |
| + vst %v24,88(%r12) # store return value v24 |
| + .machine pop |
| +#endif |
| + lmg %r2,%r4,48(%r12) # r2, r3: load arguments saved by PLT |
| + # r4: pointer to struct La_s390_64_regs |
| + la %r5,72(%r12) # pointer to struct La_s390_64_retval |
| brasl %r14,_dl_call_pltexit |
| |
| lgr %r15,%r12 # remove stack frame |
| cfi_def_cfa_register (15) |
| lg %r14,32(%r15) # restore registers |
| lg %r12,24(%r15) |
| - lg %r2,136(%r15) # restore return values |
| - ld %f0,144(%r15) |
| + lg %r2,72(%r15) # restore return values |
| + ld %f0,80(%r15) |
| +#ifdef RESTORE_VRS |
| + .machine push |
| + .machine "z13" |
| + vl %v24,88(%r15) # restore return value v24 |
| + .machine pop |
| +#endif |
| br %r14 # Jump back to caller |
| |
| cfi_endproc |