|
|
bdc76f |
commit 96fbb9a328232e42814334d6e29a9a9c7995c01d
|
|
|
bdc76f |
Author: Stefan Liebler <stli@linux.ibm.com>
|
|
|
bdc76f |
Date: Fri Mar 22 11:14:08 2019 +0100
|
|
|
bdc76f |
|
|
|
bdc76f |
S390: Add arch13 memmove ifunc variant.
|
|
|
bdc76f |
|
|
|
bdc76f |
This patch introduces the new arch13 ifunc variant for memmove.
|
|
|
bdc76f |
For the forward or non-overlapping case it simply uses memcpy.
|
|
|
bdc76f |
For the backward case it relies on the new instruction mvcrl.
|
|
|
bdc76f |
The instruction copies up to 256 bytes at once.
|
|
|
bdc76f |
In case of an overlap, it copies the bytes as if they were copied
|
|
|
bdc76f |
one by one, starting from right to left.
|
|
|
bdc76f |
|
|
|
bdc76f |
ChangeLog:
|
|
|
bdc76f |
|
|
|
bdc76f |
* sysdeps/s390/ifunc-memcpy.h (HAVE_MEMMOVE_ARCH13, MEMMOVE_ARCH13,
|
|
|
bdc76f |
HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT): New defines.
|
|
|
bdc76f |
* sysdeps/s390/memcpy-z900.S: Add arch13 memmove implementation.
|
|
|
bdc76f |
* sysdeps/s390/memmove.c (memmove): Add arch13 variant in
|
|
|
bdc76f |
ifunc selector.
|
|
|
bdc76f |
* sysdeps/s390/multiarch/ifunc-impl-list.c
|
|
|
bdc76f |
(__libc_ifunc_impl_list): Add ifunc variant for arch13 memmove.
|
|
|
bdc76f |
* sysdeps/s390/multiarch/ifunc-resolve.h (S390_STFLE_BITS_ARCH13_MIE3,
|
|
|
bdc76f |
S390_IS_ARCH13_MIE3): New defines.
|
|
|
bdc76f |
|
|
|
bdc76f |
diff --git a/sysdeps/s390/ifunc-memcpy.h b/sysdeps/s390/ifunc-memcpy.h
|
|
|
bdc76f |
index 0e701968c8f39014..e8cd794587b44922 100644
|
|
|
bdc76f |
--- a/sysdeps/s390/ifunc-memcpy.h
|
|
|
bdc76f |
+++ b/sysdeps/s390/ifunc-memcpy.h
|
|
|
bdc76f |
@@ -44,7 +44,7 @@
|
|
|
bdc76f |
#endif
|
|
|
bdc76f |
|
|
|
bdc76f |
#if defined SHARED && defined USE_MULTIARCH && IS_IN (libc) \
|
|
|
bdc76f |
- && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
|
|
|
bdc76f |
+ && ! defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
|
|
|
bdc76f |
# define HAVE_MEMMOVE_IFUNC 1
|
|
|
bdc76f |
#else
|
|
|
bdc76f |
# define HAVE_MEMMOVE_IFUNC 0
|
|
|
bdc76f |
@@ -56,14 +56,27 @@
|
|
|
bdc76f |
# define HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT 0
|
|
|
bdc76f |
#endif
|
|
|
bdc76f |
|
|
|
bdc76f |
-#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
|
|
|
bdc76f |
+#ifdef HAVE_S390_ARCH13_ASM_SUPPORT
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT HAVE_MEMMOVE_IFUNC
|
|
|
bdc76f |
+#else
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT 0
|
|
|
bdc76f |
+#endif
|
|
|
bdc76f |
+
|
|
|
bdc76f |
+#if defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
|
|
|
bdc76f |
+# define MEMMOVE_DEFAULT MEMMOVE_ARCH13
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_C 0
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_Z13 0
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_ARCH13 1
|
|
|
bdc76f |
+#elif defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
|
|
|
bdc76f |
# define MEMMOVE_DEFAULT MEMMOVE_Z13
|
|
|
bdc76f |
# define HAVE_MEMMOVE_C 0
|
|
|
bdc76f |
# define HAVE_MEMMOVE_Z13 1
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_ARCH13 HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
|
|
|
bdc76f |
#else
|
|
|
bdc76f |
# define MEMMOVE_DEFAULT MEMMOVE_C
|
|
|
bdc76f |
# define HAVE_MEMMOVE_C 1
|
|
|
bdc76f |
# define HAVE_MEMMOVE_Z13 HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT
|
|
|
bdc76f |
+# define HAVE_MEMMOVE_ARCH13 HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
|
|
|
bdc76f |
#endif
|
|
|
bdc76f |
|
|
|
bdc76f |
#if HAVE_MEMCPY_Z900_G5
|
|
|
bdc76f |
@@ -101,3 +114,9 @@
|
|
|
bdc76f |
#else
|
|
|
bdc76f |
# define MEMMOVE_Z13 NULL
|
|
|
bdc76f |
#endif
|
|
|
bdc76f |
+
|
|
|
bdc76f |
+#if HAVE_MEMMOVE_ARCH13
|
|
|
bdc76f |
+# define MEMMOVE_ARCH13 __memmove_arch13
|
|
|
bdc76f |
+#else
|
|
|
bdc76f |
+# define MEMMOVE_ARCH13 NULL
|
|
|
bdc76f |
+#endif
|
|
|
bdc76f |
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
|
|
|
bdc76f |
index bd3b1950ee442c0c..45eddc67a48e991e 100644
|
|
|
bdc76f |
--- a/sysdeps/s390/memcpy-z900.S
|
|
|
bdc76f |
+++ b/sysdeps/s390/memcpy-z900.S
|
|
|
bdc76f |
@@ -277,6 +277,61 @@ ENTRY(MEMMOVE_Z13)
|
|
|
bdc76f |
END(MEMMOVE_Z13)
|
|
|
bdc76f |
#endif /* HAVE_MEMMOVE_Z13 */
|
|
|
bdc76f |
|
|
|
bdc76f |
+#if HAVE_MEMMOVE_ARCH13
|
|
|
bdc76f |
+ENTRY(MEMMOVE_ARCH13) /* In: r2 = dst (also returned), r3 = src, r4 = len. */
|
|
|
bdc76f |
+ .machine "arch13"
|
|
|
bdc76f |
+ .machinemode "zarch_nohighgprs"
|
|
|
bdc76f |
+# if ! defined __s390x__
|
|
|
bdc76f |
+ /* Note: On 31bit, len (r4), src (r3) and dst (r2) are prefixed with zeroes. */
|
|
|
bdc76f |
+ llgfr %r4,%r4
|
|
|
bdc76f |
+ llgfr %r3,%r3
|
|
|
bdc76f |
+ llgfr %r2,%r2
|
|
|
bdc76f |
+# endif /* ! defined __s390x__ */
|
|
|
bdc76f |
+ sgrk %r5,%r2,%r3 /* r5 = dst - src (used for the overlap check below). */
|
|
|
bdc76f |
+ aghik %r0,%r4,-1 /* r0 = len - 1: Both vstl and mvcrl need the highest index. */
|
|
|
bdc76f |
+ clgijh %r4,16,.L_MEMMOVE_ARCH13_LARGE /* len > 16 (cc of aghik is kept). */
|
|
|
bdc76f |
+.L_MEMMOVE_ARCH13_SMALL:
|
|
|
bdc76f |
+ jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik). */
|
|
|
bdc76f |
+ /* Copy up to 16 bytes with vll/vstl (both need the highest index in r0). */
|
|
|
bdc76f |
+ vll %v16,%r0,0(%r3)
|
|
|
bdc76f |
+ vstl %v16,%r0,0(%r2)
|
|
|
bdc76f |
+.L_MEMMOVE_ARCH13_END:
|
|
|
bdc76f |
+ br %r14
|
|
|
bdc76f |
+.L_MEMMOVE_ARCH13_LARGE:
|
|
|
bdc76f |
+ lgr %r1,%r2 /* For memcpy: r1: Use as dest ; r2: Return dest */
|
|
|
bdc76f |
+ /* The unsigned comparison (dst - src >= len) determines if we can
|
|
|
bdc76f |
+ execute the forward case with memcpy. */
|
|
|
bdc76f |
+#if ! HAVE_MEMCPY_Z196
|
|
|
bdc76f |
+# error The arch13 variant of memmove needs the z196 variant of memcpy!
|
|
|
bdc76f |
+#endif
|
|
|
bdc76f |
+ /* Forward case: jump to .L_Z196_start2 (memcpy). Otherwise: backward case. */
|
|
|
bdc76f |
+ clgrjhe %r5,%r4,.L_Z196_start2
|
|
|
bdc76f |
+ clgijh %r0,255,.L_MEMMOVE_ARCH13_LARGER_256B /* len - 1 > 255. */
|
|
|
bdc76f |
+ /* Move up to 256 bytes with mvcrl (move right to left). */
|
|
|
bdc76f |
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
|
|
|
bdc76f |
+ br %r14
|
|
|
bdc76f |
+.L_MEMMOVE_ARCH13_LARGER_256B:
|
|
|
bdc76f |
+ /* First move the "remaining" block of up to 256 bytes at the end of
|
|
|
bdc76f |
+ src/dst buffers. Then move blocks of 256 bytes in a loop starting
|
|
|
bdc76f |
+ with the block at the end.
|
|
|
bdc76f |
+ (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
|
|
|
bdc76f |
+ passed to mvcrl instructions are aligned, too.) */
|
|
|
bdc76f |
+ risbgn %r5,%r0,8,128+63,56 /* r5 = r0 / 256 (loop count below). */
|
|
|
bdc76f |
+ risbgn %r0,%r0,56,128+63,0 /* r0 = r0 & 0xFF */
|
|
|
bdc76f |
+ slgr %r4,%r0 /* r4 = len - r0 */
|
|
|
bdc76f |
+ lay %r1,-1(%r4,%r1) /* r1 = dst + r4 - 1: Start of the "remaining" block. */
|
|
|
bdc76f |
+ lay %r3,-1(%r4,%r3) /* r3 = src + r4 - 1: Start of the "remaining" block. */
|
|
|
bdc76f |
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
|
|
|
bdc76f |
+ lghi %r0,255 /* Always copy 256 bytes in the loop below! */
|
|
|
bdc76f |
+.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
|
|
|
bdc76f |
+ aghi %r1,-256 /* Step dst back to the previous 256 byte block. */
|
|
|
bdc76f |
+ aghi %r3,-256 /* Step src back to the previous 256 byte block. */
|
|
|
bdc76f |
+ mvcrl 0(%r1),0(%r3) /* Move (r0 + 1) bytes from r3 to r1. */
|
|
|
bdc76f |
+ brctg %r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP /* Loop r5 times. */
|
|
|
bdc76f |
+ br %r14
|
|
|
bdc76f |
+END(MEMMOVE_ARCH13)
|
|
|
bdc76f |
+#endif /* HAVE_MEMMOVE_ARCH13 */
|
|
|
bdc76f |
+
|
|
|
bdc76f |
#if ! HAVE_MEMCPY_IFUNC
|
|
|
bdc76f |
/* If we don't use ifunc, define an alias for mem[p]cpy here.
|
|
|
bdc76f |
Otherwise see sysdeps/s390/mem[p]cpy.c. */
|
|
|
bdc76f |
diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
|
|
|
bdc76f |
index ac34edf80f2678cd..f6d31a4fcd56355b 100644
|
|
|
bdc76f |
--- a/sysdeps/s390/memmove.c
|
|
|
bdc76f |
+++ b/sysdeps/s390/memmove.c
|
|
|
bdc76f |
@@ -36,9 +36,19 @@ extern __typeof (__redirect_memmove) MEMMOVE_C attribute_hidden;
|
|
|
bdc76f |
extern __typeof (__redirect_memmove) MEMMOVE_Z13 attribute_hidden;
|
|
|
bdc76f |
# endif
|
|
|
bdc76f |
|
|
|
bdc76f |
+# if HAVE_MEMMOVE_ARCH13
|
|
|
bdc76f |
+extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
|
|
|
bdc76f |
+# endif
|
|
|
bdc76f |
+
|
|
|
bdc76f |
s390_libc_ifunc_expr (__redirect_memmove, memmove,
|
|
|
bdc76f |
- (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
|
|
|
bdc76f |
- ? MEMMOVE_Z13
|
|
|
bdc76f |
- : MEMMOVE_DEFAULT
|
|
|
bdc76f |
+ ({
|
|
|
bdc76f |
+ s390_libc_ifunc_expr_stfle_init ();
|
|
|
bdc76f |
+ (HAVE_MEMMOVE_ARCH13
|
|
|
bdc76f |
+ && S390_IS_ARCH13_MIE3 (stfle_bits))
|
|
|
bdc76f |
+ ? MEMMOVE_ARCH13
|
|
|
bdc76f |
+ : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
|
|
|
bdc76f |
+ ? MEMMOVE_Z13
|
|
|
bdc76f |
+ : MEMMOVE_DEFAULT;
|
|
|
bdc76f |
+ })
|
|
|
bdc76f |
)
|
|
|
bdc76f |
#endif
|
|
|
bdc76f |
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
|
|
|
bdc76f |
index 177c5fd6fe269d9b..c24bfc95f2d7a22d 100644
|
|
|
bdc76f |
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
|
|
|
bdc76f |
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
|
|
|
bdc76f |
@@ -169,6 +169,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
bdc76f |
|
|
|
bdc76f |
#if HAVE_MEMMOVE_IFUNC
|
|
|
bdc76f |
IFUNC_IMPL (i, name, memmove,
|
|
|
bdc76f |
+# if HAVE_MEMMOVE_ARCH13
|
|
|
bdc76f |
+ IFUNC_IMPL_ADD (array, i, memmove,
|
|
|
bdc76f |
+ S390_IS_ARCH13_MIE3 (stfle_bits),
|
|
|
bdc76f |
+ MEMMOVE_ARCH13)
|
|
|
bdc76f |
+# endif
|
|
|
bdc76f |
# if HAVE_MEMMOVE_Z13
|
|
|
bdc76f |
IFUNC_IMPL_ADD (array, i, memmove,
|
|
|
bdc76f |
dl_hwcap & HWCAP_S390_VX, MEMMOVE_Z13)
|
|
|
bdc76f |
diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h
|
|
|
bdc76f |
index b2be015401313d4b..db735bb341ab6b86 100644
|
|
|
bdc76f |
--- a/sysdeps/s390/multiarch/ifunc-resolve.h
|
|
|
bdc76f |
+++ b/sysdeps/s390/multiarch/ifunc-resolve.h
|
|
|
bdc76f |
@@ -22,6 +22,11 @@
|
|
|
bdc76f |
|
|
|
bdc76f |
#define S390_STFLE_BITS_Z10 34 /* General instructions extension */
|
|
|
bdc76f |
#define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
|
|
|
bdc76f |
+#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
|
|
|
bdc76f |
+ Facility 3, e.g. mvcrl. */
|
|
|
bdc76f |
+
|
|
|
bdc76f |
+#define S390_IS_ARCH13_MIE3(STFLE_BITS) \
|
|
|
bdc76f |
+ ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))) != 0)
|
|
|
bdc76f |
|
|
|
bdc76f |
#define S390_IS_Z196(STFLE_BITS) \
|
|
|
bdc76f |
((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)
|