077c9d
commit 96fbb9a328232e42814334d6e29a9a9c7995c01d
077c9d
Author: Stefan Liebler <stli@linux.ibm.com>
077c9d
Date:   Fri Mar 22 11:14:08 2019 +0100
077c9d
077c9d
    S390: Add arch13 memmove ifunc variant.
077c9d
    
077c9d
    This patch introduces the new arch13 ifunc variant for memmove.
077c9d
    For the forward or non-overlapping case it is just using memcpy.
077c9d
    For the backward case it relies on the new instruction mvcrl.
077c9d
    The instruction copies up to 256 bytes at once.
077c9d
    In case of an overlap, it copies the bytes like copying them
077c9d
    one by one starting from right to left.
077c9d
    
077c9d
    ChangeLog:
077c9d
    
077c9d
            * sysdeps/s390/ifunc-memcpy.h (HAVE_MEMMOVE_ARCH13, MEMMOVE_ARCH13,
077c9d
            HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT): New defines.
077c9d
            * sysdeps/s390/memcpy-z900.S: Add arch13 memmove implementation.
077c9d
            * sysdeps/s390/memmove.c (memmove): Add arch13 variant in
077c9d
            ifunc selector.
077c9d
            * sysdeps/s390/multiarch/ifunc-impl-list.c
077c9d
            (__libc_ifunc_impl_list): Add ifunc variant for arch13 memmove.
077c9d
            * sysdeps/s390/multiarch/ifunc-resolve.h (S390_STFLE_BITS_ARCH13_MIE3,
077c9d
            S390_IS_ARCH13_MIE3): New defines.
077c9d
077c9d
diff --git a/sysdeps/s390/ifunc-memcpy.h b/sysdeps/s390/ifunc-memcpy.h
077c9d
index 0e701968c8f39014..e8cd794587b44922 100644
077c9d
--- a/sysdeps/s390/ifunc-memcpy.h
077c9d
+++ b/sysdeps/s390/ifunc-memcpy.h
077c9d
@@ -44,7 +44,7 @@
077c9d
 #endif
077c9d
 
077c9d
 #if defined SHARED && defined USE_MULTIARCH && IS_IN (libc)	\
077c9d
-  && ! defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
077c9d
+  && ! defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
077c9d
 # define HAVE_MEMMOVE_IFUNC	1
077c9d
 #else
077c9d
 # define HAVE_MEMMOVE_IFUNC	0
077c9d
@@ -56,14 +56,27 @@
077c9d
 # define HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT 0
077c9d
 #endif
077c9d
 
077c9d
-#if defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
077c9d
+#ifdef HAVE_S390_ARCH13_ASM_SUPPORT
077c9d
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT HAVE_MEMMOVE_IFUNC
077c9d
+#else
077c9d
+# define HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT 0
077c9d
+#endif
077c9d
+
077c9d
+#if defined HAVE_S390_MIN_ARCH13_ZARCH_ASM_SUPPORT
077c9d
+# define MEMMOVE_DEFAULT	MEMMOVE_ARCH13
077c9d
+# define HAVE_MEMMOVE_C		0
077c9d
+# define HAVE_MEMMOVE_Z13	0
077c9d
+# define HAVE_MEMMOVE_ARCH13	1
077c9d
+#elif defined HAVE_S390_MIN_Z13_ZARCH_ASM_SUPPORT
077c9d
 # define MEMMOVE_DEFAULT	MEMMOVE_Z13
077c9d
 # define HAVE_MEMMOVE_C		0
077c9d
 # define HAVE_MEMMOVE_Z13	1
077c9d
+# define HAVE_MEMMOVE_ARCH13	HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
077c9d
 #else
077c9d
 # define MEMMOVE_DEFAULT	MEMMOVE_C
077c9d
 # define HAVE_MEMMOVE_C		1
077c9d
 # define HAVE_MEMMOVE_Z13	HAVE_MEMMOVE_IFUNC_AND_VX_SUPPORT
077c9d
+# define HAVE_MEMMOVE_ARCH13	HAVE_MEMMOVE_IFUNC_AND_ARCH13_SUPPORT
077c9d
 #endif
077c9d
 
077c9d
 #if HAVE_MEMCPY_Z900_G5
077c9d
@@ -101,3 +114,9 @@
077c9d
 #else
077c9d
 # define MEMMOVE_Z13		NULL
077c9d
 #endif
077c9d
+
077c9d
+#if HAVE_MEMMOVE_ARCH13
077c9d
+# define MEMMOVE_ARCH13		__memmove_arch13
077c9d
+#else
077c9d
+# define MEMMOVE_ARCH13		NULL
077c9d
+#endif
077c9d
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
077c9d
index bd3b1950ee442c0c..45eddc67a48e991e 100644
077c9d
--- a/sysdeps/s390/memcpy-z900.S
077c9d
+++ b/sysdeps/s390/memcpy-z900.S
077c9d
@@ -277,6 +277,61 @@ ENTRY(MEMMOVE_Z13)
077c9d
 END(MEMMOVE_Z13)
077c9d
 #endif /* HAVE_MEMMOVE_Z13  */
077c9d
 
077c9d
+#if HAVE_MEMMOVE_ARCH13
077c9d
+ENTRY(MEMMOVE_ARCH13)
077c9d
+	.machine "arch13"
077c9d
+	.machinemode "zarch_nohighgprs"
077c9d
+# if ! defined __s390x__
077c9d
+	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
077c9d
+	llgfr	%r4,%r4
077c9d
+	llgfr	%r3,%r3
077c9d
+	llgfr	%r2,%r2
077c9d
+# endif /* ! defined __s390x__ */
077c9d
+	sgrk	%r5,%r2,%r3
077c9d
+	aghik	%r0,%r4,-1	/* Both vstl and mvcrl needs highest index.  */
077c9d
+	clgijh	%r4,16,.L_MEMMOVE_ARCH13_LARGE
077c9d
+.L_MEMMOVE_ARCH13_SMALL:
077c9d
+	jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik).  */
077c9d
+	/* Store up to 16 bytes with vll/vstl (needs highest index).  */
077c9d
+	vll	%v16,%r0,0(%r3)
077c9d
+	vstl	%v16,%r0,0(%r2)
077c9d
+.L_MEMMOVE_ARCH13_END:
077c9d
+	br      %r14
077c9d
+.L_MEMMOVE_ARCH13_LARGE:
077c9d
+	lgr     %r1,%r2	/* For memcpy: r1: Use as dest ; r2: Return dest  */
077c9d
+	/* The unsigned comparison (dst - src >= len) determines if we can
077c9d
+	   execute the forward case with memcpy.  */
077c9d
+#if ! HAVE_MEMCPY_Z196
077c9d
+# error The arch13 variant of memmove needs the z196 variant of memcpy!
077c9d
+#endif
077c9d
+	/* Backward case.  */
077c9d
+	clgrjhe %r5,%r4,.L_Z196_start2
077c9d
+	clgijh	%r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
077c9d
+	/* Move up to 256bytes with mvcrl (move right to left).  */
077c9d
+	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
077c9d
+	br      %r14
077c9d
+.L_MEMMOVE_ARCH13_LARGER_256B:
077c9d
+	/* First move the "remaining" block of up to 256 bytes at the end of
077c9d
+	   src/dst buffers.  Then move blocks of 256bytes in a loop starting
077c9d
+	   with the block at the end.
077c9d
+	   (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
077c9d
+	   passed to mvcrl instructions are aligned, too)  */
077c9d
+	risbgn	%r5,%r0,8,128+63,56	/* r5 = r0 / 256  */
077c9d
+	risbgn	%r0,%r0,56,128+63,0	/* r0 = r0 & 0xFF  */
077c9d
+	slgr	%r4,%r0
077c9d
+	lay	%r1,-1(%r4,%r1)
077c9d
+	lay	%r3,-1(%r4,%r3)
077c9d
+	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
077c9d
+	lghi	%r0,255		/* Always copy 256 bytes in the loop below!  */
077c9d
+.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
077c9d
+	aghi	%r1,-256
077c9d
+	aghi	%r3,-256
077c9d
+	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
077c9d
+	brctg	%r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
077c9d
+	br      %r14
077c9d
+END(MEMMOVE_ARCH13)
077c9d
+#endif /* HAVE_MEMMOVE_ARCH13  */
077c9d
+
077c9d
 #if ! HAVE_MEMCPY_IFUNC
077c9d
 /* If we don't use ifunc, define an alias for mem[p]cpy here.
077c9d
    Otherwise see sysdeps/s390/mem[p]cpy.c.  */
077c9d
diff --git a/sysdeps/s390/memmove.c b/sysdeps/s390/memmove.c
077c9d
index ac34edf80f2678cd..f6d31a4fcd56355b 100644
077c9d
--- a/sysdeps/s390/memmove.c
077c9d
+++ b/sysdeps/s390/memmove.c
077c9d
@@ -36,9 +36,19 @@ extern __typeof (__redirect_memmove) MEMMOVE_C attribute_hidden;
077c9d
 extern __typeof (__redirect_memmove) MEMMOVE_Z13 attribute_hidden;
077c9d
 # endif
077c9d
 
077c9d
+# if HAVE_MEMMOVE_ARCH13
077c9d
+extern __typeof (__redirect_memmove) MEMMOVE_ARCH13 attribute_hidden;
077c9d
+# endif
077c9d
+
077c9d
 s390_libc_ifunc_expr (__redirect_memmove, memmove,
077c9d
-		      (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
077c9d
-		      ? MEMMOVE_Z13
077c9d
-		      : MEMMOVE_DEFAULT
077c9d
+		      ({
077c9d
+			s390_libc_ifunc_expr_stfle_init ();
077c9d
+			(HAVE_MEMMOVE_ARCH13
077c9d
+			 && S390_IS_ARCH13_MIE3 (stfle_bits))
077c9d
+			  ? MEMMOVE_ARCH13
077c9d
+			  : (HAVE_MEMMOVE_Z13 && (hwcap & HWCAP_S390_VX))
077c9d
+			  ? MEMMOVE_Z13
077c9d
+			  : MEMMOVE_DEFAULT;
077c9d
+		      })
077c9d
 		      )
077c9d
 #endif
077c9d
diff --git a/sysdeps/s390/multiarch/ifunc-impl-list.c b/sysdeps/s390/multiarch/ifunc-impl-list.c
077c9d
index 177c5fd6fe269d9b..c24bfc95f2d7a22d 100644
077c9d
--- a/sysdeps/s390/multiarch/ifunc-impl-list.c
077c9d
+++ b/sysdeps/s390/multiarch/ifunc-impl-list.c
077c9d
@@ -169,6 +169,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
077c9d
 
077c9d
 #if HAVE_MEMMOVE_IFUNC
077c9d
     IFUNC_IMPL (i, name, memmove,
077c9d
+# if HAVE_MEMMOVE_ARCH13
077c9d
+		IFUNC_IMPL_ADD (array, i, memmove,
077c9d
+				S390_IS_ARCH13_MIE3 (stfle_bits),
077c9d
+				MEMMOVE_ARCH13)
077c9d
+# endif
077c9d
 # if HAVE_MEMMOVE_Z13
077c9d
 		IFUNC_IMPL_ADD (array, i, memmove,
077c9d
 				dl_hwcap & HWCAP_S390_VX, MEMMOVE_Z13)
077c9d
diff --git a/sysdeps/s390/multiarch/ifunc-resolve.h b/sysdeps/s390/multiarch/ifunc-resolve.h
077c9d
index b2be015401313d4b..db735bb341ab6b86 100644
077c9d
--- a/sysdeps/s390/multiarch/ifunc-resolve.h
077c9d
+++ b/sysdeps/s390/multiarch/ifunc-resolve.h
077c9d
@@ -22,6 +22,11 @@
077c9d
 
077c9d
 #define S390_STFLE_BITS_Z10  34 /* General instructions extension */
077c9d
 #define S390_STFLE_BITS_Z196 45 /* Distinct operands, pop ... */
077c9d
+#define S390_STFLE_BITS_ARCH13_MIE3 61 /* Miscellaneous-Instruction-Extensions
077c9d
+					  Facility 3, e.g. mvcrl.  */
077c9d
+
077c9d
+#define S390_IS_ARCH13_MIE3(STFLE_BITS)			\
077c9d
+  ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_ARCH13_MIE3))) != 0)
077c9d
 
077c9d
 #define S390_IS_Z196(STFLE_BITS)			\
077c9d
   ((STFLE_BITS & (1ULL << (63 - S390_STFLE_BITS_Z196))) != 0)