olga / rpms / glibc

Forked from rpms/glibc 5 years ago
Clone

Blame SOURCES/glibc-rh1375235-9.patch

00db10
From 8430e4e5d5da2ed14d33638c476654c2a0474d2a Mon Sep 17 00:00:00 2001
00db10
From: Stefan Liebler <stli@linux.vnet.ibm.com>
00db10
Date: Thu, 27 Jul 2017 10:53:59 +0200
00db10
Subject: [PATCH 09/10] S390: Use cu24 instruction for converting from utf16 to
00db10
 utf32.
00db10
00db10
upstream-commit f0137ff31da85bc7d9bd4e621af958d8b7249475
00db10
00db10
This patch adds an ifunc variant to use the cu instruction on arch12 CPUs.
00db10
This new ifunc variant can be built if binutils support z13 vector
00db10
instructions.  At runtime, HWCAP_S390_VXE decides if we can use the
00db10
cu24 instruction.
00db10
00db10
ChangeLog:
00db10
00db10
	* sysdeps/s390/utf16-utf32-z9.c (__from_utf16_loop_vx_cu):
00db10
	Use vector and cu24 instruction.
00db10
	* sysdeps/s390/multiarch/utf16-utf32-z9.c:
00db10
	Add __from_utf16_loop_vx_cu in ifunc resolver.
00db10
---
00db10
 sysdeps/s390/multiarch/utf16-utf32-z9.c |   8 ++-
00db10
 sysdeps/s390/utf16-utf32-z9.c           | 113 +++++++++++++++++++++++++++++++-
00db10
 2 files changed, 117 insertions(+), 4 deletions(-)
00db10
00db10
diff --git a/sysdeps/s390/multiarch/utf16-utf32-z9.c b/sysdeps/s390/multiarch/utf16-utf32-z9.c
00db10
index ded3cc2..917de5f 100644
00db10
--- a/sysdeps/s390/multiarch/utf16-utf32-z9.c
00db10
+++ b/sysdeps/s390/multiarch/utf16-utf32-z9.c
00db10
@@ -32,9 +32,11 @@ strong_alias (SINGLE_NAME (TO_LOOP_DEFAULT), SINGLE_NAME (TO_LOOP))
00db10
 
00db10
 /* Generate ifunc'ed loop functions for FROM/TO_LOOP.  */
00db10
 s390_libc_ifunc_expr (FROM_LOOP_DEFAULT, FROM_LOOP,
00db10
-		      (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX))
00db10
-		      ? FROM_LOOP_VX
00db10
-		      : FROM_LOOP_DEFAULT);
00db10
+		      (HAVE_FROM_VX_CU && (hwcap & HWCAP_S390_VXE))
00db10
+		      ? FROM_LOOP_VX_CU
00db10
+		      : (HAVE_FROM_VX && (hwcap & HWCAP_S390_VX))
00db10
+			? FROM_LOOP_VX
00db10
+			: FROM_LOOP_DEFAULT);
00db10
 
00db10
 s390_libc_ifunc_expr (TO_LOOP_DEFAULT, TO_LOOP,
00db10
 		      (HAVE_TO_VX_CU && (hwcap & HWCAP_S390_VXE))
00db10
diff --git a/sysdeps/s390/utf16-utf32-z9.c b/sysdeps/s390/utf16-utf32-z9.c
00db10
index b1728d6..c09a425 100644
00db10
--- a/sysdeps/s390/utf16-utf32-z9.c
00db10
+++ b/sysdeps/s390/utf16-utf32-z9.c
00db10
@@ -39,10 +39,12 @@
00db10
 
00db10
 #if defined HAVE_S390_VX_ASM_SUPPORT && defined USE_MULTIARCH
00db10
 # define HAVE_FROM_VX		1
00db10
+# define HAVE_FROM_VX_CU	1
00db10
 # define HAVE_TO_VX		1
00db10
 # define HAVE_TO_VX_CU		1
00db10
 #else
00db10
 # define HAVE_FROM_VX		0
00db10
+# define HAVE_FROM_VX_CU	0
00db10
 # define HAVE_TO_VX		0
00db10
 # define HAVE_TO_VX_CU		0
00db10
 #endif
00db10
@@ -383,7 +385,6 @@ gconv_end (struct __gconv_step *data)
00db10
     STANDARD_FROM_LOOP_ERR_HANDLER (2);					\
00db10
   }
00db10
 
00db10
-
00db10
 /* Generate loop-function with hardware vector instructions.  */
00db10
 # define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
00db10
 # define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
00db10
@@ -397,6 +398,116 @@ gconv_end (struct __gconv_step *data)
00db10
 # define FROM_LOOP_VX		NULL
00db10
 #endif /* HAVE_FROM_VX != 1  */
00db10
 
00db10
+#if HAVE_FROM_VX_CU == 1
00db10
+#define BODY_FROM_VX_CU							\
00db10
+  {									\
00db10
+    register const unsigned char* pInput asm ("8") = inptr;		\
00db10
+    register size_t inlen asm ("9") = inend - inptr;			\
00db10
+    register unsigned char* pOutput asm ("10") = outptr;		\
00db10
+    register size_t outlen asm ("11") = outend - outptr;		\
00db10
+    unsigned long tmp, tmp2, tmp3;					\
00db10
+    asm volatile (".machine push\n\t"					\
00db10
+		  ".machine \"z13\"\n\t"				\
00db10
+		  ".machinemode \"zarch_nohighgprs\"\n\t"		\
00db10
+		  /* Setup to check for surrogates.  */			\
00db10
+		  "    larl %[R_TMP],9f\n\t"				\
00db10
+		  "    vlm %%v30,%%v31,0(%[R_TMP])\n\t"			\
00db10
+		  CONVERT_32BIT_SIZE_T ([R_INLEN])			\
00db10
+		  CONVERT_32BIT_SIZE_T ([R_OUTLEN])			\
00db10
+		  /* Loop which handles UTF-16 chars <0xd800, >0xdfff.  */ \
00db10
+		  "0:  clgijl %[R_INLEN],16,20f\n\t"			\
00db10
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
00db10
+		  "1:  vl %%v16,0(%[R_IN])\n\t"				\
00db10
+		  /* Check for surrogate chars.  */			\
00db10
+		  "    vstrchs %%v19,%%v16,%%v30,%%v31\n\t"		\
00db10
+		  "    jno 10f\n\t"					\
00db10
+		  /* Enlarge to UTF-32.  */				\
00db10
+		  "    vuplhh %%v17,%%v16\n\t"				\
00db10
+		  "    la %[R_IN],16(%[R_IN])\n\t"			\
00db10
+		  "    vupllh %%v18,%%v16\n\t"				\
00db10
+		  "    aghi %[R_INLEN],-16\n\t"				\
00db10
+		  /* Store 32 bytes to buf_out.  */			\
00db10
+		  "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
00db10
+		  "    aghi %[R_OUTLEN],-32\n\t"			\
00db10
+		  "    la %[R_OUT],32(%[R_OUT])\n\t"			\
00db10
+		  "    clgijl %[R_INLEN],16,20f\n\t"			\
00db10
+		  "    clgijl %[R_OUTLEN],32,20f\n\t"			\
00db10
+		  "    j 1b\n\t"					\
00db10
+		  /* Setup to check for ch >= 0xd800 && ch <= 0xdfff. (v30, v31)  */ \
00db10
+		  "9:  .short 0xd800,0xdfff,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
00db10
+		  "    .short 0xa000,0xc000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
00db10
+		  /* At least one uint16_t is in range of surrogates.	\
00db10
+		     Store the preceding chars.  */			\
00db10
+		  "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
00db10
+		  "    vuplhh %%v17,%%v16\n\t"				\
00db10
+		  "    sllg %[R_TMP3],%[R_TMP],1\n\t" /* Number of out bytes.  */ \
00db10
+		  "    ahik %[R_TMP2],%[R_TMP3],-1\n\t" /* Highest index to store.  */ \
00db10
+		  "    jl 20f\n\t"					\
00db10
+		  "    vstl %%v17,%[R_TMP2],0(%[R_OUT])\n\t"		\
00db10
+		  "    vupllh %%v18,%%v16\n\t"				\
00db10
+		  "    ahi %[R_TMP2],-16\n\t"				\
00db10
+		  "    jl 11f\n\t"					\
00db10
+		  "    vstl %%v18,%[R_TMP2],16(%[R_OUT])\n\t"		\
00db10
+		  "11: \n\t" /* Update pointers.  */			\
00db10
+		  "    la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
00db10
+		  "    slgr %[R_INLEN],%[R_TMP]\n\t"			\
00db10
+		  "    la %[R_OUT],0(%[R_TMP3],%[R_OUT])\n\t"		\
00db10
+		  "    slgr %[R_OUTLEN],%[R_TMP3]\n\t"			\
00db10
+		  /* Handles UTF16 surrogates with convert instruction.  */ \
00db10
+		  "20: cu24 %[R_OUT],%[R_IN],1\n\t"			\
00db10
+		  "    jo 0b\n\t" /* Try vector implementation again.  */ \
00db10
+		  "    lochil %[R_RES],%[RES_OUT_FULL]\n\t" /* cc == 1.  */ \
00db10
+		  "    lochih %[R_RES],%[RES_IN_ILL]\n\t" /* cc == 2.  */ \
00db10
+		  ".machine pop"					\
00db10
+		  : /* outputs */ [R_IN] "+a" (pInput)			\
00db10
+		    , [R_INLEN] "+d" (inlen), [R_OUT] "+a" (pOutput)	\
00db10
+		    , [R_OUTLEN] "+d" (outlen), [R_TMP] "=a" (tmp)	\
00db10
+		    , [R_TMP2] "=d" (tmp2), [R_TMP3] "=a" (tmp3)	\
00db10
+		    , [R_RES] "+d" (result)				\
00db10
+		  : /* inputs */					\
00db10
+		    [RES_OUT_FULL] "i" (__GCONV_FULL_OUTPUT)		\
00db10
+		    , [RES_IN_ILL] "i" (__GCONV_ILLEGAL_INPUT)		\
00db10
+		  : /* clobber list */ "memory", "cc"			\
00db10
+		    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
00db10
+		    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
00db10
+		    ASM_CLOBBER_VR ("v30") ASM_CLOBBER_VR ("v31")	\
00db10
+		  );							\
00db10
+    inptr = pInput;							\
00db10
+    outptr = pOutput;							\
00db10
+									\
00db10
+    if (__glibc_likely (inlen == 0)					\
00db10
+	|| result == __GCONV_FULL_OUTPUT)				\
00db10
+      break;								\
00db10
+    if (inlen == 1)							\
00db10
+      {									\
00db10
+	/* Input does not contain a complete utf16 character.  */	\
00db10
+	result = __GCONV_INCOMPLETE_INPUT;				\
00db10
+	break;								\
00db10
+      }									\
00db10
+    else if (result != __GCONV_ILLEGAL_INPUT)				\
00db10
+      {									\
00db10
+	/* Input is >= 2 and < 4 bytes (as cu24 would have processed	\
00db10
+	   a possible next utf16 character) and not illegal.		\
00db10
+	   => we have a single high surrogate at end of input.  */	\
00db10
+	result = __GCONV_INCOMPLETE_INPUT;				\
00db10
+	break;								\
00db10
+      }									\
00db10
+									\
00db10
+    STANDARD_FROM_LOOP_ERR_HANDLER (2);					\
00db10
+  }
00db10
+
00db10
+/* Generate loop-function with hardware vector and utf-convert instructions.  */
00db10
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
00db10
+# define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
00db10
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
00db10
+# define FROM_LOOP_VX_CU	__from_utf16_loop_vx_cu
00db10
+# define LOOPFCT		FROM_LOOP_VX_CU
00db10
+# define LOOP_NEED_FLAGS
00db10
+# define BODY			BODY_FROM_VX_CU
00db10
+# include <iconv/loop.c>
00db10
+#else
00db10
+# define FROM_LOOP_VX_CU	NULL
00db10
+#endif /* HAVE_FROM_VX_CU != 1  */
00db10
 
00db10
 /* Conversion from UTF-32 internal/BE to UTF-16.  */
00db10
 
00db10
-- 
00db10
1.8.3.1
00db10