ce426f
From 53f860e80162b09c44b48f207342c1452289072c Mon Sep 17 00:00:00 2001
ce426f
From: Stefan Liebler <stli@linux.vnet.ibm.com>
ce426f
Date: Mon, 7 Nov 2016 15:37:29 +0100
ce426f
Subject: [PATCH 04/17] S390: Optimize builtin iconv-modules.
ce426f
ce426f
Upstream commit 3b704e26b33e35d99de920f8462d8e438f89be39
ce426f
ce426f
This patch introduces an s390-specific gconv_simple.c file which provides
ce426f
optimized versions for z13 with vector instructions, which will be chosen at
ce426f
runtime via ifunc.
ce426f
The optimized conversions can convert between internal and ascii, ucs4, ucs4le,
ce426f
ucs2, ucs2le.
ce426f
If the build-environment lacks vector support, then iconv/gconv_simple.c
ce426f
is used without any change. Otherwise iconv/gconv_simple.c is used to create
ce426f
conversion loop routines without vector instructions as fallback, if vector
ce426f
instructions aren't available at runtime.
ce426f
ce426f
ChangeLog:
ce426f
ce426f
	* sysdeps/s390/multiarch/gconv_simple.c: New File.
ce426f
	* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add gconv_simple.
ce426f
---
ce426f
 sysdeps/s390/multiarch/Makefile       |    4 +
ce426f
 sysdeps/s390/multiarch/gconv_simple.c | 1266 +++++++++++++++++++++++++++++++++
ce426f
 2 files changed, 1270 insertions(+)
ce426f
 create mode 100644 sysdeps/s390/multiarch/gconv_simple.c
ce426f
ce426f
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile
ce426f
index 11ad2b9..24949cd 100644
ce426f
--- a/sysdeps/s390/multiarch/Makefile
ce426f
+++ b/sysdeps/s390/multiarch/Makefile
ce426f
@@ -52,3 +52,7 @@ $(move-if-change) $(@:stmp=T) $(@:stmp=h)
ce426f
 touch $@
ce426f
 endef
ce426f
 endif
ce426f
+
ce426f
+ifeq ($(subdir),iconv)
ce426f
+sysdep_routines += gconv_simple
ce426f
+endif
ce426f
diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
ce426f
new file mode 100644
ce426f
index 0000000..dc53a48
ce426f
--- /dev/null
ce426f
+++ b/sysdeps/s390/multiarch/gconv_simple.c
ce426f
@@ -0,0 +1,1266 @@
ce426f
+/* Simple transformations functions - s390 version.
ce426f
+   Copyright (C) 2016 Free Software Foundation, Inc.
ce426f
+   This file is part of the GNU C Library.
ce426f
+
ce426f
+   The GNU C Library is free software; you can redistribute it and/or
ce426f
+   modify it under the terms of the GNU Lesser General Public
ce426f
+   License as published by the Free Software Foundation; either
ce426f
+   version 2.1 of the License, or (at your option) any later version.
ce426f
+
ce426f
+   The GNU C Library is distributed in the hope that it will be useful,
ce426f
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
ce426f
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
ce426f
+   Lesser General Public License for more details.
ce426f
+
ce426f
+   You should have received a copy of the GNU Lesser General Public
ce426f
+   License along with the GNU C Library; if not, see
ce426f
+   <http://www.gnu.org/licenses/>.  */
ce426f
+
ce426f
+#if defined HAVE_S390_VX_ASM_SUPPORT
ce426f
+# include <ifunc-resolve.h>
ce426f
+
ce426f
+# if defined HAVE_S390_VX_GCC_SUPPORT
ce426f
+#  define ASM_CLOBBER_VR(NR) , NR
ce426f
+# else
ce426f
+#  define ASM_CLOBBER_VR(NR)
ce426f
+# endif
ce426f
+
ce426f
+# define ICONV_C_NAME(NAME) __##NAME##_c
ce426f
+# define ICONV_VX_NAME(NAME) __##NAME##_vx
ce426f
+# define ICONV_VX_IFUNC(FUNC)						\
ce426f
+  extern __typeof (ICONV_C_NAME (FUNC)) __##FUNC;			\
ce426f
+  s390_vx_libc_ifunc (__##FUNC)						\
ce426f
+  int FUNC (struct __gconv_step *step, struct __gconv_step_data *data,	\
ce426f
+	    const unsigned char **inptrp, const unsigned char *inend,	\
ce426f
+	    unsigned char **outbufstart, size_t *irreversible,		\
ce426f
+	    int do_flush, int consume_incomplete)			\
ce426f
+  {									\
ce426f
+    return __##FUNC (step, data, inptrp, inend,outbufstart,		\
ce426f
+		     irreversible, do_flush, consume_incomplete);	\
ce426f
+  }
ce426f
+# define ICONV_VX_SINGLE(NAME)						\
ce426f
+  static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single")));
ce426f
+
ce426f
+/* Generate the transformations which are used, if the target machine does not
ce426f
+   support vector instructions.  */
ce426f
+# define __gconv_transform_ascii_internal		\
ce426f
+  ICONV_C_NAME (__gconv_transform_ascii_internal)
ce426f
+# define __gconv_transform_internal_ascii		\
ce426f
+  ICONV_C_NAME (__gconv_transform_internal_ascii)
ce426f
+# define __gconv_transform_internal_ucs4le		\
ce426f
+  ICONV_C_NAME (__gconv_transform_internal_ucs4le)
ce426f
+# define __gconv_transform_ucs4_internal		\
ce426f
+  ICONV_C_NAME (__gconv_transform_ucs4_internal)
ce426f
+# define __gconv_transform_ucs4le_internal		\
ce426f
+  ICONV_C_NAME (__gconv_transform_ucs4le_internal)
ce426f
+# define __gconv_transform_ucs2_internal		\
ce426f
+  ICONV_C_NAME (__gconv_transform_ucs2_internal)
ce426f
+# define __gconv_transform_ucs2reverse_internal		\
ce426f
+  ICONV_C_NAME (__gconv_transform_ucs2reverse_internal)
ce426f
+# define __gconv_transform_internal_ucs2		\
ce426f
+  ICONV_C_NAME (__gconv_transform_internal_ucs2)
ce426f
+# define __gconv_transform_internal_ucs2reverse		\
ce426f
+  ICONV_C_NAME (__gconv_transform_internal_ucs2reverse)
ce426f
+
ce426f
+
ce426f
+# include <iconv/gconv_simple.c>
ce426f
+
ce426f
+# undef __gconv_transform_ascii_internal
ce426f
+# undef __gconv_transform_internal_ascii
ce426f
+# undef __gconv_transform_internal_ucs4le
ce426f
+# undef __gconv_transform_ucs4_internal
ce426f
+# undef __gconv_transform_ucs4le_internal
ce426f
+# undef __gconv_transform_ucs2_internal
ce426f
+# undef __gconv_transform_ucs2reverse_internal
ce426f
+# undef __gconv_transform_internal_ucs2
ce426f
+# undef __gconv_transform_internal_ucs2reverse
ce426f
+
ce426f
+/* Now define the functions with vector support.  */
ce426f
+# if defined __s390x__
ce426f
+#  define CONVERT_32BIT_SIZE_T(REG)
ce426f
+# else
ce426f
+#  define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
ce426f
+# endif
ce426f
+
ce426f
+/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	1
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (ascii_internal_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (ascii_internal_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ascii_internal)
ce426f
+# define ONE_DIRECTION		1
ce426f
+
ce426f
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+# define LOOPFCT		FROM_LOOP
ce426f
+# define BODY_ORIG_ERROR						\
ce426f
+    /* The value is too large.  We don't try transliteration here since \
ce426f
+       this is not an error because of the lack of possibilities to	\
ce426f
+       represent the result.  This is a genuine bug in the input since	\
ce426f
+       ASCII does not allow such values.  */				\
ce426f
+    STANDARD_FROM_LOOP_ERR_HANDLER (1);
ce426f
+
ce426f
+# define BODY_ORIG							\
ce426f
+  {									\
ce426f
+    if (__glibc_unlikely (*inptr > '\x7f'))				\
ce426f
+      {									\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	/* It's an one byte sequence.  */				\
ce426f
+	*((uint32_t *) outptr) = *inptr++;				\
ce426f
+	outptr += sizeof (uint32_t);					\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    size_t len = inend - inptr;						\
ce426f
+    if (len > (outend - outptr) / 4)					\
ce426f
+      len = (outend - outptr) / 4;					\
ce426f
+    size_t loop_count, tmp;						\
ce426f
+    __asm__ volatile (".machine push\n\t"				\
ce426f
+		      ".machine \"z13\"\n\t"				\
ce426f
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
ce426f
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
ce426f
+		      "    vrepib %%v30,0x7f\n\t" /* For compare > 0x7f.  */ \
ce426f
+		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
ce426f
+		      "    vrepib %%v31,0x20\n\t"			\
ce426f
+		      "    clgije %[R_LI],0,1f\n\t"			\
ce426f
+		      "0:  \n\t" /* Handle 16-byte blocks.  */		\
ce426f
+		      "    vl %%v16,0(%[R_IN])\n\t"			\
ce426f
+		      /* Checking for values > 0x7f.  */		\
ce426f
+		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
ce426f
+		      "    jno 10f\n\t"					\
ce426f
+		      /* Enlarge to UCS4.  */				\
ce426f
+		      "    vuplhb %%v17,%%v16\n\t"			\
ce426f
+		      "    vupllb %%v18,%%v16\n\t"			\
ce426f
+		      "    vuplhh %%v19,%%v17\n\t"			\
ce426f
+		      "    vupllh %%v20,%%v17\n\t"			\
ce426f
+		      "    vuplhh %%v21,%%v18\n\t"			\
ce426f
+		      "    vupllh %%v22,%%v18\n\t"			\
ce426f
+		      /* Store 64bytes to buf_out.  */			\
ce426f
+		      "    vstm %%v19,%%v22,0(%[R_OUT])\n\t"		\
ce426f
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
ce426f
+		      "    la %[R_OUT],64(%[R_OUT])\n\t"		\
ce426f
+		      "    brctg %[R_LI],0b\n\t"			\
ce426f
+		      "    lghi %[R_LI],15\n\t"				\
ce426f
+		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
ce426f
+		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
ce426f
+		      /* Handle remaining bytes.  */			\
ce426f
+		      "1: aghik %[R_LI],%[R_LEN],-1\n\t"		\
ce426f
+		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
ce426f
+		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
ce426f
+		      /* Checking for values > 0x7f.  */		\
ce426f
+		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
ce426f
+		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
ce426f
+		      "    clr %[R_TMP],%[R_LI]\n\t"			\
ce426f
+		      "    locrh %[R_TMP],%[R_LEN]\n\t"			\
ce426f
+		      "    locghih %[R_LEN],0\n\t"			\
ce426f
+		      "    j 12f\n\t"					\
ce426f
+		      "10:\n\t"						\
ce426f
+		      /* Found a value > 0x7f.				\
ce426f
+			 Store the preceding chars.  */			\
ce426f
+		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
ce426f
+		      "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
ce426f
+		      "    sllk %[R_TMP],%[R_TMP],2\n\t"		\
ce426f
+		      "    ahi %[R_TMP],-1\n\t"				\
ce426f
+		      "    jl 20f\n\t"					\
ce426f
+		      "    lgr %[R_LI],%[R_TMP]\n\t"			\
ce426f
+		      "    vuplhb %%v17,%%v16\n\t"			\
ce426f
+		      "    vuplhh %%v19,%%v17\n\t"			\
ce426f
+		      "    vstl %%v19,%[R_LI],0(%[R_OUT])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 11f\n\t"					\
ce426f
+		      "    vupllh %%v20,%%v17\n\t"			\
ce426f
+		      "    vstl %%v20,%[R_LI],16(%[R_OUT])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 11f\n\t"					\
ce426f
+		      "    vupllb %%v18,%%v16\n\t"			\
ce426f
+		      "    vuplhh %%v21,%%v18\n\t"			\
ce426f
+		      "    vstl %%v21,%[R_LI],32(%[R_OUT])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 11f\n\t"					\
ce426f
+		      "    vupllh %%v22,%%v18\n\t"			\
ce426f
+		      "    vstl %%v22,%[R_LI],48(%[R_OUT])\n\t"		\
ce426f
+		      "11:\n\t"						\
ce426f
+		      "    la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t"	\
ce426f
+		      "20:\n\t"						\
ce426f
+		      ".machine pop"					\
ce426f
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			, [R_IN] "+a" (inptr)				\
ce426f
+			, [R_LEN] "+d" (len)				\
ce426f
+			, [R_LI] "=d" (loop_count)			\
ce426f
+			, [R_TMP] "=a" (tmp)				\
ce426f
+		      : /* inputs */					\
ce426f
+		      : /* clobber list*/ "memory", "cc"		\
ce426f
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
ce426f
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
ce426f
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
ce426f
+			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30")	\
ce426f
+			ASM_CLOBBER_VR ("v31")				\
ce426f
+		      );						\
ce426f
+    if (len > 0)							\
ce426f
+      {									\
ce426f
+	/* Found an invalid character at the next input byte.  */	\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+  }
ce426f
+
ce426f
+# define LOOP_NEED_FLAGS
ce426f
+# include <iconv/loop.c>
ce426f
+# include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+# undef BODY_ORIG_ERROR
ce426f
+ICONV_VX_IFUNC (__gconv_transform_ascii_internal)
ce426f
+
ce426f
+/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	4
ce426f
+# define MIN_NEEDED_TO		1
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (internal_ascii_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (internal_ascii_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ascii)
ce426f
+# define ONE_DIRECTION		1
ce426f
+
ce426f
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+# define LOOPFCT		FROM_LOOP
ce426f
+# define BODY_ORIG_ERROR						\
ce426f
+  UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);			\
ce426f
+  STANDARD_TO_LOOP_ERR_HANDLER (4);
ce426f
+
ce426f
+# define BODY_ORIG							\
ce426f
+  {									\
ce426f
+    if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f))		\
ce426f
+      {									\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	/* It's an one byte sequence.  */				\
ce426f
+	*outptr++ = *((const uint32_t *) inptr);			\
ce426f
+	inptr += sizeof (uint32_t);					\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    size_t len = (inend - inptr) / 4;					\
ce426f
+    if (len > outend - outptr)						\
ce426f
+      len = outend - outptr;						\
ce426f
+    size_t loop_count, tmp, tmp2;					\
ce426f
+    __asm__ volatile (".machine push\n\t"				\
ce426f
+		      ".machine \"z13\"\n\t"				\
ce426f
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
ce426f
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
ce426f
+		      /* Setup to check for ch > 0x7f.  */		\
ce426f
+		      "    vzero %%v21\n\t"				\
ce426f
+		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
ce426f
+		      "    vleih %%v21,8192,0\n\t"  /* element 0:   >  */ \
ce426f
+		      "    vleih %%v21,-8192,2\n\t" /* element 1: =<>  */ \
ce426f
+		      "    vleif %%v20,127,0\n\t"   /* element 0: 127  */ \
ce426f
+		      "    lghi %[R_TMP],0\n\t"				\
ce426f
+		      "    clgije %[R_LI],0,1f\n\t"			\
ce426f
+		      "0:\n\t"						\
ce426f
+		      "    vlm %%v16,%%v19,0(%[R_IN])\n\t"		\
ce426f
+		      /* Shorten to byte values.  */			\
ce426f
+		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
ce426f
+		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
ce426f
+		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
ce426f
+		      /* Checking for values > 0x7f.  */		\
ce426f
+		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 10f\n\t"					\
ce426f
+		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 11f\n\t"					\
ce426f
+		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 12f\n\t"					\
ce426f
+		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 13f\n\t"					\
ce426f
+		      /* Store 16bytes to outptr.  */			\
ce426f
+		      "    vst %%v23,0(%[R_OUT])\n\t"			\
ce426f
+		      "    la %[R_IN],64(%[R_IN])\n\t"			\
ce426f
+		      "    la %[R_OUT],16(%[R_OUT])\n\t"		\
ce426f
+		      "    brctg %[R_LI],0b\n\t"			\
ce426f
+		      "    lghi %[R_LI],15\n\t"				\
ce426f
+		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
ce426f
+		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
ce426f
+		      /* Handle remaining bytes.  */			\
ce426f
+		      "1: sllg %[R_LI],%[R_LEN],2\n\t"			\
ce426f
+		      "    aghi %[R_LI],-1\n\t"				\
ce426f
+		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
ce426f
+		      /* Load remaining 1...63 bytes.  */		\
ce426f
+		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 2f\n\t"					\
ce426f
+		      "    vll %%v17,%[R_LI],16(%[R_IN])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 2f\n\t"					\
ce426f
+		      "    vll %%v18,%[R_LI],32(%[R_IN])\n\t"		\
ce426f
+		      "    ahi %[R_LI],-16\n\t"				\
ce426f
+		      "    jl 2f\n\t"					\
ce426f
+		      "    vll %%v19,%[R_LI],48(%[R_IN])\n\t"		\
ce426f
+		      "2:\n\t"						\
ce426f
+		      /* Shorten to byte values.  */			\
ce426f
+		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
ce426f
+		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
ce426f
+		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
ce426f
+		      "    sllg %[R_LI],%[R_LEN],2\n\t"			\
ce426f
+		      "    aghi %[R_LI],-16\n\t"			\
ce426f
+		      "    jl 3f\n\t" /* v16 is not fully loaded.  */	\
ce426f
+		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 10f\n\t"					\
ce426f
+		      "    aghi %[R_LI],-16\n\t"			\
ce426f
+		      "    jl 4f\n\t" /* v17 is not fully loaded.  */	\
ce426f
+		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 11f\n\t"					\
ce426f
+		      "    aghi %[R_LI],-16\n\t"			\
ce426f
+		      "    jl 5f\n\t" /* v18 is not fully loaded.  */	\
ce426f
+		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
ce426f
+		      "    jno 12f\n\t"					\
ce426f
+		      "    aghi %[R_LI],-16\n\t"			\
ce426f
+		      /* v19 is not fully loaded. */			\
ce426f
+		      "    lghi %[R_TMP],12\n\t"			\
ce426f
+		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
ce426f
+		      "6: vlgvb %[R_I],%%v22,7\n\t"			\
ce426f
+		      "    aghi %[R_LI],16\n\t"				\
ce426f
+		      "    clrjl %[R_I],%[R_LI],14f\n\t"		\
ce426f
+		      "    lgr %[R_I],%[R_LEN]\n\t"			\
ce426f
+		      "    lghi %[R_LEN],0\n\t"				\
ce426f
+		      "    j 15f\n\t"					\
ce426f
+		      "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      "    j 6b\n\t"					\
ce426f
+		      "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
ce426f
+		      "    lghi %[R_TMP],4\n\t"				\
ce426f
+		      "    j 6b\n\t"					\
ce426f
+		      "5: vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
ce426f
+		      "    lghi %[R_TMP],8\n\t"				\
ce426f
+		      "    j 6b\n\t"					\
ce426f
+		      /* Found a value > 0x7f.  */			\
ce426f
+		      "13: ahi %[R_TMP],4\n\t"				\
ce426f
+		      "12: ahi %[R_TMP],4\n\t"				\
ce426f
+		      "11: ahi %[R_TMP],4\n\t"				\
ce426f
+		      "10: vlgvb %[R_I],%%v22,7\n\t"			\
ce426f
+		      "14: srlg %[R_I],%[R_I],2\n\t"			\
ce426f
+		      "    agr %[R_I],%[R_TMP]\n\t"			\
ce426f
+		      "    je 20f\n\t"					\
ce426f
+		      /* Store characters before invalid one...  */	\
ce426f
+		      "15: aghi %[R_I],-1\n\t"				\
ce426f
+		      "    vstl %%v23,%[R_I],0(%[R_OUT])\n\t"		\
ce426f
+		      /* ... and update pointers.  */			\
ce426f
+		      "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"		\
ce426f
+		      "    sllg %[R_I],%[R_I],2\n\t"			\
ce426f
+		      "    la %[R_IN],4(%[R_I],%[R_IN])\n\t"		\
ce426f
+		      "20:\n\t"						\
ce426f
+		      ".machine pop"					\
ce426f
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			, [R_IN] "+a" (inptr)				\
ce426f
+			, [R_LEN] "+d" (len)				\
ce426f
+			, [R_LI] "=d" (loop_count)			\
ce426f
+			, [R_I] "=a" (tmp2)				\
ce426f
+			, [R_TMP] "=d" (tmp)				\
ce426f
+		      : /* inputs */					\
ce426f
+		      : /* clobber list*/ "memory", "cc"		\
ce426f
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
ce426f
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
ce426f
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
ce426f
+			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23")	\
ce426f
+			ASM_CLOBBER_VR ("v24")				\
ce426f
+		      );						\
ce426f
+    if (len > 0)							\
ce426f
+      {									\
ce426f
+	/* Found an invalid character > 0x7f at next character.  */	\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define LOOP_NEED_FLAGS
ce426f
+# include <iconv/loop.c>
ce426f
+# include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+# undef BODY_ORIG_ERROR
ce426f
+ICONV_VX_IFUNC (__gconv_transform_internal_ascii)
ce426f
+
ce426f
+
ce426f
+/* Convert from internal UCS4 to UCS4 little endian form.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	4
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (internal_ucs4le_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs4le)
ce426f
+# define ONE_DIRECTION		0
ce426f
+
ce426f
+static inline int
ce426f
+__attribute ((always_inline))
ce426f
+ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
ce426f
+				      struct __gconv_step_data *step_data,
ce426f
+				      const unsigned char **inptrp,
ce426f
+				      const unsigned char *inend,
ce426f
+				      unsigned char **outptrp,
ce426f
+				      unsigned char *outend,
ce426f
+				      size_t *irreversible)
ce426f
+{
ce426f
+  const unsigned char *inptr = *inptrp;
ce426f
+  unsigned char *outptr = *outptrp;
ce426f
+  int result;
ce426f
+  size_t len = MIN (inend - inptr, outend - outptr) / 4;
ce426f
+  size_t loop_count;
ce426f
+  __asm__ volatile (".machine push\n\t"
ce426f
+		    ".machine \"z13\"\n\t"
ce426f
+		    ".machinemode \"zarch_nohighgprs\"\n\t"
ce426f
+		    CONVERT_32BIT_SIZE_T ([R_LEN])
ce426f
+		    "    bras %[R_LI],1f\n\t"
ce426f
+		    /* Vector permute mask:  */
ce426f
+		    "    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
ce426f
+		    "1:  vl %%v20,0(%[R_LI])\n\t"
ce426f
+		    /* Process 64byte (16char) blocks.  */
ce426f
+		    "    srlg %[R_LI],%[R_LEN],4\n\t"
ce426f
+		    "    clgije %[R_LI],0,10f\n\t"
ce426f
+		    "0:  vlm %%v16,%%v19,0(%[R_IN])\n\t"
ce426f
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
ce426f
+		    "    vperm %%v17,%%v17,%%v17,%%v20\n\t"
ce426f
+		    "    vperm %%v18,%%v18,%%v18,%%v20\n\t"
ce426f
+		    "    vperm %%v19,%%v19,%%v19,%%v20\n\t"
ce426f
+		    "    vstm %%v16,%%v19,0(%[R_OUT])\n\t"
ce426f
+		    "    la %[R_IN],64(%[R_IN])\n\t"
ce426f
+		    "    la %[R_OUT],64(%[R_OUT])\n\t"
ce426f
+		    "    brctg %[R_LI],0b\n\t"
ce426f
+		    "    llgfr %[R_LEN],%[R_LEN]\n\t"
ce426f
+		    "    nilf %[R_LEN],15\n\t"
ce426f
+		    /* Process 16byte (4char) blocks.  */
ce426f
+		    "10: srlg %[R_LI],%[R_LEN],2\n\t"
ce426f
+		    "    clgije %[R_LI],0,20f\n\t"
ce426f
+		    "11: vl %%v16,0(%[R_IN])\n\t"
ce426f
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
ce426f
+		    "    vst %%v16,0(%[R_OUT])\n\t"
ce426f
+		    "    la %[R_IN],16(%[R_IN])\n\t"
ce426f
+		    "    la %[R_OUT],16(%[R_OUT])\n\t"
ce426f
+		    "    brctg %[R_LI],11b\n\t"
ce426f
+		    "    nill %[R_LEN],3\n\t"
ce426f
+		    /* Process <16bytes.  */
ce426f
+		    "20: sll %[R_LEN],2\n\t"
ce426f
+		    "    ahi %[R_LEN],-1\n\t"
ce426f
+		    "    jl 30f\n\t"
ce426f
+		    "    vll %%v16,%[R_LEN],0(%[R_IN])\n\t"
ce426f
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
ce426f
+		    "    vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t"
ce426f
+		    "    la %[R_IN],1(%[R_LEN],%[R_IN])\n\t"
ce426f
+		    "    la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t"
ce426f
+		    "30: \n\t"
ce426f
+		    ".machine pop"
ce426f
+		    : /* outputs */ [R_OUT] "+a" (outptr)
ce426f
+		      , [R_IN] "+a" (inptr)
ce426f
+		      , [R_LI] "=a" (loop_count)
ce426f
+		      , [R_LEN] "+a" (len)
ce426f
+		    : /* inputs */
ce426f
+		    : /* clobber list*/ "memory", "cc"
ce426f
+		      ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")
ce426f
+		      ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")
ce426f
+		      ASM_CLOBBER_VR ("v20")
ce426f
+		    );
ce426f
+  *inptrp = inptr;
ce426f
+  *outptrp = outptr;
ce426f
+
ce426f
+  /* Determine the status.  */
ce426f
+  if (*inptrp == inend)
ce426f
+    result = __GCONV_EMPTY_INPUT;
ce426f
+  else if (*outptrp + 4 > outend)
ce426f
+    result = __GCONV_FULL_OUTPUT;
ce426f
+  else
ce426f
+    result = __GCONV_INCOMPLETE_INPUT;
ce426f
+
ce426f
+  return result;
ce426f
+}
ce426f
+
ce426f
+ICONV_VX_SINGLE (internal_ucs4le_loop)
ce426f
+# include <iconv/skeleton.c>
ce426f
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le)
ce426f
+
ce426f
+
ce426f
+/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
ce426f
+   for the other direction we have to check for correct values here.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	4
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (ucs4_internal_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (ucs4_internal_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4_internal)
ce426f
+# define ONE_DIRECTION		0
ce426f
+
ce426f
+
ce426f
+static inline int
ce426f
+__attribute ((always_inline))
ce426f
+ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
ce426f
+				    struct __gconv_step_data *step_data,
ce426f
+				    const unsigned char **inptrp,
ce426f
+				    const unsigned char *inend,
ce426f
+				    unsigned char **outptrp,
ce426f
+				    unsigned char *outend,
ce426f
+				    size_t *irreversible)
ce426f
+{
ce426f
+  int flags = step_data->__flags;
ce426f
+  const unsigned char *inptr = *inptrp;
ce426f
+  unsigned char *outptr = *outptrp;
ce426f
+  int result;
ce426f
+  size_t len, loop_count;
ce426f
+  do
ce426f
+    {
ce426f
+      len = MIN (inend - inptr, outend - outptr) / 4;
ce426f
+      __asm__ volatile (".machine push\n\t"
ce426f
+			".machine \"z13\"\n\t"
ce426f
+			".machinemode \"zarch_nohighgprs\"\n\t"
ce426f
+			CONVERT_32BIT_SIZE_T ([R_LEN])
ce426f
+			/* Setup to check for ch > 0x7fffffff.  */
ce426f
+			"    larl %[R_LI],9f\n\t"
ce426f
+			"    vlm %%v20,%%v21,0(%[R_LI])\n\t"
ce426f
+			"    srlg %[R_LI],%[R_LEN],2\n\t"
ce426f
+			"    clgije %[R_LI],0,1f\n\t"
ce426f
+			/* Process 16byte (4char) blocks.  */
ce426f
+			"0:  vl %%v16,0(%[R_IN])\n\t"
ce426f
+			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
ce426f
+			"    jno 10f\n\t"
ce426f
+			"    vst %%v16,0(%[R_OUT])\n\t"
ce426f
+			"    la %[R_IN],16(%[R_IN])\n\t"
ce426f
+			"    la %[R_OUT],16(%[R_OUT])\n\t"
ce426f
+			"    brctg %[R_LI],0b\n\t"
ce426f
+			"    llgfr %[R_LEN],%[R_LEN]\n\t"
ce426f
+			"    nilf %[R_LEN],3\n\t"
ce426f
+			/* Process <16bytes.  */
ce426f
+			"1:  sll %[R_LEN],2\n\t"
ce426f
+			"    ahik %[R_LI],%[R_LEN],-1\n\t"
ce426f
+			"    jl 20f\n\t" /* No further bytes available.  */
ce426f
+			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
ce426f
+			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
ce426f
+			"    vlgvb %[R_LI],%%v22,7\n\t"
ce426f
+			"    clr %[R_LI],%[R_LEN]\n\t"
ce426f
+			"    locgrhe %[R_LI],%[R_LEN]\n\t"
ce426f
+			"    locghihe %[R_LEN],0\n\t"
ce426f
+			"    j 11f\n\t"
ce426f
+			/* v20: Vector string range compare values.  */
ce426f
+			"9:  .long 0x7fffffff,0x0,0x0,0x0\n\t"
ce426f
+			/* v21: Vector string range compare control-bits.
ce426f
+			   element 0: >; element 1: =<> (always true)  */
ce426f
+			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
ce426f
+			/* Found a value > 0x7fffffff.  */
ce426f
+			"10: vlgvb %[R_LI],%%v22,7\n\t"
ce426f
+			/* Store characters before invalid one.  */
ce426f
+			"11: aghi %[R_LI],-1\n\t"
ce426f
+			"    jl 20f\n\t"
ce426f
+			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
ce426f
+			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
ce426f
+			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
ce426f
+			"20:\n\t"
ce426f
+			".machine pop"
ce426f
+			: /* outputs */ [R_OUT] "+a" (outptr)
ce426f
+			  , [R_IN] "+a" (inptr)
ce426f
+			  , [R_LI] "=a" (loop_count)
ce426f
+			  , [R_LEN] "+d" (len)
ce426f
+			: /* inputs */
ce426f
+			: /* clobber list*/ "memory", "cc"
ce426f
+			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
ce426f
+			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
ce426f
+			);
ce426f
+      if (len > 0)
ce426f
+	{
ce426f
+	  /* The value is too large.  We don't try transliteration here since
ce426f
+	     this is not an error because of the lack of possibilities to
ce426f
+	     represent the result.  This is a genuine bug in the input since
ce426f
+	     UCS4 does not allow such values.  */
ce426f
+	  if (irreversible == NULL)
ce426f
+	    /* We are transliterating, don't try to correct anything.  */
ce426f
+	    return __GCONV_ILLEGAL_INPUT;
ce426f
+
ce426f
+	  if (flags & __GCONV_IGNORE_ERRORS)
ce426f
+	    {
ce426f
+	      /* Just ignore this character.  */
ce426f
+	      ++*irreversible;
ce426f
+	      inptr += 4;
ce426f
+	      continue;
ce426f
+	    }
ce426f
+
ce426f
+	  *inptrp = inptr;
ce426f
+	  *outptrp = outptr;
ce426f
+	  return __GCONV_ILLEGAL_INPUT;
ce426f
+	}
ce426f
+    }
ce426f
+  while (len > 0);
ce426f
+
ce426f
+  *inptrp = inptr;
ce426f
+  *outptrp = outptr;
ce426f
+
ce426f
+  /* Determine the status.  */
ce426f
+  if (*inptrp == inend)
ce426f
+    result = __GCONV_EMPTY_INPUT;
ce426f
+  else if (*outptrp + 4 > outend)
ce426f
+    result = __GCONV_FULL_OUTPUT;
ce426f
+  else
ce426f
+    result = __GCONV_INCOMPLETE_INPUT;
ce426f
+
ce426f
+  return result;
ce426f
+}
ce426f
+
ce426f
+ICONV_VX_SINGLE (ucs4_internal_loop)
ce426f
+# include <iconv/skeleton.c>
ce426f
+ICONV_VX_IFUNC (__gconv_transform_ucs4_internal)
ce426f
+
ce426f
+
ce426f
+/* Transform from UCS4-LE to the internal encoding.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	4
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (ucs4le_internal_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4le_internal)
ce426f
+# define ONE_DIRECTION		0
ce426f
+
ce426f
+static inline int
ce426f
+__attribute ((always_inline))
ce426f
+ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
ce426f
+				      struct __gconv_step_data *step_data,
ce426f
+				      const unsigned char **inptrp,
ce426f
+				      const unsigned char *inend,
ce426f
+				      unsigned char **outptrp,
ce426f
+				      unsigned char *outend,
ce426f
+				      size_t *irreversible)
ce426f
+{
ce426f
+  int flags = step_data->__flags;
ce426f
+  const unsigned char *inptr = *inptrp;
ce426f
+  unsigned char *outptr = *outptrp;
ce426f
+  int result;
ce426f
+  size_t len, loop_count;
ce426f
+  do
ce426f
+    {
ce426f
+      len = MIN (inend - inptr, outend - outptr) / 4;
ce426f
+      __asm__ volatile (".machine push\n\t"
ce426f
+			".machine \"z13\"\n\t"
ce426f
+			".machinemode \"zarch_nohighgprs\"\n\t"
ce426f
+			CONVERT_32BIT_SIZE_T ([R_LEN])
ce426f
+			/* Setup to check for ch > 0x7fffffff.  */
ce426f
+			"    larl %[R_LI],9f\n\t"
ce426f
+			"    vlm %%v20,%%v22,0(%[R_LI])\n\t"
ce426f
+			"    srlg %[R_LI],%[R_LEN],2\n\t"
ce426f
+			"    clgije %[R_LI],0,1f\n\t"
ce426f
+			/* Process 16byte (4char) blocks.  */
ce426f
+			"0:  vl %%v16,0(%[R_IN])\n\t"
ce426f
+			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
ce426f
+			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
ce426f
+			"    jno 10f\n\t"
ce426f
+			"    vst %%v16,0(%[R_OUT])\n\t"
ce426f
+			"    la %[R_IN],16(%[R_IN])\n\t"
ce426f
+			"    la %[R_OUT],16(%[R_OUT])\n\t"
ce426f
+			"    brctg %[R_LI],0b\n\t"
ce426f
+			"    llgfr %[R_LEN],%[R_LEN]\n\t"
ce426f
+			"    nilf %[R_LEN],3\n\t"
ce426f
+			/* Process <16bytes.  */
ce426f
+			"1:  sll %[R_LEN],2\n\t"
ce426f
+			"    ahik %[R_LI],%[R_LEN],-1\n\t"
ce426f
+			"    jl 20f\n\t" /* No further bytes available.  */
ce426f
+			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
ce426f
+			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
ce426f
+			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
ce426f
+			"    vlgvb %[R_LI],%%v23,7\n\t"
ce426f
+			"    clr %[R_LI],%[R_LEN]\n\t"
ce426f
+			"    locgrhe %[R_LI],%[R_LEN]\n\t"
ce426f
+			"    locghihe %[R_LEN],0\n\t"
ce426f
+			"    j 11f\n\t"
ce426f
+			/* v20: Vector string range compare values.  */
ce426f
+			"9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
ce426f
+			/* v21: Vector string range compare control-bits.
ce426f
+			   element 0: >; element 1: =<> (always true)  */
ce426f
+			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
ce426f
+			/* v22: Vector permute mask.  */
ce426f
+			"    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
ce426f
+			/* Found a value > 0x7fffffff.  */
ce426f
+			"10: vlgvb %[R_LI],%%v23,7\n\t"
ce426f
+			/* Store characters before invalid one.  */
ce426f
+			"11: aghi %[R_LI],-1\n\t"
ce426f
+			"    jl 20f\n\t"
ce426f
+			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
ce426f
+			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
ce426f
+			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
ce426f
+			"20:\n\t"
ce426f
+			".machine pop"
ce426f
+			: /* outputs */ [R_OUT] "+a" (outptr)
ce426f
+			  , [R_IN] "+a" (inptr)
ce426f
+			  , [R_LI] "=a" (loop_count)
ce426f
+			  , [R_LEN] "+d" (len)
ce426f
+			: /* inputs */
ce426f
+			: /* clobber list*/ "memory", "cc"
ce426f
+			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
ce426f
+			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
ce426f
+			  ASM_CLOBBER_VR ("v23")
ce426f
+			);
ce426f
+      if (len > 0)
ce426f
+	{
ce426f
+	  /* The value is too large.  We don't try transliteration here since
ce426f
+	     this is not an error because of the lack of possibilities to
ce426f
+	     represent the result.  This is a genuine bug in the input since
ce426f
+	     UCS4 does not allow such values.  */
ce426f
+	  if (irreversible == NULL)
ce426f
+	    /* We are transliterating, don't try to correct anything.  */
ce426f
+	    return __GCONV_ILLEGAL_INPUT;
ce426f
+
ce426f
+	  if (flags & __GCONV_IGNORE_ERRORS)
ce426f
+	    {
ce426f
+	      /* Just ignore this character.  */
ce426f
+	      ++*irreversible;
ce426f
+	      inptr += 4;
ce426f
+	      continue;
ce426f
+	    }
ce426f
+
ce426f
+	  *inptrp = inptr;
ce426f
+	  *outptrp = outptr;
ce426f
+	  return __GCONV_ILLEGAL_INPUT;
ce426f
+	}
ce426f
+    }
ce426f
+  while (len > 0);
ce426f
+
ce426f
+  *inptrp = inptr;
ce426f
+  *outptrp = outptr;
ce426f
+
ce426f
+  /* Determine the status.  */
ce426f
+  if (*inptrp == inend)
ce426f
+    result = __GCONV_EMPTY_INPUT;
ce426f
+  else if (*inptrp + 4 > inend)
ce426f
+    result = __GCONV_INCOMPLETE_INPUT;
ce426f
+  else
ce426f
+    {
ce426f
+      assert (*outptrp + 4 > outend);
ce426f
+      result = __GCONV_FULL_OUTPUT;
ce426f
+    }
ce426f
+
ce426f
+  return result;
ce426f
+}
ce426f
+ICONV_VX_SINGLE (ucs4le_internal_loop)
ce426f
+# include <iconv/skeleton.c>
ce426f
+ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal)
ce426f
+
ce426f
+/* Convert from UCS2 to the internal (UCS4-like) format.
ce426f
+   One input character occupies 2 bytes (MIN_NEEDED_FROM) and one output
ce426f
+   character 4 bytes (MIN_NEEDED_TO).  Only one direction is defined
ce426f
+   (ONE_DIRECTION), so TO_LOOP simply repeats FROM_LOOP.
ce426f
+   BODY_ORIG_ERROR is the error path which the vectorized BODY defined
ce426f
+   after these macros also expands.  BODY_ORIG spells out the scalar
ce426f
+   one-character conversion: read 16 bits, reject 0xd800..0xdfff, store
ce426f
+   the value as 32 bits.  NOTE(review): apart from its #undef no use of
ce426f
+   BODY_ORIG is visible in this section - confirm it is reference only.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	2
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (ucs2_internal_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (ucs2_internal_loop) /* This is not used.  */
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2_internal)
ce426f
+# define ONE_DIRECTION		1
ce426f
+
ce426f
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+# define LOOPFCT		FROM_LOOP
ce426f
+# define BODY_ORIG_ERROR						\
ce426f
+  /* Surrogate characters in UCS-2 input are not valid.  Reject		\
ce426f
+     them.  (Catching this here is not security relevant.)		\
ce426f
+     NOTE(review): the argument 2 is presumably the size in bytes of	\
ce426f
+     the rejected input character - confirm against the definition	\
ce426f
+     of STANDARD_FROM_LOOP_ERR_HANDLER.  */				\
ce426f
+  STANDARD_FROM_LOOP_ERR_HANDLER (2);
ce426f
+# define BODY_ORIG							\
ce426f
+  {									\
ce426f
+    uint16_t u1 = get16 (inptr);					\
ce426f
+									\
ce426f
+    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			\
ce426f
+      {									\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+									\
ce426f
+    *((uint32_t *) outptr) = u1;					\
ce426f
+    outptr += sizeof (uint32_t);					\
ce426f
+    inptr += 2;								\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    size_t len, tmp, tmp2;						\
ce426f
+    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4);		\
ce426f
+    __asm__ volatile (".machine push\n\t"				\
ce426f
+		      ".machine \"z13\"\n\t"				\
ce426f
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
ce426f
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
ce426f
+		      /* Vectorized UCS2 -> internal conversion.	\
ce426f
+			 R_LEN holds the number of characters to	\
ce426f
+			 convert (limited by both the input and the	\
ce426f
+			 output space); R_TMP and R_TMP2 are scratch.	\
ce426f
+			 R_LEN is left nonzero when the code stops in	\
ce426f
+			 front of a rejected character, which is tested	\
ce426f
+			 after the asm statement.			\
ce426f
+			 Setup to check for ch >= 0xd800 && ch < 0xe000.  */ \
ce426f
+		      "    larl %[R_TMP],9f\n\t"			\
ce426f
+		      "    vlm %%v20,%%v21,0(%[R_TMP])\n\t"		\
ce426f
+		      "    srlg %[R_TMP],%[R_LEN],3\n\t"		\
ce426f
+		      "    clgije %[R_TMP],0,1f\n\t"			\
ce426f
+		      /* Process 16byte (8char) blocks.  */		\
ce426f
+		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
ce426f
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      /* Enlarge UCS2 to UCS4.  */			\
ce426f
+		      "    vuplhh %%v17,%%v16\n\t"			\
ce426f
+		      "    vupllh %%v18,%%v16\n\t"			\
ce426f
+		      "    jno 10f\n\t"					\
ce426f
+		      /* Store 32bytes to buf_out.  */			\
ce426f
+		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
ce426f
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
ce426f
+		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
ce426f
+		      "    brctg %[R_TMP],0b\n\t"			\
ce426f
+		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
ce426f
+		      "    nilf %[R_LEN],7\n\t"				\
ce426f
+		      /* Process <16bytes.  The sll below turns R_LEN	\
ce426f
+			 from a character count into a byte count	\
ce426f
+			 (2 bytes per input character).  */		\
ce426f
+		      "1:  sll %[R_LEN],1\n\t"				\
ce426f
+		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
ce426f
+		      "    jl 20f\n\t" /* No further bytes available.  */ \
ce426f
+		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
ce426f
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      /* Enlarge UCS2 to UCS4.  */			\
ce426f
+		      "    vuplhh %%v17,%%v16\n\t"			\
ce426f
+		      "    vupllh %%v18,%%v16\n\t"			\
ce426f
+		      "    vlgvb %[R_TMP],%%v19,7\n\t"			\
ce426f
+		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
ce426f
+		      "    locgrhe %[R_TMP],%[R_LEN]\n\t"		\
ce426f
+		      "    locghihe %[R_LEN],0\n\t"			\
ce426f
+		      "    j 11f\n\t"					\
ce426f
+		      /* v20: Vector string range compare values.  */	\
ce426f
+		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
ce426f
+		      /* v21: Vector string range compare control-bits.	\
ce426f
+			 element 0: =>; element 1: <  */		\
ce426f
+		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
ce426f
+		      /* Found an element: ch >= 0xd800 && ch < 0xe000.	\
ce426f
+			 R_TMP is loaded from byte 7 of v19 and used as	\
ce426f
+			 the number of input bytes in front of that	\
ce426f
+			 element (NOTE(review): assumes vstrchs leaves	\
ce426f
+			 the byte index of the first hit there -	\
ce426f
+			 confirm).  From label 11 on the input pointer	\
ce426f
+			 advances by R_TMP bytes and the output pointer	\
ce426f
+			 by twice as many (kept in R_TMP2).  */		\
ce426f
+		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
ce426f
+		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
ce426f
+		      "    sll %[R_TMP],1\n\t"				\
ce426f
+		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
ce426f
+		      "    ahi %[R_TMP],-1\n\t"				\
ce426f
+		      "    jl 20f\n\t"					\
ce426f
+		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
ce426f
+		      "    ahi %[R_TMP],-16\n\t"			\
ce426f
+		      "    jl 19f\n\t"					\
ce426f
+		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
ce426f
+		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t"	\
ce426f
+		      "20: \n\t"					\
ce426f
+		      ".machine pop"					\
ce426f
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			, [R_IN] "+a" (inptr)				\
ce426f
+			, [R_TMP] "=a" (tmp)				\
ce426f
+			, [R_TMP2] "=a" (tmp2)				\
ce426f
+			, [R_LEN] "+d" (len)				\
ce426f
+		      : /* inputs */					\
ce426f
+		      : /* clobber list*/ "memory", "cc"		\
ce426f
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
ce426f
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
ce426f
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
ce426f
+		      );						\
ce426f
+    if (len > 0)							\
ce426f
+      {									\
ce426f
+	/* Found an invalid character at next input-char.  */		\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+  }
ce426f
+
ce426f
+# define LOOP_NEED_FLAGS
ce426f
+# include <iconv/loop.c>
ce426f
+# include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+# undef BODY_ORIG_ERROR
ce426f
+ICONV_VX_IFUNC (__gconv_transform_ucs2_internal)
ce426f
+
ce426f
+/* Convert from UCS2 in other endianness to the internal (UCS4-like) format.
ce426f
+   One input character occupies 2 bytes (MIN_NEEDED_FROM) and one output
ce426f
+   character 4 bytes (MIN_NEEDED_TO).  Only one direction is defined
ce426f
+   (ONE_DIRECTION), so TO_LOOP simply repeats FROM_LOOP.
ce426f
+   BODY_ORIG_ERROR is the error path which the vectorized BODY defined
ce426f
+   after these macros also expands: unless errors are ignored it sets
ce426f
+   result to __GCONV_ILLEGAL_INPUT and leaves the loop, otherwise it skips
ce426f
+   the 2 input bytes, counts the character in *irreversible and continues.
ce426f
+   BODY_ORIG spells out the scalar one-character conversion, which
ce426f
+   byte-swaps the 16 bit value read from the input with bswap_16.
ce426f
+   NOTE(review): apart from its #undef no use of BODY_ORIG is visible in
ce426f
+   this section - confirm it is reference only.  */
ce426f
+# define DEFINE_INIT		0
ce426f
+# define DEFINE_FINI		0
ce426f
+# define MIN_NEEDED_FROM	2
ce426f
+# define MIN_NEEDED_TO		4
ce426f
+# define FROM_DIRECTION		1
ce426f
+# define FROM_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop)
ce426f
+# define TO_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop) /* This is not used.*/
ce426f
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal)
ce426f
+# define ONE_DIRECTION		1
ce426f
+
ce426f
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+# define LOOPFCT		FROM_LOOP
ce426f
+# define BODY_ORIG_ERROR						\
ce426f
+  /* Surrogate characters in UCS-2 input are not valid.  Reject		\
ce426f
+     them.  (Catching this here is not security relevant.)  */		\
ce426f
+  if (! ignore_errors_p ())						\
ce426f
+    {									\
ce426f
+      result = __GCONV_ILLEGAL_INPUT;					\
ce426f
+      break;								\
ce426f
+    }									\
ce426f
+  inptr += 2;								\
ce426f
+  ++*irreversible;							\
ce426f
+  continue;
ce426f
+
ce426f
+# define BODY_ORIG \
ce426f
+  {									\
ce426f
+    uint16_t u1 = bswap_16 (get16 (inptr));				\
ce426f
+									\
ce426f
+    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			\
ce426f
+      {									\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+									\
ce426f
+    *((uint32_t *) outptr) = u1;					\
ce426f
+    outptr += sizeof (uint32_t);					\
ce426f
+    inptr += 2;								\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    size_t len, tmp, tmp2;						\
ce426f
+    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4);		\
ce426f
+    __asm__ volatile (".machine push\n\t"				\
ce426f
+		      ".machine \"z13\"\n\t"				\
ce426f
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
ce426f
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
ce426f
+		      /* Vectorized byte-swapped UCS2 -> internal	\
ce426f
+			 conversion.  R_LEN holds the number of		\
ce426f
+			 characters to convert (limited by both the	\
ce426f
+			 input and the output space); R_TMP and R_TMP2	\
ce426f
+			 are scratch.  R_LEN is left nonzero when the	\
ce426f
+			 code stops in front of a rejected character,	\
ce426f
+			 which is tested after the asm statement.	\
ce426f
+			 Setup to check for ch >= 0xd800 && ch < 0xe000.  */ \
ce426f
+		      "    larl %[R_TMP],9f\n\t"			\
ce426f
+		      "    vlm %%v20,%%v22,0(%[R_TMP])\n\t"		\
ce426f
+		      "    srlg %[R_TMP],%[R_LEN],3\n\t"		\
ce426f
+		      "    clgije %[R_TMP],0,1f\n\t"			\
ce426f
+		      /* Process 16byte (8char) blocks.  */		\
ce426f
+		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
ce426f
+		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t"		\
ce426f
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      /* Enlarge UCS2 to UCS4.  */			\
ce426f
+		      "    vuplhh %%v17,%%v16\n\t"			\
ce426f
+		      "    vupllh %%v18,%%v16\n\t"			\
ce426f
+		      "    jno 10f\n\t"					\
ce426f
+		      /* Store 32bytes to buf_out.  */			\
ce426f
+		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
ce426f
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
ce426f
+		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
ce426f
+		      "    brctg %[R_TMP],0b\n\t"			\
ce426f
+		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
ce426f
+		      "    nilf %[R_LEN],7\n\t"				\
ce426f
+		      /* Process <16bytes.  The sll below turns R_LEN	\
ce426f
+			 from a character count into a byte count	\
ce426f
+			 (2 bytes per input character).  */		\
ce426f
+		      "1:  sll %[R_LEN],1\n\t"				\
ce426f
+		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
ce426f
+		      "    jl 20f\n\t" /* No further bytes available.  */ \
ce426f
+		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
ce426f
+		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t"		\
ce426f
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
ce426f
+		      /* Enlarge UCS2 to UCS4.  */			\
ce426f
+		      "    vuplhh %%v17,%%v16\n\t"			\
ce426f
+		      "    vupllh %%v18,%%v16\n\t"			\
ce426f
+		      "    vlgvb %[R_TMP],%%v19,7\n\t"			\
ce426f
+		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
ce426f
+		      "    locgrhe %[R_TMP],%[R_LEN]\n\t"		\
ce426f
+		      "    locghihe %[R_LEN],0\n\t"			\
ce426f
+		      "    j 11f\n\t"					\
ce426f
+		      /* v20: Vector string range compare values.  */	\
ce426f
+		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
ce426f
+		      /* v21: Vector string range compare control-bits.	\
ce426f
+			 element 0: =>; element 1: <  */		\
ce426f
+		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
ce426f
+		      /* v22: Vector permute mask.  The byte indices	\
ce426f
+			 1,0,3,2,... exchange the two bytes of every	\
ce426f
+			 halfword.  */					\
ce426f
+		      "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
ce426f
+		      "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
ce426f
+		      /* Found an element: ch >= 0xd800 && ch < 0xe000.	\
ce426f
+			 R_TMP is loaded from byte 7 of v19 and used as	\
ce426f
+			 the number of input bytes in front of that	\
ce426f
+			 element (NOTE(review): assumes vstrchs leaves	\
ce426f
+			 the byte index of the first hit there -	\
ce426f
+			 confirm).  From label 11 on the input pointer	\
ce426f
+			 advances by R_TMP bytes and the output pointer	\
ce426f
+			 by twice as many (kept in R_TMP2).  */		\
ce426f
+		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
ce426f
+		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
ce426f
+		      "    sll %[R_TMP],1\n\t"				\
ce426f
+		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
ce426f
+		      "    ahi %[R_TMP],-1\n\t"				\
ce426f
+		      "    jl 20f\n\t"					\
ce426f
+		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
ce426f
+		      "    ahi %[R_TMP],-16\n\t"			\
ce426f
+		      "    jl 19f\n\t"					\
ce426f
+		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
ce426f
+		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t"	\
ce426f
+		      "20: \n\t"					\
ce426f
+		      ".machine pop"					\
ce426f
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			, [R_IN] "+a" (inptr)				\
ce426f
+			, [R_TMP] "=a" (tmp)				\
ce426f
+			, [R_TMP2] "=a" (tmp2)				\
ce426f
+			, [R_LEN] "+d" (len)				\
ce426f
+		      : /* inputs */					\
ce426f
+		      : /* clobber list*/ "memory", "cc"		\
ce426f
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
ce426f
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
ce426f
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
ce426f
+			ASM_CLOBBER_VR ("v22")				\
ce426f
+		      );						\
ce426f
+    if (len > 0)							\
ce426f
+      {									\
ce426f
+	/* Found an invalid character at next input-char.  */		\
ce426f
+	BODY_ORIG_ERROR							\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define LOOP_NEED_FLAGS
ce426f
+# include <iconv/loop.c>
ce426f
+# include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+# undef BODY_ORIG_ERROR
ce426f
+ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal)
ce426f
+
ce426f
+/* Convert from the internal (UCS4-like) format to UCS2.
ce426f
+   One input character occupies 4 bytes (MIN_NEEDED_FROM) and one output
ce426f
+   character 2 bytes (MIN_NEEDED_TO).  Only one direction is defined
ce426f
+   (ONE_DIRECTION), so TO_LOOP simply repeats FROM_LOOP.
ce426f
+   BODY_ORIG converts exactly one character in C: values >= 0x10000 go
ce426f
+   through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER, values in
ce426f
+   0xd800..0xdfff are rejected, everything else is stored with put16.  The
ce426f
+   vectorized BODY defined after these macros expands BODY_ORIG for short
ce426f
+   buffers and for the character at which its asm part stopped.  */
ce426f
+#define DEFINE_INIT		0
ce426f
+#define DEFINE_FINI		0
ce426f
+#define MIN_NEEDED_FROM		4
ce426f
+#define MIN_NEEDED_TO		2
ce426f
+#define FROM_DIRECTION		1
ce426f
+#define FROM_LOOP		ICONV_VX_NAME (internal_ucs2_loop)
ce426f
+#define TO_LOOP			ICONV_VX_NAME (internal_ucs2_loop) /* This is not used.  */
ce426f
+#define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2)
ce426f
+#define ONE_DIRECTION		1
ce426f
+
ce426f
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+#define LOOPFCT			FROM_LOOP
ce426f
+#define BODY_ORIG							\
ce426f
+  {									\
ce426f
+    uint32_t val = *((const uint32_t *) inptr);				\
ce426f
+									\
ce426f
+    if (__glibc_unlikely (val >= 0x10000))				\
ce426f
+      {									\
ce426f
+	UNICODE_TAG_HANDLER (val, 4);					\
ce426f
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
ce426f
+      }									\
ce426f
+    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))		\
ce426f
+      {									\
ce426f
+	/* Surrogate characters in UCS-4 input are not valid.		\
ce426f
+	   We must catch this, because the UCS-2 output might be	\
ce426f
+	   interpreted as UTF-16 by other programs.  If we let		\
ce426f
+	   surrogates pass through, attackers could make a security	\
ce426f
+	   hole exploit by synthesizing any desired plane 1-16		\
ce426f
+	   character.							\
ce426f
+	   Note that result is assigned before the ignore_errors_p ()	\
ce426f
+	   test, so it still holds __GCONV_ILLEGAL_INPUT when the	\
ce426f
+	   character is skipped and the loop continues.  */		\
ce426f
+	result = __GCONV_ILLEGAL_INPUT;					\
ce426f
+	if (! ignore_errors_p ())					\
ce426f
+	  break;							\
ce426f
+	inptr += 4;							\
ce426f
+	++*irreversible;						\
ce426f
+	continue;							\
ce426f
+      }									\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	put16 (outptr, val);						\
ce426f
+	outptr += sizeof (uint16_t);					\
ce426f
+	inptr += 4;							\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    if (__builtin_expect (inend - inptr < 32, 1)			\
ce426f
+	|| outend - outptr < 16)					\
ce426f
+      /* Convert remaining bytes with c code.  This branch is taken	\
ce426f
+	 when fewer than 32 input bytes or fewer than 16 bytes of	\
ce426f
+	 output space are left; BODY_ORIG handles one character.  */	\
ce426f
+      BODY_ORIG								\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	/* Convert in 32 byte blocks.  loop_count is the number of	\
ce426f
+	   whole blocks (32 input bytes giving 16 output bytes) that	\
ce426f
+	   fit into both buffers.  The brctg in the asm counts it	\
ce426f
+	   down once per stored block, so a nonzero value after the	\
ce426f
+	   asm means that the asm left its loop early.  */		\
ce426f
+	size_t loop_count = (inend - inptr) / 32;			\
ce426f
+	size_t tmp, tmp2;						\
ce426f
+	if (loop_count > (outend - outptr) / 16)			\
ce426f
+	  loop_count = (outend - outptr) / 16;				\
ce426f
+	__asm__ volatile (".machine push\n\t"				\
ce426f
+			  ".machine \"z13\"\n\t"			\
ce426f
+			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
ce426f
+			  CONVERT_32BIT_SIZE_T ([R_LI])			\
ce426f
+			  "    larl %[R_I],3f\n\t"			\
ce426f
+			  "    vlm %%v20,%%v23,0(%[R_I])\n\t"		\
ce426f
+			  "0:  \n\t"					\
ce426f
+			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t"		\
ce426f
+			  /* Shorten UCS4 to UCS2.  */			\
ce426f
+			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
ce426f
+			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
ce426f
+			  "    jno 11f\n\t"				\
ce426f
+			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
ce426f
+			  "    jno 10f\n\t"				\
ce426f
+			  /* Store 16bytes to buf_out.  */		\
ce426f
+			  "2:  vst %%v18,0(%[R_OUT])\n\t"		\
ce426f
+			  "    la %[R_IN],32(%[R_IN])\n\t"		\
ce426f
+			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
ce426f
+			  "    brctg %[R_LI],0b\n\t"			\
ce426f
+			  "    j 20f\n\t"				\
ce426f
+			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
ce426f
+			  "3:  .long 0xd800,0xd800,0x0,0x0\n\t"		\
ce426f
+			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
ce426f
+			  /* Setup to check for ch >= 0xe000		\
ce426f
+			     && ch < 0x10000. (v22,v23)  */		\
ce426f
+			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
ce426f
+			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
ce426f
+			  /* v16 contains only valid chars. Check in v17: \
ce426f
+			     ch >= 0xe000 && ch <= 0xffff.  */		\
ce426f
+			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
ce426f
+			  "    jo 2b\n\t" /* All ch's in this range, proceed.   */ \
ce426f
+			  "    lghi %[R_TMP],16\n\t"			\
ce426f
+			  "    j 12f\n\t"				\
ce426f
+			  /* Maybe v16 contains invalid chars.		\
ce426f
+			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
ce426f
+			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
ce426f
+			  "    jo 1b\n\t" /* All ch's in this range, proceed.   */ \
ce426f
+			  "    lghi %[R_TMP],0\n\t"			\
ce426f
+			  "12: vlgvb %[R_I],%%v19,7\n\t"		\
ce426f
+			  "    agr %[R_I],%[R_TMP]\n\t"			\
ce426f
+			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
ce426f
+			  "    srl %[R_I],1\n\t"			\
ce426f
+			  "    ahi %[R_I],-1\n\t"			\
ce426f
+			  "    jl 20f\n\t"				\
ce426f
+			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
ce426f
+			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
ce426f
+			  "20:\n\t"					\
ce426f
+			  ".machine pop"				\
ce426f
+			  : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			    , [R_IN] "+a" (inptr)			\
ce426f
+			    , [R_LI] "+d" (loop_count)			\
ce426f
+			    , [R_I] "=a" (tmp2)				\
ce426f
+			    , [R_TMP] "=d" (tmp)			\
ce426f
+			  : /* inputs */				\
ce426f
+			  : /* clobber list*/ "memory", "cc"		\
ce426f
+			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
ce426f
+			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
ce426f
+			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
ce426f
+			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
ce426f
+			  );						\
ce426f
+	if (loop_count > 0)						\
ce426f
+	  {								\
ce426f
+	    /* Found an invalid character at next character.  The	\
ce426f
+	       scalar BODY_ORIG repeats the exact checks on it.		\
ce426f
+	       NOTE(review): the asm appears to stop in front of any	\
ce426f
+	       character outside 0xe000..0xffff once a block contains	\
ce426f
+	       a value >= 0xd800, so the character at inptr is not	\
ce426f
+	       necessarily invalid - confirm.  */			\
ce426f
+	    BODY_ORIG							\
ce426f
+	  }								\
ce426f
+      }									\
ce426f
+  }
ce426f
+#define LOOP_NEED_FLAGS
ce426f
+#include <iconv/loop.c>
ce426f
+#include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs2)
ce426f
+
ce426f
+/* Convert from the internal (UCS4-like) format to UCS2 in other endianness.
ce426f
+   One input character occupies 4 bytes (MIN_NEEDED_FROM) and one output
ce426f
+   character 2 bytes (MIN_NEEDED_TO).  Only one direction is defined
ce426f
+   (ONE_DIRECTION), so TO_LOOP simply repeats FROM_LOOP.
ce426f
+   BODY_ORIG converts exactly one character in C: values >= 0x10000 go
ce426f
+   through UNICODE_TAG_HANDLER and STANDARD_TO_LOOP_ERR_HANDLER, values in
ce426f
+   0xd800..0xdfff are rejected, everything else is byte-swapped with
ce426f
+   bswap_16 and stored with put16.  The vectorized BODY defined after these
ce426f
+   macros expands BODY_ORIG for short buffers and for the character at
ce426f
+   which its asm part stopped.  */
ce426f
+#define DEFINE_INIT		0
ce426f
+#define DEFINE_FINI		0
ce426f
+#define MIN_NEEDED_FROM		4
ce426f
+#define MIN_NEEDED_TO		2
ce426f
+#define FROM_DIRECTION		1
ce426f
+#define FROM_LOOP		ICONV_VX_NAME (internal_ucs2reverse_loop)
ce426f
+#define TO_LOOP			ICONV_VX_NAME (internal_ucs2reverse_loop)/* This is not used.*/
ce426f
+#define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse)
ce426f
+#define ONE_DIRECTION		1
ce426f
+
ce426f
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
ce426f
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
ce426f
+#define LOOPFCT			FROM_LOOP
ce426f
+#define BODY_ORIG							\
ce426f
+  {									\
ce426f
+    uint32_t val = *((const uint32_t *) inptr);				\
ce426f
+    if (__glibc_unlikely (val >= 0x10000))				\
ce426f
+      {									\
ce426f
+	UNICODE_TAG_HANDLER (val, 4);					\
ce426f
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
ce426f
+      }									\
ce426f
+    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))		\
ce426f
+      {									\
ce426f
+	/* Surrogate characters in UCS-4 input are not valid.		\
ce426f
+	   We must catch this, because the UCS-2 output might be	\
ce426f
+	   interpreted as UTF-16 by other programs.  If we let		\
ce426f
+	   surrogates pass through, attackers could make a security	\
ce426f
+	   hole exploit by synthesizing any desired plane 1-16		\
ce426f
+	   character.							\
ce426f
+	   Here result is only assigned when errors are not ignored;	\
ce426f
+	   a skipped character is just counted in *irreversible.  */	\
ce426f
+	if (! ignore_errors_p ())					\
ce426f
+	  {								\
ce426f
+	    result = __GCONV_ILLEGAL_INPUT;				\
ce426f
+	    break;							\
ce426f
+	  }								\
ce426f
+	inptr += 4;							\
ce426f
+	++*irreversible;						\
ce426f
+	continue;							\
ce426f
+      }									\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	put16 (outptr, bswap_16 (val));					\
ce426f
+	outptr += sizeof (uint16_t);					\
ce426f
+	inptr += 4;							\
ce426f
+      }									\
ce426f
+  }
ce426f
+# define BODY								\
ce426f
+  {									\
ce426f
+    if (__builtin_expect (inend - inptr < 32, 1)			\
ce426f
+	|| outend - outptr < 16)					\
ce426f
+      /* Convert remaining bytes with c code.  This branch is taken	\
ce426f
+	 when fewer than 32 input bytes or fewer than 16 bytes of	\
ce426f
+	 output space are left; BODY_ORIG handles one character.  */	\
ce426f
+      BODY_ORIG								\
ce426f
+    else								\
ce426f
+      {									\
ce426f
+	/* Convert in 32 byte blocks.  loop_count is the number of	\
ce426f
+	   whole blocks (32 input bytes giving 16 output bytes) that	\
ce426f
+	   fit into both buffers.  The brctg in the asm counts it	\
ce426f
+	   down once per stored block, so a nonzero value after the	\
ce426f
+	   asm means that the asm left its loop early.  */		\
ce426f
+	size_t loop_count = (inend - inptr) / 32;			\
ce426f
+	size_t tmp, tmp2;						\
ce426f
+	if (loop_count > (outend - outptr) / 16)			\
ce426f
+	  loop_count = (outend - outptr) / 16;				\
ce426f
+	__asm__ volatile (".machine push\n\t"				\
ce426f
+			  ".machine \"z13\"\n\t"			\
ce426f
+			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
ce426f
+			  CONVERT_32BIT_SIZE_T ([R_LI])			\
ce426f
+			  "    larl %[R_I],3f\n\t"			\
ce426f
+			  "    vlm %%v20,%%v24,0(%[R_I])\n\t"		\
ce426f
+			  "0:  \n\t"					\
ce426f
+			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t"		\
ce426f
+			  /* Shorten UCS4 to UCS2 and byteswap.  */	\
ce426f
+			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
ce426f
+			  "    vperm %%v18,%%v18,%%v18,%%v24\n\t"	\
ce426f
+			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
ce426f
+			  "    jno 11f\n\t"				\
ce426f
+			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
ce426f
+			  "    jno 10f\n\t"				\
ce426f
+			  /* Store 16bytes to buf_out.  */		\
ce426f
+			  "2: vst %%v18,0(%[R_OUT])\n\t"		\
ce426f
+			  "    la %[R_IN],32(%[R_IN])\n\t"		\
ce426f
+			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
ce426f
+			  "    brctg %[R_LI],0b\n\t"			\
ce426f
+			  "    j 20f\n\t"				\
ce426f
+			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
ce426f
+			  "3: .long 0xd800,0xd800,0x0,0x0\n\t"		\
ce426f
+			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
ce426f
+			  /* Setup to check for ch >= 0xe000		\
ce426f
+			     && ch < 0x10000. (v22,v23)  */		\
ce426f
+			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
ce426f
+			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
ce426f
+			  /* Vector permute mask (v24).  The byte	\
ce426f
+			     indices 1,0,3,2,... exchange the two bytes	\
ce426f
+			     of every halfword.  */			\
ce426f
+			  "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
ce426f
+			  "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
ce426f
+			  /* v16 contains only valid chars. Check in v17: \
ce426f
+			     ch >= 0xe000 && ch <= 0xffff.  */		\
ce426f
+			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
ce426f
+			  "    jo 2b\n\t" /* All ch's in this range, proceed.  */ \
ce426f
+			  "    lghi %[R_TMP],16\n\t"			\
ce426f
+			  "    j 12f\n\t"				\
ce426f
+			  /* Maybe v16 contains invalid chars.		\
ce426f
+			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
ce426f
+			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
ce426f
+			  "    jo 1b\n\t" /* All ch's in this range, proceed.  */ \
ce426f
+			  "    lghi %[R_TMP],0\n\t"			\
ce426f
+			  "12: vlgvb %[R_I],%%v19,7\n\t"		\
ce426f
+			  "    agr %[R_I],%[R_TMP]\n\t"			\
ce426f
+			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t"	\
ce426f
+			  "    srl %[R_I],1\n\t"			\
ce426f
+			  "    ahi %[R_I],-1\n\t"			\
ce426f
+			  "    jl 20f\n\t"				\
ce426f
+			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
ce426f
+			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
ce426f
+			  "20:\n\t"					\
ce426f
+			  ".machine pop"				\
ce426f
+			  : /* outputs */ [R_OUT] "+a" (outptr)		\
ce426f
+			    , [R_IN] "+a" (inptr)			\
ce426f
+			    , [R_LI] "+d" (loop_count)			\
ce426f
+			    , [R_I] "=a" (tmp2)				\
ce426f
+			    , [R_TMP] "=d" (tmp)			\
ce426f
+			  : /* inputs */				\
ce426f
+			  : /* clobber list*/ "memory", "cc"		\
ce426f
+			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
ce426f
+			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
ce426f
+			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
ce426f
+			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
ce426f
+			    ASM_CLOBBER_VR ("v24")			\
ce426f
+			  );						\
ce426f
+	if (loop_count > 0)						\
ce426f
+	  {								\
ce426f
+	    /* Found an invalid character at next character.  The	\
ce426f
+	       scalar BODY_ORIG repeats the exact checks on it.		\
ce426f
+	       NOTE(review): the asm appears to stop in front of any	\
ce426f
+	       character outside 0xe000..0xffff once a block contains	\
ce426f
+	       a value >= 0xd800, so the character at inptr is not	\
ce426f
+	       necessarily invalid - confirm.  */			\
ce426f
+	    BODY_ORIG							\
ce426f
+	  }								\
ce426f
+      }									\
ce426f
+  }
ce426f
+#define LOOP_NEED_FLAGS
ce426f
+#include <iconv/loop.c>
ce426f
+#include <iconv/skeleton.c>
ce426f
+# undef BODY_ORIG
ce426f
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse)
ce426f
+
ce426f
+
ce426f
+#else
ce426f
+/* Generate the internal transformations without ifunc if build environment
ce426f
+   lacks vector support. Instead simply include the common version.  */
ce426f
+# include <iconv/gconv_simple.c>
ce426f
+#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */
ce426f
-- 
ce426f
1.8.3.1
ce426f