8ae002
From 53f860e80162b09c44b48f207342c1452289072c Mon Sep 17 00:00:00 2001
8ae002
From: Stefan Liebler <stli@linux.vnet.ibm.com>
8ae002
Date: Mon, 7 Nov 2016 15:37:29 +0100
8ae002
Subject: [PATCH 04/17] S390: Optimize builtin iconv-modules.
8ae002
8ae002
Upstream commit 3b704e26b33e35d99de920f8462d8e438f89be39
8ae002
8ae002
This patch introduces a s390 specific gconv_simple.c file which provides
8ae002
optimized versions for z13 with vector instructions, which will be chosen at
8ae002
runtime via ifunc.
8ae002
The optimized conversions can convert between internal and ascii, ucs4, ucs4le,
8ae002
ucs2, ucs2le.
8ae002
If the build-environment lacks vector support, then iconv/gconv_simple.c
8ae002
is used without any change. Otherwise iconvdata/gconv_simple.c is used to create
8ae002
conversion loop routines without vector instructions as fallback, if vector
8ae002
instructions aren't available at runtime.
8ae002
8ae002
ChangeLog:
8ae002
8ae002
	* sysdeps/s390/multiarch/gconv_simple.c: New File.
8ae002
	* sysdeps/s390/multiarch/Makefile (sysdep_routines): Add gconv_simple.
8ae002
---
8ae002
 sysdeps/s390/multiarch/Makefile       |    4 +
8ae002
 sysdeps/s390/multiarch/gconv_simple.c | 1266 +++++++++++++++++++++++++++++++++
8ae002
 2 files changed, 1270 insertions(+)
8ae002
 create mode 100644 sysdeps/s390/multiarch/gconv_simple.c
8ae002
8ae002
diff --git a/sysdeps/s390/multiarch/Makefile b/sysdeps/s390/multiarch/Makefile
8ae002
index 11ad2b9..24949cd 100644
8ae002
--- a/sysdeps/s390/multiarch/Makefile
8ae002
+++ b/sysdeps/s390/multiarch/Makefile
8ae002
@@ -52,3 +52,7 @@ $(move-if-change) $(@:stmp=T) $(@:stmp=h)
8ae002
 touch $@
8ae002
 endef
8ae002
 endif
8ae002
+
8ae002
+ifeq ($(subdir),iconv)
8ae002
+sysdep_routines += gconv_simple
8ae002
+endif
8ae002
diff --git a/sysdeps/s390/multiarch/gconv_simple.c b/sysdeps/s390/multiarch/gconv_simple.c
8ae002
new file mode 100644
8ae002
index 0000000..dc53a48
8ae002
--- /dev/null
8ae002
+++ b/sysdeps/s390/multiarch/gconv_simple.c
8ae002
@@ -0,0 +1,1266 @@
8ae002
+/* Simple transformations functions - s390 version.
8ae002
+   Copyright (C) 2016 Free Software Foundation, Inc.
8ae002
+   This file is part of the GNU C Library.
8ae002
+
8ae002
+   The GNU C Library is free software; you can redistribute it and/or
8ae002
+   modify it under the terms of the GNU Lesser General Public
8ae002
+   License as published by the Free Software Foundation; either
8ae002
+   version 2.1 of the License, or (at your option) any later version.
8ae002
+
8ae002
+   The GNU C Library is distributed in the hope that it will be useful,
8ae002
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
8ae002
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
8ae002
+   Lesser General Public License for more details.
8ae002
+
8ae002
+   You should have received a copy of the GNU Lesser General Public
8ae002
+   License along with the GNU C Library; if not, see
8ae002
+   <http://www.gnu.org/licenses/>.  */
8ae002
+
8ae002
+#if defined HAVE_S390_VX_ASM_SUPPORT
8ae002
+# include <ifunc-resolve.h>
8ae002
+
8ae002
+# if defined HAVE_S390_VX_GCC_SUPPORT
8ae002
+#  define ASM_CLOBBER_VR(NR) , NR
8ae002
+# else
8ae002
+#  define ASM_CLOBBER_VR(NR)
8ae002
+# endif
8ae002
+
8ae002
+# define ICONV_C_NAME(NAME) __##NAME##_c
8ae002
+# define ICONV_VX_NAME(NAME) __##NAME##_vx
8ae002
+# define ICONV_VX_IFUNC(FUNC)						\
8ae002
+  extern __typeof (ICONV_C_NAME (FUNC)) __##FUNC;			\
8ae002
+  s390_vx_libc_ifunc (__##FUNC)						\
8ae002
+  int FUNC (struct __gconv_step *step, struct __gconv_step_data *data,	\
8ae002
+	    const unsigned char **inptrp, const unsigned char *inend,	\
8ae002
+	    unsigned char **outbufstart, size_t *irreversible,		\
8ae002
+	    int do_flush, int consume_incomplete)			\
8ae002
+  {									\
8ae002
+    return __##FUNC (step, data, inptrp, inend,outbufstart,		\
8ae002
+		     irreversible, do_flush, consume_incomplete);	\
8ae002
+  }
8ae002
+# define ICONV_VX_SINGLE(NAME)						\
8ae002
+  static __typeof (NAME##_single) __##NAME##_vx_single __attribute__((alias(#NAME "_single")));
8ae002
+
8ae002
+/* Generate the transformations which are used, if the target machine does not
8ae002
+   support vector instructions.  */
8ae002
+# define __gconv_transform_ascii_internal		\
8ae002
+  ICONV_C_NAME (__gconv_transform_ascii_internal)
8ae002
+# define __gconv_transform_internal_ascii		\
8ae002
+  ICONV_C_NAME (__gconv_transform_internal_ascii)
8ae002
+# define __gconv_transform_internal_ucs4le		\
8ae002
+  ICONV_C_NAME (__gconv_transform_internal_ucs4le)
8ae002
+# define __gconv_transform_ucs4_internal		\
8ae002
+  ICONV_C_NAME (__gconv_transform_ucs4_internal)
8ae002
+# define __gconv_transform_ucs4le_internal		\
8ae002
+  ICONV_C_NAME (__gconv_transform_ucs4le_internal)
8ae002
+# define __gconv_transform_ucs2_internal		\
8ae002
+  ICONV_C_NAME (__gconv_transform_ucs2_internal)
8ae002
+# define __gconv_transform_ucs2reverse_internal		\
8ae002
+  ICONV_C_NAME (__gconv_transform_ucs2reverse_internal)
8ae002
+# define __gconv_transform_internal_ucs2		\
8ae002
+  ICONV_C_NAME (__gconv_transform_internal_ucs2)
8ae002
+# define __gconv_transform_internal_ucs2reverse		\
8ae002
+  ICONV_C_NAME (__gconv_transform_internal_ucs2reverse)
8ae002
+
8ae002
+
8ae002
+# include <iconv/gconv_simple.c>
8ae002
+
8ae002
+# undef __gconv_transform_ascii_internal
8ae002
+# undef __gconv_transform_internal_ascii
8ae002
+# undef __gconv_transform_internal_ucs4le
8ae002
+# undef __gconv_transform_ucs4_internal
8ae002
+# undef __gconv_transform_ucs4le_internal
8ae002
+# undef __gconv_transform_ucs2_internal
8ae002
+# undef __gconv_transform_ucs2reverse_internal
8ae002
+# undef __gconv_transform_internal_ucs2
8ae002
+# undef __gconv_transform_internal_ucs2reverse
8ae002
+
8ae002
+/* Now define the functions with vector support.  */
8ae002
+# if defined __s390x__
8ae002
+#  define CONVERT_32BIT_SIZE_T(REG)
8ae002
+# else
8ae002
+#  define CONVERT_32BIT_SIZE_T(REG) "llgfr %" #REG ",%" #REG "\n\t"
8ae002
+# endif
8ae002
+
8ae002
+/* Convert from ISO 646-IRV to the internal (UCS4-like) format.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	1
8ae002
+# define MIN_NEEDED_TO		4
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (ascii_internal_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (ascii_internal_loop) /* This is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ascii_internal)
8ae002
+# define ONE_DIRECTION		1
8ae002
+
8ae002
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+# define LOOPFCT		FROM_LOOP
8ae002
+# define BODY_ORIG_ERROR						\
8ae002
+    /* The value is too large.  We don't try transliteration here since \
8ae002
+       this is not an error because of the lack of possibilities to	\
8ae002
+       represent the result.  This is a genuine bug in the input since	\
8ae002
+       ASCII does not allow such values.  */				\
8ae002
+    STANDARD_FROM_LOOP_ERR_HANDLER (1);
8ae002
+
8ae002
+# define BODY_ORIG							\
8ae002
+  {									\
8ae002
+    if (__glibc_unlikely (*inptr > '\x7f'))				\
8ae002
+      {									\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	/* It's an one byte sequence.  */				\
8ae002
+	*((uint32_t *) outptr) = *inptr++;				\
8ae002
+	outptr += sizeof (uint32_t);					\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  {									\
8ae002
+    size_t len = inend - inptr;						\
8ae002
+    if (len > (outend - outptr) / 4)					\
8ae002
+      len = (outend - outptr) / 4;					\
8ae002
+    size_t loop_count, tmp;						\
8ae002
+    __asm__ volatile (".machine push\n\t"				\
8ae002
+		      ".machine \"z13\"\n\t"				\
8ae002
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
8ae002
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
8ae002
+		      "    vrepib %%v30,0x7f\n\t" /* For compare > 0x7f.  */ \
8ae002
+		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
8ae002
+		      "    vrepib %%v31,0x20\n\t"			\
8ae002
+		      "    clgije %[R_LI],0,1f\n\t"			\
8ae002
+		      "0:  \n\t" /* Handle 16-byte blocks.  */		\
8ae002
+		      "    vl %%v16,0(%[R_IN])\n\t"			\
8ae002
+		      /* Checking for values > 0x7f.  */		\
8ae002
+		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
8ae002
+		      "    jno 10f\n\t"					\
8ae002
+		      /* Enlarge to UCS4.  */				\
8ae002
+		      "    vuplhb %%v17,%%v16\n\t"			\
8ae002
+		      "    vupllb %%v18,%%v16\n\t"			\
8ae002
+		      "    vuplhh %%v19,%%v17\n\t"			\
8ae002
+		      "    vupllh %%v20,%%v17\n\t"			\
8ae002
+		      "    vuplhh %%v21,%%v18\n\t"			\
8ae002
+		      "    vupllh %%v22,%%v18\n\t"			\
8ae002
+		      /* Store 64bytes to buf_out.  */			\
8ae002
+		      "    vstm %%v19,%%v22,0(%[R_OUT])\n\t"		\
8ae002
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
8ae002
+		      "    la %[R_OUT],64(%[R_OUT])\n\t"		\
8ae002
+		      "    brctg %[R_LI],0b\n\t"			\
8ae002
+		      "    lghi %[R_LI],15\n\t"				\
8ae002
+		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
8ae002
+		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
8ae002
+		      /* Handle remaining bytes.  */			\
8ae002
+		      "1: aghik %[R_LI],%[R_LEN],-1\n\t"		\
8ae002
+		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
8ae002
+		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
8ae002
+		      /* Checking for values > 0x7f.  */		\
8ae002
+		      "    vstrcbs %%v17,%%v16,%%v30,%%v31\n\t"		\
8ae002
+		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
8ae002
+		      "    clr %[R_TMP],%[R_LI]\n\t"			\
8ae002
+		      "    locrh %[R_TMP],%[R_LEN]\n\t"			\
8ae002
+		      "    locghih %[R_LEN],0\n\t"			\
8ae002
+		      "    j 12f\n\t"					\
8ae002
+		      "10:\n\t"						\
8ae002
+		      /* Found a value > 0x7f.				\
8ae002
+			 Store the preceding chars.  */			\
8ae002
+		      "    vlgvb %[R_TMP],%%v17,7\n\t"			\
8ae002
+		      "12: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t"		\
8ae002
+		      "    sllk %[R_TMP],%[R_TMP],2\n\t"		\
8ae002
+		      "    ahi %[R_TMP],-1\n\t"				\
8ae002
+		      "    jl 20f\n\t"					\
8ae002
+		      "    lgr %[R_LI],%[R_TMP]\n\t"			\
8ae002
+		      "    vuplhb %%v17,%%v16\n\t"			\
8ae002
+		      "    vuplhh %%v19,%%v17\n\t"			\
8ae002
+		      "    vstl %%v19,%[R_LI],0(%[R_OUT])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 11f\n\t"					\
8ae002
+		      "    vupllh %%v20,%%v17\n\t"			\
8ae002
+		      "    vstl %%v20,%[R_LI],16(%[R_OUT])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 11f\n\t"					\
8ae002
+		      "    vupllb %%v18,%%v16\n\t"			\
8ae002
+		      "    vuplhh %%v21,%%v18\n\t"			\
8ae002
+		      "    vstl %%v21,%[R_LI],32(%[R_OUT])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 11f\n\t"					\
8ae002
+		      "    vupllh %%v22,%%v18\n\t"			\
8ae002
+		      "    vstl %%v22,%[R_LI],48(%[R_OUT])\n\t"		\
8ae002
+		      "11:\n\t"						\
8ae002
+		      "    la %[R_OUT],1(%[R_TMP],%[R_OUT])\n\t"	\
8ae002
+		      "20:\n\t"						\
8ae002
+		      ".machine pop"					\
8ae002
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			, [R_IN] "+a" (inptr)				\
8ae002
+			, [R_LEN] "+d" (len)				\
8ae002
+			, [R_LI] "=d" (loop_count)			\
8ae002
+			, [R_TMP] "=a" (tmp)				\
8ae002
+		      : /* inputs */					\
8ae002
+		      : /* clobber list*/ "memory", "cc"		\
8ae002
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
8ae002
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
8ae002
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
8ae002
+			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v30")	\
8ae002
+			ASM_CLOBBER_VR ("v31")				\
8ae002
+		      );						\
8ae002
+    if (len > 0)							\
8ae002
+      {									\
8ae002
+	/* Found an invalid character at the next input byte.  */	\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+  }
8ae002
+
8ae002
+# define LOOP_NEED_FLAGS
8ae002
+# include <iconv/loop.c>
8ae002
+# include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+# undef BODY_ORIG_ERROR
8ae002
+ICONV_VX_IFUNC (__gconv_transform_ascii_internal)
8ae002
+
8ae002
+/* Convert from the internal (UCS4-like) format to ISO 646-IRV.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	4
8ae002
+# define MIN_NEEDED_TO		1
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (internal_ascii_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (internal_ascii_loop) /* This is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ascii)
8ae002
+# define ONE_DIRECTION		1
8ae002
+
8ae002
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+# define LOOPFCT		FROM_LOOP
8ae002
+# define BODY_ORIG_ERROR						\
8ae002
+  UNICODE_TAG_HANDLER (*((const uint32_t *) inptr), 4);			\
8ae002
+  STANDARD_TO_LOOP_ERR_HANDLER (4);
8ae002
+
8ae002
+# define BODY_ORIG							\
8ae002
+  {									\
8ae002
+    if (__glibc_unlikely (*((const uint32_t *) inptr) > 0x7f))		\
8ae002
+      {									\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	/* It's an one byte sequence.  */				\
8ae002
+	*outptr++ = *((const uint32_t *) inptr);			\
8ae002
+	inptr += sizeof (uint32_t);					\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  { /* z13 vector body: packs up to 16 UCS4 chars per pass.  */		\
8ae002
+    size_t len = (inend - inptr) / 4;					\
8ae002
+    if (len > outend - outptr)						\
8ae002
+      len = outend - outptr;						\
8ae002
+    size_t loop_count, tmp, tmp2;					\
8ae002
+    __asm__ volatile (".machine push\n\t"				\
8ae002
+		      ".machine \"z13\"\n\t"				\
8ae002
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
8ae002
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
8ae002
+		      /* Setup to check for ch > 0x7f.  */		\
8ae002
+		      "    vzero %%v21\n\t"				\
8ae002
+		      "    srlg %[R_LI],%[R_LEN],4\n\t"			\
8ae002
+		      "    vleih %%v21,8192,0\n\t"  /* element 0:   >  */ \
8ae002
+		      "    vleih %%v21,-8192,2\n\t" /* element 1: =<>  */ \
8ae002
+		      "    vleif %%v20,127,0\n\t"   /* element 0: 127  */ \
8ae002
+		      "    lghi %[R_TMP],0\n\t"				\
8ae002
+		      "    clgije %[R_LI],0,1f\n\t"			\
8ae002
+		      "0:\n\t"						\
8ae002
+		      "    vlm %%v16,%%v19,0(%[R_IN])\n\t"		\
8ae002
+		      /* Shorten to byte values.  */			\
8ae002
+		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
8ae002
+		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
8ae002
+		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
8ae002
+		      /* Checking for values > 0x7f.  */		\
8ae002
+		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 10f\n\t"					\
8ae002
+		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 11f\n\t"					\
8ae002
+		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 12f\n\t"					\
8ae002
+		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 13f\n\t"					\
8ae002
+		      /* Store 16bytes to outptr.  */			\
8ae002
+		      "    vst %%v23,0(%[R_OUT])\n\t"			\
8ae002
+		      "    la %[R_IN],64(%[R_IN])\n\t"			\
8ae002
+		      "    la %[R_OUT],16(%[R_OUT])\n\t"		\
8ae002
+		      "    brctg %[R_LI],0b\n\t"			\
8ae002
+		      "    lghi %[R_LI],15\n\t"				\
8ae002
+		      "    ngr %[R_LEN],%[R_LI]\n\t"			\
8ae002
+		      "    je 20f\n\t" /* Jump away if no remaining bytes.  */ \
8ae002
+		      /* Handle remaining bytes.  */			\
8ae002
+		      "1: sllg %[R_LI],%[R_LEN],2\n\t"			\
8ae002
+		      "    aghi %[R_LI],-1\n\t"				\
8ae002
+		      "    jl 20f\n\t" /* Jump away if no remaining bytes.  */ \
8ae002
+		      /* Load remaining 1...63 bytes.  */		\
8ae002
+		      "    vll %%v16,%[R_LI],0(%[R_IN])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 2f\n\t"					\
8ae002
+		      "    vll %%v17,%[R_LI],16(%[R_IN])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 2f\n\t"					\
8ae002
+		      "    vll %%v18,%[R_LI],32(%[R_IN])\n\t"		\
8ae002
+		      "    ahi %[R_LI],-16\n\t"				\
8ae002
+		      "    jl 2f\n\t"					\
8ae002
+		      "    vll %%v19,%[R_LI],48(%[R_IN])\n\t"		\
8ae002
+		      "2:\n\t"						\
8ae002
+		      /* Shorten to byte values.  */			\
8ae002
+		      "    vpkf %%v23,%%v16,%%v17\n\t"			\
8ae002
+		      "    vpkf %%v24,%%v18,%%v19\n\t"			\
8ae002
+		      "    vpkh %%v23,%%v23,%%v24\n\t"			\
8ae002
+		      "    sllg %[R_LI],%[R_LEN],2\n\t"			\
8ae002
+		      "    aghi %[R_LI],-16\n\t"			\
8ae002
+		      "    jl 3f\n\t" /* v16 is not fully loaded.  */	\
8ae002
+		      "    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 10f\n\t"					\
8ae002
+		      "    aghi %[R_LI],-16\n\t"			\
8ae002
+		      "    jl 4f\n\t" /* v17 is not fully loaded.  */	\
8ae002
+		      "    vstrcfs %%v22,%%v17,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 11f\n\t"					\
8ae002
+		      "    aghi %[R_LI],-16\n\t"			\
8ae002
+		      "    jl 5f\n\t" /* v18 is not fully loaded.  */	\
8ae002
+		      "    vstrcfs %%v22,%%v18,%%v20,%%v21\n\t"		\
8ae002
+		      "    jno 12f\n\t"					\
8ae002
+		      "    aghi %[R_LI],-16\n\t"			\
8ae002
+		      /* v19 is not fully loaded. */			\
8ae002
+		      "    lghi %[R_TMP],12\n\t"			\
8ae002
+		      "    vstrcfs %%v22,%%v19,%%v20,%%v21\n\t"		\
8ae002
+		      "6: vlgvb %[R_I],%%v22,7\n\t"			\
8ae002
+		      "    aghi %[R_LI],16\n\t"				\
8ae002
+		      "    clrjl %[R_I],%[R_LI],14f\n\t"		\
8ae002
+		      "    lgr %[R_I],%[R_LEN]\n\t"			\
8ae002
+		      "    lghi %[R_LEN],0\n\t"				\
8ae002
+		      "    j 15f\n\t"					\
8ae002
+		      "3: vstrcfs %%v22,%%v16,%%v20,%%v21\n\t" /* Partial v16.  */ \
8ae002
+		      "    j 6b\n\t"					\
8ae002
+		      "4: vstrcfs %%v22,%%v17,%%v20,%%v21\n\t" /* Partial v17.  */ \
8ae002
+		      "    lghi %[R_TMP],4\n\t"				\
8ae002
+		      "    j 6b\n\t"					\
8ae002
+		      "5: vstrcfs %%v22,%%v18,%%v20,%%v21\n\t" /* Partial v18.  */ \
8ae002
+		      "    lghi %[R_TMP],8\n\t"				\
8ae002
+		      "    j 6b\n\t"					\
8ae002
+		      /* Found a value > 0x7f.  */			\
8ae002
+		      "13: ahi %[R_TMP],4\n\t"				\
8ae002
+		      "12: ahi %[R_TMP],4\n\t"				\
8ae002
+		      "11: ahi %[R_TMP],4\n\t"				\
8ae002
+		      "10: vlgvb %[R_I],%%v22,7\n\t"			\
8ae002
+		      "14: srlg %[R_I],%[R_I],2\n\t"			\
8ae002
+		      "    agr %[R_I],%[R_TMP]\n\t"			\
8ae002
+		      "    je 20f\n\t"					\
8ae002
+		      /* Store characters before invalid one...  */	\
8ae002
+		      "15: aghi %[R_I],-1\n\t"				\
8ae002
+		      "    vstl %%v23,%[R_I],0(%[R_OUT])\n\t"		\
8ae002
+		      /* ... and update pointers.  */			\
8ae002
+		      "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"		\
8ae002
+		      "    sllg %[R_I],%[R_I],2\n\t"			\
8ae002
+		      "    la %[R_IN],4(%[R_I],%[R_IN])\n\t"		\
8ae002
+		      "20:\n\t"						\
8ae002
+		      ".machine pop"					\
8ae002
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			, [R_IN] "+a" (inptr)				\
8ae002
+			, [R_LEN] "+d" (len)				\
8ae002
+			, [R_LI] "=d" (loop_count)			\
8ae002
+			, [R_I] "=a" (tmp2)				\
8ae002
+			, [R_TMP] "=d" (tmp)				\
8ae002
+		      : /* inputs */					\
8ae002
+		      : /* clobber list*/ "memory", "cc"		\
8ae002
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
8ae002
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
8ae002
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
8ae002
+			ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23")	\
8ae002
+			ASM_CLOBBER_VR ("v24")				\
8ae002
+		      );						\
8ae002
+    if (len > 0)							\
8ae002
+      {									\
8ae002
+	/* Found an invalid character > 0x7f at next character.  */	\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define LOOP_NEED_FLAGS
8ae002
+# include <iconv/loop.c>
8ae002
+# include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+# undef BODY_ORIG_ERROR
8ae002
+ICONV_VX_IFUNC (__gconv_transform_internal_ascii)
8ae002
+
8ae002
+
8ae002
+/* Convert from internal UCS4 to UCS4 little endian form.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	4
8ae002
+# define MIN_NEEDED_TO		4
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (internal_ucs4le_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (internal_ucs4le_loop) /* This is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs4le)
8ae002
+# define ONE_DIRECTION		0
8ae002
+
8ae002
+static inline int
8ae002
+__attribute ((always_inline))
8ae002
+ICONV_VX_NAME (internal_ucs4le_loop) (struct __gconv_step *step,
8ae002
+				      struct __gconv_step_data *step_data,
8ae002
+				      const unsigned char **inptrp,
8ae002
+				      const unsigned char *inend,
8ae002
+				      unsigned char **outptrp,
8ae002
+				      unsigned char *outend,
8ae002
+				      size_t *irreversible)
8ae002
+{
8ae002
+  const unsigned char *inptr = *inptrp;
8ae002
+  unsigned char *outptr = *outptrp;
8ae002
+  int result;
8ae002
+  size_t len = MIN (inend - inptr, outend - outptr) / 4;
8ae002
+  size_t loop_count;
8ae002
+  __asm__ volatile (".machine push\n\t"
8ae002
+		    ".machine \"z13\"\n\t"
8ae002
+		    ".machinemode \"zarch_nohighgprs\"\n\t"
8ae002
+		    CONVERT_32BIT_SIZE_T ([R_LEN])
8ae002
+		    "    bras %[R_LI],1f\n\t"
8ae002
+		    /* Vector permute mask:  */
8ae002
+		    "    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
8ae002
+		    "1:  vl %%v20,0(%[R_LI])\n\t"
8ae002
+		    /* Process 64byte (16char) blocks.  */
8ae002
+		    "    srlg %[R_LI],%[R_LEN],4\n\t"
8ae002
+		    "    clgije %[R_LI],0,10f\n\t"
8ae002
+		    "0:  vlm %%v16,%%v19,0(%[R_IN])\n\t"
8ae002
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
8ae002
+		    "    vperm %%v17,%%v17,%%v17,%%v20\n\t"
8ae002
+		    "    vperm %%v18,%%v18,%%v18,%%v20\n\t"
8ae002
+		    "    vperm %%v19,%%v19,%%v19,%%v20\n\t"
8ae002
+		    "    vstm %%v16,%%v19,0(%[R_OUT])\n\t"
8ae002
+		    "    la %[R_IN],64(%[R_IN])\n\t"
8ae002
+		    "    la %[R_OUT],64(%[R_OUT])\n\t"
8ae002
+		    "    brctg %[R_LI],0b\n\t"
8ae002
+		    "    llgfr %[R_LEN],%[R_LEN]\n\t"
8ae002
+		    "    nilf %[R_LEN],15\n\t"
8ae002
+		    /* Process 16byte (4char) blocks.  */
8ae002
+		    "10: srlg %[R_LI],%[R_LEN],2\n\t"
8ae002
+		    "    clgije %[R_LI],0,20f\n\t"
8ae002
+		    "11: vl %%v16,0(%[R_IN])\n\t"
8ae002
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
8ae002
+		    "    vst %%v16,0(%[R_OUT])\n\t"
8ae002
+		    "    la %[R_IN],16(%[R_IN])\n\t"
8ae002
+		    "    la %[R_OUT],16(%[R_OUT])\n\t"
8ae002
+		    "    brctg %[R_LI],11b\n\t"
8ae002
+		    "    nill %[R_LEN],3\n\t"
8ae002
+		    /* Process <16bytes.  */
8ae002
+		    "20: sll %[R_LEN],2\n\t"
8ae002
+		    "    ahi %[R_LEN],-1\n\t"
8ae002
+		    "    jl 30f\n\t"
8ae002
+		    "    vll %%v16,%[R_LEN],0(%[R_IN])\n\t"
8ae002
+		    "    vperm %%v16,%%v16,%%v16,%%v20\n\t"
8ae002
+		    "    vstl %%v16,%[R_LEN],0(%[R_OUT])\n\t"
8ae002
+		    "    la %[R_IN],1(%[R_LEN],%[R_IN])\n\t"
8ae002
+		    "    la %[R_OUT],1(%[R_LEN],%[R_OUT])\n\t"
8ae002
+		    "30: \n\t"
8ae002
+		    ".machine pop"
8ae002
+		    : /* outputs */ [R_OUT] "+a" (outptr)
8ae002
+		      , [R_IN] "+a" (inptr)
8ae002
+		      , [R_LI] "=a" (loop_count)
8ae002
+		      , [R_LEN] "+a" (len)
8ae002
+		    : /* inputs */
8ae002
+		    : /* clobber list*/ "memory", "cc"
8ae002
+		      ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")
8ae002
+		      ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")
8ae002
+		      ASM_CLOBBER_VR ("v20")
8ae002
+		    );
8ae002
+  *inptrp = inptr;
8ae002
+  *outptrp = outptr;
8ae002
+
8ae002
+  /* Determine the status.  */
8ae002
+  if (*inptrp == inend)
8ae002
+    result = __GCONV_EMPTY_INPUT;
8ae002
+  else if (*outptrp + 4 > outend)
8ae002
+    result = __GCONV_FULL_OUTPUT;
8ae002
+  else
8ae002
+    result = __GCONV_INCOMPLETE_INPUT;
8ae002
+
8ae002
+  return result;
8ae002
+}
8ae002
+
8ae002
+ICONV_VX_SINGLE (internal_ucs4le_loop)
8ae002
+# include <iconv/skeleton.c>
8ae002
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs4le)
8ae002
+
8ae002
+
8ae002
+/* Transform from UCS4 to the internal, UCS4-like format.  Unlike
8ae002
+   for the other direction we have to check for correct values here.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	4
8ae002
+# define MIN_NEEDED_TO		4
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (ucs4_internal_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (ucs4_internal_loop) /* This is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4_internal)
8ae002
+# define ONE_DIRECTION		0
8ae002
+
8ae002
+
8ae002
+static inline int
8ae002
+__attribute ((always_inline))
8ae002
+ICONV_VX_NAME (ucs4_internal_loop) (struct __gconv_step *step,
8ae002
+				    struct __gconv_step_data *step_data,
8ae002
+				    const unsigned char **inptrp,
8ae002
+				    const unsigned char *inend,
8ae002
+				    unsigned char **outptrp,
8ae002
+				    unsigned char *outend,
8ae002
+				    size_t *irreversible)
8ae002
+{
8ae002
+  int flags = step_data->__flags;
8ae002
+  const unsigned char *inptr = *inptrp;
8ae002
+  unsigned char *outptr = *outptrp;
8ae002
+  int result;
8ae002
+  size_t len, loop_count;
8ae002
+  do
8ae002
+    {
8ae002
+      len = MIN (inend - inptr, outend - outptr) / 4;
8ae002
+      __asm__ volatile (".machine push\n\t"
8ae002
+			".machine \"z13\"\n\t"
8ae002
+			".machinemode \"zarch_nohighgprs\"\n\t"
8ae002
+			CONVERT_32BIT_SIZE_T ([R_LEN])
8ae002
+			/* Setup to check for ch > 0x7fffffff.  */
8ae002
+			"    larl %[R_LI],9f\n\t"
8ae002
+			"    vlm %%v20,%%v21,0(%[R_LI])\n\t"
8ae002
+			"    srlg %[R_LI],%[R_LEN],2\n\t"
8ae002
+			"    clgije %[R_LI],0,1f\n\t"
8ae002
+			/* Process 16byte (4char) blocks.  */
8ae002
+			"0:  vl %%v16,0(%[R_IN])\n\t"
8ae002
+			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
8ae002
+			"    jno 10f\n\t"
8ae002
+			"    vst %%v16,0(%[R_OUT])\n\t"
8ae002
+			"    la %[R_IN],16(%[R_IN])\n\t"
8ae002
+			"    la %[R_OUT],16(%[R_OUT])\n\t"
8ae002
+			"    brctg %[R_LI],0b\n\t"
8ae002
+			"    llgfr %[R_LEN],%[R_LEN]\n\t"
8ae002
+			"    nilf %[R_LEN],3\n\t"
8ae002
+			/* Process <16bytes.  */
8ae002
+			"1:  sll %[R_LEN],2\n\t"
8ae002
+			"    ahik %[R_LI],%[R_LEN],-1\n\t"
8ae002
+			"    jl 20f\n\t" /* No further bytes available.  */
8ae002
+			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
8ae002
+			"    vstrcfs %%v22,%%v16,%%v20,%%v21\n\t"
8ae002
+			"    vlgvb %[R_LI],%%v22,7\n\t"
8ae002
+			"    clr %[R_LI],%[R_LEN]\n\t"
8ae002
+			"    locgrhe %[R_LI],%[R_LEN]\n\t"
8ae002
+			"    locghihe %[R_LEN],0\n\t"
8ae002
+			"    j 11f\n\t"
8ae002
+			/* v20: Vector string range compare values.  */
8ae002
+			"9:  .long 0x7fffffff,0x0,0x0,0x0\n\t"
8ae002
+			/* v21: Vector string range compare control-bits.
8ae002
+			   element 0: >; element 1: =<> (always true)  */
8ae002
+			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
8ae002
+			/* Found a value > 0x7fffffff.  */
8ae002
+			"10: vlgvb %[R_LI],%%v22,7\n\t"
8ae002
+			/* Store characters before invalid one.  */
8ae002
+			"11: aghi %[R_LI],-1\n\t"
8ae002
+			"    jl 20f\n\t"
8ae002
+			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
8ae002
+			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
8ae002
+			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
8ae002
+			"20:\n\t"
8ae002
+			".machine pop"
8ae002
+			: /* outputs */ [R_OUT] "+a" (outptr)
8ae002
+			  , [R_IN] "+a" (inptr)
8ae002
+			  , [R_LI] "=a" (loop_count)
8ae002
+			  , [R_LEN] "+d" (len)
8ae002
+			: /* inputs */
8ae002
+			: /* clobber list*/ "memory", "cc"
8ae002
+			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
8ae002
+			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
8ae002
+			);
8ae002
+      if (len > 0)
8ae002
+	{
8ae002
+	  /* The value is too large.  We don't try transliteration here since
8ae002
+	     this is not an error because of the lack of possibilities to
8ae002
+	     represent the result.  This is a genuine bug in the input since
8ae002
+	     UCS4 does not allow such values.  */
8ae002
+	  if (irreversible == NULL)
8ae002
+	    /* We are transliterating, don't try to correct anything.  */
8ae002
+	    return __GCONV_ILLEGAL_INPUT;
8ae002
+
8ae002
+	  if (flags & __GCONV_IGNORE_ERRORS)
8ae002
+	    {
8ae002
+	      /* Just ignore this character.  */
8ae002
+	      ++*irreversible;
8ae002
+	      inptr += 4;
8ae002
+	      continue;
8ae002
+	    }
8ae002
+
8ae002
+	  *inptrp = inptr;
8ae002
+	  *outptrp = outptr;
8ae002
+	  return __GCONV_ILLEGAL_INPUT;
8ae002
+	}
8ae002
+    }
8ae002
+  while (len > 0);
8ae002
+
8ae002
+  *inptrp = inptr;
8ae002
+  *outptrp = outptr;
8ae002
+
8ae002
+  /* Determine the status.  */
8ae002
+  if (*inptrp == inend)
8ae002
+    result = __GCONV_EMPTY_INPUT;
8ae002
+  else if (*outptrp + 4 > outend)
8ae002
+    result = __GCONV_FULL_OUTPUT;
8ae002
+  else
8ae002
+    result = __GCONV_INCOMPLETE_INPUT;
8ae002
+
8ae002
+  return result;
8ae002
+}
8ae002
+
8ae002
+ICONV_VX_SINGLE (ucs4_internal_loop)
8ae002
+# include <iconv/skeleton.c>
8ae002
+ICONV_VX_IFUNC (__gconv_transform_ucs4_internal)
8ae002
+
8ae002
+
8ae002
+/* Transform from UCS4-LE to the internal encoding.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	4
8ae002
+# define MIN_NEEDED_TO		4
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (ucs4le_internal_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (ucs4le_internal_loop) /* This is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs4le_internal)
8ae002
+# define ONE_DIRECTION		0
8ae002
+
8ae002
+static inline int
8ae002
+__attribute ((always_inline))
8ae002
+ICONV_VX_NAME (ucs4le_internal_loop) (struct __gconv_step *step,
8ae002
+				      struct __gconv_step_data *step_data,
8ae002
+				      const unsigned char **inptrp,
8ae002
+				      const unsigned char *inend,
8ae002
+				      unsigned char **outptrp,
8ae002
+				      unsigned char *outend,
8ae002
+				      size_t *irreversible)
8ae002
+{
8ae002
+  int flags = step_data->__flags;
8ae002
+  const unsigned char *inptr = *inptrp;
8ae002
+  unsigned char *outptr = *outptrp;
8ae002
+  int result;
8ae002
+  size_t len, loop_count;
8ae002
+  do
8ae002
+    {
8ae002
+      len = MIN (inend - inptr, outend - outptr) / 4;
8ae002
+      __asm__ volatile (".machine push\n\t"
8ae002
+			".machine \"z13\"\n\t"
8ae002
+			".machinemode \"zarch_nohighgprs\"\n\t"
8ae002
+			CONVERT_32BIT_SIZE_T ([R_LEN])
8ae002
+			/* Setup to check for ch > 0x7fffffff.  */
8ae002
+			"    larl %[R_LI],9f\n\t"
8ae002
+			"    vlm %%v20,%%v22,0(%[R_LI])\n\t"
8ae002
+			"    srlg %[R_LI],%[R_LEN],2\n\t"
8ae002
+			"    clgije %[R_LI],0,1f\n\t"
8ae002
+			/* Process 16byte (4char) blocks.  */
8ae002
+			"0:  vl %%v16,0(%[R_IN])\n\t"
8ae002
+			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
8ae002
+			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
8ae002
+			"    jno 10f\n\t"
8ae002
+			"    vst %%v16,0(%[R_OUT])\n\t"
8ae002
+			"    la %[R_IN],16(%[R_IN])\n\t"
8ae002
+			"    la %[R_OUT],16(%[R_OUT])\n\t"
8ae002
+			"    brctg %[R_LI],0b\n\t"
8ae002
+			"    llgfr %[R_LEN],%[R_LEN]\n\t"
8ae002
+			"    nilf %[R_LEN],3\n\t"
8ae002
+			/* Process <16bytes.  */
8ae002
+			"1:  sll %[R_LEN],2\n\t"
8ae002
+			"    ahik %[R_LI],%[R_LEN],-1\n\t"
8ae002
+			"    jl 20f\n\t" /* No further bytes available.  */
8ae002
+			"    vll %%v16,%[R_LI],0(%[R_IN])\n\t"
8ae002
+			"    vperm %%v16,%%v16,%%v16,%%v22\n\t"
8ae002
+			"    vstrcfs %%v23,%%v16,%%v20,%%v21\n\t"
8ae002
+			"    vlgvb %[R_LI],%%v23,7\n\t"
8ae002
+			"    clr %[R_LI],%[R_LEN]\n\t"
8ae002
+			"    locgrhe %[R_LI],%[R_LEN]\n\t"
8ae002
+			"    locghihe %[R_LEN],0\n\t"
8ae002
+			"    j 11f\n\t"
8ae002
+			/* v20: Vector string range compare values.  */
8ae002
+			"9: .long 0x7fffffff,0x0,0x0,0x0\n\t"
8ae002
+			/* v21: Vector string range compare control-bits.
8ae002
+			   element 0: >; element 1: =<> (always true)  */
8ae002
+			"    .long 0x20000000,0xE0000000,0x0,0x0\n\t"
8ae002
+			/* v22: Vector permute mask.  */
8ae002
+			"    .long 0x03020100,0x7060504,0x0B0A0908,0x0F0E0D0C\n\t"
8ae002
+			/* Found a value > 0x7fffffff.  */
8ae002
+			"10: vlgvb %[R_LI],%%v23,7\n\t"
8ae002
+			/* Store characters before invalid one.  */
8ae002
+			"11: aghi %[R_LI],-1\n\t"
8ae002
+			"    jl 20f\n\t"
8ae002
+			"    vstl %%v16,%[R_LI],0(%[R_OUT])\n\t"
8ae002
+			"    la %[R_IN],1(%[R_LI],%[R_IN])\n\t"
8ae002
+			"    la %[R_OUT],1(%[R_LI],%[R_OUT])\n\t"
8ae002
+			"20:\n\t"
8ae002
+			".machine pop"
8ae002
+			: /* outputs */ [R_OUT] "+a" (outptr)
8ae002
+			  , [R_IN] "+a" (inptr)
8ae002
+			  , [R_LI] "=a" (loop_count)
8ae002
+			  , [R_LEN] "+d" (len)
8ae002
+			: /* inputs */
8ae002
+			: /* clobber list*/ "memory", "cc"
8ae002
+			  ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v20")
8ae002
+			  ASM_CLOBBER_VR ("v21") ASM_CLOBBER_VR ("v22")
8ae002
+			  ASM_CLOBBER_VR ("v23")
8ae002
+			);
8ae002
+      if (len > 0)
8ae002
+	{
8ae002
+	  /* The value is too large.  We don't try transliteration here since
8ae002
+	     this is not an error because of the lack of possibilities to
8ae002
+	     represent the result.  This is a genuine bug in the input since
8ae002
+	     UCS4 does not allow such values.  */
8ae002
+	  if (irreversible == NULL)
8ae002
+	    /* We are transliterating, don't try to correct anything.  */
8ae002
+	    return __GCONV_ILLEGAL_INPUT;
8ae002
+
8ae002
+	  if (flags & __GCONV_IGNORE_ERRORS)
8ae002
+	    {
8ae002
+	      /* Just ignore this character.  */
8ae002
+	      ++*irreversible;
8ae002
+	      inptr += 4;
8ae002
+	      continue;
8ae002
+	    }
8ae002
+
8ae002
+	  *inptrp = inptr;
8ae002
+	  *outptrp = outptr;
8ae002
+	  return __GCONV_ILLEGAL_INPUT;
8ae002
+	}
8ae002
+    }
8ae002
+  while (len > 0);
8ae002
+
8ae002
+  *inptrp = inptr;
8ae002
+  *outptrp = outptr;
8ae002
+
8ae002
+  /* Determine the status.  */
8ae002
+  if (*inptrp == inend)
8ae002
+    result = __GCONV_EMPTY_INPUT;
8ae002
+  else if (*inptrp + 4 > inend)
8ae002
+    result = __GCONV_INCOMPLETE_INPUT;
8ae002
+  else
8ae002
+    {
8ae002
+      assert (*outptrp + 4 > outend);
8ae002
+      result = __GCONV_FULL_OUTPUT;
8ae002
+    }
8ae002
+
8ae002
+  return result;
8ae002
+}
8ae002
+ICONV_VX_SINGLE (ucs4le_internal_loop)
8ae002
+# include <iconv/skeleton.c>
8ae002
+ICONV_VX_IFUNC (__gconv_transform_ucs4le_internal)
8ae002
+
8ae002
+/* Convert from UCS2 to the internal (UCS4-like) format: every 2-byte input character is zero-extended to 4 bytes; surrogates (0xd800..0xdfff) are rejected.  */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	2 /* Bytes per UCS2 input character.  */
8ae002
+# define MIN_NEEDED_TO		4 /* Bytes per internal output character.  */
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (ucs2_internal_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (ucs2_internal_loop) /* Same name as FROM_LOOP; this is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2_internal)
8ae002
+# define ONE_DIRECTION		1
8ae002
+
8ae002
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+# define LOOPFCT		FROM_LOOP
8ae002
+# define BODY_ORIG_ERROR						\
8ae002
+  /* Error path expanded by both BODY_ORIG and BODY.  Surrogate characters in UCS-2 input are not valid.  Reject		\
8ae002
+     them.  (Catching this here is not security relevant.)  The argument 2 matches the size in bytes of one UCS2 input character.  */		\
8ae002
+  STANDARD_FROM_LOOP_ERR_HANDLER (2);
8ae002
+# define BODY_ORIG							\
8ae002
+  { /* Plain C conversion of a single UCS2 character.  NOTE(review): BODY of this section expands only BODY_ORIG_ERROR; confirm where BODY_ORIG itself is used.  */	\
8ae002
+    uint16_t u1 = get16 (inptr);					\
8ae002
+    /* Values 0xd800..0xdfff are surrogates and take the error path.  */	\
8ae002
+    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			\
8ae002
+      {									\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+    /* Zero-extend to 32 bit, then advance the output by 4 and the input by 2 bytes.  */	\
8ae002
+    *((uint32_t *) outptr) = u1;					\
8ae002
+    outptr += sizeof (uint32_t);					\
8ae002
+    inptr += 2;								\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  { /* Vectorized UCS2 -> internal conversion.  After the asm, len > 0 means a surrogate was found at inptr.  */	\
8ae002
+    size_t len, tmp, tmp2;						\
8ae002
+    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); /* Characters that fit both buffers.  */	\
8ae002
+    __asm__ volatile (".machine push\n\t"				\
8ae002
+		      ".machine \"z13\"\n\t"				\
8ae002
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
8ae002
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
8ae002
+		      /* Setup to check for ch >= 0xd800 && ch < 0xe000: load the constants at label 9 into v20/v21.  */ \
8ae002
+		      "    larl %[R_TMP],9f\n\t"			\
8ae002
+		      "    vlm %%v20,%%v21,0(%[R_TMP])\n\t"		\
8ae002
+		      "    srlg %[R_TMP],%[R_LEN],3\n\t" /* R_TMP = len / 8 = number of full blocks.  */	\
8ae002
+		      "    clgije %[R_TMP],0,1f\n\t" /* No full block: handle only the tail.  */	\
8ae002
+		      /* Process 16byte (8char) blocks.  */		\
8ae002
+		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
8ae002
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      /* Enlarge UCS2 to UCS4: v17 gets the first four chars, v18 the last four.  */			\
8ae002
+		      "    vuplhh %%v17,%%v16\n\t"			\
8ae002
+		      "    vupllh %%v18,%%v16\n\t"			\
8ae002
+		      "    jno 10f\n\t" /* Range compare found a surrogate.  */	\
8ae002
+		      /* Store 32bytes to buf_out.  */			\
8ae002
+		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
8ae002
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
8ae002
+		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
8ae002
+		      "    brctg %[R_TMP],0b\n\t"			\
8ae002
+		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
8ae002
+		      "    nilf %[R_LEN],7\n\t" /* Remaining chars = len % 8.  */	\
8ae002
+		      /* Process <16bytes: R_LEN becomes the remaining input byte count.  */				\
8ae002
+		      "1:  sll %[R_LEN],1\n\t"				\
8ae002
+		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
8ae002
+		      "    jl 20f\n\t" /* No further bytes available.  */ \
8ae002
+		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
8ae002
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      /* Enlarge UCS2 to UCS4.  */			\
8ae002
+		      "    vuplhh %%v17,%%v16\n\t"			\
8ae002
+		      "    vupllh %%v18,%%v16\n\t"			\
8ae002
+		      "    vlgvb %[R_TMP],%%v19,7\n\t" /* Byte index reported by the compare.  */	\
8ae002
+		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
8ae002
+		      "    locgrhe %[R_TMP],%[R_LEN]\n\t" /* Index >= byte count: convert all remaining bytes ...  */	\
8ae002
+		      "    locghihe %[R_LEN],0\n\t" /* ... and report no error (len = 0).  */	\
8ae002
+		      "    j 11f\n\t"					\
8ae002
+		      /* v20: Vector string range compare values.  */	\
8ae002
+		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
8ae002
+		      /* v21: Vector string range compare control-bits.	\
8ae002
+			 element 0: >=; element 1: <  */		\
8ae002
+		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
8ae002
+		      /* Found an element: ch >= 0xd800 && ch < 0xe000; R_TMP = number of valid input bytes before it.  */ \
8ae002
+		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
8ae002
+		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" /* Consume the valid input bytes.  */	\
8ae002
+		      "    sll %[R_TMP],1\n\t" /* Output bytes = 2 * input bytes.  */	\
8ae002
+		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
8ae002
+		      "    ahi %[R_TMP],-1\n\t"				\
8ae002
+		      "    jl 20f\n\t" /* Nothing to store.  */	\
8ae002
+		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
8ae002
+		      "    ahi %[R_TMP],-16\n\t"			\
8ae002
+		      "    jl 19f\n\t" /* Everything fitted into the first 16 output bytes.  */	\
8ae002
+		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
8ae002
+		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" /* Advance output by the bytes stored.  */	\
8ae002
+		      "20: \n\t"					\
8ae002
+		      ".machine pop"					\
8ae002
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			, [R_IN] "+a" (inptr)				\
8ae002
+			, [R_TMP] "=a" (tmp)				\
8ae002
+			, [R_TMP2] "=a" (tmp2)				\
8ae002
+			, [R_LEN] "+d" (len)				\
8ae002
+		      : /* inputs */					\
8ae002
+		      : /* clobber list */ "memory", "cc"		\
8ae002
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
8ae002
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
8ae002
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
8ae002
+		      );						\
8ae002
+    if (len > 0)							\
8ae002
+      {									\
8ae002
+	/* Found an invalid character at next input-char; inptr/outptr were already advanced past the valid ones.  */		\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+  }
8ae002
+
8ae002
+# define LOOP_NEED_FLAGS
8ae002
+# include <iconv/loop.c>
8ae002
+# include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+# undef BODY_ORIG_ERROR
8ae002
+ICONV_VX_IFUNC (__gconv_transform_ucs2_internal)
8ae002
+
8ae002
+/* Convert from UCS2 in other endianness to the internal (UCS4-like) format: each 2-byte input character is byte-swapped and zero-extended to 4 bytes; surrogates are rejected. */
8ae002
+# define DEFINE_INIT		0
8ae002
+# define DEFINE_FINI		0
8ae002
+# define MIN_NEEDED_FROM	2 /* Bytes per UCS2 input character.  */
8ae002
+# define MIN_NEEDED_TO		4 /* Bytes per internal output character.  */
8ae002
+# define FROM_DIRECTION		1
8ae002
+# define FROM_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop)
8ae002
+# define TO_LOOP		ICONV_VX_NAME (ucs2reverse_internal_loop) /* Same name as FROM_LOOP; this is not used.  */
8ae002
+# define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_ucs2reverse_internal)
8ae002
+# define ONE_DIRECTION		1
8ae002
+
8ae002
+# define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+# define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+# define LOOPFCT		FROM_LOOP
8ae002
+# define BODY_ORIG_ERROR						\
8ae002
+  /* Error path expanded by both BODY_ORIG and BODY.  Surrogate characters in UCS-2 input are not valid.  Reject		\
8ae002
+     them.  (Catching this here is not security relevant.)  */		\
8ae002
+  if (! ignore_errors_p ())						\
8ae002
+    {									\
8ae002
+      result = __GCONV_ILLEGAL_INPUT;					\
8ae002
+      break; /* Stop converting and report the error.  */		\
8ae002
+    }									\
8ae002
+  inptr += 2; /* Errors are ignored: skip the 2-byte surrogate ...  */	\
8ae002
+  ++*irreversible; /* ... and count it as an irreversible conversion.  */	\
8ae002
+  continue;
8ae002
+
8ae002
+# define BODY_ORIG \
8ae002
+  { /* Plain C conversion of a single byte-swapped UCS2 character.  NOTE(review): BODY of this section expands only BODY_ORIG_ERROR; confirm where BODY_ORIG itself is used.  */	\
8ae002
+    uint16_t u1 = bswap_16 (get16 (inptr));				\
8ae002
+    /* After the byte swap, values 0xd800..0xdfff are surrogates and take the error path.  */	\
8ae002
+    if (__glibc_unlikely (u1 >= 0xd800 && u1 < 0xe000))			\
8ae002
+      {									\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+    /* Zero-extend to 32 bit, then advance the output by 4 and the input by 2 bytes.  */	\
8ae002
+    *((uint32_t *) outptr) = u1;					\
8ae002
+    outptr += sizeof (uint32_t);					\
8ae002
+    inptr += 2;								\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  { /* Vectorized byte-swapped UCS2 -> internal conversion.  After the asm, len > 0 means a surrogate was found at inptr.  */	\
8ae002
+    size_t len, tmp, tmp2;						\
8ae002
+    len = MIN ((inend - inptr) / 2, (outend - outptr) / 4); /* Characters that fit both buffers.  */	\
8ae002
+    __asm__ volatile (".machine push\n\t"				\
8ae002
+		      ".machine \"z13\"\n\t"				\
8ae002
+		      ".machinemode \"zarch_nohighgprs\"\n\t"		\
8ae002
+		      CONVERT_32BIT_SIZE_T ([R_LEN])			\
8ae002
+		      /* Setup to check for ch >= 0xd800 && ch < 0xe000: load the constants at label 9 into v20..v22.  */ \
8ae002
+		      "    larl %[R_TMP],9f\n\t"			\
8ae002
+		      "    vlm %%v20,%%v22,0(%[R_TMP])\n\t"		\
8ae002
+		      "    srlg %[R_TMP],%[R_LEN],3\n\t" /* R_TMP = len / 8 = number of full blocks.  */	\
8ae002
+		      "    clgije %[R_TMP],0,1f\n\t" /* No full block: handle only the tail.  */	\
8ae002
+		      /* Process 16byte (8char) blocks.  */		\
8ae002
+		      "0:  vl %%v16,0(%[R_IN])\n\t"			\
8ae002
+		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t" /* Swap the two bytes of every halfword.  */	\
8ae002
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      /* Enlarge UCS2 to UCS4: v17 gets the first four chars, v18 the last four.  */			\
8ae002
+		      "    vuplhh %%v17,%%v16\n\t"			\
8ae002
+		      "    vupllh %%v18,%%v16\n\t"			\
8ae002
+		      "    jno 10f\n\t" /* Range compare found a surrogate.  */	\
8ae002
+		      /* Store 32bytes to buf_out.  */			\
8ae002
+		      "    vstm %%v17,%%v18,0(%[R_OUT])\n\t"		\
8ae002
+		      "    la %[R_IN],16(%[R_IN])\n\t"			\
8ae002
+		      "    la %[R_OUT],32(%[R_OUT])\n\t"		\
8ae002
+		      "    brctg %[R_TMP],0b\n\t"			\
8ae002
+		      "    llgfr %[R_LEN],%[R_LEN]\n\t"			\
8ae002
+		      "    nilf %[R_LEN],7\n\t" /* Remaining chars = len % 8.  */	\
8ae002
+		      /* Process <16bytes: R_LEN becomes the remaining input byte count.  */				\
8ae002
+		      "1:  sll %[R_LEN],1\n\t"				\
8ae002
+		      "    ahik %[R_TMP],%[R_LEN],-1\n\t"		\
8ae002
+		      "    jl 20f\n\t" /* No further bytes available.  */ \
8ae002
+		      "    vll %%v16,%[R_TMP],0(%[R_IN])\n\t"		\
8ae002
+		      "    vperm %%v16,%%v16,%%v16,%%v22\n\t"		\
8ae002
+		      "    vstrchs %%v19,%%v16,%%v20,%%v21\n\t"		\
8ae002
+		      /* Enlarge UCS2 to UCS4.  */			\
8ae002
+		      "    vuplhh %%v17,%%v16\n\t"			\
8ae002
+		      "    vupllh %%v18,%%v16\n\t"			\
8ae002
+		      "    vlgvb %[R_TMP],%%v19,7\n\t" /* Byte index reported by the compare.  */	\
8ae002
+		      "    clr %[R_TMP],%[R_LEN]\n\t"			\
8ae002
+		      "    locgrhe %[R_TMP],%[R_LEN]\n\t" /* Index >= byte count: convert all remaining bytes ...  */	\
8ae002
+		      "    locghihe %[R_LEN],0\n\t" /* ... and report no error (len = 0).  */	\
8ae002
+		      "    j 11f\n\t"					\
8ae002
+		      /* v20: Vector string range compare values.  */	\
8ae002
+		      "9:  .short 0xd800,0xe000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
8ae002
+		      /* v21: Vector string range compare control-bits.	\
8ae002
+			 element 0: >=; element 1: <  */		\
8ae002
+		      "    .short 0xa000,0x4000,0x0,0x0,0x0,0x0,0x0,0x0\n\t" \
8ae002
+		      /* v22: Vector permute mask (byte indices 1,0,3,2,...: swaps the bytes of each halfword).  */			\
8ae002
+		      "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
8ae002
+		      "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
8ae002
+		      /* Found an element: ch >= 0xd800 && ch < 0xe000; R_TMP = number of valid input bytes before it.  */ \
8ae002
+		      "10: vlgvb %[R_TMP],%%v19,7\n\t"			\
8ae002
+		      "11: la %[R_IN],0(%[R_TMP],%[R_IN])\n\t" /* Consume the valid input bytes.  */	\
8ae002
+		      "    sll %[R_TMP],1\n\t" /* Output bytes = 2 * input bytes.  */	\
8ae002
+		      "    lgr %[R_TMP2],%[R_TMP]\n\t"			\
8ae002
+		      "    ahi %[R_TMP],-1\n\t"				\
8ae002
+		      "    jl 20f\n\t" /* Nothing to store.  */	\
8ae002
+		      "    vstl %%v17,%[R_TMP],0(%[R_OUT])\n\t"		\
8ae002
+		      "    ahi %[R_TMP],-16\n\t"			\
8ae002
+		      "    jl 19f\n\t" /* Everything fitted into the first 16 output bytes.  */	\
8ae002
+		      "    vstl %%v18,%[R_TMP],16(%[R_OUT])\n\t"	\
8ae002
+		      "19: la %[R_OUT],0(%[R_TMP2],%[R_OUT])\n\t" /* Advance output by the bytes stored.  */	\
8ae002
+		      "20: \n\t"					\
8ae002
+		      ".machine pop"					\
8ae002
+		      : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			, [R_IN] "+a" (inptr)				\
8ae002
+			, [R_TMP] "=a" (tmp)				\
8ae002
+			, [R_TMP2] "=a" (tmp2)				\
8ae002
+			, [R_LEN] "+d" (len)				\
8ae002
+		      : /* inputs */					\
8ae002
+		      : /* clobber list */ "memory", "cc"		\
8ae002
+			ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17")	\
8ae002
+			ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19")	\
8ae002
+			ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21")	\
8ae002
+			ASM_CLOBBER_VR ("v22")				\
8ae002
+		      );						\
8ae002
+    if (len > 0)							\
8ae002
+      {									\
8ae002
+	/* Found an invalid character at next input-char; inptr/outptr were already advanced past the valid ones.  */		\
8ae002
+	BODY_ORIG_ERROR							\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define LOOP_NEED_FLAGS
8ae002
+# include <iconv/loop.c>
8ae002
+# include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+# undef BODY_ORIG_ERROR
8ae002
+ICONV_VX_IFUNC (__gconv_transform_ucs2reverse_internal)
8ae002
+
8ae002
+/* Convert from the internal (UCS4-like) format to UCS2: each 4-byte character below 0x10000 that is not a surrogate is written as 2 bytes; everything else takes an error path.  */
8ae002
+#define DEFINE_INIT		0
8ae002
+#define DEFINE_FINI		0
8ae002
+#define MIN_NEEDED_FROM		4 /* Bytes per internal input character.  */
8ae002
+#define MIN_NEEDED_TO		2 /* Bytes per UCS2 output character.  */
8ae002
+#define FROM_DIRECTION		1
8ae002
+#define FROM_LOOP		ICONV_VX_NAME (internal_ucs2_loop)
8ae002
+#define TO_LOOP			ICONV_VX_NAME (internal_ucs2_loop) /* Same name as FROM_LOOP; this is not used.  */
8ae002
+#define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2)
8ae002
+#define ONE_DIRECTION		1
8ae002
+
8ae002
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+#define LOOPFCT			FROM_LOOP
8ae002
+#define BODY_ORIG							\
8ae002
+  { /* Plain C conversion of a single internal character; BODY expands it for short buffers and for the character that stopped the vector loop.  */	\
8ae002
+    uint32_t val = *((const uint32_t *) inptr);				\
8ae002
+    /* Values >= 0x10000 do not fit into 16 bit.  */			\
8ae002
+    if (__glibc_unlikely (val >= 0x10000))				\
8ae002
+      {									\
8ae002
+	UNICODE_TAG_HANDLER (val, 4);					\
8ae002
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
8ae002
+      }									\
8ae002
+    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))		\
8ae002
+      {									\
8ae002
+	/* Surrogate characters in UCS-4 input are not valid.		\
8ae002
+	   We must catch this, because the UCS-2 output might be	\
8ae002
+	   interpreted as UTF-16 by other programs.  If we let		\
8ae002
+	   surrogates pass through, attackers could make a security	\
8ae002
+	   hole exploit by synthesizing any desired plane 1-16		\
8ae002
+	   character.  NOTE(review): result is assigned before the ignore_errors_p check and is not reset on the continue path, unlike the byte-swapping variant of this macro; confirm this is intended.  */						\
8ae002
+	result = __GCONV_ILLEGAL_INPUT;					\
8ae002
+	if (! ignore_errors_p ())					\
8ae002
+	  break;							\
8ae002
+	inptr += 4; /* Errors are ignored: skip the 4-byte character ...  */	\
8ae002
+	++*irreversible; /* ... and count it as an irreversible conversion.  */	\
8ae002
+	continue;							\
8ae002
+      }									\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	put16 (outptr, val); /* Valid character: store its low 16 bit.  */	\
8ae002
+	outptr += sizeof (uint16_t);					\
8ae002
+	inptr += 4;							\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  { /* Vectorized internal -> UCS2 conversion: needs at least 32 input and 16 output bytes, otherwise BODY_ORIG converts one character.  */	\
8ae002
+    if (__builtin_expect (inend - inptr < 32, 1)			\
8ae002
+	|| outend - outptr < 16)					\
8ae002
+      /* Convert remaining bytes with c code.  */			\
8ae002
+      BODY_ORIG								\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	/* Convert in 32 byte blocks; loop_count is limited by both the input and the output space.  */				\
8ae002
+	size_t loop_count = (inend - inptr) / 32;			\
8ae002
+	size_t tmp, tmp2;						\
8ae002
+	if (loop_count > (outend - outptr) / 16)			\
8ae002
+	  loop_count = (outend - outptr) / 16;				\
8ae002
+	__asm__ volatile (".machine push\n\t"				\
8ae002
+			  ".machine \"z13\"\n\t"			\
8ae002
+			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
8ae002
+			  CONVERT_32BIT_SIZE_T ([R_LI])			\
8ae002
+			  "    larl %[R_I],3f\n\t" /* Load the compare constants at label 3 into v20..v23.  */	\
8ae002
+			  "    vlm %%v20,%%v23,0(%[R_I])\n\t"		\
8ae002
+			  "0:  \n\t"					\
8ae002
+			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t" /* Load 8 internal chars.  */	\
8ae002
+			  /* Shorten UCS4 to UCS2.  */			\
8ae002
+			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
8ae002
+			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
8ae002
+			  "    jno 11f\n\t" /* Some ch >= 0xd800 in v16: look closer.  */	\
8ae002
+			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
8ae002
+			  "    jno 10f\n\t" /* Some ch >= 0xd800 in v17: look closer.  */	\
8ae002
+			  /* Store 16bytes to buf_out.  */		\
8ae002
+			  "2:  vst %%v18,0(%[R_OUT])\n\t"		\
8ae002
+			  "    la %[R_IN],32(%[R_IN])\n\t"		\
8ae002
+			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
8ae002
+			  "    brctg %[R_LI],0b\n\t"			\
8ae002
+			  "    j 20f\n\t" /* All blocks done; loop_count is 0.  */	\
8ae002
+			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
8ae002
+			  "3:  .long 0xd800,0xd800,0x0,0x0\n\t"		\
8ae002
+			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
8ae002
+			  /* Setup to check for ch >= 0xe000		\
8ae002
+			     && ch < 0x10000. (v22,v23)  */		\
8ae002
+			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
8ae002
+			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
8ae002
+			  /* v16 contains only valid chars. Check in v17: \
8ae002
+			     ch >= 0xe000 && ch <= 0xffff.  */		\
8ae002
+			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
8ae002
+			  "    jo 2b\n\t" /* All ch's in this range, proceed.   */ \
8ae002
+			  "    lghi %[R_TMP],16\n\t" /* v17 starts 16 bytes into the block.  */	\
8ae002
+			  "    j 12f\n\t"				\
8ae002
+			  /* Maybe v16 contains invalid chars.		\
8ae002
+			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
8ae002
+			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
8ae002
+			  "    jo 1b\n\t" /* All ch's in this range, proceed.   */ \
8ae002
+			  "    lghi %[R_TMP],0\n\t"			\
8ae002
+			  "12: vlgvb %[R_I],%%v19,7\n\t" /* Byte index of the first ch outside that range ...  */	\
8ae002
+			  "    agr %[R_I],%[R_TMP]\n\t" /* ... relative to the start of the block.  */	\
8ae002
+			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Consume the input bytes before it.  */	\
8ae002
+			  "    srl %[R_I],1\n\t" /* Output bytes = input bytes / 2.  */	\
8ae002
+			  "    ahi %[R_I],-1\n\t"			\
8ae002
+			  "    jl 20f\n\t" /* Nothing to store.  */	\
8ae002
+			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
8ae002
+			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
8ae002
+			  "20:\n\t"					\
8ae002
+			  ".machine pop"				\
8ae002
+			  : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			    , [R_IN] "+a" (inptr)			\
8ae002
+			    , [R_LI] "+d" (loop_count)			\
8ae002
+			    , [R_I] "=a" (tmp2)				\
8ae002
+			    , [R_TMP] "=d" (tmp)			\
8ae002
+			  : /* inputs */				\
8ae002
+			  : /* clobber list */ "memory", "cc"		\
8ae002
+			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
8ae002
+			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
8ae002
+			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
8ae002
+			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
8ae002
+			  );						\
8ae002
+	if (loop_count > 0)						\
8ae002
+	  {								\
8ae002
+	    /* The asm stopped early: the character at inptr needs the C code (error handling or plain conversion).  */	\
8ae002
+	    BODY_ORIG							\
8ae002
+	  }								\
8ae002
+      }									\
8ae002
+  }
8ae002
+#define LOOP_NEED_FLAGS
8ae002
+#include <iconv/loop.c>
8ae002
+#include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs2)
8ae002
+
8ae002
+/* Convert from the internal (UCS4-like) format to UCS2 in other endianness: each valid 4-byte character is written as 2 byte-swapped bytes; values >= 0x10000 and surrogates take an error path. */
8ae002
+#define DEFINE_INIT		0
8ae002
+#define DEFINE_FINI		0
8ae002
+#define MIN_NEEDED_FROM		4 /* Bytes per internal input character.  */
8ae002
+#define MIN_NEEDED_TO		2 /* Bytes per UCS2 output character.  */
8ae002
+#define FROM_DIRECTION		1
8ae002
+#define FROM_LOOP		ICONV_VX_NAME (internal_ucs2reverse_loop)
8ae002
+#define TO_LOOP			ICONV_VX_NAME (internal_ucs2reverse_loop)/* Same name as FROM_LOOP; this is not used.  */
8ae002
+#define FUNCTION_NAME		ICONV_VX_NAME (__gconv_transform_internal_ucs2reverse)
8ae002
+#define ONE_DIRECTION		1
8ae002
+
8ae002
+#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
8ae002
+#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
8ae002
+#define LOOPFCT			FROM_LOOP
8ae002
+#define BODY_ORIG							\
8ae002
+  { /* Plain C conversion of a single internal character to byte-swapped UCS2; BODY expands it for short buffers and for the character that stopped the vector loop.  */	\
8ae002
+    uint32_t val = *((const uint32_t *) inptr);				\
8ae002
+    if (__glibc_unlikely (val >= 0x10000)) /* Does not fit into 16 bit.  */	\
8ae002
+      {									\
8ae002
+	UNICODE_TAG_HANDLER (val, 4);					\
8ae002
+	STANDARD_TO_LOOP_ERR_HANDLER (4);				\
8ae002
+      }									\
8ae002
+    else if (__glibc_unlikely (val >= 0xd800 && val < 0xe000))		\
8ae002
+      {									\
8ae002
+	/* Surrogate characters in UCS-4 input are not valid.		\
8ae002
+	   We must catch this, because the UCS-2 output might be	\
8ae002
+	   interpreted as UTF-16 by other programs.  If we let		\
8ae002
+	   surrogates pass through, attackers could make a security	\
8ae002
+	   hole exploit by synthesizing any desired plane 1-16		\
8ae002
+	   character.  */						\
8ae002
+	if (! ignore_errors_p ())					\
8ae002
+	  {								\
8ae002
+	    result = __GCONV_ILLEGAL_INPUT;				\
8ae002
+	    break; /* Stop converting and report the error.  */	\
8ae002
+	  }								\
8ae002
+	inptr += 4; /* Errors are ignored: skip the 4-byte character ...  */	\
8ae002
+	++*irreversible; /* ... and count it as an irreversible conversion.  */	\
8ae002
+	continue;							\
8ae002
+      }									\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	put16 (outptr, bswap_16 (val)); /* Valid character: store its low 16 bit byte-swapped.  */	\
8ae002
+	outptr += sizeof (uint16_t);					\
8ae002
+	inptr += 4;							\
8ae002
+      }									\
8ae002
+  }
8ae002
+# define BODY								\
8ae002
+  { /* Vectorized internal -> byte-swapped UCS2 conversion: needs at least 32 input and 16 output bytes, otherwise BODY_ORIG converts one character.  */	\
8ae002
+    if (__builtin_expect (inend - inptr < 32, 1)			\
8ae002
+	|| outend - outptr < 16)					\
8ae002
+      /* Convert remaining bytes with c code.  */			\
8ae002
+      BODY_ORIG								\
8ae002
+    else								\
8ae002
+      {									\
8ae002
+	/* Convert in 32 byte blocks; loop_count is limited by both the input and the output space.  */				\
8ae002
+	size_t loop_count = (inend - inptr) / 32;			\
8ae002
+	size_t tmp, tmp2;						\
8ae002
+	if (loop_count > (outend - outptr) / 16)			\
8ae002
+	  loop_count = (outend - outptr) / 16;				\
8ae002
+	__asm__ volatile (".machine push\n\t"				\
8ae002
+			  ".machine \"z13\"\n\t"			\
8ae002
+			  ".machinemode \"zarch_nohighgprs\"\n\t"	\
8ae002
+			  CONVERT_32BIT_SIZE_T ([R_LI])			\
8ae002
+			  "    larl %[R_I],3f\n\t" /* Load the constants at label 3 into v20..v24.  */	\
8ae002
+			  "    vlm %%v20,%%v24,0(%[R_I])\n\t"		\
8ae002
+			  "0:  \n\t"					\
8ae002
+			  "    vlm %%v16,%%v17,0(%[R_IN])\n\t" /* Load 8 internal chars.  */	\
8ae002
+			  /* Shorten UCS4 to UCS2 and byteswap.  */	\
8ae002
+			  "    vpkf %%v18,%%v16,%%v17\n\t"		\
8ae002
+			  "    vperm %%v18,%%v18,%%v18,%%v24\n\t"	\
8ae002
+			  "    vstrcfs %%v19,%%v16,%%v20,%%v21\n\t"	\
8ae002
+			  "    jno 11f\n\t" /* Some ch >= 0xd800 in v16: look closer.  */	\
8ae002
+			  "1:  vstrcfs %%v19,%%v17,%%v20,%%v21\n\t"	\
8ae002
+			  "    jno 10f\n\t" /* Some ch >= 0xd800 in v17: look closer.  */	\
8ae002
+			  /* Store 16bytes to buf_out.  */		\
8ae002
+			  "2: vst %%v18,0(%[R_OUT])\n\t"		\
8ae002
+			  "    la %[R_IN],32(%[R_IN])\n\t"		\
8ae002
+			  "    la %[R_OUT],16(%[R_OUT])\n\t"		\
8ae002
+			  "    brctg %[R_LI],0b\n\t"			\
8ae002
+			  "    j 20f\n\t" /* All blocks done; loop_count is 0.  */	\
8ae002
+			  /* Setup to check for ch >= 0xd800. (v20, v21)  */ \
8ae002
+			  "3: .long 0xd800,0xd800,0x0,0x0\n\t"		\
8ae002
+			  "    .long 0xa0000000,0xa0000000,0x0,0x0\n\t"	\
8ae002
+			  /* Setup to check for ch >= 0xe000		\
8ae002
+			     && ch < 0x10000. (v22,v23)  */		\
8ae002
+			  "    .long 0xe000,0x10000,0x0,0x0\n\t"	\
8ae002
+			  "    .long 0xa0000000,0x40000000,0x0,0x0\n\t"	\
8ae002
+			  /* Vector permute mask (v24): byte indices 1,0,3,2,... swap the bytes of each halfword.  */		\
8ae002
+			  "    .short 0x0100,0x0302,0x0504,0x0706\n\t"	\
8ae002
+			  "    .short 0x0908,0x0b0a,0x0d0c,0x0f0e\n\t"	\
8ae002
+			  /* v16 contains only valid chars. Check in v17: \
8ae002
+			     ch >= 0xe000 && ch <= 0xffff.  */		\
8ae002
+			  "10: vstrcfs %%v19,%%v17,%%v22,%%v23,8\n\t"	\
8ae002
+			  "    jo 2b\n\t" /* All ch's in this range, proceed.  */ \
8ae002
+			  "    lghi %[R_TMP],16\n\t" /* v17 starts 16 bytes into the block.  */	\
8ae002
+			  "    j 12f\n\t"				\
8ae002
+			  /* Maybe v16 contains invalid chars.		\
8ae002
+			     Check ch >= 0xe000 && ch <= 0xffff.  */	\
8ae002
+			  "11: vstrcfs %%v19,%%v16,%%v22,%%v23,8\n\t"	\
8ae002
+			  "    jo 1b\n\t" /* All ch's in this range, proceed.  */ \
8ae002
+			  "    lghi %[R_TMP],0\n\t"			\
8ae002
+			  "12: vlgvb %[R_I],%%v19,7\n\t" /* Byte index of the first ch outside that range ...  */	\
8ae002
+			  "    agr %[R_I],%[R_TMP]\n\t" /* ... relative to the start of the block.  */	\
8ae002
+			  "    la %[R_IN],0(%[R_I],%[R_IN])\n\t" /* Consume the input bytes before it.  */	\
8ae002
+			  "    srl %[R_I],1\n\t" /* Output bytes = input bytes / 2.  */	\
8ae002
+			  "    ahi %[R_I],-1\n\t"			\
8ae002
+			  "    jl 20f\n\t" /* Nothing to store.  */	\
8ae002
+			  "    vstl %%v18,%[R_I],0(%[R_OUT])\n\t"	\
8ae002
+			  "    la %[R_OUT],1(%[R_I],%[R_OUT])\n\t"	\
8ae002
+			  "20:\n\t"					\
8ae002
+			  ".machine pop"				\
8ae002
+			  : /* outputs */ [R_OUT] "+a" (outptr)		\
8ae002
+			    , [R_IN] "+a" (inptr)			\
8ae002
+			    , [R_LI] "+d" (loop_count)			\
8ae002
+			    , [R_I] "=a" (tmp2)				\
8ae002
+			    , [R_TMP] "=d" (tmp)			\
8ae002
+			  : /* inputs */				\
8ae002
+			  : /* clobber list */ "memory", "cc"		\
8ae002
+			    ASM_CLOBBER_VR ("v16") ASM_CLOBBER_VR ("v17") \
8ae002
+			    ASM_CLOBBER_VR ("v18") ASM_CLOBBER_VR ("v19") \
8ae002
+			    ASM_CLOBBER_VR ("v20") ASM_CLOBBER_VR ("v21") \
8ae002
+			    ASM_CLOBBER_VR ("v22") ASM_CLOBBER_VR ("v23") \
8ae002
+			    ASM_CLOBBER_VR ("v24")			\
8ae002
+			  );						\
8ae002
+	if (loop_count > 0)						\
8ae002
+	  {								\
8ae002
+	    /* The asm stopped early: the character at inptr needs the C code (error handling or plain conversion).  */	\
8ae002
+	    BODY_ORIG							\
8ae002
+	  }								\
8ae002
+      }									\
8ae002
+  }
8ae002
+#define LOOP_NEED_FLAGS
8ae002
+#include <iconv/loop.c>
8ae002
+#include <iconv/skeleton.c>
8ae002
+# undef BODY_ORIG
8ae002
+ICONV_VX_IFUNC (__gconv_transform_internal_ucs2reverse)
8ae002
+
8ae002
+
8ae002
+#else
8ae002
+/* Generate the internal transformations without ifunc if build environment
8ae002
+   lacks vector support. Instead simply include the common version.  */
8ae002
+# include <iconv/gconv_simple.c>
8ae002
+#endif /* !defined HAVE_S390_VX_ASM_SUPPORT */
8ae002
-- 
8ae002
1.8.3.1
8ae002