|
|
ce426f |
From 832572eac8a661d25efe0f2bcc6a861e2c29c3b8 Mon Sep 17 00:00:00 2001
|
|
|
ce426f |
From: Stefan Liebler <stli@linux.vnet.ibm.com>
|
|
|
ce426f |
Date: Mon, 7 Nov 2016 15:50:46 +0100
|
|
|
ce426f |
Subject: [PATCH 05/17] S390: Optimize iso-8859-1 to ibm037 iconv-module.
|
|
|
ce426f |
|
|
|
ce426f |
Upstream commit 81c6380887c6d62c56e5f0f85a241f759f58b2fd
|
|
|
ce426f |
|
|
|
ce426f |
This patch reworks the s390 specific module which used the z900
|
|
|
ce426f |
translate one to one instruction. Now the g5 translate instruction is used,
|
|
|
ce426f |
because it outperforms the troo instruction.
|
|
|
ce426f |
|
|
|
ce426f |
ChangeLog:
|
|
|
ce426f |
|
|
|
ce426f |
* sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP):
|
|
|
ce426f |
Rename to TR_LOOP and use the tr instruction instead of troo.
|
|
|
ce426f |
---
|
|
|
ce426f |
sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 95 +++++++++++++++++-----------
|
|
|
ce426f |
1 file changed, 57 insertions(+), 38 deletions(-)
|
|
|
ce426f |
|
|
|
ce426f |
diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c
|
|
|
ce426f |
index 58641f5..3b63e6a 100644
|
|
|
ce426f |
--- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c
|
|
|
ce426f |
+++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c
|
|
|
ce426f |
@@ -1,8 +1,7 @@
|
|
|
ce426f |
/* Conversion between ISO 8859-1 and IBM037.
|
|
|
ce426f |
|
|
|
ce426f |
- This module uses the Z900 variant of the Translate One To One
|
|
|
ce426f |
- instruction.
|
|
|
ce426f |
- Copyright (C) 1997-2009 Free Software Foundation, Inc.
|
|
|
ce426f |
+ This module uses the translate instruction.
|
|
|
ce426f |
+ Copyright (C) 1997-2016 Free Software Foundation, Inc.
|
|
|
ce426f |
|
|
|
ce426f |
Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com>
|
|
|
ce426f |
Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997.
|
|
|
ce426f |
@@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) =
|
|
|
ce426f |
#define MIN_NEEDED_FROM 1
|
|
|
ce426f |
#define MIN_NEEDED_TO 1
|
|
|
ce426f |
|
|
|
ce426f |
-/* The Z900 variant of troo forces us to always specify a test
|
|
|
ce426f |
- character which ends the translation. So if we run into the
|
|
|
ce426f |
- situation where the translation has been interrupted due to the
|
|
|
ce426f |
- test character we translate the character by hand and jump back
|
|
|
ce426f |
- into the instruction. */
|
|
|
ce426f |
-
|
|
|
ce426f |
-#define TROO_LOOP(TABLE) \
|
|
|
ce426f |
+#define TR_LOOP(TABLE) \
|
|
|
ce426f |
{ \
|
|
|
ce426f |
- register const unsigned char test asm ("0") = 0; \
|
|
|
ce426f |
- register const unsigned char *pTable asm ("1") = TABLE; \
|
|
|
ce426f |
- register unsigned char *pOutput asm ("2") = outptr; \
|
|
|
ce426f |
- register uint64_t length asm ("3"); \
|
|
|
ce426f |
- const unsigned char* pInput = inptr; \
|
|
|
ce426f |
- uint64_t tmp; \
|
|
|
ce426f |
- \
|
|
|
ce426f |
- length = (inend - inptr < outend - outptr \
|
|
|
ce426f |
- ? inend - inptr : outend - outptr); \
|
|
|
ce426f |
+ size_t length = (inend - inptr < outend - outptr \
|
|
|
ce426f |
+ ? inend - inptr : outend - outptr); \
|
|
|
ce426f |
\
|
|
|
ce426f |
- asm volatile ("0: \n\t" \
|
|
|
ce426f |
- " troo %0,%1 \n\t" \
|
|
|
ce426f |
- " jz 1f \n\t" \
|
|
|
ce426f |
- " jo 0b \n\t" \
|
|
|
ce426f |
- " llgc %3,0(%1) \n\t" \
|
|
|
ce426f |
- " la %3,0(%3,%4) \n\t" \
|
|
|
ce426f |
- " mvc 0(1,%0),0(%3) \n\t" \
|
|
|
ce426f |
- " aghi %1,1 \n\t" \
|
|
|
ce426f |
- " aghi %0,1 \n\t" \
|
|
|
ce426f |
- " aghi %2,-1 \n\t" \
|
|
|
ce426f |
- " j 0b \n\t" \
|
|
|
ce426f |
- "1: \n" \
|
|
|
ce426f |
+ /* Process in 256 byte blocks. */ \
|
|
|
ce426f |
+ if (__builtin_expect (length >= 256, 0)) \
|
|
|
ce426f |
+ { \
|
|
|
ce426f |
+ size_t blocks = length / 256; \
|
|
|
ce426f |
+ __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \
|
|
|
ce426f |
+ " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \
|
|
|
ce426f |
+ " la %[R_IN],256(%[R_IN])\n\t" \
|
|
|
ce426f |
+ " la %[R_OUT],256(%[R_OUT])\n\t" \
|
|
|
ce426f |
+ " brctg %[R_LI],0b\n\t" \
|
|
|
ce426f |
+ : /* outputs */ [R_IN] "+a" (inptr) \
|
|
|
ce426f |
+ , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \
|
|
|
ce426f |
+ : /* inputs */ [R_TBL] "a" (TABLE) \
|
|
|
ce426f |
+ : /* clobber list */ "memory" \
|
|
|
ce426f |
+ ); \
|
|
|
ce426f |
+ length = length % 256; \
|
|
|
ce426f |
+ } \
|
|
|
ce426f |
\
|
|
|
ce426f |
- : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \
|
|
|
ce426f |
- : "a" (pTable), "d" (test) \
|
|
|
ce426f |
- : "cc"); \
|
|
|
ce426f |
+ /* Process remaining 0...248 bytes in 8 byte blocks. */ \
|
|
|
ce426f |
+ if (length >= 8) \
|
|
|
ce426f |
+ { \
|
|
|
ce426f |
+ size_t blocks = length / 8; \
|
|
|
ce426f |
+ for (int i = 0; i < blocks; i++) \
|
|
|
ce426f |
+ { \
|
|
|
ce426f |
+ outptr[0] = TABLE[inptr[0]]; \
|
|
|
ce426f |
+ outptr[1] = TABLE[inptr[1]]; \
|
|
|
ce426f |
+ outptr[2] = TABLE[inptr[2]]; \
|
|
|
ce426f |
+ outptr[3] = TABLE[inptr[3]]; \
|
|
|
ce426f |
+ outptr[4] = TABLE[inptr[4]]; \
|
|
|
ce426f |
+ outptr[5] = TABLE[inptr[5]]; \
|
|
|
ce426f |
+ outptr[6] = TABLE[inptr[6]]; \
|
|
|
ce426f |
+ outptr[7] = TABLE[inptr[7]]; \
|
|
|
ce426f |
+ inptr += 8; \
|
|
|
ce426f |
+ outptr += 8; \
|
|
|
ce426f |
+ } \
|
|
|
ce426f |
+ length = length % 8; \
|
|
|
ce426f |
+ } \
|
|
|
ce426f |
\
|
|
|
ce426f |
- inptr = pInput; \
|
|
|
ce426f |
- outptr = pOutput; \
|
|
|
ce426f |
+ /* Process remaining 0...7 bytes. */ \
|
|
|
ce426f |
+ switch (length) \
|
|
|
ce426f |
+ { \
|
|
|
ce426f |
+ case 7: outptr[6] = TABLE[inptr[6]]; \
|
|
|
ce426f |
+ case 6: outptr[5] = TABLE[inptr[5]]; \
|
|
|
ce426f |
+ case 5: outptr[4] = TABLE[inptr[4]]; \
|
|
|
ce426f |
+ case 4: outptr[3] = TABLE[inptr[3]]; \
|
|
|
ce426f |
+ case 3: outptr[2] = TABLE[inptr[2]]; \
|
|
|
ce426f |
+ case 2: outptr[1] = TABLE[inptr[1]]; \
|
|
|
ce426f |
+ case 1: outptr[0] = TABLE[inptr[0]]; \
|
|
|
ce426f |
+ case 0: break; \
|
|
|
ce426f |
+ } \
|
|
|
ce426f |
+ inptr += length; \
|
|
|
ce426f |
+ outptr += length; \
|
|
|
ce426f |
}
|
|
|
ce426f |
|
|
|
ce426f |
+
|
|
|
ce426f |
/* First define the conversion function from ISO 8859-1 to CP037. */
|
|
|
ce426f |
#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
|
|
|
ce426f |
#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
|
|
|
ce426f |
#define LOOPFCT FROM_LOOP
|
|
|
ce426f |
-#define BODY TROO_LOOP (table_iso8859_1_to_cp037)
|
|
|
ce426f |
+#define BODY TR_LOOP (table_iso8859_1_to_cp037)
|
|
|
ce426f |
|
|
|
ce426f |
#include <iconv/loop.c>
|
|
|
ce426f |
|
|
|
ce426f |
@@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) =
|
|
|
ce426f |
#define MIN_NEEDED_INPUT MIN_NEEDED_TO
|
|
|
ce426f |
#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
|
|
|
ce426f |
#define LOOPFCT TO_LOOP
|
|
|
ce426f |
-#define BODY TROO_LOOP (table_cp037_iso8859_1);
|
|
|
ce426f |
+#define BODY TR_LOOP (table_cp037_iso8859_1);
|
|
|
ce426f |
|
|
|
ce426f |
#include <iconv/loop.c>
|
|
|
ce426f |
|
|
|
ce426f |
--
|
|
|
ce426f |
1.8.3.1
|
|
|
ce426f |
|