| From 832572eac8a661d25efe0f2bcc6a861e2c29c3b8 Mon Sep 17 00:00:00 2001 |
| From: Stefan Liebler <stli@linux.vnet.ibm.com> |
| Date: Mon, 7 Nov 2016 15:50:46 +0100 |
| Subject: [PATCH 05/17] S390: Optimize iso-8859-1 to ibm037 iconv-module. |
| |
| Upstream commit 81c6380887c6d62c56e5f0f85a241f759f58b2fd |
| |
| This patch reworks the s390-specific module, which used the z900 |
| Translate One to One (troo) instruction. It now uses the g5 translate (tr) |
| instruction instead, because tr outperforms troo. |
| |
| ChangeLog: |
| |
| * sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c (TROO_LOOP): |
| Rename to TR_LOOP and use the tr instruction instead of troo. |
| --- |
| sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c | 95 +++++++++++++++++----------- |
| 1 file changed, 57 insertions(+), 38 deletions(-) |
| |
| diff --git a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |
| index 58641f5..3b63e6a 100644 |
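| --- a/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |
| +++ b/sysdeps/s390/s390-64/iso-8859-1_cp037_z900.c |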
| @@ -1,8 +1,7 @@ |
| /* Conversion between ISO 8859-1 and IBM037. |
| |
| - This module uses the Z900 variant of the Translate One To One |
| - instruction. |
| - Copyright (C) 1997-2009 Free Software Foundation, Inc. |
| + This module uses the translate instruction. |
| + Copyright (C) 1997-2016 Free Software Foundation, Inc. |
| |
| Author: Andreas Krebbel <Andreas.Krebbel@de.ibm.com> |
| Based on the work by Ulrich Drepper <drepper@cygnus.com>, 1997. |
| @@ -176,50 +175,70 @@ __attribute__ ((aligned (8))) = |
| #define MIN_NEEDED_FROM 1 |
| #define MIN_NEEDED_TO 1 |
| |
| -/* The Z900 variant of troo forces us to always specify a test |
| - character which ends the translation. So if we run into the |
| - situation where the translation has been interrupted due to the |
| - test character we translate the character by hand and jump back |
| - into the instruction. */ |
| - |
| -#define TROO_LOOP(TABLE) \ |
| +#define TR_LOOP(TABLE) \ |
| { \ |
| - register const unsigned char test asm ("0") = 0; \ |
| - register const unsigned char *pTable asm ("1") = TABLE; \ |
| - register unsigned char *pOutput asm ("2") = outptr; \ |
| - register uint64_t length asm ("3"); \ |
| - const unsigned char* pInput = inptr; \ |
| - uint64_t tmp; \ |
| - \ |
| - length = (inend - inptr < outend - outptr \ |
| - ? inend - inptr : outend - outptr); \ |
| + size_t length = (inend - inptr < outend - outptr \ |
| + ? inend - inptr : outend - outptr); \ |
| \ |
| - asm volatile ("0: \n\t" \ |
| - " troo %0,%1 \n\t" \ |
| - " jz 1f \n\t" \ |
| - " jo 0b \n\t" \ |
| - " llgc %3,0(%1) \n\t" \ |
| - " la %3,0(%3,%4) \n\t" \ |
| - " mvc 0(1,%0),0(%3) \n\t" \ |
| - " aghi %1,1 \n\t" \ |
| - " aghi %0,1 \n\t" \ |
| - " aghi %2,-1 \n\t" \ |
| - " j 0b \n\t" \ |
| - "1: \n" \ |
| + /* Process in 256 byte blocks. */ \ |
| + if (__builtin_expect (length >= 256, 0)) \ |
| + { \ |
| + size_t blocks = length / 256; \ |
| + __asm__ __volatile__("0: mvc 0(256,%[R_OUT]),0(%[R_IN])\n\t" \ |
| + " tr 0(256,%[R_OUT]),0(%[R_TBL])\n\t" \ |
| + " la %[R_IN],256(%[R_IN])\n\t" \ |
| + " la %[R_OUT],256(%[R_OUT])\n\t" \ |
| + " brctg %[R_LI],0b\n\t" \ |
| + : /* outputs */ [R_IN] "+a" (inptr) \ |
| + , [R_OUT] "+a" (outptr), [R_LI] "+d" (blocks) \ |
| + : /* inputs */ [R_TBL] "a" (TABLE) \ |
| + : /* clobber list */ "memory" \ |
| + ); \ |
| + length = length % 256; \ |
| + } \ |
| \ |
| - : "+a" (pOutput), "+a" (pInput), "+d" (length), "=&a" (tmp) \ |
| - : "a" (pTable), "d" (test) \ |
| - : "cc"); \ |
| + /* Process remaining 0...248 bytes in 8byte blocks. */ \ |
| + if (length >= 8) \ |
| + { \ |
| + size_t blocks = length / 8; \ |
| + for (int i = 0; i < blocks; i++) \ |
| + { \ |
| + outptr[0] = TABLE[inptr[0]]; \ |
| + outptr[1] = TABLE[inptr[1]]; \ |
| + outptr[2] = TABLE[inptr[2]]; \ |
| + outptr[3] = TABLE[inptr[3]]; \ |
| + outptr[4] = TABLE[inptr[4]]; \ |
| + outptr[5] = TABLE[inptr[5]]; \ |
| + outptr[6] = TABLE[inptr[6]]; \ |
| + outptr[7] = TABLE[inptr[7]]; \ |
| + inptr += 8; \ |
| + outptr += 8; \ |
| + } \ |
| + length = length % 8; \ |
| + } \ |
| \ |
| - inptr = pInput; \ |
| - outptr = pOutput; \ |
| + /* Process remaining 0...7 bytes. */ \ |
| + switch (length) \ |
| + { \ |
| + case 7: outptr[6] = TABLE[inptr[6]]; \ |
| + case 6: outptr[5] = TABLE[inptr[5]]; \ |
| + case 5: outptr[4] = TABLE[inptr[4]]; \ |
| + case 4: outptr[3] = TABLE[inptr[3]]; \ |
| + case 3: outptr[2] = TABLE[inptr[2]]; \ |
| + case 2: outptr[1] = TABLE[inptr[1]]; \ |
| + case 1: outptr[0] = TABLE[inptr[0]]; \ |
| + case 0: break; \ |
| + } \ |
| + inptr += length; \ |
| + outptr += length; \ |
| } |
| |
| + |
| /* First define the conversion function from ISO 8859-1 to CP037. */ |
| #define MIN_NEEDED_INPUT MIN_NEEDED_FROM |
| #define MIN_NEEDED_OUTPUT MIN_NEEDED_TO |
| #define LOOPFCT FROM_LOOP |
| -#define BODY TROO_LOOP (table_iso8859_1_to_cp037) |
| +#define BODY TR_LOOP (table_iso8859_1_to_cp037) |
| |
| #include <iconv/loop.c> |
| |
| @@ -228,7 +247,7 @@ __attribute__ ((aligned (8))) = |
| #define MIN_NEEDED_INPUT MIN_NEEDED_TO |
| #define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM |
| #define LOOPFCT TO_LOOP |
| -#define BODY TROO_LOOP (table_cp037_iso8859_1); |
| +#define BODY TR_LOOP (table_cp037_iso8859_1); |
| |
| #include <iconv/loop.c> |
| |
| -- |
| 1.8.3.1 |
| |