| From 7dd60718b327b3eb6112ec3900750007b0259189 Mon Sep 17 00:00:00 2001 |
| From: raji <raji@oc4354787705.ibm.com> |
| Date: Tue, 14 Jun 2016 14:51:16 +0530 |
| Subject: [PATCH] powerpc: strcasecmp/strncasecmp optimization for power8 |
| |
| This implementation utilizes vectors to improve performance |
| compared to the current byte-by-byte implementation for POWER7. |
| The performance improvement is up to 4x. This patch was tested |
| on powerpc64 and powerpc64le. |
| |
| (cherry picked from commit c8376f3e07602aaef9cb843bb73cb5f2b860634a) |
| |
| Conflicts: |
| sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
| sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
| |
| ChangeLog | 22 + |
| sysdeps/powerpc/powerpc64/multiarch/Makefile | 4 +- |
| .../powerpc/powerpc64/multiarch/ifunc-impl-list.c | 6 + |
| .../powerpc64/multiarch/strcasecmp-power7.S | 20 +- |
| .../powerpc64/multiarch/strcasecmp-power8.S | 28 ++ |
| .../powerpc/powerpc64/multiarch/strcasecmp-ppc64.c | 21 + |
| sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c | 32 +- |
| .../powerpc/powerpc64/multiarch/strncase-power8.S | 28 ++ |
| .../powerpc/powerpc64/multiarch/strncase-ppc64.c | 21 + |
| sysdeps/powerpc/powerpc64/multiarch/strncase.c | 25 +- |
| sysdeps/powerpc/powerpc64/power8/strcasecmp.S | 446 +++++++++++++++++++++ |
| sysdeps/powerpc/powerpc64/power8/strncase.S | 20 + |
| 12 files changed, 622 insertions(+), 51 deletions(-) |
| create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S |
| create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |
| create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S |
| create mode 100644 sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c |
| create mode 100644 sysdeps/powerpc/powerpc64/power8/strcasecmp.S |
| create mode 100644 sysdeps/powerpc/powerpc64/power8/strncase.S |
| |
| diff --git a/ChangeLog b/ChangeLog |
| index c01d1a0..9385bd0 100644 |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/Makefile b/sysdeps/powerpc/powerpc64/multiarch/Makefile |
| index 9ee9bc2..e3ac285 100644 |
| |
| |
| @@ -21,6 +21,8 @@ sysdep_routines += memcpy-power7 memcpy-a2 memcpy-power6 memcpy-cell \ |
| mempcpy-power7 mempcpy-ppc64 memchr-power7 memchr-ppc64 \ |
| memrchr-power7 memrchr-ppc64 rawmemchr-power7 \ |
| stpcpy-power8 stpcpy-power7 stpcpy-ppc64 \ |
| + strcasecmp-ppc64 strcasecmp-power8 \ |
| + strncase-ppc64 strncase-power8 \ |
| strcasestr-power8 strcasestr-ppc64 \ |
| strcat-power8 strcat-power7 strcat-ppc64 \ |
| strcmp-power8 strcmp-power7 strcmp-ppc64 \ |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c b/sysdeps/powerpc/powerpc64/multiarch/ifunc-impl-list.c |
| index 228891f..aabd7bc 100644 |
| |
| |
| @@ -204,6 +204,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, |
| /* Support sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c. */ |
| IFUNC_IMPL (i, name, strcasecmp, |
| IFUNC_IMPL_ADD (array, i, strcasecmp, |
| + hwcap2 & PPC_FEATURE2_ARCH_2_07, |
| + __strcasecmp_power8) |
| + IFUNC_IMPL_ADD (array, i, strcasecmp, |
| hwcap & PPC_FEATURE_HAS_VSX, |
| __strcasecmp_power7) |
| IFUNC_IMPL_ADD (array, i, strcasecmp, 1, __strcasecmp_ppc)) |
| @@ -219,6 +222,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array, |
| /* Support sysdeps/powerpc/powerpc64/multiarch/strncase.c. */ |
| IFUNC_IMPL (i, name, strncasecmp, |
| IFUNC_IMPL_ADD (array, i, strncasecmp, |
| + hwcap2 & PPC_FEATURE2_ARCH_2_07, |
| + __strncasecmp_power8) |
| + IFUNC_IMPL_ADD (array, i, strncasecmp, |
| hwcap & PPC_FEATURE_HAS_VSX, |
| __strncasecmp_power7) |
| IFUNC_IMPL_ADD (array, i, strncasecmp, 1, __strncasecmp_ppc)) |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power7.S |
| index 56eed9a..99cd7bd 100644 |
| |
| |
| @@ -1,5 +1,5 @@ |
| -/* Optimized strcasecmp implementation foOWER7. |
| - Copyright (C) 2013-2014 Free Software Foundation, Inc. |
| +/* Optimized strcasecmp implementation for POWER7. |
| + Copyright (C) 2013-2016 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| @@ -18,21 +18,7 @@ |
| |
| #include <sysdep.h> |
| |
| -#undef ENTRY |
| -#define ENTRY(name) \ |
| - .section ".text"; \ |
| - ENTRY_2(__strcasecmp_power7) \ |
| - .align ALIGNARG(2); \ |
| - BODY_LABEL(__strcasecmp_power7): \ |
| - cfi_startproc; \ |
| - LOCALENTRY(__strcasecmp_power7) |
| - |
| -#undef END |
| -#define END(name) \ |
| - cfi_endproc; \ |
| - TRACEBACK(__strcasecmp_power7) \ |
| - END_2(__strcasecmp_power7) |
| - |
| +#define __strcasecmp __strcasecmp_power7 |
| #undef weak_alias |
| #define weak_alias(name, alias) |
| |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-power8.S |
| new file mode 100644 |
| index 0000000..492047a |
| |
| |
| @@ -0,0 +1,28 @@ |
| +/* Optimized strcasecmp implementation for POWER8. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#include <sysdep.h> |
| + |
| +#define __strcasecmp __strcasecmp_power8 |
| +#undef weak_alias |
| +#define weak_alias(name, alias) |
| + |
| +#undef libc_hidden_builtin_def |
| +#define libc_hidden_builtin_def(name) |
| + |
| +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp-ppc64.c |
| new file mode 100644 |
| index 0000000..6318b4a |
| |
| |
| @@ -0,0 +1,21 @@ |
| +/* Multiarch strcasecmp for PPC64. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#define strcasecmp __strcasecmp_ppc |
| + |
| +#include <string/strcasecmp.c> |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c b/sysdeps/powerpc/powerpc64/multiarch/strcasecmp.c |
| index 979e9f1..5ec6885 100644 |
| |
| |
| @@ -1,5 +1,5 @@ |
| -/* Multiple versions of strcasecmp. |
| - Copyright (C) 2013-2014 Free Software Foundation, Inc. |
| +/* Multiple versions of strcasecmp |
| + Copyright (C) 2013-2016 Free Software Foundation, Inc. |
| This file is part of the GNU C Library. |
| |
| The GNU C Library is free software; you can redistribute it and/or |
| @@ -16,25 +16,21 @@ |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| -#if IS_IN (libc) |
| -# include <string.h> |
| -# define strcasecmp __strcasecmp_ppc |
| -extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; |
| -extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; |
| -#endif |
| +#include <string.h> |
| +#include <shlib-compat.h> |
| +#include "init-arch.h" |
| |
| -#include <string/strcasecmp.c> |
| -#undef strcasecmp |
| +extern __typeof (__strcasecmp) __libc_strcasecmp; |
| |
| -#if IS_IN (libc) |
| -# include <shlib-compat.h> |
| -# include "init-arch.h" |
| +extern __typeof (__strcasecmp) __strcasecmp_ppc attribute_hidden; |
| +extern __typeof (__strcasecmp) __strcasecmp_power7 attribute_hidden; |
| +extern __typeof (__strcasecmp) __strcasecmp_power8 attribute_hidden; |
| |
| -extern __typeof (__strcasecmp) __libc_strcasecmp; |
| libc_ifunc (__libc_strcasecmp, |
| - (hwcap & PPC_FEATURE_HAS_VSX) |
| - ? __strcasecmp_power7 |
| - : __strcasecmp_ppc); |
| + (hwcap2 & PPC_FEATURE2_ARCH_2_07) |
| + ? __strcasecmp_power8: |
| + (hwcap & PPC_FEATURE_HAS_VSX) |
| + ? __strcasecmp_power7 |
| + : __strcasecmp_ppc); |
| |
| weak_alias (__libc_strcasecmp, strcasecmp) |
| -#endif |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S b/sysdeps/powerpc/powerpc64/multiarch/strncase-power8.S |
| new file mode 100644 |
| index 0000000..01a63b5 |
| |
| |
| @@ -0,0 +1,28 @@ |
| +/* Optimized strncasecmp implementation for POWER8. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#include <sysdep.h> |
| + |
| +#define __strncasecmp __strncasecmp_power8 |
| +#undef weak_alias |
| +#define weak_alias(name, alias) |
| + |
| +#undef libc_hidden_builtin_def |
| +#define libc_hidden_builtin_def(name) |
| + |
| +#include <sysdeps/powerpc/powerpc64/power8/strncase.S> |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c b/sysdeps/powerpc/powerpc64/multiarch/strncase-ppc64.c |
| new file mode 100644 |
| index 0000000..c245d77 |
| |
| |
| @@ -0,0 +1,21 @@ |
| +/* Multiarch strncasecmp for PPC64. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#define strncasecmp __strncasecmp_ppc |
| + |
| +#include <string/strncase.c> |
| diff --git a/sysdeps/powerpc/powerpc64/multiarch/strncase.c b/sysdeps/powerpc/powerpc64/multiarch/strncase.c |
| index 4339f3a..5bfaf65 100644 |
| |
| |
| @@ -16,26 +16,21 @@ |
| License along with the GNU C Library; if not, see |
| <http://www.gnu.org/licenses/>. */ |
| |
| -#if IS_IN (libc) |
| -# include <string.h> |
| -# define strncasecmp __strncasecmp_ppc |
| -extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; |
| -extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; |
| -#endif |
| +#include <string.h> |
| +#include <shlib-compat.h> |
| +#include "init-arch.h" |
| |
| -#include <string/strncase.c> |
| -#undef strncasecmp |
| +extern __typeof (__strncasecmp) __libc_strncasecmp; |
| |
| -#if IS_IN (libc) |
| -# include <shlib-compat.h> |
| -# include "init-arch.h" |
| +extern __typeof (__strncasecmp) __strncasecmp_ppc attribute_hidden; |
| +extern __typeof (__strncasecmp) __strncasecmp_power7 attribute_hidden; |
| +extern __typeof (__strncasecmp) __strncasecmp_power8 attribute_hidden; |
| |
| -/* Avoid DWARF definition DIE on ifunc symbol so that GDB can handle |
| - ifunc symbol properly. */ |
| -extern __typeof (__strncasecmp) __libc_strncasecmp; |
| libc_ifunc (__libc_strncasecmp, |
| + (hwcap2 & PPC_FEATURE2_ARCH_2_07) |
| + ? __strncasecmp_power8: |
| (hwcap & PPC_FEATURE_HAS_VSX) |
| ? __strncasecmp_power7 |
| : __strncasecmp_ppc); |
| + |
| weak_alias (__libc_strncasecmp, strncasecmp) |
| -#endif |
| diff --git a/sysdeps/powerpc/powerpc64/power8/strcasecmp.S b/sysdeps/powerpc/powerpc64/power8/strcasecmp.S |
| new file mode 100644 |
| index 0000000..63f6217 |
| |
| |
| @@ -0,0 +1,446 @@ |
| +/* Optimized strcasecmp implementation for PowerPC64. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#include <sysdep.h> |
| +#include <locale-defines.h> |
| + |
| +/* int [r3] strcasecmp (const char *s1 [r3], const char *s2 [r4] ) */ |
| + |
| +#ifndef USE_AS_STRNCASECMP |
| +# define __STRCASECMP __strcasecmp |
| +# define STRCASECMP strcasecmp |
| +#else |
| +# define __STRCASECMP __strncasecmp |
| +# define STRCASECMP strncasecmp |
| +#endif |
| +/* Convert the 16 bytes in v4 and v5 to lowercase and compare them. */ |
| +#define TOLOWER() \ |
| + vaddubm v8, v4, v1; \ |
| + vaddubm v7, v4, v3; \ |
| + vcmpgtub v8, v8, v2; \ |
| + vsel v4, v7, v4, v8; \ |
| + vaddubm v8, v5, v1; \ |
| + vaddubm v7, v5, v3; \ |
| + vcmpgtub v8, v8, v2; \ |
| + vsel v5, v7, v5, v8; \ |
| + vcmpequb. v7, v5, v4; |
| + |
| +/* Get 16 bytes for unaligned case. */ |
| +#ifdef __LITTLE_ENDIAN__ |
| +#define GET16BYTES(reg1, reg2, reg3) \ |
| + lvx reg1, 0, reg2; \ |
| + vcmpequb. v8, v0, reg1; \ |
| + beq cr6, 1f; \ |
| + vspltisb v9, 0; \ |
| + b 2f; \ |
| + .align 4; \ |
| +1: \ |
| + addi r6, reg2, 16; \ |
| + lvx v9, 0, r6; \ |
| +2: \ |
| + vperm reg1, v9, reg1, reg3; |
| +#else |
| +#define GET16BYTES(reg1, reg2, reg3) \ |
| + lvx reg1, 0, reg2; \ |
| + vcmpequb. v8, v0, reg1; \ |
| + beq cr6, 1f; \ |
| + vspltisb v9, 0; \ |
| + b 2f; \ |
| + .align 4; \ |
| +1: \ |
| + addi r6, reg2, 16; \ |
| + lvx v9, 0, r6; \ |
| +2: \ |
| + vperm reg1, reg1, v9, reg3; |
| +#endif |
| + |
| +/* Check null in v4, v5 and convert to lower. */ |
| +#define CHECKNULLANDCONVERT() \ |
| + vcmpequb. v7, v0, v5; \ |
| + beq cr6, 3f; \ |
| + vcmpequb. v7, v0, v4; \ |
| + beq cr6, 3f; \ |
| + b L(null_found); \ |
| + .align 4; \ |
| +3: \ |
| + TOLOWER() |
| + |
| +#ifdef _ARCH_PWR8 |
| +# define VCLZD_V8_v7 vclzd v8, v7; |
| +# define MFVRD_R3_V1 mfvrd r3, v1; |
| +# define VSUBUDM_V9_V8 vsubudm v9, v9, v8; |
| +# define VPOPCNTD_V8_V8 vpopcntd v8, v8; |
| +# define VADDUQM_V7_V8 vadduqm v9, v7, v8; |
| +#else |
| +# define VCLZD_V8_v7 .long 0x11003fc2 |
| +# define MFVRD_R3_V1 .long 0x7c230067 |
| +# define VSUBUDM_V9_V8 .long 0x112944c0 |
| +# define VPOPCNTD_V8_V8 .long 0x110047c3 |
| +# define VADDUQM_V7_V8 .long 0x11274100 |
| +#endif |
| + |
| + .machine power7 |
| + |
| +ENTRY (__STRCASECMP) |
| +#ifdef USE_AS_STRNCASECMP |
| + CALL_MCOUNT 3 |
| +#else |
| + CALL_MCOUNT 2 |
| +#endif |
| +#define rRTN r3 /* Return value */ |
| +#define rSTR1 r10 /* 1st string */ |
| +#define rSTR2 r4 /* 2nd string */ |
| +#define rCHAR1 r6 /* Byte read from 1st string */ |
| +#define rCHAR2 r7 /* Byte read from 2nd string */ |
| +#define rADDR1 r8 /* Address of tolower(rCHAR1) */ |
| +#define rADDR2 r12 /* Address of tolower(rCHAR2) */ |
| +#define rLWR1 r8 /* Word tolower(rCHAR1) */ |
| +#define rLWR2 r12 /* Word tolower(rCHAR2) */ |
| +#define rTMP r9 |
| +#define rLOC r11 /* Default locale address */ |
| + |
| + cmpd cr7, rRTN, rSTR2 |
| + |
| + /* Get locale address. */ |
| + ld rTMP, __libc_tsd_LOCALE@got@tprel(r2) |
| + add rLOC, rTMP, __libc_tsd_LOCALE@tls |
| + ld rLOC, 0(rLOC) |
| + |
| + mr rSTR1, rRTN |
| + li rRTN, 0 |
| + beqlr cr7 |
| +#ifdef USE_AS_STRNCASECMP |
| + cmpdi cr7, r5, 0 |
| + beq cr7, L(retnull) |
| + cmpdi cr7, r5, 16 |
| + blt cr7, L(bytebybyte) |
| +#endif |
| + vspltisb v0, 0 |
| + vspltisb v8, -1 |
| + /* Check for null in initial characters. |
| + Check max of 16 char depending on the alignment. |
| + If null is present, proceed byte by byte. */ |
| + lvx v4, 0, rSTR1 |
| +#ifdef __LITTLE_ENDIAN__ |
| + lvsr v10, 0, rSTR1 /* Compute mask. */ |
| + vperm v9, v8, v4, v10 /* Mask bits that are not part of string. */ |
| +#else |
| + lvsl v10, 0, rSTR1 |
| + vperm v9, v4, v8, v10 |
| +#endif |
| + vcmpequb. v9, v0, v9 /* Check for null bytes. */ |
| + bne cr6, L(bytebybyte) |
| + lvx v5, 0, rSTR2 |
| + /* Calculate alignment. */ |
| +#ifdef __LITTLE_ENDIAN__ |
| + lvsr v6, 0, rSTR2 |
| + vperm v9, v8, v5, v6 /* Mask bits that are not part of string. */ |
| +#else |
| + lvsl v6, 0, rSTR2 |
| + vperm v9, v5, v8, v6 |
| +#endif |
| + vcmpequb. v9, v0, v9 /* Check for null bytes. */ |
| + bne cr6, L(bytebybyte) |
| + /* Check if locale has non ascii characters. */ |
| + ld rTMP, 0(rLOC) |
| + addi r6, rTMP,LOCALE_DATA_VALUES+_NL_CTYPE_NONASCII_CASE*SIZEOF_VALUES |
| + lwz rTMP, 0(r6) |
| + cmpdi cr7, rTMP, 1 |
| + beq cr7, L(bytebybyte) |
| + |
| + /* Load vector registers with values used for TOLOWER. */ |
| + /* Load v1 = 0xbf, v2 = 0x19 v3 = 0x20 in each byte. */ |
| + vspltisb v3, 2 |
| + vspltisb v9, 4 |
| + vsl v3, v3, v9 |
| + vaddubm v1, v3, v3 |
| + vnor v1, v1, v1 |
| + vspltisb v2, 7 |
| + vsububm v2, v3, v2 |
| + |
| + andi. rADDR1, rSTR1, 0xF |
| + beq cr0, L(align) |
| + addi r6, rSTR1, 16 |
| + lvx v9, 0, r6 |
| + /* Compute 16 bytes from previous two loads. */ |
| +#ifdef __LITTLE_ENDIAN__ |
| + vperm v4, v9, v4, v10 |
| +#else |
| + vperm v4, v4, v9, v10 |
| +#endif |
| +L(align): |
| + andi. rADDR2, rSTR2, 0xF |
| + beq cr0, L(align1) |
| + addi r6, rSTR2, 16 |
| + lvx v9, 0, r6 |
| + /* Compute 16 bytes from previous two loads. */ |
| +#ifdef __LITTLE_ENDIAN__ |
| + vperm v5, v9, v5, v6 |
| +#else |
| + vperm v5, v5, v9, v6 |
| +#endif |
| +L(align1): |
| + CHECKNULLANDCONVERT() |
| + blt cr6, L(match) |
| + b L(different) |
| + .align 4 |
| +L(match): |
| + clrldi r6, rSTR1, 60 |
| + subfic r7, r6, 16 |
| +#ifdef USE_AS_STRNCASECMP |
| + sub r5, r5, r7 |
| +#endif |
| + add rSTR1, rSTR1, r7 |
| + add rSTR2, rSTR2, r7 |
| + andi. rADDR2, rSTR2, 0xF |
| + addi rSTR1, rSTR1, -16 |
| + addi rSTR2, rSTR2, -16 |
| + beq cr0, L(aligned) |
| +#ifdef __LITTLE_ENDIAN__ |
| + lvsr v6, 0, rSTR2 |
| +#else |
| + lvsl v6, 0, rSTR2 |
| +#endif |
| + /* There are 2 loops depending on the input alignment. |
| + Each loop gets 16 bytes from s1 and s2, checks for null, |
| + converts to lowercase and compares. Loop until a difference |
| + or a null occurs. */ |
| +L(s1_align): |
| + addi rSTR1, rSTR1, 16 |
| + addi rSTR2, rSTR2, 16 |
| +#ifdef USE_AS_STRNCASECMP |
| + cmpdi cr7, r5, 16 |
| + blt cr7, L(bytebybyte) |
| + addi r5, r5, -16 |
| +#endif |
| + lvx v4, 0, rSTR1 |
| + GET16BYTES(v5, rSTR2, v6) |
| + CHECKNULLANDCONVERT() |
| + blt cr6, L(s1_align) |
| + b L(different) |
| + .align 4 |
| +L(aligned): |
| + addi rSTR1, rSTR1, 16 |
| + addi rSTR2, rSTR2, 16 |
| +#ifdef USE_AS_STRNCASECMP |
| + cmpdi cr7, r5, 16 |
| + blt cr7, L(bytebybyte) |
| + addi r5, r5, -16 |
| +#endif |
| + lvx v4, 0, rSTR1 |
| + lvx v5, 0, rSTR2 |
| + CHECKNULLANDCONVERT() |
| + blt cr6, L(aligned) |
| + |
| + /* Calculate and return the difference. */ |
| +L(different): |
| + vaddubm v1, v3, v3 |
| + vcmpequb v7, v0, v7 |
| +#ifdef __LITTLE_ENDIAN__ |
| + /* Count trailing zero. */ |
| + vspltisb v8, -1 |
| + VADDUQM_V7_V8 |
| + vandc v8, v9, v7 |
| + VPOPCNTD_V8_V8 |
| + vspltb v6, v8, 15 |
| + vcmpequb. v6, v6, v1 |
| + blt cr6, L(shift8) |
| +#else |
| + /* Count leading zero. */ |
| + VCLZD_V8_v7 |
| + vspltb v6, v8, 7 |
| + vcmpequb. v6, v6, v1 |
| + blt cr6, L(shift8) |
| + vsro v8, v8, v1 |
| +#endif |
| + b L(skipsum) |
| + .align 4 |
| +L(shift8): |
| + vsumsws v8, v8, v0 |
| +L(skipsum): |
| +#ifdef __LITTLE_ENDIAN__ |
| + /* Shift registers based on the trailing zero count. */ |
| + vsro v6, v5, v8 |
| + vsro v7, v4, v8 |
| + /* Merge and move to GPR. */ |
| + vmrglb v6, v6, v7 |
| + vslo v1, v6, v1 |
| + MFVRD_R3_V1 |
| + /* Place the characters that are different in first position. */ |
| + sldi rSTR2, rRTN, 56 |
| + srdi rSTR2, rSTR2, 56 |
| + sldi rSTR1, rRTN, 48 |
| + srdi rSTR1, rSTR1, 56 |
| +#else |
| + vslo v6, v5, v8 |
| + vslo v7, v4, v8 |
| + vmrghb v1, v6, v7 |
| + MFVRD_R3_V1 |
| + srdi rSTR2, rRTN, 48 |
| + sldi rSTR2, rSTR2, 56 |
| + srdi rSTR2, rSTR2, 56 |
| + srdi rSTR1, rRTN, 56 |
| +#endif |
| + subf rRTN, rSTR1, rSTR2 |
| + extsw rRTN, rRTN |
| + blr |
| + |
| + .align 4 |
| + /* OK. We've hit the end of the string. We need to be careful that |
| + we don't compare two strings as different because of junk beyond |
| + the end of the strings... */ |
| +L(null_found): |
| + vaddubm v10, v3, v3 |
| +#ifdef __LITTLE_ENDIAN__ |
| + /* Count trailing zero. */ |
| + vspltisb v8, -1 |
| + VADDUQM_V7_V8 |
| + vandc v8, v9, v7 |
| + VPOPCNTD_V8_V8 |
| + vspltb v6, v8, 15 |
| + vcmpequb. v6, v6, v10 |
| + blt cr6, L(shift_8) |
| +#else |
| + /* Count leading zero. */ |
| + VCLZD_V8_v7 |
| + vspltb v6, v8, 7 |
| + vcmpequb. v6, v6, v10 |
| + blt cr6, L(shift_8) |
| + vsro v8, v8, v10 |
| +#endif |
| + b L(skipsum1) |
| + .align 4 |
| +L(shift_8): |
| + vsumsws v8, v8, v0 |
| +L(skipsum1): |
| + /* Calculate shift count based on count of zero. */ |
| + vspltisb v10, 7 |
| + vslb v10, v10, v10 |
| + vsldoi v9, v0, v10, 1 |
| + VSUBUDM_V9_V8 |
| + vspltisb v8, 8 |
| + vsldoi v8, v0, v8, 1 |
| + VSUBUDM_V9_V8 |
| + /* Shift and remove junk after null character. */ |
| +#ifdef __LITTLE_ENDIAN__ |
| + vslo v5, v5, v9 |
| + vslo v4, v4, v9 |
| +#else |
| + vsro v5, v5, v9 |
| + vsro v4, v4, v9 |
| +#endif |
| + /* Convert and compare 16 bytes. */ |
| + TOLOWER() |
| + blt cr6, L(retnull) |
| + b L(different) |
| + .align 4 |
| +L(retnull): |
| + li rRTN, 0 |
| + blr |
| + .align 4 |
| +L(bytebybyte): |
| + /* Unrolled loop for POWER: loads are done with 'lbz' plus |
| + offset, and the string pointers are only updated once per |
| + unrolled iteration. */ |
| + ld rLOC, LOCALE_CTYPE_TOLOWER(rLOC) |
| + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ |
| + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ |
| +#ifdef USE_AS_STRNCASECMP |
| + rldicl rTMP, r5, 62, 2 |
| + cmpdi cr7, rTMP, 0 |
| + beq cr7, L(lessthan4) |
| + mtctr rTMP |
| +#endif |
| +L(loop): |
| + cmpdi rCHAR1, 0 /* *s1 == '\0' ? */ |
| + sldi rADDR1, rCHAR1, 2 /* Calculate address for tolower(*s1) */ |
| + sldi rADDR2, rCHAR2, 2 /* Calculate address for tolower(*s2) */ |
| + lwzx rLWR1, rLOC, rADDR1 /* Load tolower(*s1) */ |
| + lwzx rLWR2, rLOC, rADDR2 /* Load tolower(*s2) */ |
| + cmpw cr1, rLWR1, rLWR2 /* r = tolower(*s1) == tolower(*s2) ? */ |
| + crorc 4*cr1+eq,eq,4*cr1+eq /* (*s1 == '\0') || (r == 0) */ |
| + beq cr1, L(done) |
| + lbz rCHAR1, 1(rSTR1) |
| + lbz rCHAR2, 1(rSTR2) |
| + cmpdi rCHAR1, 0 |
| + sldi rADDR1, rCHAR1, 2 |
| + sldi rADDR2, rCHAR2, 2 |
| + lwzx rLWR1, rLOC, rADDR1 |
| + lwzx rLWR2, rLOC, rADDR2 |
| + cmpw cr1, rLWR1, rLWR2 |
| + crorc 4*cr1+eq,eq,4*cr1+eq |
| + beq cr1, L(done) |
| + lbz rCHAR1, 2(rSTR1) |
| + lbz rCHAR2, 2(rSTR2) |
| + cmpdi rCHAR1, 0 |
| + sldi rADDR1, rCHAR1, 2 |
| + sldi rADDR2, rCHAR2, 2 |
| + lwzx rLWR1, rLOC, rADDR1 |
| + lwzx rLWR2, rLOC, rADDR2 |
| + cmpw cr1, rLWR1, rLWR2 |
| + crorc 4*cr1+eq,eq,4*cr1+eq |
| + beq cr1, L(done) |
| + lbz rCHAR1, 3(rSTR1) |
| + lbz rCHAR2, 3(rSTR2) |
| + cmpdi rCHAR1, 0 |
| + /* Increment both string descriptors */ |
| + addi rSTR1, rSTR1, 4 |
| + addi rSTR2, rSTR2, 4 |
| + sldi rADDR1, rCHAR1, 2 |
| + sldi rADDR2, rCHAR2, 2 |
| + lwzx rLWR1, rLOC, rADDR1 |
| + lwzx rLWR2, rLOC, rADDR2 |
| + cmpw cr1, rLWR1, rLWR2 |
| + crorc 4*cr1+eq,eq,4*cr1+eq |
| + beq cr1, L(done) |
| + lbz rCHAR1, 0(rSTR1) /* Load char from s1 */ |
| + lbz rCHAR2, 0(rSTR2) /* Load char from s2 */ |
| +#ifdef USE_AS_STRNCASECMP |
| + bdnz L(loop) |
| +#else |
| + b L(loop) |
| +#endif |
| +#ifdef USE_AS_STRNCASECMP |
| +L(lessthan4): |
| + clrldi r5, r5, 62 |
| + cmpdi cr7, r5, 0 |
| + beq cr7, L(retnull) |
| + mtctr r5 |
| +L(loop1): |
| + cmpdi rCHAR1, 0 |
| + sldi rADDR1, rCHAR1, 2 |
| + sldi rADDR2, rCHAR2, 2 |
| + lwzx rLWR1, rLOC, rADDR1 |
| + lwzx rLWR2, rLOC, rADDR2 |
| + cmpw cr1, rLWR1, rLWR2 |
| + crorc 4*cr1+eq,eq,4*cr1+eq |
| + beq cr1, L(done) |
| + addi rSTR1, rSTR1, 1 |
| + addi rSTR2, rSTR2, 1 |
| + lbz rCHAR1, 0(rSTR1) |
| + lbz rCHAR2, 0(rSTR2) |
| + bdnz L(loop1) |
| +#endif |
| +L(done): |
| + subf r0, rLWR2, rLWR1 |
| + extsw rRTN, r0 |
| + blr |
| +END (__STRCASECMP) |
| + |
| +weak_alias (__STRCASECMP, STRCASECMP) |
| +libc_hidden_builtin_def (__STRCASECMP) |
| diff --git a/sysdeps/powerpc/powerpc64/power8/strncase.S b/sysdeps/powerpc/powerpc64/power8/strncase.S |
| new file mode 100644 |
| index 0000000..7ce2ed0 |
| |
| |
| @@ -0,0 +1,20 @@ |
| +/* Optimized strncasecmp implementation for POWER8. |
| + Copyright (C) 2016 Free Software Foundation, Inc. |
| + This file is part of the GNU C Library. |
| + |
| + The GNU C Library is free software; you can redistribute it and/or |
| + modify it under the terms of the GNU Lesser General Public |
| + License as published by the Free Software Foundation; either |
| + version 2.1 of the License, or (at your option) any later version. |
| + |
| + The GNU C Library is distributed in the hope that it will be useful, |
| + but WITHOUT ANY WARRANTY; without even the implied warranty of |
| + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| + Lesser General Public License for more details. |
| + |
| + You should have received a copy of the GNU Lesser General Public |
| + License along with the GNU C Library; if not, see |
| + <http://www.gnu.org/licenses/>. */ |
| + |
| +#define USE_AS_STRNCASECMP 1 |
| +#include <sysdeps/powerpc/powerpc64/power8/strcasecmp.S> |
| -- |
| 2.1.0 |
| |