|
|
513694 |
From ea4c320faffe618f70854985887c7ca08a1dcf4b Mon Sep 17 00:00:00 2001
|
|
|
513694 |
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
|
513694 |
Date: Wed, 23 Mar 2022 16:57:46 -0500
|
|
|
513694 |
Subject: [PATCH] x86: Remove AVX str{n}casecmp
|
|
|
513694 |
|
|
|
513694 |
The rationale is:
|
|
|
513694 |
|
|
|
513694 |
1. SSE42 has nearly identical logic so any benefit is minimal (3.4%
|
|
|
513694 |
regression on Tigerlake using SSE42 versus AVX across the
|
|
|
513694 |
benchtest suite).
|
|
|
513694 |
2. AVX2 version covers the majority of targets that previously
|
|
|
513694 |
preferred it.
|
|
|
513694 |
3. The targets where AVX would still be best (SnB and IVB) are
|
|
|
513694 |
becoming outdated.
|
|
|
513694 |
|
|
|
513694 |
All in all, the code-size saving is worth it.
|
|
|
513694 |
|
|
|
513694 |
All string/memory tests pass.
|
|
|
513694 |
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
|
|
513694 |
|
|
|
513694 |
(cherry picked from commit 305769b2a15c2e96f9e1b5195d3c4e0d6f0f4b68)
|
|
|
513694 |
---
|
|
|
513694 |
sysdeps/x86_64/multiarch/Makefile | 2 -
|
|
|
513694 |
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 12 -
|
|
|
513694 |
sysdeps/x86_64/multiarch/ifunc-strcasecmp.h | 4 -
|
|
|
513694 |
sysdeps/x86_64/multiarch/strcasecmp_l-avx.S | 22 --
|
|
|
513694 |
sysdeps/x86_64/multiarch/strcmp-sse42.S | 240 +++++++++-----------
|
|
|
513694 |
sysdeps/x86_64/multiarch/strncase_l-avx.S | 22 --
|
|
|
513694 |
6 files changed, 105 insertions(+), 197 deletions(-)
|
|
|
513694 |
delete mode 100644 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
|
|
|
513694 |
delete mode 100644 sysdeps/x86_64/multiarch/strncase_l-avx.S
|
|
|
513694 |
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
|
|
|
513694 |
index 359712c1..bca82e38 100644
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/Makefile
|
|
|
513694 |
+++ b/sysdeps/x86_64/multiarch/Makefile
|
|
|
513694 |
@@ -50,7 +50,6 @@ sysdep_routines += \
|
|
|
513694 |
stpncpy-evex \
|
|
|
513694 |
stpncpy-sse2-unaligned \
|
|
|
513694 |
stpncpy-ssse3 \
|
|
|
513694 |
- strcasecmp_l-avx \
|
|
|
513694 |
strcasecmp_l-avx2 \
|
|
|
513694 |
strcasecmp_l-avx2-rtm \
|
|
|
513694 |
strcasecmp_l-evex \
|
|
|
513694 |
@@ -91,7 +90,6 @@ sysdep_routines += \
|
|
|
513694 |
strlen-avx2-rtm \
|
|
|
513694 |
strlen-evex \
|
|
|
513694 |
strlen-sse2 \
|
|
|
513694 |
- strncase_l-avx \
|
|
|
513694 |
strncase_l-avx2 \
|
|
|
513694 |
strncase_l-avx2-rtm \
|
|
|
513694 |
strncase_l-evex \
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
513694 |
index 1dedc637..14314367 100644
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
513694 |
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
513694 |
@@ -429,9 +429,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
513694 |
(CPU_FEATURE_USABLE (AVX2)
|
|
|
513694 |
&& CPU_FEATURE_USABLE (RTM)),
|
|
|
513694 |
__strcasecmp_avx2_rtm)
|
|
|
513694 |
- IFUNC_IMPL_ADD (array, i, strcasecmp,
|
|
|
513694 |
- CPU_FEATURE_USABLE (AVX),
|
|
|
513694 |
- __strcasecmp_avx)
|
|
|
513694 |
IFUNC_IMPL_ADD (array, i, strcasecmp,
|
|
|
513694 |
CPU_FEATURE_USABLE (SSE4_2),
|
|
|
513694 |
__strcasecmp_sse42)
|
|
|
513694 |
@@ -453,9 +450,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
513694 |
(CPU_FEATURE_USABLE (AVX2)
|
|
|
513694 |
&& CPU_FEATURE_USABLE (RTM)),
|
|
|
513694 |
__strcasecmp_l_avx2_rtm)
|
|
|
513694 |
- IFUNC_IMPL_ADD (array, i, strcasecmp_l,
|
|
|
513694 |
- CPU_FEATURE_USABLE (AVX),
|
|
|
513694 |
- __strcasecmp_l_avx)
|
|
|
513694 |
IFUNC_IMPL_ADD (array, i, strcasecmp_l,
|
|
|
513694 |
CPU_FEATURE_USABLE (SSE4_2),
|
|
|
513694 |
__strcasecmp_l_sse42)
|
|
|
513694 |
@@ -591,9 +585,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
513694 |
(CPU_FEATURE_USABLE (AVX2)
|
|
|
513694 |
&& CPU_FEATURE_USABLE (RTM)),
|
|
|
513694 |
__strncasecmp_avx2_rtm)
|
|
|
513694 |
- IFUNC_IMPL_ADD (array, i, strncasecmp,
|
|
|
513694 |
- CPU_FEATURE_USABLE (AVX),
|
|
|
513694 |
- __strncasecmp_avx)
|
|
|
513694 |
IFUNC_IMPL_ADD (array, i, strncasecmp,
|
|
|
513694 |
CPU_FEATURE_USABLE (SSE4_2),
|
|
|
513694 |
__strncasecmp_sse42)
|
|
|
513694 |
@@ -616,9 +607,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
513694 |
(CPU_FEATURE_USABLE (AVX2)
|
|
|
513694 |
&& CPU_FEATURE_USABLE (RTM)),
|
|
|
513694 |
__strncasecmp_l_avx2_rtm)
|
|
|
513694 |
- IFUNC_IMPL_ADD (array, i, strncasecmp_l,
|
|
|
513694 |
- CPU_FEATURE_USABLE (AVX),
|
|
|
513694 |
- __strncasecmp_l_avx)
|
|
|
513694 |
IFUNC_IMPL_ADD (array, i, strncasecmp_l,
|
|
|
513694 |
CPU_FEATURE_USABLE (SSE4_2),
|
|
|
513694 |
__strncasecmp_l_sse42)
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
|
|
513694 |
index 6dd49a21..34cfbb8f 100644
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
|
|
513694 |
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
|
|
|
513694 |
@@ -22,7 +22,6 @@
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
|
|
|
513694 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
|
|
513694 |
extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
|
|
513694 |
@@ -46,9 +45,6 @@ IFUNC_SELECTOR (void)
|
|
|
513694 |
return OPTIMIZE (avx2);
|
|
|
513694 |
}
|
|
|
513694 |
|
|
|
513694 |
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
|
|
|
513694 |
- return OPTIMIZE (avx);
|
|
|
513694 |
-
|
|
|
513694 |
if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
|
|
|
513694 |
&& !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
|
|
|
513694 |
return OPTIMIZE (sse42);
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
|
|
|
513694 |
deleted file mode 100644
|
|
|
513694 |
index 56a03547..00000000
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
|
|
|
513694 |
+++ /dev/null
|
|
|
513694 |
@@ -1,22 +0,0 @@
|
|
|
513694 |
-/* strcasecmp_l optimized with AVX.
|
|
|
513694 |
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
|
|
513694 |
- This file is part of the GNU C Library.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is free software; you can redistribute it and/or
|
|
|
513694 |
- modify it under the terms of the GNU Lesser General Public
|
|
|
513694 |
- License as published by the Free Software Foundation; either
|
|
|
513694 |
- version 2.1 of the License, or (at your option) any later version.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is distributed in the hope that it will be useful,
|
|
|
513694 |
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
513694 |
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
513694 |
- Lesser General Public License for more details.
|
|
|
513694 |
-
|
|
|
513694 |
- You should have received a copy of the GNU Lesser General Public
|
|
|
513694 |
- License along with the GNU C Library; if not, see
|
|
|
513694 |
- <http://www.gnu.org/licenses/>. */
|
|
|
513694 |
-
|
|
|
513694 |
-#define STRCMP_SSE42 __strcasecmp_l_avx
|
|
|
513694 |
-#define USE_AVX 1
|
|
|
513694 |
-#define USE_AS_STRCASECMP_L
|
|
|
513694 |
-#include "strcmp-sse42.S"
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
|
|
513694 |
index 59e8ddfc..0a42b7a4 100644
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
|
|
513694 |
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
|
|
|
513694 |
@@ -42,13 +42,8 @@
|
|
|
513694 |
# define UPDATE_STRNCMP_COUNTER
|
|
|
513694 |
#endif
|
|
|
513694 |
|
|
|
513694 |
-#ifdef USE_AVX
|
|
|
513694 |
-# define SECTION avx
|
|
|
513694 |
-# define GLABEL(l) l##_avx
|
|
|
513694 |
-#else
|
|
|
513694 |
-# define SECTION sse4.2
|
|
|
513694 |
-# define GLABEL(l) l##_sse42
|
|
|
513694 |
-#endif
|
|
|
513694 |
+#define SECTION sse4.2
|
|
|
513694 |
+#define GLABEL(l) l##_sse42
|
|
|
513694 |
|
|
|
513694 |
#define LABEL(l) .L##l
|
|
|
513694 |
|
|
|
513694 |
@@ -106,21 +101,7 @@ END (GLABEL(__strncasecmp))
|
|
|
513694 |
#endif
|
|
|
513694 |
|
|
|
513694 |
|
|
|
513694 |
-#ifdef USE_AVX
|
|
|
513694 |
-# define movdqa vmovdqa
|
|
|
513694 |
-# define movdqu vmovdqu
|
|
|
513694 |
-# define pmovmskb vpmovmskb
|
|
|
513694 |
-# define pcmpistri vpcmpistri
|
|
|
513694 |
-# define psubb vpsubb
|
|
|
513694 |
-# define pcmpeqb vpcmpeqb
|
|
|
513694 |
-# define psrldq vpsrldq
|
|
|
513694 |
-# define pslldq vpslldq
|
|
|
513694 |
-# define palignr vpalignr
|
|
|
513694 |
-# define pxor vpxor
|
|
|
513694 |
-# define D(arg) arg, arg
|
|
|
513694 |
-#else
|
|
|
513694 |
-# define D(arg) arg
|
|
|
513694 |
-#endif
|
|
|
513694 |
+#define arg arg
|
|
|
513694 |
|
|
|
513694 |
STRCMP_SSE42:
|
|
|
513694 |
cfi_startproc
|
|
|
513694 |
@@ -192,18 +173,7 @@ LABEL(case_add):
|
|
|
513694 |
movdqu (%rdi), %xmm1
|
|
|
513694 |
movdqu (%rsi), %xmm2
|
|
|
513694 |
#if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
-# ifdef USE_AVX
|
|
|
513694 |
-# define TOLOWER(reg1, reg2) \
|
|
|
513694 |
- vpaddb LCASE_MIN_reg, reg1, %xmm7; \
|
|
|
513694 |
- vpaddb LCASE_MIN_reg, reg2, %xmm8; \
|
|
|
513694 |
- vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7; \
|
|
|
513694 |
- vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8; \
|
|
|
513694 |
- vpandn CASE_ADD_reg, %xmm7, %xmm7; \
|
|
|
513694 |
- vpandn CASE_ADD_reg, %xmm8, %xmm8; \
|
|
|
513694 |
- vpaddb %xmm7, reg1, reg1; \
|
|
|
513694 |
- vpaddb %xmm8, reg2, reg2
|
|
|
513694 |
-# else
|
|
|
513694 |
-# define TOLOWER(reg1, reg2) \
|
|
|
513694 |
+# define TOLOWER(reg1, reg2) \
|
|
|
513694 |
movdqa LCASE_MIN_reg, %xmm7; \
|
|
|
513694 |
movdqa LCASE_MIN_reg, %xmm8; \
|
|
|
513694 |
paddb reg1, %xmm7; \
|
|
|
513694 |
@@ -214,15 +184,15 @@ LABEL(case_add):
|
|
|
513694 |
pandn CASE_ADD_reg, %xmm8; \
|
|
|
513694 |
paddb %xmm7, reg1; \
|
|
|
513694 |
paddb %xmm8, reg2
|
|
|
513694 |
-# endif
|
|
|
513694 |
+
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
#else
|
|
|
513694 |
# define TOLOWER(reg1, reg2)
|
|
|
513694 |
#endif
|
|
|
513694 |
- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char checks */
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
|
|
|
513694 |
- pcmpeqb %xmm2, D(%xmm1) /* compare first 16 bytes for equality */
|
|
|
513694 |
- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
|
|
|
513694 |
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char checks */
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
|
|
513694 |
+ pcmpeqb %xmm2, %xmm1 /* compare first 16 bytes for equality */
|
|
|
513694 |
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
|
|
|
513694 |
pmovmskb %xmm1, %edx
|
|
|
513694 |
sub $0xffff, %edx /* if first 16 bytes are same, edx == 0xffff */
|
|
|
513694 |
jnz LABEL(less16bytes)/* If not, find different value or null char */
|
|
|
513694 |
@@ -246,7 +216,7 @@ LABEL(crosscache):
|
|
|
513694 |
xor %r8d, %r8d
|
|
|
513694 |
and $0xf, %ecx /* offset of rsi */
|
|
|
513694 |
and $0xf, %eax /* offset of rdi */
|
|
|
513694 |
- pxor %xmm0, D(%xmm0) /* clear %xmm0 for null char check */
|
|
|
513694 |
+ pxor %xmm0, %xmm0 /* clear %xmm0 for null char check */
|
|
|
513694 |
cmp %eax, %ecx
|
|
|
513694 |
je LABEL(ashr_0) /* rsi and rdi relative offset same */
|
|
|
513694 |
ja LABEL(bigger)
|
|
|
513694 |
@@ -260,7 +230,7 @@ LABEL(bigger):
|
|
|
513694 |
sub %rcx, %r9
|
|
|
513694 |
lea LABEL(unaligned_table)(%rip), %r10
|
|
|
513694 |
movslq (%r10, %r9,4), %r9
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
|
|
513694 |
lea (%r10, %r9), %r10
|
|
|
513694 |
_CET_NOTRACK jmp *%r10 /* jump to corresponding case */
|
|
|
513694 |
|
|
|
513694 |
@@ -273,15 +243,15 @@ LABEL(bigger):
|
|
|
513694 |
LABEL(ashr_0):
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rsi), %xmm1
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm0) /* Any null chars? */
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm0 /* Any null chars? */
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
- pcmpeqb (%rdi), D(%xmm1) /* compare 16 bytes for equality */
|
|
|
513694 |
+ pcmpeqb (%rdi), %xmm1 /* compare 16 bytes for equality */
|
|
|
513694 |
#else
|
|
|
513694 |
movdqa (%rdi), %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm2, D(%xmm1) /* compare 16 bytes for equality */
|
|
|
513694 |
+ pcmpeqb %xmm2, %xmm1 /* compare 16 bytes for equality */
|
|
|
513694 |
#endif
|
|
|
513694 |
- psubb %xmm0, D(%xmm1) /* packed sub of comparison results*/
|
|
|
513694 |
+ psubb %xmm0, %xmm1 /* packed sub of comparison results*/
|
|
|
513694 |
pmovmskb %xmm1, %r9d
|
|
|
513694 |
shr %cl, %edx /* adjust 0xffff for offset */
|
|
|
513694 |
shr %cl, %r9d /* adjust for 16-byte offset */
|
|
|
513694 |
@@ -361,10 +331,10 @@ LABEL(ashr_0_exit_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_1):
|
|
|
513694 |
- pslldq $15, D(%xmm2) /* shift first string to align with second */
|
|
|
513694 |
+ pslldq $15, %xmm2 /* shift first string to align with second */
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2) /* compare 16 bytes for equality */
|
|
|
513694 |
- psubb %xmm0, D(%xmm2) /* packed sub of comparison results*/
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2 /* compare 16 bytes for equality */
|
|
|
513694 |
+ psubb %xmm0, %xmm2 /* packed sub of comparison results*/
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx /* adjust 0xffff for offset */
|
|
|
513694 |
shr %cl, %r9d /* adjust for 16-byte offset */
|
|
|
513694 |
@@ -392,7 +362,7 @@ LABEL(loop_ashr_1_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_1_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $1, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $1, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -411,7 +381,7 @@ LABEL(nibble_ashr_1_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_1_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $1, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $1, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -431,7 +401,7 @@ LABEL(nibble_ashr_1_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_1_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $1, D(%xmm0)
|
|
|
513694 |
+ psrldq $1, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -449,10 +419,10 @@ LABEL(nibble_ashr_1_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_2):
|
|
|
513694 |
- pslldq $14, D(%xmm2)
|
|
|
513694 |
+ pslldq $14, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -480,7 +450,7 @@ LABEL(loop_ashr_2_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_2_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $2, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $2, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -499,7 +469,7 @@ LABEL(nibble_ashr_2_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_2_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $2, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $2, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -519,7 +489,7 @@ LABEL(nibble_ashr_2_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_2_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $2, D(%xmm0)
|
|
|
513694 |
+ psrldq $2, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -537,10 +507,10 @@ LABEL(nibble_ashr_2_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_3):
|
|
|
513694 |
- pslldq $13, D(%xmm2)
|
|
|
513694 |
+ pslldq $13, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -568,7 +538,7 @@ LABEL(loop_ashr_3_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_3_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $3, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $3, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -587,7 +557,7 @@ LABEL(nibble_ashr_3_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_3_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $3, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $3, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -607,7 +577,7 @@ LABEL(nibble_ashr_3_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_3_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $3, D(%xmm0)
|
|
|
513694 |
+ psrldq $3, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -625,10 +595,10 @@ LABEL(nibble_ashr_3_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_4):
|
|
|
513694 |
- pslldq $12, D(%xmm2)
|
|
|
513694 |
+ pslldq $12, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -657,7 +627,7 @@ LABEL(loop_ashr_4_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_4_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $4, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $4, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -676,7 +646,7 @@ LABEL(nibble_ashr_4_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_4_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $4, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $4, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -696,7 +666,7 @@ LABEL(nibble_ashr_4_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_4_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $4, D(%xmm0)
|
|
|
513694 |
+ psrldq $4, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -714,10 +684,10 @@ LABEL(nibble_ashr_4_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_5):
|
|
|
513694 |
- pslldq $11, D(%xmm2)
|
|
|
513694 |
+ pslldq $11, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -746,7 +716,7 @@ LABEL(loop_ashr_5_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_5_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $5, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $5, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -766,7 +736,7 @@ LABEL(nibble_ashr_5_restart_use):
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
|
|
|
513694 |
- palignr $5, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $5, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -786,7 +756,7 @@ LABEL(nibble_ashr_5_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_5_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $5, D(%xmm0)
|
|
|
513694 |
+ psrldq $5, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -804,10 +774,10 @@ LABEL(nibble_ashr_5_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_6):
|
|
|
513694 |
- pslldq $10, D(%xmm2)
|
|
|
513694 |
+ pslldq $10, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -836,7 +806,7 @@ LABEL(loop_ashr_6_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_6_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $6, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $6, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -855,7 +825,7 @@ LABEL(nibble_ashr_6_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_6_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $6, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $6, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -875,7 +845,7 @@ LABEL(nibble_ashr_6_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_6_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $6, D(%xmm0)
|
|
|
513694 |
+ psrldq $6, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -893,10 +863,10 @@ LABEL(nibble_ashr_6_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_7):
|
|
|
513694 |
- pslldq $9, D(%xmm2)
|
|
|
513694 |
+ pslldq $9, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -925,7 +895,7 @@ LABEL(loop_ashr_7_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_7_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $7, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $7, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -944,7 +914,7 @@ LABEL(nibble_ashr_7_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_7_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $7, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $7, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a,(%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -964,7 +934,7 @@ LABEL(nibble_ashr_7_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_7_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $7, D(%xmm0)
|
|
|
513694 |
+ psrldq $7, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -982,10 +952,10 @@ LABEL(nibble_ashr_7_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_8):
|
|
|
513694 |
- pslldq $8, D(%xmm2)
|
|
|
513694 |
+ pslldq $8, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1014,7 +984,7 @@ LABEL(loop_ashr_8_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_8_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $8, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $8, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1033,7 +1003,7 @@ LABEL(nibble_ashr_8_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_8_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $8, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $8, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1053,7 +1023,7 @@ LABEL(nibble_ashr_8_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_8_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $8, D(%xmm0)
|
|
|
513694 |
+ psrldq $8, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1071,10 +1041,10 @@ LABEL(nibble_ashr_8_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_9):
|
|
|
513694 |
- pslldq $7, D(%xmm2)
|
|
|
513694 |
+ pslldq $7, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1104,7 +1074,7 @@ LABEL(loop_ashr_9_use):
|
|
|
513694 |
LABEL(nibble_ashr_9_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
|
|
|
513694 |
- palignr $9, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $9, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1123,7 +1093,7 @@ LABEL(nibble_ashr_9_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_9_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $9, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $9, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1143,7 +1113,7 @@ LABEL(nibble_ashr_9_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_9_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $9, D(%xmm0)
|
|
|
513694 |
+ psrldq $9, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1161,10 +1131,10 @@ LABEL(nibble_ashr_9_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_10):
|
|
|
513694 |
- pslldq $6, D(%xmm2)
|
|
|
513694 |
+ pslldq $6, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1193,7 +1163,7 @@ LABEL(loop_ashr_10_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_10_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $10, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $10, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1212,7 +1182,7 @@ LABEL(nibble_ashr_10_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_10_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $10, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $10, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1232,7 +1202,7 @@ LABEL(nibble_ashr_10_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_10_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $10, D(%xmm0)
|
|
|
513694 |
+ psrldq $10, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1250,10 +1220,10 @@ LABEL(nibble_ashr_10_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_11):
|
|
|
513694 |
- pslldq $5, D(%xmm2)
|
|
|
513694 |
+ pslldq $5, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1282,7 +1252,7 @@ LABEL(loop_ashr_11_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_11_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $11, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $11, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1301,7 +1271,7 @@ LABEL(nibble_ashr_11_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_11_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $11, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $11, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1321,7 +1291,7 @@ LABEL(nibble_ashr_11_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_11_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $11, D(%xmm0)
|
|
|
513694 |
+ psrldq $11, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1339,10 +1309,10 @@ LABEL(nibble_ashr_11_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_12):
|
|
|
513694 |
- pslldq $4, D(%xmm2)
|
|
|
513694 |
+ pslldq $4, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1371,7 +1341,7 @@ LABEL(loop_ashr_12_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_12_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $12, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $12, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1390,7 +1360,7 @@ LABEL(nibble_ashr_12_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_12_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $12, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $12, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1410,7 +1380,7 @@ LABEL(nibble_ashr_12_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_12_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $12, D(%xmm0)
|
|
|
513694 |
+ psrldq $12, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1428,10 +1398,10 @@ LABEL(nibble_ashr_12_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_13):
|
|
|
513694 |
- pslldq $3, D(%xmm2)
|
|
|
513694 |
+ pslldq $3, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1461,7 +1431,7 @@ LABEL(loop_ashr_13_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_13_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $13, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $13, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1480,7 +1450,7 @@ LABEL(nibble_ashr_13_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_13_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $13, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $13, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1500,7 +1470,7 @@ LABEL(nibble_ashr_13_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_13_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $13, D(%xmm0)
|
|
|
513694 |
+ psrldq $13, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1518,10 +1488,10 @@ LABEL(nibble_ashr_13_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_14):
|
|
|
513694 |
- pslldq $2, D(%xmm2)
|
|
|
513694 |
+ pslldq $2, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1551,7 +1521,7 @@ LABEL(loop_ashr_14_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_14_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $14, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $14, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1570,7 +1540,7 @@ LABEL(nibble_ashr_14_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_14_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $14, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $14, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1590,7 +1560,7 @@ LABEL(nibble_ashr_14_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_14_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $14, D(%xmm0)
|
|
|
513694 |
+ psrldq $14, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
@@ -1608,10 +1578,10 @@ LABEL(nibble_ashr_14_use):
|
|
|
513694 |
*/
|
|
|
513694 |
.p2align 4
|
|
|
513694 |
LABEL(ashr_15):
|
|
|
513694 |
- pslldq $1, D(%xmm2)
|
|
|
513694 |
+ pslldq $1, %xmm2
|
|
|
513694 |
TOLOWER (%xmm1, %xmm2)
|
|
|
513694 |
- pcmpeqb %xmm1, D(%xmm2)
|
|
|
513694 |
- psubb %xmm0, D(%xmm2)
|
|
|
513694 |
+ pcmpeqb %xmm1, %xmm2
|
|
|
513694 |
+ psubb %xmm0, %xmm2
|
|
|
513694 |
pmovmskb %xmm2, %r9d
|
|
|
513694 |
shr %cl, %edx
|
|
|
513694 |
shr %cl, %r9d
|
|
|
513694 |
@@ -1643,7 +1613,7 @@ LABEL(loop_ashr_15_use):
|
|
|
513694 |
|
|
|
513694 |
LABEL(nibble_ashr_15_restart_use):
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $15, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $15, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1662,7 +1632,7 @@ LABEL(nibble_ashr_15_restart_use):
|
|
|
513694 |
jg LABEL(nibble_ashr_15_use)
|
|
|
513694 |
|
|
|
513694 |
movdqa (%rdi, %rdx), %xmm0
|
|
|
513694 |
- palignr $15, -16(%rdi, %rdx), D(%xmm0)
|
|
|
513694 |
+ palignr $15, -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
#if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
pcmpistri $0x1a, (%rsi,%rdx), %xmm0
|
|
|
513694 |
#else
|
|
|
513694 |
@@ -1682,7 +1652,7 @@ LABEL(nibble_ashr_15_restart_use):
|
|
|
513694 |
LABEL(nibble_ashr_15_use):
|
|
|
513694 |
sub $0x1000, %r10
|
|
|
513694 |
movdqa -16(%rdi, %rdx), %xmm0
|
|
|
513694 |
- psrldq $15, D(%xmm0)
|
|
|
513694 |
+ psrldq $15, %xmm0
|
|
|
513694 |
pcmpistri $0x3a,%xmm0, %xmm0
|
|
|
513694 |
#if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
|
|
|
513694 |
cmp %r11, %rcx
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
|
|
|
513694 |
deleted file mode 100644
|
|
|
513694 |
index 0c4e525b..00000000
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
|
|
|
513694 |
+++ /dev/null
|
|
|
513694 |
@@ -1,22 +0,0 @@
|
|
|
513694 |
-/* strncasecmp_l optimized with AVX.
|
|
|
513694 |
- Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
|
|
513694 |
- This file is part of the GNU C Library.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is free software; you can redistribute it and/or
|
|
|
513694 |
- modify it under the terms of the GNU Lesser General Public
|
|
|
513694 |
- License as published by the Free Software Foundation; either
|
|
|
513694 |
- version 2.1 of the License, or (at your option) any later version.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is distributed in the hope that it will be useful,
|
|
|
513694 |
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
513694 |
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
513694 |
- Lesser General Public License for more details.
|
|
|
513694 |
-
|
|
|
513694 |
- You should have received a copy of the GNU Lesser General Public
|
|
|
513694 |
- License along with the GNU C Library; if not, see
|
|
|
513694 |
- <http://www.gnu.org/licenses/>. */
|
|
|
513694 |
-
|
|
|
513694 |
-#define STRCMP_SSE42 __strncasecmp_l_avx
|
|
|
513694 |
-#define USE_AVX 1
|
|
|
513694 |
-#define USE_AS_STRNCASECMP_L
|
|
|
513694 |
-#include "strcmp-sse42.S"
|
|
|
513694 |
--
|
|
|
513694 |
GitLab
|
|
|
513694 |
|