08c3a6
commit 80883f43545f4f9afcb26beef9358dfdcd021bd6
08c3a6
Author: Noah Goldstein <goldstein.w.n@gmail.com>
08c3a6
Date:   Wed Mar 23 16:57:46 2022 -0500
08c3a6
08c3a6
    x86: Remove AVX str{n}casecmp
08c3a6
    
08c3a6
    The rationale is:
08c3a6
    
08c3a6
    1. SSE42 has nearly identical logic so any benefit is minimal (3.4%
08c3a6
       regression on Tigerlake using SSE42 versus AVX across the
08c3a6
       benchtest suite).
08c3a6
    2. AVX2 version covers the majority of targets that previously
08c3a6
       preferred it.
08c3a6
    3. The targets where AVX would still be best (SnB and IVB) are
08c3a6
       becoming outdated.
08c3a6
    
08c3a6
    All in all, the saving in code size is worth it.
08c3a6
    
08c3a6
    All string/memory tests pass.
08c3a6
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
08c3a6
    
08c3a6
    (cherry picked from commit 305769b2a15c2e96f9e1b5195d3c4e0d6f0f4b68)
08c3a6
08c3a6
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
08c3a6
index 359712c1491a2431..bca82e38d86cc440 100644
08c3a6
--- a/sysdeps/x86_64/multiarch/Makefile
08c3a6
+++ b/sysdeps/x86_64/multiarch/Makefile
08c3a6
@@ -50,7 +50,6 @@ sysdep_routines += \
08c3a6
   stpncpy-evex \
08c3a6
   stpncpy-sse2-unaligned \
08c3a6
   stpncpy-ssse3 \
08c3a6
-  strcasecmp_l-avx \
08c3a6
   strcasecmp_l-avx2 \
08c3a6
   strcasecmp_l-avx2-rtm \
08c3a6
   strcasecmp_l-evex \
08c3a6
@@ -91,7 +90,6 @@ sysdep_routines += \
08c3a6
   strlen-avx2-rtm \
08c3a6
   strlen-evex \
08c3a6
   strlen-sse2 \
08c3a6
-  strncase_l-avx \
08c3a6
   strncase_l-avx2 \
08c3a6
   strncase_l-avx2-rtm \
08c3a6
   strncase_l-evex \
08c3a6
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
08c3a6
index f6994e5406933d53..4c7834dd0b951fa4 100644
08c3a6
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
08c3a6
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
08c3a6
@@ -429,9 +429,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
08c3a6
 			      (CPU_FEATURE_USABLE (AVX2)
08c3a6
 			       && CPU_FEATURE_USABLE (RTM)),
08c3a6
 			      __strcasecmp_avx2_rtm)
08c3a6
-	      IFUNC_IMPL_ADD (array, i, strcasecmp,
08c3a6
-			      CPU_FEATURE_USABLE (AVX),
08c3a6
-			      __strcasecmp_avx)
08c3a6
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
08c3a6
 			      CPU_FEATURE_USABLE (SSE4_2),
08c3a6
 			      __strcasecmp_sse42)
08c3a6
@@ -453,9 +450,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
08c3a6
 			      (CPU_FEATURE_USABLE (AVX2)
08c3a6
 			       && CPU_FEATURE_USABLE (RTM)),
08c3a6
 			      __strcasecmp_l_avx2_rtm)
08c3a6
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
08c3a6
-			      CPU_FEATURE_USABLE (AVX),
08c3a6
-			      __strcasecmp_l_avx)
08c3a6
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
08c3a6
 			      CPU_FEATURE_USABLE (SSE4_2),
08c3a6
 			      __strcasecmp_l_sse42)
08c3a6
@@ -591,9 +585,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
08c3a6
 			      (CPU_FEATURE_USABLE (AVX2)
08c3a6
 			       && CPU_FEATURE_USABLE (RTM)),
08c3a6
 			      __strncasecmp_avx2_rtm)
08c3a6
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
08c3a6
-			      CPU_FEATURE_USABLE (AVX),
08c3a6
-			      __strncasecmp_avx)
08c3a6
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
08c3a6
 			      CPU_FEATURE_USABLE (SSE4_2),
08c3a6
 			      __strncasecmp_sse42)
08c3a6
@@ -616,9 +607,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
08c3a6
 			      (CPU_FEATURE_USABLE (AVX2)
08c3a6
 			       && CPU_FEATURE_USABLE (RTM)),
08c3a6
 			      __strncasecmp_l_avx2_rtm)
08c3a6
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
08c3a6
-			      CPU_FEATURE_USABLE (AVX),
08c3a6
-			      __strncasecmp_l_avx)
08c3a6
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
08c3a6
 			      CPU_FEATURE_USABLE (SSE4_2),
08c3a6
 			      __strncasecmp_l_sse42)
08c3a6
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
08c3a6
index 488e99e4997f379b..40819caf5ab10337 100644
08c3a6
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
08c3a6
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
08c3a6
@@ -22,7 +22,6 @@
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
08c3a6
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
08c3a6
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
08c3a6
@@ -46,9 +45,6 @@ IFUNC_SELECTOR (void)
08c3a6
         return OPTIMIZE (avx2);
08c3a6
     }
08c3a6
 
08c3a6
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
08c3a6
-    return OPTIMIZE (avx);
08c3a6
-
08c3a6
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
08c3a6
       && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
08c3a6
     return OPTIMIZE (sse42);
08c3a6
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
08c3a6
deleted file mode 100644
08c3a6
index 647aa05714d7a36c..0000000000000000
08c3a6
--- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
08c3a6
+++ /dev/null
08c3a6
@@ -1,22 +0,0 @@
08c3a6
-/* strcasecmp_l optimized with AVX.
08c3a6
-   Copyright (C) 2017-2021 Free Software Foundation, Inc.
08c3a6
-   This file is part of the GNU C Library.
08c3a6
-
08c3a6
-   The GNU C Library is free software; you can redistribute it and/or
08c3a6
-   modify it under the terms of the GNU Lesser General Public
08c3a6
-   License as published by the Free Software Foundation; either
08c3a6
-   version 2.1 of the License, or (at your option) any later version.
08c3a6
-
08c3a6
-   The GNU C Library is distributed in the hope that it will be useful,
08c3a6
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
08c3a6
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
08c3a6
-   Lesser General Public License for more details.
08c3a6
-
08c3a6
-   You should have received a copy of the GNU Lesser General Public
08c3a6
-   License along with the GNU C Library; if not, see
08c3a6
-   <https://www.gnu.org/licenses/>.  */
08c3a6
-
08c3a6
-#define STRCMP_SSE42 __strcasecmp_l_avx
08c3a6
-#define USE_AVX 1
08c3a6
-#define USE_AS_STRCASECMP_L
08c3a6
-#include "strcmp-sse42.S"
08c3a6
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
08c3a6
index a6825de8195ad8c6..466c6a92a612ebcb 100644
08c3a6
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
08c3a6
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
08c3a6
@@ -42,13 +42,8 @@
08c3a6
 # define UPDATE_STRNCMP_COUNTER
08c3a6
 #endif
08c3a6
 
08c3a6
-#ifdef USE_AVX
08c3a6
-# define SECTION	avx
08c3a6
-# define GLABEL(l)	l##_avx
08c3a6
-#else
08c3a6
-# define SECTION	sse4.2
08c3a6
-# define GLABEL(l)	l##_sse42
08c3a6
-#endif
08c3a6
+#define SECTION	sse4.2
08c3a6
+#define GLABEL(l)	l##_sse42
08c3a6
 
08c3a6
 #define LABEL(l)	.L##l
08c3a6
 
08c3a6
@@ -106,21 +101,7 @@ END (GLABEL(__strncasecmp))
08c3a6
 #endif
08c3a6
 
08c3a6
 
08c3a6
-#ifdef USE_AVX
08c3a6
-# define movdqa vmovdqa
08c3a6
-# define movdqu vmovdqu
08c3a6
-# define pmovmskb vpmovmskb
08c3a6
-# define pcmpistri vpcmpistri
08c3a6
-# define psubb vpsubb
08c3a6
-# define pcmpeqb vpcmpeqb
08c3a6
-# define psrldq vpsrldq
08c3a6
-# define pslldq vpslldq
08c3a6
-# define palignr vpalignr
08c3a6
-# define pxor vpxor
08c3a6
-# define D(arg) arg, arg
08c3a6
-#else
08c3a6
-# define D(arg) arg
08c3a6
-#endif
08c3a6
+#define arg arg
08c3a6
 
08c3a6
 STRCMP_SSE42:
08c3a6
 	cfi_startproc
08c3a6
@@ -192,18 +173,7 @@ LABEL(case_add):
08c3a6
 	movdqu	(%rdi), %xmm1
08c3a6
 	movdqu	(%rsi), %xmm2
08c3a6
 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
08c3a6
-# ifdef USE_AVX
08c3a6
-#  define TOLOWER(reg1, reg2) \
08c3a6
-	vpaddb	LCASE_MIN_reg, reg1, %xmm7;					\
08c3a6
-	vpaddb	LCASE_MIN_reg, reg2, %xmm8;					\
08c3a6
-	vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7;					\
08c3a6
-	vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8;					\
08c3a6
-	vpandn	CASE_ADD_reg, %xmm7, %xmm7;					\
08c3a6
-	vpandn	CASE_ADD_reg, %xmm8, %xmm8;					\
08c3a6
-	vpaddb	%xmm7, reg1, reg1;					\
08c3a6
-	vpaddb	%xmm8, reg2, reg2
08c3a6
-# else
08c3a6
-#  define TOLOWER(reg1, reg2) \
08c3a6
+# define TOLOWER(reg1, reg2) \
08c3a6
 	movdqa	LCASE_MIN_reg, %xmm7;					\
08c3a6
 	movdqa	LCASE_MIN_reg, %xmm8;					\
08c3a6
 	paddb	reg1, %xmm7;					\
08c3a6
@@ -214,15 +184,15 @@ LABEL(case_add):
08c3a6
 	pandn	CASE_ADD_reg, %xmm8;					\
08c3a6
 	paddb	%xmm7, reg1;					\
08c3a6
 	paddb	%xmm8, reg2
08c3a6
-# endif
08c3a6
+
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
 #else
08c3a6
 # define TOLOWER(reg1, reg2)
08c3a6
 #endif
08c3a6
-	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char checks */
08c3a6
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
08c3a6
-	pcmpeqb	%xmm2, D(%xmm1)		/* compare first 16 bytes for equality */
08c3a6
-	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
08c3a6
+	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
08c3a6
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
08c3a6
+	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
08c3a6
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
08c3a6
 	pmovmskb %xmm1, %edx
08c3a6
 	sub	$0xffff, %edx		/* if first 16 bytes are same, edx == 0xffff */
08c3a6
 	jnz	LABEL(less16bytes)/* If not, find different value or null char */
08c3a6
@@ -246,7 +216,7 @@ LABEL(crosscache):
08c3a6
 	xor	%r8d, %r8d
08c3a6
 	and	$0xf, %ecx		/* offset of rsi */
08c3a6
 	and	$0xf, %eax		/* offset of rdi */
08c3a6
-	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char check */
08c3a6
+	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char check */
08c3a6
 	cmp	%eax, %ecx
08c3a6
 	je	LABEL(ashr_0)		/* rsi and rdi relative offset same */
08c3a6
 	ja	LABEL(bigger)
08c3a6
@@ -260,7 +230,7 @@ LABEL(bigger):
08c3a6
 	sub	%rcx, %r9
08c3a6
 	lea	LABEL(unaligned_table)(%rip), %r10
08c3a6
 	movslq	(%r10, %r9,4), %r9
08c3a6
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
08c3a6
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
08c3a6
 	lea	(%r10, %r9), %r10
08c3a6
 	_CET_NOTRACK jmp *%r10		/* jump to corresponding case */
08c3a6
 
08c3a6
@@ -273,15 +243,15 @@ LABEL(bigger):
08c3a6
 LABEL(ashr_0):
08c3a6
 
08c3a6
 	movdqa	(%rsi), %xmm1
08c3a6
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
08c3a6
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
-	pcmpeqb	(%rdi), D(%xmm1)	/* compare 16 bytes for equality */
08c3a6
+	pcmpeqb	(%rdi), %xmm1		/* compare 16 bytes for equality */
08c3a6
 #else
08c3a6
 	movdqa	(%rdi), %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm2, D(%xmm1)		/* compare 16 bytes for equality */
08c3a6
+	pcmpeqb	%xmm2, %xmm1		/* compare 16 bytes for equality */
08c3a6
 #endif
08c3a6
-	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
08c3a6
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
08c3a6
 	pmovmskb %xmm1, %r9d
08c3a6
 	shr	%cl, %edx		/* adjust 0xffff for offset */
08c3a6
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
08c3a6
@@ -361,10 +331,10 @@ LABEL(ashr_0_exit_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_1):
08c3a6
-	pslldq	$15, D(%xmm2)		/* shift first string to align with second */
08c3a6
+	pslldq	$15, %xmm2		/* shift first string to align with second */
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)		/* compare 16 bytes for equality */
08c3a6
-	psubb	%xmm0, D(%xmm2)		/* packed sub of comparison results*/
08c3a6
+	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
08c3a6
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx		/* adjust 0xffff for offset */
08c3a6
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
08c3a6
@@ -392,7 +362,7 @@ LABEL(loop_ashr_1_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_1_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $1, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $1, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -411,7 +381,7 @@ LABEL(nibble_ashr_1_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_1_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $1, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $1, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -431,7 +401,7 @@ LABEL(nibble_ashr_1_restart_use):
08c3a6
 LABEL(nibble_ashr_1_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$1, D(%xmm0)
08c3a6
+	psrldq	$1, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -449,10 +419,10 @@ LABEL(nibble_ashr_1_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_2):
08c3a6
-	pslldq	$14, D(%xmm2)
08c3a6
+	pslldq	$14, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -480,7 +450,7 @@ LABEL(loop_ashr_2_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_2_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $2, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $2, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -499,7 +469,7 @@ LABEL(nibble_ashr_2_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_2_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $2, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $2, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -519,7 +489,7 @@ LABEL(nibble_ashr_2_restart_use):
08c3a6
 LABEL(nibble_ashr_2_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$2, D(%xmm0)
08c3a6
+	psrldq	$2, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -537,10 +507,10 @@ LABEL(nibble_ashr_2_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_3):
08c3a6
-	pslldq	$13, D(%xmm2)
08c3a6
+	pslldq	$13, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -568,7 +538,7 @@ LABEL(loop_ashr_3_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_3_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $3, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $3, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -587,7 +557,7 @@ LABEL(nibble_ashr_3_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_3_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $3, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $3, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -607,7 +577,7 @@ LABEL(nibble_ashr_3_restart_use):
08c3a6
 LABEL(nibble_ashr_3_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$3, D(%xmm0)
08c3a6
+	psrldq	$3, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -625,10 +595,10 @@ LABEL(nibble_ashr_3_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_4):
08c3a6
-	pslldq	$12, D(%xmm2)
08c3a6
+	pslldq	$12, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -657,7 +627,7 @@ LABEL(loop_ashr_4_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_4_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $4, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $4, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -676,7 +646,7 @@ LABEL(nibble_ashr_4_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_4_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $4, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $4, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -696,7 +666,7 @@ LABEL(nibble_ashr_4_restart_use):
08c3a6
 LABEL(nibble_ashr_4_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$4, D(%xmm0)
08c3a6
+	psrldq	$4, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -714,10 +684,10 @@ LABEL(nibble_ashr_4_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_5):
08c3a6
-	pslldq	$11, D(%xmm2)
08c3a6
+	pslldq	$11, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -746,7 +716,7 @@ LABEL(loop_ashr_5_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_5_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $5, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $5, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -766,7 +736,7 @@ LABEL(nibble_ashr_5_restart_use):
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
 
08c3a6
-	palignr $5, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $5, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -786,7 +756,7 @@ LABEL(nibble_ashr_5_restart_use):
08c3a6
 LABEL(nibble_ashr_5_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$5, D(%xmm0)
08c3a6
+	psrldq	$5, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -804,10 +774,10 @@ LABEL(nibble_ashr_5_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_6):
08c3a6
-	pslldq	$10, D(%xmm2)
08c3a6
+	pslldq	$10, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -836,7 +806,7 @@ LABEL(loop_ashr_6_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_6_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $6, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $6, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -855,7 +825,7 @@ LABEL(nibble_ashr_6_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_6_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $6, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $6, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -875,7 +845,7 @@ LABEL(nibble_ashr_6_restart_use):
08c3a6
 LABEL(nibble_ashr_6_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$6, D(%xmm0)
08c3a6
+	psrldq	$6, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -893,10 +863,10 @@ LABEL(nibble_ashr_6_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_7):
08c3a6
-	pslldq	$9, D(%xmm2)
08c3a6
+	pslldq	$9, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -925,7 +895,7 @@ LABEL(loop_ashr_7_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_7_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $7, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $7, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -944,7 +914,7 @@ LABEL(nibble_ashr_7_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_7_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $7, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $7, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -964,7 +934,7 @@ LABEL(nibble_ashr_7_restart_use):
08c3a6
 LABEL(nibble_ashr_7_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$7, D(%xmm0)
08c3a6
+	psrldq	$7, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -982,10 +952,10 @@ LABEL(nibble_ashr_7_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_8):
08c3a6
-	pslldq	$8, D(%xmm2)
08c3a6
+	pslldq	$8, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1014,7 +984,7 @@ LABEL(loop_ashr_8_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_8_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $8, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $8, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1033,7 +1003,7 @@ LABEL(nibble_ashr_8_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_8_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $8, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $8, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1053,7 +1023,7 @@ LABEL(nibble_ashr_8_restart_use):
08c3a6
 LABEL(nibble_ashr_8_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$8, D(%xmm0)
08c3a6
+	psrldq	$8, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1071,10 +1041,10 @@ LABEL(nibble_ashr_8_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_9):
08c3a6
-	pslldq	$7, D(%xmm2)
08c3a6
+	pslldq	$7, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1104,7 +1074,7 @@ LABEL(loop_ashr_9_use):
08c3a6
 LABEL(nibble_ashr_9_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
 
08c3a6
-	palignr $9, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $9, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1123,7 +1093,7 @@ LABEL(nibble_ashr_9_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_9_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $9, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $9, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1143,7 +1113,7 @@ LABEL(nibble_ashr_9_restart_use):
08c3a6
 LABEL(nibble_ashr_9_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$9, D(%xmm0)
08c3a6
+	psrldq	$9, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1161,10 +1131,10 @@ LABEL(nibble_ashr_9_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_10):
08c3a6
-	pslldq	$6, D(%xmm2)
08c3a6
+	pslldq	$6, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1193,7 +1163,7 @@ LABEL(loop_ashr_10_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_10_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $10, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $10, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1212,7 +1182,7 @@ LABEL(nibble_ashr_10_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_10_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $10, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $10, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1232,7 +1202,7 @@ LABEL(nibble_ashr_10_restart_use):
08c3a6
 LABEL(nibble_ashr_10_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$10, D(%xmm0)
08c3a6
+	psrldq	$10, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1250,10 +1220,10 @@ LABEL(nibble_ashr_10_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_11):
08c3a6
-	pslldq	$5, D(%xmm2)
08c3a6
+	pslldq	$5, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1282,7 +1252,7 @@ LABEL(loop_ashr_11_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_11_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $11, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $11, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1301,7 +1271,7 @@ LABEL(nibble_ashr_11_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_11_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $11, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $11, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1321,7 +1291,7 @@ LABEL(nibble_ashr_11_restart_use):
08c3a6
 LABEL(nibble_ashr_11_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$11, D(%xmm0)
08c3a6
+	psrldq	$11, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1339,10 +1309,10 @@ LABEL(nibble_ashr_11_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_12):
08c3a6
-	pslldq	$4, D(%xmm2)
08c3a6
+	pslldq	$4, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1371,7 +1341,7 @@ LABEL(loop_ashr_12_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_12_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $12, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $12, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1390,7 +1360,7 @@ LABEL(nibble_ashr_12_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_12_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $12, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $12, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1410,7 +1380,7 @@ LABEL(nibble_ashr_12_restart_use):
08c3a6
 LABEL(nibble_ashr_12_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$12, D(%xmm0)
08c3a6
+	psrldq	$12, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1428,10 +1398,10 @@ LABEL(nibble_ashr_12_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_13):
08c3a6
-	pslldq	$3, D(%xmm2)
08c3a6
+	pslldq	$3, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1461,7 +1431,7 @@ LABEL(loop_ashr_13_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_13_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $13, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $13, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1480,7 +1450,7 @@ LABEL(nibble_ashr_13_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_13_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $13, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $13, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1500,7 +1470,7 @@ LABEL(nibble_ashr_13_restart_use):
08c3a6
 LABEL(nibble_ashr_13_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$13, D(%xmm0)
08c3a6
+	psrldq	$13, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1518,10 +1488,10 @@ LABEL(nibble_ashr_13_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_14):
08c3a6
-	pslldq  $2, D(%xmm2)
08c3a6
+	pslldq  $2, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1551,7 +1521,7 @@ LABEL(loop_ashr_14_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_14_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $14, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $14, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1570,7 +1540,7 @@ LABEL(nibble_ashr_14_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_14_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $14, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $14, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1590,7 +1560,7 @@ LABEL(nibble_ashr_14_restart_use):
08c3a6
 LABEL(nibble_ashr_14_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$14, D(%xmm0)
08c3a6
+	psrldq	$14, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
@@ -1608,10 +1578,10 @@ LABEL(nibble_ashr_14_use):
08c3a6
  */
08c3a6
 	.p2align 4
08c3a6
 LABEL(ashr_15):
08c3a6
-	pslldq	$1, D(%xmm2)
08c3a6
+	pslldq	$1, %xmm2
08c3a6
 	TOLOWER (%xmm1, %xmm2)
08c3a6
-	pcmpeqb	%xmm1, D(%xmm2)
08c3a6
-	psubb	%xmm0, D(%xmm2)
08c3a6
+	pcmpeqb	%xmm1, %xmm2
08c3a6
+	psubb	%xmm0, %xmm2
08c3a6
 	pmovmskb %xmm2, %r9d
08c3a6
 	shr	%cl, %edx
08c3a6
 	shr	%cl, %r9d
08c3a6
@@ -1643,7 +1613,7 @@ LABEL(loop_ashr_15_use):
08c3a6
 
08c3a6
 LABEL(nibble_ashr_15_restart_use):
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $15, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $15, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1662,7 +1632,7 @@ LABEL(nibble_ashr_15_restart_use):
08c3a6
 	jg	LABEL(nibble_ashr_15_use)
08c3a6
 
08c3a6
 	movdqa	(%rdi, %rdx), %xmm0
08c3a6
-	palignr $15, -16(%rdi, %rdx), D(%xmm0)
08c3a6
+	palignr $15, -16(%rdi, %rdx), %xmm0
08c3a6
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
08c3a6
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
08c3a6
 #else
08c3a6
@@ -1682,7 +1652,7 @@ LABEL(nibble_ashr_15_restart_use):
08c3a6
 LABEL(nibble_ashr_15_use):
08c3a6
 	sub	$0x1000, %r10
08c3a6
 	movdqa	-16(%rdi, %rdx), %xmm0
08c3a6
-	psrldq	$15, D(%xmm0)
08c3a6
+	psrldq	$15, %xmm0
08c3a6
 	pcmpistri      $0x3a,%xmm0, %xmm0
08c3a6
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
08c3a6
 	cmp	%r11, %rcx
08c3a6
diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
08c3a6
deleted file mode 100644
08c3a6
index f1d3fefdd94674b8..0000000000000000
08c3a6
--- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
08c3a6
+++ /dev/null
08c3a6
@@ -1,22 +0,0 @@
08c3a6
-/* strncasecmp_l optimized with AVX.
08c3a6
-   Copyright (C) 2017-2021 Free Software Foundation, Inc.
08c3a6
-   This file is part of the GNU C Library.
08c3a6
-
08c3a6
-   The GNU C Library is free software; you can redistribute it and/or
08c3a6
-   modify it under the terms of the GNU Lesser General Public
08c3a6
-   License as published by the Free Software Foundation; either
08c3a6
-   version 2.1 of the License, or (at your option) any later version.
08c3a6
-
08c3a6
-   The GNU C Library is distributed in the hope that it will be useful,
08c3a6
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
08c3a6
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
08c3a6
-   Lesser General Public License for more details.
08c3a6
-
08c3a6
-   You should have received a copy of the GNU Lesser General Public
08c3a6
-   License along with the GNU C Library; if not, see
08c3a6
-   <https://www.gnu.org/licenses/>.  */
08c3a6
-
08c3a6
-#define STRCMP_SSE42 __strncasecmp_l_avx
08c3a6
-#define USE_AVX 1
08c3a6
-#define USE_AS_STRNCASECMP_L
08c3a6
-#include "strcmp-sse42.S"