513694
From ea4c320faffe618f70854985887c7ca08a1dcf4b Mon Sep 17 00:00:00 2001
513694
From: Noah Goldstein <goldstein.w.n@gmail.com>
513694
Date: Wed, 23 Mar 2022 16:57:46 -0500
513694
Subject: [PATCH] x86: Remove AVX str{n}casecmp
513694
513694
The rationale is:
513694
513694
1. SSE42 has nearly identical logic so any benefit is minimal (3.4%
513694
   regression on Tigerlake using SSE42 versus AVX across the
513694
   benchtest suite).
513694
2. AVX2 version covers the majority of targets that previously
513694
   preferred it.
513694
3. The targets where AVX would still be best (SnB and IVB) are
513694
   becoming outdated.
513694
513694
All in all, the code size savings are worth it.
513694
513694
All string/memory tests pass.
513694
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
513694
513694
(cherry picked from commit 305769b2a15c2e96f9e1b5195d3c4e0d6f0f4b68)
513694
---
513694
 sysdeps/x86_64/multiarch/Makefile           |   2 -
513694
 sysdeps/x86_64/multiarch/ifunc-impl-list.c  |  12 -
513694
 sysdeps/x86_64/multiarch/ifunc-strcasecmp.h |   4 -
513694
 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S |  22 --
513694
 sysdeps/x86_64/multiarch/strcmp-sse42.S     | 240 +++++++++-----------
513694
 sysdeps/x86_64/multiarch/strncase_l-avx.S   |  22 --
513694
 6 files changed, 105 insertions(+), 197 deletions(-)
513694
 delete mode 100644 sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
513694
 delete mode 100644 sysdeps/x86_64/multiarch/strncase_l-avx.S
513694
513694
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
513694
index 359712c1..bca82e38 100644
513694
--- a/sysdeps/x86_64/multiarch/Makefile
513694
+++ b/sysdeps/x86_64/multiarch/Makefile
513694
@@ -50,7 +50,6 @@ sysdep_routines += \
513694
   stpncpy-evex \
513694
   stpncpy-sse2-unaligned \
513694
   stpncpy-ssse3 \
513694
-  strcasecmp_l-avx \
513694
   strcasecmp_l-avx2 \
513694
   strcasecmp_l-avx2-rtm \
513694
   strcasecmp_l-evex \
513694
@@ -91,7 +90,6 @@ sysdep_routines += \
513694
   strlen-avx2-rtm \
513694
   strlen-evex \
513694
   strlen-sse2 \
513694
-  strncase_l-avx \
513694
   strncase_l-avx2 \
513694
   strncase_l-avx2-rtm \
513694
   strncase_l-evex \
513694
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
513694
index 1dedc637..14314367 100644
513694
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
513694
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
513694
@@ -429,9 +429,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
513694
 			      (CPU_FEATURE_USABLE (AVX2)
513694
 			       && CPU_FEATURE_USABLE (RTM)),
513694
 			      __strcasecmp_avx2_rtm)
513694
-	      IFUNC_IMPL_ADD (array, i, strcasecmp,
513694
-			      CPU_FEATURE_USABLE (AVX),
513694
-			      __strcasecmp_avx)
513694
 	      IFUNC_IMPL_ADD (array, i, strcasecmp,
513694
 			      CPU_FEATURE_USABLE (SSE4_2),
513694
 			      __strcasecmp_sse42)
513694
@@ -453,9 +450,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
513694
 			      (CPU_FEATURE_USABLE (AVX2)
513694
 			       && CPU_FEATURE_USABLE (RTM)),
513694
 			      __strcasecmp_l_avx2_rtm)
513694
-	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
513694
-			      CPU_FEATURE_USABLE (AVX),
513694
-			      __strcasecmp_l_avx)
513694
 	      IFUNC_IMPL_ADD (array, i, strcasecmp_l,
513694
 			      CPU_FEATURE_USABLE (SSE4_2),
513694
 			      __strcasecmp_l_sse42)
513694
@@ -591,9 +585,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
513694
 			      (CPU_FEATURE_USABLE (AVX2)
513694
 			       && CPU_FEATURE_USABLE (RTM)),
513694
 			      __strncasecmp_avx2_rtm)
513694
-	      IFUNC_IMPL_ADD (array, i, strncasecmp,
513694
-			      CPU_FEATURE_USABLE (AVX),
513694
-			      __strncasecmp_avx)
513694
 	      IFUNC_IMPL_ADD (array, i, strncasecmp,
513694
 			      CPU_FEATURE_USABLE (SSE4_2),
513694
 			      __strncasecmp_sse42)
513694
@@ -616,9 +607,6 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
513694
 			      (CPU_FEATURE_USABLE (AVX2)
513694
 			       && CPU_FEATURE_USABLE (RTM)),
513694
 			      __strncasecmp_l_avx2_rtm)
513694
-	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
513694
-			      CPU_FEATURE_USABLE (AVX),
513694
-			      __strncasecmp_l_avx)
513694
 	      IFUNC_IMPL_ADD (array, i, strncasecmp_l,
513694
 			      CPU_FEATURE_USABLE (SSE4_2),
513694
 			      __strncasecmp_l_sse42)
513694
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
513694
index 6dd49a21..34cfbb8f 100644
513694
--- a/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
513694
+++ b/sysdeps/x86_64/multiarch/ifunc-strcasecmp.h
513694
@@ -22,7 +22,6 @@
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
513694
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx) attribute_hidden;
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
513694
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
513694
@@ -46,9 +45,6 @@ IFUNC_SELECTOR (void)
513694
         return OPTIMIZE (avx2);
513694
     }
513694
 
513694
-  if (CPU_FEATURE_USABLE_P (cpu_features, AVX))
513694
-    return OPTIMIZE (avx);
513694
-
513694
   if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_2)
513694
       && !CPU_FEATURES_ARCH_P (cpu_features, Slow_SSE4_2))
513694
     return OPTIMIZE (sse42);
513694
diff --git a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S b/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
513694
deleted file mode 100644
513694
index 56a03547..00000000
513694
--- a/sysdeps/x86_64/multiarch/strcasecmp_l-avx.S
513694
+++ /dev/null
513694
@@ -1,22 +0,0 @@
513694
-/* strcasecmp_l optimized with AVX.
513694
-   Copyright (C) 2017-2018 Free Software Foundation, Inc.
513694
-   This file is part of the GNU C Library.
513694
-
513694
-   The GNU C Library is free software; you can redistribute it and/or
513694
-   modify it under the terms of the GNU Lesser General Public
513694
-   License as published by the Free Software Foundation; either
513694
-   version 2.1 of the License, or (at your option) any later version.
513694
-
513694
-   The GNU C Library is distributed in the hope that it will be useful,
513694
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
513694
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
513694
-   Lesser General Public License for more details.
513694
-
513694
-   You should have received a copy of the GNU Lesser General Public
513694
-   License along with the GNU C Library; if not, see
513694
-   <http://www.gnu.org/licenses/>.  */
513694
-
513694
-#define STRCMP_SSE42 __strcasecmp_l_avx
513694
-#define USE_AVX 1
513694
-#define USE_AS_STRCASECMP_L
513694
-#include "strcmp-sse42.S"
513694
diff --git a/sysdeps/x86_64/multiarch/strcmp-sse42.S b/sysdeps/x86_64/multiarch/strcmp-sse42.S
513694
index 59e8ddfc..0a42b7a4 100644
513694
--- a/sysdeps/x86_64/multiarch/strcmp-sse42.S
513694
+++ b/sysdeps/x86_64/multiarch/strcmp-sse42.S
513694
@@ -42,13 +42,8 @@
513694
 # define UPDATE_STRNCMP_COUNTER
513694
 #endif
513694
 
513694
-#ifdef USE_AVX
513694
-# define SECTION	avx
513694
-# define GLABEL(l)	l##_avx
513694
-#else
513694
-# define SECTION	sse4.2
513694
-# define GLABEL(l)	l##_sse42
513694
-#endif
513694
+#define SECTION	sse4.2
513694
+#define GLABEL(l)	l##_sse42
513694
 
513694
 #define LABEL(l)	.L##l
513694
 
513694
@@ -106,21 +101,7 @@ END (GLABEL(__strncasecmp))
513694
 #endif
513694
 
513694
 
513694
-#ifdef USE_AVX
513694
-# define movdqa vmovdqa
513694
-# define movdqu vmovdqu
513694
-# define pmovmskb vpmovmskb
513694
-# define pcmpistri vpcmpistri
513694
-# define psubb vpsubb
513694
-# define pcmpeqb vpcmpeqb
513694
-# define psrldq vpsrldq
513694
-# define pslldq vpslldq
513694
-# define palignr vpalignr
513694
-# define pxor vpxor
513694
-# define D(arg) arg, arg
513694
-#else
513694
-# define D(arg) arg
513694
-#endif
513694
+#define arg arg
513694
 
513694
 STRCMP_SSE42:
513694
 	cfi_startproc
513694
@@ -192,18 +173,7 @@ LABEL(case_add):
513694
 	movdqu	(%rdi), %xmm1
513694
 	movdqu	(%rsi), %xmm2
513694
 #if defined USE_AS_STRCASECMP_L || defined USE_AS_STRNCASECMP_L
513694
-# ifdef USE_AVX
513694
-#  define TOLOWER(reg1, reg2) \
513694
-	vpaddb	LCASE_MIN_reg, reg1, %xmm7;					\
513694
-	vpaddb	LCASE_MIN_reg, reg2, %xmm8;					\
513694
-	vpcmpgtb LCASE_MAX_reg, %xmm7, %xmm7;					\
513694
-	vpcmpgtb LCASE_MAX_reg, %xmm8, %xmm8;					\
513694
-	vpandn	CASE_ADD_reg, %xmm7, %xmm7;					\
513694
-	vpandn	CASE_ADD_reg, %xmm8, %xmm8;					\
513694
-	vpaddb	%xmm7, reg1, reg1;					\
513694
-	vpaddb	%xmm8, reg2, reg2
513694
-# else
513694
-#  define TOLOWER(reg1, reg2) \
513694
+# define TOLOWER(reg1, reg2) \
513694
 	movdqa	LCASE_MIN_reg, %xmm7;					\
513694
 	movdqa	LCASE_MIN_reg, %xmm8;					\
513694
 	paddb	reg1, %xmm7;					\
513694
@@ -214,15 +184,15 @@ LABEL(case_add):
513694
 	pandn	CASE_ADD_reg, %xmm8;					\
513694
 	paddb	%xmm7, reg1;					\
513694
 	paddb	%xmm8, reg2
513694
-# endif
513694
+
513694
 	TOLOWER (%xmm1, %xmm2)
513694
 #else
513694
 # define TOLOWER(reg1, reg2)
513694
 #endif
513694
-	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char checks */
513694
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
513694
-	pcmpeqb	%xmm2, D(%xmm1)		/* compare first 16 bytes for equality */
513694
-	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
513694
+	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
513694
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
513694
+	pcmpeqb	%xmm2, %xmm1		/* compare first 16 bytes for equality */
513694
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
513694
 	pmovmskb %xmm1, %edx
513694
 	sub	$0xffff, %edx		/* if first 16 bytes are same, edx == 0xffff */
513694
 	jnz	LABEL(less16bytes)/* If not, find different value or null char */
513694
@@ -246,7 +216,7 @@ LABEL(crosscache):
513694
 	xor	%r8d, %r8d
513694
 	and	$0xf, %ecx		/* offset of rsi */
513694
 	and	$0xf, %eax		/* offset of rdi */
513694
-	pxor	%xmm0, D(%xmm0)		/* clear %xmm0 for null char check */
513694
+	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char check */
513694
 	cmp	%eax, %ecx
513694
 	je	LABEL(ashr_0)		/* rsi and rdi relative offset same */
513694
 	ja	LABEL(bigger)
513694
@@ -260,7 +230,7 @@ LABEL(bigger):
513694
 	sub	%rcx, %r9
513694
 	lea	LABEL(unaligned_table)(%rip), %r10
513694
 	movslq	(%r10, %r9,4), %r9
513694
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
513694
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
513694
 	lea	(%r10, %r9), %r10
513694
 	_CET_NOTRACK jmp *%r10		/* jump to corresponding case */
513694
 
513694
@@ -273,15 +243,15 @@ LABEL(bigger):
513694
 LABEL(ashr_0):
513694
 
513694
 	movdqa	(%rsi), %xmm1
513694
-	pcmpeqb	%xmm1, D(%xmm0)		/* Any null chars? */
513694
+	pcmpeqb	%xmm1, %xmm0		/* Any null chars? */
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
-	pcmpeqb	(%rdi), D(%xmm1)	/* compare 16 bytes for equality */
513694
+	pcmpeqb	(%rdi), %xmm1		/* compare 16 bytes for equality */
513694
 #else
513694
 	movdqa	(%rdi), %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm2, D(%xmm1)		/* compare 16 bytes for equality */
513694
+	pcmpeqb	%xmm2, %xmm1		/* compare 16 bytes for equality */
513694
 #endif
513694
-	psubb	%xmm0, D(%xmm1)		/* packed sub of comparison results*/
513694
+	psubb	%xmm0, %xmm1		/* packed sub of comparison results*/
513694
 	pmovmskb %xmm1, %r9d
513694
 	shr	%cl, %edx		/* adjust 0xffff for offset */
513694
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
513694
@@ -361,10 +331,10 @@ LABEL(ashr_0_exit_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_1):
513694
-	pslldq	$15, D(%xmm2)		/* shift first string to align with second */
513694
+	pslldq	$15, %xmm2		/* shift first string to align with second */
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)		/* compare 16 bytes for equality */
513694
-	psubb	%xmm0, D(%xmm2)		/* packed sub of comparison results*/
513694
+	pcmpeqb	%xmm1, %xmm2		/* compare 16 bytes for equality */
513694
+	psubb	%xmm0, %xmm2		/* packed sub of comparison results*/
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx		/* adjust 0xffff for offset */
513694
 	shr	%cl, %r9d		/* adjust for 16-byte offset */
513694
@@ -392,7 +362,7 @@ LABEL(loop_ashr_1_use):
513694
 
513694
 LABEL(nibble_ashr_1_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $1, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $1, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -411,7 +381,7 @@ LABEL(nibble_ashr_1_restart_use):
513694
 	jg	LABEL(nibble_ashr_1_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $1, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $1, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -431,7 +401,7 @@ LABEL(nibble_ashr_1_restart_use):
513694
 LABEL(nibble_ashr_1_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$1, D(%xmm0)
513694
+	psrldq	$1, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -449,10 +419,10 @@ LABEL(nibble_ashr_1_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_2):
513694
-	pslldq	$14, D(%xmm2)
513694
+	pslldq	$14, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -480,7 +450,7 @@ LABEL(loop_ashr_2_use):
513694
 
513694
 LABEL(nibble_ashr_2_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $2, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $2, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -499,7 +469,7 @@ LABEL(nibble_ashr_2_restart_use):
513694
 	jg	LABEL(nibble_ashr_2_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $2, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $2, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -519,7 +489,7 @@ LABEL(nibble_ashr_2_restart_use):
513694
 LABEL(nibble_ashr_2_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$2, D(%xmm0)
513694
+	psrldq	$2, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -537,10 +507,10 @@ LABEL(nibble_ashr_2_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_3):
513694
-	pslldq	$13, D(%xmm2)
513694
+	pslldq	$13, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -568,7 +538,7 @@ LABEL(loop_ashr_3_use):
513694
 
513694
 LABEL(nibble_ashr_3_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $3, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $3, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -587,7 +557,7 @@ LABEL(nibble_ashr_3_restart_use):
513694
 	jg	LABEL(nibble_ashr_3_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $3, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $3, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -607,7 +577,7 @@ LABEL(nibble_ashr_3_restart_use):
513694
 LABEL(nibble_ashr_3_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$3, D(%xmm0)
513694
+	psrldq	$3, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -625,10 +595,10 @@ LABEL(nibble_ashr_3_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_4):
513694
-	pslldq	$12, D(%xmm2)
513694
+	pslldq	$12, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -657,7 +627,7 @@ LABEL(loop_ashr_4_use):
513694
 
513694
 LABEL(nibble_ashr_4_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $4, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $4, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -676,7 +646,7 @@ LABEL(nibble_ashr_4_restart_use):
513694
 	jg	LABEL(nibble_ashr_4_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $4, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $4, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -696,7 +666,7 @@ LABEL(nibble_ashr_4_restart_use):
513694
 LABEL(nibble_ashr_4_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$4, D(%xmm0)
513694
+	psrldq	$4, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -714,10 +684,10 @@ LABEL(nibble_ashr_4_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_5):
513694
-	pslldq	$11, D(%xmm2)
513694
+	pslldq	$11, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -746,7 +716,7 @@ LABEL(loop_ashr_5_use):
513694
 
513694
 LABEL(nibble_ashr_5_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $5, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $5, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -766,7 +736,7 @@ LABEL(nibble_ashr_5_restart_use):
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
 
513694
-	palignr $5, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $5, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -786,7 +756,7 @@ LABEL(nibble_ashr_5_restart_use):
513694
 LABEL(nibble_ashr_5_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$5, D(%xmm0)
513694
+	psrldq	$5, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -804,10 +774,10 @@ LABEL(nibble_ashr_5_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_6):
513694
-	pslldq	$10, D(%xmm2)
513694
+	pslldq	$10, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -836,7 +806,7 @@ LABEL(loop_ashr_6_use):
513694
 
513694
 LABEL(nibble_ashr_6_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $6, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $6, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -855,7 +825,7 @@ LABEL(nibble_ashr_6_restart_use):
513694
 	jg	LABEL(nibble_ashr_6_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $6, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $6, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -875,7 +845,7 @@ LABEL(nibble_ashr_6_restart_use):
513694
 LABEL(nibble_ashr_6_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$6, D(%xmm0)
513694
+	psrldq	$6, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -893,10 +863,10 @@ LABEL(nibble_ashr_6_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_7):
513694
-	pslldq	$9, D(%xmm2)
513694
+	pslldq	$9, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -925,7 +895,7 @@ LABEL(loop_ashr_7_use):
513694
 
513694
 LABEL(nibble_ashr_7_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $7, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $7, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -944,7 +914,7 @@ LABEL(nibble_ashr_7_restart_use):
513694
 	jg	LABEL(nibble_ashr_7_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $7, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $7, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri	$0x1a,(%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -964,7 +934,7 @@ LABEL(nibble_ashr_7_restart_use):
513694
 LABEL(nibble_ashr_7_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$7, D(%xmm0)
513694
+	psrldq	$7, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -982,10 +952,10 @@ LABEL(nibble_ashr_7_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_8):
513694
-	pslldq	$8, D(%xmm2)
513694
+	pslldq	$8, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1014,7 +984,7 @@ LABEL(loop_ashr_8_use):
513694
 
513694
 LABEL(nibble_ashr_8_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $8, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $8, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1033,7 +1003,7 @@ LABEL(nibble_ashr_8_restart_use):
513694
 	jg	LABEL(nibble_ashr_8_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $8, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $8, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1053,7 +1023,7 @@ LABEL(nibble_ashr_8_restart_use):
513694
 LABEL(nibble_ashr_8_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$8, D(%xmm0)
513694
+	psrldq	$8, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1071,10 +1041,10 @@ LABEL(nibble_ashr_8_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_9):
513694
-	pslldq	$7, D(%xmm2)
513694
+	pslldq	$7, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1104,7 +1074,7 @@ LABEL(loop_ashr_9_use):
513694
 LABEL(nibble_ashr_9_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
 
513694
-	palignr $9, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $9, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1123,7 +1093,7 @@ LABEL(nibble_ashr_9_restart_use):
513694
 	jg	LABEL(nibble_ashr_9_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $9, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $9, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1143,7 +1113,7 @@ LABEL(nibble_ashr_9_restart_use):
513694
 LABEL(nibble_ashr_9_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$9, D(%xmm0)
513694
+	psrldq	$9, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1161,10 +1131,10 @@ LABEL(nibble_ashr_9_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_10):
513694
-	pslldq	$6, D(%xmm2)
513694
+	pslldq	$6, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1193,7 +1163,7 @@ LABEL(loop_ashr_10_use):
513694
 
513694
 LABEL(nibble_ashr_10_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $10, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $10, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1212,7 +1182,7 @@ LABEL(nibble_ashr_10_restart_use):
513694
 	jg	LABEL(nibble_ashr_10_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $10, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $10, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1232,7 +1202,7 @@ LABEL(nibble_ashr_10_restart_use):
513694
 LABEL(nibble_ashr_10_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$10, D(%xmm0)
513694
+	psrldq	$10, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1250,10 +1220,10 @@ LABEL(nibble_ashr_10_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_11):
513694
-	pslldq	$5, D(%xmm2)
513694
+	pslldq	$5, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1282,7 +1252,7 @@ LABEL(loop_ashr_11_use):
513694
 
513694
 LABEL(nibble_ashr_11_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $11, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $11, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1301,7 +1271,7 @@ LABEL(nibble_ashr_11_restart_use):
513694
 	jg	LABEL(nibble_ashr_11_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $11, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $11, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1321,7 +1291,7 @@ LABEL(nibble_ashr_11_restart_use):
513694
 LABEL(nibble_ashr_11_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$11, D(%xmm0)
513694
+	psrldq	$11, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1339,10 +1309,10 @@ LABEL(nibble_ashr_11_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_12):
513694
-	pslldq	$4, D(%xmm2)
513694
+	pslldq	$4, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1371,7 +1341,7 @@ LABEL(loop_ashr_12_use):
513694
 
513694
 LABEL(nibble_ashr_12_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $12, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $12, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1390,7 +1360,7 @@ LABEL(nibble_ashr_12_restart_use):
513694
 	jg	LABEL(nibble_ashr_12_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $12, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $12, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1410,7 +1380,7 @@ LABEL(nibble_ashr_12_restart_use):
513694
 LABEL(nibble_ashr_12_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$12, D(%xmm0)
513694
+	psrldq	$12, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1428,10 +1398,10 @@ LABEL(nibble_ashr_12_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_13):
513694
-	pslldq	$3, D(%xmm2)
513694
+	pslldq	$3, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1461,7 +1431,7 @@ LABEL(loop_ashr_13_use):
513694
 
513694
 LABEL(nibble_ashr_13_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $13, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $13, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1480,7 +1450,7 @@ LABEL(nibble_ashr_13_restart_use):
513694
 	jg	LABEL(nibble_ashr_13_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $13, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $13, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1500,7 +1470,7 @@ LABEL(nibble_ashr_13_restart_use):
513694
 LABEL(nibble_ashr_13_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$13, D(%xmm0)
513694
+	psrldq	$13, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1518,10 +1488,10 @@ LABEL(nibble_ashr_13_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_14):
513694
-	pslldq  $2, D(%xmm2)
513694
+	pslldq  $2, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1551,7 +1521,7 @@ LABEL(loop_ashr_14_use):
513694
 
513694
 LABEL(nibble_ashr_14_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $14, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $14, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1570,7 +1540,7 @@ LABEL(nibble_ashr_14_restart_use):
513694
 	jg	LABEL(nibble_ashr_14_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $14, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $14, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1590,7 +1560,7 @@ LABEL(nibble_ashr_14_restart_use):
513694
 LABEL(nibble_ashr_14_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$14, D(%xmm0)
513694
+	psrldq	$14, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
@@ -1608,10 +1578,10 @@ LABEL(nibble_ashr_14_use):
513694
  */
513694
 	.p2align 4
513694
 LABEL(ashr_15):
513694
-	pslldq	$1, D(%xmm2)
513694
+	pslldq	$1, %xmm2
513694
 	TOLOWER (%xmm1, %xmm2)
513694
-	pcmpeqb	%xmm1, D(%xmm2)
513694
-	psubb	%xmm0, D(%xmm2)
513694
+	pcmpeqb	%xmm1, %xmm2
513694
+	psubb	%xmm0, %xmm2
513694
 	pmovmskb %xmm2, %r9d
513694
 	shr	%cl, %edx
513694
 	shr	%cl, %r9d
513694
@@ -1643,7 +1613,7 @@ LABEL(loop_ashr_15_use):
513694
 
513694
 LABEL(nibble_ashr_15_restart_use):
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $15, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $15, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1662,7 +1632,7 @@ LABEL(nibble_ashr_15_restart_use):
513694
 	jg	LABEL(nibble_ashr_15_use)
513694
 
513694
 	movdqa	(%rdi, %rdx), %xmm0
513694
-	palignr $15, -16(%rdi, %rdx), D(%xmm0)
513694
+	palignr $15, -16(%rdi, %rdx), %xmm0
513694
 #if !defined USE_AS_STRCASECMP_L && !defined USE_AS_STRNCASECMP_L
513694
 	pcmpistri $0x1a, (%rsi,%rdx), %xmm0
513694
 #else
513694
@@ -1682,7 +1652,7 @@ LABEL(nibble_ashr_15_restart_use):
513694
 LABEL(nibble_ashr_15_use):
513694
 	sub	$0x1000, %r10
513694
 	movdqa	-16(%rdi, %rdx), %xmm0
513694
-	psrldq	$15, D(%xmm0)
513694
+	psrldq	$15, %xmm0
513694
 	pcmpistri      $0x3a,%xmm0, %xmm0
513694
 #if defined USE_AS_STRNCMP || defined USE_AS_STRNCASECMP_L
513694
 	cmp	%r11, %rcx
513694
diff --git a/sysdeps/x86_64/multiarch/strncase_l-avx.S b/sysdeps/x86_64/multiarch/strncase_l-avx.S
513694
deleted file mode 100644
513694
index 0c4e525b..00000000
513694
--- a/sysdeps/x86_64/multiarch/strncase_l-avx.S
513694
+++ /dev/null
513694
@@ -1,22 +0,0 @@
513694
-/* strncasecmp_l optimized with AVX.
513694
-   Copyright (C) 2017-2018 Free Software Foundation, Inc.
513694
-   This file is part of the GNU C Library.
513694
-
513694
-   The GNU C Library is free software; you can redistribute it and/or
513694
-   modify it under the terms of the GNU Lesser General Public
513694
-   License as published by the Free Software Foundation; either
513694
-   version 2.1 of the License, or (at your option) any later version.
513694
-
513694
-   The GNU C Library is distributed in the hope that it will be useful,
513694
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
513694
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
513694
-   Lesser General Public License for more details.
513694
-
513694
-   You should have received a copy of the GNU Lesser General Public
513694
-   License along with the GNU C Library; if not, see
513694
-   <http://www.gnu.org/licenses/>.  */
513694
-
513694
-#define STRCMP_SSE42 __strncasecmp_l_avx
513694
-#define USE_AVX 1
513694
-#define USE_AS_STRNCASECMP_L
513694
-#include "strcmp-sse42.S"
513694
-- 
513694
GitLab
513694