076f82
commit 0dafa75e3c42994d0f23db62651d1802577272f2
076f82
Author: Noah Goldstein <goldstein.w.n@gmail.com>
076f82
Date:   Wed Mar 23 16:57:26 2022 -0500
076f82
076f82
    x86: Remove strcspn-sse2.S and use the generic implementation
076f82
    
076f82
    The generic implementation is faster.
076f82
    
076f82
    geometric_mean(N=20) of all benchmarks New / Original: 0.678 (lower is better)
076f82
    
076f82
    All string/memory tests pass.
076f82
    Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
076f82
    
076f82
    (cherry picked from commit fe28e7d9d9535ebab4081d195c553b4fbf39d9ae)
076f82
076f82
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c
076f82
similarity index 89%
076f82
rename from sysdeps/x86_64/multiarch/strcspn-sse2.S
076f82
rename to sysdeps/x86_64/multiarch/strcspn-sse2.c
076f82
index 63b260a9ed265230..9bd3dac82d90b3a5 100644
076f82
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.S
076f82
+++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c
076f82
@@ -19,10 +19,10 @@
076f82
 #if IS_IN (libc)
076f82
 
076f82
 # include <sysdep.h>
076f82
-# define strcspn __strcspn_sse2
076f82
+# define STRCSPN __strcspn_sse2
076f82
 
076f82
 # undef libc_hidden_builtin_def
076f82
-# define libc_hidden_builtin_def(strcspn)
076f82
+# define libc_hidden_builtin_def(STRCSPN)
076f82
 #endif
076f82
 
076f82
-#include <sysdeps/x86_64/strcspn.S>
076f82
+#include <string/strcspn.c>
076f82
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
076f82
deleted file mode 100644
076f82
index 6035a274c87bafb0..0000000000000000
076f82
--- a/sysdeps/x86_64/strcspn.S
076f82
+++ /dev/null
076f82
@@ -1,122 +0,0 @@
076f82
-/* strcspn (str, ss) -- Return the length of the initial segment of STR
076f82
-			which contains no characters from SS.
076f82
-   For AMD x86-64.
076f82
-   Copyright (C) 1994-2021 Free Software Foundation, Inc.
076f82
-   This file is part of the GNU C Library.
076f82
-   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
076f82
-   Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
076f82
-   Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
076f82
-
076f82
-   The GNU C Library is free software; you can redistribute it and/or
076f82
-   modify it under the terms of the GNU Lesser General Public
076f82
-   License as published by the Free Software Foundation; either
076f82
-   version 2.1 of the License, or (at your option) any later version.
076f82
-
076f82
-   The GNU C Library is distributed in the hope that it will be useful,
076f82
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
076f82
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
076f82
-   Lesser General Public License for more details.
076f82
-
076f82
-   You should have received a copy of the GNU Lesser General Public
076f82
-   License along with the GNU C Library; if not, see
076f82
-   <https://www.gnu.org/licenses/>.  */
076f82
-
076f82
-#include <sysdep.h>
076f82
-#include "asm-syntax.h"
076f82
-
076f82
-	.text
076f82
-ENTRY (strcspn)
076f82
-
076f82
-	movq %rdi, %rdx		/* Save SRC.  */
076f82
-
076f82
-	/* First we create a table with flags for all possible characters.
076f82
-	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
076f82
-	   supported by the C string functions we have 256 characters.
076f82
-	   Before inserting marks for the stop characters we clear the whole
076f82
-	   table.  */
076f82
-	movq %rdi, %r8			/* Save value.  */
076f82
-	subq $256, %rsp			/* Make space for 256 bytes.  */
076f82
-	cfi_adjust_cfa_offset(256)
076f82
-	movl $32,  %ecx			/* 32*8 bytes = 256 bytes.  */
076f82
-	movq %rsp, %rdi
076f82
-	xorl %eax, %eax			/* We store 0s.  */
076f82
-	cld
076f82
-	rep
076f82
-	stosq
076f82
-
076f82
-	movq %rsi, %rax			/* Setup skipset.  */
076f82
-
076f82
-/* For understanding the following code remember that %rcx == 0 now.
076f82
-   Although all the following instruction only modify %cl we always
076f82
-   have a correct zero-extended 64-bit value in %rcx.  */
076f82
-
076f82
-	.p2align 4
076f82
-L(2):	movb (%rax), %cl	/* get byte from skipset */
076f82
-	testb %cl, %cl		/* is NUL char? */
076f82
-	jz L(1)			/* yes => start compare loop */
076f82
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
076f82
-
076f82
-	movb 1(%rax), %cl	/* get byte from skipset */
076f82
-	testb $0xff, %cl	/* is NUL char? */
076f82
-	jz L(1)			/* yes => start compare loop */
076f82
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
076f82
-
076f82
-	movb 2(%rax), %cl	/* get byte from skipset */
076f82
-	testb $0xff, %cl	/* is NUL char? */
076f82
-	jz L(1)			/* yes => start compare loop */
076f82
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
076f82
-
076f82
-	movb 3(%rax), %cl	/* get byte from skipset */
076f82
-	addq $4, %rax		/* increment skipset pointer */
076f82
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
076f82
-	testb $0xff, %cl	/* is NUL char? */
076f82
-	jnz L(2)		/* no => process next dword from skipset */
076f82
-
076f82
-L(1):	leaq -4(%rdx), %rax	/* prepare loop */
076f82
-
076f82
-	/* We use a neat trick for the following loop.  Normally we would
076f82
-	   have to test for two termination conditions
076f82
-	   1. a character in the skipset was found
076f82
-	   and
076f82
-	   2. the end of the string was found
076f82
-	   But as a sign that the character is in the skipset we store its
076f82
-	   value in the table.  But the value of NUL is NUL so the loop
076f82
-	   terminates for NUL in every case.  */
076f82
-
076f82
-	.p2align 4
076f82
-L(3):	addq $4, %rax		/* adjust pointer for full loop round */
076f82
-
076f82
-	movb (%rax), %cl	/* get byte from string */
076f82
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
076f82
-	je L(4)			/* yes => return */
076f82
-
076f82
-	movb 1(%rax), %cl	/* get byte from string */
076f82
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
076f82
-	je L(5)			/* yes => return */
076f82
-
076f82
-	movb 2(%rax), %cl	/* get byte from string */
076f82
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
076f82
-	jz L(6)			/* yes => return */
076f82
-
076f82
-	movb 3(%rax), %cl	/* get byte from string */
076f82
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
076f82
-	jne L(3)		/* no => start loop again */
076f82
-
076f82
-	incq %rax		/* adjust pointer */
076f82
-L(6):	incq %rax
076f82
-L(5):	incq %rax
076f82
-
076f82
-L(4):	addq $256, %rsp		/* remove skipset */
076f82
-	cfi_adjust_cfa_offset(-256)
076f82
-#ifdef USE_AS_STRPBRK
076f82
-	xorl %edx,%edx
076f82
-	orb %cl, %cl		/* was last character NUL? */
076f82
-	cmovzq %rdx, %rax	/* Yes:	return NULL */
076f82
-#else
076f82
-	subq %rdx, %rax		/* we have to return the number of valid
076f82
-				   characters, so compute distance to first
076f82
-				   non-valid character */
076f82
-#endif
076f82
-	ret
076f82
-END (strcspn)
076f82
-libc_hidden_builtin_def (strcspn)