513694
From bb034f8ae84535c1263032311594f229fd3ad1a9 Mon Sep 17 00:00:00 2001
513694
From: Noah Goldstein <goldstein.w.n@gmail.com>
513694
Date: Wed, 23 Mar 2022 16:57:26 -0500
513694
Subject: [PATCH] x86: Remove strcspn-sse2.S and use the generic implementation
513694
513694
The generic C implementation is faster than the x86_64 assembly version.
513694
513694
geometric_mean(N=20) of all benchmarks New / Original: 0.678
513694
513694
All string/memory tests pass.
513694
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
513694
513694
(cherry picked from commit fe28e7d9d9535ebab4081d195c553b4fbf39d9ae)
513694
---
513694
 .../{strcspn-sse2.S => strcspn-sse2.c}        |   6 +-
513694
 sysdeps/x86_64/strcspn.S                      | 122 ------------------
513694
 2 files changed, 3 insertions(+), 125 deletions(-)
513694
 rename sysdeps/x86_64/multiarch/{strcspn-sse2.S => strcspn-sse2.c} (89%)
513694
 delete mode 100644 sysdeps/x86_64/strcspn.S
513694
513694
diff --git a/sysdeps/x86_64/multiarch/strcspn-sse2.S b/sysdeps/x86_64/multiarch/strcspn-sse2.c
513694
similarity index 89%
513694
rename from sysdeps/x86_64/multiarch/strcspn-sse2.S
513694
rename to sysdeps/x86_64/multiarch/strcspn-sse2.c
513694
index 8a0c69d7..32debee4 100644
513694
--- a/sysdeps/x86_64/multiarch/strcspn-sse2.S
513694
+++ b/sysdeps/x86_64/multiarch/strcspn-sse2.c
513694
@@ -19,10 +19,10 @@
513694
 #if IS_IN (libc)
513694
 
513694
 # include <sysdep.h>
513694
-# define strcspn __strcspn_sse2
513694
+# define STRCSPN __strcspn_sse2
513694
 
513694
 # undef libc_hidden_builtin_def
513694
-# define libc_hidden_builtin_def(strcspn)
513694
+# define libc_hidden_builtin_def(STRCSPN)
513694
 #endif
513694
 
513694
-#include <sysdeps/x86_64/strcspn.S>
513694
+#include <string/strcspn.c>
513694
diff --git a/sysdeps/x86_64/strcspn.S b/sysdeps/x86_64/strcspn.S
513694
deleted file mode 100644
513694
index 7f9202d6..00000000
513694
--- a/sysdeps/x86_64/strcspn.S
513694
+++ /dev/null
513694
@@ -1,122 +0,0 @@
513694
-/* strcspn (str, ss) -- Return the length of the initial segment of STR
513694
-			which contains no characters from SS.
513694
-   For AMD x86-64.
513694
-   Copyright (C) 1994-2018 Free Software Foundation, Inc.
513694
-   This file is part of the GNU C Library.
513694
-   Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
513694
-   Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
513694
-   Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
513694
-
513694
-   The GNU C Library is free software; you can redistribute it and/or
513694
-   modify it under the terms of the GNU Lesser General Public
513694
-   License as published by the Free Software Foundation; either
513694
-   version 2.1 of the License, or (at your option) any later version.
513694
-
513694
-   The GNU C Library is distributed in the hope that it will be useful,
513694
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
513694
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
513694
-   Lesser General Public License for more details.
513694
-
513694
-   You should have received a copy of the GNU Lesser General Public
513694
-   License along with the GNU C Library; if not, see
513694
-   <http://www.gnu.org/licenses/>.  */
513694
-
513694
-#include <sysdep.h>
513694
-#include "asm-syntax.h"
513694
-
513694
-	.text
513694
-ENTRY (strcspn)
513694
-
513694
-	movq %rdi, %rdx		/* Save SRC.  */
513694
-
513694
-	/* First we create a table with flags for all possible characters.
513694
-	   For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
513694
-	   supported by the C string functions we have 256 characters.
513694
-	   Before inserting marks for the stop characters we clear the whole
513694
-	   table.  */
513694
-	movq %rdi, %r8			/* Save value.  */
513694
-	subq $256, %rsp			/* Make space for 256 bytes.  */
513694
-	cfi_adjust_cfa_offset(256)
513694
-	movl $32,  %ecx			/* 32*8 bytes = 256 bytes.  */
513694
-	movq %rsp, %rdi
513694
-	xorl %eax, %eax			/* We store 0s.  */
513694
-	cld
513694
-	rep
513694
-	stosq
513694
-
513694
-	movq %rsi, %rax			/* Setup skipset.  */
513694
-
513694
-/* For understanding the following code remember that %rcx == 0 now.
513694
-   Although all the following instruction only modify %cl we always
513694
-   have a correct zero-extended 64-bit value in %rcx.  */
513694
-
513694
-	.p2align 4
513694
-L(2):	movb (%rax), %cl	/* get byte from skipset */
513694
-	testb %cl, %cl		/* is NUL char? */
513694
-	jz L(1)			/* yes => start compare loop */
513694
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
513694
-
513694
-	movb 1(%rax), %cl	/* get byte from skipset */
513694
-	testb $0xff, %cl	/* is NUL char? */
513694
-	jz L(1)			/* yes => start compare loop */
513694
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
513694
-
513694
-	movb 2(%rax), %cl	/* get byte from skipset */
513694
-	testb $0xff, %cl	/* is NUL char? */
513694
-	jz L(1)			/* yes => start compare loop */
513694
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
513694
-
513694
-	movb 3(%rax), %cl	/* get byte from skipset */
513694
-	addq $4, %rax		/* increment skipset pointer */
513694
-	movb %cl, (%rsp,%rcx)	/* set corresponding byte in skipset table */
513694
-	testb $0xff, %cl	/* is NUL char? */
513694
-	jnz L(2)		/* no => process next dword from skipset */
513694
-
513694
-L(1):	leaq -4(%rdx), %rax	/* prepare loop */
513694
-
513694
-	/* We use a neat trick for the following loop.  Normally we would
513694
-	   have to test for two termination conditions
513694
-	   1. a character in the skipset was found
513694
-	   and
513694
-	   2. the end of the string was found
513694
-	   But as a sign that the character is in the skipset we store its
513694
-	   value in the table.  But the value of NUL is NUL so the loop
513694
-	   terminates for NUL in every case.  */
513694
-
513694
-	.p2align 4
513694
-L(3):	addq $4, %rax		/* adjust pointer for full loop round */
513694
-
513694
-	movb (%rax), %cl	/* get byte from string */
513694
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
513694
-	je L(4)			/* yes => return */
513694
-
513694
-	movb 1(%rax), %cl	/* get byte from string */
513694
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
513694
-	je L(5)			/* yes => return */
513694
-
513694
-	movb 2(%rax), %cl	/* get byte from string */
513694
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
513694
-	jz L(6)			/* yes => return */
513694
-
513694
-	movb 3(%rax), %cl	/* get byte from string */
513694
-	cmpb %cl, (%rsp,%rcx)	/* is it contained in skipset? */
513694
-	jne L(3)		/* no => start loop again */
513694
-
513694
-	incq %rax		/* adjust pointer */
513694
-L(6):	incq %rax
513694
-L(5):	incq %rax
513694
-
513694
-L(4):	addq $256, %rsp		/* remove skipset */
513694
-	cfi_adjust_cfa_offset(-256)
513694
-#ifdef USE_AS_STRPBRK
513694
-	xorl %edx,%edx
513694
-	orb %cl, %cl		/* was last character NUL? */
513694
-	cmovzq %rdx, %rax	/* Yes:	return NULL */
513694
-#else
513694
-	subq %rdx, %rax		/* we have to return the number of valid
513694
-				   characters, so compute distance to first
513694
-				   non-valid character */
513694
-#endif
513694
-	ret
513694
-END (strcspn)
513694
-libc_hidden_builtin_def (strcspn)
513694
-- 
513694
GitLab
513694