|
|
513694 |
From 9356e90aa423ef4335404da233617ee85c3a05e4 Mon Sep 17 00:00:00 2001
|
|
|
513694 |
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
|
513694 |
Date: Wed, 23 Mar 2022 16:57:29 -0500
|
|
|
513694 |
Subject: [PATCH] x86: Remove strspn-sse2.S and use the generic implementation
|
|
|
513694 |
|
|
|
513694 |
The generic implementation is faster.
|
|
|
513694 |
|
|
|
513694 |
geometric_mean(N=20) of all benchmarks New / Original: .710
|
|
|
513694 |
|
|
|
513694 |
All string/memory tests pass.
|
|
|
513694 |
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
|
|
513694 |
|
|
|
513694 |
(cherry picked from commit 9c8a6ad620b49a27120ecdd7049c26bf05900397)
|
|
|
513694 |
---
|
|
|
513694 |
.../{strspn-sse2.S => strspn-sse2.c} | 6 +-
|
|
|
513694 |
sysdeps/x86_64/strspn.S | 115 ------------------
|
|
|
513694 |
2 files changed, 3 insertions(+), 118 deletions(-)
|
|
|
513694 |
rename sysdeps/x86_64/multiarch/{strspn-sse2.S => strspn-sse2.c} (89%)
|
|
|
513694 |
delete mode 100644 sysdeps/x86_64/strspn.S
|
|
|
513694 |
|
|
|
513694 |
diff --git a/sysdeps/x86_64/multiarch/strspn-sse2.S b/sysdeps/x86_64/multiarch/strspn-sse2.c
|
|
|
513694 |
similarity index 89%
|
|
|
513694 |
rename from sysdeps/x86_64/multiarch/strspn-sse2.S
|
|
|
513694 |
rename to sysdeps/x86_64/multiarch/strspn-sse2.c
|
|
|
513694 |
index 4686cdd5..ab0dae40 100644
|
|
|
513694 |
--- a/sysdeps/x86_64/multiarch/strspn-sse2.S
|
|
|
513694 |
+++ b/sysdeps/x86_64/multiarch/strspn-sse2.c
|
|
|
513694 |
@@ -19,10 +19,10 @@
|
|
|
513694 |
#if IS_IN (libc)
|
|
|
513694 |
|
|
|
513694 |
# include <sysdep.h>
|
|
|
513694 |
-# define strspn __strspn_sse2
|
|
|
513694 |
+# define STRSPN __strspn_sse2
|
|
|
513694 |
|
|
|
513694 |
# undef libc_hidden_builtin_def
|
|
|
513694 |
-# define libc_hidden_builtin_def(strspn)
|
|
|
513694 |
+# define libc_hidden_builtin_def(STRSPN)
|
|
|
513694 |
#endif
|
|
|
513694 |
|
|
|
513694 |
-#include <sysdeps/x86_64/strspn.S>
|
|
|
513694 |
+#include <string/strspn.c>
|
|
|
513694 |
diff --git a/sysdeps/x86_64/strspn.S b/sysdeps/x86_64/strspn.S
|
|
|
513694 |
deleted file mode 100644
|
|
|
513694 |
index 635f1bc6..00000000
|
|
|
513694 |
--- a/sysdeps/x86_64/strspn.S
|
|
|
513694 |
+++ /dev/null
|
|
|
513694 |
@@ -1,115 +0,0 @@
|
|
|
513694 |
-/* strspn (str, ss) -- Return the length of the initial segment of STR
|
|
|
513694 |
- which contains only characters from SS.
|
|
|
513694 |
- For AMD x86-64.
|
|
|
513694 |
- Copyright (C) 1994-2018 Free Software Foundation, Inc.
|
|
|
513694 |
- This file is part of the GNU C Library.
|
|
|
513694 |
- Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>.
|
|
|
513694 |
- Bug fixes by Alan Modra <Alan@SPRI.Levels.UniSA.Edu.Au>.
|
|
|
513694 |
- Adopted for x86-64 by Andreas Jaeger <aj@suse.de>.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is free software; you can redistribute it and/or
|
|
|
513694 |
- modify it under the terms of the GNU Lesser General Public
|
|
|
513694 |
- License as published by the Free Software Foundation; either
|
|
|
513694 |
- version 2.1 of the License, or (at your option) any later version.
|
|
|
513694 |
-
|
|
|
513694 |
- The GNU C Library is distributed in the hope that it will be useful,
|
|
|
513694 |
- but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
513694 |
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
513694 |
- Lesser General Public License for more details.
|
|
|
513694 |
-
|
|
|
513694 |
- You should have received a copy of the GNU Lesser General Public
|
|
|
513694 |
- License along with the GNU C Library; if not, see
|
|
|
513694 |
- <http://www.gnu.org/licenses/>. */
|
|
|
513694 |
-
|
|
|
513694 |
-#include <sysdep.h>
|
|
|
513694 |
-
|
|
|
513694 |
- .text
|
|
|
513694 |
-ENTRY (strspn)
|
|
|
513694 |
-
|
|
|
513694 |
- movq %rdi, %rdx /* Save SRC. */
|
|
|
513694 |
-
|
|
|
513694 |
- /* First we create a table with flags for all possible characters.
|
|
|
513694 |
- For the ASCII (7bit/8bit) or ISO-8859-X character sets which are
|
|
|
513694 |
- supported by the C string functions we have 256 characters.
|
|
|
513694 |
- Before inserting marks for the stop characters we clear the whole
|
|
|
513694 |
- table. */
|
|
|
513694 |
- movq %rdi, %r8 /* Save value. */
|
|
|
513694 |
- subq $256, %rsp /* Make space for 256 bytes. */
|
|
|
513694 |
- cfi_adjust_cfa_offset(256)
|
|
|
513694 |
- movl $32, %ecx /* 32*8 bytes = 256 bytes. */
|
|
|
513694 |
- movq %rsp, %rdi
|
|
|
513694 |
- xorl %eax, %eax /* We store 0s. */
|
|
|
513694 |
- cld
|
|
|
513694 |
- rep
|
|
|
513694 |
- stosq
|
|
|
513694 |
-
|
|
|
513694 |
- movq %rsi, %rax /* Setup stopset. */
|
|
|
513694 |
-
|
|
|
513694 |
-/* For understanding the following code remember that %rcx == 0 now.
|
|
|
513694 |
- Although all the following instruction only modify %cl we always
|
|
|
513694 |
- have a correct zero-extended 64-bit value in %rcx. */
|
|
|
513694 |
-
|
|
|
513694 |
- .p2align 4
|
|
|
513694 |
-L(2): movb (%rax), %cl /* get byte from stopset */
|
|
|
513694 |
- testb %cl, %cl /* is NUL char? */
|
|
|
513694 |
- jz L(1) /* yes => start compare loop */
|
|
|
513694 |
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 1(%rax), %cl /* get byte from stopset */
|
|
|
513694 |
- testb $0xff, %cl /* is NUL char? */
|
|
|
513694 |
- jz L(1) /* yes => start compare loop */
|
|
|
513694 |
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 2(%rax), %cl /* get byte from stopset */
|
|
|
513694 |
- testb $0xff, %cl /* is NUL char? */
|
|
|
513694 |
- jz L(1) /* yes => start compare loop */
|
|
|
513694 |
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 3(%rax), %cl /* get byte from stopset */
|
|
|
513694 |
- addq $4, %rax /* increment stopset pointer */
|
|
|
513694 |
- movb %cl, (%rsp,%rcx) /* set corresponding byte in stopset table */
|
|
|
513694 |
- testb $0xff, %cl /* is NUL char? */
|
|
|
513694 |
- jnz L(2) /* no => process next dword from stopset */
|
|
|
513694 |
-
|
|
|
513694 |
-L(1): leaq -4(%rdx), %rax /* prepare loop */
|
|
|
513694 |
-
|
|
|
513694 |
- /* We use a neat trick for the following loop. Normally we would
|
|
|
513694 |
- have to test for two termination conditions
|
|
|
513694 |
- 1. a character in the stopset was found
|
|
|
513694 |
- and
|
|
|
513694 |
- 2. the end of the string was found
|
|
|
513694 |
- But as a sign that the character is in the stopset we store its
|
|
|
513694 |
- value in the table. But the value of NUL is NUL so the loop
|
|
|
513694 |
- terminates for NUL in every case. */
|
|
|
513694 |
-
|
|
|
513694 |
- .p2align 4
|
|
|
513694 |
-L(3): addq $4, %rax /* adjust pointer for full loop round */
|
|
|
513694 |
-
|
|
|
513694 |
- movb (%rax), %cl /* get byte from string */
|
|
|
513694 |
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
|
513694 |
- jz L(4) /* no => return */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 1(%rax), %cl /* get byte from string */
|
|
|
513694 |
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
|
513694 |
- jz L(5) /* no => return */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 2(%rax), %cl /* get byte from string */
|
|
|
513694 |
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
|
513694 |
- jz L(6) /* no => return */
|
|
|
513694 |
-
|
|
|
513694 |
- movb 3(%rax), %cl /* get byte from string */
|
|
|
513694 |
- testb %cl, (%rsp,%rcx) /* is it contained in skipset? */
|
|
|
513694 |
- jnz L(3) /* yes => start loop again */
|
|
|
513694 |
-
|
|
|
513694 |
- incq %rax /* adjust pointer */
|
|
|
513694 |
-L(6): incq %rax
|
|
|
513694 |
-L(5): incq %rax
|
|
|
513694 |
-
|
|
|
513694 |
-L(4): addq $256, %rsp /* remove stopset */
|
|
|
513694 |
- cfi_adjust_cfa_offset(-256)
|
|
|
513694 |
- subq %rdx, %rax /* we have to return the number of valid
|
|
|
513694 |
- characters, so compute distance to first
|
|
|
513694 |
- non-valid character */
|
|
|
513694 |
- ret
|
|
|
513694 |
-END (strspn)
|
|
|
513694 |
-libc_hidden_builtin_def (strspn)
|
|
|
513694 |
--
|
|
|
513694 |
GitLab
|
|
|
513694 |
|