|
|
190885 |
From f98c4e939dc562ee4f687cead51b6fc5fb5ad18f Mon Sep 17 00:00:00 2001
|
|
|
190885 |
From: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
|
190885 |
Date: Wed, 23 Jun 2021 01:19:34 -0400
|
|
|
190885 |
Subject: [PATCH] x86-64: Add wcslen optimize for sse4.1
|
|
|
190885 |
|
|
|
190885 |
No bug. This commit adds the ifunc / build infrastructure
|
|
|
190885 |
necessary for wcslen to prefer the sse4.1 implementation
|
|
|
190885 |
in strlen-vec.S. test-wcslen.c is passing.
|
|
|
190885 |
|
|
|
190885 |
Signed-off-by: Noah Goldstein <goldstein.w.n@gmail.com>
|
|
|
190885 |
Reviewed-by: H.J. Lu <hjl.tools@gmail.com>
|
|
|
190885 |
(cherry picked from commit 6f573a27b6c8b4236445810a44660612323f5a73)
|
|
|
190885 |
---
|
|
|
190885 |
sysdeps/x86_64/multiarch/Makefile | 4 +-
|
|
|
190885 |
sysdeps/x86_64/multiarch/ifunc-impl-list.c | 3 ++
|
|
|
190885 |
sysdeps/x86_64/multiarch/ifunc-wcslen.h | 52 ++++++++++++++++++++++
|
|
|
190885 |
sysdeps/x86_64/multiarch/wcslen-sse4_1.S | 4 ++
|
|
|
190885 |
sysdeps/x86_64/multiarch/wcslen.c | 2 +-
|
|
|
190885 |
sysdeps/x86_64/multiarch/wcsnlen.c | 34 +-------------
|
|
|
190885 |
6 files changed, 63 insertions(+), 36 deletions(-)
|
|
|
190885 |
create mode 100644 sysdeps/x86_64/multiarch/ifunc-wcslen.h
|
|
|
190885 |
create mode 100644 sysdeps/x86_64/multiarch/wcslen-sse4_1.S
|
|
|
190885 |
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
|
|
|
190885 |
index 491c7698..65fde4eb 100644
|
|
|
190885 |
--- a/sysdeps/x86_64/multiarch/Makefile
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/Makefile
|
|
|
190885 |
@@ -93,8 +93,8 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
|
|
|
190885 |
wcscpy-ssse3 wcscpy-c \
|
|
|
190885 |
wcschr-sse2 wcschr-avx2 \
|
|
|
190885 |
wcsrchr-sse2 wcsrchr-avx2 \
|
|
|
190885 |
- wcsnlen-sse4_1 wcsnlen-c \
|
|
|
190885 |
- wcslen-sse2 wcslen-avx2 wcsnlen-avx2 \
|
|
|
190885 |
+ wcslen-sse2 wcslen-sse4_1 wcslen-avx2 \
|
|
|
190885 |
+ wcsnlen-c wcsnlen-sse4_1 wcsnlen-avx2 \
|
|
|
190885 |
wcschr-avx2-rtm \
|
|
|
190885 |
wcscmp-avx2-rtm \
|
|
|
190885 |
wcslen-avx2-rtm \
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
190885 |
index f1a6460a..580913ca 100644
|
|
|
190885 |
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
|
|
|
190885 |
@@ -657,6 +657,9 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
|
|
|
190885 |
&& CPU_FEATURE_USABLE (AVX512BW)
|
|
|
190885 |
&& CPU_FEATURE_USABLE (BMI2)),
|
|
|
190885 |
__wcslen_evex)
|
|
|
190885 |
+ IFUNC_IMPL_ADD (array, i, wcslen,
|
|
|
190885 |
+ CPU_FEATURE_USABLE (SSE4_1),
|
|
|
190885 |
+ __wcslen_sse4_1)
|
|
|
190885 |
IFUNC_IMPL_ADD (array, i, wcslen, 1, __wcslen_sse2))
|
|
|
190885 |
|
|
|
190885 |
/* Support sysdeps/x86_64/multiarch/wcsnlen.c. */
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/ifunc-wcslen.h b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
|
|
|
190885 |
new file mode 100644
|
|
|
190885 |
index 00000000..39e33473
|
|
|
190885 |
--- /dev/null
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/ifunc-wcslen.h
|
|
|
190885 |
@@ -0,0 +1,52 @@
|
|
|
190885 |
+/* Common definition for ifunc selections for wcslen and wcsnlen
|
|
|
190885 |
+ All versions must be listed in ifunc-impl-list.c.
|
|
|
190885 |
+ Copyright (C) 2017-2021 Free Software Foundation, Inc.
|
|
|
190885 |
+ This file is part of the GNU C Library.
|
|
|
190885 |
+
|
|
|
190885 |
+ The GNU C Library is free software; you can redistribute it and/or
|
|
|
190885 |
+ modify it under the terms of the GNU Lesser General Public
|
|
|
190885 |
+ License as published by the Free Software Foundation; either
|
|
|
190885 |
+ version 2.1 of the License, or (at your option) any later version.
|
|
|
190885 |
+
|
|
|
190885 |
+ The GNU C Library is distributed in the hope that it will be useful,
|
|
|
190885 |
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
190885 |
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
190885 |
+ Lesser General Public License for more details.
|
|
|
190885 |
+
|
|
|
190885 |
+ You should have received a copy of the GNU Lesser General Public
|
|
|
190885 |
+ License along with the GNU C Library; if not, see
|
|
|
190885 |
+ <https://www.gnu.org/licenses/>. */
|
|
|
190885 |
+
|
|
|
190885 |
+#include <init-arch.h>
|
|
|
190885 |
+
|
|
|
190885 |
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
|
|
190885 |
+extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
|
|
190885 |
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
|
|
190885 |
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
|
|
190885 |
+extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
|
|
190885 |
+
|
|
|
190885 |
+static inline void *
|
|
|
190885 |
+IFUNC_SELECTOR (void)
|
|
|
190885 |
+{
|
|
|
190885 |
+ const struct cpu_features* cpu_features = __get_cpu_features ();
|
|
|
190885 |
+
|
|
|
190885 |
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
|
|
190885 |
+ && CPU_FEATURE_USABLE_P (cpu_features, BMI2)
|
|
|
190885 |
+ && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
|
|
190885 |
+ {
|
|
|
190885 |
+ if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
|
|
190885 |
+ && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
|
|
|
190885 |
+ return OPTIMIZE (evex);
|
|
|
190885 |
+
|
|
|
190885 |
+ if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
|
|
190885 |
+ return OPTIMIZE (avx2_rtm);
|
|
|
190885 |
+
|
|
|
190885 |
+ if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
|
|
190885 |
+ return OPTIMIZE (avx2);
|
|
|
190885 |
+ }
|
|
|
190885 |
+
|
|
|
190885 |
+ if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
|
|
190885 |
+ return OPTIMIZE (sse4_1);
|
|
|
190885 |
+
|
|
|
190885 |
+ return OPTIMIZE (sse2);
|
|
|
190885 |
+}
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/wcslen-sse4_1.S b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
|
|
|
190885 |
new file mode 100644
|
|
|
190885 |
index 00000000..7e62621a
|
|
|
190885 |
--- /dev/null
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/wcslen-sse4_1.S
|
|
|
190885 |
@@ -0,0 +1,4 @@
|
|
|
190885 |
+#define AS_WCSLEN
|
|
|
190885 |
+#define strlen __wcslen_sse4_1
|
|
|
190885 |
+
|
|
|
190885 |
+#include "strlen-vec.S"
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/wcslen.c b/sysdeps/x86_64/multiarch/wcslen.c
|
|
|
190885 |
index 6d06e47c..3b04b75b 100644
|
|
|
190885 |
--- a/sysdeps/x86_64/multiarch/wcslen.c
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/wcslen.c
|
|
|
190885 |
@@ -24,7 +24,7 @@
|
|
|
190885 |
# undef __wcslen
|
|
|
190885 |
|
|
|
190885 |
# define SYMBOL_NAME wcslen
|
|
|
190885 |
-# include "ifunc-avx2.h"
|
|
|
190885 |
+# include "ifunc-wcslen.h"
|
|
|
190885 |
|
|
|
190885 |
libc_ifunc_redirected (__redirect_wcslen, __wcslen, IFUNC_SELECTOR ());
|
|
|
190885 |
weak_alias (__wcslen, wcslen);
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
|
|
|
190885 |
index 20b731ae..06736410 100644
|
|
|
190885 |
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
|
|
|
190885 |
@@ -24,39 +24,7 @@
|
|
|
190885 |
# undef __wcsnlen
|
|
|
190885 |
|
|
|
190885 |
# define SYMBOL_NAME wcsnlen
|
|
|
190885 |
-# include <init-arch.h>
|
|
|
190885 |
-
|
|
|
190885 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
|
|
|
190885 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
|
|
|
190885 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
|
|
|
190885 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
|
|
|
190885 |
-extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
|
|
|
190885 |
-
|
|
|
190885 |
-static inline void *
|
|
|
190885 |
-IFUNC_SELECTOR (void)
|
|
|
190885 |
-{
|
|
|
190885 |
- const struct cpu_features* cpu_features = __get_cpu_features ();
|
|
|
190885 |
-
|
|
|
190885 |
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX2)
|
|
|
190885 |
- && CPU_FEATURES_ARCH_P (cpu_features, AVX_Fast_Unaligned_Load))
|
|
|
190885 |
- {
|
|
|
190885 |
- if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL)
|
|
|
190885 |
- && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW)
|
|
|
190885 |
- && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
|
|
|
190885 |
- return OPTIMIZE (evex);
|
|
|
190885 |
-
|
|
|
190885 |
- if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
|
|
|
190885 |
- return OPTIMIZE (avx2_rtm);
|
|
|
190885 |
-
|
|
|
190885 |
- if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
|
|
|
190885 |
- return OPTIMIZE (avx2);
|
|
|
190885 |
- }
|
|
|
190885 |
-
|
|
|
190885 |
- if (CPU_FEATURE_USABLE_P (cpu_features, SSE4_1))
|
|
|
190885 |
- return OPTIMIZE (sse4_1);
|
|
|
190885 |
-
|
|
|
190885 |
- return OPTIMIZE (sse2);
|
|
|
190885 |
-}
|
|
|
190885 |
+# include "ifunc-wcslen.h"
|
|
|
190885 |
|
|
|
190885 |
libc_ifunc_redirected (__redirect_wcsnlen, __wcsnlen, IFUNC_SELECTOR ());
|
|
|
190885 |
weak_alias (__wcsnlen, wcsnlen);
|
|
|
190885 |
--
|
|
|
190885 |
GitLab
|
|
|
190885 |
|