From 041fc69e69905aa9193178c49b44dce7bb8b5d6d Mon Sep 17 00:00:00 2001
From: "H.J. Lu" <hjl.tools@gmail.com>
Date: Wed, 2 Mar 2022 14:25:33 -0800
Subject: [PATCH]  x86-64: Add AVX optimized string/memory functions for RTM

Since VZEROUPPER triggers RTM abort while VZEROALL won't, select AVX
optimized string/memory functions with

	xtest
	jz	1f
	vzeroall
	ret
1:
	vzeroupper
	ret

at function exit on processors with usable RTM, but without 256-bit EVEX
instructions to avoid VZEROUPPER inside a transactionally executing RTM
region.

(cherry picked from commit 7ebba91361badf7531d4e75050627a88d424872f)
---
 sysdeps/x86_64/multiarch/Makefile             |  27 +++
 sysdeps/x86_64/multiarch/ifunc-avx2.h         |   4 +
 sysdeps/x86_64/multiarch/ifunc-impl-list.c    | 170 ++++++++++++++++++
 sysdeps/x86_64/multiarch/ifunc-memcmp.h       |   4 +
 sysdeps/x86_64/multiarch/ifunc-memmove.h      |  12 ++
 sysdeps/x86_64/multiarch/ifunc-memset.h       |  12 ++
 sysdeps/x86_64/multiarch/ifunc-strcpy.h       |   4 +
 sysdeps/x86_64/multiarch/ifunc-wmemset.h      |   5 +
 sysdeps/x86_64/multiarch/memchr-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/memchr-avx2.S        |  45 +++--
 .../x86_64/multiarch/memcmp-avx2-movbe-rtm.S  |  12 ++
 sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S  |  28 ++-
 .../memmove-avx-unaligned-erms-rtm.S          |  17 ++
 .../multiarch/memmove-vec-unaligned-erms.S    |  33 ++--
 sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S   |  12 ++
 sysdeps/x86_64/multiarch/memrchr-avx2.S       |  53 +++---
 .../memset-avx2-unaligned-erms-rtm.S          |  10 ++
 .../multiarch/memset-avx2-unaligned-erms.S    |  12 +-
 .../multiarch/memset-vec-unaligned-erms.S     |  41 ++---
 sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S |   4 +
 sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S    |   3 +
 sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S   |   4 +
 sysdeps/x86_64/multiarch/strcat-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/strcat-avx2.S        |   6 +-
 sysdeps/x86_64/multiarch/strchr-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/strchr-avx2.S        |  22 +--
 sysdeps/x86_64/multiarch/strchr.c             |   4 +
 sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S |   3 +
 sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/strcmp-avx2.S        |  55 +++---
 sysdeps/x86_64/multiarch/strcmp.c             |   4 +
 sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/strcpy-avx2.S        |  85 ++++-----
 sysdeps/x86_64/multiarch/strlen-avx2-rtm.S    |  12 ++
 sysdeps/x86_64/multiarch/strlen-avx2.S        |  43 ++---
 sysdeps/x86_64/multiarch/strncat-avx2-rtm.S   |   3 +
 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S   |   3 +
 sysdeps/x86_64/multiarch/strncmp.c            |   4 +
 sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S   |   3 +
 sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S   |   4 +
 sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S   |  12 ++
 sysdeps/x86_64/multiarch/strrchr-avx2.S       |  19 +-
 sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S    |   3 +
 sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S    |   4 +
 sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S    |   4 +
 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S   |   5 +
 sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S   |   5 +
 sysdeps/x86_64/multiarch/wcsnlen.c            |   4 +
 sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S   |   3 +
 sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S   |   4 +
 .../x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S |   4 +
 sysdeps/x86_64/sysdep.h                       |  22 +++
 52 files changed, 668 insertions(+), 244 deletions(-)
 create mode 100644 sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
 create mode 100644 sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S

diff --git a/sysdeps/x86_64/multiarch/Makefile b/sysdeps/x86_64/multiarch/Makefile
index 9d79b138..491c7698 100644
--- a/sysdeps/x86_64/multiarch/Makefile
+++ b/sysdeps/x86_64/multiarch/Makefile
@@ -40,6 +40,25 @@ sysdep_routines += strncat-c stpncpy-c strncpy-c \
 		   memset-sse2-unaligned-erms \
 		   memset-avx2-unaligned-erms \
 		   memset-avx512-unaligned-erms \
+		   memchr-avx2-rtm \
+		   memcmp-avx2-movbe-rtm \
+		   memmove-avx-unaligned-erms-rtm \
+		   memrchr-avx2-rtm \
+		   memset-avx2-unaligned-erms-rtm \
+		   rawmemchr-avx2-rtm \
+		   strchr-avx2-rtm \
+		   strcmp-avx2-rtm \
+		   strchrnul-avx2-rtm \
+		   stpcpy-avx2-rtm \
+		   stpncpy-avx2-rtm \
+		   strcat-avx2-rtm \
+		   strcpy-avx2-rtm \
+		   strlen-avx2-rtm \
+		   strncat-avx2-rtm \
+		   strncmp-avx2-rtm \
+		   strncpy-avx2-rtm \
+		   strnlen-avx2-rtm \
+		   strrchr-avx2-rtm \
 		   memchr-evex \
 		   memcmp-evex-movbe \
 		   memmove-evex-unaligned-erms \
@@ -76,6 +95,14 @@ sysdep_routines += wmemcmp-sse4 wmemcmp-ssse3 wmemcmp-c \
 		   wcsrchr-sse2 wcsrchr-avx2 \
 		   wcsnlen-sse4_1 wcsnlen-c \
 		   wcslen-sse2 wcslen-avx2 wcsnlen-avx2 \
+		   wcschr-avx2-rtm \
+		   wcscmp-avx2-rtm \
+		   wcslen-avx2-rtm \
+		   wcsncmp-avx2-rtm \
+		   wcsnlen-avx2-rtm \
+		   wcsrchr-avx2-rtm \
+		   wmemchr-avx2-rtm \
+		   wmemcmp-avx2-movbe-rtm \
 		   wcschr-evex \
 		   wcscmp-evex \
 		   wcslen-evex \
diff --git a/sysdeps/x86_64/multiarch/ifunc-avx2.h b/sysdeps/x86_64/multiarch/ifunc-avx2.h
index 7081b0c9..e0f30e61 100644
--- a/sysdeps/x86_64/multiarch/ifunc-avx2.h
+++ b/sysdeps/x86_64/multiarch/ifunc-avx2.h
@@ -21,6 +21,7 @@
 
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
 
 static inline void *
@@ -36,6 +37,9 @@ IFUNC_SELECTOR (void)
 	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
 	return OPTIMIZE (evex);
 
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+	return OPTIMIZE (avx2_rtm);
+
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	return OPTIMIZE (avx2);
     }
diff --git a/sysdeps/x86_64/multiarch/ifunc-impl-list.c b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
index c8da910e..c1efeec0 100644
--- a/sysdeps/x86_64/multiarch/ifunc-impl-list.c
+++ b/sysdeps/x86_64/multiarch/ifunc-impl-list.c
@@ -43,6 +43,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __memchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, memchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -56,6 +60,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      (CPU_FEATURE_USABLE (AVX2)
 			       && CPU_FEATURE_USABLE (MOVBE)),
 			      __memcmp_avx2_movbe)
+	      IFUNC_IMPL_ADD (array, i, memcmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (MOVBE)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memcmp_avx2_movbe_rtm)
 	      IFUNC_IMPL_ADD (array, i, memcmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -85,6 +94,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      CPU_FEATURE_USABLE (AVX),
 			      __memmove_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memmove_chk_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memmove_chk_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, __memmove_chk,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __memmove_chk_evex_unaligned)
@@ -113,6 +130,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memmove,
 			      CPU_FEATURE_USABLE (AVX),
 			      __memmove_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memmove_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, memmove,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memmove_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, memmove,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __memmove_evex_unaligned)
@@ -143,6 +168,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __memrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, memrchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, memrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -165,6 +194,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __memset_chk_avx2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memset_chk_avx2_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, __memset_chk,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memset_chk_avx2_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, __memset_chk,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -198,6 +235,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __memset_avx2_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memset_avx2_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, memset,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memset_avx2_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, memset,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -222,6 +267,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __rawmemchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, rawmemchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __rawmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, rawmemchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -234,6 +283,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strlen,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, strlen,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strlen_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strlen,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -245,6 +298,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strnlen,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strnlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, strnlen,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strnlen_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strnlen,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -257,6 +314,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpncpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, stpncpy, CPU_FEATURE_USABLE (AVX2),
 			      __stpncpy_avx2)
+	      IFUNC_IMPL_ADD (array, i, stpncpy,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __stpncpy_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, stpncpy,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -271,6 +332,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      __stpcpy_ssse3)
 	      IFUNC_IMPL_ADD (array, i, stpcpy, CPU_FEATURE_USABLE (AVX2),
 			      __stpcpy_avx2)
+	      IFUNC_IMPL_ADD (array, i, stpcpy,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __stpcpy_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, stpcpy,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -309,6 +374,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcat,
 	      IFUNC_IMPL_ADD (array, i, strcat, CPU_FEATURE_USABLE (AVX2),
 			      __strcat_avx2)
+	      IFUNC_IMPL_ADD (array, i, strcat,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strcat_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcat,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -323,6 +392,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, strchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -336,6 +409,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strchrnul,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strchrnul_avx2)
+	      IFUNC_IMPL_ADD (array, i, strchrnul,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strchrnul_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strchrnul,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -348,6 +425,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, strrchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -359,6 +440,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strcmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, strcmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strcmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -375,6 +460,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strcpy,
 	      IFUNC_IMPL_ADD (array, i, strcpy, CPU_FEATURE_USABLE (AVX2),
 			      __strcpy_avx2)
+	      IFUNC_IMPL_ADD (array, i, strcpy,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strcpy_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strcpy,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -422,6 +511,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncat,
 	      IFUNC_IMPL_ADD (array, i, strncat, CPU_FEATURE_USABLE (AVX2),
 			      __strncat_avx2)
+	      IFUNC_IMPL_ADD (array, i, strncat,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strncat_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncat,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -436,6 +529,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
   IFUNC_IMPL (i, name, strncpy,
 	      IFUNC_IMPL_ADD (array, i, strncpy, CPU_FEATURE_USABLE (AVX2),
 			      __strncpy_avx2)
+	      IFUNC_IMPL_ADD (array, i, strncpy,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strncpy_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncpy,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
@@ -469,6 +566,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcschr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcschr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcschr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcschr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcschr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -481,6 +582,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcsrchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsrchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcsrchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsrchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -493,6 +598,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcscmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcscmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcscmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcscmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -505,6 +614,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcsncmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsncmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcsncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsncmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -523,6 +636,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcslen,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcslen_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcslen,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcslen_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcslen,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -535,6 +652,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wcsnlen,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wcsnlen_avx2)
+	      IFUNC_IMPL_ADD (array, i, wcsnlen,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wcsnlen_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wcsnlen,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -550,6 +671,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wmemchr_avx2)
+	      IFUNC_IMPL_ADD (array, i, wmemchr,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wmemchr_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, wmemchr,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -563,6 +688,11 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 			      (CPU_FEATURE_USABLE (AVX2)
 			       && CPU_FEATURE_USABLE (MOVBE)),
 			      __wmemcmp_avx2_movbe)
+	      IFUNC_IMPL_ADD (array, i, wmemcmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (MOVBE)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wmemcmp_avx2_movbe_rtm)
 	      IFUNC_IMPL_ADD (array, i, wmemcmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)
@@ -581,6 +711,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, wmemset,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __wmemset_avx2_unaligned)
+	      IFUNC_IMPL_ADD (array, i, wmemset,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __wmemset_avx2_unaligned_rtm)
 	      IFUNC_IMPL_ADD (array, i, wmemset,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __wmemset_evex_unaligned)
@@ -606,6 +740,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      CPU_FEATURE_USABLE (AVX),
 			      __memcpy_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memcpy_chk_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memcpy_chk_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, __memcpy_chk,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __memcpy_chk_evex_unaligned)
@@ -634,6 +776,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, memcpy,
 			      CPU_FEATURE_USABLE (AVX),
 			      __memcpy_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memcpy_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, memcpy,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __memcpy_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, memcpy,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __memcpy_evex_unaligned)
@@ -676,6 +826,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      CPU_FEATURE_USABLE (AVX),
 			      __mempcpy_chk_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __mempcpy_chk_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __mempcpy_chk_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, __mempcpy_chk,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __mempcpy_chk_evex_unaligned)
@@ -713,6 +871,14 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      CPU_FEATURE_USABLE (AVX),
 			      __mempcpy_avx_unaligned_erms)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __mempcpy_avx_unaligned_rtm)
+	      IFUNC_IMPL_ADD (array, i, mempcpy,
+			      (CPU_FEATURE_USABLE (AVX)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __mempcpy_avx_unaligned_erms_rtm)
 	      IFUNC_IMPL_ADD (array, i, mempcpy,
 			      CPU_FEATURE_USABLE (AVX512VL),
 			      __mempcpy_evex_unaligned)
@@ -734,6 +900,10 @@ __libc_ifunc_impl_list (const char *name, struct libc_ifunc_impl *array,
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      CPU_FEATURE_USABLE (AVX2),
 			      __strncmp_avx2)
+	      IFUNC_IMPL_ADD (array, i, strncmp,
+			      (CPU_FEATURE_USABLE (AVX2)
+			       && CPU_FEATURE_USABLE (RTM)),
+			      __strncmp_avx2_rtm)
 	      IFUNC_IMPL_ADD (array, i, strncmp,
 			      (CPU_FEATURE_USABLE (AVX512VL)
 			       && CPU_FEATURE_USABLE (AVX512BW)),
diff --git a/sysdeps/x86_64/multiarch/ifunc-memcmp.h b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
index 3ca1f0a6..8043c635 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memcmp.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memcmp.h
@@ -23,6 +23,7 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe) attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_movbe_rtm) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_movbe) attribute_hidden;
 
 static inline void *
@@ -38,6 +39,9 @@ IFUNC_SELECTOR (void)
 	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
 	return OPTIMIZE (evex_movbe);
 
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+	return OPTIMIZE (avx2_movbe_rtm);
+
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	return OPTIMIZE (avx2_movbe);
     }
diff --git a/sysdeps/x86_64/multiarch/ifunc-memmove.h b/sysdeps/x86_64/multiarch/ifunc-memmove.h
index 6f8bce5f..fa09b9fb 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memmove.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memmove.h
@@ -29,6 +29,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3_back) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms)
   attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_rtm)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx_unaligned_erms_rtm)
+  attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
@@ -71,6 +75,14 @@ IFUNC_SELECTOR (void)
 	  return OPTIMIZE (evex_unaligned);
 	}
 
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+	{
+	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	    return OPTIMIZE (avx_unaligned_erms_rtm);
+
+	  return OPTIMIZE (avx_unaligned_rtm);
+	}
+
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
diff --git a/sysdeps/x86_64/multiarch/ifunc-memset.h b/sysdeps/x86_64/multiarch/ifunc-memset.h
index 6f31f4dc..6f3375cc 100644
--- a/sysdeps/x86_64/multiarch/ifunc-memset.h
+++ b/sysdeps/x86_64/multiarch/ifunc-memset.h
@@ -27,6 +27,10 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned_erms)
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms)
   attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
+  attribute_hidden;
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_erms_rtm)
+  attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned)
   attribute_hidden;
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned_erms)
@@ -69,6 +73,14 @@ IFUNC_SELECTOR (void)
 	  return OPTIMIZE (evex_unaligned);
 	}
 
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
+	{
+	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
+	    return OPTIMIZE (avx2_unaligned_erms_rtm);
+
+	  return OPTIMIZE (avx2_unaligned_rtm);
+	}
+
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
 	{
 	  if (CPU_FEATURE_USABLE_P (cpu_features, ERMS))
diff --git a/sysdeps/x86_64/multiarch/ifunc-strcpy.h b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
190885
index deae6348..a924762e 100644
190885
--- a/sysdeps/x86_64/multiarch/ifunc-strcpy.h
190885
+++ b/sysdeps/x86_64/multiarch/ifunc-strcpy.h
190885
@@ -25,6 +25,7 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned)
190885
   attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
190885
 
190885
 static inline void *
190885
@@ -39,6 +40,9 @@ IFUNC_SELECTOR (void)
190885
 	  && CPU_FEATURE_USABLE_P (cpu_features, AVX512BW))
190885
 	return OPTIMIZE (evex);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/ifunc-wmemset.h b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
190885
index 9290c4bf..bdc94c6c 100644
190885
--- a/sysdeps/x86_64/multiarch/ifunc-wmemset.h
190885
+++ b/sysdeps/x86_64/multiarch/ifunc-wmemset.h
190885
@@ -20,6 +20,8 @@
190885
 
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_unaligned_rtm)
190885
+  attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex_unaligned) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx512_unaligned) attribute_hidden;
190885
 
190885
@@ -39,6 +41,9 @@ IFUNC_SELECTOR (void)
190885
       if (CPU_FEATURE_USABLE_P (cpu_features, AVX512VL))
190885
 	return OPTIMIZE (evex_unaligned);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_unaligned_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2_unaligned);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..87b076c7
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/memchr-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef MEMCHR
190885
+# define MEMCHR __memchr_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "memchr-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/memchr-avx2.S b/sysdeps/x86_64/multiarch/memchr-avx2.S
190885
index c81da19b..cf893e77 100644
190885
--- a/sysdeps/x86_64/multiarch/memchr-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/memchr-avx2.S
190885
@@ -34,9 +34,13 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE 32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (MEMCHR)
190885
 # ifndef USE_AS_RAWMEMCHR
190885
 	/* Check for zero length.  */
190885
@@ -107,8 +111,8 @@ L(cros_page_boundary):
190885
 # endif
190885
 	addq	%rdi, %rax
190885
 	addq	%rcx, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(aligned_more):
190885
@@ -224,8 +228,7 @@ L(last_4x_vec_or_less):
190885
 
190885
 	jnz	L(first_vec_x3_check)
190885
 	xorl	%eax, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_2x_vec):
190885
@@ -243,8 +246,7 @@ L(last_2x_vec):
190885
 	testl	%eax, %eax
190885
 	jnz	L(first_vec_x1_check)
190885
 	xorl	%eax, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x0_check):
190885
@@ -253,8 +255,7 @@ L(first_vec_x0_check):
190885
 	cmpq	%rax, %rdx
190885
 	jbe	L(zero)
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1_check):
190885
@@ -264,8 +265,7 @@ L(first_vec_x1_check):
190885
 	jbe	L(zero)
190885
 	addq	$VEC_SIZE, %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2_check):
190885
@@ -275,8 +275,7 @@ L(first_vec_x2_check):
190885
 	jbe	L(zero)
190885
 	addq	$(VEC_SIZE * 2), %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x3_check):
190885
@@ -286,12 +285,14 @@ L(first_vec_x3_check):
190885
 	jbe	L(zero)
190885
 	addq	$(VEC_SIZE * 3), %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(zero):
190885
-	VZEROUPPER
190885
+	xorl	%eax, %eax
190885
+	jmp     L(return_vzeroupper)
190885
+
190885
+	.p2align 4
190885
 L(null):
190885
 	xorl	%eax, %eax
190885
 	ret
190885
@@ -301,24 +302,21 @@ L(null):
190885
 L(first_vec_x0):
190885
 	tzcntl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1):
190885
 	tzcntl	%eax, %eax
190885
 	addq	$VEC_SIZE, %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2):
190885
 	tzcntl	%eax, %eax
190885
 	addq	$(VEC_SIZE * 2), %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(4x_vec_end):
190885
@@ -337,8 +335,7 @@ L(first_vec_x3):
190885
 	tzcntl	%eax, %eax
190885
 	addq	$(VEC_SIZE * 3), %rax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 END (MEMCHR)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
190885
new file mode 100644
190885
index 00000000..cf4eff5d
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef MEMCMP
190885
+# define MEMCMP __memcmp_avx2_movbe_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "memcmp-avx2-movbe.S"
190885
diff --git a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
190885
index e3a35b89..9d5c9c72 100644
190885
--- a/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
190885
+++ b/sysdeps/x86_64/multiarch/memcmp-avx2-movbe.S
190885
@@ -47,6 +47,10 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE 32
190885
 # define VEC_MASK ((1 << VEC_SIZE) - 1)
190885
 
190885
@@ -55,7 +59,7 @@
190885
            memcmp has to use UNSIGNED comparison for elemnts.
190885
 */
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (MEMCMP)
190885
 # ifdef USE_AS_WMEMCMP
190885
 	shl	$2, %RDX_LP
190885
@@ -123,8 +127,8 @@ ENTRY (MEMCMP)
190885
 	vptest	%ymm0, %ymm5
190885
 	jnc	L(4x_vec_end)
190885
 	xorl	%eax, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_2x_vec):
190885
@@ -144,8 +148,7 @@ L(last_vec):
190885
 	vpmovmskb %ymm2, %eax
190885
 	subl    $VEC_MASK, %eax
190885
 	jnz	L(first_vec)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec):
190885
@@ -164,8 +167,7 @@ L(wmemcmp_return):
190885
 	movzbl	(%rsi, %rcx), %edx
190885
 	sub	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 # ifdef USE_AS_WMEMCMP
190885
 	.p2align 4
190885
@@ -367,8 +369,7 @@ L(last_4x_vec):
190885
 	vpmovmskb %ymm2, %eax
190885
 	subl    $VEC_MASK, %eax
190885
 	jnz	L(first_vec)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(4x_vec_end):
190885
@@ -394,8 +395,7 @@ L(4x_vec_end):
190885
 	movzbl	(VEC_SIZE * 3)(%rsi, %rcx), %edx
190885
 	sub	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1):
190885
@@ -410,8 +410,7 @@ L(first_vec_x1):
190885
 	movzbl	VEC_SIZE(%rsi, %rcx), %edx
190885
 	sub	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2):
190885
@@ -426,7 +425,6 @@ L(first_vec_x2):
190885
 	movzbl	(VEC_SIZE * 2)(%rsi, %rcx), %edx
190885
 	sub	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 END (MEMCMP)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
190885
new file mode 100644
190885
index 00000000..1ec1962e
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/memmove-avx-unaligned-erms-rtm.S
190885
@@ -0,0 +1,17 @@
190885
+#if IS_IN (libc)
190885
+# define VEC_SIZE	32
190885
+# define VEC(i)		ymm##i
190885
+# define VMOVNT		vmovntdq
190885
+# define VMOVU		vmovdqu
190885
+# define VMOVA		vmovdqa
190885
+
190885
+# define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+# define VZEROUPPER_RETURN jmp	 L(return)
190885
+
190885
+# define SECTION(p)		p##.avx.rtm
190885
+# define MEMMOVE_SYMBOL(p,s)	p##_avx_##s##_rtm
190885
+
190885
+# include "memmove-vec-unaligned-erms.S"
190885
+#endif
190885
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
index 08e21692..71f5954d 100644
190885
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
@@ -140,11 +140,12 @@ L(last_2x_vec):
190885
 	VMOVU	-VEC_SIZE(%rsi,%rdx), %VEC(1)
190885
 	VMOVU	%VEC(0), (%rdi)
190885
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
190885
-	VZEROUPPER
190885
 #if !defined USE_MULTIARCH || !IS_IN (libc)
190885
 L(nop):
190885
-#endif
190885
 	ret
190885
+#else
190885
+	VZEROUPPER_RETURN
190885
+#endif
190885
 #if defined USE_MULTIARCH && IS_IN (libc)
190885
 END (MEMMOVE_SYMBOL (__memmove, unaligned))
190885
 
190885
@@ -237,8 +238,11 @@ L(last_2x_vec):
190885
 	VMOVU	%VEC(0), (%rdi)
190885
 	VMOVU	%VEC(1), -VEC_SIZE(%rdi,%rdx)
190885
 L(return):
190885
-	VZEROUPPER
190885
+#if VEC_SIZE > 16
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
+#else
190885
 	ret
190885
+#endif
190885
 
190885
 L(movsb):
190885
 	cmpq	__x86_shared_non_temporal_threshold(%rip), %rdx
190885
@@ -289,8 +293,7 @@ L(between_32_63):
190885
 	VMOVU	-32(%rsi,%rdx), %YMM1
190885
 	VMOVU	%YMM0, (%rdi)
190885
 	VMOVU	%YMM1, -32(%rdi,%rdx)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 #endif
190885
 #if VEC_SIZE > 16
190885
 	/* From 16 to 31.  No branch when size == 16.  */
190885
@@ -299,7 +302,7 @@ L(between_16_31):
190885
 	VMOVU	-16(%rsi,%rdx), %XMM1
190885
 	VMOVU	%XMM0, (%rdi)
190885
 	VMOVU	%XMM1, -16(%rdi,%rdx)
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 #endif
190885
 L(between_8_15):
190885
 	/* From 8 to 15.  No branch when size == 8.  */
190885
@@ -352,8 +355,7 @@ L(more_2x_vec):
190885
 	VMOVU	%VEC(5), -(VEC_SIZE * 2)(%rdi,%rdx)
190885
 	VMOVU	%VEC(6), -(VEC_SIZE * 3)(%rdi,%rdx)
190885
 	VMOVU	%VEC(7), -(VEC_SIZE * 4)(%rdi,%rdx)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 L(last_4x_vec):
190885
 	/* Copy from 2 * VEC to 4 * VEC. */
190885
 	VMOVU	(%rsi), %VEC(0)
190885
@@ -364,8 +366,7 @@ L(last_4x_vec):
190885
 	VMOVU	%VEC(1), VEC_SIZE(%rdi)
190885
 	VMOVU	%VEC(2), -VEC_SIZE(%rdi,%rdx)
190885
 	VMOVU	%VEC(3), -(VEC_SIZE * 2)(%rdi,%rdx)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 L(more_8x_vec):
190885
 	cmpq	%rsi, %rdi
190885
@@ -421,8 +422,7 @@ L(loop_4x_vec_forward):
190885
 	VMOVU	%VEC(8), -(VEC_SIZE * 3)(%rcx)
190885
 	/* Store the first VEC.  */
190885
 	VMOVU	%VEC(4), (%r11)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 L(more_8x_vec_backward):
190885
 	/* Load the first 4 * VEC and last VEC to support overlapping
190885
@@ -473,8 +473,7 @@ L(loop_4x_vec_backward):
190885
 	VMOVU	%VEC(7), (VEC_SIZE * 3)(%rdi)
190885
 	/* Store the last VEC.  */
190885
 	VMOVU	%VEC(8), (%r11)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 #if (defined USE_MULTIARCH || VEC_SIZE == 16) && IS_IN (libc)
190885
 L(large_forward):
190885
@@ -509,8 +508,7 @@ L(loop_large_forward):
190885
 	VMOVU	%VEC(8), -(VEC_SIZE * 3)(%rcx)
190885
 	/* Store the first VEC.  */
190885
 	VMOVU	%VEC(4), (%r11)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 L(large_backward):
190885
 	/* Don't use non-temporal store if there is overlap between
190885
@@ -544,8 +542,7 @@ L(loop_large_backward):
190885
 	VMOVU	%VEC(7), (VEC_SIZE * 3)(%rdi)
190885
 	/* Store the last VEC.  */
190885
 	VMOVU	%VEC(8), (%r11)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 #endif
190885
 END (MEMMOVE_SYMBOL (__memmove, unaligned_erms))
190885
 
190885
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..cea2d2a7
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef MEMRCHR
190885
+# define MEMRCHR __memrchr_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "memrchr-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/memrchr-avx2.S b/sysdeps/x86_64/multiarch/memrchr-avx2.S
190885
index ce488dd9..20efe7ac 100644
190885
--- a/sysdeps/x86_64/multiarch/memrchr-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/memrchr-avx2.S
190885
@@ -20,14 +20,22 @@
190885
 
190885
 # include <sysdep.h>
190885
 
190885
+# ifndef MEMRCHR
190885
+#  define MEMRCHR	__memrchr_avx2
190885
+# endif
190885
+
190885
 # ifndef VZEROUPPER
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE 32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
-ENTRY (__memrchr_avx2)
190885
+	.section SECTION(.text),"ax",@progbits
190885
+ENTRY (MEMRCHR)
190885
 	/* Broadcast CHAR to YMM0.  */
190885
 	vmovd	%esi, %xmm0
190885
 	vpbroadcastb %xmm0, %ymm0
190885
@@ -134,8 +142,8 @@ L(loop_4x_vec):
190885
 	vpmovmskb %ymm1, %eax
190885
 	bsrl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_4x_vec_or_less):
190885
@@ -169,8 +177,7 @@ L(last_4x_vec_or_less):
190885
 	addq	%rax, %rdx
190885
 	jl	L(zero)
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_2x_vec):
190885
@@ -191,31 +198,27 @@ L(last_2x_vec):
190885
 	jl	L(zero)
190885
 	addl	$(VEC_SIZE * 2), %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_x0):
190885
 	bsrl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_x1):
190885
 	bsrl	%eax, %eax
190885
 	addl	$VEC_SIZE, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_x2):
190885
 	bsrl	%eax, %eax
190885
 	addl	$(VEC_SIZE * 2), %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_x3):
190885
@@ -232,8 +235,7 @@ L(last_vec_x1_check):
190885
 	jl	L(zero)
190885
 	addl	$VEC_SIZE, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_x3_check):
190885
@@ -243,12 +245,14 @@ L(last_vec_x3_check):
190885
 	jl	L(zero)
190885
 	addl	$(VEC_SIZE * 3), %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(zero):
190885
-	VZEROUPPER
190885
+	xorl	%eax, %eax
190885
+	VZEROUPPER_RETURN
190885
+
190885
+	.p2align 4
190885
 L(null):
190885
 	xorl	%eax, %eax
190885
 	ret
190885
@@ -273,8 +277,7 @@ L(last_vec_or_less_aligned):
190885
 
190885
 	bsrl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_or_less):
190885
@@ -315,8 +318,7 @@ L(last_vec_or_less):
190885
 	bsrl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
 	addq	%r8, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_vec_2x_aligned):
190885
@@ -353,7 +355,6 @@ L(last_vec_2x_aligned):
190885
 	bsrl	%eax, %eax
190885
 	addq	%rdi, %rax
190885
 	addq	%r8, %rax
190885
-	VZEROUPPER
190885
-	ret
190885
-END (__memrchr_avx2)
190885
+	VZEROUPPER_RETURN
190885
+END (MEMRCHR)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
190885
new file mode 100644
190885
index 00000000..8ac3e479
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms-rtm.S
190885
@@ -0,0 +1,10 @@
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+#define MEMSET_SYMBOL(p,s)	p##_avx2_##s##_rtm
190885
+#define WMEMSET_SYMBOL(p,s)	p##_avx2_##s##_rtm
190885
+
190885
+#include "memset-avx2-unaligned-erms.S"
190885
diff --git a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
190885
index 7ab3d898..ae0860f3 100644
190885
--- a/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
190885
+++ b/sysdeps/x86_64/multiarch/memset-avx2-unaligned-erms.S
190885
@@ -14,9 +14,15 @@
190885
   movq r, %rax; \
190885
   vpbroadcastd %xmm0, %ymm0
190885
 
190885
-# define SECTION(p)		p##.avx
190885
-# define MEMSET_SYMBOL(p,s)	p##_avx2_##s
190885
-# define WMEMSET_SYMBOL(p,s)	p##_avx2_##s
190885
+# ifndef SECTION
190885
+#  define SECTION(p)		p##.avx
190885
+# endif
190885
+# ifndef MEMSET_SYMBOL
190885
+#  define MEMSET_SYMBOL(p,s)	p##_avx2_##s
190885
+# endif
190885
+# ifndef WMEMSET_SYMBOL
190885
+#  define WMEMSET_SYMBOL(p,s)	p##_avx2_##s
190885
+# endif
190885
 
190885
 # include "memset-vec-unaligned-erms.S"
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
190885
index 71e91a8f..bae5cba4 100644
190885
--- a/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
190885
+++ b/sysdeps/x86_64/multiarch/memset-vec-unaligned-erms.S
190885
@@ -45,17 +45,14 @@
190885
 #ifndef VZEROUPPER
190885
 # if VEC_SIZE > 16
190885
 #  define VZEROUPPER			vzeroupper
190885
+#  define VZEROUPPER_SHORT_RETURN	vzeroupper; ret
190885
 # else
190885
 #  define VZEROUPPER
190885
 # endif
190885
 #endif
190885
 
190885
 #ifndef VZEROUPPER_SHORT_RETURN
190885
-# if VEC_SIZE > 16
190885
-#  define VZEROUPPER_SHORT_RETURN	vzeroupper
190885
-# else
190885
-#  define VZEROUPPER_SHORT_RETURN	rep
190885
-# endif
190885
+# define VZEROUPPER_SHORT_RETURN	rep; ret
190885
 #endif
190885
 
190885
 #ifndef MOVQ
190885
@@ -117,8 +114,7 @@ L(entry_from_bzero):
190885
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
190885
 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
190885
 	VMOVU	%VEC(0), (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 #if defined USE_MULTIARCH && IS_IN (libc)
190885
 END (MEMSET_SYMBOL (__memset, unaligned))
190885
 
190885
@@ -141,14 +137,12 @@ ENTRY (__memset_erms)
190885
 ENTRY (MEMSET_SYMBOL (__memset, erms))
190885
 # endif
190885
 L(stosb):
190885
-	/* Issue vzeroupper before rep stosb.  */
190885
-	VZEROUPPER
190885
 	mov	%RDX_LP, %RCX_LP
190885
 	movzbl	%sil, %eax
190885
 	mov	%RDI_LP, %RDX_LP
190885
 	rep stosb
190885
 	mov	%RDX_LP, %RAX_LP
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # if VEC_SIZE == 16
190885
 END (__memset_erms)
190885
 # else
190885
@@ -175,8 +169,7 @@ ENTRY (MEMSET_SYMBOL (__memset, unaligned_erms))
190885
 	/* From VEC and to 2 * VEC.  No branch when size == VEC_SIZE.  */
190885
 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
190885
 	VMOVU	%VEC(0), (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 L(stosb_more_2x_vec):
190885
 	cmp	__x86_rep_stosb_threshold(%rip), %RDX_LP
190885
@@ -190,8 +183,11 @@ L(more_2x_vec):
190885
 	VMOVU	%VEC(0), -VEC_SIZE(%rdi,%rdx)
190885
 	VMOVU	%VEC(0), -(VEC_SIZE * 2)(%rdi,%rdx)
190885
 L(return):
190885
-	VZEROUPPER
190885
+#if VEC_SIZE > 16
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
+#else
190885
 	ret
190885
+#endif
190885
 
190885
 L(loop_start):
190885
 	leaq	(VEC_SIZE * 4)(%rdi), %rcx
190885
@@ -217,7 +213,6 @@ L(loop):
190885
 	cmpq	%rcx, %rdx
190885
 	jne	L(loop)
190885
 	VZEROUPPER_SHORT_RETURN
190885
-	ret
190885
 L(less_vec):
190885
 	/* Less than 1 VEC.  */
190885
 # if VEC_SIZE != 16 && VEC_SIZE != 32 && VEC_SIZE != 64
190885
@@ -241,40 +236,34 @@ L(less_vec):
190885
 	jb	1f
190885
 	movb	%cl, (%rdi)
190885
 1:
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # if VEC_SIZE > 32
190885
 	/* From 32 to 63.  No branch when size == 32.  */
190885
 L(between_32_63):
190885
 	VMOVU	%YMM0, -32(%rdi,%rdx)
190885
 	VMOVU	%YMM0, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # endif
190885
 # if VEC_SIZE > 16
190885
 	/* From 16 to 31.  No branch when size == 16.  */
190885
 L(between_16_31):
190885
 	VMOVU	%XMM0, -16(%rdi,%rdx)
190885
 	VMOVU	%XMM0, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # endif
190885
 	/* From 8 to 15.  No branch when size == 8.  */
190885
 L(between_8_15):
190885
 	movq	%rcx, -8(%rdi,%rdx)
190885
 	movq	%rcx, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 L(between_4_7):
190885
 	/* From 4 to 7.  No branch when size == 4.  */
190885
 	movl	%ecx, -4(%rdi,%rdx)
190885
 	movl	%ecx, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 L(between_2_3):
190885
 	/* From 2 to 3.  No branch when size == 2.  */
190885
 	movw	%cx, -2(%rdi,%rdx)
190885
 	movw	%cx, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 END (MEMSET_SYMBOL (__memset, unaligned_erms))
190885
diff --git a/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..acc5f6e2
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/rawmemchr-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define MEMCHR __rawmemchr_avx2_rtm
190885
+#define USE_AS_RAWMEMCHR 1
190885
+
190885
+#include "memchr-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..2b9c07a5
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/stpcpy-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define USE_AS_STPCPY
190885
+#define STRCPY __stpcpy_avx2_rtm
190885
+#include "strcpy-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..60a2ccfe
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/stpncpy-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define USE_AS_STPCPY
190885
+#define USE_AS_STRNCPY
190885
+#define STRCPY __stpncpy_avx2_rtm
190885
+#include "strcpy-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..637fb557
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strcat-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRCAT
190885
+# define STRCAT __strcat_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strcat-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strcat-avx2.S b/sysdeps/x86_64/multiarch/strcat-avx2.S
190885
index b0623564..aa48c058 100644
190885
--- a/sysdeps/x86_64/multiarch/strcat-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strcat-avx2.S
190885
@@ -30,7 +30,11 @@
190885
 /* Number of bytes in a vector register */
190885
 # define VEC_SIZE	32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRCAT)
190885
 	mov	%rdi, %r9
190885
 # ifdef USE_AS_STRNCAT
190885
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..81f20d1d
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strchr-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRCHR
190885
+# define STRCHR __strchr_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strchr-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strchr-avx2.S b/sysdeps/x86_64/multiarch/strchr-avx2.S
190885
index 47bc3c99..da7d2620 100644
190885
--- a/sysdeps/x86_64/multiarch/strchr-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strchr-avx2.S
190885
@@ -38,9 +38,13 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE 32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRCHR)
190885
 	movl	%edi, %ecx
190885
 	/* Broadcast CHAR to YMM0.  */
190885
@@ -93,8 +97,8 @@ L(cros_page_boundary):
190885
 	cmp	(%rax), %CHAR_REG
190885
 	cmovne	%rdx, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(aligned_more):
190885
@@ -190,8 +194,7 @@ L(first_vec_x0):
190885
 	cmp	(%rax), %CHAR_REG
190885
 	cmovne	%rdx, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1):
190885
@@ -205,8 +208,7 @@ L(first_vec_x1):
190885
 	cmp	(%rax), %CHAR_REG
190885
 	cmovne	%rdx, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2):
190885
@@ -220,8 +222,7 @@ L(first_vec_x2):
190885
 	cmp	(%rax), %CHAR_REG
190885
 	cmovne	%rdx, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(4x_vec_end):
190885
@@ -247,8 +248,7 @@ L(first_vec_x3):
190885
 	cmp	(%rax), %CHAR_REG
190885
 	cmovne	%rdx, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 END (STRCHR)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/strchr.c b/sysdeps/x86_64/multiarch/strchr.c
190885
index be05e197..7e582f02 100644
190885
--- a/sysdeps/x86_64/multiarch/strchr.c
190885
+++ b/sysdeps/x86_64/multiarch/strchr.c
190885
@@ -29,6 +29,7 @@
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_no_bsf) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
190885
 
190885
 static inline void *
190885
@@ -44,6 +45,9 @@ IFUNC_SELECTOR (void)
190885
 	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
190885
 	return OPTIMIZE (evex);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..cdcf818b
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strchrnul-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define STRCHR __strchrnul_avx2_rtm
190885
+#define USE_AS_STRCHRNUL 1
190885
+#include "strchr-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..aecd30d9
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRCMP
190885
+# define STRCMP __strcmp_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strcmp-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strcmp-avx2.S b/sysdeps/x86_64/multiarch/strcmp-avx2.S
190885
index 8fb8eedc..5d1c9d90 100644
190885
--- a/sysdeps/x86_64/multiarch/strcmp-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strcmp-avx2.S
190885
@@ -55,6 +55,10 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 /* Warning!
190885
            wcscmp/wcsncmp have to use SIGNED comparison for elements.
190885
            strcmp/strncmp have to use UNSIGNED comparison for elements.
190885
@@ -75,7 +79,7 @@
190885
    the maximum offset is reached before a difference is found, zero is
190885
    returned.  */
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRCMP)
190885
 # ifdef USE_AS_STRNCMP
190885
 	/* Check for simple cases (0 or 1) in offset.  */
190885
@@ -137,8 +141,8 @@ L(return):
190885
 	movzbl	(%rsi, %rdx), %edx
190885
 	subl	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(return_vec_size):
190885
@@ -171,8 +175,7 @@ L(return_vec_size):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(return_2_vec_size):
190885
@@ -205,8 +208,7 @@ L(return_2_vec_size):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(return_3_vec_size):
190885
@@ -239,8 +241,7 @@ L(return_3_vec_size):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(next_3_vectors):
190885
@@ -366,8 +367,7 @@ L(back_to_loop):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(test_vec):
190885
@@ -410,8 +410,7 @@ L(test_vec):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(test_2_vec):
190885
@@ -454,8 +453,7 @@ L(test_2_vec):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(test_3_vec):
190885
@@ -496,8 +494,7 @@ L(test_3_vec):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(loop_cross_page):
190885
@@ -566,8 +563,7 @@ L(loop_cross_page):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(loop_cross_page_2_vec):
190885
@@ -641,8 +637,7 @@ L(loop_cross_page_2_vec):
190885
 	subl	%edx, %eax
190885
 #  endif
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 # ifdef USE_AS_STRNCMP
190885
 L(string_nbyte_offset_check):
190885
@@ -684,8 +679,7 @@ L(cross_page_loop):
190885
 # ifndef USE_AS_WCSCMP
190885
 L(different):
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 # ifdef USE_AS_WCSCMP
190885
 	.p2align 4
190885
@@ -695,16 +689,14 @@ L(different):
190885
 	setl	%al
190885
 	negl	%eax
190885
 	orl	$1, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # endif
190885
 
190885
 # ifdef USE_AS_STRNCMP
190885
 	.p2align 4
190885
 L(zero):
190885
 	xorl	%eax, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(char0):
190885
@@ -718,8 +710,7 @@ L(char0):
190885
 	movzbl	(%rdi), %eax
190885
 	subl	%ecx, %eax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 # endif
190885
 
190885
 	.p2align 4
190885
@@ -744,8 +735,7 @@ L(last_vector):
190885
 	movzbl	(%rsi, %rdx), %edx
190885
 	subl	%edx, %eax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	/* Comparing on page boundary region requires special treatment:
190885
 	   It must done one vector at the time, starting with the wider
190885
@@ -866,7 +856,6 @@ L(cross_page_4bytes):
190885
 	testl	%eax, %eax
190885
 	jne	L(cross_page_loop)
190885
 	subl	%ecx, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 END (STRCMP)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/strcmp.c b/sysdeps/x86_64/multiarch/strcmp.c
190885
index c5f38510..11bbea2b 100644
190885
--- a/sysdeps/x86_64/multiarch/strcmp.c
190885
+++ b/sysdeps/x86_64/multiarch/strcmp.c
190885
@@ -30,6 +30,7 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2_unaligned) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
190885
 
190885
 static inline void *
190885
@@ -46,6 +47,9 @@ IFUNC_SELECTOR (void)
190885
 	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
190885
 	return OPTIMIZE (evex);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..c2c581ec
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strcpy-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRCPY
190885
+# define STRCPY __strcpy_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strcpy-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strcpy-avx2.S b/sysdeps/x86_64/multiarch/strcpy-avx2.S
190885
index 81677f90..613c59aa 100644
190885
--- a/sysdeps/x86_64/multiarch/strcpy-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strcpy-avx2.S
190885
@@ -37,6 +37,10 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 /* zero register */
190885
 #define xmmZ	xmm0
190885
 #define ymmZ	ymm0
190885
@@ -46,7 +50,7 @@
190885
 
190885
 # ifndef USE_AS_STRCAT
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRCPY)
190885
 #  ifdef USE_AS_STRNCPY
190885
 	mov	%rdx, %r8
190885
@@ -369,8 +373,8 @@ L(CopyVecSizeExit):
190885
 	lea	1(%rdi), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(CopyTwoVecSize1):
190885
@@ -553,8 +557,7 @@ L(Exit1):
190885
 	lea	2(%rdi), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit2):
190885
@@ -569,8 +572,7 @@ L(Exit2):
190885
 	lea	3(%rdi), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit3):
190885
@@ -584,8 +586,7 @@ L(Exit3):
190885
 	lea	4(%rdi), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit4_7):
190885
@@ -602,8 +603,7 @@ L(Exit4_7):
190885
 	lea	1(%rdi, %rdx), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit8_15):
190885
@@ -620,8 +620,7 @@ L(Exit8_15):
190885
 	lea	1(%rdi, %rdx), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit16_31):
190885
@@ -638,8 +637,7 @@ L(Exit16_31):
190885
 	lea 1(%rdi, %rdx), %rdi
190885
 	jnz L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Exit32_63):
190885
@@ -656,8 +654,7 @@ L(Exit32_63):
190885
 	lea	1(%rdi, %rdx), %rdi
190885
 	jnz	L(StrncpyFillTailWithZero)
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 # ifdef USE_AS_STRNCPY
190885
 
190885
@@ -671,8 +668,7 @@ L(StrncpyExit1):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, 1(%rdi)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit2):
190885
@@ -684,8 +680,7 @@ L(StrncpyExit2):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, 2(%rdi)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit3_4):
190885
@@ -699,8 +694,7 @@ L(StrncpyExit3_4):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi, %r8)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit5_8):
190885
@@ -714,8 +708,7 @@ L(StrncpyExit5_8):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi, %r8)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit9_16):
190885
@@ -729,8 +722,7 @@ L(StrncpyExit9_16):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi, %r8)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit17_32):
190885
@@ -744,8 +736,7 @@ L(StrncpyExit17_32):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi, %r8)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit33_64):
190885
@@ -760,8 +751,7 @@ L(StrncpyExit33_64):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi, %r8)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(StrncpyExit65):
190885
@@ -778,50 +768,43 @@ L(StrncpyExit65):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, 65(%rdi)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 #  ifndef USE_AS_STRCAT
190885
 
190885
 	.p2align 4
190885
 L(Fill1):
190885
 	mov	%dl, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Fill2):
190885
 	mov	%dx, (%rdi)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Fill3_4):
190885
 	mov	%dx, (%rdi)
190885
 	mov     %dx, -2(%rdi, %r8)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Fill5_8):
190885
 	mov	%edx, (%rdi)
190885
 	mov     %edx, -4(%rdi, %r8)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Fill9_16):
190885
 	mov	%rdx, (%rdi)
190885
 	mov	%rdx, -8(%rdi, %r8)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(Fill17_32):
190885
 	vmovdqu %xmmZ, (%rdi)
190885
 	vmovdqu %xmmZ, -16(%rdi, %r8)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(CopyVecSizeUnalignedVec2):
190885
@@ -898,8 +881,7 @@ L(Fill):
190885
 	cmp	$1, %r8d
190885
 	ja	L(Fill2)
190885
 	je	L(Fill1)
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 /* end of ifndef USE_AS_STRCAT */
190885
 #  endif
190885
@@ -929,8 +911,7 @@ L(UnalignedFourVecSizeLeaveCase3):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (VEC_SIZE * 4)(%rdi)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(UnalignedFourVecSizeLeaveCase2):
190885
@@ -1001,16 +982,14 @@ L(StrncpyExit):
190885
 #  ifdef USE_AS_STRCAT
190885
 	movb	$0, (%rdi)
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(ExitZero):
190885
 #  ifndef USE_AS_STRCAT
190885
 	mov	%rdi, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 # endif
190885
 
190885
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..75b4b761
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strlen-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRLEN
190885
+# define STRLEN __strlen_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strlen-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strlen-avx2.S b/sysdeps/x86_64/multiarch/strlen-avx2.S
190885
index 645e0446..82826e10 100644
190885
--- a/sysdeps/x86_64/multiarch/strlen-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strlen-avx2.S
190885
@@ -36,9 +36,13 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE 32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRLEN)
190885
 # ifdef USE_AS_STRNLEN
190885
 	/* Check for zero length.  */
190885
@@ -111,8 +115,8 @@ L(cros_page_boundary):
190885
 # ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(aligned_more):
190885
@@ -231,8 +235,7 @@ L(last_4x_vec_or_less):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(last_2x_vec):
190885
@@ -253,8 +256,7 @@ L(last_2x_vec):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x0_check):
190885
@@ -267,8 +269,7 @@ L(first_vec_x0_check):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1_check):
190885
@@ -282,8 +283,7 @@ L(first_vec_x1_check):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2_check):
190885
@@ -297,8 +297,7 @@ L(first_vec_x2_check):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x3_check):
190885
@@ -312,8 +311,7 @@ L(first_vec_x3_check):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(max):
190885
@@ -321,8 +319,7 @@ L(max):
190885
 #  ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 #  endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(zero):
190885
@@ -338,8 +335,7 @@ L(first_vec_x0):
190885
 # ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x1):
190885
@@ -350,8 +346,7 @@ L(first_vec_x1):
190885
 # ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(first_vec_x2):
190885
@@ -362,8 +357,7 @@ L(first_vec_x2):
190885
 # ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(4x_vec_end):
190885
@@ -389,8 +383,7 @@ L(first_vec_x3):
190885
 # ifdef USE_AS_WCSLEN
190885
 	shrq	$2, %rax
190885
 # endif
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 END (STRLEN)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..0dcea18d
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strncat-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define USE_AS_STRNCAT
190885
+#define STRCAT __strncat_avx2_rtm
190885
+#include "strcat-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..37d1224b
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strncmp-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define STRCMP	__strncmp_avx2_rtm
190885
+#define USE_AS_STRNCMP 1
190885
+#include "strcmp-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strncmp.c b/sysdeps/x86_64/multiarch/strncmp.c
190885
index 4c15542f..44c85116 100644
190885
--- a/sysdeps/x86_64/multiarch/strncmp.c
190885
+++ b/sysdeps/x86_64/multiarch/strncmp.c
190885
@@ -30,6 +30,7 @@ extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (ssse3) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse42) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
190885
 
190885
 static inline void *
190885
@@ -46,6 +47,9 @@ IFUNC_SELECTOR (void)
190885
 	  && !CPU_FEATURES_ARCH_P (cpu_features, Prefer_AVX2_STRCMP))
190885
 	return OPTIMIZE (evex);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..79e70832
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strncpy-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define USE_AS_STRNCPY
190885
+#define STRCPY __strncpy_avx2_rtm
190885
+#include "strcpy-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..04f1626a
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strnlen-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define STRLEN __strnlen_avx2_rtm
190885
+#define USE_AS_STRNLEN 1
190885
+
190885
+#include "strlen-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..5def14ec
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2-rtm.S
190885
@@ -0,0 +1,12 @@
190885
+#ifndef STRRCHR
190885
+# define STRRCHR __strrchr_avx2_rtm
190885
+#endif
190885
+
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+  ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST
190885
+
190885
+#define VZEROUPPER_RETURN jmp	 L(return_vzeroupper)
190885
+
190885
+#define SECTION(p) p##.avx.rtm
190885
+
190885
+#include "strrchr-avx2.S"
190885
diff --git a/sysdeps/x86_64/multiarch/strrchr-avx2.S b/sysdeps/x86_64/multiarch/strrchr-avx2.S
190885
index 4381e6ab..9f22a15e 100644
190885
--- a/sysdeps/x86_64/multiarch/strrchr-avx2.S
190885
+++ b/sysdeps/x86_64/multiarch/strrchr-avx2.S
190885
@@ -36,9 +36,13 @@
190885
 #  define VZEROUPPER	vzeroupper
190885
 # endif
190885
 
190885
+# ifndef SECTION
190885
+#  define SECTION(p)	p##.avx
190885
+# endif
190885
+
190885
 # define VEC_SIZE	32
190885
 
190885
-	.section .text.avx,"ax",@progbits
190885
+	.section SECTION(.text),"ax",@progbits
190885
 ENTRY (STRRCHR)
190885
 	movd	%esi, %xmm4
190885
 	movl	%edi, %ecx
190885
@@ -166,8 +170,8 @@ L(return_value):
190885
 # endif
190885
 	bsrl	%eax, %eax
190885
 	leaq	-VEC_SIZE(%rdi, %rax), %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+L(return_vzeroupper):
190885
+	ZERO_UPPER_VEC_REGISTERS_RETURN
190885
 
190885
 	.p2align 4
190885
 L(match):
190885
@@ -198,8 +202,7 @@ L(find_nul):
190885
 	jz	L(return_value)
190885
 	bsrl	%eax, %eax
190885
 	leaq	-VEC_SIZE(%rdi, %rax), %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(char_and_nul):
190885
@@ -222,14 +225,12 @@ L(char_and_nul_in_first_vec):
190885
 	jz	L(return_null)
190885
 	bsrl	%eax, %eax
190885
 	leaq	-VEC_SIZE(%rdi, %rax), %rax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 	.p2align 4
190885
 L(return_null):
190885
 	xorl	%eax, %eax
190885
-	VZEROUPPER
190885
-	ret
190885
+	VZEROUPPER_RETURN
190885
 
190885
 END (STRRCHR)
190885
 #endif
190885
diff --git a/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..d49dbbf0
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcschr-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define STRCHR __wcschr_avx2_rtm
190885
+#define USE_AS_WCSCHR 1
190885
+#include "strchr-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..d6ca2b80
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcscmp-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define STRCMP __wcscmp_avx2_rtm
190885
+#define USE_AS_WCSCMP 1
190885
+
190885
+#include "strcmp-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..35658d73
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcslen-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define STRLEN __wcslen_avx2_rtm
190885
+#define USE_AS_WCSLEN 1
190885
+
190885
+#include "strlen-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..4e88c70c
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcsncmp-avx2-rtm.S
190885
@@ -0,0 +1,5 @@
190885
+#define STRCMP __wcsncmp_avx2_rtm
190885
+#define USE_AS_STRNCMP 1
190885
+#define USE_AS_WCSCMP 1
190885
+
190885
+#include "strcmp-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..7437ebee
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcsnlen-avx2-rtm.S
190885
@@ -0,0 +1,5 @@
190885
+#define STRLEN __wcsnlen_avx2_rtm
190885
+#define USE_AS_WCSLEN 1
190885
+#define USE_AS_STRNLEN 1
190885
+
190885
+#include "strlen-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wcsnlen.c b/sysdeps/x86_64/multiarch/wcsnlen.c
190885
index 84254b83..20b731ae 100644
190885
--- a/sysdeps/x86_64/multiarch/wcsnlen.c
190885
+++ b/sysdeps/x86_64/multiarch/wcsnlen.c
190885
@@ -29,6 +29,7 @@
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse2) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (sse4_1) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2) attribute_hidden;
190885
+extern __typeof (REDIRECT_NAME) OPTIMIZE (avx2_rtm) attribute_hidden;
190885
 extern __typeof (REDIRECT_NAME) OPTIMIZE (evex) attribute_hidden;
190885
 
190885
 static inline void *
190885
@@ -44,6 +45,9 @@ IFUNC_SELECTOR (void)
190885
 	  && CPU_FEATURE_USABLE_P (cpu_features, BMI2))
190885
 	return OPTIMIZE (evex);
190885
 
190885
+      if (CPU_FEATURE_USABLE_P (cpu_features, RTM))
190885
+	return OPTIMIZE (avx2_rtm);
190885
+
190885
       if (!CPU_FEATURES_ARCH_P (cpu_features, Prefer_No_VZEROUPPER))
190885
 	return OPTIMIZE (avx2);
190885
     }
190885
diff --git a/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..9bf76083
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wcsrchr-avx2-rtm.S
190885
@@ -0,0 +1,3 @@
190885
+#define STRRCHR __wcsrchr_avx2_rtm
190885
+#define USE_AS_WCSRCHR 1
190885
+#include "strrchr-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
190885
new file mode 100644
190885
index 00000000..58ed21db
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wmemchr-avx2-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define MEMCHR __wmemchr_avx2_rtm
190885
+#define USE_AS_WMEMCHR 1
190885
+
190885
+#include "memchr-avx2-rtm.S"
190885
diff --git a/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
190885
new file mode 100644
190885
index 00000000..31104d12
190885
--- /dev/null
190885
+++ b/sysdeps/x86_64/multiarch/wmemcmp-avx2-movbe-rtm.S
190885
@@ -0,0 +1,4 @@
190885
+#define MEMCMP __wmemcmp_avx2_movbe_rtm
190885
+#define USE_AS_WMEMCMP 1
190885
+
190885
+#include "memcmp-avx2-movbe-rtm.S"
190885
diff --git a/sysdeps/x86_64/sysdep.h b/sysdeps/x86_64/sysdep.h
190885
index 1738d7f9..223f1a59 100644
190885
--- a/sysdeps/x86_64/sysdep.h
190885
+++ b/sysdeps/x86_64/sysdep.h
190885
@@ -95,6 +95,28 @@ lose:									      \
190885
 #define R14_LP	r14
190885
 #define R15_LP	r15
190885
 
190885
+/* Zero upper vector registers and return with xtest.  NB: Use VZEROALL
190885
+   to avoid an RTM abort triggered by VZEROUPPER in a transaction.  */
190885
+#define ZERO_UPPER_VEC_REGISTERS_RETURN_XTEST \
190885
+	xtest;							\
190885
+	jz	1f;						\
190885
+	vzeroall;						\
190885
+	ret;							\
190885
+1:								\
190885
+	vzeroupper;						\
190885
+	ret
190885
+
190885
+/* Zero upper vector registers and return.  */
190885
+#ifndef ZERO_UPPER_VEC_REGISTERS_RETURN
190885
+# define ZERO_UPPER_VEC_REGISTERS_RETURN \
190885
+	VZEROUPPER;						\
190885
+	ret
190885
+#endif
190885
+
190885
+#ifndef VZEROUPPER_RETURN
190885
+# define VZEROUPPER_RETURN	VZEROUPPER; ret
190885
+#endif
190885
+
190885
 #else	/* __ASSEMBLER__ */
190885
 
190885
 /* Long and pointer size in bytes.  */
190885
-- 
190885
GitLab
190885