190885
From b4cf48b235d64d0eab5739e6d496717c0023a32e Mon Sep 17 00:00:00 2001
190885
From: "H.J. Lu" <hjl.tools@gmail.com>
190885
Date: Wed, 2 Mar 2022 15:17:29 -0800
190885
Subject: [PATCH] x86-64: Avoid rep movsb with short distance [BZ #27130]
190885
190885
When copying with "rep movsb", if the distance between source and
190885
destination is N*4GB + [1..63] with N >= 0, performance may be very
190885
slow.  This patch updates memmove-vec-unaligned-erms.S for AVX and
190885
AVX512 versions with the distance in RCX:
190885
190885
	cmpl	$63, %ecx
190885
	// Don't use "rep movsb" if ECX <= 63
190885
	jbe	L(Don't use "rep movsb")
190885
	Use "rep movsb"
190885
190885
Benchtests data with bench-memcpy, bench-memcpy-large, bench-memcpy-random
190885
and bench-memcpy-walk on Skylake, Ice Lake and Tiger Lake show that its
190885
performance impact is within noise range as "rep movsb" is only used for
190885
data size >= 4KB.
190885
190885
(cherry picked from commit 3ec5d83d2a237d39e7fd6ef7a0bc8ac4c171a4a5)
190885
---
190885
 .../multiarch/memmove-vec-unaligned-erms.S    | 21 +++++++++++++++++++
190885
 1 file changed, 21 insertions(+)
190885
190885
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
index 673b73aa..c475fed4 100644
190885
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
190885
@@ -64,6 +64,13 @@
190885
 # endif
190885
 #endif
190885
 
190885
+/* Avoid short distance rep movsb only with non-SSE vector (VEC_SIZE > 16).  */
190885
+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
190885
+# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
190885
+#else /* NOTE(review): already defined, yet redefined to 0 with no #undef.  */
190885
+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
190885
+#endif
190885
+
190885
 #ifndef PREFETCH
190885
 # define PREFETCH(addr) prefetcht0 addr
190885
 #endif
190885
@@ -255,7 +262,21 @@ L(movsb):
190885
 	cmpq	%r9, %rdi
190885
 	/* Avoid slow backward REP MOVSB.  */
190885
 	jb	L(more_8x_vec_backward)
190885
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
190885
+	movq	%rdi, %rcx
190885
+	subq	%rsi, %rcx
190885
+	jmp	2f
190885
+# endif
190885
 1:
190885
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
190885
+	movq	%rsi, %rcx
190885
+	subq	%rdi, %rcx
190885
+2:
190885
+/* Avoid "rep movsb" if RCX, the distance between source and destination,
190885
+   is N*4GB + [1..63] with N >= 0.  */
190885
+	cmpl	$63, %ecx
190885
+	jbe	L(more_2x_vec)	/* Avoid "rep movsb" if ECX <= 63.  */
190885
+# endif
190885
 	mov	%RDX_LP, %RCX_LP
190885
 	rep movsb
190885
 L(nop):
190885
-- 
190885
GitLab
190885