|
|
190885 |
From b4cf48b235d64d0eab5739e6d496717c0023a32e Mon Sep 17 00:00:00 2001
|
|
|
190885 |
From: "H.J. Lu" <hjl.tools@gmail.com>
|
|
|
190885 |
Date: Wed, 2 Mar 2022 15:17:29 -0800
|
|
|
190885 |
Subject: [PATCH] x86-64: Avoid rep movsb with short distance [BZ #27130]
|
|
|
190885 |
|
|
|
190885 |
When copying with "rep movsb", if the distance between source and
|
|
|
190885 |
destination is N*4GB + [1..63] with N >= 0, performance may be very
|
|
|
190885 |
slow. This patch updates memmove-vec-unaligned-erms.S for AVX and
|
|
|
190885 |
AVX512 versions to test the distance, computed in RCX, as follows:
|
|
|
190885 |
|
|
|
190885 |
cmpl $63, %ecx
|
|
|
190885 |
// Don't use "rep movsb" if ECX <= 63
|
|
|
190885 |
jbe L(Don't use "rep movsb")
|
|
|
190885 |
Use "rep movsb"
|
|
|
190885 |
|
|
|
190885 |
Benchtests data with bench-memcpy, bench-memcpy-large, bench-memcpy-random
|
|
|
190885 |
and bench-memcpy-walk on Skylake, Ice Lake and Tiger Lake show that its
|
|
|
190885 |
performance impact is within noise range as "rep movsb" is only used for
|
|
|
190885 |
data size >= 4KB.
|
|
|
190885 |
|
|
|
190885 |
(cherry picked from commit 3ec5d83d2a237d39e7fd6ef7a0bc8ac4c171a4a5)
|
|
|
190885 |
---
|
|
|
190885 |
.../multiarch/memmove-vec-unaligned-erms.S | 21 +++++++++++++++++++
|
|
|
190885 |
1 file changed, 21 insertions(+)
|
|
|
190885 |
|
|
|
190885 |
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
190885 |
index 673b73aa..c475fed4 100644
|
|
|
190885 |
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
190885 |
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
190885 |
@@ -64,6 +64,13 @@
|
|
|
190885 |
# endif
|
|
|
190885 |
#endif
|
|
|
190885 |
|
|
|
190885 |
+/* Avoid short distance rep movsb only with non-SSE vector. */
|
|
|
190885 |
+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
190885 |
+# define AVOID_SHORT_DISTANCE_REP_MOVSB (VEC_SIZE > 16)
|
|
|
190885 |
+#else
|
|
|
190885 |
+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
|
|
|
190885 |
+#endif
|
|
|
190885 |
+
|
|
|
190885 |
#ifndef PREFETCH
|
|
|
190885 |
# define PREFETCH(addr) prefetcht0 addr
|
|
|
190885 |
#endif
|
|
|
190885 |
@@ -255,7 +262,21 @@ L(movsb):
|
|
|
190885 |
cmpq %r9, %rdi
|
|
|
190885 |
/* Avoid slow backward REP MOVSB. */
|
|
|
190885 |
jb L(more_8x_vec_backward)
|
|
|
190885 |
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
190885 |
+ movq %rdi, %rcx
|
|
|
190885 |
+ subq %rsi, %rcx
|
|
|
190885 |
+ jmp 2f
|
|
|
190885 |
+# endif
|
|
|
190885 |
1:
|
|
|
190885 |
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
190885 |
+ movq %rsi, %rcx
|
|
|
190885 |
+ subq %rdi, %rcx
|
|
|
190885 |
+2:
|
|
|
190885 |
+/* Avoid "rep movsb" if RCX, the distance between source and destination,
|
|
|
190885 |
+ is N*4GB + [1..63] with N >= 0. */
|
|
|
190885 |
+ cmpl $63, %ecx
|
|
|
190885 |
+ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
|
|
|
190885 |
+# endif
|
|
|
190885 |
mov %RDX_LP, %RCX_LP
|
|
|
190885 |
rep movsb
|
|
|
190885 |
L(nop):
|
|
|
190885 |
--
|
|
|
190885 |
GitLab
|
|
|
190885 |
|