|
|
3c8a07 |
From ff7e299914a2dfa1c3fd7abd267535f7586ac413 Mon Sep 17 00:00:00 2001
|
|
|
3c8a07 |
From: Wangyang Guo <wangyang.guo@intel.com>
|
|
|
3c8a07 |
Date: Mon, 15 Nov 2021 06:12:16 +0000
|
|
|
3c8a07 |
Subject: [PATCH 1/2] Avoid short distance rep movsb
|
|
|
3c8a07 |
|
|
|
3c8a07 |
---
|
|
|
3c8a07 |
.../multiarch/memmove-vec-unaligned-erms.S | 22 +++++++++++++++++++
|
|
|
3c8a07 |
1 file changed, 22 insertions(+)
|
|
|
3c8a07 |
|
|
|
3c8a07 |
diff --git a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
3c8a07 |
index c952576c..07299dc0 100644
|
|
|
3c8a07 |
--- a/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
3c8a07 |
+++ b/sysdeps/x86_64/multiarch/memmove-vec-unaligned-erms.S
|
|
|
3c8a07 |
@@ -56,6 +56,14 @@
|
|
|
3c8a07 |
# endif
|
|
|
3c8a07 |
#endif
|
|
|
3c8a07 |
|
|
|
3c8a07 |
+/* Avoid short distance rep movsb only with non-SSE vector. */
|
|
|
3c8a07 |
+#ifndef AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
3c8a07 |
+# define AVOID_SHORT_DISTANCE_REP_MOVSB 1
|
|
|
3c8a07 |
+#else
|
|
|
3c8a07 |
+# define AVOID_SHORT_DISTANCE_REP_MOVSB 0
|
|
|
3c8a07 |
+#endif
|
|
|
3c8a07 |
+
|
|
|
3c8a07 |
+
|
|
|
3c8a07 |
#ifndef PREFETCH
|
|
|
3c8a07 |
# define PREFETCH(addr) prefetcht0 addr
|
|
|
3c8a07 |
#endif
|
|
|
3c8a07 |
@@ -235,7 +243,21 @@ L(movsb):
|
|
|
3c8a07 |
cmpq %r9, %rdi
|
|
|
3c8a07 |
/* Avoid slow backward REP MOVSB. */
|
|
|
3c8a07 |
jb L(more_8x_vec_backward)
|
|
|
3c8a07 |
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
3c8a07 |
+ movq %rdi, %rcx
|
|
|
3c8a07 |
+ subq %rsi, %rcx
|
|
|
3c8a07 |
+ jmp 2f
|
|
|
3c8a07 |
+# endif
|
|
|
3c8a07 |
1:
|
|
|
3c8a07 |
+# if AVOID_SHORT_DISTANCE_REP_MOVSB
|
|
|
3c8a07 |
+ movq %rsi, %rcx
|
|
|
3c8a07 |
+ subq %rdi, %rcx
|
|
|
3c8a07 |
+2:
|
|
|
3c8a07 |
+/* Avoid "rep movsb" if RCX, the distance between source and destination,
|
|
|
3c8a07 |
+ is N*4GB + [1..63] with N >= 0. */
|
|
|
3c8a07 |
+ cmpl $63, %ecx
|
|
|
3c8a07 |
+ jbe L(more_2x_vec) /* Avoid "rep movsb" if ECX <= 63. */
|
|
|
3c8a07 |
+# endif
|
|
|
3c8a07 |
movq %rdx, %rcx
|
|
|
3c8a07 |
rep movsb
|
|
|
3c8a07 |
L(nop):
|
|
|
3c8a07 |
--
|
|
|
3c8a07 |
2.27.0
|
|
|
3c8a07 |
|