Blame SOURCES/glibc-rh1498925-2.patch
|
|
c65238 |
The memmove-related fix is dropped in this patch because rhel-7.5
|
|
|
c65238 |
does not have optimized memmove for POWER7.
|
|
|
c65238 |
|
|
|
c65238 |
commit 63da5cd4a097d089033d980c42254c3356fa723f
|
|
|
c65238 |
Author: Rajalakshmi Srinivasaraghavan <raji@linux.vnet.ibm.com>
|
|
|
c65238 |
Date: Wed Oct 25 13:13:53 2017 -0200
|
|
|
c65238 |
|
|
|
c65238 |
powerpc: Replace lxvd2x/stxvd2x with lvx/stvx in P7's memcpy/memmove
|
|
|
c65238 |
|
|
|
c65238 |
POWER9 DD2.1 and earlier have an issue where some cache-inhibited
|
|
|
c65238 |
vector loads trap to the kernel, causing a performance degradation. To
|
|
|
c65238 |
handle this in memcpy and memmove, lvx/stvx is used for aligned
|
|
|
c65238 |
addresses instead of lxvd2x/stxvd2x.
|
|
|
c65238 |
|
|
|
c65238 |
Reference: https://patchwork.ozlabs.org/patch/814059/
|
|
|
c65238 |
|
|
|
c65238 |
* sysdeps/powerpc/powerpc64/power7/memcpy.S: Replace
|
|
|
c65238 |
lxvd2x/stxvd2x with lvx/stvx.
|
|
|
c65238 |
* sysdeps/powerpc/powerpc64/power7/memmove.S: Likewise.
|
|
|
c65238 |
|
|
|
c65238 |
Reviewed-by: Tulio Magno Quites Machado Filho <tuliom@linux.vnet.ibm.com>
|
|
|
c65238 |
Reviewed-by: Adhemerval Zanella <adhemerval.zanella@linaro.org>
|
|
|
c65238 |
|
|
|
c65238 |
diff --git a/sysdeps/powerpc/powerpc64/power7/memcpy.S b/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
c65238 |
index 1ccbc2e..a7cdf8b 100644
|
|
|
c65238 |
--- a/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
c65238 |
+++ b/sysdeps/powerpc/powerpc64/power7/memcpy.S
|
|
|
c65238 |
@@ -91,63 +91,63 @@ L(aligned_copy):
|
|
|
c65238 |
srdi 12,cnt,7
|
|
|
c65238 |
cmpdi 12,0
|
|
|
c65238 |
beq L(aligned_tail)
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
- lxvd2x 7,src,6
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
+ lvx 7,src,6
|
|
|
c65238 |
mtctr 12
|
|
|
c65238 |
b L(aligned_128loop)
|
|
|
c65238 |
|
|
|
c65238 |
.align 4
|
|
|
c65238 |
L(aligned_128head):
|
|
|
c65238 |
/* for the 2nd + iteration of this loop. */
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
- lxvd2x 7,src,6
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
+ lvx 7,src,6
|
|
|
c65238 |
L(aligned_128loop):
|
|
|
c65238 |
- lxvd2x 8,src,7
|
|
|
c65238 |
- lxvd2x 9,src,8
|
|
|
c65238 |
- stxvd2x 6,0,dst
|
|
|
c65238 |
+ lvx 8,src,7
|
|
|
c65238 |
+ lvx 9,src,8
|
|
|
c65238 |
+ stvx 6,0,dst
|
|
|
c65238 |
addi src,src,64
|
|
|
c65238 |
- stxvd2x 7,dst,6
|
|
|
c65238 |
- stxvd2x 8,dst,7
|
|
|
c65238 |
- stxvd2x 9,dst,8
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
- lxvd2x 7,src,6
|
|
|
c65238 |
+ stvx 7,dst,6
|
|
|
c65238 |
+ stvx 8,dst,7
|
|
|
c65238 |
+ stvx 9,dst,8
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
+ lvx 7,src,6
|
|
|
c65238 |
addi dst,dst,64
|
|
|
c65238 |
- lxvd2x 8,src,7
|
|
|
c65238 |
- lxvd2x 9,src,8
|
|
|
c65238 |
+ lvx 8,src,7
|
|
|
c65238 |
+ lvx 9,src,8
|
|
|
c65238 |
addi src,src,64
|
|
|
c65238 |
- stxvd2x 6,0,dst
|
|
|
c65238 |
- stxvd2x 7,dst,6
|
|
|
c65238 |
- stxvd2x 8,dst,7
|
|
|
c65238 |
- stxvd2x 9,dst,8
|
|
|
c65238 |
+ stvx 6,0,dst
|
|
|
c65238 |
+ stvx 7,dst,6
|
|
|
c65238 |
+ stvx 8,dst,7
|
|
|
c65238 |
+ stvx 9,dst,8
|
|
|
c65238 |
addi dst,dst,64
|
|
|
c65238 |
bdnz L(aligned_128head)
|
|
|
c65238 |
|
|
|
c65238 |
L(aligned_tail):
|
|
|
c65238 |
mtocrf 0x01,cnt
|
|
|
c65238 |
bf 25,32f
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
- lxvd2x 7,src,6
|
|
|
c65238 |
- lxvd2x 8,src,7
|
|
|
c65238 |
- lxvd2x 9,src,8
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
+ lvx 7,src,6
|
|
|
c65238 |
+ lvx 8,src,7
|
|
|
c65238 |
+ lvx 9,src,8
|
|
|
c65238 |
addi src,src,64
|
|
|
c65238 |
- stxvd2x 6,0,dst
|
|
|
c65238 |
- stxvd2x 7,dst,6
|
|
|
c65238 |
- stxvd2x 8,dst,7
|
|
|
c65238 |
- stxvd2x 9,dst,8
|
|
|
c65238 |
+ stvx 6,0,dst
|
|
|
c65238 |
+ stvx 7,dst,6
|
|
|
c65238 |
+ stvx 8,dst,7
|
|
|
c65238 |
+ stvx 9,dst,8
|
|
|
c65238 |
addi dst,dst,64
|
|
|
c65238 |
32:
|
|
|
c65238 |
bf 26,16f
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
- lxvd2x 7,src,6
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
+ lvx 7,src,6
|
|
|
c65238 |
addi src,src,32
|
|
|
c65238 |
- stxvd2x 6,0,dst
|
|
|
c65238 |
- stxvd2x 7,dst,6
|
|
|
c65238 |
+ stvx 6,0,dst
|
|
|
c65238 |
+ stvx 7,dst,6
|
|
|
c65238 |
addi dst,dst,32
|
|
|
c65238 |
16:
|
|
|
c65238 |
bf 27,8f
|
|
|
c65238 |
- lxvd2x 6,0,src
|
|
|
c65238 |
+ lvx 6,0,src
|
|
|
c65238 |
addi src,src,16
|
|
|
c65238 |
- stxvd2x 6,0,dst
|
|
|
c65238 |
+ stvx 6,0,dst
|
|
|
c65238 |
addi dst,dst,16
|
|
|
c65238 |
8:
|
|
|
c65238 |
bf 28,4f
|