Blame SOURCES/glibc-rh1871395-1.patch
|
|
e354a5 |
commit 0792c8ae1aebf538de45ff9a0e2e401a60525de2
|
|
|
e354a5 |
Author: Stefan Liebler <stli@linux.ibm.com>
|
|
|
e354a5 |
Date: Fri Jun 26 09:45:11 2020 +0200
|
|
|
e354a5 |
|
|
|
e354a5 |
S390: Optimize __memcpy_z196.
|
|
|
e354a5 |
|
|
|
e354a5 |
This patch introduces an extra loop without pfd instructions
|
|
|
e354a5 |
as it turned out that the pfd instructions are useful
|
|
|
e354a5 |
for copies >=64KB but are counterproductive for smaller copies.
|
|
|
e354a5 |
|
|
|
e354a5 |
diff --git a/sysdeps/s390/memcpy-z900.S b/sysdeps/s390/memcpy-z900.S
|
|
|
e354a5 |
index f2e9aaeb2d..dc2f491ec3 100644
|
|
|
e354a5 |
--- a/sysdeps/s390/memcpy-z900.S
|
|
|
e354a5 |
+++ b/sysdeps/s390/memcpy-z900.S
|
|
|
e354a5 |
@@ -184,25 +184,34 @@ ENTRY(MEMCPY_Z196)
|
|
|
e354a5 |
je .L_Z196_4
|
|
|
e354a5 |
.L_Z196_start2:
|
|
|
e354a5 |
aghi %r4,-1
|
|
|
e354a5 |
- srlg %r5,%r4,8
|
|
|
e354a5 |
- ltgr %r5,%r5
|
|
|
e354a5 |
+ risbg %r5,%r4,8,128+63,56 # r5 = r4 / 256
|
|
|
e354a5 |
jne .L_Z196_5
|
|
|
e354a5 |
.L_Z196_3:
|
|
|
e354a5 |
exrl %r4,.L_Z196_14
|
|
|
e354a5 |
.L_Z196_4:
|
|
|
e354a5 |
br %r14
|
|
|
e354a5 |
.L_Z196_5:
|
|
|
e354a5 |
- cgfi %r5,262144 # Switch to mvcle for copies >64MB
|
|
|
e354a5 |
- jh __memcpy_mvcle
|
|
|
e354a5 |
+ cgfi %r5,255 # Switch to loop with pfd for copies >=64kB
|
|
|
e354a5 |
+ jh .L_Z196_6
|
|
|
e354a5 |
.L_Z196_2:
|
|
|
e354a5 |
- pfd 1,768(%r3)
|
|
|
e354a5 |
- pfd 2,768(%r1)
|
|
|
e354a5 |
mvc 0(256,%r1),0(%r3)
|
|
|
e354a5 |
aghi %r5,-1
|
|
|
e354a5 |
la %r1,256(%r1)
|
|
|
e354a5 |
la %r3,256(%r3)
|
|
|
e354a5 |
jne .L_Z196_2
|
|
|
e354a5 |
j .L_Z196_3
|
|
|
e354a5 |
+.L_Z196_6:
|
|
|
e354a5 |
+ cgfi %r5,262144 # Switch to mvcle for copies >64MB
|
|
|
e354a5 |
+ jh __memcpy_mvcle
|
|
|
e354a5 |
+.L_Z196_7:
|
|
|
e354a5 |
+ pfd 1,1024(%r3)
|
|
|
e354a5 |
+ pfd 2,1024(%r1)
|
|
|
e354a5 |
+ mvc 0(256,%r1),0(%r3)
|
|
|
e354a5 |
+ aghi %r5,-1
|
|
|
e354a5 |
+ la %r1,256(%r1)
|
|
|
e354a5 |
+ la %r3,256(%r3)
|
|
|
e354a5 |
+ jne .L_Z196_7
|
|
|
e354a5 |
+ j .L_Z196_3
|
|
|
e354a5 |
.L_Z196_14:
|
|
|
e354a5 |
mvc 0(1,%r1),0(%r3)
|
|
|
e354a5 |
END(MEMCPY_Z196)
|