Blame SOURCES/glibc-rh1871395-2.patch
|
|
e354a5 |
commit 1d21fb1061cbeb50414a8f371abb36548d90f150
|
|
|
e354a5 |
Author: Stefan Liebler <stli@linux.ibm.com>
|
|
|
e354a5 |
Date: Fri Jun 26 09:45:11 2020 +0200
|
|
|
e354a5 |
|
|
|
e354a5 |
S390: Optimize __memset_z196.
|
|
|
e354a5 |
|
|
|
e354a5 |
It turned out that a 256b-mvc instruction which depends on the
|
|
|
e354a5 |
result of a previous 256b-mvc instruction is counterproductive.
|
|
|
e354a5 |
Therefore this patch adjusts the 256b-loop by storing the
|
|
|
e354a5 |
first byte with stc and setting the remaining 255b with mvc.
|
|
|
e354a5 |
Now the 255b-mvc instruction depends on the stc instruction.
|
|
|
e354a5 |
|
|
|
e354a5 |
diff --git a/sysdeps/s390/memset-z900.S b/sysdeps/s390/memset-z900.S
|
|
|
e354a5 |
index ca3eac0522..1e0c334156 100644
|
|
|
e354a5 |
--- a/sysdeps/s390/memset-z900.S
|
|
|
e354a5 |
+++ b/sysdeps/s390/memset-z900.S
|
|
|
e354a5 |
@@ -157,28 +157,27 @@ ENTRY(MEMSET_Z196)
|
|
|
e354a5 |
# if !defined __s390x__
|
|
|
e354a5 |
llgfr %r4,%r4
|
|
|
e354a5 |
# endif /* !defined __s390x__ */
|
|
|
e354a5 |
- ltgr %r4,%r4
|
|
|
e354a5 |
- je .L_Z196_4
|
|
|
e354a5 |
+ clgfi %r4,1
|
|
|
e354a5 |
+ jl .L_Z196_4 # n == 0
|
|
|
e354a5 |
stc %r3,0(%r2)
|
|
|
e354a5 |
+ je .L_Z196_4 # n == 1
|
|
|
e354a5 |
+ aghi %r4,-2
|
|
|
e354a5 |
lgr %r1,%r2
|
|
|
e354a5 |
- cghi %r4,1
|
|
|
e354a5 |
- je .L_Z196_4
|
|
|
e354a5 |
- aghi %r4,-2
|
|
|
e354a5 |
- srlg %r5,%r4,8
|
|
|
e354a5 |
- ltgr %r5,%r5
|
|
|
e354a5 |
- jne .L_Z196_1
|
|
|
e354a5 |
+ risbg %r5,%r4,8,128+63,56 # r5 = n / 256
|
|
|
e354a5 |
+ jne .L_Z196_1 # Jump away if r5 != 0
|
|
|
e354a5 |
.L_Z196_3:
|
|
|
e354a5 |
exrl %r4,.L_Z196_17
|
|
|
e354a5 |
.L_Z196_4:
|
|
|
e354a5 |
br %r14
|
|
|
e354a5 |
.L_Z196_1:
|
|
|
e354a5 |
cgfi %r5,1048576
|
|
|
e354a5 |
- jh __memset_mvcle # Switch to mvcle for >256MB
|
|
|
e354a5 |
+ jh __memset_mvcle # Switch to mvcle for >256MB
|
|
|
e354a5 |
.L_Z196_2:
|
|
|
e354a5 |
pfd 2,1024(%r1)
|
|
|
e354a5 |
- mvc 1(256,%r1),0(%r1)
|
|
|
e354a5 |
+ mvc 1(255,%r1),0(%r1)
|
|
|
e354a5 |
aghi %r5,-1
|
|
|
e354a5 |
la %r1,256(%r1)
|
|
|
e354a5 |
+ stc %r3,0(%r1)
|
|
|
e354a5 |
jne .L_Z196_2
|
|
|
e354a5 |
j .L_Z196_3
|
|
|
e354a5 |
.L_Z196_17:
|