|
|
4aabfd |
From 519786ec38bb5123547071021e4bd7678d7673ad Mon Sep 17 00:00:00 2001
|
|
|
4aabfd |
From: Andreas Krebbel <krebbel@linux.ibm.com>
|
|
|
4aabfd |
Date: Mon, 23 Mar 2020 09:08:27 +0100
|
|
|
4aabfd |
Subject: [PATCH 3/4] Optimize s390x instructions
|
|
|
4aabfd |
|
|
|
4aabfd |
---
|
|
|
4aabfd |
src/asm/jump_s390x_sysv_elf_gas.S | 84 ++++++++++--------------------
|
|
|
4aabfd |
src/asm/make_s390x_sysv_elf_gas.S | 27 ++++++----
|
|
|
4aabfd |
src/asm/ontop_s390x_sysv_elf_gas.S | 81 +++++++++-------------------
|
|
|
4aabfd |
3 files changed, 70 insertions(+), 122 deletions(-)
|
|
|
4aabfd |
|
|
|
4aabfd |
diff --git a/src/asm/jump_s390x_sysv_elf_gas.S b/src/asm/jump_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
index c011d53..b2163cc 100644
|
|
|
4aabfd |
--- a/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
+++ b/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
@@ -36,47 +36,34 @@
|
|
|
4aabfd |
.global jump_fcontext
|
|
|
4aabfd |
.type jump_fcontext, @function
|
|
|
4aabfd |
|
|
|
4aabfd |
+#define GR_OFFSET 0
|
|
|
4aabfd |
+#define LR_OFFSET 64
|
|
|
4aabfd |
+#define SP_OFFSET 72
|
|
|
4aabfd |
+#define FP_OFFSET 80
|
|
|
4aabfd |
+#define PC_OFFSET 112
|
|
|
4aabfd |
+#define L_CTX 120
|
|
|
4aabfd |
+#define L_STACKFRAME 120
|
|
|
4aabfd |
+
|
|
|
4aabfd |
jump_fcontext:
|
|
|
4aabfd |
|
|
|
4aabfd |
# Reserved the space for stack to store the data of current context
|
|
|
4aabfd |
# before we jump to the new context.
|
|
|
4aabfd |
- lay 15,-120(15)
|
|
|
4aabfd |
+ aghi %r15,-L_STACKFRAME
|
|
|
4aabfd |
|
|
|
4aabfd |
# save the registers to the stack
|
|
|
4aabfd |
- stg 6, 0(15) # save R6
|
|
|
4aabfd |
- stg 7, 8(15) # save R7
|
|
|
4aabfd |
- stg 8, 16(15) # save R8
|
|
|
4aabfd |
- stg 9, 24(15) # save R9
|
|
|
4aabfd |
- stg 10, 32(15) # save R10
|
|
|
4aabfd |
- stg 11, 40(15) # save R11
|
|
|
4aabfd |
- stg 12, 48(15) # save R12
|
|
|
4aabfd |
- stg 13, 56(15) # save R13
|
|
|
4aabfd |
- stg 14, 64(15) # save R14
|
|
|
4aabfd |
- stg 15, 72(15) # save R15
|
|
|
4aabfd |
+ stmg %r6, %r15, GR_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# save the floating point registers
|
|
|
4aabfd |
- # Load the FPR into R0 then save it to the stack
|
|
|
4aabfd |
- # Load F1 into R0
|
|
|
4aabfd |
- lgdr 0,1
|
|
|
4aabfd |
- stg 0,80(15) # save F1
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F3 into R0
|
|
|
4aabfd |
- lgdr 0,3
|
|
|
4aabfd |
- stg 0,88(15) # save F3
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F5 into R0
|
|
|
4aabfd |
- lgdr 0,5
|
|
|
4aabfd |
- stg 0,96(15) # save F5
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F7 into R0
|
|
|
4aabfd |
- lgdr 0,7
|
|
|
4aabfd |
- stg 0,104(15) # save F7
|
|
|
4aabfd |
+ std %f1,FP_OFFSET(%r15)
|
|
|
4aabfd |
+ std %f3,FP_OFFSET+8(%r15)
|
|
|
4aabfd |
+ std %f5,FP_OFFSET+16(%r15)
|
|
|
4aabfd |
+ std %f7,FP_OFFSET+24(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Save LR as PC
|
|
|
4aabfd |
- stg 14,112(15)
|
|
|
4aabfd |
+ stg %r14,PC_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Store the SP pointing to the old context-data into R0
|
|
|
4aabfd |
- lgr 0,15
|
|
|
4aabfd |
+ lgr %r0,%r15
|
|
|
4aabfd |
|
|
|
4aabfd |
# Get the SP pointing to the new context-data
|
|
|
4aabfd |
# Note: Since the return type of the jump_fcontext is struct whose
|
|
|
4aabfd |
@@ -88,46 +75,31 @@ jump_fcontext:
|
|
|
4aabfd |
# R2 --> Address of the return transfer_t struct
|
|
|
4aabfd |
# R3 --> Context we want to switch to
|
|
|
4aabfd |
# R4 --> Data
|
|
|
4aabfd |
- lgr 15,3
|
|
|
4aabfd |
+ lgr %r15,%r3
|
|
|
4aabfd |
|
|
|
4aabfd |
# Load the registers with the data present in context-data of the
|
|
|
4aabfd |
# context we are going to switch to
|
|
|
4aabfd |
- lg 6, 0(15) # restore R6
|
|
|
4aabfd |
- lg 7, 8(15) # restore R7
|
|
|
4aabfd |
- lg 8, 16(15) # restore R8
|
|
|
4aabfd |
- lg 9, 24(15) # restore R9
|
|
|
4aabfd |
- lg 10, 32(15) # restore R10
|
|
|
4aabfd |
- lg 11, 40(15) # restore R11
|
|
|
4aabfd |
- lg 12, 48(15) # restore R12
|
|
|
4aabfd |
- lg 13, 56(15) # restore R13
|
|
|
4aabfd |
- lg 14, 64(15) # restore R14
|
|
|
4aabfd |
+ lmg %r6, %r14, GR_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Restore Floating point registers
|
|
|
4aabfd |
- lg 1,80(15)
|
|
|
4aabfd |
- ldgr 1,1 # restore F1
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,88(15)
|
|
|
4aabfd |
- ldgr 1,3 # restore F3
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,96(15)
|
|
|
4aabfd |
- ldgr 1,5 # restore F5
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,104(15)
|
|
|
4aabfd |
- ldgr 1,7 # restore F7
|
|
|
4aabfd |
+ ld %f1,FP_OFFSET(%r15)
|
|
|
4aabfd |
+ ld %f3,FP_OFFSET+8(%r15)
|
|
|
4aabfd |
+ ld %f5,FP_OFFSET+16(%r15)
|
|
|
4aabfd |
+ ld %f7,FP_OFFSET+24(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Load PC
|
|
|
4aabfd |
- lg 1,112(15)
|
|
|
4aabfd |
+ lg %r1,PC_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
- # Adjust the stack
|
|
|
4aabfd |
- lay 15, 120(15)
|
|
|
4aabfd |
+ # Adjust the stack
|
|
|
4aabfd |
+ aghi %r15,L_STACKFRAME
|
|
|
4aabfd |
|
|
|
4aabfd |
# R2 --> Address where the return transfer_t is stored
|
|
|
4aabfd |
# R0 --> FCTX
|
|
|
4aabfd |
# R4 --> DATA
|
|
|
4aabfd |
|
|
|
4aabfd |
# Store the elements to return transfer_t
|
|
|
4aabfd |
- stg 15, 0(2)
|
|
|
4aabfd |
- stg 4, 8(2)
|
|
|
4aabfd |
+ stg %r15, 0(%r2)
|
|
|
4aabfd |
+ stg %r4, 8(%r2)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Note: The address in R2 points to the place where the return
|
|
|
4aabfd |
# transfer_t is stored. Since context_function take transfer_t
|
|
|
4aabfd |
@@ -135,7 +107,7 @@ jump_fcontext:
|
|
|
4aabfd |
# first parameter value.
|
|
|
4aabfd |
|
|
|
4aabfd |
#jump to context
|
|
|
4aabfd |
- br 1
|
|
|
4aabfd |
+ br %r1
|
|
|
4aabfd |
|
|
|
4aabfd |
.size jump_fcontext,.-jump_fcontext
|
|
|
4aabfd |
# Mark that we don't need executable stack.
|
|
|
4aabfd |
diff --git a/src/asm/make_s390x_sysv_elf_gas.S b/src/asm/make_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
index f566533..d02856c 100644
|
|
|
4aabfd |
--- a/libs/context/src/asm/make_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
+++ b/libs/context/src/asm/make_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
@@ -36,6 +36,14 @@
|
|
|
4aabfd |
.global make_fcontext
|
|
|
4aabfd |
.type make_fcontext, @function
|
|
|
4aabfd |
|
|
|
4aabfd |
+#define GR_OFFSET 0
|
|
|
4aabfd |
+#define LR_OFFSET 64
|
|
|
4aabfd |
+#define SP_OFFSET 72
|
|
|
4aabfd |
+#define FP_OFFSET 80
|
|
|
4aabfd |
+#define PC_OFFSET 112
|
|
|
4aabfd |
+#define L_CTX 120
|
|
|
4aabfd |
+#define L_STACKFRAME 120
|
|
|
4aabfd |
+
|
|
|
4aabfd |
make_fcontext:
|
|
|
4aabfd |
|
|
|
4aabfd |
# make_fcontext takes in 3 arguments
|
|
|
4aabfd |
@@ -56,40 +64,39 @@ make_fcontext:
|
|
|
4aabfd |
# address is zero or not. If not AND it with `-8`.
|
|
|
4aabfd |
|
|
|
4aabfd |
# Here we AND the lower 16 bits of the memory address present in the
|
|
|
4aabfd |
- # R2 with the bits 1111 1111 1111 1000 which when converted into
|
|
|
4aabfd |
- # decimal is 65528
|
|
|
4aabfd |
- nill 2,65528
|
|
|
4aabfd |
+ # R2 with the bits 1111 1111 1111 1000
|
|
|
4aabfd |
+ nill %r2,0xfff8
|
|
|
4aabfd |
|
|
|
4aabfd |
# Reserve space for context-data on context-stack.
|
|
|
4aabfd |
# This is done by shifting the SP/address by 112 bytes.
|
|
|
4aabfd |
- lay 2,-120(2)
|
|
|
4aabfd |
+ aghi %r2,-L_CTX
|
|
|
4aabfd |
|
|
|
4aabfd |
# third arg of make_fcontext() == address of the context-function
|
|
|
4aabfd |
# Store the address as a PC to jump in, whenever we call the
|
|
|
4aabfd |
# make_fcontext.
|
|
|
4aabfd |
- stg 4,112(2)
|
|
|
4aabfd |
+ stg %r4,PC_OFFSET(%r2)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Save the address of finish as return-address for context-function
|
|
|
4aabfd |
# This will be entered after context-function return
|
|
|
4aabfd |
# The address of finish will be saved in Link register, this register
|
|
|
4aabfd |
# specifies where we need to jump after the function executes
|
|
|
4aabfd |
# completely.
|
|
|
4aabfd |
- larl 1,finish
|
|
|
4aabfd |
- stg 1,64(2)
|
|
|
4aabfd |
+ larl %r1,finish
|
|
|
4aabfd |
+ stg %r1,LR_OFFSET(%r2)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Return pointer to context data
|
|
|
4aabfd |
# R14 acts as the link register
|
|
|
4aabfd |
# R2 holds the address of the context stack. When we return from the
|
|
|
4aabfd |
# make_fcontext, R2 is passed back.
|
|
|
4aabfd |
- br 14
|
|
|
4aabfd |
+ br %r14
|
|
|
4aabfd |
|
|
|
4aabfd |
finish:
|
|
|
4aabfd |
|
|
|
4aabfd |
# In finish tasks, you load the exit code and exit the make_fcontext
|
|
|
4aabfd |
# This is called when the context-function is entirely executed
|
|
|
4aabfd |
|
|
|
4aabfd |
- lghi 2,0
|
|
|
4aabfd |
- brasl 14,_exit
|
|
|
4aabfd |
+ lghi %r2,0
|
|
|
4aabfd |
+ brasl %r14,_exit@PLT
|
|
|
4aabfd |
|
|
|
4aabfd |
.size make_fcontext,.-make_fcontext
|
|
|
4aabfd |
# Mark that we don't need executable stack.
|
|
|
4aabfd |
diff --git a/src/asm/ontop_s390x_sysv_elf_gas.S b/src/asm/ontop_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
index 7ab2cf5..4488654 100644
|
|
|
4aabfd |
--- a/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
+++ b/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
|
|
|
4aabfd |
@@ -36,47 +36,32 @@
|
|
|
4aabfd |
.global ontop_fcontext
|
|
|
4aabfd |
.type ontop_fcontext, @function
|
|
|
4aabfd |
|
|
|
4aabfd |
+#define GR_OFFSET 0
|
|
|
4aabfd |
+#define LR_OFFSET 64
|
|
|
4aabfd |
+#define SP_OFFSET 72
|
|
|
4aabfd |
+#define FP_OFFSET 80
|
|
|
4aabfd |
+#define PC_OFFSET 112
|
|
|
4aabfd |
+#define L_CTX 120
|
|
|
4aabfd |
+
|
|
|
4aabfd |
ontop_fcontext:
|
|
|
4aabfd |
|
|
|
4aabfd |
# Reserved the space for stack to store the data of current context
|
|
|
4aabfd |
# before we jump to the new context.
|
|
|
4aabfd |
- lay 15,-120(15)
|
|
|
4aabfd |
+ aghi %r15,-L_CTX
|
|
|
4aabfd |
|
|
|
4aabfd |
# save the registers to the stack
|
|
|
4aabfd |
- stg 6, 0(15) # save R6
|
|
|
4aabfd |
- stg 7, 8(15) # save R7
|
|
|
4aabfd |
- stg 8, 16(15) # save R8
|
|
|
4aabfd |
- stg 9, 24(15) # save R9
|
|
|
4aabfd |
- stg 10, 32(15) # save R10
|
|
|
4aabfd |
- stg 11, 40(15) # save R11
|
|
|
4aabfd |
- stg 12, 48(15) # save R12
|
|
|
4aabfd |
- stg 13, 56(15) # save R13
|
|
|
4aabfd |
- stg 14, 64(15) # save R14
|
|
|
4aabfd |
- stg 15, 72(15) # save R15
|
|
|
4aabfd |
+ stmg %r6, %r15, GR_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# save the floating point registers
|
|
|
4aabfd |
- # Load the FPR into R0 then save it to the stack
|
|
|
4aabfd |
- # Load F1 into R0
|
|
|
4aabfd |
- lgdr 0,1
|
|
|
4aabfd |
- stg 0,80(15) # save F1
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F3 into R0
|
|
|
4aabfd |
- lgdr 0,3
|
|
|
4aabfd |
- stg 0,88(15) # save F3
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F5 into R0
|
|
|
4aabfd |
- lgdr 0,5
|
|
|
4aabfd |
- stg 0,96(15) # save F5
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- # Load F7 into R0
|
|
|
4aabfd |
- lgdr 0,7
|
|
|
4aabfd |
- stg 0,104(15) # save F7
|
|
|
4aabfd |
-
|
|
|
4aabfd |
+ std %f1,FP_OFFSET(%r15)
|
|
|
4aabfd |
+ std %f3,FP_OFFSET+8(%r15)
|
|
|
4aabfd |
+ std %f5,FP_OFFSET+16(%r15)
|
|
|
4aabfd |
+ std %f7,FP_OFFSET+24(%r15)
|
|
|
4aabfd |
# Save LR as PC
|
|
|
4aabfd |
- stg 14,112(15)
|
|
|
4aabfd |
+ stg %r14,PC_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Store the SP pointing to the old context-data into R0
|
|
|
4aabfd |
- lgr 0,15
|
|
|
4aabfd |
+ lgr %r0,%r15
|
|
|
4aabfd |
|
|
|
4aabfd |
# Get the SP pointing to the new context-data
|
|
|
4aabfd |
# Note: Since the return type of the jump_fcontext is struct whose
|
|
|
4aabfd |
@@ -88,38 +73,22 @@ ontop_fcontext:
|
|
|
4aabfd |
# R2 --> Address of the return transfer_t struct
|
|
|
4aabfd |
# R3 --> Context we want to switch to
|
|
|
4aabfd |
# R4 --> Data
|
|
|
4aabfd |
- lgr 15,3
|
|
|
4aabfd |
+ lgr %r15,%r3
|
|
|
4aabfd |
|
|
|
4aabfd |
# Load the registers with the data present in context-data of the
|
|
|
4aabfd |
# context we are going to switch to
|
|
|
4aabfd |
- lg 6, 0(15) # restore R6
|
|
|
4aabfd |
- lg 7, 8(15) # restore R7
|
|
|
4aabfd |
- lg 8, 16(15) # restore R8
|
|
|
4aabfd |
- lg 9, 24(15) # restore R9
|
|
|
4aabfd |
- lg 10, 32(15) # restore R10
|
|
|
4aabfd |
- lg 11, 40(15) # restore R11
|
|
|
4aabfd |
- lg 12, 48(15) # restore R12
|
|
|
4aabfd |
- lg 13, 56(15) # restore R13
|
|
|
4aabfd |
- lg 14, 64(15) # restore R14
|
|
|
4aabfd |
- lg 15, 72(15) # restore R15
|
|
|
4aabfd |
+ lmg %r6,%r15,GR_OFFSET(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Restore Floating point registers
|
|
|
4aabfd |
- lg 1,80(15)
|
|
|
4aabfd |
- ldgr 1,1 # restore F1
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,88(15)
|
|
|
4aabfd |
- ldgr 1,3 # restore F3
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,96(15)
|
|
|
4aabfd |
- ldgr 1,5 # restore F5
|
|
|
4aabfd |
-
|
|
|
4aabfd |
- lg 1,104(15)
|
|
|
4aabfd |
- ldgr 1,7 # restore F7
|
|
|
4aabfd |
+ ld %f1,FP_OFFSET(%r15)
|
|
|
4aabfd |
+ ld %f3,FP_OFFSET+8(%r15)
|
|
|
4aabfd |
+ ld %f5,FP_OFFSET+16(%r15)
|
|
|
4aabfd |
+ ld %f7,FP_OFFSET+24(%r15)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Skip PC
|
|
|
4aabfd |
|
|
|
4aabfd |
# Adjust the stack
|
|
|
4aabfd |
- lay 15, 120(15)
|
|
|
4aabfd |
+ aghi %r15,L_CTX
|
|
|
4aabfd |
|
|
|
4aabfd |
# R2 --> Address where the return transfer_t is stored
|
|
|
4aabfd |
# R0 --> FCTX
|
|
|
4aabfd |
@@ -127,8 +96,8 @@ ontop_fcontext:
|
|
|
4aabfd |
# R5 --> Context function
|
|
|
4aabfd |
|
|
|
4aabfd |
# Store the elements to return transfer_t
|
|
|
4aabfd |
- stg 15, 0(2)
|
|
|
4aabfd |
- stg 4, 8(2)
|
|
|
4aabfd |
+ stg %r15, 0(%r2)
|
|
|
4aabfd |
+ stg %r4, 8(%r2)
|
|
|
4aabfd |
|
|
|
4aabfd |
# Note: The address in R2 points to the place where the return
|
|
|
4aabfd |
# transfer_t is stored. Since context_function take transfer_t
|
|
|
4aabfd |
@@ -136,7 +105,7 @@ ontop_fcontext:
|
|
|
4aabfd |
# first parameter value.
|
|
|
4aabfd |
|
|
|
4aabfd |
#jump to context function
|
|
|
4aabfd |
- br 5
|
|
|
4aabfd |
+ br %r5
|
|
|
4aabfd |
|
|
|
4aabfd |
.size ontop_fcontext,.-ontop_fcontext
|
|
|
4aabfd |
# Mark that we don't need executable stack.
|
|
|
4aabfd |
--
|
|
|
4aabfd |
2.18.1
|
|
|
4aabfd |
|