From 519786ec38bb5123547071021e4bd7678d7673ad Mon Sep 17 00:00:00 2001
From: Andreas Krebbel
Date: Mon, 23 Mar 2020 09:08:27 +0100
Subject: [PATCH 3/4] Optimize s390x instructions

Replace the one-stg/lg-per-register sequences with stmg/lmg, move the
floating point registers with std/ld instead of bouncing them through a
general purpose register, adjust the stack pointer with aghi, and name
the context frame offsets instead of repeating literal numbers.

jump_fcontext saves F1/F3/F5/F7 (the same registers it restores), pops
the frame with L_STACKFRAME, and hands the old context pointer kept in
R0 to the returned transfer_t.

---
 src/asm/jump_s390x_sysv_elf_gas.S  | 84 ++++++++++--------------------
 src/asm/make_s390x_sysv_elf_gas.S  | 27 ++++++----
 src/asm/ontop_s390x_sysv_elf_gas.S | 81 +++++++++-------------------
 3 files changed, 70 insertions(+), 122 deletions(-)

diff --git a/src/asm/jump_s390x_sysv_elf_gas.S b/src/asm/jump_s390x_sysv_elf_gas.S
index c011d53..b2163cc 100644
--- a/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/jump_s390x_sysv_elf_gas.S
@@ -36,47 +36,34 @@
 .global jump_fcontext
 .type jump_fcontext, @function
 
+#define GR_OFFSET 0
+#define LR_OFFSET 64
+#define SP_OFFSET 72
+#define FP_OFFSET 80
+#define PC_OFFSET 112
+#define L_CTX 120
+#define L_STACKFRAME 120
+
 jump_fcontext:
 
     # Reserved the space for stack to store the data of current context
     # before we jump to the new context.
-    lay 15,-120(15)
+    aghi %r15,-L_STACKFRAME
 
     # save the registers to the stack
-    stg 6, 0(15) # save R6
-    stg 7, 8(15) # save R7
-    stg 8, 16(15) # save R8
-    stg 9, 24(15) # save R9
-    stg 10, 32(15) # save R10
-    stg 11, 40(15) # save R11
-    stg 12, 48(15) # save R12
-    stg 13, 56(15) # save R13
-    stg 14, 64(15) # save R14
-    stg 15, 72(15) # save R15
+    stmg %r6, %r15, GR_OFFSET(%r15) # R6..R15 in one store-multiple
 
     # save the floating point registers
-    # Load the FPR into R0 then save it to the stack
-    # Load F1 into R0
-    lgdr 0,1
-    stg 0,80(15) # save F1
-
-    # Load F3 into R0
-    lgdr 0,3
-    stg 0,88(15) # save F3
-
-    # Load F5 into R0
-    lgdr 0,5
-    stg 0,96(15) # save F5
-
-    # Load F7 into R0
-    lgdr 0,7
-    stg 0,104(15) # save F7
+    std %f1,FP_OFFSET(%r15) # F1: the register the restore path loads
+    std %f3,FP_OFFSET+8(%r15)
+    std %f5,FP_OFFSET+16(%r15)
+    std %f7,FP_OFFSET+24(%r15)
 
     # Save LR as PC
-    stg 14,112(15)
+    stg %r14,PC_OFFSET(%r15)
 
     # Store the SP pointing to the old context-data into R0
-    lgr 0,15
+    lgr %r0,%r15
 
     # Get the SP pointing to the new context-data
     # Note: Since the return type of the jump_fcontext is struct whose
@@ -88,46 +75,31 @@ jump_fcontext:
     # R2 --> Address of the return transfer_t struct
     # R3 --> Context we want to switch to
     # R4 --> Data
-    lgr 15,3
+    lgr %r15,%r3
 
     # Load the registers with the data present in context-data of the
     # context we are going to switch to
-    lg 6, 0(15) # restore R6
-    lg 7, 8(15) # restore R7
-    lg 8, 16(15) # restore R8
-    lg 9, 24(15) # restore R9
-    lg 10, 32(15) # restore R10
-    lg 11, 40(15) # restore R11
-    lg 12, 48(15) # restore R12
-    lg 13, 56(15) # restore R13
-    lg 14, 64(15) # restore R14
+    lmg %r6, %r14, GR_OFFSET(%r15) # R15 already holds the new context
 
     # Restore Floating point registers
-    lg 1,80(15)
-    ldgr 1,1 # restore F1
-
-    lg 1,88(15)
-    ldgr 1,3 # restore F3
-
-    lg 1,96(15)
-    ldgr 1,5 # restore F5
-
-    lg 1,104(15)
-    ldgr 1,7 # restore F7
+    ld %f1,FP_OFFSET(%r15)
+    ld %f3,FP_OFFSET+8(%r15)
+    ld %f5,FP_OFFSET+16(%r15)
+    ld %f7,FP_OFFSET+24(%r15)
 
     # Load PC
-    lg 1,112(15)
+    lg %r1,PC_OFFSET(%r15)
 
-    # Adjust the stack
-    lay 15, 120(15)
+    # Adjust the stack
+    aghi %r15,L_STACKFRAME
 
     # R2 --> Address where the return transfer_t is stored
     # R0 --> FCTX
     # R4 --> DATA
 
     # Store the elements to return transfer_t
-    stg 15, 0(2)
-    stg 4, 8(2)
+    stg %r0, 0(%r2) # fctx = old context pointer kept in R0
+    stg %r4, 8(%r2) # data
 
     # Note: The address in R2 points to the place where the return
     # transfer_t is stored. Since context_function take transfer_t
@@ -135,7 +107,7 @@ jump_fcontext:
     # first parameter value.
 
     #jump to context
-    br 1
+    br %r1
 
 .size jump_fcontext,.-jump_fcontext
 # Mark that we don't need executable stack.
diff --git a/src/asm/make_s390x_sysv_elf_gas.S b/src/asm/make_s390x_sysv_elf_gas.S
index f566533..d02856c 100644
--- a/libs/context/src/asm/make_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/make_s390x_sysv_elf_gas.S
@@ -36,6 +36,14 @@
 .global make_fcontext
 .type make_fcontext, @function
 
+#define GR_OFFSET 0
+#define LR_OFFSET 64
+#define SP_OFFSET 72
+#define FP_OFFSET 80
+#define PC_OFFSET 112
+#define L_CTX 120
+#define L_STACKFRAME 120
+
 make_fcontext:
 
     # make_fcontext takes in 3 arguments
@@ -56,40 +64,39 @@ make_fcontext:
     # address is zero or not. If not AND it with `-8`.
 
     # Here we AND the lower 16 bits of the memory address present in the
-    # R2 with the bits 1111 1111 1111 1000 which when converted into
-    # decimal is 65528
-    nill 2,65528
+    # R2 with the bits 1111 1111 1111 1000 (0xfff8, same mask as before)
+    nill %r2,0xfff8
 
     # Reserve space for context-data on context-stack.
     # This is done by shifting the SP/address by 112 bytes.
-    lay 2,-120(2)
+    aghi %r2,-L_CTX
 
     # third arg of make_fcontext() == address of the context-function
     # Store the address as a PC to jump in, whenever we call the
     # make_fcontext.
-    stg 4,112(2)
+    stg %r4,PC_OFFSET(%r2)
 
     # Save the address of finish as return-address for context-function
     # This will be entered after context-function return
     # The address of finish will be saved in Link register, this register
     # specifies where we need to jump after the function executes
     # completely.
-    larl 1,finish
-    stg 1,64(2)
+    larl %r1,finish
+    stg %r1,LR_OFFSET(%r2)
 
     # Return pointer to context data
     # R14 acts as the link register
     # R2 holds the address of the context stack. When we return from the
     # make_fcontext, R2 is passed back.
-    br 14
+    br %r14
 
     finish:
 
         # In finish tasks, you load the exit code and exit the make_fcontext
         # This is called when the context-function is entirely executed
 
-        lghi 2,0
-        brasl 14,_exit
+        lghi %r2,0
+        brasl %r14,_exit@PLT
 
 .size make_fcontext,.-make_fcontext
 # Mark that we don't need executable stack.
diff --git a/src/asm/ontop_s390x_sysv_elf_gas.S b/src/asm/ontop_s390x_sysv_elf_gas.S
index 7ab2cf5..4488654 100644
--- a/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
+++ b/libs/context/src/asm/ontop_s390x_sysv_elf_gas.S
@@ -36,47 +36,32 @@
 .global ontop_fcontext
 .type ontop_fcontext, @function
 
+#define GR_OFFSET 0
+#define LR_OFFSET 64
+#define SP_OFFSET 72
+#define FP_OFFSET 80
+#define PC_OFFSET 112
+#define L_CTX 120
+
 ontop_fcontext:
 
     # Reserved the space for stack to store the data of current context
     # before we jump to the new context.
-    lay 15,-120(15)
+    aghi %r15,-L_CTX
 
     # save the registers to the stack
-    stg 6, 0(15) # save R6
-    stg 7, 8(15) # save R7
-    stg 8, 16(15) # save R8
-    stg 9, 24(15) # save R9
-    stg 10, 32(15) # save R10
-    stg 11, 40(15) # save R11
-    stg 12, 48(15) # save R12
-    stg 13, 56(15) # save R13
-    stg 14, 64(15) # save R14
-    stg 15, 72(15) # save R15
+    stmg %r6, %r15, GR_OFFSET(%r15) # R6..R15 in one store-multiple
 
     # save the floating point registers
-    # Load the FPR into R0 then save it to the stack
-    # Load F1 into R0
-    lgdr 0,1
-    stg 0,80(15) # save F1
-
-    # Load F3 into R0
-    lgdr 0,3
-    stg 0,88(15) # save F3
-
-    # Load F5 into R0
-    lgdr 0,5
-    stg 0,96(15) # save F5
-
-    # Load F7 into R0
-    lgdr 0,7
-    stg 0,104(15) # save F7
-
+    std %f1,FP_OFFSET(%r15) # F1: the register the restore path loads
+    std %f3,FP_OFFSET+8(%r15)
+    std %f5,FP_OFFSET+16(%r15)
+    std %f7,FP_OFFSET+24(%r15)
     # Save LR as PC
-    stg 14,112(15)
+    stg %r14,PC_OFFSET(%r15)
 
     # Store the SP pointing to the old context-data into R0
-    lgr 0,15
+    lgr %r0,%r15
 
     # Get the SP pointing to the new context-data
     # Note: Since the return type of the jump_fcontext is struct whose
@@ -88,38 +73,22 @@ ontop_fcontext:
     # R2 --> Address of the return transfer_t struct
     # R3 --> Context we want to switch to
     # R4 --> Data
-    lgr 15,3
+    lgr %r15,%r3
 
     # Load the registers with the data present in context-data of the
     # context we are going to switch to
-    lg 6, 0(15) # restore R6
-    lg 7, 8(15) # restore R7
-    lg 8, 16(15) # restore R8
-    lg 9, 24(15) # restore R9
-    lg 10, 32(15) # restore R10
-    lg 11, 40(15) # restore R11
-    lg 12, 48(15) # restore R12
-    lg 13, 56(15) # restore R13
-    lg 14, 64(15) # restore R14
-    lg 15, 72(15) # restore R15
+    lmg %r6,%r14,GR_OFFSET(%r15) # keep R15 = R3; make_fcontext leaves the SP slot unset
 
     # Restore Floating point registers
-    lg 1,80(15)
-    ldgr 1,1 # restore F1
-
-    lg 1,88(15)
-    ldgr 1,3 # restore F3
-
-    lg 1,96(15)
-    ldgr 1,5 # restore F5
-
-    lg 1,104(15)
-    ldgr 1,7 # restore F7
+    ld %f1,FP_OFFSET(%r15)
+    ld %f3,FP_OFFSET+8(%r15)
+    ld %f5,FP_OFFSET+16(%r15)
+    ld %f7,FP_OFFSET+24(%r15)
 
     # Skip PC
 
     # Adjust the stack
-    lay 15, 120(15)
+    aghi %r15,L_CTX
 
     # R2 --> Address where the return transfer_t is stored
     # R0 --> FCTX
@@ -127,8 +96,8 @@ ontop_fcontext:
     # R5 --> Context function
 
     # Store the elements to return transfer_t
-    stg 15, 0(2)
-    stg 4, 8(2)
+    stg %r0, 0(%r2) # fctx = old context pointer kept in R0
+    stg %r4, 8(%r2) # data
 
     # Note: The address in R2 points to the place where the return
     # transfer_t is stored. Since context_function take transfer_t
@@ -136,7 +105,7 @@ ontop_fcontext:
     # first parameter value.
 
     #jump to context function
-    br 5
+    br %r5
 
 .size ontop_fcontext,.-ontop_fcontext
 # Mark that we don't need executable stack.
-- 
2.18.1