Blame SOURCES/valgrind-3.14.0-ppc64-ldbrx.patch

560544
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
560544
Author: Mark Wielaard <mark@klomp.org>
560544
Date:   Fri Dec 7 10:42:22 2018 -0500
560544
560544
    Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
560544
    
560544
    This makes it possible for memcheck to analyse the new gcc strcmp
560544
    inlined code correctly even if the ldbrx load is partly beyond an
560544
    addressable block.
560544
    
560544
    Partially resolves bug 386945.
560544
560544
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
560544
index 8977d4f..a81dace 100644
560544
--- a/VEX/priv/guest_ppc_toIR.c
560544
+++ b/VEX/priv/guest_ppc_toIR.c
560544
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
560544
 
560544
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
560544
       {
560544
-         // JRS FIXME:
560544
-         // * is the host_endness conditional below actually necessary?
560544
-         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
560544
-         //   That would be a lot more efficient.
560544
-         IRExpr * nextAddr;
560544
-         IRTemp w3 = newTemp( Ity_I32 );
560544
-         IRTemp w4 = newTemp( Ity_I32 );
560544
-         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
560544
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
560544
-         assign( w2, gen_byterev32( w1 ) );
560544
-         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
560544
-                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
560544
-         assign( w3, load( Ity_I32, nextAddr ) );
560544
-         assign( w4, gen_byterev32( w3 ) );
560544
-         if (host_endness == VexEndnessLE)
560544
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
560544
+         /* Caller makes sure we are only called in mode64. */
560544
+
560544
+         /* If we supported swapping LE/BE loads in the backend then we could
560544
+            just load the value with the bytes reversed by doing a BE load
560544
+            on an LE machine and a LE load on a BE machine.
560544
+
560544
+         IRTemp dw1 = newTemp(Ity_I64);
560544
+         if (host_endness == VexEndnessBE)
560544
+            assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
560544
          else
560544
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
560544
+            assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
560544
+         putIReg( rD_addr, mkexpr(dw1) );
560544
+
560544
+         But since we currently don't we load the value as is and then
560544
+         switch it around with Iop_Reverse8sIn64_x1. */
560544
+
560544
+         IRTemp dw1 = newTemp(Ity_I64);
560544
+         IRTemp dw2 = newTemp(Ity_I64);
560544
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
560544
+         assign( dw1, load(Ity_I64, mkexpr(EA)) );
560544
+         assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
560544
+         putIReg( rD_addr, mkexpr(dw2) );
560544
          break;
560544
       }
560544
 
560544
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
560544
index 750cf8d..4fc3eb5 100644
560544
--- a/VEX/priv/host_ppc_isel.c
560544
+++ b/VEX/priv/host_ppc_isel.c
560544
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
560544
          return rr;
560544
       }
560544
 
560544
+      case Iop_Reverse8sIn64_x1: {
560544
+	 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
560544
+            Can only be used in 64bit mode.  */
560544
+         vassert (mode64);
560544
+
560544
+         HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
560544
+         HReg rr     = newVRegI(env);
560544
+         HReg rMask  = newVRegI(env);
560544
+         HReg rnMask = newVRegI(env);
560544
+         HReg rtHi   = newVRegI(env);
560544
+         HReg rtLo   = newVRegI(env);
560544
+
560544
+         // Copy r_src since we need to modify it
560544
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
560544
+
560544
+         // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
560544
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
560544
+                                   True/* 64bit imm*/));
560544
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
560544
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
560544
+                                     rtHi, rtHi,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
560544
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
560544
+                                     rtLo, rtLo,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
560544
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
560544
+
560544
+         // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
560544
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
560544
+                                   True/* 64bit imm*/));
560544
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
560544
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
560544
+                                     rtHi, rtHi,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
560544
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
560544
+                                     rtLo, rtLo,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
560544
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
560544
+
560544
+         // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
560544
+         /* We don't need to mask anymore, just two more shifts and an or.  */
560544
+         addInstr(env, mk_iMOVds_RR(rtLo, rr));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
560544
+                                     rtLo, rtLo,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
560544
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
560544
+                                     rr, rr,
560544
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
560544
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
560544
+
560544
+         return rr;
560544
+      }
560544
+
560544
       case Iop_Left8:
560544
       case Iop_Left16:
560544
       case Iop_Left32: