commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
Author: Mark Wielaard <mark@klomp.org>
Date:   Fri Dec 7 10:42:22 2018 -0500

    Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
    
    This makes it possible for memcheck to analyse the new gcc strcmp
    inlined code correctly even if the ldbrx load is partly beyond an
    addressable block.
    
    Partially resolves bug 386945.

diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 8977d4f..a81dace 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
 
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
      {
-         // JRS FIXME:
-         // * is the host_endness conditional below actually necessary?
-         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
-         //   That would be a lot more efficient.
-         IRExpr * nextAddr;
-         IRTemp w3 = newTemp( Ity_I32 );
-         IRTemp w4 = newTemp( Ity_I32 );
-         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
-         assign( w2, gen_byterev32( w1 ) );
-         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
-         assign( w3, load( Ity_I32, nextAddr ) );
-         assign( w4, gen_byterev32( w3 ) );
-         if (host_endness == VexEndnessLE)
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
+         /* Caller makes sure we are only called in mode64. */
+
+         /* If we supported swapping LE/BE loads in the backend then we could
+            just load the value with the bytes reversed by doing a BE load
+            on an LE machine and a LE load on a BE machine.
+
+         IRTemp dw1 = newTemp(Ity_I64);
+         if (host_endness == VexEndnessBE)
+            assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
          else
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
+            assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
+         putIReg( rD_addr, mkexpr(dw1) );
+
+         But since we currently don't we load the value as is and then
+         switch it around with Iop_Reverse8sIn64_x1. */
+
+         IRTemp dw1 = newTemp(Ity_I64);
+         IRTemp dw2 = newTemp(Ity_I64);
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
+         assign( dw1, load(Ity_I64, mkexpr(EA)) );
+         assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
+         putIReg( rD_addr, mkexpr(dw2) );
          break;
      }
 
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
index 750cf8d..4fc3eb5 100644
--- a/VEX/priv/host_ppc_isel.c
+++ b/VEX/priv/host_ppc_isel.c
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
          return rr;
      }
 
+      case Iop_Reverse8sIn64_x1: {
+	 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
+            Can only be used in 64bit mode.  */
+         vassert (mode64);
+
+         HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
+         HReg rr     = newVRegI(env);
+         HReg rMask  = newVRegI(env);
+         HReg rnMask = newVRegI(env);
+         HReg rtHi   = newVRegI(env);
+         HReg rtLo   = newVRegI(env);
+
+         // Copy r_src since we need to modify it
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
+
+         // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
+                                   True/* 64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+         // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
+                                   True/* !64bit imm*/));
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtHi, rtHi,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
+
+         // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
+         /* We don't need to mask anymore, just two more shifts and an or.  */
+         addInstr(env, mk_iMOVds_RR(rtLo, rr));
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
+                                     rtLo, rtLo,
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
+                                     rr, rr,
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
+
+         return rr;
+      }
+
       case Iop_Left8:
       case Iop_Left16:
       case Iop_Left32: