Mark Wielaard be4097
commit 7bdd6731f8337fd57bf91772aa1917e44239d7c2
Mark Wielaard be4097
Author: Mark Wielaard <mark@klomp.org>
Mark Wielaard be4097
Date:   Fri Dec 7 10:42:22 2018 -0500
Mark Wielaard be4097
Mark Wielaard be4097
    Implement ppc64 ldbrx as 64-bit load and Iop_Reverse8sIn64_x1.
Mark Wielaard be4097
    
Mark Wielaard be4097
    This makes it possible for memcheck to analyse the new gcc strcmp
Mark Wielaard be4097
    inlined code correctly even if the ldbrx load is partly beyond an
Mark Wielaard be4097
    addressable block.
Mark Wielaard be4097
    
Mark Wielaard be4097
    Partially resolves bug 386945.
Mark Wielaard be4097
Mark Wielaard be4097
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard be4097
index 8977d4f..a81dace 100644
Mark Wielaard be4097
--- a/VEX/priv/guest_ppc_toIR.c
Mark Wielaard be4097
+++ b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard be4097
@@ -9178,24 +9178,28 @@ static Bool dis_int_ldst_rev ( UInt theInstr )
Mark Wielaard be4097
 
Mark Wielaard be4097
       case 0x214: // ldbrx (Load Doubleword Byte-Reverse Indexed)
Mark Wielaard be4097
       {
Mark Wielaard be4097
-         // JRS FIXME:
Mark Wielaard be4097
-         // * is the host_endness conditional below actually necessary?
Mark Wielaard be4097
-         // * can we just do a 64-bit load followed by by Iop_Reverse8sIn64_x1?
Mark Wielaard be4097
-         //   That would be a lot more efficient.
Mark Wielaard be4097
-         IRExpr * nextAddr;
Mark Wielaard be4097
-         IRTemp w3 = newTemp( Ity_I32 );
Mark Wielaard be4097
-         IRTemp w4 = newTemp( Ity_I32 );
Mark Wielaard be4097
-         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
Mark Wielaard be4097
-         assign( w1, load( Ity_I32, mkexpr( EA ) ) );
Mark Wielaard be4097
-         assign( w2, gen_byterev32( w1 ) );
Mark Wielaard be4097
-         nextAddr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
Mark Wielaard be4097
-                           ty == Ity_I64 ? mkU64( 4 ) : mkU32( 4 ) );
Mark Wielaard be4097
-         assign( w3, load( Ity_I32, nextAddr ) );
Mark Wielaard be4097
-         assign( w4, gen_byterev32( w3 ) );
Mark Wielaard be4097
-         if (host_endness == VexEndnessLE)
Mark Wielaard be4097
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w2 ), mkexpr( w4 ) ) );
Mark Wielaard be4097
+         /* Caller makes sure we are only called in mode64. */
Mark Wielaard be4097
+
Mark Wielaard be4097
+         /* If we supported swapping LE/BE loads in the backend then we could
Mark Wielaard be4097
+            just load the value with the bytes reversed by doing a BE load
Mark Wielaard be4097
+            on an LE machine and a LE load on a BE machine.
Mark Wielaard be4097
+
Mark Wielaard be4097
+         IRTemp dw1 = newTemp(Ity_I64);
Mark Wielaard be4097
+         if (host_endness == VexEndnessBE)
Mark Wielaard be4097
+            assign( dw1, IRExpr_Load(Iend_LE, Ity_I64, mkexpr(EA)));
Mark Wielaard be4097
          else
Mark Wielaard be4097
-            putIReg( rD_addr, binop( Iop_32HLto64, mkexpr( w4 ), mkexpr( w2 ) ) );
Mark Wielaard be4097
+            assign( dw1, IRExpr_Load(Iend_BE, Ity_I64, mkexpr(EA)));
Mark Wielaard be4097
+         putIReg( rD_addr, mkexpr(dw1) );
Mark Wielaard be4097
+
Mark Wielaard be4097
+         But since we currently don't, we load the value as is and then
Mark Wielaard be4097
+         switch it around with Iop_Reverse8sIn64_x1. */
Mark Wielaard be4097
+
Mark Wielaard be4097
+         IRTemp dw1 = newTemp(Ity_I64);
Mark Wielaard be4097
+         IRTemp dw2 = newTemp(Ity_I64);
Mark Wielaard be4097
+         DIP("ldbrx r%u,r%u,r%u\n", rD_addr, rA_addr, rB_addr);
Mark Wielaard be4097
+         assign( dw1, load(Ity_I64, mkexpr(EA)) );
Mark Wielaard be4097
+         assign( dw2, unop(Iop_Reverse8sIn64_x1, mkexpr(dw1)) );
Mark Wielaard be4097
+         putIReg( rD_addr, mkexpr(dw2) );
Mark Wielaard be4097
          break;
Mark Wielaard be4097
       }
Mark Wielaard be4097
 
Mark Wielaard be4097
diff --git a/VEX/priv/host_ppc_isel.c b/VEX/priv/host_ppc_isel.c
Mark Wielaard be4097
index 750cf8d..4fc3eb5 100644
Mark Wielaard be4097
--- a/VEX/priv/host_ppc_isel.c
Mark Wielaard be4097
+++ b/VEX/priv/host_ppc_isel.c
Mark Wielaard be4097
@@ -2210,6 +2210,63 @@ static HReg iselWordExpr_R_wrk ( ISelEnv* env, const IRExpr* e,
Mark Wielaard be4097
          return rr;
Mark Wielaard be4097
       }
Mark Wielaard be4097
 
Mark Wielaard be4097
+      case Iop_Reverse8sIn64_x1: {
Mark Wielaard be4097
+	 /* See Iop_Reverse8sIn32_x1, but extended to 64bit.
Mark Wielaard be4097
+            Can only be used in 64bit mode.  */
Mark Wielaard be4097
+         vassert (mode64);
Mark Wielaard be4097
+
Mark Wielaard be4097
+         HReg r_src  = iselWordExpr_R(env, e->Iex.Unop.arg, IEndianess);
Mark Wielaard be4097
+         HReg rr     = newVRegI(env);
Mark Wielaard be4097
+         HReg rMask  = newVRegI(env);
Mark Wielaard be4097
+         HReg rnMask = newVRegI(env);
Mark Wielaard be4097
+         HReg rtHi   = newVRegI(env);
Mark Wielaard be4097
+         HReg rtLo   = newVRegI(env);
Mark Wielaard be4097
+
Mark Wielaard be4097
+         // Copy r_src since we need to modify it
Mark Wielaard be4097
+         addInstr(env, mk_iMOVds_RR(rr, r_src));
Mark Wielaard be4097
+
Mark Wielaard be4097
+         // r = (r & 0x00FF00FF00FF00FF) << 8 | (r & 0xFF00FF00FF00FF00) >> 8
Mark Wielaard be4097
+         addInstr(env, PPCInstr_LI(rMask, 0x00FF00FF00FF00FFULL,
Mark Wielaard be4097
+                                   True/* 64bit imm*/));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rtHi, rtHi,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rtLo, rtLo,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 8)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
Mark Wielaard be4097
+
Mark Wielaard be4097
+         // r = (r & 0x0000FFFF0000FFFF) << 16 | (r & 0xFFFF0000FFFF0000) >> 16
Mark Wielaard be4097
+         addInstr(env, PPCInstr_LI(rMask, 0x0000FFFF0000FFFFULL,
Mark Wielaard be4097
+                                   True/* 64bit imm*/));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Unary(Pun_NOT, rnMask, rMask));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtHi, rr, PPCRH_Reg(rMask)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rtHi, rtHi,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_AND, rtLo, rr, PPCRH_Reg(rnMask)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rtLo, rtLo,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 16)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rtHi, PPCRH_Reg(rtLo)));
Mark Wielaard be4097
+
Mark Wielaard be4097
+         // r = (r & 0x00000000FFFFFFFF) << 32 | (r & 0xFFFFFFFF00000000) >> 32
Mark Wielaard be4097
+         /* We don't need to mask anymore, just two more shifts and an or.  */
Mark Wielaard be4097
+         addInstr(env, mk_iMOVds_RR(rtLo, rr));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHL, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rtLo, rtLo,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Shft(Pshft_SHR, False/*64 bit shift*/,
Mark Wielaard be4097
+                                     rr, rr,
Mark Wielaard be4097
+                                     PPCRH_Imm(False/*!signed imm*/, 32)));
Mark Wielaard be4097
+         addInstr(env, PPCInstr_Alu(Palu_OR, rr, rr, PPCRH_Reg(rtLo)));
Mark Wielaard be4097
+
Mark Wielaard be4097
+         return rr;
Mark Wielaard be4097
+      }
Mark Wielaard be4097
+
Mark Wielaard be4097
       case Iop_Left8:
Mark Wielaard be4097
       case Iop_Left16:
Mark Wielaard be4097
       case Iop_Left32: