commit 5c00e04a1b61475a7f731f8cfede114201815e0a
Author: Mark Wielaard <mark@klomp.org>
Date:   Sun Dec 9 23:25:05 2018 +0100
    Implement ppc64 lxvb16x as 128-bit vector load with reversed double words.
    
    This makes it possible for memcheck to know which part of the 128-bit
    vector is defined, even if the load is partly beyond an addressable block.
    
    Partially resolves bug 386945.
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
index 7af4973..ec2f90a 100644
--- a/VEX/priv/guest_ppc_toIR.c
+++ b/VEX/priv/guest_ppc_toIR.c
@@ -20702,54 +20702,29 @@ dis_vx_load ( UInt theInstr )
    {
       DIP("lxvb16x %d,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
 
-      IRTemp byte[16];
-      int i;
-      UInt ea_off = 0;
-      IRExpr* irx_addr;
-      IRTemp tmp_low[9];
-      IRTemp tmp_hi[9];
+      /* The result of lxvb16x should be the same on big and little
+         endian systems. We do a host load, then reverse the bytes in
+         the double words. If the host load was little endian we swap
+         them around again. */
 
-      tmp_low[0] = newTemp( Ity_I64 );
-      tmp_hi[0] = newTemp( Ity_I64 );
-      assign( tmp_low[0], mkU64( 0 ) );
-      assign( tmp_hi[0], mkU64( 0 ) );
-
-      for ( i = 0; i < 8; i++ ) {
-         byte[i] = newTemp( Ity_I64 );
-         tmp_low[i+1] = newTemp( Ity_I64 );
-
-         irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                           ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-         ea_off += 1;
-
-         assign( byte[i], binop( Iop_Shl64,
-                                 unop( Iop_8Uto64,
-                                       load( Ity_I8, irx_addr ) ),
-                                 mkU8( 8 * ( 7 - i ) ) ) );
+      IRTemp high = newTemp(Ity_I64);
+      IRTemp high_rev = newTemp(Ity_I64);
+      IRTemp low = newTemp(Ity_I64);
+      IRTemp low_rev = newTemp(Ity_I64);
 
-         assign( tmp_low[i+1],
-                 binop( Iop_Or64,
-                        mkexpr( byte[i] ), mkexpr( tmp_low[i] ) ) );
-      }
+      IRExpr *t128 = load( Ity_V128, mkexpr( EA ) );
 
-      for ( i = 0; i < 8; i++ ) {
-         byte[i + 8] = newTemp( Ity_I64 );
-         tmp_hi[i+1] = newTemp( Ity_I64 );
+      assign( high, unop(Iop_V128HIto64, t128) );
+      assign( high_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(high)) );
+      assign( low, unop(Iop_V128to64, t128) );
+      assign( low_rev, unop(Iop_Reverse8sIn64_x1, mkexpr(low)) );
 
-         irx_addr = binop( mkSzOp( ty, Iop_Add8 ), mkexpr( EA ),
-                           ty == Ity_I64 ? mkU64( ea_off ) : mkU32( ea_off ) );
-         ea_off += 1;
+      if (host_endness == VexEndnessLE)
+         t128 = binop( Iop_64HLtoV128, mkexpr (low_rev), mkexpr (high_rev) );
+      else
+         t128 = binop( Iop_64HLtoV128, mkexpr (high_rev), mkexpr (low_rev) );
 
-         assign( byte[i+8], binop( Iop_Shl64,
-                                   unop( Iop_8Uto64,
-                                         load( Ity_I8, irx_addr ) ),
-                                   mkU8( 8 * ( 7 - i ) ) ) );
-         assign( tmp_hi[i+1], binop( Iop_Or64,
-                                     mkexpr( byte[i+8] ),
-                                     mkexpr( tmp_hi[i] ) ) );
-      }
-      putVSReg( XT, binop( Iop_64HLtoV128,
-                           mkexpr( tmp_low[8] ), mkexpr( tmp_hi[8] ) ) );
+      putVSReg( XT, t128 );
       break;
    }
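
For illustration only (outside the commit itself): on a little-endian host, the replacement IR yields the same two doublewords as the byte-by-byte loop it removes, because reversing the bytes within each doubleword and then swapping the doublewords amounts to a full 16-byte reversal of the host-order load. A minimal standalone C sketch of that equivalence, with the invented helper name reverse8s_in_64 standing in for Iop_Reverse8sIn64_x1:

#include <assert.h>
#include <stdint.h>
#include <string.h>

/* C model of Iop_Reverse8sIn64_x1: reverse the eight bytes within a
   64-bit doubleword. */
static uint64_t reverse8s_in_64(uint64_t w)
{
   uint64_t r = 0;
   for (int i = 0; i < 8; i++)
      r |= ((w >> (8 * i)) & 0xffULL) << (8 * (7 - i));
   return r;
}

int main(void)
{
   uint8_t mem[16];
   for (int i = 0; i < 16; i++)
      mem[i] = (uint8_t)(0x10 + i);

   /* Old IR: build each doubleword byte by byte, with the first byte
      of each group of eight in the most significant position. */
   uint64_t old_hi = 0, old_lo = 0;
   for (int i = 0; i < 8; i++) {
      old_hi |= (uint64_t)mem[i]     << (8 * (7 - i));
      old_lo |= (uint64_t)mem[i + 8] << (8 * (7 - i));
   }

   /* New IR on a little-endian host: one 16-byte host-order load
      (modelled here as two doubleword loads), reverse the bytes in
      each doubleword, then swap the doublewords around again. */
   uint64_t lo, hi;
   memcpy(&lo, mem, 8);       /* LE host: mem[0..7]  is the low half  */
   memcpy(&hi, mem + 8, 8);   /* LE host: mem[8..15] is the high half */
   uint64_t new_hi = reverse8s_in_64(lo);
   uint64_t new_lo = reverse8s_in_64(hi);

   /* Holds on a little-endian host only; the big-endian path in the
      patch skips the final swap. */
   assert(new_hi == old_hi && new_lo == old_lo);
   return 0;
}

With a single V128 load feeding the reversals, memcheck can propagate definedness for each of the 16 bytes through Iop_Reverse8sIn64_x1 instead of seeing sixteen separate I8 loads, which is what lets it report a partially defined result when the load extends partly beyond an addressable block.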