Mark Wielaard b3eda9
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
Mark Wielaard b3eda9
Author: Julian Seward <jseward@acm.org>
Mark Wielaard b3eda9
Date:   Tue Nov 20 11:28:42 2018 +0100
Mark Wielaard b3eda9
Mark Wielaard b3eda9
    Add Memcheck support for IROps added in 42719898.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    memcheck/mc_translate.c:
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    doCmpORD: for the cases of a signed comparison against zero, compute
Mark Wielaard b3eda9
    definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
Mark Wielaard b3eda9
    bits, do it exactly accurately.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    expensiveCountTrailingZeroes: no functional change.  Re-analyse/verify and add
Mark Wielaard b3eda9
    comments.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    expensiveCountLeadingZeroes: add.  Very similar to
Mark Wielaard b3eda9
    expensiveCountTrailingZeroes.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    Add some comments to mark unary ops which are self-shadowing.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
Mark Wielaard b3eda9
    Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    memcheck/tests/vbit-test/irops.c
Mark Wielaard b3eda9
    
Mark Wielaard b3eda9
    Add dummy new entries for all new IROps, just enough to make it compile and
Mark Wielaard b3eda9
    run.
Mark Wielaard b3eda9
Mark Wielaard b3eda9
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
Mark Wielaard b3eda9
index 68a2ab3..c24db91 100644
Mark Wielaard b3eda9
--- a/memcheck/mc_translate.c
Mark Wielaard b3eda9
+++ b/memcheck/mc_translate.c
Mark Wielaard b3eda9
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
Mark Wielaard b3eda9
    return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
Mark Wielaard b3eda9
 }
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+/* --------- The Right-family of operations. --------- */
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+/* Unfortunately these are a lot more expensive than their Left
Mark Wielaard b3eda9
+   counterparts.  Fortunately they are only very rarely used -- only for
Mark Wielaard b3eda9
+   count-leading-zeroes instrumentation. */
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
Mark Wielaard b3eda9
+{
Mark Wielaard b3eda9
+   for (Int i = 1; i <= 16; i *= 2) {
Mark Wielaard b3eda9
+      // a1 |= (a1 >>u i)
Mark Wielaard b3eda9
+      IRAtom* tmp
Mark Wielaard b3eda9
+         = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
Mark Wielaard b3eda9
+      a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
Mark Wielaard b3eda9
+   }
Mark Wielaard b3eda9
+   return a1;
Mark Wielaard b3eda9
+}
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
Mark Wielaard b3eda9
+{
Mark Wielaard b3eda9
+   for (Int i = 1; i <= 32; i *= 2) {
Mark Wielaard b3eda9
+      // a1 |= (a1 >>u i)
Mark Wielaard b3eda9
+      IRAtom* tmp
Mark Wielaard b3eda9
+         = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
Mark Wielaard b3eda9
+      a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
Mark Wielaard b3eda9
+   }
Mark Wielaard b3eda9
+   return a1;
Mark Wielaard b3eda9
+}
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
 /* --------- 'Improvement' functions for AND/OR. --------- */
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
 /* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
Mark Wielaard b3eda9
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv*  mce,
Mark Wielaard b3eda9
                           IRAtom* xxhash, IRAtom* yyhash, 
Mark Wielaard b3eda9
                           IRAtom* xx,     IRAtom* yy )
Mark Wielaard b3eda9
 {
Mark Wielaard b3eda9
-   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
Mark Wielaard b3eda9
-   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
Mark Wielaard b3eda9
-   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
Mark Wielaard b3eda9
-   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
Mark Wielaard b3eda9
-   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
Mark Wielaard b3eda9
-   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
Mark Wielaard b3eda9
-   IRType ty     = m64 ? Ity_I64   : Ity_I32;
Mark Wielaard b3eda9
-   Int    width  = m64 ? 64        : 32;
Mark Wielaard b3eda9
+   Bool   m64      = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
Mark Wielaard b3eda9
+   Bool   syned    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
Mark Wielaard b3eda9
+   IROp   opOR     = m64 ? Iop_Or64   : Iop_Or32;
Mark Wielaard b3eda9
+   IROp   opAND    = m64 ? Iop_And64  : Iop_And32;
Mark Wielaard b3eda9
+   IROp   opSHL    = m64 ? Iop_Shl64  : Iop_Shl32;
Mark Wielaard b3eda9
+   IROp   opSHR    = m64 ? Iop_Shr64  : Iop_Shr32;
Mark Wielaard b3eda9
+   IROp   op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
Mark Wielaard b3eda9
+   IRType ty       = m64 ? Ity_I64    : Ity_I32;
Mark Wielaard b3eda9
+   Int    width    = m64 ? 64         : 32;
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
    Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
-   IRAtom* threeLeft1 = NULL;
Mark Wielaard b3eda9
-   IRAtom* sevenLeft1 = NULL;
Mark Wielaard b3eda9
-
Mark Wielaard b3eda9
    tl_assert(isShadowAtom(mce,xxhash));
Mark Wielaard b3eda9
    tl_assert(isShadowAtom(mce,yyhash));
Mark Wielaard b3eda9
    tl_assert(isOriginalAtom(mce,xx));
Mark Wielaard b3eda9
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv*  mce,
Mark Wielaard b3eda9
       /* fancy interpretation */
Mark Wielaard b3eda9
       /* if yy is zero, then it must be fully defined (zero#). */
Mark Wielaard b3eda9
       tl_assert(isZero(yyhash));
Mark Wielaard b3eda9
-      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
Mark Wielaard b3eda9
+      // This is still inaccurate, but I don't think it matters, since
Mark Wielaard b3eda9
+      // nobody writes code of the form
Mark Wielaard b3eda9
+      // "is <partially-undefined-value> signedly greater than zero?".
Mark Wielaard b3eda9
+      // We therefore simply declare "x >s 0" to be undefined if any bit in
Mark Wielaard b3eda9
+      // x is undefined.  That's clearly suboptimal in some cases.  Eg, if
Mark Wielaard b3eda9
+      // the highest order bit is a defined 1 then x is negative so it
Mark Wielaard b3eda9
+      // doesn't matter whether the remaining bits are defined or not.
Mark Wielaard b3eda9
+      IRAtom* t_0_gt_0_0
Mark Wielaard b3eda9
+         = assignNew(
Mark Wielaard b3eda9
+              'V', mce,ty,
Mark Wielaard b3eda9
+              binop(
Mark Wielaard b3eda9
+                 opAND,
Mark Wielaard b3eda9
+                 mkPCastTo(mce,ty, xxhash),
Mark Wielaard b3eda9
+                 m64 ? mkU64(1<<2) : mkU32(1<<2)
Mark Wielaard b3eda9
+              ));
Mark Wielaard b3eda9
+      // For "x <s 0", we can just copy the definedness of the top bit of x
Mark Wielaard b3eda9
+      // and we have a precise result.
Mark Wielaard b3eda9
+      IRAtom* t_lt_0_0_0
Mark Wielaard b3eda9
+         = assignNew(
Mark Wielaard b3eda9
+              'V', mce,ty,
Mark Wielaard b3eda9
+              binop(
Mark Wielaard b3eda9
+                 opSHL,
Mark Wielaard b3eda9
+                 assignNew(
Mark Wielaard b3eda9
+                    'V', mce,ty,
Mark Wielaard b3eda9
+                    binop(opSHR, xxhash, mkU8(width-1))),
Mark Wielaard b3eda9
+                 mkU8(3)
Mark Wielaard b3eda9
+              ));
Mark Wielaard b3eda9
+      // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
Mark Wielaard b3eda9
+      IRAtom* t_0_0_eq_0
Mark Wielaard b3eda9
+         = assignNew(
Mark Wielaard b3eda9
+              'V', mce,ty,
Mark Wielaard b3eda9
+              binop(
Mark Wielaard b3eda9
+                 opSHL,
Mark Wielaard b3eda9
+                 assignNew('V', mce,ty,
Mark Wielaard b3eda9
+                    unop(
Mark Wielaard b3eda9
+                    op1UtoWS,
Mark Wielaard b3eda9
+                    expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
Mark Wielaard b3eda9
+                 ),
Mark Wielaard b3eda9
+                 mkU8(1)
Mark Wielaard b3eda9
+              ));
Mark Wielaard b3eda9
       return
Mark Wielaard b3eda9
          binop(
Mark Wielaard b3eda9
             opOR,
Mark Wielaard b3eda9
-            assignNew(
Mark Wielaard b3eda9
-               'V', mce,ty,
Mark Wielaard b3eda9
-               binop(
Mark Wielaard b3eda9
-                  opAND,
Mark Wielaard b3eda9
-                  mkPCastTo(mce,ty, xxhash), 
Mark Wielaard b3eda9
-                  threeLeft1
Mark Wielaard b3eda9
-               )),
Mark Wielaard b3eda9
-            assignNew(
Mark Wielaard b3eda9
-               'V', mce,ty,
Mark Wielaard b3eda9
-               binop(
Mark Wielaard b3eda9
-                  opSHL,
Mark Wielaard b3eda9
-                  assignNew(
Mark Wielaard b3eda9
-                     'V', mce,ty,
Mark Wielaard b3eda9
-                     binop(opSHR, xxhash, mkU8(width-1))),
Mark Wielaard b3eda9
-                  mkU8(3)
Mark Wielaard b3eda9
-               ))
Mark Wielaard b3eda9
-	 );
Mark Wielaard b3eda9
+            assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
Mark Wielaard b3eda9
+            t_0_0_eq_0
Mark Wielaard b3eda9
+         );
Mark Wielaard b3eda9
    } else {
Mark Wielaard b3eda9
       /* standard interpretation */
Mark Wielaard b3eda9
-      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
Mark Wielaard b3eda9
+      IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
Mark Wielaard b3eda9
       return 
Mark Wielaard b3eda9
          binop( 
Mark Wielaard b3eda9
             opAND, 
Mark Wielaard b3eda9
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
Mark Wielaard b3eda9
    tl_assert(sameKindedAtoms(atom,vatom));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
    switch (czop) {
Mark Wielaard b3eda9
-      case Iop_Ctz32:
Mark Wielaard b3eda9
+      case Iop_Ctz32: case Iop_CtzNat32:
Mark Wielaard b3eda9
          ty = Ity_I32;
Mark Wielaard b3eda9
          xorOp = Iop_Xor32;
Mark Wielaard b3eda9
          subOp = Iop_Sub32;
Mark Wielaard b3eda9
          andOp = Iop_And32;
Mark Wielaard b3eda9
          one = mkU32(1);
Mark Wielaard b3eda9
          break;
Mark Wielaard b3eda9
-      case Iop_Ctz64:
Mark Wielaard b3eda9
+      case Iop_Ctz64: case Iop_CtzNat64:
Mark Wielaard b3eda9
          ty = Ity_I64;
Mark Wielaard b3eda9
          xorOp = Iop_Xor64;
Mark Wielaard b3eda9
          subOp = Iop_Sub64;
Mark Wielaard b3eda9
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
    // improver = atom ^ (atom - 1)
Mark Wielaard b3eda9
    //
Mark Wielaard b3eda9
-   // That is, improver has its low ctz(atom) bits equal to one;
Mark Wielaard b3eda9
-   // higher bits (if any) equal to zero.
Mark Wielaard b3eda9
+   // That is, improver has its low ctz(atom)+1 bits equal to one;
Mark Wielaard b3eda9
+   // higher bits (if any) equal to zero.  So it's exactly the right
Mark Wielaard b3eda9
+   // mask to use to remove the irrelevant undefined input bits.
Mark Wielaard b3eda9
+   /* Here are some examples:
Mark Wielaard b3eda9
+         atom   = U...U 1 0...0
Mark Wielaard b3eda9
+         atom-1 = U...U 0 1...1
Mark Wielaard b3eda9
+         ^ed    = 0...0 1 11111, which correctly describes which bits of |atom|
Mark Wielaard b3eda9
+                                 actually influence the result
Mark Wielaard b3eda9
+      A boundary case
Mark Wielaard b3eda9
+         atom   = 0...0
Mark Wielaard b3eda9
+         atom-1 = 1...1
Mark Wielaard b3eda9
+         ^ed    = 11111, also a correct mask for the input: all input bits
Mark Wielaard b3eda9
+                         are relevant
Mark Wielaard b3eda9
+      Another boundary case
Mark Wielaard b3eda9
+         atom   = 1..1 1
Mark Wielaard b3eda9
+         atom-1 = 1..1 0
Mark Wielaard b3eda9
+         ^ed    = 0..0 1, also a correct mask: only the rightmost input bit
Mark Wielaard b3eda9
+                          is relevant
Mark Wielaard b3eda9
+      Now with misc U bits interspersed:
Mark Wielaard b3eda9
+         atom   = U...U 1 0 U...U 0 1 0...0
Mark Wielaard b3eda9
+         atom-1 = U...U 1 0 U...U 0 0 1...1
Mark Wielaard b3eda9
+         ^ed    = 0...0 0 0 0...0 0 1 1...1, also correct
Mark Wielaard b3eda9
+      (Per re-check/analysis of 14 Nov 2018)
Mark Wielaard b3eda9
+   */
Mark Wielaard b3eda9
    improver = assignNew('V', mce,ty,
Mark Wielaard b3eda9
                         binop(xorOp,
Mark Wielaard b3eda9
                               atom,
Mark Wielaard b3eda9
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
    // improved = vatom & improver
Mark Wielaard b3eda9
    //
Mark Wielaard b3eda9
-   // That is, treat any V bits above the first ctz(atom) bits as
Mark Wielaard b3eda9
-   // "defined".
Mark Wielaard b3eda9
+   // That is, treat any V bits to the left of the rightmost ctz(atom)+1
Mark Wielaard b3eda9
+   // bits as "defined".
Mark Wielaard b3eda9
+   improved = assignNew('V', mce, ty,
Mark Wielaard b3eda9
+                        binop(andOp, vatom, improver));
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+   // Return pessimizing cast of improved.
Mark Wielaard b3eda9
+   return mkPCastTo(mce, ty, improved);
Mark Wielaard b3eda9
+}
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+static
Mark Wielaard b3eda9
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
Mark Wielaard b3eda9
+                                      IRAtom* atom, IRAtom* vatom )
Mark Wielaard b3eda9
+{
Mark Wielaard b3eda9
+   IRType ty;
Mark Wielaard b3eda9
+   IROp shrOp, notOp, andOp;
Mark Wielaard b3eda9
+   IRAtom* (*mkRight)(MCEnv*, IRAtom*);
Mark Wielaard b3eda9
+   IRAtom *improver, *improved;
Mark Wielaard b3eda9
+   tl_assert(isShadowAtom(mce,vatom));
Mark Wielaard b3eda9
+   tl_assert(isOriginalAtom(mce,atom));
Mark Wielaard b3eda9
+   tl_assert(sameKindedAtoms(atom,vatom));
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+   switch (czop) {
Mark Wielaard b3eda9
+      case Iop_Clz32: case Iop_ClzNat32:
Mark Wielaard b3eda9
+         ty = Ity_I32;
Mark Wielaard b3eda9
+         shrOp = Iop_Shr32;
Mark Wielaard b3eda9
+         notOp = Iop_Not32;
Mark Wielaard b3eda9
+         andOp = Iop_And32;
Mark Wielaard b3eda9
+         mkRight = mkRight32;
Mark Wielaard b3eda9
+         break;
Mark Wielaard b3eda9
+      case Iop_Clz64: case Iop_ClzNat64:
Mark Wielaard b3eda9
+         ty = Ity_I64;
Mark Wielaard b3eda9
+         shrOp = Iop_Shr64;
Mark Wielaard b3eda9
+         notOp = Iop_Not64;
Mark Wielaard b3eda9
+         andOp = Iop_And64;
Mark Wielaard b3eda9
+         mkRight = mkRight64;
Mark Wielaard b3eda9
+         break;
Mark Wielaard b3eda9
+      default:
Mark Wielaard b3eda9
+         ppIROp(czop);
Mark Wielaard b3eda9
+         VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
Mark Wielaard b3eda9
+   }
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+   // This is in principle very similar to how expensiveCountTrailingZeroes
Mark Wielaard b3eda9
+   // works.  That function computed an "improver", which it used to mask
Mark Wielaard b3eda9
+   // off all but the rightmost 1-bit and the zeroes to the right of it,
Mark Wielaard b3eda9
+   // hence removing irrelevant bits from the input.  Here, we play the
Mark Wielaard b3eda9
+   // exact same game but with the left-vs-right roles interchanged.
Mark Wielaard b3eda9
+   // Unfortunately calculation of the improver in this case is
Mark Wielaard b3eda9
+   // significantly more expensive.
Mark Wielaard b3eda9
+   //
Mark Wielaard b3eda9
+   // improver = ~(RIGHT(atom) >>u 1)
Mark Wielaard b3eda9
+   //
Mark Wielaard b3eda9
+   // That is, improver has its upper clz(atom)+1 bits equal to one;
Mark Wielaard b3eda9
+   // lower bits (if any) equal to zero.  So it's exactly the right
Mark Wielaard b3eda9
+   // mask to use to remove the irrelevant undefined input bits.
Mark Wielaard b3eda9
+   /* Here are some examples:
Mark Wielaard b3eda9
+         atom             = 0...0 1 U...U
Mark Wielaard b3eda9
+         R(atom)          = 0...0 1 1...1
Mark Wielaard b3eda9
+         R(atom) >>u 1    = 0...0 0 1...1
Mark Wielaard b3eda9
+         ~(R(atom) >>u 1) = 1...1 1 0...0
Mark Wielaard b3eda9
+                            which correctly describes which bits of |atom|
Mark Wielaard b3eda9
+                            actually influence the result
Mark Wielaard b3eda9
+      A boundary case
Mark Wielaard b3eda9
+         atom             = 0...0
Mark Wielaard b3eda9
+         R(atom)          = 0...0
Mark Wielaard b3eda9
+         R(atom) >>u 1    = 0...0
Mark Wielaard b3eda9
+         ~(R(atom) >>u 1) = 1...1
Mark Wielaard b3eda9
+                            also a correct mask for the input: all input bits
Mark Wielaard b3eda9
+                            are relevant
Mark Wielaard b3eda9
+      Another boundary case
Mark Wielaard b3eda9
+         atom             = 1 1..1
Mark Wielaard b3eda9
+         R(atom)          = 1 1..1
Mark Wielaard b3eda9
+         R(atom) >>u 1    = 0 1..1
Mark Wielaard b3eda9
+         ~(R(atom) >>u 1) = 1 0..0
Mark Wielaard b3eda9
+                            also a correct mask: only the leftmost input bit
Mark Wielaard b3eda9
+                            is relevant
Mark Wielaard b3eda9
+      Now with misc U bits interspersed:
Mark Wielaard b3eda9
+         atom             = 0...0 1 U...U 0 1 U...U
Mark Wielaard b3eda9
+         R(atom)          = 0...0 1 1...1 1 1 1...1
Mark Wielaard b3eda9
+         R(atom) >>u 1    = 0...0 0 1...1 1 1 1...1
Mark Wielaard b3eda9
+         ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
Mark Wielaard b3eda9
+      (Per initial implementation of 15 Nov 2018)
Mark Wielaard b3eda9
+   */
Mark Wielaard b3eda9
+   improver = mkRight(mce, atom);
Mark Wielaard b3eda9
+   improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
Mark Wielaard b3eda9
+   improver = assignNew('V', mce, ty, unop(notOp, improver));
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+   // improved = vatom & improver
Mark Wielaard b3eda9
+   //
Mark Wielaard b3eda9
+   // That is, treat any V bits to the right of the leftmost clz(atom)+1
Mark Wielaard b3eda9
+   // bits as "defined".
Mark Wielaard b3eda9
    improved = assignNew('V', mce, ty,
Mark Wielaard b3eda9
                         binop(andOp, vatom, improver));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_RecipEst32F0x4:
Mark Wielaard b3eda9
          return unary32F0x4(mce, vatom);
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      // These are self-shadowing.
Mark Wielaard b3eda9
       case Iop_32UtoV128:
Mark Wielaard b3eda9
       case Iop_64UtoV128:
Mark Wielaard b3eda9
       case Iop_Dup8x16:
Mark Wielaard b3eda9
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_MulI128by10Carry:
Mark Wielaard b3eda9
       case Iop_F16toF64x2:
Mark Wielaard b3eda9
       case Iop_F64toF16x2:
Mark Wielaard b3eda9
+         // FIXME JRS 2018-Nov-15.  This is surely not correct!
Mark Wielaard b3eda9
          return vatom;
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
       case Iop_I32StoF128: /* signed I32 -> F128 */
Mark Wielaard b3eda9
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_RoundF64toF64_NegINF:
Mark Wielaard b3eda9
       case Iop_RoundF64toF64_PosINF:
Mark Wielaard b3eda9
       case Iop_RoundF64toF64_ZERO:
Mark Wielaard b3eda9
-      case Iop_Clz64:
Mark Wielaard b3eda9
       case Iop_D32toD64:
Mark Wielaard b3eda9
       case Iop_I32StoD64:
Mark Wielaard b3eda9
       case Iop_I32UtoD64:
Mark Wielaard b3eda9
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_D64toD128:
Mark Wielaard b3eda9
          return mkPCastTo(mce, Ity_I128, vatom);
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
-      case Iop_Clz32:
Mark Wielaard b3eda9
       case Iop_TruncF64asF32:
Mark Wielaard b3eda9
       case Iop_NegF32:
Mark Wielaard b3eda9
       case Iop_AbsF32:
Mark Wielaard b3eda9
       case Iop_F16toF32: 
Mark Wielaard b3eda9
          return mkPCastTo(mce, Ity_I32, vatom);
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
-      case Iop_Ctz32:
Mark Wielaard b3eda9
-      case Iop_Ctz64:
Mark Wielaard b3eda9
+      case Iop_Ctz32: case Iop_CtzNat32:
Mark Wielaard b3eda9
+      case Iop_Ctz64: case Iop_CtzNat64:
Mark Wielaard b3eda9
          return expensiveCountTrailingZeroes(mce, op, atom, vatom);
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      case Iop_Clz32: case Iop_ClzNat32:
Mark Wielaard b3eda9
+      case Iop_Clz64: case Iop_ClzNat64:
Mark Wielaard b3eda9
+         return expensiveCountLeadingZeroes(mce, op, atom, vatom);
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+      // PopCount32: this is slightly pessimistic.  It is true that the
Mark Wielaard b3eda9
+      // result depends on all input bits, so that aspect of the PCast is
Mark Wielaard b3eda9
+      // correct.  However, regardless of the input, only the lowest 6 bits
Mark Wielaard b3eda9
+      // out of the output can ever be undefined.  So we could actually
Mark Wielaard b3eda9
+      // "improve" the results here by marking the top 26 bits of output as
Mark Wielaard b3eda9
+      // defined.  A similar comment applies for PopCount64.
Mark Wielaard b3eda9
+      case Iop_PopCount32:
Mark Wielaard b3eda9
+         return mkPCastTo(mce, Ity_I32, vatom);
Mark Wielaard b3eda9
+      case Iop_PopCount64:
Mark Wielaard b3eda9
+         return mkPCastTo(mce, Ity_I64, vatom);
Mark Wielaard b3eda9
+
Mark Wielaard b3eda9
+      // These are self-shadowing.
Mark Wielaard b3eda9
       case Iop_1Uto64:
Mark Wielaard b3eda9
       case Iop_1Sto64:
Mark Wielaard b3eda9
       case Iop_8Uto64:
Mark Wielaard b3eda9
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_V256to64_2: case Iop_V256to64_3:
Mark Wielaard b3eda9
          return assignNew('V', mce, Ity_I64, unop(op, vatom));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      // These are self-shadowing.
Mark Wielaard b3eda9
       case Iop_64to32:
Mark Wielaard b3eda9
       case Iop_64HIto32:
Mark Wielaard b3eda9
       case Iop_1Uto32:
Mark Wielaard b3eda9
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_16Sto32:
Mark Wielaard b3eda9
       case Iop_8Sto32:
Mark Wielaard b3eda9
       case Iop_V128to32:
Mark Wielaard b3eda9
+      case Iop_Reverse8sIn32_x1:
Mark Wielaard b3eda9
          return assignNew('V', mce, Ity_I32, unop(op, vatom));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      // These are self-shadowing.
Mark Wielaard b3eda9
       case Iop_8Sto16:
Mark Wielaard b3eda9
       case Iop_8Uto16:
Mark Wielaard b3eda9
       case Iop_32to16:
Mark Wielaard b3eda9
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_GetMSBs8x16:
Mark Wielaard b3eda9
          return assignNew('V', mce, Ity_I16, unop(op, vatom));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      // These are self-shadowing.
Mark Wielaard b3eda9
       case Iop_1Uto8:
Mark Wielaard b3eda9
       case Iop_1Sto8:
Mark Wielaard b3eda9
       case Iop_16to8:
Mark Wielaard b3eda9
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_Not16:
Mark Wielaard b3eda9
       case Iop_Not8:
Mark Wielaard b3eda9
       case Iop_Not1:
Mark Wielaard b3eda9
+         // FIXME JRS 2018-Nov-15.  This is surely not correct!
Mark Wielaard b3eda9
          return vatom;
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
       case Iop_CmpNEZ8x8:
Mark Wielaard b3eda9
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
Mark Wielaard b3eda9
       case Iop_Ctz64x2:
Mark Wielaard b3eda9
          return mkPCast64x2(mce, vatom);
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
+      // This is self-shadowing.
Mark Wielaard b3eda9
       case Iop_PwBitMtxXpose64x2:
Mark Wielaard b3eda9
          return assignNew('V', mce, Ity_V128, unop(op, vatom));
Mark Wielaard b3eda9
 
Mark Wielaard b3eda9
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
Mark Wielaard b3eda9
index bfd82fc..e8bf67d 100644
Mark Wielaard b3eda9
--- a/memcheck/tests/vbit-test/irops.c
Mark Wielaard b3eda9
+++ b/memcheck/tests/vbit-test/irops.c
Mark Wielaard b3eda9
@@ -111,6 +111,12 @@ static irop_t irops[] = {
Mark Wielaard b3eda9
   { DEFOP(Iop_Clz32,      UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
Mark Wielaard b3eda9
   { DEFOP(Iop_Ctz64,      UNDEF_ALL),  .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
   { DEFOP(Iop_Ctz32,      UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
+  { DEFOP(Iop_ClzNat64,   UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
Mark Wielaard b3eda9
+  { DEFOP(Iop_ClzNat32,   UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
+  { DEFOP(Iop_CtzNat64,   UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
+  { DEFOP(Iop_CtzNat32,   UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
+  { DEFOP(Iop_PopCount64, UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
+  { DEFOP(Iop_PopCount32, UNDEF_ALL),  .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
Mark Wielaard b3eda9
   { DEFOP(Iop_CmpLT32S,   UNDEF_ALL),  .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
Mark Wielaard b3eda9
   { DEFOP(Iop_CmpLT64S,   UNDEF_ALL),  .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
Mark Wielaard b3eda9
   { DEFOP(Iop_CmpLE32S,   UNDEF_ALL),  .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
Mark Wielaard b3eda9
@@ -336,6 +342,7 @@ static irop_t irops[] = {
Mark Wielaard b3eda9
   { DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
Mark Wielaard b3eda9
   { DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
Mark Wielaard b3eda9
   { DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
Mark Wielaard b3eda9
+  { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
Mark Wielaard b3eda9
   /* ------------------ 64-bit SIMD FP ------------------------ */
Mark Wielaard b3eda9
   { DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
Mark Wielaard b3eda9
   { DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },