|
Mark Wielaard |
b3eda9 |
commit e221eca26be6b2396e3fcbf4117e630fc22e79f6
|
|
Mark Wielaard |
b3eda9 |
Author: Julian Seward <jseward@acm.org>
|
|
Mark Wielaard |
b3eda9 |
Date: Tue Nov 20 11:28:42 2018 +0100
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Add Memcheck support for IROps added in 42719898.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
memcheck/mc_translate.c:
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Add mkRight{32,64} as right-travelling analogues to mkLeft{32,64}.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
doCmpORD: for the cases of a signed comparison against zero, compute
|
|
Mark Wielaard |
b3eda9 |
definedness of the 3 result bits (lt,gt,eq) separately, and, for the lt and eq
|
|
Mark Wielaard |
b3eda9 |
bits, do it exactly accurately.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
expensiveCountTrailingZeroes: no functional change. Re-analyse/verify and add
|
|
Mark Wielaard |
b3eda9 |
comments.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
expensiveCountLeadingZeroes: add. Very similar to
|
|
Mark Wielaard |
b3eda9 |
expensiveCountTrailingZeroes.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Add some comments to mark unary ops which are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Route Iop_Ctz{,Nat}{32,64} through expensiveCountTrailingZeroes.
|
|
Mark Wielaard |
b3eda9 |
Route Iop_Clz{,Nat}{32,64} through expensiveCountLeadingZeroes.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Add instrumentation for Iop_PopCount{32,64} and Iop_Reverse8sIn32_x1.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
memcheck/tests/vbit-test/irops.c
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Add dummy new entries for all new IROps, just enough to make it compile and
|
|
Mark Wielaard |
b3eda9 |
run.
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
|
|
Mark Wielaard |
b3eda9 |
index 68a2ab3..c24db91 100644
|
|
Mark Wielaard |
b3eda9 |
--- a/memcheck/mc_translate.c
|
|
Mark Wielaard |
b3eda9 |
+++ b/memcheck/mc_translate.c
|
|
Mark Wielaard |
b3eda9 |
@@ -737,6 +737,34 @@ static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
|
|
Mark Wielaard |
b3eda9 |
return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
|
|
Mark Wielaard |
b3eda9 |
}
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+/* --------- The Right-family of operations. --------- */
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+/* Unfortunately these are a lot more expensive than their Left
|
|
Mark Wielaard |
b3eda9 |
+ counterparts. Fortunately they are only very rarely used -- only for
|
|
Mark Wielaard |
b3eda9 |
+ count-leading-zeroes instrumentation. */
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+static IRAtom* mkRight32 ( MCEnv* mce, IRAtom* a1 )
|
|
Mark Wielaard |
b3eda9 |
+{
|
|
Mark Wielaard |
b3eda9 |
+ for (Int i = 1; i <= 16; i *= 2) {
|
|
Mark Wielaard |
b3eda9 |
+ // a1 |= (a1 >>u i)
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* tmp
|
|
Mark Wielaard |
b3eda9 |
+ = assignNew('V', mce, Ity_I32, binop(Iop_Shr32, a1, mkU8(i)));
|
|
Mark Wielaard |
b3eda9 |
+ a1 = assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, tmp));
|
|
Mark Wielaard |
b3eda9 |
+ }
|
|
Mark Wielaard |
b3eda9 |
+ return a1;
|
|
Mark Wielaard |
b3eda9 |
+}
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+static IRAtom* mkRight64 ( MCEnv* mce, IRAtom* a1 )
|
|
Mark Wielaard |
b3eda9 |
+{
|
|
Mark Wielaard |
b3eda9 |
+ for (Int i = 1; i <= 32; i *= 2) {
|
|
Mark Wielaard |
b3eda9 |
+ // a1 |= (a1 >>u i)
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* tmp
|
|
Mark Wielaard |
b3eda9 |
+ = assignNew('V', mce, Ity_I64, binop(Iop_Shr64, a1, mkU8(i)));
|
|
Mark Wielaard |
b3eda9 |
+ a1 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, tmp));
|
|
Mark Wielaard |
b3eda9 |
+ }
|
|
Mark Wielaard |
b3eda9 |
+ return a1;
|
|
Mark Wielaard |
b3eda9 |
+}
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
/* --------- 'Improvement' functions for AND/OR. --------- */
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
/* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
|
|
Mark Wielaard |
b3eda9 |
@@ -1280,20 +1308,18 @@ static IRAtom* doCmpORD ( MCEnv* mce,
|
|
Mark Wielaard |
b3eda9 |
IRAtom* xxhash, IRAtom* yyhash,
|
|
Mark Wielaard |
b3eda9 |
IRAtom* xx, IRAtom* yy )
|
|
Mark Wielaard |
b3eda9 |
{
|
|
Mark Wielaard |
b3eda9 |
- Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
|
|
Mark Wielaard |
b3eda9 |
- Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
|
|
Mark Wielaard |
b3eda9 |
- IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
|
|
Mark Wielaard |
b3eda9 |
- IROp opAND = m64 ? Iop_And64 : Iop_And32;
|
|
Mark Wielaard |
b3eda9 |
- IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
|
|
Mark Wielaard |
b3eda9 |
- IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
|
|
Mark Wielaard |
b3eda9 |
- IRType ty = m64 ? Ity_I64 : Ity_I32;
|
|
Mark Wielaard |
b3eda9 |
- Int width = m64 ? 64 : 32;
|
|
Mark Wielaard |
b3eda9 |
+ Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
|
|
Mark Wielaard |
b3eda9 |
+ Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
|
|
Mark Wielaard |
b3eda9 |
+ IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
|
|
Mark Wielaard |
b3eda9 |
+ IROp opAND = m64 ? Iop_And64 : Iop_And32;
|
|
Mark Wielaard |
b3eda9 |
+ IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
|
|
Mark Wielaard |
b3eda9 |
+ IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
|
|
Mark Wielaard |
b3eda9 |
+ IROp op1UtoWS = m64 ? Iop_1Uto64 : Iop_1Uto32;
|
|
Mark Wielaard |
b3eda9 |
+ IRType ty = m64 ? Ity_I64 : Ity_I32;
|
|
Mark Wielaard |
b3eda9 |
+ Int width = m64 ? 64 : 32;
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
- IRAtom* threeLeft1 = NULL;
|
|
Mark Wielaard |
b3eda9 |
- IRAtom* sevenLeft1 = NULL;
|
|
Mark Wielaard |
b3eda9 |
-
|
|
Mark Wielaard |
b3eda9 |
tl_assert(isShadowAtom(mce,xxhash));
|
|
Mark Wielaard |
b3eda9 |
tl_assert(isShadowAtom(mce,yyhash));
|
|
Mark Wielaard |
b3eda9 |
tl_assert(isOriginalAtom(mce,xx));
|
|
Mark Wielaard |
b3eda9 |
@@ -1312,30 +1338,55 @@ static IRAtom* doCmpORD ( MCEnv* mce,
|
|
Mark Wielaard |
b3eda9 |
/* fancy interpretation */
|
|
Mark Wielaard |
b3eda9 |
/* if yy is zero, then it must be fully defined (zero#). */
|
|
Mark Wielaard |
b3eda9 |
tl_assert(isZero(yyhash));
|
|
Mark Wielaard |
b3eda9 |
- threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
|
|
Mark Wielaard |
b3eda9 |
+ // This is still inaccurate, but I don't think it matters, since
|
|
Mark Wielaard |
b3eda9 |
+ // nobody writes code of the form
|
|
Mark Wielaard |
b3eda9 |
+ // "is <partially-undefined-value> signedly greater than zero?".
|
|
Mark Wielaard |
b3eda9 |
+ // We therefore simply declare "x >s 0" to be undefined if any bit in
|
|
Mark Wielaard |
b3eda9 |
+ // x is undefined. That's clearly suboptimal in some cases. Eg, if
|
|
Mark Wielaard |
b3eda9 |
+ // the highest order bit is a defined 1 then x is negative so it
|
|
Mark Wielaard |
b3eda9 |
+ // doesn't matter whether the remaining bits are defined or not.
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* t_0_gt_0_0
|
|
Mark Wielaard |
b3eda9 |
+ = assignNew(
|
|
Mark Wielaard |
b3eda9 |
+ 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
+ binop(
|
|
Mark Wielaard |
b3eda9 |
+ opAND,
|
|
Mark Wielaard |
b3eda9 |
+ mkPCastTo(mce,ty, xxhash),
|
|
Mark Wielaard |
b3eda9 |
+ m64 ? mkU64(1<<2) : mkU32(1<<2)
|
|
Mark Wielaard |
b3eda9 |
+ ));
|
|
Mark Wielaard |
b3eda9 |
+ // For "x <s 0", we can just copy the definedness of the top bit of x
|
|
Mark Wielaard |
b3eda9 |
+ // and we have a precise result.
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* t_lt_0_0_0
|
|
Mark Wielaard |
b3eda9 |
+ = assignNew(
|
|
Mark Wielaard |
b3eda9 |
+ 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
+ binop(
|
|
Mark Wielaard |
b3eda9 |
+ opSHL,
|
|
Mark Wielaard |
b3eda9 |
+ assignNew(
|
|
Mark Wielaard |
b3eda9 |
+ 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
+ binop(opSHR, xxhash, mkU8(width-1))),
|
|
Mark Wielaard |
b3eda9 |
+ mkU8(3)
|
|
Mark Wielaard |
b3eda9 |
+ ));
|
|
Mark Wielaard |
b3eda9 |
+ // For "x == 0" we can hand the problem off to expensiveCmpEQorNE.
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* t_0_0_eq_0
|
|
Mark Wielaard |
b3eda9 |
+ = assignNew(
|
|
Mark Wielaard |
b3eda9 |
+ 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
+ binop(
|
|
Mark Wielaard |
b3eda9 |
+ opSHL,
|
|
Mark Wielaard |
b3eda9 |
+ assignNew('V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
+ unop(
|
|
Mark Wielaard |
b3eda9 |
+ op1UtoWS,
|
|
Mark Wielaard |
b3eda9 |
+ expensiveCmpEQorNE(mce, ty, xxhash, yyhash, xx, yy))
|
|
Mark Wielaard |
b3eda9 |
+ ),
|
|
Mark Wielaard |
b3eda9 |
+ mkU8(1)
|
|
Mark Wielaard |
b3eda9 |
+ ));
|
|
Mark Wielaard |
b3eda9 |
return
|
|
Mark Wielaard |
b3eda9 |
binop(
|
|
Mark Wielaard |
b3eda9 |
opOR,
|
|
Mark Wielaard |
b3eda9 |
- assignNew(
|
|
Mark Wielaard |
b3eda9 |
- 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
- binop(
|
|
Mark Wielaard |
b3eda9 |
- opAND,
|
|
Mark Wielaard |
b3eda9 |
- mkPCastTo(mce,ty, xxhash),
|
|
Mark Wielaard |
b3eda9 |
- threeLeft1
|
|
Mark Wielaard |
b3eda9 |
- )),
|
|
Mark Wielaard |
b3eda9 |
- assignNew(
|
|
Mark Wielaard |
b3eda9 |
- 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
- binop(
|
|
Mark Wielaard |
b3eda9 |
- opSHL,
|
|
Mark Wielaard |
b3eda9 |
- assignNew(
|
|
Mark Wielaard |
b3eda9 |
- 'V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
- binop(opSHR, xxhash, mkU8(width-1))),
|
|
Mark Wielaard |
b3eda9 |
- mkU8(3)
|
|
Mark Wielaard |
b3eda9 |
- ))
|
|
Mark Wielaard |
b3eda9 |
- );
|
|
Mark Wielaard |
b3eda9 |
+ assignNew('V', mce,ty, binop(opOR, t_lt_0_0_0, t_0_gt_0_0)),
|
|
Mark Wielaard |
b3eda9 |
+ t_0_0_eq_0
|
|
Mark Wielaard |
b3eda9 |
+ );
|
|
Mark Wielaard |
b3eda9 |
} else {
|
|
Mark Wielaard |
b3eda9 |
/* standard interpretation */
|
|
Mark Wielaard |
b3eda9 |
- sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
|
|
Mark Wielaard |
b3eda9 |
return
|
|
Mark Wielaard |
b3eda9 |
binop(
|
|
Mark Wielaard |
b3eda9 |
opAND,
|
|
Mark Wielaard |
b3eda9 |
@@ -2211,14 +2262,14 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
|
Mark Wielaard |
b3eda9 |
tl_assert(sameKindedAtoms(atom,vatom));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
switch (czop) {
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Ctz32:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Ctz32: case Iop_CtzNat32:
|
|
Mark Wielaard |
b3eda9 |
ty = Ity_I32;
|
|
Mark Wielaard |
b3eda9 |
xorOp = Iop_Xor32;
|
|
Mark Wielaard |
b3eda9 |
subOp = Iop_Sub32;
|
|
Mark Wielaard |
b3eda9 |
andOp = Iop_And32;
|
|
Mark Wielaard |
b3eda9 |
one = mkU32(1);
|
|
Mark Wielaard |
b3eda9 |
break;
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Ctz64:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Ctz64: case Iop_CtzNat64:
|
|
Mark Wielaard |
b3eda9 |
ty = Ity_I64;
|
|
Mark Wielaard |
b3eda9 |
xorOp = Iop_Xor64;
|
|
Mark Wielaard |
b3eda9 |
subOp = Iop_Sub64;
|
|
Mark Wielaard |
b3eda9 |
@@ -2232,8 +2283,30 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
// improver = atom ^ (atom - 1)
|
|
Mark Wielaard |
b3eda9 |
//
|
|
Mark Wielaard |
b3eda9 |
- // That is, improver has its low ctz(atom) bits equal to one;
|
|
Mark Wielaard |
b3eda9 |
- // higher bits (if any) equal to zero.
|
|
Mark Wielaard |
b3eda9 |
+ // That is, improver has its low ctz(atom)+1 bits equal to one;
|
|
Mark Wielaard |
b3eda9 |
+ // higher bits (if any) equal to zero. So it's exactly the right
|
|
Mark Wielaard |
b3eda9 |
+ // mask to use to remove the irrelevant undefined input bits.
|
|
Mark Wielaard |
b3eda9 |
+ /* Here are some examples:
|
|
Mark Wielaard |
b3eda9 |
+ atom = U...U 1 0...0
|
|
Mark Wielaard |
b3eda9 |
+ atom-1 = U...U 0 1...1
|
|
Mark Wielaard |
b3eda9 |
+ ^ed = 0...0 1 11111, which correctly describes which bits of |atom|
|
|
Mark Wielaard |
b3eda9 |
+ actually influence the result
|
|
Mark Wielaard |
b3eda9 |
+ A boundary case
|
|
Mark Wielaard |
b3eda9 |
+ atom = 0...0
|
|
Mark Wielaard |
b3eda9 |
+ atom-1 = 1...1
|
|
Mark Wielaard |
b3eda9 |
+ ^ed = 11111, also a correct mask for the input: all input bits
|
|
Mark Wielaard |
b3eda9 |
+ are relevant
|
|
Mark Wielaard |
b3eda9 |
+ Another boundary case
|
|
Mark Wielaard |
b3eda9 |
+ atom = 1..1 1
|
|
Mark Wielaard |
b3eda9 |
+ atom-1 = 1..1 0
|
|
Mark Wielaard |
b3eda9 |
+ ^ed = 0..0 1, also a correct mask: only the rightmost input bit
|
|
Mark Wielaard |
b3eda9 |
+ is relevant
|
|
Mark Wielaard |
b3eda9 |
+ Now with misc U bits interspersed:
|
|
Mark Wielaard |
b3eda9 |
+ atom = U...U 1 0 U...U 0 1 0...0
|
|
Mark Wielaard |
b3eda9 |
+ atom-1 = U...U 1 0 U...U 0 0 1...1
|
|
Mark Wielaard |
b3eda9 |
+ ^ed = 0...0 0 0 0...0 0 1 1...1, also correct
|
|
Mark Wielaard |
b3eda9 |
+ (Per re-check/analysis of 14 Nov 2018)
|
|
Mark Wielaard |
b3eda9 |
+ */
|
|
Mark Wielaard |
b3eda9 |
improver = assignNew('V', mce,ty,
|
|
Mark Wielaard |
b3eda9 |
binop(xorOp,
|
|
Mark Wielaard |
b3eda9 |
atom,
|
|
Mark Wielaard |
b3eda9 |
@@ -2242,8 +2315,96 @@ IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
// improved = vatom & improver
|
|
Mark Wielaard |
b3eda9 |
//
|
|
Mark Wielaard |
b3eda9 |
- // That is, treat any V bits above the first ctz(atom) bits as
|
|
Mark Wielaard |
b3eda9 |
- // "defined".
|
|
Mark Wielaard |
b3eda9 |
+ // That is, treat any V bits to the left of the rightmost ctz(atom)+1
|
|
Mark Wielaard |
b3eda9 |
+ // bits as "defined".
|
|
Mark Wielaard |
b3eda9 |
+ improved = assignNew('V', mce, ty,
|
|
Mark Wielaard |
b3eda9 |
+ binop(andOp, vatom, improver));
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ // Return pessimizing cast of improved.
|
|
Mark Wielaard |
b3eda9 |
+ return mkPCastTo(mce, ty, improved);
|
|
Mark Wielaard |
b3eda9 |
+}
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+static
|
|
Mark Wielaard |
b3eda9 |
+IRAtom* expensiveCountLeadingZeroes ( MCEnv* mce, IROp czop,
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* atom, IRAtom* vatom )
|
|
Mark Wielaard |
b3eda9 |
+{
|
|
Mark Wielaard |
b3eda9 |
+ IRType ty;
|
|
Mark Wielaard |
b3eda9 |
+ IROp shrOp, notOp, andOp;
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom* (*mkRight)(MCEnv*, IRAtom*);
|
|
Mark Wielaard |
b3eda9 |
+ IRAtom *improver, *improved;
|
|
Mark Wielaard |
b3eda9 |
+ tl_assert(isShadowAtom(mce,vatom));
|
|
Mark Wielaard |
b3eda9 |
+ tl_assert(isOriginalAtom(mce,atom));
|
|
Mark Wielaard |
b3eda9 |
+ tl_assert(sameKindedAtoms(atom,vatom));
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ switch (czop) {
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Clz32: case Iop_ClzNat32:
|
|
Mark Wielaard |
b3eda9 |
+ ty = Ity_I32;
|
|
Mark Wielaard |
b3eda9 |
+ shrOp = Iop_Shr32;
|
|
Mark Wielaard |
b3eda9 |
+ notOp = Iop_Not32;
|
|
Mark Wielaard |
b3eda9 |
+ andOp = Iop_And32;
|
|
Mark Wielaard |
b3eda9 |
+ mkRight = mkRight32;
|
|
Mark Wielaard |
b3eda9 |
+ break;
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Clz64: case Iop_ClzNat64:
|
|
Mark Wielaard |
b3eda9 |
+ ty = Ity_I64;
|
|
Mark Wielaard |
b3eda9 |
+ shrOp = Iop_Shr64;
|
|
Mark Wielaard |
b3eda9 |
+ notOp = Iop_Not64;
|
|
Mark Wielaard |
b3eda9 |
+ andOp = Iop_And64;
|
|
Mark Wielaard |
b3eda9 |
+ mkRight = mkRight64;
|
|
Mark Wielaard |
b3eda9 |
+ break;
|
|
Mark Wielaard |
b3eda9 |
+ default:
|
|
Mark Wielaard |
b3eda9 |
+ ppIROp(czop);
|
|
Mark Wielaard |
b3eda9 |
+ VG_(tool_panic)("memcheck:expensiveCountLeadingZeroes");
|
|
Mark Wielaard |
b3eda9 |
+ }
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ // This is in principle very similar to how expensiveCountTrailingZeroes
|
|
Mark Wielaard |
b3eda9 |
+ // works. That function computed an "improver", which it used to mask
|
|
Mark Wielaard |
b3eda9 |
+ // off all but the rightmost 1-bit and the zeroes to the right of it,
|
|
Mark Wielaard |
b3eda9 |
+ // hence removing irrelevant bits from the input. Here, we play the
|
|
Mark Wielaard |
b3eda9 |
+ // exact same game but with the left-vs-right roles interchanged.
|
|
Mark Wielaard |
b3eda9 |
+ // Unfortunately calculation of the improver in this case is
|
|
Mark Wielaard |
b3eda9 |
+ // significantly more expensive.
|
|
Mark Wielaard |
b3eda9 |
+ //
|
|
Mark Wielaard |
b3eda9 |
+ // improver = ~(RIGHT(atom) >>u 1)
|
|
Mark Wielaard |
b3eda9 |
+ //
|
|
Mark Wielaard |
b3eda9 |
+ // That is, improver has its upper clz(atom)+1 bits equal to one;
|
|
Mark Wielaard |
b3eda9 |
+ // lower bits (if any) equal to zero. So it's exactly the right
|
|
Mark Wielaard |
b3eda9 |
+ // mask to use to remove the irrelevant undefined input bits.
|
|
Mark Wielaard |
b3eda9 |
+ /* Here are some examples:
|
|
Mark Wielaard |
b3eda9 |
+ atom = 0...0 1 U...U
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) = 0...0 1 1...1
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) >>u 1 = 0...0 0 1...1
|
|
Mark Wielaard |
b3eda9 |
+ ~(R(atom) >>u 1) = 1...1 1 0...0
|
|
Mark Wielaard |
b3eda9 |
+ which correctly describes which bits of |atom|
|
|
Mark Wielaard |
b3eda9 |
+ actually influence the result
|
|
Mark Wielaard |
b3eda9 |
+ A boundary case
|
|
Mark Wielaard |
b3eda9 |
+ atom = 0...0
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) = 0...0
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) >>u 1 = 0...0
|
|
Mark Wielaard |
b3eda9 |
+ ~(R(atom) >>u 1) = 1...1
|
|
Mark Wielaard |
b3eda9 |
+ also a correct mask for the input: all input bits
|
|
Mark Wielaard |
b3eda9 |
+ are relevant
|
|
Mark Wielaard |
b3eda9 |
+ Another boundary case
|
|
Mark Wielaard |
b3eda9 |
+ atom = 1 1..1
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) = 1 1..1
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) >>u 1 = 0 1..1
|
|
Mark Wielaard |
b3eda9 |
+ ~(R(atom) >>u 1) = 1 0..0
|
|
Mark Wielaard |
b3eda9 |
+ also a correct mask: only the leftmost input bit
|
|
Mark Wielaard |
b3eda9 |
+ is relevant
|
|
Mark Wielaard |
b3eda9 |
+ Now with misc U bits interspersed:
|
|
Mark Wielaard |
b3eda9 |
+ atom = 0...0 1 U...U 0 1 U...U
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) = 0...0 1 1...1 1 1 1...1
|
|
Mark Wielaard |
b3eda9 |
+ R(atom) >>u 1 = 0...0 0 1...1 1 1 1...1
|
|
Mark Wielaard |
b3eda9 |
+ ~(R(atom) >>u 1) = 1...1 1 0...0 0 0 0...0, also correct
|
|
Mark Wielaard |
b3eda9 |
+ (Per initial implementation of 15 Nov 2018)
|
|
Mark Wielaard |
b3eda9 |
+ */
|
|
Mark Wielaard |
b3eda9 |
+ improver = mkRight(mce, atom);
|
|
Mark Wielaard |
b3eda9 |
+ improver = assignNew('V', mce, ty, binop(shrOp, improver, mkU8(1)));
|
|
Mark Wielaard |
b3eda9 |
+ improver = assignNew('V', mce, ty, unop(notOp, improver));
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ // improved = vatom & improver
|
|
Mark Wielaard |
b3eda9 |
+ //
|
|
Mark Wielaard |
b3eda9 |
+ // That is, treat any V bits to the right of the leftmost clz(atom)+1
|
|
Mark Wielaard |
b3eda9 |
+ // bits as "defined".
|
|
Mark Wielaard |
b3eda9 |
improved = assignNew('V', mce, ty,
|
|
Mark Wielaard |
b3eda9 |
binop(andOp, vatom, improver));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
@@ -4705,6 +4866,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_RecipEst32F0x4:
|
|
Mark Wielaard |
b3eda9 |
return unary32F0x4(mce, vatom);
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ // These are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_32UtoV128:
|
|
Mark Wielaard |
b3eda9 |
case Iop_64UtoV128:
|
|
Mark Wielaard |
b3eda9 |
case Iop_Dup8x16:
|
|
Mark Wielaard |
b3eda9 |
@@ -4745,6 +4907,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_MulI128by10Carry:
|
|
Mark Wielaard |
b3eda9 |
case Iop_F16toF64x2:
|
|
Mark Wielaard |
b3eda9 |
case Iop_F64toF16x2:
|
|
Mark Wielaard |
b3eda9 |
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
|
|
Mark Wielaard |
b3eda9 |
return vatom;
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
case Iop_I32StoF128: /* signed I32 -> F128 */
|
|
Mark Wielaard |
b3eda9 |
@@ -4770,7 +4933,6 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_RoundF64toF64_NegINF:
|
|
Mark Wielaard |
b3eda9 |
case Iop_RoundF64toF64_PosINF:
|
|
Mark Wielaard |
b3eda9 |
case Iop_RoundF64toF64_ZERO:
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Clz64:
|
|
Mark Wielaard |
b3eda9 |
case Iop_D32toD64:
|
|
Mark Wielaard |
b3eda9 |
case Iop_I32StoD64:
|
|
Mark Wielaard |
b3eda9 |
case Iop_I32UtoD64:
|
|
Mark Wielaard |
b3eda9 |
@@ -4785,17 +4947,32 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_D64toD128:
|
|
Mark Wielaard |
b3eda9 |
return mkPCastTo(mce, Ity_I128, vatom);
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Clz32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_TruncF64asF32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_NegF32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_AbsF32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_F16toF32:
|
|
Mark Wielaard |
b3eda9 |
return mkPCastTo(mce, Ity_I32, vatom);
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Ctz32:
|
|
Mark Wielaard |
b3eda9 |
- case Iop_Ctz64:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Ctz32: case Iop_CtzNat32:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Ctz64: case Iop_CtzNat64:
|
|
Mark Wielaard |
b3eda9 |
return expensiveCountTrailingZeroes(mce, op, atom, vatom);
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Clz32: case Iop_ClzNat32:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Clz64: case Iop_ClzNat64:
|
|
Mark Wielaard |
b3eda9 |
+ return expensiveCountLeadingZeroes(mce, op, atom, vatom);
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ // PopCount32: this is slightly pessimistic. It is true that the
|
|
Mark Wielaard |
b3eda9 |
+ // result depends on all input bits, so that aspect of the PCast is
|
|
Mark Wielaard |
b3eda9 |
+ // correct. However, regardless of the input, only the lowest 6 bits
|
|
Mark Wielaard |
b3eda9 |
+ // out of the output can ever be undefined. So we could actually
|
|
Mark Wielaard |
b3eda9 |
+ // "improve" the results here by marking the top 26 bits of output as
|
|
Mark Wielaard |
b3eda9 |
+ // defined. A similar comment applies for PopCount64.
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_PopCount32:
|
|
Mark Wielaard |
b3eda9 |
+ return mkPCastTo(mce, Ity_I32, vatom);
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_PopCount64:
|
|
Mark Wielaard |
b3eda9 |
+ return mkPCastTo(mce, Ity_I64, vatom);
|
|
Mark Wielaard |
b3eda9 |
+
|
|
Mark Wielaard |
b3eda9 |
+ // These are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_1Uto64:
|
|
Mark Wielaard |
b3eda9 |
case Iop_1Sto64:
|
|
Mark Wielaard |
b3eda9 |
case Iop_8Uto64:
|
|
Mark Wielaard |
b3eda9 |
@@ -4821,6 +4998,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_V256to64_2: case Iop_V256to64_3:
|
|
Mark Wielaard |
b3eda9 |
return assignNew('V', mce, Ity_I64, unop(op, vatom));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ // These are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_64to32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_64HIto32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_1Uto32:
|
|
Mark Wielaard |
b3eda9 |
@@ -4830,8 +5008,10 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_16Sto32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_8Sto32:
|
|
Mark Wielaard |
b3eda9 |
case Iop_V128to32:
|
|
Mark Wielaard |
b3eda9 |
+ case Iop_Reverse8sIn32_x1:
|
|
Mark Wielaard |
b3eda9 |
return assignNew('V', mce, Ity_I32, unop(op, vatom));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ // These are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_8Sto16:
|
|
Mark Wielaard |
b3eda9 |
case Iop_8Uto16:
|
|
Mark Wielaard |
b3eda9 |
case Iop_32to16:
|
|
Mark Wielaard |
b3eda9 |
@@ -4840,6 +5020,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_GetMSBs8x16:
|
|
Mark Wielaard |
b3eda9 |
return assignNew('V', mce, Ity_I16, unop(op, vatom));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ // These are self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_1Uto8:
|
|
Mark Wielaard |
b3eda9 |
case Iop_1Sto8:
|
|
Mark Wielaard |
b3eda9 |
case Iop_16to8:
|
|
Mark Wielaard |
b3eda9 |
@@ -4868,6 +5049,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_Not16:
|
|
Mark Wielaard |
b3eda9 |
case Iop_Not8:
|
|
Mark Wielaard |
b3eda9 |
case Iop_Not1:
|
|
Mark Wielaard |
b3eda9 |
+ // FIXME JRS 2018-Nov-15. This is surely not correct!
|
|
Mark Wielaard |
b3eda9 |
return vatom;
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
case Iop_CmpNEZ8x8:
|
|
Mark Wielaard |
b3eda9 |
@@ -4929,6 +5111,7 @@ IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
|
|
Mark Wielaard |
b3eda9 |
case Iop_Ctz64x2:
|
|
Mark Wielaard |
b3eda9 |
return mkPCast64x2(mce, vatom);
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
+ // This is self-shadowing.
|
|
Mark Wielaard |
b3eda9 |
case Iop_PwBitMtxXpose64x2:
|
|
Mark Wielaard |
b3eda9 |
return assignNew('V', mce, Ity_V128, unop(op, vatom));
|
|
Mark Wielaard |
b3eda9 |
|
|
Mark Wielaard |
b3eda9 |
diff --git a/memcheck/tests/vbit-test/irops.c b/memcheck/tests/vbit-test/irops.c
|
|
Mark Wielaard |
b3eda9 |
index bfd82fc..e8bf67d 100644
|
|
Mark Wielaard |
b3eda9 |
--- a/memcheck/tests/vbit-test/irops.c
|
|
Mark Wielaard |
b3eda9 |
+++ b/memcheck/tests/vbit-test/irops.c
|
|
Mark Wielaard |
b3eda9 |
@@ -111,6 +111,12 @@ static irop_t irops[] = {
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_Clz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_Ctz64, UNDEF_ALL), .s390x = 0, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_Ctz32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 1, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_ClzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 }, // ppc32 asserts
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_ClzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_CtzNat64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_CtzNat32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_PopCount64, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 0, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_PopCount32, UNDEF_ALL), .s390x = 0, .amd64 = 0, .x86 = 0, .arm = 0, .ppc64 = 1, .ppc32 = 1, .mips32 =0, .mips64 = 0 },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_CmpLT32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_CmpLT64S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 0, .arm = 0, .ppc64 = 0, .ppc32 = 0, .mips32 =0, .mips64 = 1 }, // ppc, mips assert
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_CmpLE32S, UNDEF_ALL), .s390x = 1, .amd64 = 1, .x86 = 1, .arm = 1, .ppc64 = 1, .ppc32 = 1, .mips32 =1, .mips64 = 1 },
|
|
Mark Wielaard |
b3eda9 |
@@ -336,6 +342,7 @@ static irop_t irops[] = {
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_Sad8Ux4, UNDEF_UNKNOWN), },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_CmpNEZ16x2, UNDEF_UNKNOWN), },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_CmpNEZ8x4, UNDEF_UNKNOWN), },
|
|
Mark Wielaard |
b3eda9 |
+ { DEFOP(Iop_Reverse8sIn32_x1, UNDEF_UNKNOWN) },
|
|
Mark Wielaard |
b3eda9 |
/* ------------------ 64-bit SIMD FP ------------------------ */
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_I32UtoFx2, UNDEF_UNKNOWN), },
|
|
Mark Wielaard |
b3eda9 |
{ DEFOP(Iop_I32StoFx2, UNDEF_UNKNOWN), },
|