diff --git a/valgrind-3.14.0-set_AV_CR6.patch b/valgrind-3.14.0-set_AV_CR6.patch
new file mode 100644
index 0000000..0dc67cd
--- /dev/null
+++ b/valgrind-3.14.0-set_AV_CR6.patch
@@ -0,0 +1,145 @@
+commit dc1523fb3550b4ed9dd4c178741626daaa474da7
+Author: Mark Wielaard
+Date:   Mon Dec 10 17:18:20 2018 +0100
+
+    PR386945 set_AV_CR6 patch
+
+    https://bugs.kde.org/show_bug.cgi?id=386945#c62
+
+diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
+index ec2f90a..c3cc6d0 100644
+--- a/VEX/priv/guest_ppc_toIR.c
++++ b/VEX/priv/guest_ppc_toIR.c
+@@ -2062,45 +2062,88 @@ static void set_CR0 ( IRExpr* result )
+ static void set_AV_CR6 ( IRExpr* result, Bool test_all_ones )
+ {
+    /* CR6[0:3] = {all_ones, 0, all_zeros, 0}
+-      all_ones  = (v[0] && v[1] && v[2] && v[3])
+-      all_zeros = ~(v[0] || v[1] || v[2] || v[3])
++      32 bit: all_zeros = (v[0] || v[1] || v[2] || v[3]) == 0x0000'0000
++              all_ones  = ~(v[0] && v[1] && v[2] && v[3]) == 0x0000'0000
++              where v[] denotes 32-bit lanes
++      or
++      64 bit: all_zeros = (v[0] || v[1]) == 0x0000'0000'0000'0000
++              all_ones  = ~(v[0] && v[1]) == 0x0000'0000'0000'0000
++              where v[] denotes 64-bit lanes
++
++      The 32- and 64-bit versions compute the same thing, but the 64-bit one
++      tries to be a bit more efficient.
+    */
+-   IRTemp v0 = newTemp(Ity_V128);
+-   IRTemp v1 = newTemp(Ity_V128);
+-   IRTemp v2 = newTemp(Ity_V128);
+-   IRTemp v3 = newTemp(Ity_V128);
+-   IRTemp rOnes  = newTemp(Ity_I8);
+-   IRTemp rZeros = newTemp(Ity_I8);
+-
+    vassert(typeOfIRExpr(irsb->tyenv,result) == Ity_V128);
+ 
+-   assign( v0, result );
+-   assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
+-   assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
+-   assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
++   IRTemp overlappedOred  = newTemp(Ity_V128);
++   IRTemp overlappedAnded = newTemp(Ity_V128);
++
++   if (mode64) {
++      IRTemp v0 = newTemp(Ity_V128);
++      IRTemp v1 = newTemp(Ity_V128);
++      assign( v0, result );
++      assign( v1, binop(Iop_ShrV128, result, mkU8(64)) );
++      assign(overlappedOred,
++             binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)));
++      assign(overlappedAnded,
++             binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)));
++   } else {
++      IRTemp v0 = newTemp(Ity_V128);
++      IRTemp v1 = newTemp(Ity_V128);
++      IRTemp v2 = newTemp(Ity_V128);
++      IRTemp v3 = newTemp(Ity_V128);
++      assign( v0, result );
++      assign( v1, binop(Iop_ShrV128, result, mkU8(32)) );
++      assign( v2, binop(Iop_ShrV128, result, mkU8(64)) );
++      assign( v3, binop(Iop_ShrV128, result, mkU8(96)) );
++      assign(overlappedOred,
++             binop(Iop_OrV128,
++                   binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
++                   binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))));
++      assign(overlappedAnded,
++             binop(Iop_AndV128,
++                   binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
++                   binop(Iop_AndV128, mkexpr(v2), mkexpr(v3))));
++   }
++
++   IRTemp rOnes   = newTemp(Ity_I8);
++   IRTemp rZeroes = newTemp(Ity_I8);
+ 
+-   assign( rZeros, unop(Iop_1Uto8,
+-       binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+-             unop(Iop_Not32,
+-                  unop(Iop_V128to32,
+-                       binop(Iop_OrV128,
+-                             binop(Iop_OrV128, mkexpr(v0), mkexpr(v1)),
+-                             binop(Iop_OrV128, mkexpr(v2), mkexpr(v3))))
+-                  ))) );
++   if (mode64) {
++      assign(rZeroes,
++             unop(Iop_1Uto8,
++                  binop(Iop_CmpEQ64,
++                        mkU64(0),
++                        unop(Iop_V128to64, mkexpr(overlappedOred)))));
++      assign(rOnes,
++             unop(Iop_1Uto8,
++                  binop(Iop_CmpEQ64,
++                        mkU64(0),
++                        unop(Iop_Not64,
++                             unop(Iop_V128to64, mkexpr(overlappedAnded))))));
++   } else {
++      assign(rZeroes,
++             unop(Iop_1Uto8,
++                  binop(Iop_CmpEQ32,
++                        mkU32(0),
++                        unop(Iop_V128to32, mkexpr(overlappedOred)))));
++      assign(rOnes,
++             unop(Iop_1Uto8,
++                  binop(Iop_CmpEQ32,
++                        mkU32(0),
++                        unop(Iop_Not32,
++                             unop(Iop_V128to32, mkexpr(overlappedAnded))))));
++   }
++
++   // rOnes might not be used below. But iropt will remove it, so there's no
++   // inefficiency as a result.
+ 
+    if (test_all_ones) {
+-      assign( rOnes, unop(Iop_1Uto8,
+-         binop(Iop_CmpEQ32, mkU32(0xFFFFFFFF),
+-            unop(Iop_V128to32,
+-                 binop(Iop_AndV128,
+-                       binop(Iop_AndV128, mkexpr(v0), mkexpr(v1)),
+-                       binop(Iop_AndV128, mkexpr(v2), mkexpr(v3)))
+-                 ))) );
+       putCR321( 6, binop(Iop_Or8,
+                          binop(Iop_Shl8, mkexpr(rOnes), mkU8(3)),
+-                         binop(Iop_Shl8, mkexpr(rZeros), mkU8(1))) );
++                         binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1))) );
+    } else {
+-      putCR321( 6, binop(Iop_Shl8, mkexpr(rZeros), mkU8(1)) );
++      putCR321( 6, binop(Iop_Shl8, mkexpr(rZeroes), mkU8(1)) );
+    }
+    putCR0( 6, mkU8(0) );
+ }
+diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
+index c24db91..7f69ee3 100644
+--- a/memcheck/mc_translate.c
++++ b/memcheck/mc_translate.c
+@@ -8322,6 +8322,9 @@ IRSB* MC_(instrument) ( VgCallbackClosure* closure,
+ #  elif defined(VGA_amd64)
+    mce.dlbo.dl_Add64           = DLauto;
+    mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
++#  elif defined(VGA_ppc64le)
++   // Needed by (at least) set_AV_CR6() in the front end.
++   mce.dlbo.dl_CmpEQ64_CmpNE64 = DLexpensive;
+ #  endif
+ 
+    /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
diff --git a/valgrind.spec b/valgrind.spec
index 2c82094..68ca585 100644
--- a/valgrind.spec
+++ b/valgrind.spec
@@ -142,6 +142,7 @@ Patch20: valgrind-3.14.0-ppc64-unaligned-words.patch
 Patch21: valgrind-3.14.0-ppc64-lxvd2x.patch
 Patch22: valgrind-3.14.0-ppc64-unaligned-vecs.patch
 Patch23: valgrind-3.14.0-ppc64-lxvb16x.patch
+Patch24: valgrind-3.14.0-set_AV_CR6.patch
 
 %if %{build_multilib}
 # Ensure glibc{,-devel} is installed for both multilib arches
@@ -298,6 +299,7 @@ Valgrind User Manual for details.
 %patch21 -p1
 %patch22 -p1
 %patch23 -p1
+%patch24 -p1
 
 %build
 CC=gcc
@@ -539,6 +541,7 @@ fi
 - Add valgrind-3.14.0-ppc64-lxvd2x.patch
 - Add valgrind-3.14.0-ppc64-unaligned-vecs.patch
 - Add valgrind-3.14.0-ppc64-lxvb16x.patch
+- Add valgrind-3.14.0-set_AV_CR6.patch
 
 * Sat Dec 1 2018 Mark Wielaard - 3.14.0-5
 - Add valgrind-3.14.0-wcsncmp.patch (#1645971)
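
For reference, below is a minimal standalone C sketch (not part of the patch) of what set_AV_CR6 computes, assuming the 128-bit vector result is held as two uint64_t halves. The helper name cr6_for_vector and the sample values in main are illustrative only, not taken from the Valgrind sources.

/*
 * Illustrative sketch only: CR6[0:3] = { all_ones, 0, all_zeros, 0 },
 * assuming the 128-bit vector result is represented as two uint64_t halves.
 */
#include <stdint.h>
#include <stdio.h>

static unsigned cr6_for_vector(uint64_t hi, uint64_t lo, int test_all_ones)
{
   /* all_zeros: every bit of the 128-bit value is 0, i.e. (hi | lo) == 0.
      This corresponds to the overlappedOred temporary in the 64-bit path. */
   unsigned all_zeros = ((hi | lo) == 0);

   /* all_ones: every bit of the 128-bit value is 1, i.e. ~(hi & lo) == 0.
      This corresponds to the overlappedAnded temporary in the 64-bit path. */
   unsigned all_ones = (~(hi & lo) == 0);

   /* Pack as CR6[0:3] = { all_ones, 0, all_zeros, 0 }, mirroring
      putCR321(6, (rOnes << 3) | (rZeroes << 1)) in the IR above. */
   unsigned cr6 = all_zeros << 1;
   if (test_all_ones)
      cr6 |= all_ones << 3;
   return cr6;
}

int main(void)
{
   printf("%u\n", cr6_for_vector(~0ULL, ~0ULL, 1)); /* all ones  -> 8 */
   printf("%u\n", cr6_for_vector(0, 0, 1));         /* all zeros -> 2 */
   printf("%u\n", cr6_for_vector(0, 1, 1));         /* mixed     -> 0 */
   return 0;
}

The (hi | lo) == 0 and ~(hi & lo) == 0 forms match the overlappedOred/overlappedAnded temporaries the patch introduces for mode64; per the patch comment, the 32-bit path reaches the same result by first folding four 32-bit lanes together with OrV128/AndV128.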