From c858e5c9f7ba7d56e28605d066a2577d536654b0 Mon Sep 17 00:00:00 2001
From: Mark Wielaard
Date: Dec 15 2020 15:42:31 +0000
Subject: 3.16.1-10

- Add valgrind-3.16.1-arm64-fma.patch

---

diff --git a/valgrind-3.16.1-arm64-fma.patch b/valgrind-3.16.1-arm64-fma.patch
new file mode 100644
index 0000000..e00292f
--- /dev/null
+++ b/valgrind-3.16.1-arm64-fma.patch
@@ -0,0 +1,716 @@
+From 04cdc29b007594a0e58ffef0c9dd87df3ea595ea Mon Sep 17 00:00:00 2001
+From: Mark Wielaard
+Date: Wed, 14 Oct 2020 06:11:34 -0400
+Subject: [PATCH] arm64 VEX frontend and backend support for
+ Iop_M{Add,Sub}F{32,64}
+
+The arm64 frontend used to translate the scalar fmadd, fmsub, fnmadd
+and fnmsub instructions into separate addition/subtraction and
+multiplication instructions, which caused rounding issues.
+
+This patch turns them into Iop_M{Add,Sub}F{32,64} instructions
+(with some arguments negated), and the backend now emits fmadd or fmsub
+instructions.
+
+Alexandra Hajkova added tests and fixed up the
+implementation to make sure rounding (and sign) are correct now.
+
+https://bugs.kde.org/show_bug.cgi?id=426014
+---
+ VEX/priv/guest_arm64_toIR.c           |  58 ++++++++---
+ VEX/priv/host_arm64_defs.c            | 136 +++++++++++++++++++++++++-
+ VEX/priv/host_arm64_defs.h            |  30 ++++++
+ VEX/priv/host_arm64_isel.c            |  39 ++++++++
+ none/tests/arm64/Makefile.am          |   6 +-
+ none/tests/arm64/fmadd_sub.c          |  98 +++++++++++++++++++
+ none/tests/arm64/fmadd_sub.stderr.exp |   0
+ none/tests/arm64/fmadd_sub.stdout.exp | 125 +++++++++++++++++++++++
+ none/tests/arm64/fmadd_sub.vgtest     |   3 +
+ 9 files changed, 479 insertions(+), 16 deletions(-)
+ create mode 100644 none/tests/arm64/fmadd_sub.c
+ create mode 100644 none/tests/arm64/fmadd_sub.stderr.exp
+ create mode 100644 none/tests/arm64/fmadd_sub.stdout.exp
+ create mode 100644 none/tests/arm64/fmadd_sub.vgtest
+
+diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
+index 556b85a6a..d242d43c0 100644
+--- a/VEX/priv/guest_arm64_toIR.c
++++ b/VEX/priv/guest_arm64_toIR.c
+@@ -286,6 +286,12 @@ static IRExpr* triop ( IROp op, IRExpr* a1, IRExpr* a2, IRExpr* a3 )
+    return IRExpr_Triop(op, a1, a2, a3);
+ }
+ 
++static IRExpr* qop ( IROp op, IRExpr* a1, IRExpr* a2,
++                     IRExpr* a3, IRExpr* a4 )
++{
++   return IRExpr_Qop(op, a1, a2, a3, a4);
++}
++
+ static IRExpr* loadLE ( IRType ty, IRExpr* addr )
+ {
+    return IRExpr_Load(Iend_LE, ty, addr);
+@@ -532,6 +538,22 @@ static IROp mkADDF ( IRType ty ) {
+    }
+ }
+ 
++static IROp mkFMADDF ( IRType ty ) {
++   switch (ty) {
++      case Ity_F32: return Iop_MAddF32;
++      case Ity_F64: return Iop_MAddF64;
++      default: vpanic("mkFMADDF");
++   }
++}
++
++static IROp mkFMSUBF ( IRType ty ) {
++   switch (ty) {
++      case Ity_F32: return Iop_MSubF32;
++      case Ity_F64: return Iop_MSubF64;
++      default: vpanic("mkFMSUBF");
++   }
++}
++
+ static IROp mkSUBF ( IRType ty ) {
+    switch (ty) {
+       case Ity_F32: return Iop_SubF32;
+@@ -14368,30 +14390,40 @@ Bool dis_AdvSIMD_fp_data_proc_3_source(/*MB_OUT*/DisResult* dres, UInt insn)
+       where Fx=Dx when sz=1, Fx=Sx when sz=0
+ 
+       -----SPEC------          ----IMPL----
+-      fmadd   a  +    n  * m   a + n * m
+-      fmsub   a  +  (-n) * m   a - n * m
+-      fnmadd (-a) +  (-n) * m  -(a + n * m)
+-      fnmsub (-a) +    n  * m  -(a - n * m)
++      fmadd   a  +    n  * m   fmadd (a, n, m)
++      fmsub   a  +  (-n) * m   fmsub (a, n, m)
++      fnmadd (-a) +  (-n) * m  fmadd (-a, -n, m)
++      fnmsub (-a) +    n  * m  fmadd (-a, n, m)
++
++      Note Iop_MAdd/SubF32/64 take arguments in the order: rm, N, M, A
+    */
+    Bool isD = (ty & 1) == 1;
+    UInt ix = (bitO1 << 1) | bitO0;
+    IRType ity = isD ?
Ity_F64 : Ity_F32; +- IROp opADD = mkADDF(ity); +- IROp opSUB = mkSUBF(ity); +- IROp opMUL = mkMULF(ity); ++ IROp opFMADD = mkFMADDF(ity); ++ IROp opFMSUB = mkFMSUBF(ity); + IROp opNEG = mkNEGF(ity); + IRTemp res = newTemp(ity); + IRExpr* eA = getQRegLO(aa, ity); + IRExpr* eN = getQRegLO(nn, ity); + IRExpr* eM = getQRegLO(mm, ity); + IRExpr* rm = mkexpr(mk_get_IR_rounding_mode()); +- IRExpr* eNxM = triop(opMUL, rm, eN, eM); + switch (ix) { +- case 0: assign(res, triop(opADD, rm, eA, eNxM)); break; +- case 1: assign(res, triop(opSUB, rm, eA, eNxM)); break; +- case 2: assign(res, unop(opNEG, triop(opADD, rm, eA, eNxM))); break; +- case 3: assign(res, unop(opNEG, triop(opSUB, rm, eA, eNxM))); break; +- default: vassert(0); ++ case 0: /* FMADD */ ++ assign(res, qop(opFMADD, rm, eN, eM, eA)); ++ break; ++ case 1: /* FMSUB */ ++ assign(res, qop(opFMSUB, rm, eN, eM, eA)); ++ break; ++ case 2: /* FNMADD */ ++ assign(res, qop(opFMADD, rm, unop(opNEG, eN), eM, ++ unop(opNEG,eA))); ++ break; ++ case 3: /* FNMSUB */ ++ assign(res, qop(opFMADD, rm, eN, eM, unop(opNEG, eA))); ++ break; ++ default: ++ vassert(0); + } + putQReg128(dd, mkV128(0x0000)); + putQRegLO(dd, mkexpr(res)); +diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c +index e4ef56986..13b497f60 100644 +--- a/VEX/priv/host_arm64_defs.c ++++ b/VEX/priv/host_arm64_defs.c +@@ -546,6 +546,14 @@ static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) { + } + } + ++static const HChar* showARM64FpTriOp ( ARM64FpTriOp op ) { ++ switch (op) { ++ case ARM64fpt_FMADD: return "fmadd"; ++ case ARM64fpt_FMSUB: return "fmsub"; ++ default: vpanic("showARM64FpTriOp"); ++ } ++} ++ + static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) { + switch (op) { + case ARM64fpu_NEG: return "neg "; +@@ -1154,6 +1162,28 @@ ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, + i->ARM64in.VBinS.argR = argR; + return i; + } ++ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, ++ HReg dst, HReg arg1, HReg arg2, HReg arg3 ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_VTriD; ++ i->ARM64in.VTriD.op = op; ++ i->ARM64in.VTriD.dst = dst; ++ i->ARM64in.VTriD.arg1 = arg1; ++ i->ARM64in.VTriD.arg2 = arg2; ++ i->ARM64in.VTriD.arg3 = arg3; ++ return i; ++} ++ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, ++ HReg dst, HReg arg1, HReg arg2, HReg arg3 ) { ++ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); ++ i->tag = ARM64in_VTriS; ++ i->ARM64in.VTriS.op = op; ++ i->ARM64in.VTriS.dst = dst; ++ i->ARM64in.VTriS.arg1 = arg1; ++ i->ARM64in.VTriS.arg2 = arg2; ++ i->ARM64in.VTriS.arg3 = arg3; ++ return i; ++} + ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) { + ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr)); + i->tag = ARM64in_VCmpD; +@@ -1756,6 +1786,26 @@ void ppARM64Instr ( const ARM64Instr* i ) { + vex_printf(", "); + ppHRegARM64asSreg(i->ARM64in.VBinS.argR); + return; ++ case ARM64in_VTriD: ++ vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriD.op)); ++ ppHRegARM64(i->ARM64in.VTriD.dst); ++ vex_printf(", "); ++ ppHRegARM64(i->ARM64in.VTriD.arg1); ++ vex_printf(", "); ++ ppHRegARM64(i->ARM64in.VTriD.arg2); ++ vex_printf(", "); ++ ppHRegARM64(i->ARM64in.VTriD.arg3); ++ return; ++ case ARM64in_VTriS: ++ vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriS.op)); ++ ppHRegARM64asSreg(i->ARM64in.VTriS.dst); ++ vex_printf(", "); ++ ppHRegARM64asSreg(i->ARM64in.VTriS.arg1); ++ vex_printf(", "); ++ ppHRegARM64asSreg(i->ARM64in.VTriS.arg2); ++ vex_printf(", "); ++ ppHRegARM64asSreg(i->ARM64in.VTriS.arg3); ++ 
return; + case ARM64in_VCmpD: + vex_printf("fcmp "); + ppHRegARM64(i->ARM64in.VCmpD.argL); +@@ -2197,6 +2247,18 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 ) + addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL); + addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR); + return; ++ case ARM64in_VTriD: ++ addHRegUse(u, HRmWrite, i->ARM64in.VTriD.dst); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg1); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg2); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg3); ++ return; ++ case ARM64in_VTriS: ++ addHRegUse(u, HRmWrite, i->ARM64in.VTriS.dst); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg1); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg2); ++ addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg3); ++ return; + case ARM64in_VCmpD: + addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL); + addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR); +@@ -2454,6 +2516,18 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 ) + i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL); + i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR); + return; ++ case ARM64in_VTriD: ++ i->ARM64in.VTriD.dst = lookupHRegRemap(m, i->ARM64in.VTriD.dst); ++ i->ARM64in.VTriD.arg1 = lookupHRegRemap(m, i->ARM64in.VTriD.arg1); ++ i->ARM64in.VTriD.arg2 = lookupHRegRemap(m, i->ARM64in.VTriD.arg2); ++ i->ARM64in.VTriD.arg3 = lookupHRegRemap(m, i->ARM64in.VTriD.arg3); ++ return; ++ case ARM64in_VTriS: ++ i->ARM64in.VTriS.dst = lookupHRegRemap(m, i->ARM64in.VTriS.dst); ++ i->ARM64in.VTriS.arg1 = lookupHRegRemap(m, i->ARM64in.VTriS.arg1); ++ i->ARM64in.VTriS.arg2 = lookupHRegRemap(m, i->ARM64in.VTriS.arg2); ++ i->ARM64in.VTriS.arg3 = lookupHRegRemap(m, i->ARM64in.VTriS.arg3); ++ return; + case ARM64in_VCmpD: + i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL); + i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR); +@@ -2812,7 +2886,8 @@ static inline UInt qregEnc ( HReg r ) + #define X11110011 BITS8(1,1,1,1,0,0,1,1) + #define X11110101 BITS8(1,1,1,1,0,1,0,1) + #define X11110111 BITS8(1,1,1,1,0,1,1,1) +- ++#define X11111000 BITS8(1,1,1,1,1,0,0,0) ++#define X11111010 BITS8(1,1,1,1,1,0,1,0) + + /* --- 4 fields --- */ + +@@ -2972,6 +3047,27 @@ static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3, + } + + ++static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4, ++ UInt f5, UInt f6, UInt f7 ) { ++ vassert(3+8+5+1+5+5+5 == 32); ++ vassert(f1 < (1<<3)); ++ vassert(f2 < (1<<8)); ++ vassert(f3 < (1<<5)); ++ vassert(f4 < (1<<1)); ++ vassert(f5 < (1<<5)); ++ vassert(f6 < (1<<5)); ++ vassert(f7 < (1<<5)); ++ UInt w = 0; ++ w = (w << 3) | f1; ++ w = (w << 8) | f2; ++ w = (w << 5) | f3; ++ w = (w << 1) | f4; ++ w = (w << 5) | f5; ++ w = (w << 5) | f6; ++ w = (w << 5) | f7; ++ return w; ++} ++ + //ZZ #define X0000 BITS4(0,0,0,0) + //ZZ #define X0001 BITS4(0,0,0,1) + //ZZ #define X0010 BITS4(0,0,1,0) +@@ -4339,6 +4435,44 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc, + = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD); + goto done; + } ++ case ARM64in_VTriD: { ++ /* 31 20 15 14 9 4 ++ 000 11111 010 m 0 a n d FMADD Dd,Dn,Dm,Da ++ ---------------- 1 ------ FMSUB ----------- ++ */ ++ UInt dD = dregEnc(i->ARM64in.VTriD.dst); ++ UInt dN = dregEnc(i->ARM64in.VTriD.arg1); ++ UInt dM = dregEnc(i->ARM64in.VTriD.arg2); ++ UInt dA = dregEnc(i->ARM64in.VTriD.arg3); ++ UInt b15 = 2; /* impossible */ ++ switch (i->ARM64in.VTriD.op) { ++ case ARM64fpt_FMADD: b15 = 0; break; ++ case ARM64fpt_FMSUB: b15 = 
1; break; ++ default: goto bad; ++ } ++ vassert(b15 < 2); ++ *p++ = X_3_8_5_1_5_5_5(X000, X11111010, dM, b15, dA, dN, dD); ++ goto done; ++ } ++ case ARM64in_VTriS: { ++ /* 31 20 15 14 9 4 ++ 000 11111 000 m 0 a n d FMADD Dd,Dn,Dm,Da ++ ---------------- 1 ------ FMSUB ----------- ++ */ ++ UInt dD = dregEnc(i->ARM64in.VTriD.dst); ++ UInt dN = dregEnc(i->ARM64in.VTriD.arg1); ++ UInt dM = dregEnc(i->ARM64in.VTriD.arg2); ++ UInt dA = dregEnc(i->ARM64in.VTriD.arg3); ++ UInt b15 = 2; /* impossible */ ++ switch (i->ARM64in.VTriD.op) { ++ case ARM64fpt_FMADD: b15 = 0; break; ++ case ARM64fpt_FMSUB: b15 = 1; break; ++ default: goto bad; ++ } ++ vassert(b15 < 2); ++ *p++ = X_3_8_5_1_5_5_5(X000, X11111000, dM, b15, dA, dN, dD); ++ goto done; ++ } + case ARM64in_VCmpD: { + /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */ + UInt dN = dregEnc(i->ARM64in.VCmpD.argL); +diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h +index 05dba7ab8..5a82564ce 100644 +--- a/VEX/priv/host_arm64_defs.h ++++ b/VEX/priv/host_arm64_defs.h +@@ -289,6 +289,14 @@ typedef + } + ARM64FpBinOp; + ++typedef ++ enum { ++ ARM64fpt_FMADD=105, ++ ARM64fpt_FMSUB, ++ ARM64fpt_INVALID ++ } ++ ARM64FpTriOp; ++ + typedef + enum { + ARM64fpu_NEG=110, +@@ -498,6 +506,8 @@ typedef + ARM64in_VUnaryS, + ARM64in_VBinD, + ARM64in_VBinS, ++ ARM64in_VTriD, ++ ARM64in_VTriS, + ARM64in_VCmpD, + ARM64in_VCmpS, + ARM64in_VFCSel, +@@ -799,6 +809,22 @@ typedef + HReg argL; + HReg argR; + } VBinS; ++ /* 64-bit FP ternary arithmetic */ ++ struct { ++ ARM64FpTriOp op; ++ HReg dst; ++ HReg arg1; ++ HReg arg2; ++ HReg arg3; ++ } VTriD; ++ /* 32-bit FP ternary arithmetic */ ++ struct { ++ ARM64FpTriOp op; ++ HReg dst; ++ HReg arg1; ++ HReg arg2; ++ HReg arg3; ++ } VTriS; + /* 64-bit FP compare */ + struct { + HReg argL; +@@ -970,6 +996,10 @@ extern ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ); + extern ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ); + extern ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op, HReg, HReg, HReg ); + extern ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op, HReg, HReg, HReg ); ++extern ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op, HReg dst, ++ HReg, HReg, HReg ); ++extern ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op, HReg dst, ++ HReg, HReg, HReg ); + extern ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ); + extern ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ); + extern ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR, +diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c +index 2f19eab81..da1218715 100644 +--- a/VEX/priv/host_arm64_isel.c ++++ b/VEX/priv/host_arm64_isel.c +@@ -3255,6 +3255,25 @@ static HReg iselDblExpr_wrk ( ISelEnv* env, IRExpr* e ) + } + } + ++ if (e->tag == Iex_Qop) { ++ IRQop* qop = e->Iex.Qop.details; ++ ARM64FpTriOp triop = ARM64fpt_INVALID; ++ switch (qop->op) { ++ case Iop_MAddF64: triop = ARM64fpt_FMADD; break; ++ case Iop_MSubF64: triop = ARM64fpt_FMSUB; break; ++ default: break; ++ } ++ if (triop != ARM64fpt_INVALID) { ++ HReg N = iselDblExpr(env, qop->arg2); ++ HReg M = iselDblExpr(env, qop->arg3); ++ HReg A = iselDblExpr(env, qop->arg4); ++ HReg dst = newVRegD(env); ++ set_FPCR_rounding_mode(env, qop->arg1); ++ addInstr(env, ARM64Instr_VTriD(triop, dst, N, M, A)); ++ return dst; ++ } ++ } ++ + if (e->tag == Iex_ITE) { + /* ITE(ccexpr, iftrue, iffalse) */ + ARM64CondCode cc; +@@ -3450,6 +3469,26 @@ static HReg iselFltExpr_wrk ( ISelEnv* env, IRExpr* e ) + return dst; + } + ++ if (e->tag 
== Iex_Qop) {
++      IRQop* qop = e->Iex.Qop.details;
++      ARM64FpTriOp triop = ARM64fpt_INVALID;
++      switch (qop->op) {
++         case Iop_MAddF32: triop = ARM64fpt_FMADD; break;
++         case Iop_MSubF32: triop = ARM64fpt_FMSUB; break;
++         default: break;
++      }
++
++      if (triop != ARM64fpt_INVALID) {
++         HReg N = iselFltExpr(env, qop->arg2);
++         HReg M = iselFltExpr(env, qop->arg3);
++         HReg A = iselFltExpr(env, qop->arg4);
++         HReg dst = newVRegD(env);
++         set_FPCR_rounding_mode(env, qop->arg1);
++         addInstr(env, ARM64Instr_VTriS(triop, dst, N, M, A));
++         return dst;
++      }
++   }
++
+    ppIRExpr(e);
+    vpanic("iselFltExpr_wrk");
+ }
+diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
+index 7b3ebbdca..4ecab36ad 100644
+--- a/none/tests/arm64/Makefile.am
++++ b/none/tests/arm64/Makefile.am
+@@ -10,14 +10,16 @@ EXTRA_DIST = \
+ 	integer.stdout.exp integer.stderr.exp integer.vgtest \
+ 	memory.stdout.exp memory.stderr.exp memory.vgtest \
+ 	atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
+-	simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest
++	simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \
++	fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest
+ 
+ check_PROGRAMS = \
+ 	allexec \
+ 	cvtf_imm \
+ 	fp_and_simd \
+ 	integer \
+-	memory
++	memory \
++	fmadd_sub
+ 
+ if BUILD_ARMV8_CRC_TESTS
+ check_PROGRAMS += crc32
+diff --git a/none/tests/arm64/fmadd_sub.c b/none/tests/arm64/fmadd_sub.c
+new file mode 100644
+index 000000000..dcab22d1b
+--- /dev/null
++++ b/none/tests/arm64/fmadd_sub.c
+@@ -0,0 +1,98 @@
++#include <stdio.h>
++#include <stdint.h>
++#include
++#include
++
++#define COUNT 5
++
++static void
++print_float(const char *ident, float x)
++{
++   union
++   {
++      float f;
++      uint32_t i;
++   } u;
++
++   u.f = x;
++   printf("%s = %08x = %.17g\n", ident, u.i, x);
++}
++
++static void
++print_double(const char *ident, double x)
++{
++   union
++   {
++      double f;
++      uint64_t i;
++   } u;
++
++   u.f = x;
++   printf("%s = %016lx = %.17g\n", ident, u.i, x);
++}
++
++int
++main(int argc, char **argv)
++{
++   float x[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ;
++   float y[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF };
++   float z[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF };
++   float dst = -5;
++
++   double dx[] = { 55, 0.98076171874999996, 0, 1, 0xFFFFFFFF } ;
++   double dy[] = { 0.69314718055994529, 1.015625, 0, 1, 0xFFFFFFFF };
++   double dz[] = { 38.123094930796988, 1, 0, 1, 0xFFFFFFFF };
++   double ddst= -5;
++
++   int i;
++
++   for (i = 0; i < COUNT; i++) {
++      //32bit variant
++      asm("fmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
++      printf("FMADD 32bit: dst = z + x * y\n");
++      printf("%f = %f + %f * %f\n", dst, z[i], x[i], y[i]);
++      print_float("dst", dst);
++
++      // Floating-point negated fused multiply-add
++      asm("fnmadd %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
++      printf("FNMADD 32bit: dst = -z + (-x) * y\n");
++      printf("%f = -%f + (-%f) * %f\n", dst, z[i], x[i], y[i]);
++      print_float("dst", dst);
++
++      asm("fmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
++      printf("FMSUB 32bit: dst = z + (-x) * y\n");
++      printf("%f = %f + (-%f) * %f\n", dst, z[i], x[i], y[i]);
++      print_float("dst", dst);
++
++      asm("fnmsub %s0, %s1, %s2, %s3\n;" : "=w"(dst) : "w"(x[i]), "w"(y[i]), "w"(z[i]));
++      printf("FNMSUB 32bit: dst = -z + x * y\n");
++      printf("%f = -%f + %f * %f\n", dst, z[i], x[i], y[i]);
++      print_float("dst", dst);
++
++      //64bit variant
++      asm("fmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]),
"w"(dz[i])); ++ printf("FMADD 64bit: dst = z + x * y\n"); ++ printf("%f = %f + %f * %f\n", ddst, dz[i], dx[i], dy[i]); ++ print_double("dst", ddst); ++ ++ asm("fnmadd %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); ++ printf("FNMADD 64bit: dst = -z + (-x) * y\n"); ++ printf("%f = -%f - %f * %f\n", ddst, dz[i], dx[i], dy[i]); ++ print_double("dst", ddst); ++ ++ asm("fmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); ++ printf("FMSUB 64bit: dst = z + (-x) * y\n"); ++ printf("%f = %f + (-%f) * %f\n", ddst, dz[i], dx[i], dy[i]); ++ print_double("dst", ddst); ++ ++ asm("fnmsub %d0, %d1, %d2, %d3\n;" : "=w"(ddst) : "w"(dx[i]), "w"(dy[i]), "w"(dz[i])); ++ printf("FNMSUB 64bit: dst = -z + x * y\n"); ++ printf("%f = -%f + %f * %f\n", ddst, dz[i], dx[i], dy[i]); ++ print_double("dst", ddst); ++ ++ printf("\n"); ++ } ++ ++ return 0; ++} ++ +diff --git a/none/tests/arm64/fmadd_sub.stderr.exp b/none/tests/arm64/fmadd_sub.stderr.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/none/tests/arm64/fmadd_sub.stdout.exp b/none/tests/arm64/fmadd_sub.stdout.exp +new file mode 100644 +index 000000000..f1824b12b +--- /dev/null ++++ b/none/tests/arm64/fmadd_sub.stdout.exp +@@ -0,0 +1,125 @@ ++FMADD 32bit: dst = z + x * y ++76.246193 = 38.123096 + 55.000000 * 0.693147 ++dst = 42987e0d = 76.246192932128906 ++FNMADD 32bit: dst = -z + (-x) * y ++-76.246193 = -38.123096 + (-55.000000) * 0.693147 ++dst = c2987e0d = -76.246192932128906 ++FMSUB 32bit: dst = z + (-x) * y ++0.000001 = 38.123096 + (-55.000000) * 0.693147 ++dst = 35c00000 = 1.430511474609375e-06 ++FNMSUB 32bit: dst = -z + x * y ++-0.000001 = -38.123096 + 55.000000 * 0.693147 ++dst = b5c00000 = -1.430511474609375e-06 ++FMADD 64bit: dst = z + x * y ++76.246190 = 38.123095 + 55.000000 * 0.693147 ++dst = 40530fc1931f09c9 = 76.246189861593976 ++FNMADD 64bit: dst = -z + (-x) * y ++-76.246190 = -38.123095 - 55.000000 * 0.693147 ++dst = c0530fc1931f09c9 = -76.246189861593976 ++FMSUB 64bit: dst = z + (-x) * y ++-0.000000 = 38.123095 + (-55.000000) * 0.693147 ++dst = bce9000000000000 = -2.7755575615628914e-15 ++FNMSUB 64bit: dst = -z + x * y ++0.000000 = -38.123095 + 55.000000 * 0.693147 ++dst = 3ce9000000000000 = 2.7755575615628914e-15 ++ ++FMADD 32bit: dst = z + x * y ++1.996086 = 1.000000 + 0.980762 * 1.015625 ++dst = 3fff7fc0 = 1.9960861206054688 ++FNMADD 32bit: dst = -z + (-x) * y ++-1.996086 = -1.000000 + (-0.980762) * 1.015625 ++dst = bfff7fc0 = -1.9960861206054688 ++FMSUB 32bit: dst = z + (-x) * y ++0.003914 = 1.000000 + (-0.980762) * 1.015625 ++dst = 3b80401a = 0.00391389150172472 ++FNMSUB 32bit: dst = -z + x * y ++-0.003914 = -1.000000 + 0.980762 * 1.015625 ++dst = bb80401a = -0.00391389150172472 ++FMADD 64bit: dst = z + x * y ++1.996086 = 1.000000 + 0.980762 * 1.015625 ++dst = 3fffeff800000000 = 1.9960861206054688 ++FNMADD 64bit: dst = -z + (-x) * y ++-1.996086 = -1.000000 - 0.980762 * 1.015625 ++dst = bfffeff800000000 = -1.9960861206054688 ++FMSUB 64bit: dst = z + (-x) * y ++0.003914 = 1.000000 + (-0.980762) * 1.015625 ++dst = 3f70080000000034 = 0.0039138793945312951 ++FNMSUB 64bit: dst = -z + x * y ++-0.003914 = -1.000000 + 0.980762 * 1.015625 ++dst = bf70080000000034 = -0.0039138793945312951 ++ ++FMADD 32bit: dst = z + x * y ++0.000000 = 0.000000 + 0.000000 * 0.000000 ++dst = 00000000 = 0 ++FNMADD 32bit: dst = -z + (-x) * y ++-0.000000 = -0.000000 + (-0.000000) * 0.000000 ++dst = 80000000 = -0 ++FMSUB 32bit: dst = z + (-x) * y ++0.000000 = 0.000000 + (-0.000000) * 0.000000 ++dst = 
00000000 = 0 ++FNMSUB 32bit: dst = -z + x * y ++0.000000 = -0.000000 + 0.000000 * 0.000000 ++dst = 00000000 = 0 ++FMADD 64bit: dst = z + x * y ++0.000000 = 0.000000 + 0.000000 * 0.000000 ++dst = 0000000000000000 = 0 ++FNMADD 64bit: dst = -z + (-x) * y ++-0.000000 = -0.000000 - 0.000000 * 0.000000 ++dst = 8000000000000000 = -0 ++FMSUB 64bit: dst = z + (-x) * y ++0.000000 = 0.000000 + (-0.000000) * 0.000000 ++dst = 0000000000000000 = 0 ++FNMSUB 64bit: dst = -z + x * y ++0.000000 = -0.000000 + 0.000000 * 0.000000 ++dst = 0000000000000000 = 0 ++ ++FMADD 32bit: dst = z + x * y ++2.000000 = 1.000000 + 1.000000 * 1.000000 ++dst = 40000000 = 2 ++FNMADD 32bit: dst = -z + (-x) * y ++-2.000000 = -1.000000 + (-1.000000) * 1.000000 ++dst = c0000000 = -2 ++FMSUB 32bit: dst = z + (-x) * y ++0.000000 = 1.000000 + (-1.000000) * 1.000000 ++dst = 00000000 = 0 ++FNMSUB 32bit: dst = -z + x * y ++0.000000 = -1.000000 + 1.000000 * 1.000000 ++dst = 00000000 = 0 ++FMADD 64bit: dst = z + x * y ++2.000000 = 1.000000 + 1.000000 * 1.000000 ++dst = 4000000000000000 = 2 ++FNMADD 64bit: dst = -z + (-x) * y ++-2.000000 = -1.000000 - 1.000000 * 1.000000 ++dst = c000000000000000 = -2 ++FMSUB 64bit: dst = z + (-x) * y ++0.000000 = 1.000000 + (-1.000000) * 1.000000 ++dst = 0000000000000000 = 0 ++FNMSUB 64bit: dst = -z + x * y ++0.000000 = -1.000000 + 1.000000 * 1.000000 ++dst = 0000000000000000 = 0 ++ ++FMADD 32bit: dst = z + x * y ++18446744073709551616.000000 = 4294967296.000000 + 4294967296.000000 * 4294967296.000000 ++dst = 5f800000 = 1.8446744073709552e+19 ++FNMADD 32bit: dst = -z + (-x) * y ++-18446744073709551616.000000 = -4294967296.000000 + (-4294967296.000000) * 4294967296.000000 ++dst = df800000 = -1.8446744073709552e+19 ++FMSUB 32bit: dst = z + (-x) * y ++-18446744073709551616.000000 = 4294967296.000000 + (-4294967296.000000) * 4294967296.000000 ++dst = df800000 = -1.8446744073709552e+19 ++FNMSUB 32bit: dst = -z + x * y ++18446744073709551616.000000 = -4294967296.000000 + 4294967296.000000 * 4294967296.000000 ++dst = 5f800000 = 1.8446744073709552e+19 ++FMADD 64bit: dst = z + x * y ++18446744069414584320.000000 = 4294967295.000000 + 4294967295.000000 * 4294967295.000000 ++dst = 43efffffffe00000 = 1.8446744069414584e+19 ++FNMADD 64bit: dst = -z + (-x) * y ++-18446744069414584320.000000 = -4294967295.000000 - 4294967295.000000 * 4294967295.000000 ++dst = c3efffffffe00000 = -1.8446744069414584e+19 ++FMSUB 64bit: dst = z + (-x) * y ++-18446744060824649728.000000 = 4294967295.000000 + (-4294967295.000000) * 4294967295.000000 ++dst = c3efffffffa00000 = -1.844674406082465e+19 ++FNMSUB 64bit: dst = -z + x * y ++18446744060824649728.000000 = -4294967295.000000 + 4294967295.000000 * 4294967295.000000 ++dst = 43efffffffa00000 = 1.844674406082465e+19 ++ +diff --git a/none/tests/arm64/fmadd_sub.vgtest b/none/tests/arm64/fmadd_sub.vgtest +new file mode 100644 +index 000000000..b4c53eea4 +--- /dev/null ++++ b/none/tests/arm64/fmadd_sub.vgtest +@@ -0,0 +1,3 @@ ++prog: fmadd_sub ++prereq: test -x fmadd_sub ++vgopts: -q +-- +2.18.4 + diff --git a/valgrind.spec b/valgrind.spec index 151f270..93634cd 100644 --- a/valgrind.spec +++ b/valgrind.spec @@ -3,7 +3,7 @@ Summary: Tool for finding memory management bugs in programs Name: %{?scl_prefix}valgrind Version: 3.16.1 -Release: 9%{?dist} +Release: 10%{?dist} Epoch: 1 License: GPLv2+ URL: http://www.valgrind.org/ @@ -126,6 +126,9 @@ Patch16: valgrind-3.16.1-s390x-z14-vector.patch # KDE#430354 ppc stxsibx and stxsihx instructions write too much data Patch17: 
valgrind-3.16.1-stxsibx-stxsihx.patch +# KDE#426014 arm64: implement fmadd and fmsub as Iop_MAdd/Sub +Patch18: valgrind-3.16.1-arm64-fma.patch + BuildRequires: glibc-devel %if %{build_openmpi} @@ -271,6 +274,7 @@ Valgrind User Manual for details. %patch15 -p1 %patch16 -p1 %patch17 -p1 +%patch18 -p1 %build # LTO triggers undefined symbols in valgrind. Valgrind has a --enable-lto @@ -495,6 +499,9 @@ fi %endif %changelog +* Tue Dec 15 2020 Mark Wielaard - 3.16.1-10 +- Add valgrind-3.16.1-arm64-fma.patch + * Sun Dec 13 2020 Mark Wielaard - 3.16.1-9 - Add valgrind-3.16.1-stxsibx-stxsihx.patch
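
Background on the rounding issue this patch fixes: fmadd rounds once, after forming a + n * m exactly, while the old split translation rounded after the multiply and again after the add. The following stand-alone sketch (not part of the patch; demo names and the compile line are illustrative) uses C99 fma() with the FNMSUB 64bit inputs from fmadd_sub.c above. Compile with -ffp-contract=off so the compiler does not itself fuse the split form.

/* fma_demo.c (hypothetical, not part of the patch):
   fused vs. split multiply-add.
   Build: cc -O0 -ffp-contract=off fma_demo.c -lm */
#include <math.h>
#include <stdio.h>

int main(void)
{
   /* Inputs taken from none/tests/arm64/fmadd_sub.c,
      FNMSUB 64bit case: dst = -z + x * y. */
   double x = 55.0;
   double y = 0.69314718055994529;  /* nearest double to ln(2) */
   double z = 38.123094930796988;

   double fused = fma(x, y, -z);    /* one rounding, like fnmsub    */
   double split = x * y - z;        /* two roundings, old behaviour */

   printf("fused = %.17g\n", fused);
   printf("split = %.17g\n", split);
   return 0;
}

On an IEEE-754 host this should print 2.7755575615628914e-15 for the fused form, the value recorded in fmadd_sub.stdout.exp, while the split form rounds the product back to z and prints 0: the residual lies below half an ulp of the product, so only the fused instruction can observe it.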
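The SPEC/IMPL table in the patch maps fnmadd to fmadd(-a, -n, m) rather than negating the result of fmadd(a, n, m). The distinction matters because negating after rounding is not the same as rounding the negated exact sum once the rounding mode is directed, which is the "sign" part of what the patch gets right. A hypothetical sketch of that corner case (again not part of the patch; it assumes a host with a correctly rounding hardware fma and glibc-style fenv support):

/* fnmadd_sign_demo.c (hypothetical, not part of the patch):
   under round-toward-+infinity, -(fma(n,m,a)) != fma(-n,m,-a).
   Build: cc -O0 -frounding-math fnmadd_sign_demo.c -lm */
#include <fenv.h>
#include <float.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
   fesetround(FE_UPWARD);            /* directed rounding mode */
   double a = 1.0;
   double n = DBL_EPSILON / 4;       /* 2^-54, exact */
   double m = 1.0;

   /* Exact sum is 1 + 2^-54.  Rounding up then negating gives
      -(1 + 2^-52); rounding the negated sum up gives exactly -1. */
   double neg_after = -fma(n, m, a);
   double neg_before = fma(-n, m, -a);

   printf("-(fma(n,m,a)) = %.17g\n", neg_after);
   printf("fma(-n,m,-a)  = %.17g\n", neg_before);
   fesetround(FE_TONEAREST);
   return 0;
}

Under round-to-nearest the two agree, which is why the old split-and-negate code mostly appeared to work; the difference only surfaces under FPCR rounding modes like the one above.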
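Finally, on the backend side, the new X_3_8_5_1_5_5_5() helper in host_arm64_defs.c packs the seven fields of the FMADD/FMSUB encoding shown in the emit_ARM64Instr comments (000 11111 010 m b15 a n d for doubles, 000 11111 000 ... for singles). A minimal hypothetical harness replaying that packing for fmadd d0, d1, d2, d3:

/* encode_demo.c (hypothetical, not part of the patch): replays the
   X_3_8_5_1_5_5_5() field packing from host_arm64_defs.c. */
#include <assert.h>
#include <stdio.h>

typedef unsigned int UInt;

static UInt X_3_8_5_1_5_5_5(UInt f1, UInt f2, UInt f3, UInt f4,
                            UInt f5, UInt f6, UInt f7)
{
   assert(f1 < (1u << 3) && f2 < (1u << 8) && f3 < (1u << 5));
   assert(f4 < (1u << 1) && f5 < (1u << 5) && f6 < (1u << 5));
   assert(f7 < (1u << 5));
   UInt w = 0;
   w = (w << 3) | f1;   /* fixed 000                                 */
   w = (w << 8) | f2;   /* 11111010 (double) or 11111000 (single)    */
   w = (w << 5) | f3;   /* Rm */
   w = (w << 1) | f4;   /* b15: 0 = FMADD, 1 = FMSUB                 */
   w = (w << 5) | f5;   /* Ra */
   w = (w << 5) | f6;   /* Rn */
   w = (w << 5) | f7;   /* Rd */
   return w;
}

int main(void)
{
   /* fmadd d0, d1, d2, d3: m=2, b15=0, a=3, n=1, d=0.
      Should print 1f420c20, the word an assembler emits. */
   printf("%08x\n", X_3_8_5_1_5_5_5(0, 0xFA, 2, 0, 3, 1, 0));
   return 0;
}

Note how the b15 bit is the only difference between the FMADD and FMSUB encodings, which is exactly what the switch on ARM64in.VTriD.op / VTriS.op in the patch selects.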