|
|
e94d64 |
Only arm64. Removed the MIPS part.
|
|
|
e94d64 |
|
|
|
e94d64 |
commit 6b72dc54b722af5f6a87ebe258d3da6bcba059b7
|
|
|
e94d64 |
Author: Julian Seward <jseward@acm.org>
|
|
|
e94d64 |
Date: Mon Apr 24 09:23:43 2017 +0000
|
|
|
e94d64 |
|
|
|
e94d64 |
Bug 369459 - valgrind on arm64 violates the ARMv8 spec (ldxr/stxr)
|
|
|
e94d64 |
|
|
|
e94d64 |
This implements a fallback LL/SC implementation as described in bug 344524.
|
|
|
e94d64 |
|
|
|
e94d64 |
The fallback implementation is not enabled by default, and there is no
|
|
|
e94d64 |
auto-detection for when it should be used. To use it, run with the
|
|
|
e94d64 |
flag --sim-hints=fallback-llsc. This commit also allows the existing
|
|
|
e94d64 |
MIPS fallback implementation to be enabled with that flag.
|
|
|
e94d64 |
|
|
|
e94d64 |
VEX side changes:
|
|
|
e94d64 |
|
|
|
e94d64 |
* priv/main_main.c, pub/libvex.h
|
|
|
e94d64 |
|
|
|
e94d64 |
Adds new field guest__use_fallback_LLSC to VexAbiInfo
|
|
|
e94d64 |
|
|
|
e94d64 |
* pub/libvex_guest_arm64.h priv/guest_arm64_toIR.c
|
|
|
e94d64 |
|
|
|
e94d64 |
add front end support, new guest state fields
|
|
|
e94d64 |
guest_LLSC_{SIZE,ADDR,DATA}, also documentation of the scheme
|
|
|
e94d64 |
|
|
|
e94d64 |
* priv/guest_mips_toIR.c
|
|
|
e94d64 |
|
|
|
e94d64 |
allow manual selection of fallback implementation via
|
|
|
e94d64 |
--sim-hints=fallback-llsc
|
|
|
e94d64 |
|
|
|
e94d64 |
* priv/host_arm64_defs.c priv/host_arm64_defs.h priv/host_arm64_isel.c
|
|
|
e94d64 |
|
|
|
e94d64 |
Add support for generating CAS on arm64, as needed by the front end changes
|
|
|
e94d64 |
|
|
|
e94d64 |
|
|
|
e94d64 |
|
|
|
e94d64 |
git-svn-id: svn://svn.valgrind.org/vex/trunk@3352
|
|
|
e94d64 |
|
|
|
e94d64 |
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
|
|
|
e94d64 |
index 088af55..421db37 100644
|
|
|
e94d64 |
--- a/VEX/priv/guest_arm64_toIR.c
|
|
|
e94d64 |
+++ b/VEX/priv/guest_arm64_toIR.c
|
|
|
e94d64 |
@@ -1147,6 +1147,10 @@ static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
|
|
|
e94d64 |
#define OFFB_CMSTART offsetof(VexGuestARM64State,guest_CMSTART)
|
|
|
e94d64 |
#define OFFB_CMLEN offsetof(VexGuestARM64State,guest_CMLEN)
|
|
|
e94d64 |
|
|
|
e94d64 |
+#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
|
|
|
e94d64 |
+#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
|
|
|
e94d64 |
+#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
|
|
|
e94d64 |
+
|
|
|
e94d64 |
|
|
|
e94d64 |
/* ---------------- Integer registers ---------------- */
|
|
|
e94d64 |
|
|
|
e94d64 |
@@ -4702,7 +4706,9 @@ const HChar* nameArr_Q_SZ ( UInt bitQ, UInt size )
|
|
|
e94d64 |
|
|
|
e94d64 |
|
|
|
e94d64 |
static
|
|
|
e94d64 |
-Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
|
e94d64 |
+Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
|
|
|
e94d64 |
+ const VexAbiInfo* abiinfo
|
|
|
e94d64 |
+)
|
|
|
e94d64 |
{
|
|
|
e94d64 |
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
|
e94d64 |
|
|
|
e94d64 |
@@ -6457,6 +6463,32 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
|
e94d64 |
sz 001000 000 s 0 11111 n t STX{R,RH,RB} Ws, Rt, [Xn|SP]
|
|
|
e94d64 |
sz 001000 000 s 1 11111 n t STLX{R,RH,RB} Ws, Rt, [Xn|SP]
|
|
|
e94d64 |
*/
|
|
|
e94d64 |
+ /* For the "standard" implementation we pass through the LL and SC to
|
|
|
e94d64 |
+ the host. For the "fallback" implementation, for details see
|
|
|
e94d64 |
+ https://bugs.kde.org/show_bug.cgi?id=344524 and
|
|
|
e94d64 |
+ https://bugs.kde.org/show_bug.cgi?id=369459,
|
|
|
e94d64 |
+ but in short:
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ LoadLinked(addr)
|
|
|
e94d64 |
+ gs.LLsize = load_size // 1, 2, 4 or 8
|
|
|
e94d64 |
+ gs.LLaddr = addr
|
|
|
e94d64 |
+ gs.LLdata = zeroExtend(*addr)
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ StoreCond(addr, data)
|
|
|
e94d64 |
+ tmp_LLsize = gs.LLsize
|
|
|
e94d64 |
+ gs.LLsize = 0 // "no transaction"
|
|
|
e94d64 |
+ if tmp_LLsize != store_size -> fail
|
|
|
e94d64 |
+ if addr != gs.LLaddr -> fail
|
|
|
e94d64 |
+ if zeroExtend(*addr) != gs.LLdata -> fail
|
|
|
e94d64 |
+ cas_ok = CAS(store_size, addr, gs.LLdata -> data)
|
|
|
e94d64 |
+ if !cas_ok -> fail
|
|
|
e94d64 |
+ succeed
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ When thread scheduled
|
|
|
e94d64 |
+ gs.LLsize = 0 // "no transaction"
|
|
|
e94d64 |
+ (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
|
|
|
e94d64 |
+ has to do this bit)
|
|
|
e94d64 |
+ */
|
|
|
e94d64 |
if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
|
|
|
e94d64 |
&& (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
|
|
|
e94d64 |
&& INSN(14,10) == BITS5(1,1,1,1,1)) {
|
|
|
e94d64 |
@@ -6478,29 +6510,99 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
|
e94d64 |
|
|
|
e94d64 |
if (isLD && ss == BITS5(1,1,1,1,1)) {
|
|
|
e94d64 |
IRTemp res = newTemp(ty);
|
|
|
e94d64 |
- stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
|
|
|
e94d64 |
- putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
|
|
|
e94d64 |
+ if (abiinfo->guest__use_fallback_LLSC) {
|
|
|
e94d64 |
+ // Do the load first so we don't update any guest state
|
|
|
e94d64 |
+ // if it faults.
|
|
|
e94d64 |
+ IRTemp loaded_data64 = newTemp(Ity_I64);
|
|
|
e94d64 |
+ assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
|
|
|
e94d64 |
+ stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
|
|
|
e94d64 |
+ stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
|
|
|
e94d64 |
+ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
|
|
|
e94d64 |
+ putIReg64orZR(tt, mkexpr(loaded_data64));
|
|
|
e94d64 |
+ } else {
|
|
|
e94d64 |
+ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
|
|
|
e94d64 |
+ putIReg64orZR(tt, widenUto64(ty, mkexpr(res)));
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
if (isAcqOrRel) {
|
|
|
e94d64 |
stmt(IRStmt_MBE(Imbe_Fence));
|
|
|
e94d64 |
}
|
|
|
e94d64 |
- DIP("ld%sx%s %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
|
|
|
e94d64 |
- nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
|
|
|
e94d64 |
+ DIP("ld%sx%s %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
|
|
|
e94d64 |
+ nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
|
|
|
e94d64 |
+ abiinfo->guest__use_fallback_LLSC
|
|
|
e94d64 |
+ ? "(fallback implementation)" : "");
|
|
|
e94d64 |
return True;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
if (!isLD) {
|
|
|
e94d64 |
if (isAcqOrRel) {
|
|
|
e94d64 |
stmt(IRStmt_MBE(Imbe_Fence));
|
|
|
e94d64 |
}
|
|
|
e94d64 |
- IRTemp res = newTemp(Ity_I1);
|
|
|
e94d64 |
IRExpr* data = narrowFrom64(ty, getIReg64orZR(tt));
|
|
|
e94d64 |
- stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
|
|
|
e94d64 |
- /* IR semantics: res is 1 if store succeeds, 0 if it fails.
|
|
|
e94d64 |
- Need to set rS to 1 on failure, 0 on success. */
|
|
|
e94d64 |
- putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
|
|
|
e94d64 |
- mkU64(1)));
|
|
|
e94d64 |
- DIP("st%sx%s %s, %s, [%s]\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
|
|
|
e94d64 |
+ if (abiinfo->guest__use_fallback_LLSC) {
|
|
|
e94d64 |
+ // This is really ugly, since we don't have any way to do
|
|
|
e94d64 |
+ // proper if-then-else. First, set up as if the SC failed,
|
|
|
e94d64 |
+ // and jump forwards if it really has failed.
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ // Continuation address
|
|
|
e94d64 |
+ IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ // "the SC failed". Any non-zero value means failure.
|
|
|
e94d64 |
+ putIReg64orZR(ss, mkU64(1));
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ IRTemp tmp_LLsize = newTemp(Ity_I64);
|
|
|
e94d64 |
+ assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
|
|
|
e94d64 |
+ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
|
|
|
e94d64 |
+ ));
|
|
|
e94d64 |
+ // Fail if no or wrong-size transaction
|
|
|
e94d64 |
+ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
|
e94d64 |
+ stmt( IRStmt_Exit(
|
|
|
e94d64 |
+ binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(szB)),
|
|
|
e94d64 |
+ Ijk_Boring, nia, OFFB_PC
|
|
|
e94d64 |
+ ));
|
|
|
e94d64 |
+ // Fail if the address doesn't match the LL address
|
|
|
e94d64 |
+ stmt( IRStmt_Exit(
|
|
|
e94d64 |
+ binop(Iop_CmpNE64, mkexpr(ea),
|
|
|
e94d64 |
+ IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
|
|
|
e94d64 |
+ Ijk_Boring, nia, OFFB_PC
|
|
|
e94d64 |
+ ));
|
|
|
e94d64 |
+ // Fail if the data doesn't match the LL data
|
|
|
e94d64 |
+ IRTemp llsc_data64 = newTemp(Ity_I64);
|
|
|
e94d64 |
+ assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
|
|
|
e94d64 |
+ stmt( IRStmt_Exit(
|
|
|
e94d64 |
+ binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
|
|
|
e94d64 |
+ mkexpr(llsc_data64)),
|
|
|
e94d64 |
+ Ijk_Boring, nia, OFFB_PC
|
|
|
e94d64 |
+ ));
|
|
|
e94d64 |
+ // Try to CAS the new value in.
|
|
|
e94d64 |
+ IRTemp old = newTemp(ty);
|
|
|
e94d64 |
+ IRTemp expd = newTemp(ty);
|
|
|
e94d64 |
+ assign(expd, narrowFrom64(ty, mkexpr(llsc_data64)));
|
|
|
e94d64 |
+ stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
|
|
|
e94d64 |
+ Iend_LE, mkexpr(ea),
|
|
|
e94d64 |
+ /*expdHi*/NULL, mkexpr(expd),
|
|
|
e94d64 |
+ /*dataHi*/NULL, data
|
|
|
e94d64 |
+ )));
|
|
|
e94d64 |
+ // Fail if the CAS failed (viz, old != expd)
|
|
|
e94d64 |
+ stmt( IRStmt_Exit(
|
|
|
e94d64 |
+ binop(Iop_CmpNE64,
|
|
|
e94d64 |
+ widenUto64(ty, mkexpr(old)),
|
|
|
e94d64 |
+ widenUto64(ty, mkexpr(expd))),
|
|
|
e94d64 |
+ Ijk_Boring, nia, OFFB_PC
|
|
|
e94d64 |
+ ));
|
|
|
e94d64 |
+ // Otherwise we succeeded (!)
|
|
|
e94d64 |
+ putIReg64orZR(ss, mkU64(0));
|
|
|
e94d64 |
+ } else {
|
|
|
e94d64 |
+ IRTemp res = newTemp(Ity_I1);
|
|
|
e94d64 |
+ stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
|
|
|
e94d64 |
+ /* IR semantics: res is 1 if store succeeds, 0 if it fails.
|
|
|
e94d64 |
+ Need to set rS to 1 on failure, 0 on success. */
|
|
|
e94d64 |
+ putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
|
|
|
e94d64 |
+ mkU64(1)));
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ DIP("st%sx%s %s, %s, [%s] %s\n", isAcqOrRel ? "a" : "", suffix[szBlg2],
|
|
|
e94d64 |
nameIRegOrZR(False, ss),
|
|
|
e94d64 |
- nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn));
|
|
|
e94d64 |
+ nameIRegOrZR(szB == 8, tt), nameIReg64orSP(nn),
|
|
|
e94d64 |
+ abiinfo->guest__use_fallback_LLSC
|
|
|
e94d64 |
+ ? "(fallback implementation)" : "");
|
|
|
e94d64 |
return True;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
/* else fall through */
|
|
|
e94d64 |
@@ -6589,7 +6691,8 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn)
|
|
|
e94d64 |
|
|
|
e94d64 |
static
|
|
|
e94d64 |
Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
|
|
|
e94d64 |
- const VexArchInfo* archinfo)
|
|
|
e94d64 |
+ const VexArchInfo* archinfo,
|
|
|
e94d64 |
+ const VexAbiInfo* abiinfo)
|
|
|
e94d64 |
{
|
|
|
e94d64 |
# define INSN(_bMax,_bMin) SLICE_UInt(insn, (_bMax), (_bMin))
|
|
|
e94d64 |
|
|
|
e94d64 |
@@ -7048,7 +7151,11 @@ Bool dis_ARM64_branch_etc(/*MB_OUT*/DisResult* dres, UInt insn,
|
|
|
e94d64 |
/* AFAICS, this simply cancels a (all?) reservations made by a
|
|
|
e94d64 |
(any?) preceding LDREX(es). Arrange to hand it through to
|
|
|
e94d64 |
the back end. */
|
|
|
e94d64 |
- stmt( IRStmt_MBE(Imbe_CancelReservation) );
|
|
|
e94d64 |
+ if (abiinfo->guest__use_fallback_LLSC) {
|
|
|
e94d64 |
+ stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) )); // "no transaction"
|
|
|
e94d64 |
+ } else {
|
|
|
e94d64 |
+ stmt( IRStmt_MBE(Imbe_CancelReservation) );
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
DIP("clrex #%u\n", mm);
|
|
|
e94d64 |
return True;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
@@ -14411,12 +14518,12 @@ Bool disInstr_ARM64_WRK (
|
|
|
e94d64 |
break;
|
|
|
e94d64 |
case BITS4(1,0,1,0): case BITS4(1,0,1,1):
|
|
|
e94d64 |
// Branch, exception generation and system instructions
|
|
|
e94d64 |
- ok = dis_ARM64_branch_etc(dres, insn, archinfo);
|
|
|
e94d64 |
+ ok = dis_ARM64_branch_etc(dres, insn, archinfo, abiinfo);
|
|
|
e94d64 |
break;
|
|
|
e94d64 |
case BITS4(0,1,0,0): case BITS4(0,1,1,0):
|
|
|
e94d64 |
case BITS4(1,1,0,0): case BITS4(1,1,1,0):
|
|
|
e94d64 |
// Loads and stores
|
|
|
e94d64 |
- ok = dis_ARM64_load_store(dres, insn);
|
|
|
e94d64 |
+ ok = dis_ARM64_load_store(dres, insn, abiinfo);
|
|
|
e94d64 |
break;
|
|
|
e94d64 |
case BITS4(0,1,0,1): case BITS4(1,1,0,1):
|
|
|
e94d64 |
// Data processing - register
|
|
|
e94d64 |
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
|
|
|
e94d64 |
index cc7c832..c9affbd 100644
|
|
|
e94d64 |
--- a/VEX/priv/host_arm64_defs.c
|
|
|
e94d64 |
+++ b/VEX/priv/host_arm64_defs.c
|
|
|
e94d64 |
@@ -1005,6 +1005,13 @@ ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
|
|
|
e94d64 |
vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
|
e94d64 |
return i;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
+ARM64Instr* ARM64Instr_CAS ( Int szB ) {
|
|
|
e94d64 |
+ ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
|
|
|
e94d64 |
+ i->tag = ARM64in_CAS;
|
|
|
e94d64 |
+ i->ARM64in.CAS.szB = szB;
|
|
|
e94d64 |
+ vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
|
|
|
e94d64 |
+ return i;
|
|
|
e94d64 |
+}
|
|
|
e94d64 |
ARM64Instr* ARM64Instr_MFence ( void ) {
|
|
|
e94d64 |
ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
|
|
|
e94d64 |
i->tag = ARM64in_MFence;
|
|
|
e94d64 |
@@ -1569,6 +1576,10 @@ void ppARM64Instr ( const ARM64Instr* i ) {
|
|
|
e94d64 |
sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
+ case ARM64in_CAS: {
|
|
|
e94d64 |
+ vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
|
|
|
e94d64 |
+ return;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
case ARM64in_MFence:
|
|
|
e94d64 |
vex_printf("(mfence) dsb sy; dmb sy; isb");
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
@@ -2064,6 +2075,14 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
|
|
|
e94d64 |
addHRegUse(u, HRmWrite, hregARM64_X0());
|
|
|
e94d64 |
addHRegUse(u, HRmRead, hregARM64_X2());
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
+ case ARM64in_CAS:
|
|
|
e94d64 |
+ addHRegUse(u, HRmRead, hregARM64_X3());
|
|
|
e94d64 |
+ addHRegUse(u, HRmRead, hregARM64_X5());
|
|
|
e94d64 |
+ addHRegUse(u, HRmRead, hregARM64_X7());
|
|
|
e94d64 |
+ addHRegUse(u, HRmWrite, hregARM64_X1());
|
|
|
e94d64 |
+ /* Pointless to state this since X8 is not available to RA. */
|
|
|
e94d64 |
+ addHRegUse(u, HRmWrite, hregARM64_X8());
|
|
|
e94d64 |
+ break;
|
|
|
e94d64 |
case ARM64in_MFence:
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
case ARM64in_ClrEX:
|
|
|
e94d64 |
@@ -2326,6 +2345,8 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
case ARM64in_StrEX:
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
+ case ARM64in_CAS:
|
|
|
e94d64 |
+ return;
|
|
|
e94d64 |
case ARM64in_MFence:
|
|
|
e94d64 |
return;
|
|
|
e94d64 |
case ARM64in_ClrEX:
|
|
|
e94d64 |
@@ -3803,6 +3824,61 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
|
e94d64 |
}
|
|
|
e94d64 |
goto bad;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
+ case ARM64in_CAS: {
|
|
|
e94d64 |
+ /* This isn't simple. For an explanation see the comment in
|
|
|
e94d64 |
+ host_arm64_defs.h on the definition of ARM64Instr case
|
|
|
e94d64 |
+ CAS. */
|
|
|
e94d64 |
+ /* Generate:
|
|
|
e94d64 |
+ -- one of:
|
|
|
e94d64 |
+ mov x8, x5 // AA0503E8
|
|
|
e94d64 |
+ and x8, x5, #0xFFFFFFFF // 92407CA8
|
|
|
e94d64 |
+ and x8, x5, #0xFFFF // 92403CA8
|
|
|
e94d64 |
+ and x8, x5, #0xFF // 92401CA8
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ -- one of:
|
|
|
e94d64 |
+ ldxr x1, [x3] // C85F7C61
|
|
|
e94d64 |
+ ldxr w1, [x3] // 885F7C61
|
|
|
e94d64 |
+ ldxrh w1, [x3] // 485F7C61
|
|
|
e94d64 |
+ ldxrb w1, [x3] // 085F7C61
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ -- always:
|
|
|
e94d64 |
+ cmp x1, x8 // EB08003F
|
|
|
e94d64 |
+ bne out // 54000061
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ -- one of:
|
|
|
e94d64 |
+ stxr w1, x7, [x3] // C8017C67
|
|
|
e94d64 |
+ stxr w1, w7, [x3] // 88017C67
|
|
|
e94d64 |
+ stxrh w1, w7, [x3] // 48017C67
|
|
|
e94d64 |
+ stxrb w1, w7, [x3] // 08017C67
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ -- always:
|
|
|
e94d64 |
+ eor x1, x5, x1 // CA0100A1
|
|
|
e94d64 |
+ out:
|
|
|
e94d64 |
+ */
|
|
|
e94d64 |
+ switch (i->ARM64in.CAS.szB) {
|
|
|
e94d64 |
+ case 8: *p++ = 0xAA0503E8; break;
|
|
|
e94d64 |
+ case 4: *p++ = 0x92407CA8; break;
|
|
|
e94d64 |
+ case 2: *p++ = 0x92403CA8; break;
|
|
|
e94d64 |
+ case 1: *p++ = 0x92401CA8; break;
|
|
|
e94d64 |
+ default: vassert(0);
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ switch (i->ARM64in.CAS.szB) {
|
|
|
e94d64 |
+ case 8: *p++ = 0xC85F7C61; break;
|
|
|
e94d64 |
+ case 4: *p++ = 0x885F7C61; break;
|
|
|
e94d64 |
+ case 2: *p++ = 0x485F7C61; break;
|
|
|
e94d64 |
+ case 1: *p++ = 0x085F7C61; break;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ *p++ = 0xEB08003F;
|
|
|
e94d64 |
+ *p++ = 0x54000061;
|
|
|
e94d64 |
+ switch (i->ARM64in.CAS.szB) {
|
|
|
e94d64 |
+ case 8: *p++ = 0xC8017C67; break;
|
|
|
e94d64 |
+ case 4: *p++ = 0x88017C67; break;
|
|
|
e94d64 |
+ case 2: *p++ = 0x48017C67; break;
|
|
|
e94d64 |
+ case 1: *p++ = 0x08017C67; break;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ *p++ = 0xCA0100A1;
|
|
|
e94d64 |
+ goto done;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
case ARM64in_MFence: {
|
|
|
e94d64 |
*p++ = 0xD5033F9F; /* DSB sy */
|
|
|
e94d64 |
*p++ = 0xD5033FBF; /* DMB sy */
|
|
|
e94d64 |
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
|
|
|
e94d64 |
index 62b25fd..92d247e 100644
|
|
|
e94d64 |
--- a/VEX/priv/host_arm64_defs.h
|
|
|
e94d64 |
+++ b/VEX/priv/host_arm64_defs.h
|
|
|
e94d64 |
@@ -481,6 +481,7 @@ typedef
|
|
|
e94d64 |
ARM64in_Mul,
|
|
|
e94d64 |
ARM64in_LdrEX,
|
|
|
e94d64 |
ARM64in_StrEX,
|
|
|
e94d64 |
+ ARM64in_CAS,
|
|
|
e94d64 |
ARM64in_MFence,
|
|
|
e94d64 |
ARM64in_ClrEX,
|
|
|
e94d64 |
/* ARM64in_V*: scalar ops involving vector registers */
|
|
|
e94d64 |
@@ -668,6 +669,32 @@ typedef
|
|
|
e94d64 |
struct {
|
|
|
e94d64 |
Int szB; /* 1, 2, 4 or 8 */
|
|
|
e94d64 |
} StrEX;
|
|
|
e94d64 |
+ /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
|
|
|
e94d64 |
+ where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
|
|
|
e94d64 |
+ x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
|
|
|
e94d64 |
+ Uses x8 as scratch (but that's not allocatable).
|
|
|
e94d64 |
+ Hence: RD x3, x5, x7; WR x1
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ (szB=8) mov x8, x5
|
|
|
e94d64 |
+ (szB=4) and x8, x5, #0xFFFFFFFF
|
|
|
e94d64 |
+ (szB=2) and x8, x5, #0xFFFF
|
|
|
e94d64 |
+ (szB=1) and x8, x5, #0xFF
|
|
|
e94d64 |
+ -- x8 is correctly zero-extended expected value
|
|
|
e94d64 |
+ ldxr x1, [x3]
|
|
|
e94d64 |
+ -- x1 is correctly zero-extended actual value
|
|
|
e94d64 |
+ cmp x1, x8
|
|
|
e94d64 |
+ bne after
|
|
|
e94d64 |
+ -- if branch taken, failure; x1[8*szB-1 : 0] holds old value
|
|
|
e94d64 |
+ -- attempt to store
|
|
|
e94d64 |
+ stxr w1, x7, [x3]
|
|
|
e94d64 |
+ -- if store successful, x1==0, so the eor is "x1 := x5"
|
|
|
e94d64 |
+ -- if store failed, x1==1, so the eor makes x1 != x5
|
|
|
e94d64 |
+ eor x1, x5, x1
|
|
|
e94d64 |
+ after:
|
|
|
e94d64 |
+ */
|
|
|
e94d64 |
+ struct {
|
|
|
e94d64 |
+ Int szB; /* 1, 2, 4 or 8 */
|
|
|
e94d64 |
+ } CAS;
|
|
|
e94d64 |
/* Mem fence. An insn which fences all loads and stores as
|
|
|
e94d64 |
much as possible before continuing. On ARM64 we emit the
|
|
|
e94d64 |
sequence "dsb sy ; dmb sy ; isb sy", which is probably
|
|
|
e94d64 |
@@ -912,6 +939,7 @@ extern ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
|
|
|
e94d64 |
ARM64MulOp op );
|
|
|
e94d64 |
extern ARM64Instr* ARM64Instr_LdrEX ( Int szB );
|
|
|
e94d64 |
extern ARM64Instr* ARM64Instr_StrEX ( Int szB );
|
|
|
e94d64 |
+extern ARM64Instr* ARM64Instr_CAS ( Int szB );
|
|
|
e94d64 |
extern ARM64Instr* ARM64Instr_MFence ( void );
|
|
|
e94d64 |
extern ARM64Instr* ARM64Instr_ClrEX ( void );
|
|
|
e94d64 |
extern ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN,
|
|
|
e94d64 |
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
|
|
|
e94d64 |
index 42748e4..07ce87a 100644
|
|
|
e94d64 |
--- a/VEX/priv/host_arm64_isel.c
|
|
|
e94d64 |
+++ b/VEX/priv/host_arm64_isel.c
|
|
|
e94d64 |
@@ -1383,12 +1383,13 @@ static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLT64S
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLT64U
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLE64S
|
|
|
e94d64 |
- || e->Iex.Binop.op == Iop_CmpLE64U)) {
|
|
|
e94d64 |
+ || e->Iex.Binop.op == Iop_CmpLE64U
|
|
|
e94d64 |
+ || e->Iex.Binop.op == Iop_CasCmpEQ64)) {
|
|
|
e94d64 |
HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
|
e94d64 |
ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
|
|
|
e94d64 |
addInstr(env, ARM64Instr_Cmp(argL, argR, True/*is64*/));
|
|
|
e94d64 |
switch (e->Iex.Binop.op) {
|
|
|
e94d64 |
- case Iop_CmpEQ64: return ARM64cc_EQ;
|
|
|
e94d64 |
+ case Iop_CmpEQ64: case Iop_CasCmpEQ64: return ARM64cc_EQ;
|
|
|
e94d64 |
case Iop_CmpNE64: return ARM64cc_NE;
|
|
|
e94d64 |
case Iop_CmpLT64S: return ARM64cc_LT;
|
|
|
e94d64 |
case Iop_CmpLT64U: return ARM64cc_CC;
|
|
|
e94d64 |
@@ -1405,12 +1406,13 @@ static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLT32S
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLT32U
|
|
|
e94d64 |
|| e->Iex.Binop.op == Iop_CmpLE32S
|
|
|
e94d64 |
- || e->Iex.Binop.op == Iop_CmpLE32U)) {
|
|
|
e94d64 |
+ || e->Iex.Binop.op == Iop_CmpLE32U
|
|
|
e94d64 |
+ || e->Iex.Binop.op == Iop_CasCmpEQ32)) {
|
|
|
e94d64 |
HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
|
e94d64 |
ARM64RIA* argR = iselIntExpr_RIA(env, e->Iex.Binop.arg2);
|
|
|
e94d64 |
addInstr(env, ARM64Instr_Cmp(argL, argR, False/*!is64*/));
|
|
|
e94d64 |
switch (e->Iex.Binop.op) {
|
|
|
e94d64 |
- case Iop_CmpEQ32: return ARM64cc_EQ;
|
|
|
e94d64 |
+ case Iop_CmpEQ32: case Iop_CasCmpEQ32: return ARM64cc_EQ;
|
|
|
e94d64 |
case Iop_CmpNE32: return ARM64cc_NE;
|
|
|
e94d64 |
case Iop_CmpLT32S: return ARM64cc_LT;
|
|
|
e94d64 |
case Iop_CmpLT32U: return ARM64cc_CC;
|
|
|
e94d64 |
@@ -1420,6 +1422,34 @@ static ARM64CondCode iselCondCode_wrk ( ISelEnv* env, IRExpr* e )
|
|
|
e94d64 |
}
|
|
|
e94d64 |
}
|
|
|
e94d64 |
|
|
|
e94d64 |
+ /* --- Cmp*16*(x,y) --- */
|
|
|
e94d64 |
+ if (e->tag == Iex_Binop
|
|
|
e94d64 |
+ && (e->Iex.Binop.op == Iop_CasCmpEQ16)) {
|
|
|
e94d64 |
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
|
e94d64 |
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
|
e94d64 |
+ HReg argL2 = widen_z_16_to_64(env, argL);
|
|
|
e94d64 |
+ HReg argR2 = widen_z_16_to_64(env, argR);
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
|
|
|
e94d64 |
+ switch (e->Iex.Binop.op) {
|
|
|
e94d64 |
+ case Iop_CasCmpEQ16: return ARM64cc_EQ;
|
|
|
e94d64 |
+ default: vpanic("iselCondCode(arm64): CmpXX16");
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+
|
|
|
e94d64 |
+ /* --- Cmp*8*(x,y) --- */
|
|
|
e94d64 |
+ if (e->tag == Iex_Binop
|
|
|
e94d64 |
+ && (e->Iex.Binop.op == Iop_CasCmpEQ8)) {
|
|
|
e94d64 |
+ HReg argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
|
|
|
e94d64 |
+ HReg argR = iselIntExpr_R(env, e->Iex.Binop.arg2);
|
|
|
e94d64 |
+ HReg argL2 = widen_z_8_to_64(env, argL);
|
|
|
e94d64 |
+ HReg argR2 = widen_z_8_to_64(env, argR);
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_Cmp(argL2, ARM64RIA_R(argR2), True/*is64*/));
|
|
|
e94d64 |
+ switch (e->Iex.Binop.op) {
|
|
|
e94d64 |
+ case Iop_CasCmpEQ8: return ARM64cc_EQ;
|
|
|
e94d64 |
+ default: vpanic("iselCondCode(arm64): CmpXX8");
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+
|
|
|
e94d64 |
ppIRExpr(e);
|
|
|
e94d64 |
vpanic("iselCondCode");
|
|
|
e94d64 |
}
|
|
|
e94d64 |
@@ -3833,6 +3863,57 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
|
|
|
e94d64 |
break;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
|
|
|
e94d64 |
+ /* --------- ACAS --------- */
|
|
|
e94d64 |
+ case Ist_CAS: {
|
|
|
e94d64 |
+ if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
|
|
|
e94d64 |
+ /* "normal" singleton CAS */
|
|
|
e94d64 |
+ UChar sz;
|
|
|
e94d64 |
+ IRCAS* cas = stmt->Ist.CAS.details;
|
|
|
e94d64 |
+ IRType ty = typeOfIRExpr(env->type_env, cas->dataLo);
|
|
|
e94d64 |
+ switch (ty) {
|
|
|
e94d64 |
+ case Ity_I64: sz = 8; break;
|
|
|
e94d64 |
+ case Ity_I32: sz = 4; break;
|
|
|
e94d64 |
+ case Ity_I16: sz = 2; break;
|
|
|
e94d64 |
+ case Ity_I8: sz = 1; break;
|
|
|
e94d64 |
+ default: goto unhandled_cas;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ HReg rAddr = iselIntExpr_R(env, cas->addr);
|
|
|
e94d64 |
+ HReg rExpd = iselIntExpr_R(env, cas->expdLo);
|
|
|
e94d64 |
+ HReg rData = iselIntExpr_R(env, cas->dataLo);
|
|
|
e94d64 |
+ vassert(cas->expdHi == NULL);
|
|
|
e94d64 |
+ vassert(cas->dataHi == NULL);
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rAddr));
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_MovI(hregARM64_X5(), rExpd));
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_MovI(hregARM64_X7(), rData));
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_CAS(sz));
|
|
|
e94d64 |
+ /* Now the lowest szB bytes of x1 are either equal to
|
|
|
e94d64 |
+ the lowest szB bytes of x5, indicating success, or they
|
|
|
e94d64 |
+ aren't, indicating failure. The IR semantics actually
|
|
|
e94d64 |
+ require us to return the old value at the location,
|
|
|
e94d64 |
+ regardless of success or failure, but in the case of
|
|
|
e94d64 |
+ failure it's not clear how to do this, since
|
|
|
e94d64 |
+ ARM64Instr_CAS can't provide that. Instead we'll just
|
|
|
e94d64 |
+ return the relevant bit of x1, since that's at least
|
|
|
e94d64 |
+ guaranteed to be different from the lowest bits of x5 on
|
|
|
e94d64 |
+ failure. */
|
|
|
e94d64 |
+ HReg rResult = hregARM64_X1();
|
|
|
e94d64 |
+ switch (sz) {
|
|
|
e94d64 |
+ case 8: break;
|
|
|
e94d64 |
+ case 4: rResult = widen_z_32_to_64(env, rResult); break;
|
|
|
e94d64 |
+ case 2: rResult = widen_z_16_to_64(env, rResult); break;
|
|
|
e94d64 |
+ case 1: rResult = widen_z_8_to_64(env, rResult); break;
|
|
|
e94d64 |
+ default: vassert(0);
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ // "old" in this case is interpreted somewhat liberally, per
|
|
|
e94d64 |
+ // the previous comment.
|
|
|
e94d64 |
+ HReg rOld = lookupIRTemp(env, cas->oldLo);
|
|
|
e94d64 |
+ addInstr(env, ARM64Instr_MovI(rOld, rResult));
|
|
|
e94d64 |
+ return;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+ unhandled_cas:
|
|
|
e94d64 |
+ break;
|
|
|
e94d64 |
+ }
|
|
|
e94d64 |
+
|
|
|
e94d64 |
/* --------- MEM FENCE --------- */
|
|
|
e94d64 |
case Ist_MBE:
|
|
|
e94d64 |
switch (stmt->Ist.MBE.event) {
|
|
|
e94d64 |
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
|
|
|
e94d64 |
index 8c4845e..26e9880 100644
|
|
|
e94d64 |
--- a/VEX/priv/main_main.c
|
|
|
e94d64 |
+++ b/VEX/priv/main_main.c
|
|
|
e94d64 |
@@ -1556,6 +1556,7 @@ void LibVEX_default_VexAbiInfo ( /*OUT*/VexAbiInfo* vbi )
|
|
|
e94d64 |
vbi->guest_amd64_assume_gs_is_const = False;
|
|
|
e94d64 |
vbi->guest_ppc_zap_RZ_at_blr = False;
|
|
|
e94d64 |
vbi->guest_ppc_zap_RZ_at_bl = NULL;
|
|
|
e94d64 |
+ vbi->guest__use_fallback_LLSC = False;
|
|
|
e94d64 |
vbi->host_ppc_calls_use_fndescrs = False;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
|
|
|
e94d64 |
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
|
|
|
e94d64 |
index 8ac3d9f..cbbb1ad 100644
|
|
|
e94d64 |
--- a/VEX/pub/libvex.h
|
|
|
e94d64 |
+++ b/VEX/pub/libvex.h
|
|
|
e94d64 |
@@ -369,6 +369,11 @@ void LibVEX_default_VexArchInfo ( /*OUT*/VexArchInfo* vai );
|
|
|
e94d64 |
guest is ppc32-linux ==> const False
|
|
|
e94d64 |
guest is other ==> inapplicable
|
|
|
e94d64 |
|
|
|
e94d64 |
+ guest__use_fallback_LLSC:
|
|
|
e94d64 |
+ guest is mips32 ==> applicable, default True
|
|
|
e94d64 |
+ guest is mips64 ==> applicable, default True
|
|
|
e94d64 |
+ guest is arm64 ==> applicable, default False
|
|
|
e94d64 |
+
|
|
|
e94d64 |
host_ppc_calls_use_fndescrs:
|
|
|
e94d64 |
host is ppc32-linux ==> False
|
|
|
e94d64 |
host is ppc64-linux ==> True
|
|
|
e94d64 |
@@ -401,11 +406,17 @@ typedef
|
|
|
e94d64 |
is assumed equivalent to a fn which always returns False. */
|
|
|
e94d64 |
Bool (*guest_ppc_zap_RZ_at_bl)(Addr);
|
|
|
e94d64 |
|
|
|
e94d64 |
+ /* Potentially for all guests that use LL/SC: use the fallback
|
|
|
e94d64 |
+ (synthesised) implementation rather than passing LL/SC on to
|
|
|
e94d64 |
+ the host? */
|
|
|
e94d64 |
+ Bool guest__use_fallback_LLSC;
|
|
|
e94d64 |
+
|
|
|
e94d64 |
/* PPC32/PPC64 HOSTS only: does '&f' give us a pointer to a
|
|
|
e94d64 |
function descriptor on the host, or to the function code
|
|
|
e94d64 |
itself? True => descriptor, False => code. */
|
|
|
e94d64 |
Bool host_ppc_calls_use_fndescrs;
|
|
|
e94d64 |
|
|
|
e94d64 |
+ /* ??? Description ??? */
|
|
|
e94d64 |
Bool guest_mips_fp_mode64;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
VexAbiInfo;
|
|
|
e94d64 |
diff --git a/VEX/pub/libvex_guest_arm64.h b/VEX/pub/libvex_guest_arm64.h
|
|
|
e94d64 |
index c438c1e..8b62cdd 100644
|
|
|
e94d64 |
--- a/VEX/pub/libvex_guest_arm64.h
|
|
|
e94d64 |
+++ b/VEX/pub/libvex_guest_arm64.h
|
|
|
e94d64 |
@@ -159,9 +159,14 @@ typedef
|
|
|
e94d64 |
note of bits 23 and 22. */
|
|
|
e94d64 |
UInt guest_FPCR;
|
|
|
e94d64 |
|
|
|
e94d64 |
+ /* Fallback LL/SC support. See bugs 344524 and 369459. */
|
|
|
e94d64 |
+ ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4 or 8.
|
|
|
e94d64 |
+ ULong guest_LLSC_ADDR; // Address of transaction.
|
|
|
e94d64 |
+ ULong guest_LLSC_DATA; // Original value at _ADDR, zero-extended.
|
|
|
e94d64 |
+
|
|
|
e94d64 |
/* Padding to make it have an 16-aligned size */
|
|
|
e94d64 |
/* UInt pad_end_0; */
|
|
|
e94d64 |
- /* ULong pad_end_1; */
|
|
|
e94d64 |
+ ULong pad_end_1;
|
|
|
e94d64 |
}
|
|
|
e94d64 |
VexGuestARM64State;
|
|
|
e94d64 |
|