|
|
81fcc1 |
commit 7dbe2fed72886874f2eaf57dc07929542ae55b58
|
|
|
81fcc1 |
Author: Julian Seward <jseward@acm.org>
|
|
|
81fcc1 |
Date: Fri Nov 12 10:40:48 2021 +0100
|
|
|
81fcc1 |
|
|
|
81fcc1 |
Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS.
|
|
|
81fcc1 |
|
|
|
81fcc1 |
The sequence of instructions emitted by the arm64 backend for doubleword
|
|
|
81fcc1 |
compare-and-swap is incorrect. This could lead to incorrect simulation of the
|
|
|
81fcc1 |
ARMv8.1 atomic instructions (CASP, at least). It also causes failures in the
|
|
|
81fcc1 |
upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least
|
|
|
81fcc1 |
when running with the fallback LL/SC implementation
|
|
|
81fcc1 |
(`--sim-hints=fallback-llsc`, or as autoselected at startup). In the worst
|
|
|
81fcc1 |
case it can cause segfaulting in the generated code, because it could jump
|
|
|
81fcc1 |
backwards unexpectedly far.
|
|
|
81fcc1 |
|
|
|
81fcc1 |
The problem is the sequence emitted for ARM64in_CASP:
|
|
|
81fcc1 |
|
|
|
81fcc1 |
* the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`.
|
|
|
81fcc1 |
|
|
|
81fcc1 |
* using w1 to hold the success indication of the stxp instruction trashes the
|
|
|
81fcc1 |
previous value in x1. But the value in x1 is an output of ARM64in_CASP,
|
|
|
81fcc1 |
hence one of the two output registers is corrupted. That confuses any code
|
|
|
81fcc1 |
downstream that wants to inspect those values to find out whether or not the
|
|
|
81fcc1 |
transaction succeeded.
|
|
|
81fcc1 |
|
|
|
81fcc1 |
The fixes are to
|
|
|
81fcc1 |
|
|
|
81fcc1 |
* fix the branch offsets
|
|
|
81fcc1 |
|
|
|
81fcc1 |
* use a different register to hold the stxp success indication. w3 is a
|
|
|
81fcc1 |
convenient choice.
|
|
|
81fcc1 |
|
|
|
81fcc1 |
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
|
|
|
81fcc1 |
index 5dccc0495..5657bcab9 100644
|
|
|
81fcc1 |
--- a/VEX/priv/host_arm64_defs.c
|
|
|
81fcc1 |
+++ b/VEX/priv/host_arm64_defs.c
|
|
|
81fcc1 |
@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
|
|
|
81fcc1 |
addHRegUse(u, HRmWrite, hregARM64_X1());
|
|
|
81fcc1 |
addHRegUse(u, HRmWrite, hregARM64_X9());
|
|
|
81fcc1 |
addHRegUse(u, HRmWrite, hregARM64_X8());
|
|
|
81fcc1 |
+ addHRegUse(u, HRmWrite, hregARM64_X3());
|
|
|
81fcc1 |
break;
|
|
|
81fcc1 |
case ARM64in_MFence:
|
|
|
81fcc1 |
return;
|
|
|
81fcc1 |
@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
|
81fcc1 |
|
|
|
81fcc1 |
-- always:
|
|
|
81fcc1 |
cmp x0, x8 // EB08001F
|
|
|
81fcc1 |
- bne out // 540000E1 (b.ne #28 <out>)
|
|
|
81fcc1 |
+ bne out // 540000A1
|
|
|
81fcc1 |
cmp x1, x9 // EB09003F
|
|
|
81fcc1 |
- bne out // 540000A1 (b.ne #20 <out>)
|
|
|
81fcc1 |
+ bne out // 54000061
|
|
|
81fcc1 |
|
|
|
81fcc1 |
-- one of:
|
|
|
81fcc1 |
- stxp w1, x6, x7, [x2] // C8211C46
|
|
|
81fcc1 |
- stxp w1, w6, w7, [x2] // 88211C46
|
|
|
81fcc1 |
+ stxp w3, x6, x7, [x2] // C8231C46
|
|
|
81fcc1 |
+ stxp w3, w6, w7, [x2] // 88231C46
|
|
|
81fcc1 |
|
|
|
81fcc1 |
-- always:
|
|
|
81fcc1 |
- cbnz w1, loop // 35FFFE81 (cbnz w1, #-48 <loop>)
|
|
|
81fcc1 |
+ cbnz w3, loop // 35FFFF03
|
|
|
81fcc1 |
out:
|
|
|
81fcc1 |
*/
|
|
|
81fcc1 |
switch (i->ARM64in.CASP.szB) {
|
|
|
81fcc1 |
@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
|
|
|
81fcc1 |
default: vassert(0);
|
|
|
81fcc1 |
}
|
|
|
81fcc1 |
*p++ = 0xEB08001F;
|
|
|
81fcc1 |
- *p++ = 0x540000E1;
|
|
|
81fcc1 |
- *p++ = 0xEB09003F;
|
|
|
81fcc1 |
*p++ = 0x540000A1;
|
|
|
81fcc1 |
+ *p++ = 0xEB09003F;
|
|
|
81fcc1 |
+ *p++ = 0x54000061;
|
|
|
81fcc1 |
switch (i->ARM64in.CASP.szB) {
|
|
|
81fcc1 |
- case 8: *p++ = 0xC8211C46; break;
|
|
|
81fcc1 |
- case 4: *p++ = 0x88211C46; break;
|
|
|
81fcc1 |
+ case 8: *p++ = 0xC8231C46; break;
|
|
|
81fcc1 |
+ case 4: *p++ = 0x88231C46; break;
|
|
|
81fcc1 |
default: vassert(0);
|
|
|
81fcc1 |
}
|
|
|
81fcc1 |
- *p++ = 0x35FFFE81;
|
|
|
81fcc1 |
+ *p++ = 0x35FFFF03;
|
|
|
81fcc1 |
goto done;
|
|
|
81fcc1 |
}
|
|
|
81fcc1 |
case ARM64in_MFence: {
|
|
|
81fcc1 |
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
|
|
|
81fcc1 |
index f0737f2c6..01fb5708e 100644
|
|
|
81fcc1 |
--- a/VEX/priv/host_arm64_defs.h
|
|
|
81fcc1 |
+++ b/VEX/priv/host_arm64_defs.h
|
|
|
81fcc1 |
@@ -720,6 +720,7 @@ typedef
|
|
|
81fcc1 |
Int szB; /* 1, 2, 4 or 8 */
|
|
|
81fcc1 |
} StrEX;
|
|
|
81fcc1 |
/* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
|
|
|
81fcc1 |
+ and trashes x8
|
|
|
81fcc1 |
where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
|
|
|
81fcc1 |
x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
|
|
|
81fcc1 |
Uses x8 as scratch (but that's not allocatable).
|
|
|
81fcc1 |
@@ -738,7 +739,7 @@ typedef
|
|
|
81fcc1 |
-- if branch taken, failure; x1[[8*szB-1 : 0] holds old value
|
|
|
81fcc1 |
-- attempt to store
|
|
|
81fcc1 |
stxr w8, x7, [x3]
|
|
|
81fcc1 |
- -- if store successful, x1==0, so the eor is "x1 := x5"
|
|
|
81fcc1 |
+ -- if store successful, x8==0
|
|
|
81fcc1 |
-- if store failed, branch back and try again.
|
|
|
81fcc1 |
cbne w8, loop
|
|
|
81fcc1 |
after:
|
|
|
81fcc1 |
@@ -746,6 +747,12 @@ typedef
|
|
|
81fcc1 |
struct {
|
|
|
81fcc1 |
Int szB; /* 1, 2, 4 or 8 */
|
|
|
81fcc1 |
} CAS;
|
|
|
81fcc1 |
+ /* Doubleword CAS, 2 x 32 bit or 2 x 64 bit
|
|
|
81fcc1 |
+ x0(oldLSW),x1(oldMSW)
|
|
|
81fcc1 |
+ = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW)
|
|
|
81fcc1 |
+ -> x6(newLSW),x7(newMSW))
|
|
|
81fcc1 |
+ and trashes x8, x9 and x3
|
|
|
81fcc1 |
+ */
|
|
|
81fcc1 |
struct {
|
|
|
81fcc1 |
Int szB; /* 4 or 8 */
|
|
|
81fcc1 |
} CASP;
|