Mark Wielaard beb1f8
commit 7dbe2fed72886874f2eaf57dc07929542ae55b58
Mark Wielaard beb1f8
Author: Julian Seward <jseward@acm.org>
Mark Wielaard beb1f8
Date:   Fri Nov 12 10:40:48 2021 +0100
Mark Wielaard beb1f8
Mark Wielaard beb1f8
    Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS.
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    The sequence of instructions emitted by the arm64 backend for doubleword
Mark Wielaard beb1f8
    compare-and-swap is incorrect.  This could lead to incorrect simulation of the
Mark Wielaard beb1f8
    AArch8.1 atomic instructions (CASP, at least).  It also causes failures in the
Mark Wielaard beb1f8
    upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least
Mark Wielaard beb1f8
    when running with the fallback LL/SC implementation
Mark Wielaard beb1f8
    (`--sim-hints=fallback-llsc`, or as autoselected at startup).  In the worst
Mark Wielaard beb1f8
    case it can cause segfaulting in the generated code, because it could jump
Mark Wielaard beb1f8
    backwards unexpectedly far.
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    The problem is the sequence emitted for ARM64in_CASP:
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    * the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`.
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    * using w1 to hold the success indication of the stxp instruction trashes the
Mark Wielaard beb1f8
      previous value in x1.  But the value in x1 is an output of ARM64in_CASP,
Mark Wielaard beb1f8
      hence one of the two output registers is corrupted.  That confuses any code
Mark Wielaard beb1f8
      downstream that want to inspect those values to find out whether or not the
Mark Wielaard beb1f8
      transaction succeeded.
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    The fixes are to
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    * fix the branch offsets
Mark Wielaard beb1f8
    
Mark Wielaard beb1f8
    * use a different register to hold the stxp success indication.  w3 is a
Mark Wielaard beb1f8
      convenient check.
Mark Wielaard beb1f8
Mark Wielaard beb1f8
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
Mark Wielaard beb1f8
index 5dccc0495..5657bcab9 100644
Mark Wielaard beb1f8
--- a/VEX/priv/host_arm64_defs.c
Mark Wielaard beb1f8
+++ b/VEX/priv/host_arm64_defs.c
Mark Wielaard beb1f8
@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
Mark Wielaard beb1f8
          addHRegUse(u, HRmWrite, hregARM64_X1());
Mark Wielaard beb1f8
          addHRegUse(u, HRmWrite, hregARM64_X9());
Mark Wielaard beb1f8
          addHRegUse(u, HRmWrite, hregARM64_X8());
Mark Wielaard beb1f8
+         addHRegUse(u, HRmWrite, hregARM64_X3());
Mark Wielaard beb1f8
          break;
Mark Wielaard beb1f8
       case ARM64in_MFence:
Mark Wielaard beb1f8
          return;
Mark Wielaard beb1f8
@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
Mark Wielaard beb1f8
 
Mark Wielaard beb1f8
               -- always:
Mark Wielaard beb1f8
               cmp     x0, x8                 // EB08001F
Mark Wielaard beb1f8
-              bne     out                    // 540000E1 (b.ne #28 <out>)
Mark Wielaard beb1f8
+              bne     out                    // 540000A1
Mark Wielaard beb1f8
               cmp     x1, x9                 // EB09003F
Mark Wielaard beb1f8
-              bne     out                    // 540000A1 (b.ne #20 <out>)
Mark Wielaard beb1f8
+              bne     out                    // 54000061
Mark Wielaard beb1f8
 
Mark Wielaard beb1f8
               -- one of:
Mark Wielaard beb1f8
-              stxp    w1, x6, x7, [x2]       // C8211C46
Mark Wielaard beb1f8
-              stxp    w1, w6, w7, [x2]       // 88211C46
Mark Wielaard beb1f8
+              stxp    w3, x6, x7, [x2]       // C8231C46
Mark Wielaard beb1f8
+              stxp    w3, w6, w7, [x2]       // 88231C46
Mark Wielaard beb1f8
 
Mark Wielaard beb1f8
               -- always:
Mark Wielaard beb1f8
-              cbnz    w1, loop               // 35FFFE81 (cbnz w1, #-48 <loop>)
Mark Wielaard beb1f8
+              cbnz    w3, loop               // 35FFFF03
Mark Wielaard beb1f8
             out:
Mark Wielaard beb1f8
          */
Mark Wielaard beb1f8
          switch (i->ARM64in.CASP.szB) {
Mark Wielaard beb1f8
@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
Mark Wielaard beb1f8
             default: vassert(0);
Mark Wielaard beb1f8
          }
Mark Wielaard beb1f8
          *p++ = 0xEB08001F;
Mark Wielaard beb1f8
-         *p++ = 0x540000E1;
Mark Wielaard beb1f8
-         *p++ = 0xEB09003F;
Mark Wielaard beb1f8
          *p++ = 0x540000A1;
Mark Wielaard beb1f8
+         *p++ = 0xEB09003F;
Mark Wielaard beb1f8
+         *p++ = 0x54000061;
Mark Wielaard beb1f8
          switch (i->ARM64in.CASP.szB) {
Mark Wielaard beb1f8
-            case 8:  *p++ = 0xC8211C46; break;
Mark Wielaard beb1f8
-            case 4:  *p++ = 0x88211C46; break;
Mark Wielaard beb1f8
+            case 8:  *p++ = 0xC8231C46; break;
Mark Wielaard beb1f8
+            case 4:  *p++ = 0x88231C46; break;
Mark Wielaard beb1f8
             default: vassert(0);
Mark Wielaard beb1f8
          }
Mark Wielaard beb1f8
-         *p++ = 0x35FFFE81;
Mark Wielaard beb1f8
+         *p++ = 0x35FFFF03;
Mark Wielaard beb1f8
          goto done;
Mark Wielaard beb1f8
       }
Mark Wielaard beb1f8
       case ARM64in_MFence: {
Mark Wielaard beb1f8
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
Mark Wielaard beb1f8
index f0737f2c6..01fb5708e 100644
Mark Wielaard beb1f8
--- a/VEX/priv/host_arm64_defs.h
Mark Wielaard beb1f8
+++ b/VEX/priv/host_arm64_defs.h
Mark Wielaard beb1f8
@@ -720,6 +720,7 @@ typedef
Mark Wielaard beb1f8
             Int  szB; /* 1, 2, 4 or 8 */
Mark Wielaard beb1f8
          } StrEX;
Mark Wielaard beb1f8
          /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
Mark Wielaard beb1f8
+            and trashes x8
Mark Wielaard beb1f8
             where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
Mark Wielaard beb1f8
                   x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
Mark Wielaard beb1f8
             Uses x8 as scratch (but that's not allocatable).
Mark Wielaard beb1f8
@@ -738,7 +739,7 @@ typedef
Mark Wielaard beb1f8
             -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value
Mark Wielaard beb1f8
             -- attempt to store
Mark Wielaard beb1f8
             stxr    w8, x7, [x3]
Mark Wielaard beb1f8
-            -- if store successful, x1==0, so the eor is "x1 := x5"
Mark Wielaard beb1f8
+            -- if store successful, x8==0
Mark Wielaard beb1f8
             -- if store failed,     branch back and try again.
Mark Wielaard beb1f8
             cbne    w8, loop
Mark Wielaard beb1f8
            after:
Mark Wielaard beb1f8
@@ -746,6 +747,12 @@ typedef
Mark Wielaard beb1f8
          struct {
Mark Wielaard beb1f8
             Int szB; /* 1, 2, 4 or 8 */
Mark Wielaard beb1f8
          } CAS;
Mark Wielaard beb1f8
+         /* Doubleworld CAS, 2 x 32 bit or 2 x 64 bit
Mark Wielaard beb1f8
+            x0(oldLSW),x1(oldMSW)
Mark Wielaard beb1f8
+               = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW)
Mark Wielaard beb1f8
+                                -> x6(newLSW),x7(newMSW))
Mark Wielaard beb1f8
+            and trashes x8, x9 and x3
Mark Wielaard beb1f8
+         */
Mark Wielaard beb1f8
          struct {
Mark Wielaard beb1f8
             Int szB; /* 4 or 8 */
Mark Wielaard beb1f8
          } CASP;