Blame SOURCES/valgrind-3.18.1-arm64-doubleword-cas.patch

81fcc1
commit 7dbe2fed72886874f2eaf57dc07929542ae55b58
81fcc1
Author: Julian Seward <jseward@acm.org>
81fcc1
Date:   Fri Nov 12 10:40:48 2021 +0100
81fcc1
81fcc1
    Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS.
81fcc1
    
81fcc1
    The sequence of instructions emitted by the arm64 backend for doubleword
81fcc1
    compare-and-swap is incorrect.  This could lead to incorrect simulation of the
81fcc1
    ARMv8.1 atomic instructions (CASP, at least).  It also causes failures in the
81fcc1
    upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least
81fcc1
    when running with the fallback LL/SC implementation
81fcc1
    (`--sim-hints=fallback-llsc`, or as autoselected at startup).  In the worst
81fcc1
    case it can cause segfaulting in the generated code, because it could jump
81fcc1
    backwards unexpectedly far.
81fcc1
    
81fcc1
    The problem is the sequence emitted for ARM64in_CASP:
81fcc1
    
81fcc1
    * the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`.
81fcc1
    
81fcc1
    * using w1 to hold the success indication of the stxp instruction trashes the
81fcc1
      previous value in x1.  But the value in x1 is an output of ARM64in_CASP,
81fcc1
      hence one of the two output registers is corrupted.  That confuses any code
81fcc1
      downstream that wants to inspect those values to find out whether or not the
81fcc1
      transaction succeeded.
81fcc1
    
81fcc1
    The fixes are to
81fcc1
    
81fcc1
    * fix the branch offsets
81fcc1
    
81fcc1
    * use a different register to hold the stxp success indication.  w3 is a
81fcc1
      convenient choice.
81fcc1
81fcc1
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
81fcc1
index 5dccc0495..5657bcab9 100644
81fcc1
--- a/VEX/priv/host_arm64_defs.c
81fcc1
+++ b/VEX/priv/host_arm64_defs.c
81fcc1
@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
81fcc1
          addHRegUse(u, HRmWrite, hregARM64_X1());
81fcc1
          addHRegUse(u, HRmWrite, hregARM64_X9());
81fcc1
          addHRegUse(u, HRmWrite, hregARM64_X8());
81fcc1
+         addHRegUse(u, HRmWrite, hregARM64_X3());
81fcc1
          break;
81fcc1
       case ARM64in_MFence:
81fcc1
          return;
81fcc1
@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
81fcc1
 
81fcc1
               -- always:
81fcc1
               cmp     x0, x8                 // EB08001F
81fcc1
-              bne     out                    // 540000E1 (b.ne #28 <out>)
81fcc1
+              bne     out                    // 540000A1
81fcc1
               cmp     x1, x9                 // EB09003F
81fcc1
-              bne     out                    // 540000A1 (b.ne #20 <out>)
81fcc1
+              bne     out                    // 54000061
81fcc1
 
81fcc1
               -- one of:
81fcc1
-              stxp    w1, x6, x7, [x2]       // C8211C46
81fcc1
-              stxp    w1, w6, w7, [x2]       // 88211C46
81fcc1
+              stxp    w3, x6, x7, [x2]       // C8231C46
81fcc1
+              stxp    w3, w6, w7, [x2]       // 88231C46
81fcc1
 
81fcc1
               -- always:
81fcc1
-              cbnz    w1, loop               // 35FFFE81 (cbnz w1, #-48 <loop>)
81fcc1
+              cbnz    w3, loop               // 35FFFF03
81fcc1
             out:
81fcc1
          */
81fcc1
          switch (i->ARM64in.CASP.szB) {
81fcc1
@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
81fcc1
             default: vassert(0);
81fcc1
          }
81fcc1
          *p++ = 0xEB08001F;
81fcc1
-         *p++ = 0x540000E1;
81fcc1
-         *p++ = 0xEB09003F;
81fcc1
          *p++ = 0x540000A1;
81fcc1
+         *p++ = 0xEB09003F;
81fcc1
+         *p++ = 0x54000061;
81fcc1
          switch (i->ARM64in.CASP.szB) {
81fcc1
-            case 8:  *p++ = 0xC8211C46; break;
81fcc1
-            case 4:  *p++ = 0x88211C46; break;
81fcc1
+            case 8:  *p++ = 0xC8231C46; break;
81fcc1
+            case 4:  *p++ = 0x88231C46; break;
81fcc1
             default: vassert(0);
81fcc1
          }
81fcc1
-         *p++ = 0x35FFFE81;
81fcc1
+         *p++ = 0x35FFFF03;
81fcc1
          goto done;
81fcc1
       }
81fcc1
       case ARM64in_MFence: {
81fcc1
diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
81fcc1
index f0737f2c6..01fb5708e 100644
81fcc1
--- a/VEX/priv/host_arm64_defs.h
81fcc1
+++ b/VEX/priv/host_arm64_defs.h
81fcc1
@@ -720,6 +720,7 @@ typedef
81fcc1
             Int  szB; /* 1, 2, 4 or 8 */
81fcc1
          } StrEX;
81fcc1
          /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
81fcc1
+            and trashes x8
81fcc1
             where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
81fcc1
                   x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
81fcc1
             Uses x8 as scratch (but that's not allocatable).
81fcc1
@@ -738,7 +739,7 @@ typedef
81fcc1
             -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value
81fcc1
             -- attempt to store
81fcc1
             stxr    w8, x7, [x3]
81fcc1
-            -- if store successful, x1==0, so the eor is "x1 := x5"
81fcc1
+            -- if store successful, x8==0
81fcc1
             -- if store failed,     branch back and try again.
81fcc1
             cbne    w8, loop
81fcc1
            after:
81fcc1
@@ -746,6 +747,12 @@ typedef
81fcc1
          struct {
81fcc1
             Int szB; /* 1, 2, 4 or 8 */
81fcc1
          } CAS;
81fcc1
+         /* Doubleword CAS, 2 x 32 bit or 2 x 64 bit
81fcc1
+            x0(oldLSW),x1(oldMSW)
81fcc1
+               = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW)
81fcc1
+                                -> x6(newLSW),x7(newMSW))
81fcc1
+            and trashes x8, x9 and x3
81fcc1
+         */
81fcc1
          struct {
81fcc1
             Int szB; /* 4 or 8 */
81fcc1
          } CASP;