Blame SOURCES/valgrind-3.18.1-arm64-atomic-align.patch

commit 2be719921e700a9ac9b85f470ed87cb8adf8151b
Author: Julian Seward <jseward@acm.org>
Date:   Sat Nov 13 09:27:01 2021 +0100

    Bug 445415 - arm64 front end: alignment checks missing for atomic instructions.

    For the arm64 front end, none of the atomic instructions have address
    alignment checks included in their IR.  They all should.  The effect of
    missing alignment checks in the IR is that, since this IR will in most cases
    be translated back to atomic instructions in the back end, we will get
    alignment traps (SIGBUS) on the host side and not on the guest side, which is
    (very) incorrect behaviour of the simulation.

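For illustration, a minimal guest program along these lines (hypothetical; not
part of the patch) triggers the case the commit message describes: on real
arm64 hardware the misaligned atomic faults with SIGBUS in the program itself,
and the simulation should do the same rather than trapping on the re-emitted
host-side instruction.

    /* misaligned-atomic.c -- hypothetical illustration; assumes arm64-linux
       and a compiler that inlines the __atomic builtin (no libatomic call). */
    #include <stdint.h>
    #include <stdlib.h>

    int main(void)
    {
       char *buf = malloc(16);
       uint64_t *p = (uint64_t *)(buf + 1);   /* deliberately not 8-aligned */
       /* Compiles to an atomic RMW (LDXR/STXR loop or LDADD); a misaligned
          address here must raise SIGBUS in the guest, not in the host. */
       __atomic_fetch_add(p, 1, __ATOMIC_SEQ_CST);
       free(buf);
       return 0;
    }
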
diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
index ee018c6a9..16a7e075f 100644
--- a/VEX/priv/guest_arm64_toIR.c
+++ b/VEX/priv/guest_arm64_toIR.c
@@ -4833,6 +4833,34 @@ static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
 }
 
 
+/* Generate a SIGBUS followed by a restart of the current instruction if
+   `effective_addr` is not `align`-aligned.  This is required behaviour for
+   atomic instructions.  Assumes that guest_PC_curr_instr is set correctly!
+
+   This is hardwired to generate SIGBUS because so far the only supported arm64
+   (arm64-linux) does that.  Should we need to later extend it to generate some
+   other signal, use the same scheme as with gen_SIGNAL_if_not_XX_aligned in
+   guest_amd64_toIR.c. */
+static
+void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align )
+{
+   if (align == 1) {
+      return;
+   }
+   vassert(align == 16 || align == 8 || align == 4 || align == 2);
+   stmt(
+      IRStmt_Exit(
+         binop(Iop_CmpNE64,
+               binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)),
+               mkU64(0)),
+         Ijk_SigBUS,
+         IRConst_U64(guest_PC_curr_instr),
+         OFFB_PC
+      )
+   );
+}
+
+
 /* Generate a "standard 7" name, from bitQ and size.  But also
    allow ".1d" since that's occasionally useful. */
 static
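The helper above is deliberately hardwired to Ijk_SigBUS.  Should another
signal ever be needed, the scheme the comment points at would simply make the
jump kind a parameter.  A minimal sketch (hypothetical, modelled on the
function above; the name and signature need not match the real amd64 helper):

    /* Hypothetical generalisation: caller chooses the signal to raise. */
    static
    void gen_SIGNAL_if_not_XX_aligned ( IRTemp effective_addr, ULong align,
                                        IRJumpKind kind )
    {
       if (align == 1) {
          return;
       }
       vassert(align == 16 || align == 8 || align == 4 || align == 2);
       stmt(
          IRStmt_Exit(
             binop(Iop_CmpNE64,
                   binop(Iop_And64, mkexpr(effective_addr), mkU64(align-1)),
                   mkU64(0)),
             kind,                          /* e.g. Ijk_SigBUS, Ijk_SigSEGV */
             IRConst_U64(guest_PC_curr_instr),
             OFFB_PC
          )
       );
    }
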
@@ -6670,7 +6698,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
 
       IRTemp ea = newTemp(Ity_I64);
       assign(ea, getIReg64orSP(nn));
-      /* FIXME generate check that ea is szB-aligned */
+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
 
       if (isLD && ss == BITS5(1,1,1,1,1)) {
          IRTemp res = newTemp(ty);
@@ -6803,7 +6831,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
 
       IRTemp ea = newTemp(Ity_I64);
       assign(ea, getIReg64orSP(nn));
-      /* FIXME generate check that ea is 2*elemSzB-aligned */
+      gen_SIGBUS_if_not_XX_aligned(ea, fullSzB);
 
       if (isLD && ss == BITS5(1,1,1,1,1)) {
          if (abiinfo->guest__use_fallback_LLSC) {
@@ -7044,7 +7072,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
 
       IRTemp ea = newTemp(Ity_I64);
       assign(ea, getIReg64orSP(nn));
-      /* FIXME generate check that ea is szB-aligned */
+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
 
       if (isLD) {
          IRTemp res = newTemp(ty);
@@ -7159,6 +7187,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
 
       IRTemp ea = newTemp(Ity_I64);
       assign(ea, getIReg64orSP(nn));
+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
 
       // Insert barrier before loading for acquire and acquire-release variants:
       // A and AL.
@@ -7266,6 +7295,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       IRType ty = integerIRTypeOfSize(szB);
       Bool is64 = szB == 8;
 
+      IRTemp ea = newTemp(Ity_I64);
+      assign(ea, getIReg64orSP(nn));
+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
+
       IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
       IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
 
@@ -7275,7 +7308,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       // Store the result back if LHS remains unchanged in memory.
       IRTemp old = newTemp(ty);
       stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
-                               Iend_LE, getIReg64orSP(nn),
+                               Iend_LE, mkexpr(ea),
                                /*expdHi*/NULL, exp,
                                /*dataHi*/NULL, new)) );
 
@@ -7307,6 +7340,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
       if ((ss & 0x1) || (tt & 0x1)) {
          /* undefined; fall through */
       } else {
+         IRTemp ea = newTemp(Ity_I64);
+         assign(ea, getIReg64orSP(nn));
+         gen_SIGBUS_if_not_XX_aligned(ea, is64 ? 16 : 8);
+
          IRExpr *expLo = getIRegOrZR(is64, ss);
          IRExpr *expHi = getIRegOrZR(is64, ss + 1);
          IRExpr *newLo = getIRegOrZR(is64, tt);
@@ -7318,7 +7355,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
             stmt(IRStmt_MBE(Imbe_Fence));
 
          stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
-                                  Iend_LE, getIReg64orSP(nn),
+                                  Iend_LE, mkexpr(ea),
                                   expHi, expLo,
                                   newHi, newLo)) );
 
diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
index b65e27db4..39c6aaa46 100644
--- a/VEX/priv/host_arm64_defs.c
+++ b/VEX/priv/host_arm64_defs.c
@@ -4033,6 +4033,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
             case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
             case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
+            case Ijk_SigBUS:      trcval = VEX_TRC_JMP_SIGBUS;      break;
             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
             /* We don't expect to see the following being assisted. */
diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
index 094e7e74b..82cb2d78c 100644
--- a/VEX/priv/host_arm64_isel.c
+++ b/VEX/priv/host_arm64_isel.c
@@ -4483,6 +4483,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
          case Ijk_InvalICache:
          case Ijk_FlushDCache:
          case Ijk_SigTRAP:
+         case Ijk_SigBUS:
          case Ijk_Yield: {
            HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
            addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
@@ -4576,8 +4577,8 @@ static void iselNext ( ISelEnv* env,
       case Ijk_InvalICache:
       case Ijk_FlushDCache:
       case Ijk_SigTRAP:
-      case Ijk_Yield:
-      {
+      case Ijk_SigBUS:
+      case Ijk_Yield: {
          HReg        r    = iselIntExpr_R(env, next);
          ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
          addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
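
With all three pieces in place (the IR-level check in the front end, plus
Ijk_SigBUS handling in the back end emitter and instruction selector), a
misaligned atomic now results in a guest-side SIGBUS.  A hypothetical
stand-alone check, not part of this patch and assuming arm64-linux with
GCC/Clang inline assembly, could look like:

    /* expect-sigbus.c -- hypothetical: exits 0 only if the misaligned
       load-exclusive raises SIGBUS in the guest, as on real hardware. */
    #include <signal.h>
    #include <stdint.h>
    #include <stdlib.h>

    static void on_sigbus ( int sig )
    {
       (void)sig;
       _Exit(0);                               /* expected outcome */
    }

    int main ( void )
    {
       signal(SIGBUS, on_sigbus);
       char *buf = malloc(16);
       uint64_t *p = (uint64_t *)(buf + 1);    /* deliberately not 8-aligned */
       uint64_t v;
       __asm__ volatile("ldaxr %0, [%1]" : "=r"(v) : "r"(p) : "memory");
       return 1;                               /* no SIGBUS -- unexpected */
    }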