diff --git a/.gitignore b/.gitignore
index 7b0f078..cbec41e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,4 @@
 /valgrind-3.17.0.tar.bz2
 /valgrind-3.18.0.RC1.tar.bz2
 /valgrind-3.18.1.tar.bz2
+/valgrind-3.19.0.tar.bz2
diff --git a/sources b/sources
index 29f3ab8..e93a40c 100644
--- a/sources
+++ b/sources
@@ -1 +1 @@
-SHA512 (valgrind-3.18.1.tar.bz2) = a03b5cd7eafab4a1cea07f46464c1546ae1cb3d106649626b1e55658badf90e58d1f3854a38a33d5dffd8237f5555ae7e1f27a4b40e06254f87825c7fc61b59b
+SHA512 (valgrind-3.19.0.tar.bz2) = f720a89dc4c4989cc5714bff9efe97529f71990bcfad7a92b889ce099c4326d6da07fa4d5fbab2e9125e20f352354f6178471e49e419b613a3c82c2a1c667ab2
diff --git a/valgrind-3.18.1-amd64-more-spec-rules.patch b/valgrind-3.18.1-amd64-more-spec-rules.patch
deleted file mode 100644
index 87794ee..0000000
--- a/valgrind-3.18.1-amd64-more-spec-rules.patch
+++ /dev/null
@@ -1,105 +0,0 @@
-commit 595341b150312d2407bd43304449bf39ec3e1fa8
-Author: Julian Seward <jseward@acm.org>
-Date:   Sat Nov 13 19:59:07 2021 +0100
-
-    amd64 front end: add more spec rules:
-    
-       S  after SHRQ
-       Z  after SHLQ
-       NZ after SHLQ
-       Z  after SHLL
-       S  after SHLL
-    
-    The lack of at least one of these was observed to cause occasional false
-    positives in Memcheck.
-    
-    Plus add commented-out cases so as to complete the set of 12 rules
-    {Z,NZ,S,NS} after {SHRQ,SHLQ,SHLL}.  The commented-out ones are commented
-    out because I so far didn't find any use cases for them.
-
-diff --git a/VEX/priv/guest_amd64_helpers.c b/VEX/priv/guest_amd64_helpers.c
-index 9d61e7a0f..ba71c1b62 100644
---- a/VEX/priv/guest_amd64_helpers.c
-+++ b/VEX/priv/guest_amd64_helpers.c
-@@ -1823,16 +1823,26 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name,
-       /*---------------- SHRQ ----------------*/
- 
-       if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondZ)) {
--         /* SHRQ, then Z --> test dep1 == 0 */
-+         /* SHRQ, then Z --> test result[63:0] == 0 */
-          return unop(Iop_1Uto64,
-                      binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
-       }
-       if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNZ)) {
--         /* SHRQ, then NZ --> test dep1 != 0 */
-+         /* SHRQ, then NZ --> test result[63:0] != 0 */
-          return unop(Iop_1Uto64,
-                      binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
-       }
- 
-+      if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondS)) {
-+         /* SHRQ, then S --> (ULong)result[63] (result is in dep1) */
-+         return binop(Iop_Shr64, cc_dep1, mkU8(63));
-+      }
-+      // No known test case for this, hence disabled:
-+      //if (isU64(cc_op, AMD64G_CC_OP_SHRQ) && isU64(cond, AMD64CondNS)) {
-+      //   /* SHRQ, then NS --> (ULong) ~ result[63] */
-+      //   vassert(0);
-+      //}
-+
-       /*---------------- SHRL ----------------*/
- 
-       if (isU64(cc_op, AMD64G_CC_OP_SHRL) && isU64(cond, AMD64CondZ)) {
-@@ -1881,6 +1891,52 @@ IRExpr* guest_amd64_spechelper ( const HChar* function_name,
-       //                     mkU32(0)));
-       //}
- 
-+      /*---------------- SHLQ ----------------*/
-+
-+      if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondZ)) {
-+         /* SHLQ, then Z --> test dep1 == 0 */
-+         return unop(Iop_1Uto64,
-+                     binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
-+      }
-+      if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNZ)) {
-+         /* SHLQ, then NZ --> test dep1 != 0 */
-+         return unop(Iop_1Uto64,
-+                     binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
-+      }
-+
-+      //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondS)) {
-+      //   /* SHLQ, then S --> (ULong)result[63] */
-+      //   vassert(0);
-+      //}
-+      //if (isU64(cc_op, AMD64G_CC_OP_SHLQ) && isU64(cond, AMD64CondNS)) {
-+      //   /* SHLQ, then NS --> (ULong) ~ result[63] */
-+      //   vassert(0);
-+      //}
-+
-+      /*---------------- SHLL ----------------*/
-+
-+      if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondZ)) {
-+         /* SHLL, then Z --> test result[31:0] == 0 */
-+         return unop(Iop_1Uto64,
-+                     binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
-+                           mkU32(0)));
-+      }
-+      //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNZ)) {
-+      //   /* SHLL, then NZ --> test dep1 != 0 */
-+      //   vassert(0);
-+      //}
-+
-+      if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondS)) {
-+         /* SHLL, then S --> (ULong)result[31] */
-+         return binop(Iop_And64,
-+                      binop(Iop_Shr64, cc_dep1, mkU8(31)),
-+                      mkU64(1));
-+      }
-+      //if (isU64(cc_op, AMD64G_CC_OP_SHLL) && isU64(cond, AMD64CondNS)) {
-+      //   /* SHLL, then NS --> (ULong) ~ result[31] */
-+      //   vassert(0);
-+      //}
-+
-       /*---------------- COPY ----------------*/
-       /* This can happen, as a result of amd64 FP compares: "comisd ... ;
-          jbe" for example. */
diff --git a/valgrind-3.18.1-arm64-atomic-align.patch b/valgrind-3.18.1-arm64-atomic-align.patch
deleted file mode 100644
index 8cce35f..0000000
--- a/valgrind-3.18.1-arm64-atomic-align.patch
+++ /dev/null
@@ -1,163 +0,0 @@
-commit 2be719921e700a9ac9b85f470ed87cb8adf8151b
-Author: Julian Seward <jseward@acm.org>
-Date:   Sat Nov 13 09:27:01 2021 +0100
-
-    Bug 445415 - arm64 front end: alignment checks missing for atomic instructions.
-    
-    For the arm64 front end, none of the atomic instructions have address
-    alignment checks included in their IR.  They all should.  The effect of
-    missing alignment checks in the IR is that, since this IR will in most cases
-    be translated back to atomic instructions in the back end, we will get
-    alignment traps (SIGBUS) on the host side and not on the guest side, which is
-    (very) incorrect behaviour of the simulation.
-
- 
-diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
-index ee018c6a9..16a7e075f 100644
---- a/VEX/priv/guest_arm64_toIR.c
-+++ b/VEX/priv/guest_arm64_toIR.c
-@@ -4833,6 +4833,34 @@ static IRTemp gen_zwidening_load ( UInt szB, IRTemp addr )
- }
- 
- 
-+/* Generate a SIGBUS followed by a restart of the current instruction if
-+   `effective_addr` is `align`-aligned.  This is required behaviour for atomic
-+   instructions.  This assumes that guest_RIP_curr_instr is set correctly!
-+
-+   This is hardwired to generate SIGBUS because so far the only supported arm64
-+   (arm64-linux) does that.  Should we need to later extend it to generate some
-+   other signal, use the same scheme as with gen_SIGNAL_if_not_XX_aligned in
-+   guest_amd64_toIR.c. */
-+static
-+void gen_SIGBUS_if_not_XX_aligned ( IRTemp effective_addr, ULong align )
-+{
-+   if (align == 1) {
-+      return;
-+   }
-+   vassert(align == 16 || align == 8 || align == 4 || align == 2);
-+   stmt(
-+      IRStmt_Exit(
-+         binop(Iop_CmpNE64,
-+               binop(Iop_And64,mkexpr(effective_addr),mkU64(align-1)),
-+               mkU64(0)),
-+         Ijk_SigBUS,
-+         IRConst_U64(guest_PC_curr_instr),
-+         OFFB_PC
-+      )
-+   );
-+}
-+
-+
- /* Generate a "standard 7" name, from bitQ and size.  But also
-    allow ".1d" since that's occasionally useful. */
- static
-@@ -6670,7 +6698,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
- 
-       IRTemp ea = newTemp(Ity_I64);
-       assign(ea, getIReg64orSP(nn));
--      /* FIXME generate check that ea is szB-aligned */
-+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
- 
-       if (isLD && ss == BITS5(1,1,1,1,1)) {
-          IRTemp res = newTemp(ty);
-@@ -6803,7 +6831,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
- 
-       IRTemp ea = newTemp(Ity_I64);
-       assign(ea, getIReg64orSP(nn));
--      /* FIXME generate check that ea is 2*elemSzB-aligned */
-+      gen_SIGBUS_if_not_XX_aligned(ea, fullSzB);
- 
-       if (isLD && ss == BITS5(1,1,1,1,1)) {
-          if (abiinfo->guest__use_fallback_LLSC) {
-@@ -7044,7 +7072,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
- 
-       IRTemp ea = newTemp(Ity_I64);
-       assign(ea, getIReg64orSP(nn));
--      /* FIXME generate check that ea is szB-aligned */
-+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
- 
-       if (isLD) {
-          IRTemp res = newTemp(ty);
-@@ -7159,6 +7187,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
- 
-       IRTemp ea = newTemp(Ity_I64);
-       assign(ea, getIReg64orSP(nn));
-+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
- 
-       // Insert barrier before loading for acquire and acquire-release variants:
-       // A and AL.
-@@ -7266,6 +7295,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-       IRType ty = integerIRTypeOfSize(szB);
-       Bool is64 = szB == 8;
- 
-+      IRTemp ea = newTemp(Ity_I64);
-+      assign(ea, getIReg64orSP(nn));
-+      gen_SIGBUS_if_not_XX_aligned(ea, szB);
-+
-       IRExpr *exp = narrowFrom64(ty, getIReg64orZR(ss));
-       IRExpr *new = narrowFrom64(ty, getIReg64orZR(tt));
- 
-@@ -7275,7 +7308,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-       // Store the result back if LHS remains unchanged in memory.
-       IRTemp old = newTemp(ty);
-       stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
--                               Iend_LE, getIReg64orSP(nn),
-+                               Iend_LE, mkexpr(ea),
-                                /*expdHi*/NULL, exp,
-                                /*dataHi*/NULL, new)) );
- 
-@@ -7307,6 +7340,10 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-       if ((ss & 0x1) || (tt & 0x1)) {
-          /* undefined; fall through */
-       } else {
-+         IRTemp ea = newTemp(Ity_I64);
-+         assign(ea, getIReg64orSP(nn));
-+         gen_SIGBUS_if_not_XX_aligned(ea, is64 ? 16 : 8);
-+
-          IRExpr *expLo = getIRegOrZR(is64, ss);
-          IRExpr *expHi = getIRegOrZR(is64, ss + 1);
-          IRExpr *newLo = getIRegOrZR(is64, tt);
-@@ -7318,7 +7355,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-             stmt(IRStmt_MBE(Imbe_Fence));
- 
-          stmt( IRStmt_CAS(mkIRCAS(oldHi, oldLo,
--                                  Iend_LE, getIReg64orSP(nn),
-+                                  Iend_LE, mkexpr(ea),
-                                   expHi, expLo,
-                                   newHi, newLo)) );
- 
-diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
-index b65e27db4..39c6aaa46 100644
---- a/VEX/priv/host_arm64_defs.c
-+++ b/VEX/priv/host_arm64_defs.c
-@@ -4033,6 +4033,7 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
-             case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
-             case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
-             case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
-+            case Ijk_SigBUS:      trcval = VEX_TRC_JMP_SIGBUS;      break;
-             //case Ijk_SigSEGV:     trcval = VEX_TRC_JMP_SIGSEGV;     break;
-             case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
-             /* We don't expect to see the following being assisted. */
-diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
-index 094e7e74b..82cb2d78c 100644
---- a/VEX/priv/host_arm64_isel.c
-+++ b/VEX/priv/host_arm64_isel.c
-@@ -4483,6 +4483,7 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
-          case Ijk_InvalICache:
-          case Ijk_FlushDCache:
-          case Ijk_SigTRAP:
-+         case Ijk_SigBUS:
-          case Ijk_Yield: {
-             HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
-             addInstr(env, ARM64Instr_XAssisted(r, amPC, cc,
-@@ -4576,8 +4577,8 @@ static void iselNext ( ISelEnv* env,
-       case Ijk_InvalICache:
-       case Ijk_FlushDCache:
-       case Ijk_SigTRAP:
--      case Ijk_Yield:
--      {
-+      case Ijk_SigBUS:
-+      case Ijk_Yield: {
-          HReg        r    = iselIntExpr_R(env, next);
-          ARM64AMode* amPC = mk_baseblock_64bit_access_amode(offsIP);
-          addInstr(env, ARM64Instr_XAssisted(r, amPC, ARM64cc_AL, jk));
diff --git a/valgrind-3.18.1-arm64-atomics-rdm.patch b/valgrind-3.18.1-arm64-atomics-rdm.patch
deleted file mode 100644
index 47cb91a..0000000
--- a/valgrind-3.18.1-arm64-atomics-rdm.patch
+++ /dev/null
@@ -1,16 +0,0 @@
-diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
-index 1253cf588..482047c7a 100644
---- a/VEX/priv/main_main.c
-+++ b/VEX/priv/main_main.c
-@@ -2163,11 +2163,6 @@ static void check_hwcaps ( VexArch arch, UInt hwcaps )
-          if (have_fp16 != have_vfp16)
-             invalid_hwcaps(arch, hwcaps,
-                     "Mismatch detected between scalar and vector FP16 features.\n");
--         Bool have_rdm = ((hwcaps & VEX_HWCAPS_ARM64_RDM) != 0);
--         Bool have_atomics = ((hwcaps & VEX_HWCAPS_ARM64_ATOMICS) != 0);
--         if (have_rdm != have_atomics)
--            invalid_hwcaps(arch, hwcaps,
--                    "Mismatch detected between RDMA and atomics features.\n");
-          return;
-       }
- 
diff --git a/valgrind-3.18.1-arm64-doubleword-cas.patch b/valgrind-3.18.1-arm64-doubleword-cas.patch
deleted file mode 100644
index 7cf0bf5..0000000
--- a/valgrind-3.18.1-arm64-doubleword-cas.patch
+++ /dev/null
@@ -1,121 +0,0 @@
-commit 7dbe2fed72886874f2eaf57dc07929542ae55b58
-Author: Julian Seward <jseward@acm.org>
-Date:   Fri Nov 12 10:40:48 2021 +0100
-
-    Bug 445354 - arm64 backend: incorrect code emitted for doubleword CAS.
-    
-    The sequence of instructions emitted by the arm64 backend for doubleword
-    compare-and-swap is incorrect.  This could lead to incorrect simulation of the
-    AArch8.1 atomic instructions (CASP, at least).  It also causes failures in the
-    upcoming fix for v8.0 support for LD{,A}XP/ST{,L}XP in bug 444399, at least
-    when running with the fallback LL/SC implementation
-    (`--sim-hints=fallback-llsc`, or as autoselected at startup).  In the worst
-    case it can cause segfaulting in the generated code, because it could jump
-    backwards unexpectedly far.
-    
-    The problem is the sequence emitted for ARM64in_CASP:
-    
-    * the jump offsets are incorrect, both for `bne out` (x 2) and `cbnz w1, loop`.
-    
-    * using w1 to hold the success indication of the stxp instruction trashes the
-      previous value in x1.  But the value in x1 is an output of ARM64in_CASP,
-      hence one of the two output registers is corrupted.  That confuses any code
-      downstream that want to inspect those values to find out whether or not the
-      transaction succeeded.
-    
-    The fixes are to
-    
-    * fix the branch offsets
-    
-    * use a different register to hold the stxp success indication.  w3 is a
-      convenient check.
-
-diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
-index 5dccc0495..5657bcab9 100644
---- a/VEX/priv/host_arm64_defs.c
-+++ b/VEX/priv/host_arm64_defs.c
-@@ -2271,6 +2271,7 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
-          addHRegUse(u, HRmWrite, hregARM64_X1());
-          addHRegUse(u, HRmWrite, hregARM64_X9());
-          addHRegUse(u, HRmWrite, hregARM64_X8());
-+         addHRegUse(u, HRmWrite, hregARM64_X3());
-          break;
-       case ARM64in_MFence:
-          return;
-@@ -4254,16 +4255,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
- 
-               -- always:
-               cmp     x0, x8                 // EB08001F
--              bne     out                    // 540000E1 (b.ne #28 <out>)
-+              bne     out                    // 540000A1
-               cmp     x1, x9                 // EB09003F
--              bne     out                    // 540000A1 (b.ne #20 <out>)
-+              bne     out                    // 54000061
- 
-               -- one of:
--              stxp    w1, x6, x7, [x2]       // C8211C46
--              stxp    w1, w6, w7, [x2]       // 88211C46
-+              stxp    w3, x6, x7, [x2]       // C8231C46
-+              stxp    w3, w6, w7, [x2]       // 88231C46
- 
-               -- always:
--              cbnz    w1, loop               // 35FFFE81 (cbnz w1, #-48 <loop>)
-+              cbnz    w3, loop               // 35FFFF03
-             out:
-          */
-          switch (i->ARM64in.CASP.szB) {
-@@ -4277,15 +4278,15 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
-             default: vassert(0);
-          }
-          *p++ = 0xEB08001F;
--         *p++ = 0x540000E1;
--         *p++ = 0xEB09003F;
-          *p++ = 0x540000A1;
-+         *p++ = 0xEB09003F;
-+         *p++ = 0x54000061;
-          switch (i->ARM64in.CASP.szB) {
--            case 8:  *p++ = 0xC8211C46; break;
--            case 4:  *p++ = 0x88211C46; break;
-+            case 8:  *p++ = 0xC8231C46; break;
-+            case 4:  *p++ = 0x88231C46; break;
-             default: vassert(0);
-          }
--         *p++ = 0x35FFFE81;
-+         *p++ = 0x35FFFF03;
-          goto done;
-       }
-       case ARM64in_MFence: {
-diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
-index f0737f2c6..01fb5708e 100644
---- a/VEX/priv/host_arm64_defs.h
-+++ b/VEX/priv/host_arm64_defs.h
-@@ -720,6 +720,7 @@ typedef
-             Int  szB; /* 1, 2, 4 or 8 */
-          } StrEX;
-          /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
-+            and trashes x8
-             where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
-                   x1[8*szB-1 : 0] != x5[8*szB-1 : 0] indicates failure.
-             Uses x8 as scratch (but that's not allocatable).
-@@ -738,7 +739,7 @@ typedef
-             -- if branch taken, failure; x1[[8*szB-1 : 0] holds old value
-             -- attempt to store
-             stxr    w8, x7, [x3]
--            -- if store successful, x1==0, so the eor is "x1 := x5"
-+            -- if store successful, x8==0
-             -- if store failed,     branch back and try again.
-             cbne    w8, loop
-            after:
-@@ -746,6 +747,12 @@ typedef
-          struct {
-             Int szB; /* 1, 2, 4 or 8 */
-          } CAS;
-+         /* Doubleworld CAS, 2 x 32 bit or 2 x 64 bit
-+            x0(oldLSW),x1(oldMSW)
-+               = DCAS(x2(addr), x4(expectedLSW),x5(expectedMSW)
-+                                -> x6(newLSW),x7(newMSW))
-+            and trashes x8, x9 and x3
-+         */
-          struct {
-             Int szB; /* 4 or 8 */
-          } CASP;
diff --git a/valgrind-3.18.1-arm64-ldaxp-stlxp.patch b/valgrind-3.18.1-arm64-ldaxp-stlxp.patch
deleted file mode 100644
index d118cc6..0000000
--- a/valgrind-3.18.1-arm64-ldaxp-stlxp.patch
+++ /dev/null
@@ -1,1440 +0,0 @@
-commit 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650
-Author: Julian Seward <jseward@acm.org>
-Date:   Fri Nov 12 12:13:45 2021 +0100
-
-    Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP).
-    
-    This is unfortunately a big and complex patch, to implement LD{,A}XP and
-    ST{,L}XP.  These were omitted from the original AArch64 v8.0 implementation
-    for unknown reasons.
-    
-    (Background) the patch is made significantly more complex because for AArch64
-    we actually have two implementations of the underlying
-    Load-Linked/Store-Conditional (LL/SC) machinery: a "primary" implementation,
-    which translates LL/SC more or less directly into IR and re-emits them at the
-    back end, and a "fallback" implementation that implements LL/SC "manually", by
-    taking advantage of the fact that V serialises thread execution, so we can
-    "implement" LL/SC by simulating a reservation using fields LLSC_* in the guest
-    state, and invalidating the reservation at every thread switch.
-    
-    (Background) the fallback scheme is needed because the primary scheme is in
-    violation of the ARMv8 semantics in that it can (easily) introduce extra
-    memory references between the LL and SC, hence on some hardware causing the
-    reservation to always fail and so the simulated program to wind up looping
-    forever.
-    
-    For these instructions, big picture:
-    
-    * for the primary implementation, we take advantage of the fact that
-      IRStmt_LLSC allows I128 bit transactions to be represented.  Hence we bundle
-      up the two 64-bit data elements into an I128 (or vice versa) and present a
-      single I128-typed IRStmt_LLSC in the IR.  In the backend, those are
-      re-emitted as LDXP/STXP respectively.  For LL/SC on 32-bit register pairs,
-      that bundling produces a single 64-bit item, and so the existing LL/SC
-      backend machinery handles it.  The effect is that a doubleword 32-bit LL/SC
-      in the front end translates into a single 64-bit LL/SC in the back end.
-      Overall, though, the implementation is straightforward.
-    
-    * for the fallback implementation, it is necessary to extend the guest state
-      field `guest_LLSC_DATA` to represent a 128-bit transaction, by splitting it
-      into _DATA_LO64 and DATA_HI64.  Then, the implementation is an exact
-      analogue of the fallback implementation for single-word LL/SC.  It takes
-      advantage of the fact that the backend already supports 128-bit CAS, as
-      fixed in bug 445354.  As with the primary implementation, doubleword 32-bit
-      LL/SC is bundled into a single 64-bit transaction.
-    
-    Detailed changes:
-    
-    * new arm64 guest state fields LLSC_DATA_LO64/LLSC_DATA_LO64 to replace
-      guest_LLSC_DATA
-    
-    * (ridealong fix) arm64 front end: a fix to a minor and harmless decoding bug
-      for the single-word LDX/STX case.
-    
-    * arm64 front end: IR generation for LD{,A}XP/ST{,L}XP: tedious and
-      longwinded, but per comments above, an exact(ish) analogue of the singleword
-      case
-    
-    * arm64 backend: new insns ARM64Instr_LdrEXP / ARM64Instr_StrEXP to wrap up 2
-      x 64 exclusive loads/stores.  Per comments above, there's no need to handle
-      the 2 x 32 case.
-    
-    * arm64 isel: translate I128-typed IRStmt_LLSC into the above two insns
-    
-    * arm64 isel: some auxiliary bits and pieces needed to handle I128 values;
-      this is standard doubleword isel stuff
-    
-    * arm64 isel: (ridealong fix): Ist_CAS: check for endianness of the CAS!
-    
-    * arm64 isel: (ridealong) a couple of formatting fixes
-    
-    * IR infrastructure: add support for I128 constants, done the same as V128
-      constants
-    
-    * memcheck: handle shadow loads and stores for I128 values
-    
-    * testcase: memcheck/tests/atomic_incs.c: on arm64, also test 128-bit atomic
-      addition, to check we really have atomicity right
-    
-    * testcase: new test none/tests/arm64/ldxp_stxp.c, tests operation but not
-      atomicity.  (Smoke test).
-
-diff --git a/VEX/priv/guest_arm64_toIR.c b/VEX/priv/guest_arm64_toIR.c
-index 12a1c5978..ee018c6a9 100644
---- a/VEX/priv/guest_arm64_toIR.c
-+++ b/VEX/priv/guest_arm64_toIR.c
-@@ -1184,9 +1184,10 @@ static IRExpr* narrowFrom64 ( IRType dstTy, IRExpr* e )
- #define OFFB_CMSTART  offsetof(VexGuestARM64State,guest_CMSTART)
- #define OFFB_CMLEN    offsetof(VexGuestARM64State,guest_CMLEN)
- 
--#define OFFB_LLSC_SIZE offsetof(VexGuestARM64State,guest_LLSC_SIZE)
--#define OFFB_LLSC_ADDR offsetof(VexGuestARM64State,guest_LLSC_ADDR)
--#define OFFB_LLSC_DATA offsetof(VexGuestARM64State,guest_LLSC_DATA)
-+#define OFFB_LLSC_SIZE      offsetof(VexGuestARM64State,guest_LLSC_SIZE)
-+#define OFFB_LLSC_ADDR      offsetof(VexGuestARM64State,guest_LLSC_ADDR)
-+#define OFFB_LLSC_DATA_LO64 offsetof(VexGuestARM64State,guest_LLSC_DATA_LO64)
-+#define OFFB_LLSC_DATA_HI64 offsetof(VexGuestARM64State,guest_LLSC_DATA_HI64)
- 
- 
- /* ---------------- Integer registers ---------------- */
-@@ -6652,7 +6653,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-         (coregrind/m_scheduler/scheduler.c, run_thread_for_a_while()
-          has to do this bit)
-    */   
--   if (INSN(29,23) == BITS7(0,0,1,0,0,0,0)
-+   if (INSN(29,24) == BITS6(0,0,1,0,0,0)
-        && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,0)
-        && INSN(14,10) == BITS5(1,1,1,1,1)) {
-       UInt szBlg2     = INSN(31,30);
-@@ -6678,7 +6679,8 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-             // if it faults.
-             IRTemp loaded_data64 = newTemp(Ity_I64);
-             assign(loaded_data64, widenUto64(ty, loadLE(ty, mkexpr(ea))));
--            stmt( IRStmt_Put( OFFB_LLSC_DATA, mkexpr(loaded_data64) ));
-+            stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
-+            stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
-             stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
-             stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(szB) ));
-             putIReg64orZR(tt, mkexpr(loaded_data64));
-@@ -6729,7 +6731,7 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-             ));
-             // Fail if the data doesn't match the LL data
-             IRTemp llsc_data64 = newTemp(Ity_I64);
--            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA, Ity_I64));
-+            assign(llsc_data64, IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
-             stmt( IRStmt_Exit(
-                       binop(Iop_CmpNE64, widenUto64(ty, loadLE(ty, mkexpr(ea))),
-                                          mkexpr(llsc_data64)),
-@@ -6771,6 +6773,257 @@ Bool dis_ARM64_load_store(/*MB_OUT*/DisResult* dres, UInt insn,
-       /* else fall through */
-    }
- 
-+   /* -------------------- LD{,A}XP -------------------- */
-+   /* -------------------- ST{,L}XP -------------------- */
-+   /* 31 30 29     23  20    15 14  9  4
-+       1 sz 001000 011 11111 0  t2  n  t1   LDXP  Rt1, Rt2, [Xn|SP]
-+       1 sz 001000 011 11111 1  t2  n  t1   LDAXP Rt1, Rt2, [Xn|SP]
-+       1 sz 001000 001 s     0  t2  n  t1   STXP  Ws, Rt1, Rt2, [Xn|SP]
-+       1 sz 001000 001 s     1  t2  n  t1   STLXP Ws, Rt1, Rt2, [Xn|SP]
-+   */
-+   /* See just above, "LD{,A}X{R,RH,RB} / ST{,L}X{R,RH,RB}", for detailed
-+      comments about this implementation.  Note the 'sz' field here is only 1
-+      bit; above, it is 2 bits, and has a different encoding.
-+   */
-+   if (INSN(31,31) == 1
-+       && INSN(29,24) == BITS6(0,0,1,0,0,0)
-+       && (INSN(23,21) & BITS3(1,0,1)) == BITS3(0,0,1)) {
-+      Bool elemIs64   = INSN(30,30) == 1;
-+      Bool isLD       = INSN(22,22) == 1;
-+      Bool isAcqOrRel = INSN(15,15) == 1;
-+      UInt ss         = INSN(20,16);
-+      UInt tt2        = INSN(14,10);
-+      UInt nn         = INSN(9,5);
-+      UInt tt1        = INSN(4,0);
-+
-+      UInt   elemSzB = elemIs64 ? 8 : 4;
-+      UInt   fullSzB = 2 * elemSzB;
-+      IRType elemTy  = integerIRTypeOfSize(elemSzB);
-+      IRType fullTy  = integerIRTypeOfSize(fullSzB);
-+
-+      IRTemp ea = newTemp(Ity_I64);
-+      assign(ea, getIReg64orSP(nn));
-+      /* FIXME generate check that ea is 2*elemSzB-aligned */
-+
-+      if (isLD && ss == BITS5(1,1,1,1,1)) {
-+         if (abiinfo->guest__use_fallback_LLSC) {
-+            // Fallback implementation of LL.
-+            // Do the load first so we don't update any guest state if it
-+            // faults.  Assumes little-endian guest.
-+            if (fullTy == Ity_I64) {
-+               vassert(elemSzB == 4);
-+               IRTemp loaded_data64 = newTemp(Ity_I64);
-+               assign(loaded_data64, loadLE(fullTy, mkexpr(ea)));
-+               stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64, mkexpr(loaded_data64) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64, mkU64(0) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(8) ));
-+               putIReg64orZR(tt1, unop(Iop_32Uto64,
-+                                       unop(Iop_64to32,
-+                                            mkexpr(loaded_data64))));
-+               putIReg64orZR(tt2, unop(Iop_32Uto64,
-+                                       unop(Iop_64HIto32,
-+                                            mkexpr(loaded_data64))));
-+            } else {
-+               vassert(elemSzB == 8 && fullTy == Ity_I128);
-+               IRTemp loaded_data128 = newTemp(Ity_I128);
-+               // Hack: do the load as V128 rather than I128 so as to avoid
-+               // having to implement I128 loads in the arm64 back end.
-+               assign(loaded_data128, unop(Iop_ReinterpV128asI128,
-+                                           loadLE(Ity_V128, mkexpr(ea))));
-+               IRTemp loaded_data_lo64 = newTemp(Ity_I64);
-+               IRTemp loaded_data_hi64 = newTemp(Ity_I64);
-+               assign(loaded_data_lo64, unop(Iop_128to64,
-+                                             mkexpr(loaded_data128)));
-+               assign(loaded_data_hi64, unop(Iop_128HIto64,
-+                                             mkexpr(loaded_data128)));
-+               stmt( IRStmt_Put( OFFB_LLSC_DATA_LO64,
-+                                 mkexpr(loaded_data_lo64) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_DATA_HI64,
-+                                 mkexpr(loaded_data_hi64) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_ADDR, mkexpr(ea) ));
-+               stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(16) ));
-+               putIReg64orZR(tt1, mkexpr(loaded_data_lo64));
-+               putIReg64orZR(tt2, mkexpr(loaded_data_hi64));
-+            }
-+         } else {
-+            // Non-fallback implementation of LL.
-+            IRTemp res = newTemp(fullTy); // I64 or I128
-+            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), NULL/*LL*/));
-+            // Assuming a little-endian guest here.  Rt1 goes at the lower
-+            // address, so it must live in the least significant half of `res`.
-+            IROp opGetLO = fullTy == Ity_I128 ? Iop_128to64   : Iop_64to32;
-+            IROp opGetHI = fullTy == Ity_I128 ? Iop_128HIto64 : Iop_64HIto32;
-+            putIReg64orZR(tt1, widenUto64(elemTy, unop(opGetLO, mkexpr(res))));
-+            putIReg64orZR(tt2, widenUto64(elemTy, unop(opGetHI, mkexpr(res))));
-+         }
-+         if (isAcqOrRel) {
-+            stmt(IRStmt_MBE(Imbe_Fence));
-+         }
-+         DIP("ld%sxp %s, %s, [%s] %s\n",
-+             isAcqOrRel ? (isLD ? "a" : "l") : "",
-+             nameIRegOrZR(elemSzB == 8, tt1),
-+             nameIRegOrZR(elemSzB == 8, tt2),
-+             nameIReg64orSP(nn),
-+             abiinfo->guest__use_fallback_LLSC
-+                ? "(fallback implementation)" : "");
-+         return True;
-+      }
-+      if (!isLD) {
-+         if (isAcqOrRel) {
-+            stmt(IRStmt_MBE(Imbe_Fence));
-+         }
-+         if (abiinfo->guest__use_fallback_LLSC) {
-+            // Fallback implementation of SC.
-+            // This is really ugly, since we don't have any way to do
-+            // proper if-then-else.  First, set up as if the SC failed,
-+            // and jump forwards if it really has failed.
-+
-+            // Continuation address
-+            IRConst* nia = IRConst_U64(guest_PC_curr_instr + 4);
-+
-+            // "the SC failed".  Any non-zero value means failure.
-+            putIReg64orZR(ss, mkU64(1));
-+
-+            IRTemp tmp_LLsize = newTemp(Ity_I64);
-+            assign(tmp_LLsize, IRExpr_Get(OFFB_LLSC_SIZE, Ity_I64));
-+            stmt( IRStmt_Put( OFFB_LLSC_SIZE, mkU64(0) // "no transaction"
-+            ));
-+            // Fail if no or wrong-size transaction
-+            vassert((fullSzB == 8 && fullTy == Ity_I64)
-+                    || (fullSzB == 16 && fullTy == Ity_I128));
-+            stmt( IRStmt_Exit(
-+                     binop(Iop_CmpNE64, mkexpr(tmp_LLsize), mkU64(fullSzB)),
-+                     Ijk_Boring, nia, OFFB_PC
-+            ));
-+            // Fail if the address doesn't match the LL address
-+            stmt( IRStmt_Exit(
-+                      binop(Iop_CmpNE64, mkexpr(ea),
-+                                         IRExpr_Get(OFFB_LLSC_ADDR, Ity_I64)),
-+                      Ijk_Boring, nia, OFFB_PC
-+            ));
-+            // The data to be stored.
-+            IRTemp store_data = newTemp(fullTy);
-+            if (fullTy == Ity_I64) {
-+               assign(store_data,
-+                      binop(Iop_32HLto64,
-+                            narrowFrom64(Ity_I32, getIReg64orZR(tt2)),
-+                            narrowFrom64(Ity_I32, getIReg64orZR(tt1))));
-+            } else {
-+               assign(store_data,
-+                      binop(Iop_64HLto128,
-+                            getIReg64orZR(tt2), getIReg64orZR(tt1)));
-+            }
-+
-+            if (fullTy == Ity_I64) {
-+               // 64 bit (2x32 bit) path
-+               // Fail if the data in memory doesn't match the data stashed by
-+               // the LL.
-+               IRTemp llsc_data_lo64 = newTemp(Ity_I64);
-+               assign(llsc_data_lo64,
-+                      IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
-+               stmt( IRStmt_Exit(
-+                         binop(Iop_CmpNE64, loadLE(Ity_I64, mkexpr(ea)),
-+                                            mkexpr(llsc_data_lo64)),
-+                      Ijk_Boring, nia, OFFB_PC
-+               ));
-+               // Try to CAS the new value in.
-+               IRTemp old = newTemp(Ity_I64);
-+               IRTemp expd = newTemp(Ity_I64);
-+               assign(expd, mkexpr(llsc_data_lo64));
-+               stmt( IRStmt_CAS(mkIRCAS(/*oldHi*/IRTemp_INVALID, old,
-+                                        Iend_LE, mkexpr(ea),
-+                                        /*expdHi*/NULL, mkexpr(expd),
-+                                        /*dataHi*/NULL, mkexpr(store_data)
-+               )));
-+               // Fail if the CAS failed (viz, old != expd)
-+               stmt( IRStmt_Exit(
-+                         binop(Iop_CmpNE64, mkexpr(old), mkexpr(expd)),
-+                         Ijk_Boring, nia, OFFB_PC
-+               ));
-+            } else {
-+               // 128 bit (2x64 bit) path
-+               // Fail if the data in memory doesn't match the data stashed by
-+               // the LL.
-+               IRTemp llsc_data_lo64 = newTemp(Ity_I64);
-+               assign(llsc_data_lo64,
-+                      IRExpr_Get(OFFB_LLSC_DATA_LO64, Ity_I64));
-+               IRTemp llsc_data_hi64 = newTemp(Ity_I64);
-+               assign(llsc_data_hi64,
-+                      IRExpr_Get(OFFB_LLSC_DATA_HI64, Ity_I64));
-+               IRTemp data_at_ea = newTemp(Ity_I128);
-+               assign(data_at_ea,
-+                      unop(Iop_ReinterpV128asI128,
-+                           loadLE(Ity_V128, mkexpr(ea))));
-+               stmt( IRStmt_Exit(
-+                        binop(Iop_CmpNE64,
-+                              unop(Iop_128to64, mkexpr(data_at_ea)),
-+                              mkexpr(llsc_data_lo64)),
-+                        Ijk_Boring, nia, OFFB_PC
-+               ));
-+               stmt( IRStmt_Exit(
-+                        binop(Iop_CmpNE64,
-+                              unop(Iop_128HIto64, mkexpr(data_at_ea)),
-+                              mkexpr(llsc_data_hi64)),
-+                        Ijk_Boring, nia, OFFB_PC
-+               ));
-+               // Try to CAS the new value in.
-+               IRTemp old_lo64 = newTemp(Ity_I64);
-+               IRTemp old_hi64 = newTemp(Ity_I64);
-+               IRTemp expd_lo64 = newTemp(Ity_I64);
-+               IRTemp expd_hi64 = newTemp(Ity_I64);
-+               IRTemp store_data_lo64 = newTemp(Ity_I64);
-+               IRTemp store_data_hi64 = newTemp(Ity_I64);
-+               assign(expd_lo64, mkexpr(llsc_data_lo64));
-+               assign(expd_hi64, mkexpr(llsc_data_hi64));
-+               assign(store_data_lo64, unop(Iop_128to64, mkexpr(store_data)));
-+               assign(store_data_hi64, unop(Iop_128HIto64, mkexpr(store_data)));
-+               stmt( IRStmt_CAS(mkIRCAS(old_hi64, old_lo64,
-+                                        Iend_LE, mkexpr(ea),
-+                                        mkexpr(expd_hi64), mkexpr(expd_lo64),
-+                                        mkexpr(store_data_hi64),
-+                                        mkexpr(store_data_lo64)
-+               )));
-+               // Fail if the CAS failed (viz, old != expd)
-+               stmt( IRStmt_Exit(
-+                        binop(Iop_CmpNE64, mkexpr(old_lo64), mkexpr(expd_lo64)),
-+                        Ijk_Boring, nia, OFFB_PC
-+               ));
-+               stmt( IRStmt_Exit(
-+                        binop(Iop_CmpNE64, mkexpr(old_hi64), mkexpr(expd_hi64)),
-+                        Ijk_Boring, nia, OFFB_PC
-+               ));
-+            }
-+            // Otherwise we succeeded (!)
-+            putIReg64orZR(ss, mkU64(0));
-+         } else {
-+            // Non-fallback implementation of SC.
-+            IRTemp  res     = newTemp(Ity_I1);
-+            IRExpr* dataLO  = narrowFrom64(elemTy, getIReg64orZR(tt1));
-+            IRExpr* dataHI  = narrowFrom64(elemTy, getIReg64orZR(tt2));
-+            IROp    opMerge = fullTy == Ity_I128 ? Iop_64HLto128 : Iop_32HLto64;
-+            IRExpr* data    = binop(opMerge, dataHI, dataLO);
-+            // Assuming a little-endian guest here.  Rt1 goes at the lower
-+            // address, so it must live in the least significant half of `data`.
-+            stmt(IRStmt_LLSC(Iend_LE, res, mkexpr(ea), data));
-+            /* IR semantics: res is 1 if store succeeds, 0 if it fails.
-+               Need to set rS to 1 on failure, 0 on success. */
-+            putIReg64orZR(ss, binop(Iop_Xor64, unop(Iop_1Uto64, mkexpr(res)),
-+                                               mkU64(1)));
-+         }
-+         DIP("st%sxp %s, %s, %s, [%s] %s\n",
-+             isAcqOrRel ? (isLD ? "a" : "l") : "",
-+             nameIRegOrZR(False, ss),
-+             nameIRegOrZR(elemSzB == 8, tt1),
-+             nameIRegOrZR(elemSzB == 8, tt2),
-+             nameIReg64orSP(nn),
-+             abiinfo->guest__use_fallback_LLSC
-+                ? "(fallback implementation)" : "");
-+         return True;
-+      }
-+      /* else fall through */
-+   }
-+
-    /* ------------------ LDA{R,RH,RB} ------------------ */
-    /* ------------------ STL{R,RH,RB} ------------------ */
-    /* 31 29     23  20      14    9 4
-diff --git a/VEX/priv/host_arm64_defs.c b/VEX/priv/host_arm64_defs.c
-index 5657bcab9..b65e27db4 100644
---- a/VEX/priv/host_arm64_defs.c
-+++ b/VEX/priv/host_arm64_defs.c
-@@ -1059,6 +1059,16 @@ ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
-    vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
-    return i;
- }
-+ARM64Instr* ARM64Instr_LdrEXP ( void ) {
-+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-+   i->tag        = ARM64in_LdrEXP;
-+   return i;
-+}
-+ARM64Instr* ARM64Instr_StrEXP ( void ) {
-+   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-+   i->tag        = ARM64in_StrEXP;
-+   return i;
-+}
- ARM64Instr* ARM64Instr_CAS ( Int szB ) {
-    ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
-    i->tag             = ARM64in_CAS;
-@@ -1699,12 +1709,19 @@ void ppARM64Instr ( const ARM64Instr* i ) {
-                     sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
-          return;
-       }
-+      case ARM64in_LdrEXP:
-+         vex_printf("ldxp   x2, x3, [x4]");
-+         return;
-+      case ARM64in_StrEXP:
-+         vex_printf("stxp   w0, x2, x3, [x4]");
-+         return;
-       case ARM64in_CAS: {
-          vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
-          return;
-       }
-       case ARM64in_CASP: {
--         vex_printf("x0,x1 = casp(%dbit)(x2, x4,x5 -> x6,x7)", 8 * i->ARM64in.CASP.szB);
-+         vex_printf("x0,x1 = casp(2x%dbit)(x2, x4,x5 -> x6,x7)",
-+                    8 * i->ARM64in.CASP.szB);
-          return;
-       }
-       case ARM64in_MFence:
-@@ -2253,6 +2270,17 @@ void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
-          addHRegUse(u, HRmWrite, hregARM64_X0());
-          addHRegUse(u, HRmRead, hregARM64_X2());
-          return;
-+      case ARM64in_LdrEXP:
-+         addHRegUse(u, HRmRead, hregARM64_X4());
-+         addHRegUse(u, HRmWrite, hregARM64_X2());
-+         addHRegUse(u, HRmWrite, hregARM64_X3());
-+         return;
-+      case ARM64in_StrEXP:
-+         addHRegUse(u, HRmRead, hregARM64_X4());
-+         addHRegUse(u, HRmWrite, hregARM64_X0());
-+         addHRegUse(u, HRmRead, hregARM64_X2());
-+         addHRegUse(u, HRmRead, hregARM64_X3());
-+         return;
-       case ARM64in_CAS:
-          addHRegUse(u, HRmRead, hregARM64_X3());
-          addHRegUse(u, HRmRead, hregARM64_X5());
-@@ -2571,6 +2599,10 @@ void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
-          return;
-       case ARM64in_StrEX:
-          return;
-+      case ARM64in_LdrEXP:
-+         return;
-+      case ARM64in_StrEXP:
-+         return;
-       case ARM64in_CAS:
-          return;
-       case ARM64in_CASP:
-@@ -4167,6 +4199,16 @@ Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
-          }
-          goto bad;
-       }
-+      case ARM64in_LdrEXP: {
-+         // 820C7FC8   ldxp x2, x3, [x4]
-+         *p++ = 0xC87F0C82;
-+         goto done;
-+      }
-+      case ARM64in_StrEXP: {
-+         // 820C20C8   stxp w0, x2, x3, [x4]
-+         *p++ = 0xC8200C82;
-+         goto done;
-+      }
-       case ARM64in_CAS: {
-          /* This isn't simple.  For an explanation see the comment in
-             host_arm64_defs.h on the definition of ARM64Instr case CAS.
-diff --git a/VEX/priv/host_arm64_defs.h b/VEX/priv/host_arm64_defs.h
-index 01fb5708e..dc686dff7 100644
---- a/VEX/priv/host_arm64_defs.h
-+++ b/VEX/priv/host_arm64_defs.h
-@@ -509,8 +509,10 @@ typedef
-       ARM64in_AddToSP,     /* move SP by small, signed constant */
-       ARM64in_FromSP,      /* move SP to integer register */
-       ARM64in_Mul,
--      ARM64in_LdrEX,
--      ARM64in_StrEX,
-+      ARM64in_LdrEX,       /* load exclusive, single register */
-+      ARM64in_StrEX,       /* store exclusive, single register */
-+      ARM64in_LdrEXP,      /* load exclusive, register pair, 2x64-bit only */
-+      ARM64in_StrEXP,      /* store exclusive, register pair, 2x64-bit only */
-       ARM64in_CAS,
-       ARM64in_CASP,
-       ARM64in_MFence,
-@@ -719,6 +721,12 @@ typedef
-          struct {
-             Int  szB; /* 1, 2, 4 or 8 */
-          } StrEX;
-+         /* LDXP x2, x3, [x4].  This is 2x64-bit only. */
-+         struct {
-+         } LdrEXP;
-+         /* STXP w0, x2, x3, [x4].  This is 2x64-bit only. */
-+         struct {
-+         } StrEXP;
-          /* x1 = CAS(x3(addr), x5(expected) -> x7(new)),
-             and trashes x8
-             where x1[8*szB-1 : 0] == x5[8*szB-1 : 0] indicates success,
-@@ -1037,6 +1045,8 @@ extern ARM64Instr* ARM64Instr_Mul     ( HReg dst, HReg argL, HReg argR,
-                                         ARM64MulOp op );
- extern ARM64Instr* ARM64Instr_LdrEX   ( Int szB );
- extern ARM64Instr* ARM64Instr_StrEX   ( Int szB );
-+extern ARM64Instr* ARM64Instr_LdrEXP  ( void );
-+extern ARM64Instr* ARM64Instr_StrEXP  ( void );
- extern ARM64Instr* ARM64Instr_CAS     ( Int szB );
- extern ARM64Instr* ARM64Instr_CASP    ( Int szB );
- extern ARM64Instr* ARM64Instr_MFence  ( void );
-diff --git a/VEX/priv/host_arm64_isel.c b/VEX/priv/host_arm64_isel.c
-index 4b1d8c846..094e7e74b 100644
---- a/VEX/priv/host_arm64_isel.c
-+++ b/VEX/priv/host_arm64_isel.c
-@@ -196,9 +196,9 @@ static HReg        iselCondCode_R        ( ISelEnv* env, IRExpr* e );
- static HReg        iselIntExpr_R_wrk     ( ISelEnv* env, IRExpr* e );
- static HReg        iselIntExpr_R         ( ISelEnv* env, IRExpr* e );
- 
--static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, HReg* rLo, 
-+static void        iselInt128Expr_wrk    ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
-                                            ISelEnv* env, IRExpr* e );
--static void        iselInt128Expr        ( /*OUT*/HReg* rHi, HReg* rLo, 
-+static void        iselInt128Expr        ( /*OUT*/HReg* rHi, /*OUT*/HReg* rLo,
-                                            ISelEnv* env, IRExpr* e );
- 
- static HReg        iselDblExpr_wrk        ( ISelEnv* env, IRExpr* e );
-@@ -1759,9 +1759,12 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
- 
-       /* AND/OR/XOR(e1, e2) (for any e1, e2) */
-       switch (e->Iex.Binop.op) {
--         case Iop_And64: case Iop_And32: lop = ARM64lo_AND; goto log_binop;
--         case Iop_Or64:  case Iop_Or32:  case Iop_Or16: lop = ARM64lo_OR;  goto log_binop;
--         case Iop_Xor64: case Iop_Xor32: lop = ARM64lo_XOR; goto log_binop;
-+         case Iop_And64: case Iop_And32:
-+            lop = ARM64lo_AND; goto log_binop;
-+         case Iop_Or64:  case Iop_Or32:  case Iop_Or16:
-+            lop = ARM64lo_OR;  goto log_binop;
-+         case Iop_Xor64: case Iop_Xor32:
-+            lop = ARM64lo_XOR; goto log_binop;
-          log_binop: {
-             HReg      dst  = newVRegI(env);
-             HReg      argL = iselIntExpr_R(env, e->Iex.Binop.arg1);
-@@ -2013,6 +2016,11 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
-             iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
-             return rHi; /* and abandon rLo */
-          }
-+         case Iop_128to64: {
-+            HReg rHi, rLo;
-+            iselInt128Expr(&rHi,&rLo, env, e->Iex.Unop.arg);
-+            return rLo; /* and abandon rHi */
-+         }
-          case Iop_8Sto32: case Iop_8Sto64: {
-             IRExpr* arg = e->Iex.Unop.arg;
-             HReg    src = iselIntExpr_R(env, arg);
-@@ -2185,13 +2193,19 @@ static HReg iselIntExpr_R_wrk ( ISelEnv* env, IRExpr* e )
-             }
-             return dst;
-          }
-+         case Iop_64HIto32: {
-+            HReg dst = newVRegI(env);
-+            HReg src = iselIntExpr_R(env, e->Iex.Unop.arg);
-+            addInstr(env, ARM64Instr_Shift(dst, src, ARM64RI6_I6(32),
-+                                           ARM64sh_SHR));
-+            return dst;
-+         }
-          case Iop_64to32:
-          case Iop_64to16:
-          case Iop_64to8:
-          case Iop_32to16:
-             /* These are no-ops. */
-             return iselIntExpr_R(env, e->Iex.Unop.arg);
--
-          default:
-             break;
-       }
-@@ -2335,6 +2349,43 @@ static void iselInt128Expr_wrk ( HReg* rHi, HReg* rLo,
-    vassert(e);
-    vassert(typeOfIRExpr(env->type_env,e) == Ity_I128);
- 
-+   /* --------- TEMP --------- */
-+   if (e->tag == Iex_RdTmp) {
-+      lookupIRTempPair(rHi, rLo, env, e->Iex.RdTmp.tmp);
-+      return;
-+   }
-+
-+   /* --------- CONST --------- */
-+   if (e->tag == Iex_Const) {
-+      IRConst* c = e->Iex.Const.con;
-+      vassert(c->tag == Ico_U128);
-+      if (c->Ico.U128 == 0) {
-+         // The only case we need to handle (so far)
-+         HReg zero = newVRegI(env);
-+         addInstr(env, ARM64Instr_Imm64(zero, 0));
-+         *rHi = *rLo = zero;
-+         return;
-+      }
-+   }
-+
-+   /* --------- UNARY ops --------- */
-+   if (e->tag == Iex_Unop) {
-+      switch (e->Iex.Unop.op) {
-+         case Iop_ReinterpV128asI128: {
-+            HReg dstHi = newVRegI(env);
-+            HReg dstLo = newVRegI(env);
-+            HReg src    = iselV128Expr(env, e->Iex.Unop.arg);
-+            addInstr(env, ARM64Instr_VXfromQ(dstHi, src, 1));
-+            addInstr(env, ARM64Instr_VXfromQ(dstLo, src, 0));
-+            *rHi = dstHi;
-+            *rLo = dstLo;
-+            return;
-+         }
-+         default:
-+            break;
-+      }
-+   }
-+
-    /* --------- BINARY ops --------- */
-    if (e->tag == Iex_Binop) {
-       switch (e->Iex.Binop.op) {
-@@ -4086,6 +4137,14 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
-          addInstr(env, ARM64Instr_VMov(8/*yes, really*/, dst, src));
-          return;
-       }
-+      if (ty == Ity_I128) {
-+         HReg rHi, rLo, dstHi, dstLo;
-+         iselInt128Expr(&rHi,&rLo, env, stmt->Ist.WrTmp.data);
-+         lookupIRTempPair( &dstHi, &dstLo, env, tmp);
-+         addInstr(env, ARM64Instr_MovI(dstHi, rHi));
-+         addInstr(env, ARM64Instr_MovI(dstLo, rLo));
-+         return;
-+      }
-       if (ty == Ity_V128) {
-          HReg src = iselV128Expr(env, stmt->Ist.WrTmp.data);
-          HReg dst = lookupIRTemp(env, tmp);
-@@ -4183,42 +4242,67 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
-          /* LL */
-          IRTemp res = stmt->Ist.LLSC.result;
-          IRType ty  = typeOfIRTemp(env->type_env, res);
--         if (ty == Ity_I64 || ty == Ity_I32 
-+         if (ty == Ity_I128 || ty == Ity_I64 || ty == Ity_I32
-              || ty == Ity_I16 || ty == Ity_I8) {
-             Int  szB   = 0;
--            HReg r_dst = lookupIRTemp(env, res);
-             HReg raddr = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-             switch (ty) {
--               case Ity_I8:  szB = 1; break;
--               case Ity_I16: szB = 2; break;
--               case Ity_I32: szB = 4; break;
--               case Ity_I64: szB = 8; break;
--               default:      vassert(0);
-+               case Ity_I8:   szB = 1;  break;
-+               case Ity_I16:  szB = 2;  break;
-+               case Ity_I32:  szB = 4;  break;
-+               case Ity_I64:  szB = 8;  break;
-+               case Ity_I128: szB = 16; break;
-+               default:       vassert(0);
-+            }
-+            if (szB == 16) {
-+               HReg r_dstMSword = INVALID_HREG;
-+               HReg r_dstLSword = INVALID_HREG;
-+               lookupIRTempPair(&r_dstMSword, &r_dstLSword, env, res);
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
-+               addInstr(env, ARM64Instr_LdrEXP());
-+               addInstr(env, ARM64Instr_MovI(r_dstLSword, hregARM64_X2()));
-+               addInstr(env, ARM64Instr_MovI(r_dstMSword, hregARM64_X3()));
-+            } else {
-+               vassert(szB != 0);
-+               HReg r_dst = lookupIRTemp(env, res);
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
-+               addInstr(env, ARM64Instr_LdrEX(szB));
-+               addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
-             }
--            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), raddr));
--            addInstr(env, ARM64Instr_LdrEX(szB));
--            addInstr(env, ARM64Instr_MovI(r_dst, hregARM64_X2()));
-             return;
-          }
-          goto stmt_fail;
-       } else {
-          /* SC */
-          IRType tyd = typeOfIRExpr(env->type_env, stmt->Ist.LLSC.storedata);
--         if (tyd == Ity_I64 || tyd == Ity_I32
-+         if (tyd == Ity_I128 || tyd == Ity_I64 || tyd == Ity_I32
-              || tyd == Ity_I16 || tyd == Ity_I8) {
-             Int  szB = 0;
--            HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
-             HReg rA  = iselIntExpr_R(env, stmt->Ist.LLSC.addr);
-             switch (tyd) {
--               case Ity_I8:  szB = 1; break;
--               case Ity_I16: szB = 2; break;
--               case Ity_I32: szB = 4; break;
--               case Ity_I64: szB = 8; break;
--               default:      vassert(0);
-+               case Ity_I8:   szB = 1; break;
-+               case Ity_I16:  szB = 2; break;
-+               case Ity_I32:  szB = 4; break;
-+               case Ity_I64:  szB = 8; break;
-+               case Ity_I128: szB = 16; break;
-+               default:       vassert(0);
-+            }
-+            if (szB == 16) {
-+               HReg rD_MSword = INVALID_HREG;
-+               HReg rD_LSword = INVALID_HREG;
-+               iselInt128Expr(&rD_MSword,
-+                              &rD_LSword, env, stmt->Ist.LLSC.storedata);
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD_LSword));
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X3(), rD_MSword));
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
-+               addInstr(env, ARM64Instr_StrEXP());
-+            } else {
-+               vassert(szB != 0);
-+               HReg rD  = iselIntExpr_R(env, stmt->Ist.LLSC.storedata);
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
-+               addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
-+               addInstr(env, ARM64Instr_StrEX(szB));
-             }
--            addInstr(env, ARM64Instr_MovI(hregARM64_X2(), rD));
--            addInstr(env, ARM64Instr_MovI(hregARM64_X4(), rA));
--            addInstr(env, ARM64Instr_StrEX(szB));
-          } else {
-             goto stmt_fail;
-          }
-@@ -4243,10 +4327,10 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
- 
-    /* --------- ACAS --------- */
-    case Ist_CAS: {
--      if (stmt->Ist.CAS.details->oldHi == IRTemp_INVALID) {
-+      IRCAS* cas = stmt->Ist.CAS.details;
-+      if (cas->oldHi == IRTemp_INVALID && cas->end == Iend_LE) {
-          /* "normal" singleton CAS */
-          UChar  sz;
--         IRCAS* cas = stmt->Ist.CAS.details;
-          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
-          switch (ty) { 
-             case Ity_I64: sz = 8; break;
-@@ -4281,10 +4365,9 @@ static void iselStmt ( ISelEnv* env, IRStmt* stmt )
-          addInstr(env, ARM64Instr_MovI(rOld, rResult));
-          return;
-       }
--      else {
-+      if (cas->oldHi != IRTemp_INVALID && cas->end == Iend_LE) {
-          /* Paired register CAS, i.e. CASP */
-          UChar  sz;
--         IRCAS* cas = stmt->Ist.CAS.details;
-          IRType ty  = typeOfIRExpr(env->type_env, cas->dataLo);
-          switch (ty) {
-             case Ity_I64: sz = 8; break;
-diff --git a/VEX/priv/ir_defs.c b/VEX/priv/ir_defs.c
-index 25566c41c..2d82c41a1 100644
---- a/VEX/priv/ir_defs.c
-+++ b/VEX/priv/ir_defs.c
-@@ -76,6 +76,7 @@ void ppIRConst ( const IRConst* con )
-       case Ico_U16:  vex_printf( "0x%x:I16",     (UInt)(con->Ico.U16)); break;
-       case Ico_U32:  vex_printf( "0x%x:I32",     (UInt)(con->Ico.U32)); break;
-       case Ico_U64:  vex_printf( "0x%llx:I64",   (ULong)(con->Ico.U64)); break;
-+      case Ico_U128: vex_printf( "I128{0x%04x}", (UInt)(con->Ico.U128)); break;
-       case Ico_F32:  u.f32 = con->Ico.F32;
-                      vex_printf( "F32{0x%x}",   u.i32);
-                      break;
-@@ -2266,6 +2267,13 @@ IRConst* IRConst_U64 ( ULong u64 )
-    c->Ico.U64 = u64;
-    return c;
- }
-+IRConst* IRConst_U128 ( UShort con )
-+{
-+   IRConst* c  = LibVEX_Alloc_inline(sizeof(IRConst));
-+   c->tag      = Ico_U128;
-+   c->Ico.U128 = con;
-+   return c;
-+}
- IRConst* IRConst_F32 ( Float f32 )
- {
-    IRConst* c = LibVEX_Alloc_inline(sizeof(IRConst));
-@@ -4230,6 +4238,7 @@ IRType typeOfIRConst ( const IRConst* con )
-       case Ico_U16:   return Ity_I16;
-       case Ico_U32:   return Ity_I32;
-       case Ico_U64:   return Ity_I64;
-+      case Ico_U128:  return Ity_I128;
-       case Ico_F32:   return Ity_F32;
-       case Ico_F32i:  return Ity_F32;
-       case Ico_F64:   return Ity_F64;
-@@ -5129,7 +5138,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy )
-          tyRes = typeOfIRTemp(tyenv, stmt->Ist.LLSC.result);
-          if (stmt->Ist.LLSC.storedata == NULL) {
-             /* it's a LL */
--            if (tyRes != Ity_I64 && tyRes != Ity_I32
-+            if (tyRes != Ity_I128 && tyRes != Ity_I64 && tyRes != Ity_I32
-                 && tyRes != Ity_I16 && tyRes != Ity_I8)
-                sanityCheckFail(bb,stmt,"Ist.LLSC(LL).result :: bogus");
-          } else {
-@@ -5137,7 +5146,7 @@ void tcStmt ( const IRSB* bb, const IRStmt* stmt, IRType gWordTy )
-             if (tyRes != Ity_I1)
-                sanityCheckFail(bb,stmt,"Ist.LLSC(SC).result: not :: Ity_I1");
-             tyData = typeOfIRExpr(tyenv, stmt->Ist.LLSC.storedata);
--            if (tyData != Ity_I64 && tyData != Ity_I32
-+            if (tyData != Ity_I128 && tyData != Ity_I64 && tyData != Ity_I32
-                 && tyData != Ity_I16 && tyData != Ity_I8)
-                sanityCheckFail(bb,stmt,
-                                "Ist.LLSC(SC).result :: storedata bogus");
-@@ -5385,6 +5394,7 @@ Int sizeofIRType ( IRType ty )
- IRType integerIRTypeOfSize ( Int szB )
- {
-    switch (szB) {
-+      case 16: return Ity_I128;
-       case 8: return Ity_I64;
-       case 4: return Ity_I32;
-       case 2: return Ity_I16;
-diff --git a/VEX/pub/libvex_guest_arm64.h b/VEX/pub/libvex_guest_arm64.h
-index 39b6ecdc2..91d06bd75 100644
---- a/VEX/pub/libvex_guest_arm64.h
-+++ b/VEX/pub/libvex_guest_arm64.h
-@@ -157,14 +157,18 @@ typedef
-          note of bits 23 and 22. */
-       UInt  guest_FPCR;
- 
--      /* Fallback LL/SC support.  See bugs 344524 and 369459. */
--      ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4 or 8.
-+      /* Fallback LL/SC support.  See bugs 344524 and 369459.  _LO64 and _HI64
-+         contain the original contents of _ADDR+0 .. _ADDR+15, but only _SIZE
-+         number of bytes of it.  The remaining 16-_SIZE bytes of them must be
-+         zero. */
-+      ULong guest_LLSC_SIZE; // 0==no current transaction, else 1,2,4,8 or 16.
-       ULong guest_LLSC_ADDR; // Address of transaction.
--      ULong guest_LLSC_DATA; // Original value at _ADDR, zero-extended.
-+      ULong guest_LLSC_DATA_LO64; // Original value at _ADDR+0.
-+      ULong guest_LLSC_DATA_HI64; // Original value at _ADDR+8.
- 
-       /* Padding to make it have an 16-aligned size */
-       /* UInt  pad_end_0; */
--      ULong pad_end_1;
-+      /* ULong pad_end_1; */
-    }
-    VexGuestARM64State;
- 
-diff --git a/VEX/pub/libvex_ir.h b/VEX/pub/libvex_ir.h
-index deaa044c1..85805bb69 100644
---- a/VEX/pub/libvex_ir.h
-+++ b/VEX/pub/libvex_ir.h
-@@ -269,6 +269,8 @@ typedef
-       Ico_U16, 
-       Ico_U32, 
-       Ico_U64,
-+      Ico_U128,  /* 128-bit restricted integer constant,
-+                    same encoding scheme as V128 */
-       Ico_F32,   /* 32-bit IEEE754 floating */
-       Ico_F32i,  /* 32-bit unsigned int to be interpreted literally
-                     as a IEEE754 single value. */
-@@ -295,6 +297,7 @@ typedef
-          UShort U16;
-          UInt   U32;
-          ULong  U64;
-+         UShort U128;
-          Float  F32;
-          UInt   F32i;
-          Double F64;
-@@ -311,6 +314,7 @@ extern IRConst* IRConst_U8   ( UChar );
- extern IRConst* IRConst_U16  ( UShort );
- extern IRConst* IRConst_U32  ( UInt );
- extern IRConst* IRConst_U64  ( ULong );
-+extern IRConst* IRConst_U128 ( UShort );
- extern IRConst* IRConst_F32  ( Float );
- extern IRConst* IRConst_F32i ( UInt );
- extern IRConst* IRConst_F64  ( Double );
-diff --git a/memcheck/mc_machine.c b/memcheck/mc_machine.c
-index 919c7fae8..176c8e5cb 100644
---- a/memcheck/mc_machine.c
-+++ b/memcheck/mc_machine.c
-@@ -1115,9 +1115,10 @@ static Int get_otrack_shadow_offset_wrk ( Int offset, Int szB )
-    if (o == GOF(CMSTART) && sz == 8) return -1; // untracked
-    if (o == GOF(CMLEN)   && sz == 8) return -1; // untracked
- 
--   if (o == GOF(LLSC_SIZE) && sz == 8) return -1; // untracked
--   if (o == GOF(LLSC_ADDR) && sz == 8) return o;
--   if (o == GOF(LLSC_DATA) && sz == 8) return o;
-+   if (o == GOF(LLSC_SIZE)      && sz == 8) return -1; // untracked
-+   if (o == GOF(LLSC_ADDR)      && sz == 8) return o;
-+   if (o == GOF(LLSC_DATA_LO64) && sz == 8) return o;
-+   if (o == GOF(LLSC_DATA_HI64) && sz == 8) return o;
- 
-    VG_(printf)("MC_(get_otrack_shadow_offset)(arm64)(off=%d,sz=%d)\n",
-                offset,szB);
-diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
-index c6fd2653f..72ccb3c8c 100644
---- a/memcheck/mc_translate.c
-+++ b/memcheck/mc_translate.c
-@@ -5497,8 +5497,11 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
-       the address (shadow) to 'defined' following the test. */
-    complainIfUndefined( mce, addr, guard );
- 
--   /* Now cook up a call to the relevant helper function, to read the
--      data V bits from shadow memory. */
-+   /* Now cook up a call to the relevant helper function, to read the data V
-+      bits from shadow memory.  Note that I128 loads are done by pretending
-+      we're doing a V128 load, and then converting the resulting V128 vbits
-+      word to an I128, right at the end of this function -- see `castedToI128`
-+      below.  (It's only a minor hack :-) This pertains to bug 444399. */
-    ty = shadowTypeV(ty);
- 
-    void*        helper           = NULL;
-@@ -5511,6 +5514,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
-                         hname = "MC_(helperc_LOADV256le)";
-                         ret_via_outparam = True;
-                         break;
-+         case Ity_I128: // fallthrough.  See comment above.
-          case Ity_V128: helper = &MC_(helperc_LOADV128le);
-                         hname = "MC_(helperc_LOADV128le)";
-                         ret_via_outparam = True;
-@@ -5576,7 +5580,7 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
- 
-    /* We need to have a place to park the V bits we're just about to
-       read. */
--   IRTemp datavbits = newTemp(mce, ty, VSh);
-+   IRTemp datavbits = newTemp(mce, ty == Ity_I128 ? Ity_V128 : ty, VSh);
- 
-    /* Here's the call. */
-    IRDirty* di;
-@@ -5603,7 +5607,14 @@ IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
-    }
-    stmt( 'V', mce, IRStmt_Dirty(di) );
- 
--   return mkexpr(datavbits);
-+   if (ty == Ity_I128) {
-+      IRAtom* castedToI128
-+         = assignNew('V', mce, Ity_I128,
-+                     unop(Iop_ReinterpV128asI128, mkexpr(datavbits)));
-+      return castedToI128;
-+   } else {
-+      return mkexpr(datavbits);
-+   }
- }
- 
- 
-@@ -5631,6 +5642,7 @@ IRAtom* expr2vbits_Load ( MCEnv* mce,
-       case Ity_I16:
-       case Ity_I32:
-       case Ity_I64:
-+      case Ity_I128:
-       case Ity_V128:
-       case Ity_V256:
-          return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
-@@ -5928,6 +5940,7 @@ void do_shadow_Store ( MCEnv* mce,
-                         c = IRConst_V256(V_BITS32_DEFINED); break;
-          case Ity_V128: // V128 weirdness -- used twice
-                         c = IRConst_V128(V_BITS16_DEFINED); break;
-+         case Ity_I128: c = IRConst_U128(V_BITS16_DEFINED); break;
-          case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
-          case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
-          case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
-@@ -5948,6 +5961,7 @@ void do_shadow_Store ( MCEnv* mce,
-       switch (ty) {
-          case Ity_V256: /* we'll use the helper four times */
-          case Ity_V128: /* we'll use the helper twice */
-+         case Ity_I128: /* we'll use the helper twice */
-          case Ity_I64: helper = &MC_(helperc_STOREV64le);
-                        hname = "MC_(helperc_STOREV64le)";
-                        break;
-@@ -6051,9 +6065,9 @@ void do_shadow_Store ( MCEnv* mce,
-       stmt( 'V', mce, IRStmt_Dirty(diQ3) );
- 
-    } 
--   else if (UNLIKELY(ty == Ity_V128)) {
-+   else if (UNLIKELY(ty == Ity_V128 || ty == Ity_I128)) {
- 
--      /* V128-bit case */
-+      /* V128/I128-bit case */
-       /* See comment in next clause re 64-bit regparms */
-       /* also, need to be careful about endianness */
- 
-@@ -6062,6 +6076,7 @@ void do_shadow_Store ( MCEnv* mce,
-       IRAtom  *addrLo64, *addrHi64;
-       IRAtom  *vdataLo64, *vdataHi64;
-       IRAtom  *eBiasLo64, *eBiasHi64;
-+      IROp    opGetLO64,  opGetHI64;
- 
-       if (end == Iend_LE) {
-          offLo64 = 0;
-@@ -6071,9 +6086,17 @@ void do_shadow_Store ( MCEnv* mce,
-          offHi64 = 0;
-       }
- 
-+      if (ty == Ity_V128) {
-+         opGetLO64 = Iop_V128to64;
-+         opGetHI64 = Iop_V128HIto64;
-+      } else {
-+         opGetLO64 = Iop_128to64;
-+         opGetHI64 = Iop_128HIto64;
-+      }
-+
-       eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
-       addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
--      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
-+      vdataLo64 = assignNew('V', mce, Ity_I64, unop(opGetLO64, vdata));
-       diLo64    = unsafeIRDirty_0_N( 
-                      1/*regparms*/, 
-                      hname, VG_(fnptr_to_fnentry)( helper ), 
-@@ -6081,7 +6104,7 @@ void do_shadow_Store ( MCEnv* mce,
-                   );
-       eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
-       addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
--      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
-+      vdataHi64 = assignNew('V', mce, Ity_I64, unop(opGetHI64, vdata));
-       diHi64    = unsafeIRDirty_0_N( 
-                      1/*regparms*/, 
-                      hname, VG_(fnptr_to_fnentry)( helper ), 
-@@ -6888,7 +6911,7 @@ static void do_shadow_LLSC ( MCEnv*    mce,
-       /* Just treat this as a normal load, followed by an assignment of
-          the value to .result. */
-       /* Stay sane */
--      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
-+      tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32
-                 || resTy == Ity_I16 || resTy == Ity_I8);
-       assign( 'V', mce, resTmp,
-                    expr2vbits_Load(
-@@ -6899,7 +6922,7 @@ static void do_shadow_LLSC ( MCEnv*    mce,
-       /* Stay sane */
-       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
-                                    stStoredata);
--      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
-+      tl_assert(dataTy == Ity_I128 || dataTy == Ity_I64 || dataTy == Ity_I32
-                 || dataTy == Ity_I16 || dataTy == Ity_I8);
-       do_shadow_Store( mce, stEnd,
-                             stAddr, 0/* addr bias */,
-@@ -7684,7 +7707,7 @@ static void schemeS ( MCEnv* mce, IRStmt* st )
-                = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
-             IRExpr* vanillaLoad
-                = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
--            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
-+            tl_assert(resTy == Ity_I128 || resTy == Ity_I64 || resTy == Ity_I32
-                       || resTy == Ity_I16 || resTy == Ity_I8);
-             assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
-                               schemeE(mce, vanillaLoad));
-diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
-index 449710020..2b43ef7d7 100644
---- a/memcheck/tests/Makefile.am
-+++ b/memcheck/tests/Makefile.am
-@@ -90,6 +90,7 @@ EXTRA_DIST = \
- 	addressable.stderr.exp addressable.stdout.exp addressable.vgtest \
- 	atomic_incs.stderr.exp atomic_incs.vgtest \
- 	atomic_incs.stdout.exp-32bit atomic_incs.stdout.exp-64bit \
-+	atomic_incs.stdout.exp-64bit-and-128bit \
- 	badaddrvalue.stderr.exp \
- 	badaddrvalue.stdout.exp badaddrvalue.vgtest \
-         exit_on_first_error.stderr.exp \
-diff --git a/memcheck/tests/atomic_incs.c b/memcheck/tests/atomic_incs.c
-index f931750f4..1c738c530 100644
---- a/memcheck/tests/atomic_incs.c
-+++ b/memcheck/tests/atomic_incs.c
-@@ -22,6 +22,17 @@
- #define NNN 3456987
- 
- #define IS_8_ALIGNED(_ptr)   (0 == (((unsigned long)(_ptr)) & 7))
-+#define IS_16_ALIGNED(_ptr)  (0 == (((unsigned long)(_ptr)) & 15))
-+
-+// U128 from libvex_basictypes.h is a 4-x-UInt array, which is a bit
-+// inconvenient, hence:
-+typedef
-+   struct {
-+      // assuming little-endianness
-+      unsigned long long int lo64;
-+      unsigned long long int hi64;
-+   }
-+   MyU128;
- 
- 
- __attribute__((noinline)) void atomic_add_8bit ( char* p, int n ) 
-@@ -712,6 +723,40 @@ __attribute__((noinline)) void atomic_add_64bit ( long long int* p, int n )
- #endif
- }
- 
-+__attribute__((noinline)) void atomic_add_128bit ( MyU128* p,
-+                                                   unsigned long long int n )
-+{
-+#if defined(VGA_x86) || defined(VGA_ppc32) || defined(VGA_mips32) \
-+    || defined (VGA_nanomips) || defined(VGA_mips64) \
-+    || defined(VGA_amd64) \
-+    || defined(VGA_ppc64be) || defined(VGA_ppc64le) \
-+    || defined(VGA_arm) \
-+    || defined(VGA_s390x)
-+   /* do nothing; is not supported */
-+#elif defined(VGA_arm64)
-+   unsigned long long int block[3]
-+      = { (unsigned long long int)p, (unsigned long long int)n,
-+          0xFFFFFFFFFFFFFFFFULL};
-+   do {
-+      __asm__ __volatile__(
-+         "mov   x5, %0"             "\n\t" // &block[0]
-+         "ldr   x9, [x5, #0]"       "\n\t" // p
-+         "ldr   x10, [x5, #8]"      "\n\t" // n
-+         "ldxp  x7, x8, [x9]"       "\n\t"
-+         "adds  x7, x7, x10"        "\n\t"
-+         "adc   x8, x8, xzr"        "\n\t"
-+         "stxp  w4, x7, x8, [x9]"   "\n\t"
-+         "str   x4, [x5, #16]"      "\n\t"
-+         : /*out*/
-+         : /*in*/ "r"(&block[0])
-+         : /*trash*/ "memory", "cc", "x5", "x7", "x8", "x9", "x10", "x4"
-+      );
-+   } while (block[2] != 0);
-+#else
-+# error "Unsupported arch"
-+#endif
-+}
-+
- int main ( int argc, char** argv )
- {
-    int    i, status;
-@@ -720,8 +765,12 @@ int main ( int argc, char** argv )
-    short* p16;
-    int*   p32;
-    long long int* p64;
-+   MyU128*  p128;
-    pid_t  child, p2;
- 
-+   assert(sizeof(MyU128) == 16);
-+   assert(sysconf(_SC_PAGESIZE) >= 4096);
-+
-    printf("parent, pre-fork\n");
- 
-    page = mmap( 0, sysconf(_SC_PAGESIZE),
-@@ -736,11 +785,13 @@ int main ( int argc, char** argv )
-    p16 = (short*)(page+256);
-    p32 = (int*)(page+512);
-    p64 = (long long int*)(page+768);
-+   p128 = (MyU128*)(page+1024);
- 
-    assert( IS_8_ALIGNED(p8) );
-    assert( IS_8_ALIGNED(p16) );
-    assert( IS_8_ALIGNED(p32) );
-    assert( IS_8_ALIGNED(p64) );
-+   assert( IS_16_ALIGNED(p128) );
- 
-    memset(page, 0, 1024);
- 
-@@ -748,6 +799,7 @@ int main ( int argc, char** argv )
-    *p16 = 0;
-    *p32 = 0;
-    *p64 = 0;
-+   p128->lo64 = p128->hi64 = 0;
- 
-    child = fork();
-    if (child == -1) {
-@@ -763,6 +815,7 @@ int main ( int argc, char** argv )
-          atomic_add_16bit(p16, 1);
-          atomic_add_32bit(p32, 1);
-          atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
-+         atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64
-       }
-       return 1;
-       /* NOTREACHED */
-@@ -778,6 +831,7 @@ int main ( int argc, char** argv )
-       atomic_add_16bit(p16, 1);
-       atomic_add_32bit(p32, 1);
-       atomic_add_64bit(p64, 98765 ); /* ensure we hit the upper 32 bits */
-+      atomic_add_128bit(p128, 0x1000000013374771ULL); // ditto re upper 64
-    }
- 
-    p2 = waitpid(child, &status, 0);
-@@ -788,11 +842,17 @@ int main ( int argc, char** argv )
- 
-    printf("FINAL VALUES:  8 bit %d,  16 bit %d,  32 bit %d,  64 bit %lld\n",
-           (int)(*(signed char*)p8), (int)(*p16), *p32, *p64 );
-+   printf("               128 bit 0x%016llx:0x%016llx\n",
-+          p128->hi64, p128->lo64);
- 
-    if (-74 == (int)(*(signed char*)p8) 
-        && 32694 == (int)(*p16) 
-        && 6913974 == *p32
--       && (0LL == *p64 || 682858642110LL == *p64)) {
-+       && (0LL == *p64 || 682858642110LL == *p64)
-+       && ((0 == p128->hi64 && 0 == p128->lo64)
-+           || (0x00000000000697fb == p128->hi64
-+               && 0x6007eb426316d956ULL == p128->lo64))
-+      ) {
-       printf("PASS\n");
-    } else {
-       printf("FAIL -- see source code for expected values\n");
-diff --git a/memcheck/tests/atomic_incs.stdout.exp-32bit b/memcheck/tests/atomic_incs.stdout.exp-32bit
-index c5b8781e5..55e5044b5 100644
---- a/memcheck/tests/atomic_incs.stdout.exp-32bit
-+++ b/memcheck/tests/atomic_incs.stdout.exp-32bit
-@@ -3,5 +3,6 @@ child
- parent, pre-fork
- parent
- FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 0
-+               128 bit 0x0000000000000000:0x0000000000000000
- PASS
- parent exits
-diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit b/memcheck/tests/atomic_incs.stdout.exp-64bit
-index 82405c520..ca2f4fc97 100644
---- a/memcheck/tests/atomic_incs.stdout.exp-64bit
-+++ b/memcheck/tests/atomic_incs.stdout.exp-64bit
-@@ -3,5 +3,6 @@ child
- parent, pre-fork
- parent
- FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 682858642110
-+               128 bit 0x0000000000000000:0x0000000000000000
- PASS
- parent exits
-diff --git a/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit
-new file mode 100644
-index 000000000..ef6580917
---- /dev/null
-+++ b/memcheck/tests/atomic_incs.stdout.exp-64bit-and-128bit
-@@ -0,0 +1,8 @@
-+parent, pre-fork
-+child
-+parent, pre-fork
-+parent
-+FINAL VALUES:  8 bit -74,  16 bit 32694,  32 bit 6913974,  64 bit 682858642110
-+               128 bit 0x00000000000697fb:0x6007eb426316d956
-+PASS
-+parent exits
-diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
-index 00cbfa52c..9efb49b27 100644
---- a/none/tests/arm64/Makefile.am
-+++ b/none/tests/arm64/Makefile.am
-@@ -12,7 +12,10 @@ EXTRA_DIST = \
- 	atomics_v81.stdout.exp atomics_v81.stderr.exp atomics_v81.vgtest \
- 	simd_v81.stdout.exp simd_v81.stderr.exp simd_v81.vgtest \
-         fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \
--	fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp fp_and_simd_v82.vgtest
-+	fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \
-+	fp_and_simd_v82.vgtest \
-+	ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \
-+	ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest
- 
- check_PROGRAMS = \
- 	allexec \
-@@ -20,7 +23,8 @@ check_PROGRAMS = \
- 	fp_and_simd \
- 	integer \
- 	memory \
--	fmadd_sub
-+	fmadd_sub \
-+	ldxp_stxp
- 
- if BUILD_ARMV8_CRC_TESTS
-   check_PROGRAMS += crc32
-diff --git a/none/tests/arm64/ldxp_stxp.c b/none/tests/arm64/ldxp_stxp.c
-new file mode 100644
-index 000000000..b5f6ea121
---- /dev/null
-+++ b/none/tests/arm64/ldxp_stxp.c
-@@ -0,0 +1,93 @@
-+
-+/* Note, this is only a basic smoke test of LD{A}XP and ST{L}XP.  Their
-+   atomicity properties are tested by memcheck/tests/atomic_incs.c. */
-+
-+#include <stdio.h>
-+#include <stdlib.h>
-+#include <malloc.h>
-+#include <assert.h>
-+
-+typedef  unsigned int            UInt;
-+typedef  unsigned long long int  ULong;
-+
-+
-+void initBlock ( ULong* block )
-+{
-+   block[0] = 0x0001020304050607ULL;
-+   block[1] = 0x1011121314151617ULL;
-+   block[2] = 0x2021222324252627ULL;
-+   block[3] = 0x3031323334353637ULL;
-+   block[4] = 0x4041424344454647ULL;
-+   block[5] = 0x5051525354555657ULL;
-+}
-+
-+void printBlock ( const char* who,
-+                  ULong* block, ULong rt1contents, ULong rt2contents,
-+                  UInt zeroIfSuccess )
-+{
-+   printf("Block %s (%s)\n", who, zeroIfSuccess == 0 ? "success" : "FAILURE" );
-+   for (int i = 0; i < 6; i++) {
-+      printf("0x%016llx\n", block[i]);
-+   }
-+   printf("0x%016llx rt1contents\n", rt1contents);
-+   printf("0x%016llx rt2contents\n", rt2contents);
-+   printf("\n");
-+}
-+
-+int main ( void )
-+{
-+   ULong* block = memalign(16, 6 * sizeof(ULong));
-+   assert(block);
-+
-+   ULong rt1in, rt2in, rt1out, rt2out;
-+   UInt scRes;
-+
-+   // Do ldxp then stxp with x-registers
-+   initBlock(block);
-+   rt1in  = 0x5555666677778888ULL;
-+   rt2in  = 0xAAAA9999BBBB0000ULL;
-+   rt1out = 0x1111222233334444ULL;
-+   rt2out = 0xFFFFEEEEDDDDCCCCULL;
-+   scRes  = 0x55555555;
-+   __asm__ __volatile__(
-+      "ldxp %1, %2, [%5]"       "\n\t"
-+      "stxp %w0, %3, %4, [%5]"  "\n\t"
-+      : /*OUT*/
-+        "=&r"(scRes),  // %0
-+        "=&r"(rt1out), // %1
-+        "=&r"(rt2out)  // %2
-+      : /*IN*/
-+        "r"(rt1in),    // %3
-+        "r"(rt2in),    // %4
-+        "r"(&block[2]) // %5
-+      : /*TRASH*/
-+        "memory","cc"
-+   );
-+   printBlock("after ldxp/stxp 2x64-bit", block, rt1out, rt2out, scRes);
-+
-+   // Do ldxp then stxp with w-registers
-+   initBlock(block);
-+   rt1in  = 0x5555666677778888ULL;
-+   rt2in  = 0xAAAA9999BBBB0000ULL;
-+   rt1out = 0x1111222233334444ULL;
-+   rt2out = 0xFFFFEEEEDDDDCCCCULL;
-+   scRes  = 0x55555555;
-+   __asm__ __volatile__(
-+      "ldxp %w1, %w2, [%5]"       "\n\t"
-+      "stxp %w0, %w3, %w4, [%5]"  "\n\t"
-+      : /*OUT*/
-+        "=&r"(scRes),  // %0
-+        "=&r"(rt1out), // %1
-+        "=&r"(rt2out)  // %2
-+      : /*IN*/
-+        "r"(rt1in),    // %3
-+        "r"(rt2in),    // %4
-+        "r"(&block[2]) // %5
-+      : /*TRASH*/
-+        "memory","cc"
-+   );
-+   printBlock("after ldxp/stxp 2x32-bit", block, rt1out, rt2out, scRes);
-+
-+   free(block);
-+   return 0;
-+}
-diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stderr.exp
-new file mode 100644
-index 000000000..e69de29bb
-diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp
-new file mode 100644
-index 000000000..f269ecdcc
---- /dev/null
-+++ b/none/tests/arm64/ldxp_stxp_basisimpl.stdout.exp
-@@ -0,0 +1,20 @@
-+Block after ldxp/stxp 2x64-bit (success)
-+0x0001020304050607
-+0x1011121314151617
-+0x5555666677778888
-+0xaaaa9999bbbb0000
-+0x4041424344454647
-+0x5051525354555657
-+0x2021222324252627 rt1contents
-+0x3031323334353637 rt2contents
-+
-+Block after ldxp/stxp 2x32-bit (success)
-+0x0001020304050607
-+0x1011121314151617
-+0xbbbb000077778888
-+0x3031323334353637
-+0x4041424344454647
-+0x5051525354555657
-+0x0000000024252627 rt1contents
-+0x0000000020212223 rt2contents
-+
-diff --git a/none/tests/arm64/ldxp_stxp_basisimpl.vgtest b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest
-new file mode 100644
-index 000000000..29133729a
---- /dev/null
-+++ b/none/tests/arm64/ldxp_stxp_basisimpl.vgtest
-@@ -0,0 +1,2 @@
-+prog: ldxp_stxp
-+vgopts: -q
-diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stderr.exp
-new file mode 100644
-index 000000000..e69de29bb
-diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp
-new file mode 100644
-index 000000000..f269ecdcc
---- /dev/null
-+++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.stdout.exp
-@@ -0,0 +1,20 @@
-+Block after ldxp/stxp 2x64-bit (success)
-+0x0001020304050607
-+0x1011121314151617
-+0x5555666677778888
-+0xaaaa9999bbbb0000
-+0x4041424344454647
-+0x5051525354555657
-+0x2021222324252627 rt1contents
-+0x3031323334353637 rt2contents
-+
-+Block after ldxp/stxp 2x32-bit (success)
-+0x0001020304050607
-+0x1011121314151617
-+0xbbbb000077778888
-+0x3031323334353637
-+0x4041424344454647
-+0x5051525354555657
-+0x0000000024252627 rt1contents
-+0x0000000020212223 rt2contents
-+
-diff --git a/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest
-new file mode 100644
-index 000000000..474282a03
---- /dev/null
-+++ b/none/tests/arm64/ldxp_stxp_fallbackimpl.vgtest
-@@ -0,0 +1,2 @@
-+prog: ldxp_stxp
-+vgopts: -q --sim-hints=fallback-llsc
-
-commit 0d38ca5dd6b446c70738031132d41f09de0f7a8a
-Author: Julian Seward <jseward@acm.org>
-Date:   Fri Nov 12 13:08:45 2021 +0100
-
-    Bug 444399 - disInstr(arm64): unhandled instruction 0xC87F2D89 (LD{,A}XP and ST{,L}XP).  FOLLOWUP FIX.
-    
-    This is an attempt to un-break 'make dist', as broken by the main commit for
-    this bug, which was 530df882b8f60ecacaf2b9b8a719f7ea1c1d1650.
-
-diff --git a/none/tests/arm64/Makefile.am b/none/tests/arm64/Makefile.am
-index 9efb49b27..4a06f0996 100644
---- a/none/tests/arm64/Makefile.am
-+++ b/none/tests/arm64/Makefile.am
-@@ -14,8 +14,10 @@ EXTRA_DIST = \
-         fmadd_sub.stdout.exp fmadd_sub.stderr.exp fmadd_sub.vgtest \
- 	fp_and_simd_v82.stdout.exp fp_and_simd_v82.stderr.exp \
- 	fp_and_simd_v82.vgtest \
--	ldxp_stxp.stdout.exp ldxp_stxp.stderr.exp \
--	ldxp_stxp_basisimpl.vgtest ldxp_stxp_fallbackimpl.vgtest
-+	ldxp_stxp_basisimpl.stdout.exp ldxp_stxp_basisimpl.stderr.exp \
-+	ldxp_stxp_basisimpl.vgtest \
-+	ldxp_stxp_fallbackimpl.stdout.exp ldxp_stxp_fallbackimpl.stderr.exp \
-+	ldxp_stxp_fallbackimpl.vgtest
- 
- check_PROGRAMS = \
- 	allexec \
diff --git a/valgrind-3.18.1-condvar.patch b/valgrind-3.18.1-condvar.patch
deleted file mode 100644
index e129326..0000000
--- a/valgrind-3.18.1-condvar.patch
+++ /dev/null
@@ -1,284 +0,0 @@
-commit 9abfed23c0d430aafb85de6397d171316c982792
-Author: Paul Floyd <pjfloyd@wanadoo.fr>
-Date:   Fri Nov 19 08:34:53 2021 +0100
-
-    Bug 445504 Using C++ condition_variable results in bogus "mutex is locked simultaneously by two threads" warning(edit)
-    
-    Add intercepts for pthread_cond_clockwait to DRD and Helgrind
-    Also testcase from bugzilla done by Bart, with configure check
-
-diff --git a/configure.ac b/configure.ac
-index e7381f205..cb836dbff 100755
---- a/configure.ac
-+++ b/configure.ac
-@@ -1989,6 +1989,27 @@ AC_LANG(C)
- 
- AM_CONDITIONAL(CXX_CAN_INCLUDE_THREAD_HEADER, test x$ac_cxx_can_include_thread_header = xyes)
- 
-+# Check whether compiler can process #include <condition_variable> without errors
-+
-+AC_MSG_CHECKING([that C++ compiler can include <condition_variable> header file])
-+AC_LANG(C++)
-+safe_CXXFLAGS=$CXXFLAGS
-+CXXFLAGS=-std=c++0x
-+
-+AC_COMPILE_IFELSE([AC_LANG_SOURCE([
-+#include <condition_variable> 
-+])],
-+[
-+ac_cxx_can_include_condition_variable_header=yes
-+AC_MSG_RESULT([yes])
-+], [
-+ac_cxx_can_include_condition_variable_header=no
-+AC_MSG_RESULT([no])
-+])
-+CXXFLAGS=$safe_CXXFLAGS
-+AC_LANG(C)
-+
-+AM_CONDITIONAL(CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER, test x$ac_cxx_can_include_condition_variable_header = xyes)
- 
- # On aarch64 before glibc 2.20 we would get the kernel user_pt_regs instead
- # of the user_regs_struct from sys/user.h. They are structurally the same
-diff --git a/drd/drd_pthread_intercepts.c b/drd/drd_pthread_intercepts.c
-index 8b4454364..95127b42c 100644
---- a/drd/drd_pthread_intercepts.c
-+++ b/drd/drd_pthread_intercepts.c
-@@ -1175,6 +1175,30 @@ PTH_FUNCS(int, condZureltimedwait, pthread_cond_timedwait_intercept,
-           (cond, mutex, timeout));
- #endif /* VGO_solaris */
- 
-+
-+static __always_inline
-+int pthread_cond_clockwait_intercept(pthread_cond_t *cond,
-+                                     pthread_mutex_t *mutex,
-+                                     clockid_t clockid,
-+                                     const struct timespec* abstime)
-+{
-+   int   ret;
-+   OrigFn fn;
-+   VALGRIND_GET_ORIG_FN(fn);
-+   VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__PRE_COND_WAIT,
-+                                   cond, mutex, DRD_(mutex_type)(mutex), 0, 0);
-+   CALL_FN_W_WWWW(ret, fn, cond, mutex, clockid, abstime);
-+   VALGRIND_DO_CLIENT_REQUEST_STMT(VG_USERREQ__POST_COND_WAIT,
-+                                   cond, mutex, 1, 0, 0);
-+   return ret;
-+}
-+
-+PTH_FUNCS(int, pthreadZucondZuclockwait, pthread_cond_clockwait_intercept,
-+          (pthread_cond_t *cond, pthread_mutex_t *mutex,
-+            clockid_t clockid, const struct timespec* abstime),
-+          (cond, mutex, clockid, abstime));
-+
-+
- // NOTE: be careful to intercept only pthread_cond_signal() and not Darwin's
- // pthread_cond_signal_thread_np(). The former accepts one argument; the latter
- // two. Intercepting all pthread_cond_signal* functions will cause only one
-diff --git a/drd/tests/Makefile.am b/drd/tests/Makefile.am
-index 4cb2f7f84..c804391e8 100755
---- a/drd/tests/Makefile.am
-+++ b/drd/tests/Makefile.am
-@@ -105,6 +105,8 @@ EXTRA_DIST =                                        \
- 	circular_buffer.vgtest			    \
- 	concurrent_close.stderr.exp		    \
- 	concurrent_close.vgtest			    \
-+	condvar.stderr.exp			    \
-+	condvar.vgtest				    \
- 	custom_alloc.stderr.exp			    \
- 	custom_alloc.vgtest			    \
- 	custom_alloc_fiw.stderr.exp		    \
-@@ -458,6 +460,11 @@ check_PROGRAMS += \
- endif
- endif
- 
-+if CXX_CAN_INCLUDE_CONDITION_VARIABLE_HEADER
-+check_PROGRAMS += \
-+    condvar
-+endif
-+
- if HAVE_OPENMP
- check_PROGRAMS += omp_matinv omp_prime omp_printf
- endif
-@@ -502,6 +509,8 @@ LDADD = -lpthread
- 
- 
- bug322621_SOURCES           = bug322621.cpp
-+condvar_SOURCES		    = condvar.cpp
-+condvar_CXXFLAGS            = $(AM_CXXFLAGS) -std=c++0x
- concurrent_close_SOURCES    = concurrent_close.cpp
- if !VGCONF_OS_IS_FREEBSD
- dlopen_main_LDADD           = -ldl
-diff --git a/drd/tests/condvar.cpp b/drd/tests/condvar.cpp
-new file mode 100644
-index 000000000..18ecb3f8a
---- /dev/null
-+++ b/drd/tests/condvar.cpp
-@@ -0,0 +1,55 @@
-+/* See also https://bugs.kde.org/show_bug.cgi?id=445504 */
-+
-+#include <condition_variable>
-+#include <future>
-+#include <iostream>
-+#include <mutex>
-+#include <thread>
-+#include <vector>
-+
-+using lock_guard = std::lock_guard<std::mutex>;
-+using unique_lock = std::unique_lock<std::mutex>;
-+
-+struct state {
-+  std::mutex m;
-+  std::vector<int> v;
-+  std::condition_variable cv;
-+
-+  state() {
-+    // Call pthread_cond_init() explicitly to let DRD know about 'cv'.
-+    pthread_cond_init(cv.native_handle(), NULL);
-+  }
-+};
-+
-+void other_thread(state *sp) {
-+  state &s = *sp;
-+  std::cerr << "Other thread: waiting for notify\n";
-+  unique_lock l{s.m};
-+  while (true) {
-+    if (s.cv.wait_for(l, std::chrono::seconds(3)) !=
-+	std::cv_status::timeout) {
-+      std::cerr << "Other thread: notified\n";
-+      break;
-+    }
-+  }
-+  return;
-+}
-+
-+
-+int main() {
-+  state s;
-+  auto future = std::async(std::launch::async, other_thread, &s);
-+
-+  if (future.wait_for(std::chrono::seconds(1)) != std::future_status::timeout) {
-+    std::cerr << "Main: other thread returned too early!\n";
-+    return 2;
-+  }
-+
-+  {
-+    std::lock_guard<std::mutex> g{s.m};
-+    s.v.push_back(1);
-+    s.v.push_back(2);
-+    s.cv.notify_all();
-+  }
-+  return 0;
-+}
-diff --git a/drd/tests/condvar.stderr.exp b/drd/tests/condvar.stderr.exp
-new file mode 100644
-index 000000000..be1de9f97
---- /dev/null
-+++ b/drd/tests/condvar.stderr.exp
-@@ -0,0 +1,5 @@
-+
-+Other thread: waiting for notify
-+Other thread: notified
-+
-+ERROR SUMMARY: 0 errors from 0 contexts (suppressed: 0 from 0)
-diff --git a/drd/tests/condvar.vgtest b/drd/tests/condvar.vgtest
-new file mode 100644
-index 000000000..2e7d49f5a
---- /dev/null
-+++ b/drd/tests/condvar.vgtest
-@@ -0,0 +1,3 @@
-+prereq: ./supported_libpthread && [ -e condvar ]
-+vgopts: --check-stack-var=yes --read-var-info=yes
-+prog: condvar
-diff --git a/helgrind/hg_intercepts.c b/helgrind/hg_intercepts.c
-index 866efdbaa..49c3ddcd9 100644
---- a/helgrind/hg_intercepts.c
-+++ b/helgrind/hg_intercepts.c
-@@ -1409,6 +1409,88 @@ static int pthread_cond_timedwait_WRK(pthread_cond_t* cond,
- #  error "Unsupported OS"
- #endif
- 
-+//-----------------------------------------------------------
-+// glibc:   pthread_cond_clockwait
-+//
-+__attribute__((noinline))
-+static int pthread_cond_clockwait_WRK(pthread_cond_t* cond,
-+                                      pthread_mutex_t* mutex,
-+                                      clockid_t clockid,
-+                                      struct timespec* abstime,
-+                                      int timeout_error)
-+{
-+   int ret;
-+   OrigFn fn;
-+   unsigned long mutex_is_valid;
-+   Bool abstime_is_valid;
-+   VALGRIND_GET_ORIG_FN(fn);
-+
-+   if (TRACE_PTH_FNS) {
-+      fprintf(stderr, "<< pthread_cond_clockwait %p %p %p",
-+                      cond, mutex, abstime);
-+      fflush(stderr);
-+   }
-+
-+   /* Tell the tool a cond-wait is about to happen, so it can check
-+      for bogus argument values.  In return it tells us whether it
-+      thinks the mutex is valid or not. */
-+   DO_CREQ_W_WW(mutex_is_valid,
-+                _VG_USERREQ__HG_PTHREAD_COND_WAIT_PRE,
-+                pthread_cond_t*,cond, pthread_mutex_t*,mutex);
-+   assert(mutex_is_valid == 1 || mutex_is_valid == 0);
-+
-+   abstime_is_valid = abstime->tv_nsec >= 0 && abstime->tv_nsec < 1000000000;
-+
-+   /* Tell the tool we're about to drop the mutex.  This reflects the
-+      fact that in a cond_wait, we show up holding the mutex, and the
-+      call atomically drops the mutex and waits for the cv to be
-+      signalled. */
-+   if (mutex_is_valid && abstime_is_valid) {
-+      DO_CREQ_v_W(_VG_USERREQ__HG_PTHREAD_MUTEX_UNLOCK_PRE,
-+                  pthread_mutex_t*,mutex);
-+   }
-+
-+   CALL_FN_W_WWWW(ret, fn, cond,mutex,clockid,abstime);
-+
-+   if (mutex_is_valid && !abstime_is_valid && ret != EINVAL) {
-+      DO_PthAPIerror("Bug in libpthread: pthread_cond_clockwait "
-+                     "invalid abstime did not cause"
-+                     " EINVAL", ret);
-+   }
-+
-+   if (mutex_is_valid && abstime_is_valid) {
-+      /* and now we have the mutex again if (ret == 0 || ret == timeout) */
-+      DO_CREQ_v_WW(_VG_USERREQ__HG_PTHREAD_MUTEX_LOCK_POST,
-+                   pthread_mutex_t *, mutex,
-+                   long, (ret == 0 || ret == timeout_error) ? True : False);
-+   }
-+
-+   DO_CREQ_v_WWWW(_VG_USERREQ__HG_PTHREAD_COND_WAIT_POST,
-+                  pthread_cond_t*,cond, pthread_mutex_t*,mutex,
-+                  long,ret == timeout_error,
-+                  long, (ret == 0 || ret == timeout_error) && mutex_is_valid
-+                        ? True : False);
-+
-+   if (ret != 0 && ret != timeout_error) {
-+      DO_PthAPIerror( "pthread_cond_clockwait", ret );
-+   }
-+
-+   if (TRACE_PTH_FNS) {
-+      fprintf(stderr, " cotimedwait -> %d >>\n", ret);
-+   }
-+
-+   return ret;
-+}
-+
-+#if defined(VGO_linux)
-+   PTH_FUNC(int, pthreadZucondZuclockwait, // pthread_cond_clockwait
-+                 pthread_cond_t* cond, pthread_mutex_t* mutex,
-+                 clockid_t clockid,
-+                 struct timespec* abstime) {
-+      return pthread_cond_clockwait_WRK(cond, mutex, clockid, abstime, ETIMEDOUT);
-+   }
-+#endif
-+
- 
- //-----------------------------------------------------------
- // glibc:   pthread_cond_signal@GLIBC_2.0
diff --git a/valgrind-3.18.1-demangle-namespace.patch b/valgrind-3.18.1-demangle-namespace.patch
deleted file mode 100644
index 25ddf92..0000000
--- a/valgrind-3.18.1-demangle-namespace.patch
+++ /dev/null
@@ -1,35 +0,0 @@
-commit 542447d4708d4418a08e678dcf467af92b90b7ad
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Mon Nov 22 13:07:59 2021 +0100
-
-    readdwarf3.c (parse_inl_DIE) inlined_subroutine can appear in namespaces
-    
-    This was broken by commit 75e3ef0f3 "readdwarf3: Skip units without
-    addresses when looking for inlined functions". Specifically by this
-    part: "Also use skip_DIE instead of read_DIE when not parsing
-    (skipping) children"
-    
-    rustc puts concrete function instances in namespaces (which is
-    allowed in DWARF since there is no strict separation between type
-    declarations and program scope entries in a DIE tree), the inline
-    parser didn't expect this and so skipped any DIE under a namespace
-    entry. This wasn't an issue before because "skipping" a DIE tree was
-    done by reading it, so it wasn't actually skipped. But now that we
-    really skip the DIE (sub)tree (which is faster than actually parsing
-    it) some entries were missed in the rustc case.
-    
-    https://bugs.kde.org/show_bug.cgi?id=445668
-
-diff --git a/coregrind/m_debuginfo/readdwarf3.c b/coregrind/m_debuginfo/readdwarf3.c
-index 18eecea9f..5489f8d13 100644
---- a/coregrind/m_debuginfo/readdwarf3.c
-+++ b/coregrind/m_debuginfo/readdwarf3.c
-@@ -3358,7 +3358,7 @@ static Bool parse_inl_DIE (
-    // might maybe contain a DW_TAG_inlined_subroutine:
-    Bool ret = (unit_has_addrs
-                || dtag == DW_TAG_lexical_block || dtag == DW_TAG_subprogram
--               || dtag == DW_TAG_inlined_subroutine);
-+               || dtag == DW_TAG_inlined_subroutine || dtag == DW_TAG_namespace);
-    return ret;
- 
-   bad_DIE:
diff --git a/valgrind-3.18.1-dhat-tests-copy.patch b/valgrind-3.18.1-dhat-tests-copy.patch
deleted file mode 100644
index 8e183b9..0000000
--- a/valgrind-3.18.1-dhat-tests-copy.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-commit 33aba8eef68b1745d3de96b609ff8296b70d9a1c
-Author: Paul Floyd <pjfloyd@wanadoo.fr>
-Date:   Wed Oct 27 21:37:00 2021 +0200
-
-    Bug 444495 - dhat/tests/copy fails on s390x
-    
-    Add -fno-builtin to ensure that the copy functions get called and so dhat
-    can intercept and count them.
-
-diff --git a/dhat/tests/Makefile.am b/dhat/tests/Makefile.am
-index 86a9b6d64..b86fc416d 100644
---- a/dhat/tests/Makefile.am
-+++ b/dhat/tests/Makefile.am
-@@ -29,3 +29,6 @@ AM_CXXFLAGS += $(AM_FLAG_M3264_PRI)
- # We don't care about uninitialized or unused malloc results
- basic_CFLAGS  = $(AM_CFLAGS) -Wno-uninitialized
- big_CFLAGS  = $(AM_CFLAGS) -Wno-unused-result
-+
-+# Prevent the copying functions from being inlined
-+copy_CFLAGS = $(AM_CFLAGS) -fno-builtin
diff --git a/valgrind-3.18.1-gdbserver_tests-hwcap.patch b/valgrind-3.18.1-gdbserver_tests-hwcap.patch
deleted file mode 100644
index 2d952cd..0000000
--- a/valgrind-3.18.1-gdbserver_tests-hwcap.patch
+++ /dev/null
@@ -1,25 +0,0 @@
-commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Tue Nov 2 14:27:45 2021 +0100
-
-    gdbserver_tests: Filter out glibc hwcaps libc.so
-    
-    On some systems the gdbserver_tests would fail because the filter
-    for the optimized hwcaps subdir didn't match because the file is
-    called slightly differently, with the version number before .so
-    instead of after. For example: /lib64/glibc-hwcaps/power9/libc-2.28.so
-    
-    Add one extra filter for this pattern.
-
-diff --git a/gdbserver_tests/filter_gdb.in b/gdbserver_tests/filter_gdb.in
-index d0c94f3f1..b753e0168 100755
---- a/gdbserver_tests/filter_gdb.in
-+++ b/gdbserver_tests/filter_gdb.in
-@@ -134,6 +134,7 @@ s/in \(.__\)\{0,1\}select () from \/.*$/in syscall .../
- /^   from \/lib\/libc.so.*$/d
- /^   from \/lib64\/libc.so.*$/d
- /^   from \/lib64\/.*\/libc.so.*$/d
-+/^   from \/lib64\/.*\/libc-.*.so/d
- 
- #       and yet another (gdb 7.0 way) to get a system call
- s/in select ()$/in syscall .../
diff --git a/valgrind-3.18.1-ppc-hwcaps.patch b/valgrind-3.18.1-ppc-hwcaps.patch
deleted file mode 100644
index 27f0439..0000000
--- a/valgrind-3.18.1-ppc-hwcaps.patch
+++ /dev/null
@@ -1,130 +0,0 @@
-commit 3ea8d4327003c3cefe8e82c59be8e92dcfe1a60f
-Author: Carl Love <cel@us.ibm.com>
-Date:   Fri Jan 14 23:04:44 2022 +0000
-
-    Assorted changes to protect from side affects from the feature checking code.
-    
-    Patch contributed by Will Schmidt <will_schmidt@vnet.ibm.com>
-    
-    This problem was initially reported by Tulio, he assisted me in
-    identifying the underlying issue here.
-    
-    This was discovered on a Power10, and occurs since the ISA 3.1 support
-    check uses the brh instruction via a hardcoded ".long 0x7f1401b6" asm stanza.
-    That encoding writes to r20, and since the stanza does not contain a clobber
-    the compiler did not know to save or restore that register upon entry or exit.
-    The junk value remaining in r20 subsequently caused a segfault.
-    
-    This patch adds clobber masks to the instruction stanzas, as well as
-    updates the associated comments to clarify which registers are being
-    used.
-        As part of this change I've also
-        - updated the .long for the cnttzw instruction to write to r20, and
-          zeroed the reserved bits from that instruction so it is properly
-          decoded by the disassembler.
-        - updated the .long for the dadd instruction to write to f0.
-    
-        I've inspected the current codegen with these changes in place, and
-        confirm that r20 is now saved and restored on entry and exit from the
-        machine_get_hwcaps() function.
-    
-    bugzilla 447995   Valgrind segfault on power10 due to hwcap checking code
-
-diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
-index 0b60ecc0f..089acee64 100644
---- a/coregrind/m_machine.c
-+++ b/coregrind/m_machine.c
-@@ -1246,7 +1246,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_isa_3_0 = False;
-      } else {
--        __asm__ __volatile__(".long 0x7d205434"); /* cnttzw RT, RB */
-+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
-      }
- 
-      // ISA 3.1 not supported on 32-bit systems
-@@ -1358,7 +1358,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_V = False;
-      } else {
--        __asm__ __volatile__(".long 0x10000484"); /*vor 0,0,0*/
-+        __asm__ __volatile__(".long 0x10000484"); /* vor v0,v0,v0 */
-      }
- 
-      /* General-Purpose optional (fsqrt, fsqrts) */
-@@ -1366,7 +1366,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_FX = False;
-      } else {
--        __asm__ __volatile__(".long 0xFC00002C"); /*fsqrt 0,0*/
-+        __asm__ __volatile__(".long 0xFC00002C"); /* fsqrt f0,f0 */
-      }
- 
-      /* Graphics optional (stfiwx, fres, frsqrte, fsel) */
-@@ -1374,7 +1374,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_GX = False;
-      } else {
--        __asm__ __volatile__(".long 0xFC000034"); /*frsqrte 0,0*/
-+        __asm__ __volatile__(".long 0xFC000034"); /* frsqrte f0,f0 */
-      }
- 
-      /* VSX support implies Power ISA 2.06 */
-@@ -1382,7 +1382,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_VX = False;
-      } else {
--        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp XT,XB */
-+        __asm__ __volatile__(".long 0xf0000564"); /* xsabsdp vs0,vs0 */
-      }
- 
-      /* Check for Decimal Floating Point (DFP) support. */
-@@ -1390,7 +1390,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_DFP = False;
-      } else {
--        __asm__ __volatile__(".long 0xee4e8005"); /* dadd  FRT,FRA, FRB */
-+        __asm__ __volatile__(".long 0xec0e8005"); /* dadd f0,f14,f16 */
-      }
- 
-      /* Check for ISA 2.07 support. */
-@@ -1398,7 +1398,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_isa_2_07 = False;
-      } else {
--        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd XT,RA */
-+        __asm__ __volatile__(".long 0x7c000166"); /* mtvsrd f0,r0 */
-      }
- 
-      /* Check for ISA 3.0 support. */
-@@ -1406,7 +1406,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_isa_3_0 = False;
-      } else {
--        __asm__ __volatile__(".long  0x7d205434"); /* cnttzw RT, RB */
-+        __asm__ __volatile__(".long 0x7f140434":::"r20"); /* cnttzw r20,r24 */
-      }
- 
-      /* Check for ISA 3.1 support. */
-@@ -1414,7 +1414,7 @@ Bool VG_(machine_get_hwcaps)( void )
-      if (VG_MINIMAL_SETJMP(env_unsup_insn)) {
-         have_isa_3_1 = False;
-      } else {
--        __asm__ __volatile__(".long 0x7f1401b6"); /* brh  RA, RS */
-+        __asm__ __volatile__(".long 0x7f1401b6":::"r20"); /* brh r20,r24 */
-      }
- 
-      /* Check if Host supports scv instruction */
-@@ -1424,9 +1424,9 @@ Bool VG_(machine_get_hwcaps)( void )
-      } else {
-         /* Set r0 to 13 for the system time call.  Don't want to make a random
-            system call.  */
--        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 */
--        __asm__ __volatile__(".long 0x6009000d"); /* set r0 to 13 */
--        __asm__ __volatile__(".long 0x44000001"); /* scv */
-+        __asm__ __volatile__(".long 0x7c000278"); /* clear r0 with xor r0,r0,r0 */
-+        __asm__ __volatile__(".long 0x6000000d"); /* set r0 to 13 with ori r0,r0,13 */
-+        __asm__ __volatile__(".long 0x44000001"); /* scv 0 */
-      }
- 
-      /* determine dcbz/dcbzl sizes while we still have the signal
diff --git a/valgrind-3.18.1-ppc-pstq-tests.patch b/valgrind-3.18.1-ppc-pstq-tests.patch
deleted file mode 100644
index 58498f2..0000000
--- a/valgrind-3.18.1-ppc-pstq-tests.patch
+++ /dev/null
@@ -1,1876 +0,0 @@
-commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c
-Author: Carl Love <cel@us.ibm.com>
-Date:   Mon Nov 1 11:18:32 2021 -0500
-
-    Valgrind Add powerpc R=1 tests
-    
-    Contributed by Will Schmidt <will_schmidt@vnet.ibm.com>
-    
-    This includes updates and adjustments as suggested by Carl.
-    
-    Add tests that exercise PCRelative instructions.
-    These instructions are encoded with R==1, which indicate that
-    the memory accessed by the instruction is at a location
-    relative to the currently executing instruction.
-    
-    These tests are built using -Wl,-text and -Wl,-bss
-    options to ensure the location of the target array is at a
-    location with a specific offset from the currently
-    executing instruction.
-    
-    The write instructions are aimed at a large buffer in
-    the bss section; which is checked for updates at the
-    completion of each test.
-    
-    In order to ensure consistent output across assorted
-    systems, the tests have been padded with ori, nop instructions
-    and align directives.
-    
-    Detailed changes:
-     * Makefile.am: Add test_isa_3_1_R1_RT and test_isa_3_1_R1_XT tests.
-     * isa_3_1_helpers.h: Add identify_instruction_by_func_name() helper function
-       to indicate if the test is for R==1.
-       Add helpers to initialize and print changes to the pcrelative_write_target
-       array.
-       Add #define to help pad code with a series of eyecatcher ORI instructions.
-         * test_isa_3_1_R1_RT.c: New test.
-         * test_isa_3_1_R1_XT.c: New test.
-         * test_isa_3_1_R1_XT.stdout.exp: New expected output.
-         * test_isa_3_1_R1_XT.stdout.exp: New expected output.
-         * test_isa_3_1_R1_RT.stderr.exp: New expected output.
-         * test_isa_3_1_R1_RT.stderr.exp: New expected output.
-    
-         * test_isa_3_1_R1_RT.vgtest: New test handler.
-         * test_isa_3_1_R1_XT.vgtest: New test handler.
-    
-         * test_isa_3_1_common.c: Add indicators (updates_byte,updates_halfword,
-           updates_word) indicators to control the output from the R==1 tests.
-           Add helper check for "_R1" to indicate if instruction is coded with R==1.
-           Add init and print helpers for the pcrelative_write_target array.
-
-diff --git a/none/tests/ppc64/Makefile.am b/none/tests/ppc64/Makefile.am
-index b709f3ef4..f8eab9fc0 100644
---- a/none/tests/ppc64/Makefile.am
-+++ b/none/tests/ppc64/Makefile.am
-@@ -61,6 +61,8 @@ EXTRA_DIST = \
- 	test_isa_3_1_VRT.vgtest test_isa_3_1_VRT.stderr.exp test_isa_3_1_VRT.stdout.exp \
- 	test_isa_3_1_Misc.vgtest test_isa_3_1_Misc.stderr.exp test_isa_3_1_Misc.stdout.exp \
- 	test_isa_3_1_AT.vgtest test_isa_3_1_AT.stderr.exp test_isa_3_1_AT.stdout.exp \
-+	test_isa_3_1_R1_RT.vgtest test_isa_3_1_R1_RT.stderr.exp test_isa_3_1_R1_RT.stdout.exp \
-+	test_isa_3_1_R1_XT.vgtest test_isa_3_1_R1_XT.stderr.exp test_isa_3_1_R1_XT.stdout.exp \
- 	subnormal_test.stderr.exp  subnormal_test.stdout.exp \
- 	subnormal_test.vgtest test_darn_inst.stderr.exp \
- 	test_darn_inst.stdout.exp test_darn_inst.vgtest \
-@@ -68,8 +70,8 @@ EXTRA_DIST = \
- 	test_copy_paste.stderr.exp test_copy_paste.stdout.exp \
- 	test_copy_paste.vgtest \
- 	test_mcrxrx.vgtest test_mcrxrx.stderr.exp test_mcrxrx.stdout.exp \
--	test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp test_lxvx_stxvx.stdout.exp-p8  test_lxvx_stxvx.stdout.exp-p9
--
-+	test_lxvx_stxvx.vgtest test_lxvx_stxvx.stderr.exp \
-+	test_lxvx_stxvx.stdout.exp-p8  test_lxvx_stxvx.stdout.exp-p9
- 
- check_PROGRAMS = \
- 	allexec \
-@@ -80,11 +82,12 @@ check_PROGRAMS = \
- 	test_isa_3_0 test_mod_instructions \
- 	test_isa_3_1_RT test_isa_3_1_XT test_isa_3_1_VRT \
- 	test_isa_3_1_Misc test_isa_3_1_AT \
-+	test_isa_3_1_R1_RT test_isa_3_1_R1_XT \
- 	subnormal_test test_darn_inst test_copy_paste \
- 	test_tm test_touch_tm data-cache-instructions \
- 	std_reg_imm \
- 	twi_tdi tw_td power6_bcmp scv_test \
--	test_mcrxrx  test_lxvx_stxvx
-+	test_mcrxrx test_lxvx_stxvx
- 
- # lmw, stmw, lswi, lswx, stswi, stswx compile (and run) only on big endian.
- if VGCONF_PLATFORMS_INCLUDE_PPC64BE_LINUX
-@@ -106,6 +109,8 @@ test_isa_3_1_RT_SOURCES = test_isa_3_1_RT.c test_isa_3_1_common.c
- test_isa_3_1_VRT_SOURCES = test_isa_3_1_VRT.c test_isa_3_1_common.c
- test_isa_3_1_Misc_SOURCES = test_isa_3_1_Misc.c test_isa_3_1_common.c
- test_isa_3_1_AT_SOURCES = test_isa_3_1_AT.c test_isa_3_1_common.c
-+test_isa_3_1_R1_XT_SOURCES = test_isa_3_1_R1_XT.c test_isa_3_1_common.c
-+test_isa_3_1_R1_RT_SOURCES = test_isa_3_1_R1_RT.c test_isa_3_1_common.c
- test_darn_inst_SOURCES = test_darn_inst.c
- 
- if HAS_ALTIVEC
-@@ -224,6 +229,11 @@ test_isa_3_1_VRT_CFLAGS = $(test_isa_3_1_CFLAGS)
- test_isa_3_1_Misc_CFLAGS = $(test_isa_3_1_CFLAGS)
- test_isa_3_1_AT_CFLAGS = $(test_isa_3_1_CFLAGS)
- 
-+# The _R1_foo tests exercise pc-relative instructions, so require the bss and text sections
-+# exist at known offsets with respect to each other.
-+test_isa_3_1_R1_RT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000
-+test_isa_3_1_R1_XT_CFLAGS = $(test_isa_3_1_CFLAGS) -Wl,-Tbss,0x20000 -Wl,-Ttext,0x40000
-+
- subnormal_test_CFLAGS = $(AM_CFLAGS) -Winline -Wall -O -g -mregnames $(VSX_FLAG) $(ISA_2_06_FLAG) \
- 			@FLAG_M64@ $(ALTIVEC_FLAG) $(BUILD_FLAG_VSX) $(BUILD_FLAGS_ISA_2_06)
- 
-diff --git a/none/tests/ppc64/isa_3_1_helpers.h b/none/tests/ppc64/isa_3_1_helpers.h
-index 338f55526..716a6277b 100644
---- a/none/tests/ppc64/isa_3_1_helpers.h
-+++ b/none/tests/ppc64/isa_3_1_helpers.h
-@@ -43,6 +43,9 @@ extern void debug_show_current_iteration();
- extern void debug_dump_buffer();
- 
- extern void identify_form_components(const char *, const char *);
-+extern void identify_instruction_by_func_name(const char *);
-+extern void init_pcrelative_write_target();
-+extern void print_pcrelative_write_target();
- extern void dump_vsxargs();
- extern void generic_prologue();
- extern void build_args_table();
-@@ -58,6 +61,21 @@ extern void initialize_source_registers();
- extern void set_up_iterators();
- extern void initialize_buffer(int);
- 
-+/* This (TEXT_BSS_DELTA) is the relative distance between those
-+   sections as set by the linker options for the R==1 tests. */
-+#define TEXT_BSS_DELTA 0x20000
-+#define RELOC_BUFFER_SIZE 0x1000
-+extern unsigned long long pcrelative_buff_addr(int);
-+#define PAD_ORI	\
-+	__asm__ __volatile__ ("ori 21,21,21"); \
-+	__asm__ __volatile__ ("ori 22,22,22");\
-+	__asm__ __volatile__ ("ori 23,23,23");\
-+	__asm__ __volatile__ ("ori 24,24,24");\
-+	__asm__ __volatile__ ("ori 25,25,25");\
-+	__asm__ __volatile__ ("ori 26,26,26");\
-+	__asm__ __volatile__ ("ori 27,27,27");\
-+	__asm__ __volatile__ ("ori 28,28,28");
-+
- extern int verbose;
- #define debug_printf(X) if (verbose>0) printf(X);
- #define debug_show_labels (verbose>0)
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.c b/none/tests/ppc64/test_isa_3_1_R1_RT.c
-new file mode 100644
-index 000000000..d73b84b10
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_RT.c
-@@ -0,0 +1,624 @@
-+/*
-+ * Valgrind testcase for PowerPC ISA 3.1
-+ *
-+ * Copyright (C) 2019-2020 Will Schmidt <will_schmidt@vnet.ibm.com>
-+ *
-+ * 64bit build:
-+ *    gcc -Winline -Wall -g -O -mregnames -maltivec -m64
-+ */
-+
-+/*
-+ *   This program is free software; you can redistribute it and/or
-+ *   modify it under the terms of the GNU General Public License as
-+ *   published by the Free Software Foundation; either version 2 of the
-+ *   License, or (at your option) any later version.
-+ *
-+ *   This program is distributed in the hope that it will be useful,
-+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *   GNU General Public License for more details.
-+ *
-+ *   You should have received a copy of the GNU General Public License
-+ *   along with this program; if not, write to the Free Software
-+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+ */
-+
-+#include <stdio.h>
-+#ifdef HAS_ISA_3_1
-+#include <stdint.h>
-+#include <assert.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <unistd.h>
-+#include <altivec.h>
-+#include <malloc.h>
-+
-+#include <string.h>
-+#include <signal.h>
-+#include <setjmp.h>
-+
-+/* Condition Register fields.
-+   These are used to capture the condition register values immediately after
-+   the instruction under test is executed. This is done to help prevent other
-+   test overhead (switch statements, result compares, etc) from disturbing
-+   the test case results.  */
-+unsigned long current_cr;
-+unsigned long current_fpscr;
-+
-+struct test_list_t current_test;
-+
-+#include "isa_3_1_helpers.h"
-+
-+static void test_plxvp_off0_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plxvp 20, +0(0),1"  );
-+	PAD_ORI
-+}
-+static void test_plxvp_off8_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plxvp 20, +8(0),1" );
-+	PAD_ORI
-+}
-+static void test_plxvp_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plxvp 20, +16(0),1" );
-+	PAD_ORI
-+}
-+static void test_plxvp_off24_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plxvp 20, +24(0),1" );
-+	PAD_ORI
-+}
-+static void test_plxvp_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plxvp 20, +32(0),1" );
-+	PAD_ORI
-+}
-+static void test_plbz_off0_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plbz %0, +0(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plbz_off8_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plbz %0, +8(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plbz_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plbz %0, +16(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plbz_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plbz %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plbz_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plbz %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plhz_off0_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plhz %0, +0(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plhz_off8_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plhz %0, +8(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plhz_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plhz %0, +16(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plhz_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plhz %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plhz_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plhz %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plha_off0_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plha %0, +0(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plha_off8_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plha %0, +8(0), 1" : "=r" (rt)  );
-+	PAD_ORI
-+}
-+static void test_plha_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plha %0, +16(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plha_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plha %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plha_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plha %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plwz_off0_R1 (void) {
-+  __asm__ __volatile__ ("plwz %0, +0(0), 1" : "=r" (rt)  );
-+}
-+static void test_plwz_off8_R1 (void) {
-+  __asm__ __volatile__ ("plwz %0, +8(0), 1" : "=r" (rt) );
-+}
-+static void test_plwz_off16_R1 (void) {
-+  __asm__ __volatile__ ("plwz %0, +16(0), 1" : "=r" (rt) );
-+}
-+static void test_plwz_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plwz %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plwz_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plwz %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plwa_off0_R1 (void) {
-+  __asm__ __volatile__ ("plwa %0, +0(0), 1" : "=r" (rt)  );
-+}
-+static void test_plwa_off8_R1 (void) {
-+  __asm__ __volatile__ ("plwa %0, +8(0), 1" : "=r" (rt)  );
-+}
-+static void test_plwa_off16_R1 (void) {
-+  __asm__ __volatile__ ("plwa %0, +16(0), 1" : "=r" (rt) );
-+}
-+static void test_plwa_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plwa %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_plwa_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plwa %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_pld_off0_R1 (void) {
-+  __asm__ __volatile__ ("pld %0, +0(0), 1" : "=r" (rt)  );
-+}
-+static void test_pld_off8_R1 (void) {
-+  __asm__ __volatile__ ("pld %0, +8(0), 1" : "=r" (rt)  );
-+}
-+static void test_pld_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("pld %0, +16(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_pld_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("pld %0, +32(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+}
-+static void test_pld_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("pld %0, +64(0), 1" : "=r" (rt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_pstb_off0_R1 (void) {
-+  __asm__ __volatile__ ("pstb %0, -0x1f400+0(0), 1" :: "r" (rs) );
-+}
-+static void test_pstb_off8_R1 (void) {
-+  __asm__ __volatile__ ("pstb %0, -0x1f400+8(0), 1" :: "r" (rs) );
-+}
-+static void test_pstb_off16_R1 (void) {
-+  __asm__ __volatile__ ("pstb %0, -0x1f400+16(0), 1" :: "r" (rs) );
-+}
-+static void test_pstb_off32_R1 (void) {
-+  __asm__ __volatile__ ("pstb %0, -0x1f400+32(0), 1" :: "r" (rs) );
-+}
-+static void test_psth_off0_R1 (void) {
-+  __asm__ __volatile__ ("psth %0, -0x1f400+0(0), 1" :: "r" (rs) );
-+}
-+static void test_psth_off8_R1 (void) {
-+  __asm__ __volatile__ ("psth %0, -0x1f400+8(0), 1" :: "r" (rs) );
-+}
-+static void test_psth_off16_R1 (void) {
-+  __asm__ __volatile__ ("psth %0, -0x1f400+16(0), 1" :: "r" (rs) );
-+}
-+static void test_psth_off32_R1 (void) {
-+  __asm__ __volatile__ ("psth %0, -0x1f400+32(0), 1" :: "r" (rs) );
-+}
-+static void test_pstw_off0_R1 (void) {
-+  __asm__ __volatile__ ("pstw %0, -0x1f400+0(0), 1" :: "r" (rs) );
-+}
-+static void test_pstw_off8_R1 (void) {
-+  __asm__ __volatile__ ("pstw %0, -0x1f400+8(0), 1" :: "r" (rs) );
-+}
-+static void test_pstw_off16_R1 (void) {
-+  __asm__ __volatile__ ("pstw %0, -0x1f400+16(0), 1" :: "r" (rs) );
-+}
-+static void test_pstw_off32_R1 (void) {
-+  __asm__ __volatile__ ("pstw %0, -0x1f400+32(0), 1" :: "r" (rs) );
-+}
-+static void test_pstd_off0_R1 (void) {
-+  __asm__ __volatile__ ("pstd %0, -0x1f400+0(0), 1" :: "r" (rs) );
-+}
-+static void test_pstd_off8_R1 (void) {
-+  __asm__ __volatile__ ("pstd %0, -0x1f400+8(0), 1" :: "r" (rs) );
-+}
-+static void test_pstd_off16_R1 (void) {
-+  __asm__ __volatile__ ("pstd %0, -0x1f400+16(0), 1" :: "r" (rs) );
-+}
-+static void test_pstd_off32_R1 (void) {
-+  __asm__ __volatile__ ("pstd %0, -0x1f400+32(0), 1" :: "r" (rs) );
-+}
-+  /* For the paddi tests; although we can get close to a read/write target
-+     due to forcing where the .text and .bss sections are placed, there is
-+     still enough codegen variability that having a raw value in the exp
-+     file will not be determinative for these instructions.
-+     Thus, compromise and just ensure that the generated value is an
-+     address that lands within the reloc buffer, and use quasi magic
-+     eyecatcher values in the return to indicate success.  */
-+static void test_paddi_0_R1 (void) {
-+  __asm__ __volatile__ ("paddi %0, 0, 0+0, 1" : "=r" (rt)  );
-+  rt = rt - TEXT_BSS_DELTA;
-+  if (rt > pcrelative_buff_addr(0) &&
-+		  rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE))
-+	  rt = 0xffff0000;
-+}
-+static void test_paddi_12_R1 (void) {
-+  __asm__ __volatile__ ("paddi %0, 0, 0+12, 1" : "=r" (rt)  );
-+  rt = rt - TEXT_BSS_DELTA;
-+  if (rt > pcrelative_buff_addr(0) &&
-+		  rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE))
-+	  rt = 0xffff0012;
-+}
-+static void test_paddi_48_R1 (void) {
-+  __asm__ __volatile__ ("paddi %0, 0, 0+48, 1" : "=r" (rt)  );
-+  rt = rt - TEXT_BSS_DELTA;
-+  if (rt > pcrelative_buff_addr(0) &&
-+		  rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE))
-+	  rt = 0xffff0048;
-+}
-+static void test_paddi_98_R1 (void) {
-+  __asm__ __volatile__ ("paddi %0, 0, 0+98, 1" : "=r" (rt) );
-+  rt = rt - TEXT_BSS_DELTA;
-+  if (rt > pcrelative_buff_addr(0) &&
-+		  rt < pcrelative_buff_addr(RELOC_BUFFER_SIZE))
-+	  rt = 0xffff0098;
-+}
-+static void test_plq_off0_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +0(0), 1"  );
-+	PAD_ORI
-+}
-+static void test_plq_off8_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +8(0), 1"  );
-+	PAD_ORI
-+}
-+static void test_plq_off16_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +16(0), 1"  );
-+	PAD_ORI
-+}
-+static void test_plq_off32_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +32(0), 1"  );
-+	PAD_ORI
-+}
-+static void test_plq_off48_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +48(0), 1"  );
-+	PAD_ORI
-+}
-+static void test_plq_off64_R1 (void) {
-+	PAD_ORI
-+  __asm__ __volatile__ ("plq 26, +64(0), 1"  );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_pstq_off0_R1 (void) {
-+  __asm__ __volatile__ ("pstq 24, -0x1f400+0(0), 1"  );
-+}
-+static void test_pstq_off8_R1 (void) {
-+  __asm__ __volatile__ ("pstq 24, -0x1f400+8(0), 1"  );
-+}
-+static void test_pstq_off16_R1 (void) {
-+  __asm__ __volatile__ ("pstq 24, -0x1f400+16(0), 1"  );
-+}
-+static void test_pstq_off32_R1 (void) {
-+  __asm__ __volatile__ ("pstq 24, -0x1f400+32(0), 1"  );
-+}
-+static void test_pstq_off64_R1 (void) {
-+  __asm__ __volatile__ ("pstq 24, -0x1f400+64(0), 1"  );
-+}
-+
-+static test_list_t testgroup_generic[] = {
-+  { &test_paddi_0_R1, "paddi 0_R1", "RT,RA,SI,R"}, /* bcwp */
-+  { &test_paddi_12_R1, "paddi 12_R1", "RT,RA,SI,R"}, /* bcwp */
-+  { &test_paddi_48_R1, "paddi 48_R1", "RT,RA,SI,R"}, /* bcwp */
-+  { &test_paddi_98_R1, "paddi 98_R1", "RT,RA,SI,R"}, /* bcwp */
-+  { &test_plbz_off0_R1, "plbz off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plbz_off8_R1, "plbz off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plbz_off16_R1, "plbz off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plbz_off32_R1, "plbz off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plbz_off64_R1, "plbz off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_pld_off0_R1, "pld off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_pld_off8_R1, "pld off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_pld_off16_R1, "pld off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_pld_off32_R1, "pld off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_pld_off64_R1, "pld off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plha_off0_R1, "plha off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plha_off8_R1, "plha off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plha_off16_R1, "plha off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plha_off32_R1, "plha off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plha_off64_R1, "plha off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plhz_off0_R1, "plhz off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plhz_off8_R1, "plhz off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plhz_off16_R1, "plhz off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plhz_off32_R1, "plhz off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plhz_off64_R1, "plhz off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plq_off0_R1, "plq off0_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plq_off8_R1, "plq off8_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plq_off16_R1, "plq off16_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plq_off32_R1, "plq off32_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plq_off48_R1, "plq off48_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plq_off64_R1, "plq off64_R1", "RTp,D(RA),R"}, /* bcwp */
-+  { &test_plwa_off0_R1, "plwa off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwa_off8_R1, "plwa off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwa_off16_R1, "plwa off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwa_off32_R1, "plwa off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwa_off64_R1, "plwa off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwz_off0_R1, "plwz off0_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwz_off8_R1, "plwz off8_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwz_off16_R1, "plwz off16_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwz_off32_R1, "plwz off32_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plwz_off64_R1, "plwz off64_R1", "RT,D(RA),R"}, /* bcwp */
-+  { &test_plxvp_off0_R1, "plxvp off0_R1", "XTp,D(RA),R"}, /* bcwp */
-+  { &test_plxvp_off8_R1, "plxvp off8_R1", "XTp,D(RA),R"}, /* bcwp */
-+  { &test_plxvp_off16_R1, "plxvp off16_R1", "XTp,D(RA),R"}, /* bcwp */
-+  { &test_plxvp_off24_R1, "plxvp off24_R1", "XTp,D(RA),R"}, /* bcwp */
-+  { &test_plxvp_off32_R1, "plxvp off32_R1", "XTp,D(RA),R"}, /* bcwp */
-+  { &test_pstb_off0_R1, "pstb off0_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstb_off8_R1, "pstb off8_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstb_off16_R1, "pstb off16_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstb_off32_R1, "pstb off32_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstd_off0_R1, "pstd off0_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstd_off8_R1, "pstd off8_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstd_off16_R1, "pstd off16_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstd_off32_R1, "pstd off32_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_psth_off0_R1, "psth off0_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_psth_off8_R1, "psth off8_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_psth_off16_R1, "psth off16_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_psth_off32_R1, "psth off32_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstq_off0_R1, "pstq off0_R1", "RSp,D(RA),R"}, /* bcwp */
-+  { &test_pstq_off8_R1, "pstq off8_R1", "RSp,D(RA),R"}, /* bcwp */
-+  { &test_pstq_off16_R1, "pstq off16_R1", "RSp,D(RA),R"}, /* bcwp */
-+  { &test_pstq_off32_R1, "pstq off32_R1", "RSp,D(RA),R"}, /* bcwp */
-+  { &test_pstq_off64_R1, "pstq off64_R1", "RSp,D(RA),R"}, /* bcwp */
-+  { &test_pstw_off0_R1, "pstw off0_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstw_off8_R1, "pstw off8_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstw_off16_R1, "pstw off16_R1", "RS,D(RA),R"}, /* bcwp */
-+  { &test_pstw_off32_R1, "pstw off32_R1", "RS,D(RA),R"}, /* bcwp */
-+	{ NULL, 	    NULL },
-+};
-+
-+/*  Allow skipping of tests. */
-+unsigned long test_count=0xffff;
-+unsigned long skip_count=0;
-+unsigned long setup_only=0;
-+
-+/*  Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs.  */
-+static jmp_buf mybuf;
-+
-+/* This (testfunction_generic) is meant to handle all of the instruction
-+   variations.  The helpers set up the register and iterator values
-+   as is appropriate for the instruction being tested.  */
-+static void testfunction_generic (const char* instruction_name,
-+				  test_func_t test_function,
-+				  unsigned int ignore_flags,
-+				  char * cur_form) {
-+
-+   identify_form_components (instruction_name , cur_form);
-+   debug_show_form (instruction_name, cur_form);
-+   set_up_iterators ();
-+   debug_show_iter_ranges ();
-+   initialize_buffer (0);
-+   init_pcrelative_write_target ();
-+   debug_dump_buffer ();
-+
-+   for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) {
-+      for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) {
-+	 for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) {
-+	    for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) {
-+		CHECK_OVERRIDES
-+		debug_show_current_iteration ();
-+		// Be sure to initialize the target registers first.
-+		initialize_target_registers ();
-+		initialize_source_registers ();
-+		printf ("%s", instruction_name);
-+		print_register_header ();
-+		printf( " =>"); fflush (stdout);
-+		if (!setup_only) {
-+		  if (enable_setjmp) {
-+		   if ( setjmp ( mybuf ) ) {
-+		     printf("signal tripped. (FIXME)\n");
-+		     continue;
-+		   }
-+		  }
-+		  (*test_function) ();
-+		}
-+		print_register_footer ();
-+		print_result_buffer ();
-+		print_pcrelative_write_target ();
-+		printf ("\n");
-+	    }
-+	 }
-+      }
-+   }
-+}
-+
-+void mykillhandler ( int x ) { longjmp (mybuf, 1); }
-+void mysegvhandler ( int x ) { longjmp (mybuf, 1); }
-+
-+static void do_tests ( void )
-+{
-+   int groupcount;
-+   char * cur_form;
-+   test_group_t group_function = &testfunction_generic;
-+   test_list_t *tests = testgroup_generic;
-+
-+   struct sigaction kill_action, segv_action;
-+   struct sigaction old_kill_action, old_segv_action;
-+   if (enable_setjmp) {
-+      kill_action.sa_handler = mykillhandler;
-+      segv_action.sa_handler = mysegvhandler;
-+      sigemptyset ( &kill_action.sa_mask );
-+      sigemptyset ( &segv_action.sa_mask );
-+      kill_action.sa_flags = SA_NODEFER;
-+      segv_action.sa_flags = SA_NODEFER;
-+      sigaction ( SIGILL, &kill_action, &old_kill_action);
-+      sigaction ( SIGSEGV, &segv_action, &old_segv_action);
-+   }
-+
-+   for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) {
-+	cur_form = strdup(tests[groupcount].form);
-+	current_test = tests[groupcount];
-+	identify_instruction_by_func_name (current_test.name);
-+	if (groupcount < skip_count) continue;
-+	if (verbose) printf("Test #%d ,", groupcount);
-+	if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose);
-+	(*group_function) (current_test.name, current_test.func, 0, cur_form );
-+	printf ("\n");
-+	if (groupcount >= (skip_count+test_count)) break;
-+   }
-+   if (debug_show_labels) printf("\n");
-+   printf ("All done. Tested %d different instruction groups\n", groupcount);
-+}
-+
-+static void usage (void)
-+{
-+   fprintf(stderr,
-+      "Usage: test_isa_XXX [OPTIONS]\n"
-+      "\t-h: display this help and exit\n"
-+      "\t-v: increase verbosity\n"
-+      "\t-a <foo> : limit number of a-iterations to <foo>\n"
-+      "\t-b <foo> : limit number of b-iterations to <foo>\n"
-+      "\t-c <foo> : limit number of c-iterations to <foo>\n"
-+      "\t-n <foo> : limit to this number of tests.\n"
-+      "\t-r <foo>: run only test # <foo> \n"
-+      "\t\n"
-+      "\t-j :enable setjmp to recover from illegal insns. \n"
-+      "\t-m :(dev only?) lock VRM value to zero.\n"
-+      "\t-z :(dev only?) lock MC value to zero.\n"
-+      "\t-p :(dev only?) disable prefix instructions\n"
-+      "\t-s <foo>: skip <foo> tests \n"
-+      "\t-c <foo>: stop after running <foo> # of tests \n"
-+      "\t-f : Do the test setup but do not actually execute the test instruction. \n"
-+   );
-+}
-+
-+int main (int argc, char **argv)
-+{
-+   int c;
-+   while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) {
-+      switch (c) {
-+	 case 'h':
-+	    usage();
-+	    return 0;
-+
-+	 case 'v':
-+	    verbose++;
-+	    break;
-+
-+	 /* Options related to limiting the test iterations.  */
-+	 case 'a':
-+	    a_limit=atoi (optarg);
-+	    printf ("limiting a-iters to %ld.\n", a_limit);
-+	    break;
-+	 case 'b':
-+	    b_limit=atoi (optarg);
-+	    printf ("limiting b-iters to %ld.\n", b_limit);
-+	    break;
-+	 case 'c':
-+	    c_limit=atoi (optarg);
-+	    printf ("limiting c-iters to %ld.\n", c_limit);
-+	    break;
-+	 case 'n': // run this number of tests.
-+	    test_count=atoi (optarg);
-+	    printf ("limiting to %ld tests\n", test_count);
-+	    break;
-+	 case 'r': // run just test #<foo>.
-+	    skip_count=atoi (optarg);
-+	    test_count=0;
-+	    if (verbose) printf("Running test number %ld\n", skip_count);
-+	    break;
-+	 case 's': // skip this number of tests.
-+	    skip_count=atoi (optarg);
-+	    printf ("skipping %ld tests\n", skip_count);
-+	    break;
-+
-+	 /* debug options.  */
-+	 case 'd':
-+	    dump_tables=1;
-+	    printf("DEBUG:dump_tables.\n");
-+	    break;
-+	 case 'f':
-+	    setup_only=1;
-+	    printf("DEBUG:setup_only.\n");
-+	    break;
-+	 case 'j':
-+	    enable_setjmp=1;
-+	    printf ("DEBUG:setjmp enabled.\n");
-+	    break;
-+	 case 'm':
-+	    vrm_override=1;
-+	    printf ("DEBUG:vrm override enabled.\n");
-+	    break;
-+	 case 'p':
-+	    prefix_override=1;
-+	    printf ("DEBUG:prefix override enabled.\n");
-+	    break;
-+	 case 'z':
-+	    mc_override=1;
-+	    printf ("DEBUG:MC override enabled.\n");
-+	    break;
-+	 default:
-+	    usage();
-+	    fprintf(stderr, "Unknown argument: '%c'\n", c);
-+	   }
-+	}
-+
-+	generic_prologue ();
-+	build_vsx_table ();
-+	build_args_table ();
-+	build_float_vsx_tables ();
-+
-+	if (dump_tables) {
-+	   dump_float_vsx_tables ();
-+	   dump_vsxargs ();
-+	}
-+
-+	do_tests ();
-+
-+	return 0;
-+}
-+
-+#else	   // HAS_ISA_3_1
-+int main (int argc, char **argv)
-+{
-+   printf("NO ISA 3.1 SUPPORT\n");
-+   return 0;
-+}
-+#endif
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp
-new file mode 100644
-index 000000000..139597f9c
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stderr.exp
-@@ -0,0 +1,2 @@
-+
-+
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp
-new file mode 100644
-index 000000000..87594748f
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_RT.stdout.exp
-@@ -0,0 +1,138 @@
-+paddi 0_R1 =>         ffff0000
-+
-+paddi 12_R1 =>         ffff0012
-+
-+paddi 48_R1 =>         ffff0048
-+
-+paddi 98_R1 =>         ffff0098
-+
-+plbz off0_R1 =>               1a
-+
-+plbz off8_R1 =>               1f
-+
-+plbz off16_R1 =>               1f
-+
-+plbz off32_R1 =>               1b
-+
-+plbz off64_R1 =>               1b
-+
-+pld off0_R1 => e740000004100000
-+
-+pld off8_R1 =>         4e800020
-+
-+pld off16_R1 => 6318001862f7001f
-+
-+pld off32_R1 => 639c001c637b001b
-+
-+pld off64_R1 => 639c001c637b001b
-+
-+plha off0_R1 =>               1a
-+
-+plha off8_R1 =>               1f
-+
-+plha off16_R1 =>               1f
-+
-+plha off32_R1 =>               1b
-+
-+plha off64_R1 =>               1b
-+
-+plhz off0_R1 =>               1a
-+
-+plhz off8_R1 =>               1f
-+
-+plhz off16_R1 =>               1f
-+
-+plhz off32_R1 =>               1b
-+
-+plhz off64_R1 =>               1b
-+
-+plq off0_R1 => e34000000410001a 62d6001662b5001f
-+
-+plq off8_R1 => 62d6001662b5001f 6318001862f7001f
-+
-+plq off16_R1 => 6318001862f7001f 635a001a6339001b
-+
-+plq off32_R1 => 639c001c637b001b         4e80003b
-+
-+plq off48_R1 =>               1a 62d6001662b5001f
-+
-+plq off64_R1 => 639c001c637b001b         4e80003b
-+
-+plwa off0_R1 =>          4100000
-+
-+plwa off8_R1 =>         4e800020
-+
-+plwa off16_R1 =>                0
-+
-+plwa off32_R1 =>         637b001b
-+
-+plwa off64_R1 =>         637b001b
-+
-+plwz off0_R1 =>          6100000
-+
-+plwz off8_R1 =>         4e800020
-+
-+plwz off16_R1 =>                0
-+
-+plwz off32_R1 =>         637b001b
-+
-+plwz off64_R1 =>         637b001b
-+
-+plxvp off0_R1 => 6318001862f70017 635a001a63390019 ea80000004100000 62d6001662b50015
-+
-+plxvp off8_R1 => 635a001a63390019 639c001c637b001b 62d6001662b50015 6318001862f70017
-+
-+plxvp off16_R1 => 639c001c637b001b 000000004e800020 6318001862f70017 635a001a63390019
-+
-+plxvp off24_R1 => 000000004e800020 0000000000000000 635a001a63390019 639c001c637b001b
-+
-+plxvp off32_R1 => 0000000000000000 62d6001662b50015 639c001c637b001b 000000004e800020
-+
-+pstb off0_R1 102030405060708 => 08              
-+
-+pstb off8_R1 102030405060708 => 08              
-+
-+pstb off16_R1 102030405060708 => 08              
-+
-+pstb off32_R1 102030405060708 => 08              
-+
-+pstd off0_R1 102030405060708 => 0102030405060708 
-+
-+pstd off8_R1 102030405060708 => 0102030405060708 
-+
-+pstd off16_R1 102030405060708 => 0102030405060708 
-+
-+pstd off32_R1 102030405060708 => 0102030405060708 
-+
-+psth off0_R1 102030405060708 => 0708      
-+
-+psth off8_R1 102030405060708 => 0708      
-+
-+psth off16_R1 102030405060708 => 0708      
-+
-+psth off32_R1 102030405060708 => 0708      
-+
-+pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+pstq off0_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+
-+pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+pstq off8_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+
-+pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+pstq off16_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+
-+pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+pstq off32_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+
-+pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+pstq off64_R1 102030405060708 a5b4c3d2e1f00918 => 0102030405060708  a5b4c3d2e1f00918 
-+
-+pstw off0_R1 102030405060708 => 05060708  
-+
-+pstw off8_R1 102030405060708 => 05060708  
-+
-+pstw off16_R1 102030405060708 => 05060708  
-+
-+pstw off32_R1 102030405060708 => 05060708  
-+
-+All done. Tested 66 different instruction groups
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest
-new file mode 100644
-index 000000000..61d7f65a1
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_RT.vgtest
-@@ -0,0 +1,2 @@
-+prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1
-+prog: test_isa_3_1_R1_RT
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.c b/none/tests/ppc64/test_isa_3_1_R1_XT.c
-new file mode 100644
-index 000000000..58885b8d3
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_XT.c
-@@ -0,0 +1,534 @@
-+/*
-+ * Valgrind testcase for PowerPC ISA 3.1
-+ *
-+ * Copyright (C) 2019-2020 Will Schmidt <will_schmidt@vnet.ibm.com>
-+ *
-+ * 64bit build:
-+ *    gcc -Winline -Wall -g -O -mregnames -maltivec -m64
-+ */
-+
-+/*
-+ *   This program is free software; you can redistribute it and/or
-+ *   modify it under the terms of the GNU General Public License as
-+ *   published by the Free Software Foundation; either version 2 of the
-+ *   License, or (at your option) any later version.
-+ *
-+ *   This program is distributed in the hope that it will be useful,
-+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
-+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *   GNU General Public License for more details.
-+ *
-+ *   You should have received a copy of the GNU General Public License
-+ *   along with this program; if not, write to the Free Software
-+ *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-+ */
-+
-+#include <stdio.h>
-+#ifdef HAS_ISA_3_1
-+#include <stdint.h>
-+#include <assert.h>
-+#include <stdlib.h>
-+#include <string.h>
-+#include <unistd.h>
-+#include <altivec.h>
-+#include <malloc.h>
-+
-+#include <string.h>
-+#include <signal.h>
-+#include <setjmp.h>
-+
-+/* Condition Register fields.
-+   These are used to capture the condition register values immediately after
-+   the instruction under test is executed. This is done to help prevent other
-+   test overhead (switch statements, result compares, etc) from disturbing
-+   the test case results.  */
-+unsigned long current_cr;
-+unsigned long current_fpscr;
-+
-+struct test_list_t current_test;
-+
-+#include "isa_3_1_helpers.h"
-+static void test_pstxvp_off0_R1 (void) {
-+  __asm__ __volatile__ ("pstxvp 20, -0x1f400+0(0),1");
-+}
-+static void test_pstxvp_off16_R1 (void) {
-+  __asm__ __volatile__ ("pstxvp 20, -0x1f400+16(0),1");
-+}
-+static void test_pstxvp_off32_R1 (void) {
-+  __asm__ __volatile__ ("pstxvp 20, -0x1f400+32(0),1");
-+}
-+static void test_pstxvp_off48_R1 (void) {
-+  __asm__ __volatile__ ("pstxvp 20, -0x1f400+48(0),1");
-+}
-+static void test_plfd_64_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +64(0), 1");
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plfd_32_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +32(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfd_16_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +16(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfd_8_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +8(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfd_4_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +4(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfd_0_R1 (void) {
-+  __asm__ __volatile__ ("plfd 28, +0(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfs_64_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +64(0), 1");
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plfs_32_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +32(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfs_16_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +16(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfs_8_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +8(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfs_4_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +4(0), 1");
-+	PAD_ORI
-+}
-+static void test_plfs_0_R1 (void) {
-+  __asm__ __volatile__ ("plfs 28, +0(0), 1");
-+	PAD_ORI
-+}
-+static void test_pstfd_32_R1 (void) {
-+  __asm__ __volatile__ ("pstfd 26, -0x1f400+32(0), 1");
-+}
-+static void test_pstfd_16_R1 (void) {
-+  __asm__ __volatile__ ("pstfd 26, -0x1f400+16(0), 1");
-+}
-+static void test_pstfd_8_R1 (void) {
-+  __asm__ __volatile__ ("pstfd 26, -0x1f400+8(0), 1");
-+}
-+static void test_pstfd_4_R1 (void) {
-+  __asm__ __volatile__ ("pstfd 26, -0x1f400+4(0), 1");
-+}
-+static void test_pstfd_0_R1 (void) {
-+  __asm__ __volatile__ ("pstfd 26, -0x1f400+0(0), 1");
-+}
-+static void test_pstfs_32_R1 (void) {
-+  __asm__ __volatile__ ("pstfs 26, -0x1f400+32(0), 1");
-+}
-+static void test_pstfs_16_R1 (void) {
-+  __asm__ __volatile__ ("pstfs 26, -0x1f400+16(0), 1");
-+}
-+static void test_pstfs_8_R1 (void) {
-+  __asm__ __volatile__ ("pstfs 26, -0x1f400+8(0), 1");
-+}
-+static void test_pstfs_4_R1 (void) {
-+  __asm__ __volatile__ ("pstfs 26, -0x1f400+4(0), 1");
-+}
-+static void test_pstfs_0_R1 (void) {
-+  __asm__ __volatile__ ("pstfs 26, -0x1f400+0(0), 1");
-+}
-+static void test_plxsd_64_R1 (void) {
-+  __asm__ __volatile__ ("plxsd %0, +64(0), 1" : "=v" (vrt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plxsd_32_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; plxsd %0, +32(0), 1" : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxsd_16_R1 (void) {
-+  __asm__ __volatile__ ("plxsd %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxsd_8_R1 (void) {
-+  __asm__ __volatile__ ("plxsd %0, +8(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxsd_4_R1 (void) {
-+  __asm__ __volatile__ ("plxsd %0, +4(0), 1; pnop;pnop;pnop; "  : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxsd_0_R1 (void) {
-+  __asm__ __volatile__ ("plxsd %0, +0(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxssp_64_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +64(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+	PAD_ORI
-+}
-+static void test_plxssp_32_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +32(0), 1; pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxssp_16_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +16(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxssp_8_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +8(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxssp_4_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +4(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+static void test_plxssp_0_R1 (void) {
-+  __asm__ __volatile__ ("plxssp %0, +0(0), 1; pnop;pnop;pnop; " : "=v" (vrt) );
-+	PAD_ORI
-+}
-+/* Follow the short-range plxv instructions with nop in order to
-+   pad out subsequent instructions.  When written there are found
-+   to be fluctuations in the instructions to store the result back
-+   into the target variable.  (pla,pstxv...).
-+   */
-+static void test_plxv_16_R1 (void) {
-+  __asm__ __volatile__ ("plxv %x0, +16(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) );
-+	PAD_ORI
-+}
-+static void test_plxv_8_R1 (void) {
-+  __asm__ __volatile__ ("plxv %x0, +8(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) );
-+	PAD_ORI
-+}
-+static void test_plxv_4_R1 (void) {
-+  __asm__ __volatile__ ("plxv %x0, +4(0), 1; pnop;pnop;pnop;" : "=wa" (vec_xt) );
-+	PAD_ORI
-+}
-+static void test_plxv_0_R1 (void) {
-+  __asm__ __volatile__ ("plxv %x0, +0(0), 1; pnop;pnop;pnop; " : "=wa" (vec_xt) );
-+	PAD_ORI
-+}
-+static void test_pstxsd_64_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+64(0), 1" );
-+}
-+static void test_pstxsd_32_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+32(0), 1" );
-+}
-+static void test_pstxsd_16_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+16(0), 1" );
-+}
-+static void test_pstxsd_8_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+8(0), 1" );
-+}
-+static void test_pstxsd_4_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+4(0), 1"  );
-+}
-+static void test_pstxsd_0_R1 (void) {
-+  __asm__ __volatile__ (".align 2 ; pstxsd 22, -0x1f400+0(0), 1" );
-+}
-+static void test_pstxssp_64_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+64(0), 1" );
-+}
-+static void test_pstxssp_32_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+32(0), 1");
-+}
-+static void test_pstxssp_16_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+16(0), 1");
-+}
-+static void test_pstxssp_8_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+8(0), 1");
-+}
-+static void test_pstxssp_4_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+4(0), 1");
-+}
-+static void test_pstxssp_0_R1 (void) {
-+  __asm__ __volatile__ ("pstxssp 22, -0x1f400+0(0), 1");
-+}
-+static void test_pstxv_16_R1 (void) {
-+  __asm__ __volatile__ ("pstxv %x0, -0x1f400+16(0), 1" :: "wa" (vec_xs));
-+}
-+static void test_pstxv_8_R1 (void) {
-+  __asm__ __volatile__ ("pstxv %x0, -0x1f400+8(0), 1" :: "wa" (vec_xs));
-+}
-+static void test_pstxv_4_R1 (void) {
-+  __asm__ __volatile__ ("pstxv %x0, -0x1f400+4(0), 1" :: "wa" (vec_xs));
-+}
-+static void test_pstxv_0_R1 (void) {
-+  __asm__ __volatile__ ("pstxv %x0, -0x1f400+0(0), 1" :: "wa" (vec_xs));
-+}
-+
-+static test_list_t testgroup_generic[] = {
-+  { &test_plfd_0_R1, "plfd 0_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfd_4_R1, "plfd 4_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfd_8_R1, "plfd 8_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfd_16_R1, "plfd 16_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfd_32_R1, "plfd 32_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfd_64_R1, "plfd 64_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_0_R1, "plfs 0_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_4_R1, "plfs 4_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_8_R1, "plfs 8_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_16_R1, "plfs 16_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_32_R1, "plfs 32_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plfs_64_R1, "plfs 64_R1", "FRT,D(RA),R"}, /* bcwp */
-+  { &test_plxsd_0_R1, "plxsd 0_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxsd_4_R1, "plxsd 4_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxsd_8_R1, "plxsd 8_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxsd_16_R1, "plxsd 16_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxsd_32_R1, "plxsd 32_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxsd_64_R1, "plxsd 64_R1", "VRT,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_plxssp_0_R1, "plxssp 0_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxssp_4_R1, "plxssp 4_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxssp_8_R1, "plxssp 8_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxssp_16_R1, "plxssp 16_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxssp_32_R1, "plxssp 32_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxssp_64_R1, "plxssp 64_R1", "VRT,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_plxv_0_R1, "plxv 0_R1", "XT,D(RA),R"}, /* bcwp */
-+  { &test_plxv_4_R1, "plxv 4_R1", "XT,D(RA),R"}, /* bcwp */
-+  { &test_plxv_8_R1, "plxv 8_R1", "XT,D(RA),R"}, /* bcwp */
-+  { &test_plxv_16_R1, "plxv 16_R1", "XT,D(RA),R"}, /* bcwp */
-+  { &test_pstfd_0_R1, "pstfd 0_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_pstfd_4_R1, "pstfd 4_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_pstfd_8_R1, "pstfd 8_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_pstfd_16_R1, "pstfd 16_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_pstfd_32_R1, "pstfd 32_R1", "FRS,D(RA),R", 0b00110000}, /* bcwp */
-+  { &test_pstfs_0_R1, "pstfs 0_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_pstfs_4_R1, "pstfs 4_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_pstfs_8_R1, "pstfs 8_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_pstfs_16_R1, "pstfs 16_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_pstfs_32_R1, "pstfs 32_R1", "FRS,D(RA),R", 0b00001111}, /* bcwp */
-+  { &test_pstxsd_0_R1, "pstxsd 0_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxsd_4_R1, "pstxsd 4_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxsd_8_R1, "pstxsd 8_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxsd_16_R1, "pstxsd 16_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxsd_32_R1, "pstxsd 32_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxsd_64_R1, "pstxsd 64_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_0_R1, "pstxssp 0_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_4_R1, "pstxssp 4_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_8_R1, "pstxssp 8_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_16_R1, "pstxssp 16_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_32_R1, "pstxssp 32_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxssp_64_R1, "pstxssp 64_R1", "VRS,D(RA),R"}, /* bcwp */
-+  { &test_pstxvp_off0_R1, "pstxvp off0_R1", "XSp,D(RA),R"}, /* bcwp */
-+  { &test_pstxvp_off16_R1, "pstxvp off16_R1", "XSp,D(RA),R"}, /* bcwp */
-+  { &test_pstxvp_off32_R1, "pstxvp off32_R1", "XSp,D(RA),R"}, /* bcwp */
-+  { &test_pstxvp_off48_R1, "pstxvp off48_R1", "XSp,D(RA),R"}, /* bcwp */
-+  { &test_pstxv_0_R1, "pstxv 0_R1", "XS,D(RA),R"}, /* bcwp */
-+  { &test_pstxv_4_R1, "pstxv 4_R1", "XS,D(RA),R"}, /* bcwp */
-+  { &test_pstxv_8_R1, "pstxv 8_R1", "XS,D(RA),R"}, /* bcwp */
-+  { &test_pstxv_16_R1, "pstxv 16_R1", "XS,D(RA),R"}, /* bcwp */
-+	{ NULL, 	    NULL },
-+};
-+
-+/*  Allow skipping of tests. */
-+unsigned long test_count=0xffff;
-+unsigned long skip_count=0;
-+unsigned long setup_only=0;
-+
-+/*  Set up a setjmp/longjmp to gently handle our SIGILLs and SIGSEGVs.  */
-+static jmp_buf mybuf;
-+
-+/* This (testfunction_generic) is meant to handle all of the instruction
-+   variations.  The helpers set up the register and iterator values
-+   as is appropriate for the instruction being tested.  */
-+static void testfunction_generic (const char* instruction_name,
-+				  test_func_t test_function,
-+				  unsigned int ignore_flags,
-+				  char * cur_form) {
-+
-+   identify_form_components (instruction_name , cur_form);
-+   debug_show_form (instruction_name, cur_form);
-+   set_up_iterators ();
-+   debug_show_iter_ranges ();
-+   initialize_buffer (0);
-+   init_pcrelative_write_target ();
-+   debug_dump_buffer ();
-+
-+   for (vrai = a_start; vrai < a_iters ; vrai+=a_inc) {
-+      for (vrbi = b_start; vrbi < b_iters ; vrbi+=b_inc) {
-+	 for (vrci = c_start; vrci < c_iters ; vrci+=c_inc) {
-+	    for (vrmi = m_start; (vrmi < m_iters) ; vrmi+=m_inc) {
-+		CHECK_OVERRIDES
-+		debug_show_current_iteration ();
-+		// Be sure to initialize the target registers first.
-+		initialize_target_registers ();
-+		initialize_source_registers ();
-+		vec_xa[0]=0x1234;
-+		vec_xa[1]=0x4567;
-+		printf ("%s", instruction_name);
-+		print_register_header ();
-+		printf( " =>"); fflush (stdout);
-+		if (!setup_only) {
-+		  if (enable_setjmp) {
-+		   if ( setjmp ( mybuf ) ) {
-+		     printf("signal tripped. (FIXME)\n");
-+		     continue;
-+		   }
-+		  }
-+		  (*test_function) ();
-+		}
-+		print_register_footer ();
-+		print_result_buffer ();
-+		print_pcrelative_write_target ();
-+		printf ("\n");
-+	    }
-+	 }
-+      }
-+   }
-+}
-+
-+void mykillhandler ( int x ) { longjmp (mybuf, 1); }
-+void mysegvhandler ( int x ) { longjmp (mybuf, 1); }
-+
-+static void do_tests ( void )
-+{
-+   int groupcount;
-+   char * cur_form;
-+   test_group_t group_function = &testfunction_generic;
-+   test_list_t *tests = testgroup_generic;
-+
-+   struct sigaction kill_action, segv_action;
-+   struct sigaction old_kill_action, old_segv_action;
-+   if (enable_setjmp) {
-+      kill_action.sa_handler = mykillhandler;
-+      segv_action.sa_handler = mysegvhandler;
-+      sigemptyset ( &kill_action.sa_mask );
-+      sigemptyset ( &segv_action.sa_mask );
-+      kill_action.sa_flags = SA_NODEFER;
-+      segv_action.sa_flags = SA_NODEFER;
-+      sigaction ( SIGILL, &kill_action, &old_kill_action);
-+      sigaction ( SIGSEGV, &segv_action, &old_segv_action);
-+   }
-+
-+   for (groupcount = 0; tests[groupcount].name != NULL; groupcount++) {
-+	cur_form = strdup(tests[groupcount].form);
-+	current_test = tests[groupcount];
-+	identify_instruction_by_func_name (current_test.name);
-+	if (groupcount < skip_count) continue;
-+	if (verbose) printf("Test #%d ,", groupcount);
-+	if (verbose > 1) printf(" instruction %s (v=%d)", current_test.name, verbose);
-+	(*group_function) (current_test.name, current_test.func, 0, cur_form );
-+	printf ("\n");
-+	if (groupcount >= (skip_count+test_count)) break;
-+   }
-+   if (debug_show_labels) printf("\n");
-+   printf ("All done. Tested %d different instruction groups\n", groupcount);
-+}
-+
-+static void usage (void)
-+{
-+   fprintf(stderr,
-+      "Usage: test_isa_XXX [OPTIONS]\n"
-+      "\t-h: display this help and exit\n"
-+      "\t-v: increase verbosity\n"
-+      "\t-a <foo> : limit number of a-iterations to <foo>\n"
-+      "\t-b <foo> : limit number of b-iterations to <foo>\n"
-+      "\t-c <foo> : limit number of c-iterations to <foo>\n"
-+      "\t-n <foo> : limit to this number of tests.\n"
-+      "\t-r <foo>: run only test # <foo> \n"
-+      "\t\n"
-+      "\t-j :enable setjmp to recover from illegal insns. \n"
-+      "\t-m :(dev only?) lock VRM value to zero.\n"
-+      "\t-z :(dev only?) lock MC value to zero.\n"
-+      "\t-p :(dev only?) disable prefix instructions\n"
-+      "\t-s <foo>: skip <foo> tests \n"
-+      "\t-c <foo>: stop after running <foo> # of tests \n"
-+      "\t-f : Do the test setup but do not actually execute the test instruction. \n"
-+   );
-+}
-+
-+int main (int argc, char **argv)
-+{
-+   int c;
-+   while ((c = getopt(argc, argv, "dhjvmpfzs:a:b:c:n:r:")) != -1) {
-+      switch (c) {
-+	 case 'h':
-+	    usage();
-+	    return 0;
-+
-+	 case 'v':
-+	    verbose++;
-+	    break;
-+
-+	 /* Options related to limiting the test iterations.  */
-+	 case 'a':
-+	    a_limit=atoi (optarg);
-+	    printf ("limiting a-iters to %ld.\n", a_limit);
-+	    break;
-+	 case 'b':
-+	    b_limit=atoi (optarg);
-+	    printf ("limiting b-iters to %ld.\n", b_limit);
-+	    break;
-+	 case 'c':
-+	    c_limit=atoi (optarg);
-+	    printf ("limiting c-iters to %ld.\n", c_limit);
-+	    break;
-+	 case 'n': // run this number of tests.
-+	    test_count=atoi (optarg);
-+	    printf ("limiting to %ld tests\n", test_count);
-+	    break;
-+	 case 'r': // run just test #<foo>.
-+	    skip_count=atoi (optarg);
-+	    test_count=0;
-+	    if (verbose) printf("Running test number %ld\n", skip_count);
-+	    break;
-+	 case 's': // skip this number of tests.
-+	    skip_count=atoi (optarg);
-+	    printf ("skipping %ld tests\n", skip_count);
-+	    break;
-+
-+	 /* debug options.  */
-+	 case 'd':
-+	    dump_tables=1;
-+	    printf("DEBUG:dump_tables.\n");
-+	    break;
-+	 case 'f':
-+	    setup_only=1;
-+	    printf("DEBUG:setup_only.\n");
-+	    break;
-+	 case 'j':
-+	    enable_setjmp=1;
-+	    printf ("DEBUG:setjmp enabled.\n");
-+	    break;
-+	 case 'm':
-+	    vrm_override=1;
-+	    printf ("DEBUG:vrm override enabled.\n");
-+	    break;
-+	 case 'p':
-+	    prefix_override=1;
-+	    printf ("DEBUG:prefix override enabled.\n");
-+	    break;
-+	 case 'z':
-+	    mc_override=1;
-+	    printf ("DEBUG:MC override enabled.\n");
-+	    break;
-+	 default:
-+	    usage();
-+	    fprintf(stderr, "Unknown argument: '%c'\n", c);
-+	   }
-+	}
-+
-+	generic_prologue ();
-+	build_vsx_table ();
-+	build_args_table ();
-+	build_float_vsx_tables ();
-+
-+	if (dump_tables) {
-+	   dump_float_vsx_tables ();
-+	   dump_vsxargs ();
-+	}
-+
-+	do_tests ();
-+
-+	return 0;
-+}
-+
-+#else	   // HAS_ISA_3_1
-+int main (int argc, char **argv)
-+{
-+   printf("NO ISA 3.1 SUPPORT\n");
-+   return 0;
-+}
-+#endif
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp
-new file mode 100644
-index 000000000..139597f9c
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stderr.exp
-@@ -0,0 +1,2 @@
-+
-+
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp
-new file mode 100644
-index 000000000..48d591f4d
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_XT.stdout.exp
-@@ -0,0 +1,127 @@
-+plfd 0_R1 =>_ -4.903986e+55 _  cb80000006100000, 0 
-+
-+plfd 4_R1 =>_ 3.095878e+167 _  62b50015cb800004, 0 
-+
-+plfd 8_R1 =>_ 1.297320e+168 _  62d6001662b50015, 0 
-+
-+plfd 16_R1 =>_ 2.264413e+169 _  6318001862f70017, 0 
-+
-+plfd 32_R1 =>_ 6.763045e+171 _  639c001c637b001b, 0 
-+
-+plfd 64_R1 =>_ 6.763045e+171 _  639c001c637b001b, 0 
-+
-+plfs 0_R1 =>_ 2.708339e-35 _  38c2000000000000, 0 
-+
-+plfs 4_R1 =>_ -2.560001e+02 _  c070000080000000, 0 
-+
-+plfs 8_R1 =>_ 1.669433e+21 _  4456a002a0000000, 0 
-+
-+plfs 16_R1 =>_ 2.278176e+21 _  445ee002e0000000, 0 
-+
-+plfs 32_R1 =>_ 4.630140e+21 _  446f600360000000, 0 
-+
-+plfs 64_R1 =>_ 4.630140e+21 _  446f600360000000, 0 
-+
-+plxsd 0_R1 => a800000004100000,0000000000000000 -5.07588375e-116            +Zero
-+
-+plxsd 4_R1 =>  7000000a8000004,0000000000000000  5.77662562e-275            +Zero
-+
-+plxsd 8_R1 =>  700000060000000,0000000000000000  5.77662407e-275            +Zero
-+
-+plxsd 16_R1 =>          7000000,0000000000000000             +Den            +Zero
-+
-+plxsd 32_R1 => 6339001963180018,0000000000000000  9.43505226e+169            +Zero
-+
-+plxsd 64_R1 => 6339001963180018,0000000000000000  9.43505226e+169            +Zero
-+
-+plxssp 0_R1 => 3882000000000000,0000000000000000     6.19888e-05           +Zero           +Zero           +Zero
-+
-+plxssp 4_R1 => bd80000080000000,0000000000000000    -6.25000e-02           -Zero           +Zero           +Zero
-+
-+plxssp 8_R1 => 38e0000000000000,0000000000000000     1.06812e-04           +Zero           +Zero           +Zero
-+
-+plxssp 16_R1 => 38e0000000000000,0000000000000000     1.06812e-04           +Zero           +Zero           +Zero
-+
-+plxssp 32_R1 => 445ac002c0000000,0000000000000000     8.75000e+02    -2.00000e+00           +Zero           +Zero
-+
-+plxssp 64_R1 => 446b400340000000,0000000000000000     9.41000e+02     2.00000e+00           +Zero           +Zero
-+
-+plxv 0_R1 => c800000004100000          7000000
-+
-+plxv 4_R1 =>  7000000c8000004  700000000000000
-+
-+plxv 8_R1 =>          7000000          7000000
-+
-+plxv 16_R1 =>          7000000          7000000
-+
-+pstfd 0_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df
-+pstfd 0_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef
-+
-+pstfd 4_R1 43dfe000003fe000 43eff000000ff000 =>     e000003f e00043df    
-+pstfd 4_R1 43eff000000ff000 43efefffffcff000 =>     f000000f f00043ef    
-+
-+pstfd 8_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df
-+pstfd 8_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef
-+
-+pstfd 16_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df
-+pstfd 16_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef
-+
-+pstfd 32_R1 43dfe000003fe000 43eff000000ff000 => e000003fe00043df
-+pstfd 32_R1 43eff000000ff000 43efefffffcff000 => f000000ff00043ef
-+
-+pstfs 0_R1 000000005eff0000 000000005f7f8000 => 00005eff    
-+pstfs 0_R1 000000005f7f8000 000000005f7f8000 => 80005f7f    
-+
-+pstfs 4_R1 000000005eff0000 000000005f7f8000 =>     00005eff
-+pstfs 4_R1 000000005f7f8000 000000005f7f8000 =>     80005f7f
-+
-+pstfs 8_R1 000000005eff0000 000000005f7f8000 => 00005eff    
-+pstfs 8_R1 000000005f7f8000 000000005f7f8000 => 80005f7f    
-+
-+pstfs 16_R1 000000005eff0000 000000005f7f8000 => 00005eff    
-+pstfs 16_R1 000000005f7f8000 000000005f7f8000 => 80005f7f    
-+
-+pstfs 32_R1 000000005eff0000 000000005f7f8000 => 00005eff    
-+pstfs 32_R1 000000005f7f8000 000000005f7f8000 => 80005f7f    
-+
-+pstxsd 0_R1 => 0000000000000000
-+
-+pstxsd 4_R1 =>     00000000 00000000    
-+
-+pstxsd 8_R1 => 0000000000000000
-+
-+pstxsd 16_R1 => 0000000000000000
-+
-+pstxsd 32_R1 => 0000000000000000
-+
-+pstxsd 64_R1 => 0000000000000000
-+
-+pstxssp 0_R1 => 00000000    
-+
-+pstxssp 4_R1 =>     00000000
-+
-+pstxssp 8_R1 => 00000000    
-+
-+pstxssp 16_R1 => 00000000    
-+
-+pstxssp 32_R1 => 00000000    
-+
-+pstxssp 64_R1 => 00000000    
-+
-+pstxvp off0_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080
-+
-+pstxvp off16_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080
-+
-+pstxvp off32_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080
-+
-+pstxvp off48_R1 0180055e0180077e 0080000e8080000e ff7ffffe7f7ffffe ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80 077e0180055e0180 000e8080000e0080
-+
-+pstxv 0_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80
-+
-+pstxv 4_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80
-+
-+pstxv 8_R1 ff7ffffe7f7ffffe,ff8000007f800000 => fffe7f7ffffeff7f 00007f800000ff80
-+
-+pstxv 16_R1 ff7ffffe7f7ffffe,ff8000007f800000 =>     fffe7f7f fffeff7f00007f80 0000ff80    
-+
-+All done. Tested 58 different instruction groups
-diff --git a/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest
-new file mode 100644
-index 000000000..7331aafad
---- /dev/null
-+++ b/none/tests/ppc64/test_isa_3_1_R1_XT.vgtest
-@@ -0,0 +1,2 @@
-+prereq: ../../../tests/check_ppc64_auxv_cap arch_3_1
-+prog: test_isa_3_1_R1_XT
-diff --git a/none/tests/ppc64/test_isa_3_1_common.c b/none/tests/ppc64/test_isa_3_1_common.c
-index 7c3dc6f00..b3320277b 100644
---- a/none/tests/ppc64/test_isa_3_1_common.c
-+++ b/none/tests/ppc64/test_isa_3_1_common.c
-@@ -134,11 +134,13 @@ bool uses_acc_vsrs;
- bool uses_pmsk;
- bool uses_buffer;  // Buffer related.
- bool uses_load_buffer, uses_store_buffer, uses_any_buffer;
-+bool updates_byte, updates_halfword, updates_word; // output helpers.
- bool uses_quad;
- unsigned long output_mask;  // Output field special handling.
- bool instruction_is_sp, instruction_is_sp_estimate;
- bool instruction_is_dp, instruction_is_dp_estimate;
- bool instruction_is_b16;
-+bool instruction_is_relative;
- 
- unsigned long long min (unsigned long long a, unsigned long long b) {
-    if ( a < b )
-@@ -236,6 +238,18 @@ void identify_form_components (const char *instruction_name,
-       (strncmp (instruction_name, "pmst", 4) == 0) ||
-       (strncmp (instruction_name, "pst", 3) == 0) ||
-       (strncmp (instruction_name, "st", 2) == 0));
-+   updates_byte = (
-+      (strncmp (instruction_name, "pstb", 4) == 0) );
-+   updates_halfword = (
-+      (strncmp (instruction_name, "psth", 4) == 0) ||
-+       (strncmp (instruction_name, "pstfs", 4) == 0) ||
-+       (strncmp (instruction_name, "pstxsd", 4) == 0) ||
-+       (strncmp (instruction_name, "pstxssp", 4) == 0) ||
-+       (strncmp (instruction_name, "pstxv", 4) == 0) ||
-+       (strncmp (instruction_name, "psfs", 4) == 0) );
-+   updates_word = (
-+      (strncmp (instruction_name, "pstw", 4) == 0) );
-+
-    uses_any_buffer = (strstr (cur_form, "(RA)") != NULL);
-    uses_buffer = uses_any_buffer||uses_load_buffer||uses_store_buffer;
- 
-@@ -268,6 +282,15 @@ void identify_form_components (const char *instruction_name,
-    instruction_is_b16 =         ( current_test.mask & B16_MASK        );
- }
- 
-+/* Parse the provided function name to set assorted values.
-+   In particular, set an indicator when the instruction test has
-+   indicated it will run with R==1 that indicates it is a PC-relative
-+   instruction.  Those tests should all have "_R1" as part of
-+   the function name.  */
-+void identify_instruction_by_func_name(const char * function_name) {
-+   instruction_is_relative = ( (strstr (function_name, "R1") != NULL));
-+}
-+
- void display_form_components (char * cur_form) {
-    printf (" %s\n", cur_form);
-    printf ("Instruction form elements: ");
-@@ -288,7 +311,7 @@ void display_form_components (char * cur_form) {
-    if (has_frbp) printf ("frbp ");
-    if (has_frs)  printf ("frs ");
-    if (has_frsp) printf ("frsp ");
--   if (has_frt)  printf ("frt ");
-+   if (has_frt)  printf ("frt%s ",(instruction_is_relative)?"-raw":"");
-    if (has_frtp) printf ("frtp ");
-    if (has_xa)   printf ("xa ");
-    if (has_xap)  printf ("xap ");
-@@ -298,6 +321,7 @@ void display_form_components (char * cur_form) {
-    if (has_xsp)  printf ("xsp ");
-    if (has_xt)   printf ("xt ");
-    if (has_xtp)  printf ("xtp ");
-+   if (instruction_is_relative)  printf ("R==1 ");
-    if (uses_acc_src) printf ("AS ");
-    if (uses_acc_dest) printf ("AT ");
-    printf ("\n");
-@@ -991,6 +1015,107 @@ if (debug_show_values) printf (" buffer:");
-   }
- }
- 
-+/* **** Reloc Buffer **************************************** */
-+/* Create a large buffer to be the destination for pc-relative
-+ * writes.  This test is built with linker hints in order
-+ * to ensure our buffer, stored in the .bss section, is at a
-+ * mostly known offset from the instructions being exercised,
-+ * so a hardcoded offset from the PC (pc-relative) will be
-+ * on-target.
-+ * If there are significant reworks to the code, the bss or
-+ * text sections, or the offsets used may need to change.
-+ *
-+ * The linker hints are specifically -Tbss and -Ttext.
-+ * gcc foo.c test_isa_3_1_common.c -I../../../   -Wl,-Tbss 0x20000 -Wl,-Ttext 0x40000
-+ */
-+ /* RELOC_BUFFER_SIZE is defined to 0x1000 in isa_3_1_helpers.h  */
-+#define RELOC_BUFFER_PATTERN 0x0001000100010001
-+volatile unsigned long long pcrelative_write_target[RELOC_BUFFER_SIZE];
-+
-+/* Initialize the buffer to known values. */
-+void init_pcrelative_write_target() {
-+       int i;
-+       for (i=0;i<RELOC_BUFFER_SIZE;i++)
-+               pcrelative_write_target[i]=i*RELOC_BUFFER_PATTERN;
-+}
-+
-+/* Review the pcrelative_write_target buffer; and print any
-+   elements that vary from the initialized value.
-+   Exclude portions of the output as appropriate if the current test
-+   is marked for byte,halfword,word.  */
-+void print_pcrelative_write_target() {
-+  int i,z,rshift;
-+  unsigned long long curr_value;
-+  unsigned long long ref_value;
-+  unsigned long long curr_token,init_token;
-+  for (i=0;i<RELOC_BUFFER_SIZE;i++) {
-+    ref_value=i*RELOC_BUFFER_PATTERN;
-+    curr_value = pcrelative_write_target[i];
-+    if (ref_value != curr_value) {
-+      printf(" ");
-+      if (verbose)
-+	printf("delta found: %d %llx -> %llx\n",i,ref_value,curr_value);
-+      if (updates_byte) {
-+	for (z=0;z<8;z++) {
-+	  rshift=z*8;
-+	  if (verbose) printf("z:%d ",z);
-+	  init_token = (ref_value>>rshift) & 0xff;
-+	  curr_token = (curr_value>>rshift) & 0xff;
-+	  if (verbose)
-+	    printf("wms byte:: %llx -> %llx \n",init_token,curr_token);
-+	  if (init_token == curr_token && (updates_byte||updates_halfword||updates_word) ) {
-+	     printf("%2s","  ");
-+	  } else {
-+	    printf("%02llx",curr_token);
-+	  }
-+        }
-+      }
-+      else if (updates_halfword) {
-+	for (z=0;z<4;z++) {
-+	  rshift=z*16;
-+	  if (verbose) printf("z:%d ",z);
-+	  init_token = (ref_value>>rshift) & 0xffff;
-+	  curr_token = (curr_value>>rshift) & 0xffff;
-+	  if (verbose)
-+	    printf("wms half:: %llx -> %llx \n",init_token,curr_token);
-+	  if (init_token == curr_token) {
-+	     printf("%2s","  ");
-+	  } else {
-+	    printf("%04llx",curr_token);
-+	  }
-+        }
-+      }
-+      else if (updates_word) {
-+	for (z=0;z<2;z++) {
-+	  rshift=z*32;
-+	  if (verbose) printf("z:%d ",z);
-+	  init_token = (ref_value>>rshift) & 0xffffffff;
-+	  curr_token = (curr_value>>rshift) & 0xffffffff;
-+	  if (verbose)
-+	    printf("wms word:: %llx -> %llx \n",init_token,curr_token);
-+	  if (init_token == curr_token ) {
-+	     printf("%2s","  ");
-+	  } else {
-+	    printf("%08llx",curr_token);
-+	  }
-+        }
-+      }
-+      else {
-+	printf("%016llx ",curr_value);
-+      }
-+    }
-+  }
-+}
-+
-+/* Helper that returns the address of the pcrelative_write_target buffer.
-+   Due to variances in where the sections land in memory, this value is
-+   used to normalize the results.  (see paddi tests for usage).   */
-+unsigned long long pcrelative_buff_addr(int x) {
-+   /* Return the base address of the array.  The base address will be
-+      a function of the code load address.  */
-+   return (unsigned long long) &pcrelative_write_target[x];
-+}
-+
- void print_undefined () {
-    if (debug_show_values)
-       printf (" [Undef]");
-@@ -1339,7 +1464,7 @@ void print_frt () {
-       /* If the result is a dfp128 value, the dfp128 value is
-          contained in the frt, frtp values which are split across
-          a pair of VSRs.  */
--      if (uses_dfp128_output) {
-+      if (!instruction_is_relative && uses_dfp128_output) {
- 	 if (verbose) print_vsr (28);
- 	 if (verbose) print_vsr (29);
- 	 value1 = get_vsrhd_vs28 ();
-@@ -1347,7 +1472,12 @@ void print_frt () {
- 	 dissect_dfp128_float (value1, value3);
-       } else {
- 	 if (debug_show_raw_values) generic_print_float_as_hex (frt);
--	 printf (" %e", frt);
-+	 if (instruction_is_relative) {
-+	    printf ("_ %e _ ", frt);
-+	    print_vsr (28);
-+	 } else {
-+		printf (" %e", frt);
-+	  }
- 	 if (has_frtp) {
- 	    if (debug_show_raw_values) generic_print_float_as_hex (frtp);
- 	    printf (" %e", frtp);
-@@ -1652,7 +1782,15 @@ void print_all() {
- void print_register_header () {
-   post_test = 0;
-   if (debug_show_all_regs) print_all();
--  if (has_ra) print_ra ();
-+
-+  if (has_ra) {
-+	  /* Suppress the print of RA if the instruction has
-+	     R==1, since the ra value must be zero for the
-+	     instruction to be valid.  */
-+	  if (!instruction_is_relative)
-+		 print_ra();
-+  }
-+
-   if (has_rb) print_rb ();
-   if (has_rc) print_rc ();
-   if (has_rs) print_rs();
-@@ -1894,6 +2032,11 @@ void set_up_iterators () {
-    } else {
- 	   a_start=0; b_start=0; c_start=0; m_start=0;
-    }
-+   /* Special casing for R==1 tests. */
-+   if (instruction_is_relative) {
-+	  a_iters = 1;
-+	  m_start=3; m_iters=4;
-+   }
-    if ((has_vra+has_vrb+has_vrc+has_vrm+has_xa+has_xb+uses_MC > 2) &&
-        (!debug_enable_all_iters)) {
-       /* Instruction tests using multiple fields will generate a lot of
-@@ -2196,15 +2339,12 @@ void initialize_source_registers () {
- 	  vrb[0] = vsxargs[ (vrbi  ) % isr_modulo];
- 	  vrb[1] = vsxargs[ (vrbi+1) % isr_modulo];
-    }
-- 
--  if (has_xa) { 
--    vec_xa[0] = vsxargs[ (vrai  ) % isr_modulo];
--    vec_xa[1] = vsxargs[ (vrai+1) % isr_modulo];
--  }
--  if (has_xb) {
--    vec_xb[0] = vsxargs[ (vrbi  ) % isr_modulo];
--    vec_xb[1] = vsxargs[ (vrbi+1) % isr_modulo];
--  }
-+
-+   if (instruction_is_relative) {
-+     /* for pstxsd and friends using R=1 */
-+     vec_xa[0] = vsxargs[ (vrai+2  ) % isr_modulo];
-+     vec_xa[1] = vsxargs[ (vrai+3  ) % isr_modulo];
-+   }
- 
-    // xap 'shares' with the second half of an xa-pair.
-   if (has_xap ) {
diff --git a/valgrind-3.18.1-ppc-pstq.patch b/valgrind-3.18.1-ppc-pstq.patch
deleted file mode 100644
index 2e23d18..0000000
--- a/valgrind-3.18.1-ppc-pstq.patch
+++ /dev/null
@@ -1,47 +0,0 @@
-commit ae8c6de01417023e78763de145b1c0e6ddd87277
-Author: Carl Love <cel@us.ibm.com>
-Date:   Wed Oct 20 20:40:13 2021 +0000
-
-    Fix for the prefixed stq instruction in PC relative mode.
-    
-    The pstq instruction for R=1 was not using the correct effective address.
-    The EA_hi and EA_lo should have been based on the value of EA as calculated
-    by the function calculate_prefix_EA.  Unfortunately, the EA_hi and EA_lo
-    addresses were still using the previous code (not PC relative) to calculate
-    the address from the contents of RA plus the offset.
-
-diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
-index 8afd77490..543fa9574 100644
---- a/VEX/priv/guest_ppc_toIR.c
-+++ b/VEX/priv/guest_ppc_toIR.c
-@@ -9838,23 +9838,24 @@ static Bool dis_int_store_ds_prefix ( UInt prefix,
-             if (host_endness == VexEndnessBE) {
- 
-                /* upper 64-bits */
--               assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val ) );
-+               assign( EA_hi, mkexpr(EA));
- 
-                /* lower 64-bits */
--               assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+8 ) );
-+               assign( EA_lo, binop(Iop_Add64, mkexpr(EA), mkU64(8)));
-+
-             } else {
-                /* upper 64-bits */
--               assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+8 ) );
-+               assign( EA_hi, binop(Iop_Add64, mkexpr(EA), mkU64(8)));
- 
-                /* lower 64-bits */
--               assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val ) );
-+               assign( EA_lo, mkexpr(EA));
-             }
-          } else {
-             /* upper half of upper 64-bits */
--            assign( EA_hi, ea_rAor0_simm( rA_addr, immediate_val+4 ) );
-+            assign( EA_hi, binop(Iop_Add32, mkexpr(EA), mkU32(4)));
- 
-             /* lower half of upper 64-bits */
--            assign( EA_lo, ea_rAor0_simm( rA_addr, immediate_val+12 ) );
-+            assign( EA_lo, binop(Iop_Add32, mkexpr(EA), mkU32(12)));
-          }
- 
-          /* Note, the store order for stq instruction is the same for BE
diff --git a/valgrind-3.18.1-ppc64-cmov.patch b/valgrind-3.18.1-ppc64-cmov.patch
deleted file mode 100644
index 9e5ee62..0000000
--- a/valgrind-3.18.1-ppc64-cmov.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-commit fb6a77ed78876083e8ba4c2f92384db5c2e41be8
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Tue Feb 8 16:36:08 2022 +0100
-
-    ppc64 --track-origins=yes failure because of bad cmov addHRegUse
-    
-    For Pin_CMov getRegUsage_PPCInstr called addHRegUse for the dst
-    register with HRmWrite, but since this is a conditional move the
-    register could be both read and written (read + write = modify).
-    This matches the dst of Pin_FpCMov and Pin_AvCMov.
-    
-    In a very rare case, and only with --track-origins=yes, this
-    could cause bad code generation.
-    
-    This is slightly amazing: this code is from 2005 and as far as
-    I know we have never seen an issue with --track-origins=yes on power
-    before. And I have been unable to come up with a simple reproducer.
-    
-    https://bugs.kde.org/show_bug.cgi?id=449672
-
-diff --git a/VEX/priv/host_ppc_defs.c b/VEX/priv/host_ppc_defs.c
-index 3ae0f6e08..4222b4786 100644
---- a/VEX/priv/host_ppc_defs.c
-+++ b/VEX/priv/host_ppc_defs.c
-@@ -2590,7 +2590,7 @@ void getRegUsage_PPCInstr ( HRegUsage* u, const PPCInstr* i, Bool mode64 )
-       return;
-    case Pin_CMov:
-       addRegUsage_PPCRI(u,  i->Pin.CMov.src);
--      addHRegUse(u, HRmWrite, i->Pin.CMov.dst);
-+      addHRegUse(u, HRmModify, i->Pin.CMov.dst);
-       return;
-    case Pin_Load:
-       addRegUsage_PPCAMode(u, i->Pin.Load.src);
diff --git a/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch b/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch
deleted file mode 100644
index bb36c80..0000000
--- a/valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch
+++ /dev/null
@@ -1,60 +0,0 @@
-commit 6e08ee95f7f1b1c3fd434fa380cc5b2cc3e3f7c7
-Author: Carl Love <cel@us.ibm.com>
-Date:   Fri Oct 29 16:30:33 2021 -0500
-
-    Bug 444571 - PPC, fix the lxsibzx and lxsihzx so they only load their respective sized data.
-    
-    The lxsibzx was doing a 64-bit load.  The result was initializing
-    additional bytes in the register that should not have been initialized.
-    The memcheck/tests/linux/dlclose_leak test detected the issue.  The
-    code generation uses lxsibzx and stxsibx with -mcpu=power9.  Previously
-    the lbz and stb instructions were generated.
-    
-    The same issue was noted and fixed with the lxsihzx instruction.  The
-    memcheck/tests/linux/badrw test now passes as well.
-    
-    https://bugs.kde.org/show_bug.cgi?id=444571
-
-diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
-index d90d566ed..8afd77490 100644
---- a/VEX/priv/guest_ppc_toIR.c
-+++ b/VEX/priv/guest_ppc_toIR.c
-@@ -25359,19 +25359,17 @@ dis_vx_load ( UInt prefix, UInt theInstr )
- 
-       else
-          irx_addr = mkexpr( EA );
--
--      byte = load( Ity_I64, irx_addr );
-+      /* byte load */
-+      byte = load( Ity_I8, irx_addr );
-       putVSReg( XT, binop( Iop_64HLtoV128,
--                            binop( Iop_And64,
--                                   byte,
--                                   mkU64( 0xFF ) ),
-+                           unop( Iop_8Uto64, byte ),
-                            mkU64( 0 ) ) );
-       break;
-    }
- 
-    case 0x32D: // lxsihzx
-    {
--      IRExpr *byte;
-+      IRExpr *hword;
-       IRExpr* irx_addr;
- 
-       DIP("lxsihzx %u,r%u,r%u\n", (UInt)XT, rA_addr, rB_addr);
-@@ -25382,11 +25380,10 @@ dis_vx_load ( UInt prefix, UInt theInstr )
-       else
-          irx_addr = mkexpr( EA );
- 
--      byte = load( Ity_I64, irx_addr );
-+      hword = load( Ity_I16, irx_addr );
-       putVSReg( XT, binop( Iop_64HLtoV128,
--                            binop( Iop_And64,
--                                   byte,
--                                   mkU64( 0xFFFF ) ),
-+                            unop( Iop_16Uto64,
-+                                  hword ),
-                            mkU64( 0 ) ) );
-       break;
-    }
diff --git a/valgrind-3.18.1-rseq-enosys.patch b/valgrind-3.18.1-rseq-enosys.patch
deleted file mode 100644
index 40664c3..0000000
--- a/valgrind-3.18.1-rseq-enosys.patch
+++ /dev/null
@@ -1,180 +0,0 @@
-commit 67e7b20eb256baec225b3d7df1f03d731bf5e939
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Fri Dec 10 17:41:59 2021 +0100
-
-    Implement linux rseq syscall as ENOSYS
-    
-    This implements rseq for amd64, arm, arm64, ppc32, ppc64,
-    s390x and x86 linux as ENOSYS (without warning).
-    
-    glibc will start using rseq to accelerate sched_getcpu, if
-    available. This would cause a warning from valgrind every
-    time a new thread is started.
-    
-    Real rseq (restartable sequences) support is pretty hard, so
-    for now just explicitly return ENOSYS (just like we do for clone3).
-    
-    https://sourceware.org/pipermail/libc-alpha/2021-December/133656.html
-
-diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c
-index 5062324a1..18b25f80a 100644
---- a/coregrind/m_syswrap/syswrap-amd64-linux.c
-+++ b/coregrind/m_syswrap/syswrap-amd64-linux.c
-@@ -862,6 +862,8 @@ static SyscallTableEntry syscall_table[] = {
- 
-    LINXY(__NR_statx,             sys_statx),             // 332
- 
-+   GENX_(__NR_rseq,              sys_ni_syscall),        // 334
-+
-    LINX_(__NR_membarrier,        sys_membarrier),        // 324
- 
-    LINX_(__NR_copy_file_range,   sys_copy_file_range),   // 326
-diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c
-index 556dd844b..d583cef0c 100644
---- a/coregrind/m_syswrap/syswrap-arm-linux.c
-+++ b/coregrind/m_syswrap/syswrap-arm-linux.c
-@@ -1024,6 +1024,7 @@ static SyscallTableEntry syscall_main_table[] = {
-    LINX_(__NR_pwritev2,          sys_pwritev2),         // 393
- 
-    LINXY(__NR_statx,             sys_statx),            // 397
-+   GENX_(__NR_rseq,              sys_ni_syscall),       // 398
- 
-    LINXY(__NR_clock_gettime64,   sys_clock_gettime64),  // 403
-    LINX_(__NR_clock_settime64,   sys_clock_settime64),  // 404
-diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c
-index b87107727..2066a38ea 100644
---- a/coregrind/m_syswrap/syswrap-arm64-linux.c
-+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c
-@@ -823,8 +823,9 @@ static SyscallTableEntry syscall_main_table[] = {
-    //   (__NR_pkey_mprotect,     sys_ni_syscall),        // 288
-    //   (__NR_pkey_alloc,        sys_ni_syscall),        // 289
-    //   (__NR_pkey_free,         sys_ni_syscall),        // 290
-+   LINXY(__NR_statx,             sys_statx),             // 291
- 
--   LINXY(__NR_statx,             sys_statx),             // 397
-+   GENX_(__NR_rseq,              sys_ni_syscall),        // 293
- 
-    LINXY(__NR_io_uring_setup,    sys_io_uring_setup),    // 425
-    LINXY(__NR_io_uring_enter,    sys_io_uring_enter),    // 426
-diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c
-index 6263ab845..637b2504e 100644
---- a/coregrind/m_syswrap/syswrap-ppc32-linux.c
-+++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c
-@@ -1028,6 +1028,8 @@ static SyscallTableEntry syscall_table[] = {
- 
-    LINXY(__NR_statx,             sys_statx),            // 383
- 
-+   GENX_(__NR_rseq,              sys_ni_syscall),       // 387
-+
-    LINXY(__NR_clock_gettime64,   sys_clock_gettime64),  // 403
-    LINX_(__NR_clock_settime64,   sys_clock_settime64),  // 404
- 
-diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c
-index a26b41c32..93956d3cc 100644
---- a/coregrind/m_syswrap/syswrap-ppc64-linux.c
-+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c
-@@ -1019,6 +1019,8 @@ static SyscallTableEntry syscall_table[] = {
- 
-    LINXY(__NR_statx,             sys_statx),            // 383
- 
-+   GENX_(__NR_rseq,              sys_ni_syscall),       // 387
-+
-    LINXY(__NR_io_uring_setup,    sys_io_uring_setup),    // 425
-    LINXY(__NR_io_uring_enter,    sys_io_uring_enter),    // 426
-    LINXY(__NR_io_uring_register, sys_io_uring_register), // 427
-diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c
-index 5c9209859..73f9684c4 100644
---- a/coregrind/m_syswrap/syswrap-s390x-linux.c
-+++ b/coregrind/m_syswrap/syswrap-s390x-linux.c
-@@ -860,6 +860,8 @@ static SyscallTableEntry syscall_table[] = {
- 
-    LINXY(__NR_statx, sys_statx),                                      // 379
- 
-+   GENX_(__NR_rseq, sys_ni_syscall),                                  // 381
-+
-    LINXY(__NR_io_uring_setup, sys_io_uring_setup),                    // 425
-    LINXY(__NR_io_uring_enter, sys_io_uring_enter),                    // 426
-    LINXY(__NR_io_uring_register, sys_io_uring_register),              // 427
-diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c
-index 1d8f45d33..8662ff501 100644
---- a/coregrind/m_syswrap/syswrap-x86-linux.c
-+++ b/coregrind/m_syswrap/syswrap-x86-linux.c
-@@ -1619,6 +1619,8 @@ static SyscallTableEntry syscall_table[] = {
-    /* Explicitly not supported on i386 yet. */
-    GENX_(__NR_arch_prctl,        sys_ni_syscall),       // 384
- 
-+   GENX_(__NR_rseq,              sys_ni_syscall),       // 386
-+
-    LINXY(__NR_clock_gettime64,   sys_clock_gettime64),  // 403
-    LINX_(__NR_clock_settime64,   sys_clock_settime64),  // 404
- 
-diff --git a/include/vki/vki-scnums-arm-linux.h b/include/vki/vki-scnums-arm-linux.h
-index ff560e19d..485db8b26 100644
---- a/include/vki/vki-scnums-arm-linux.h
-+++ b/include/vki/vki-scnums-arm-linux.h
-@@ -432,6 +432,7 @@
- #define __NR_pkey_alloc                 395
- #define __NR_pkey_free                  396
- #define __NR_statx                      397
-+#define __NR_rseq                       398
- 
- 
- 
-diff --git a/include/vki/vki-scnums-arm64-linux.h b/include/vki/vki-scnums-arm64-linux.h
-index 9aa3b2b5f..acdfb39c6 100644
---- a/include/vki/vki-scnums-arm64-linux.h
-+++ b/include/vki/vki-scnums-arm64-linux.h
-@@ -323,9 +323,11 @@
- #define __NR_pkey_alloc 289
- #define __NR_pkey_free 290
- #define __NR_statx 291
-+#define __NR_io_pgetevents 291
-+#define __NR_rseq 293
- 
- #undef __NR_syscalls
--#define __NR_syscalls 292
-+#define __NR_syscalls 294
- 
- ///*
- // * All syscalls below here should go away really,
-diff --git a/include/vki/vki-scnums-ppc32-linux.h b/include/vki/vki-scnums-ppc32-linux.h
-index 6987ad941..08fa77df0 100644
---- a/include/vki/vki-scnums-ppc32-linux.h
-+++ b/include/vki/vki-scnums-ppc32-linux.h
-@@ -415,6 +415,7 @@
- #define __NR_pkey_alloc		384
- #define __NR_pkey_free		385
- #define __NR_pkey_mprotect	386
-+#define __NR_rseq		387
- 
- #endif /* __VKI_SCNUMS_PPC32_LINUX_H */
- 
-diff --git a/include/vki/vki-scnums-ppc64-linux.h b/include/vki/vki-scnums-ppc64-linux.h
-index 6827964fd..a76fa6d32 100644
---- a/include/vki/vki-scnums-ppc64-linux.h
-+++ b/include/vki/vki-scnums-ppc64-linux.h
-@@ -407,6 +407,7 @@
- #define __NR_pkey_alloc		384
- #define __NR_pkey_free		385
- #define __NR_pkey_mprotect	386
-+#define __NR_rseq		387
- 
- #endif /* __VKI_SCNUMS_PPC64_LINUX_H */
- 
-diff --git a/include/vki/vki-scnums-s390x-linux.h b/include/vki/vki-scnums-s390x-linux.h
-index 6487e20c9..869c04584 100644
---- a/include/vki/vki-scnums-s390x-linux.h
-+++ b/include/vki/vki-scnums-s390x-linux.h
-@@ -342,8 +342,11 @@
- #define __NR_s390_guarded_storage	378
- #define __NR_statx			379
- #define __NR_s390_sthyi			380
-+#define __NR_kexec_file_load		381
-+#define __NR_io_pgetevents		382
-+#define __NR_rseq			383
- 
--#define NR_syscalls 381
-+#define NR_syscalls 384
- 
- /* 
-  * There are some system calls that are not present on 64 bit, some
diff --git a/valgrind-3.18.1-rust-demangle-suffix.patch b/valgrind-3.18.1-rust-demangle-suffix.patch
deleted file mode 100644
index 58de409..0000000
--- a/valgrind-3.18.1-rust-demangle-suffix.patch
+++ /dev/null
@@ -1,370 +0,0 @@
-commit e0b62fe05559003b731b4d786f3b71e9a66fb94d
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Thu Feb 17 18:35:38 2022 +0100
-
-    Update libiberty demangler
-    
-    Update the libiberty demangler using the auxprogs/update-demangler
-    script to gcc git commit d3b2ead595467166c849950ecd3710501a5094d9.
-    
-    This update includes:
-    
-    - libiberty rust-demangle, ignore .suffix
-    - libiberty: Fix infinite recursion in rust demangler
-    - Update copyright years
-    - libiberty: support digits in cpp mangled clone names
-    - d-demangle: properly skip anonymous symbols
-    - d-demangle: remove parenthesis where it is not needed
-
-diff --git a/auxprogs/update-demangler b/auxprogs/update-demangler
-index 00c090467..307a0ea36 100755
---- a/auxprogs/update-demangler
-+++ b/auxprogs/update-demangler
-@@ -17,8 +17,8 @@ set -e
- #---------------------------------------------------------------------
- 
- # You need to modify these revision numbers for your update.
--old_gcc_revision=01d92cfd79872e4cffc78bf233bb9b767336beb8 # the revision of the previous update
--new_gcc_revision=b3585c0836e729bed56b9afd4292177673a25ca0 # the revision for this update
-+old_gcc_revision=b3585c0836e729bed56b9afd4292177673a25ca0 # the revision of the previous update
-+new_gcc_revision=d3b2ead595467166c849950ecd3710501a5094d9 # the revision for this update
- 
- # Unless the organization of demangler related files has changed, no
- # changes below this line should be necessary.
-diff --git a/coregrind/m_demangle/ansidecl.h b/coregrind/m_demangle/ansidecl.h
-index 2329c8655..4275c9b9c 100644
---- a/coregrind/m_demangle/ansidecl.h
-+++ b/coregrind/m_demangle/ansidecl.h
-@@ -1,5 +1,5 @@
--/* ANSI and traditional C compatibility macros
--   Copyright (C) 1991-2021 Free Software Foundation, Inc.
-+/* ANSI and traditional C compatability macros
-+   Copyright (C) 1991-2022 Free Software Foundation, Inc.
-    This file is part of the GNU C Library.
- 
- This program is free software; you can redistribute it and/or modify
-diff --git a/coregrind/m_demangle/cp-demangle.c b/coregrind/m_demangle/cp-demangle.c
-index 1f4cd3d28..ca82c330d 100644
---- a/coregrind/m_demangle/cp-demangle.c
-+++ b/coregrind/m_demangle/cp-demangle.c
-@@ -1,5 +1,5 @@
- /* Demangler for g++ V3 ABI.
--   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2003-2022 Free Software Foundation, Inc.
-    Written by Ian Lance Taylor <ian@wasabisystems.com>.
- 
-    This file is part of the libiberty library, which is part of GCC.
-@@ -3901,10 +3901,11 @@ d_clone_suffix (struct d_info *di, struct demangle_component *encoding)
-   const char *pend = suffix;
-   struct demangle_component *n;
- 
--  if (*pend == '.' && (IS_LOWER (pend[1]) || pend[1] == '_'))
-+  if (*pend == '.' && (IS_LOWER (pend[1]) || IS_DIGIT (pend[1])
-+		       || pend[1] == '_'))
-     {
-       pend += 2;
--      while (IS_LOWER (*pend) || *pend == '_')
-+      while (IS_LOWER (*pend) || IS_DIGIT (*pend) || *pend == '_')
- 	++pend;
-     }
-   while (*pend == '.' && IS_DIGIT (pend[1]))
-diff --git a/coregrind/m_demangle/cp-demangle.h b/coregrind/m_demangle/cp-demangle.h
-index cb47bdf0d..c6445036d 100644
---- a/coregrind/m_demangle/cp-demangle.h
-+++ b/coregrind/m_demangle/cp-demangle.h
-@@ -1,5 +1,5 @@
- /* Internal demangler interface for g++ V3 ABI.
--   Copyright (C) 2003-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2003-2022 Free Software Foundation, Inc.
-    Written by Ian Lance Taylor <ian@wasabisystems.com>.
- 
-    This file is part of the libiberty library, which is part of GCC.
-diff --git a/coregrind/m_demangle/cplus-dem.c b/coregrind/m_demangle/cplus-dem.c
-index bf4379054..5d6e04d96 100644
---- a/coregrind/m_demangle/cplus-dem.c
-+++ b/coregrind/m_demangle/cplus-dem.c
-@@ -1,5 +1,5 @@
- /* Demangler for GNU C++
--   Copyright (C) 1989-2021 Free Software Foundation, Inc.
-+   Copyright (C) 1989-2022 Free Software Foundation, Inc.
-    Written by James Clark (jjc@jclark.uucp)
-    Rewritten by Fred Fish (fnf@cygnus.com) for ARM and Lucid demangling
-    Modified by Satish Pai (pai@apollo.hp.com) for HP demangling
-diff --git a/coregrind/m_demangle/d-demangle.c b/coregrind/m_demangle/d-demangle.c
-index 4525c48d4..c2c3e08c8 100644
---- a/coregrind/m_demangle/d-demangle.c
-+++ b/coregrind/m_demangle/d-demangle.c
-@@ -1,5 +1,5 @@
- /* Demangler for the D programming language
--   Copyright (C) 2014-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2014-2022 Free Software Foundation, Inc.
-    Written by Iain Buclaw (ibuclaw@gdcproject.org)
- 
- This file is part of the libiberty library.
-@@ -269,15 +269,15 @@ dlang_hexdigit (const char *mangled, char *ret)
- 
-   c = mangled[0];
-   if (!ISDIGIT (c))
--    (*ret) = (c - (ISUPPER (c) ? 'A' : 'a') + 10);
-+    *ret = c - (ISUPPER (c) ? 'A' : 'a') + 10;
-   else
--    (*ret) = (c - '0');
-+    *ret = c - '0';
- 
-   c = mangled[1];
-   if (!ISDIGIT (c))
--    (*ret) = (*ret << 4) | (c - (ISUPPER (c) ? 'A' : 'a') + 10);
-+    *ret = (*ret << 4) | (c - (ISUPPER (c) ? 'A' : 'a') + 10);
-   else
--    (*ret) = (*ret << 4) | (c - '0');
-+    *ret = (*ret << 4) | (c - '0');
- 
-   mangled += 2;
- 
-@@ -354,7 +354,7 @@ dlang_decode_backref (const char *mangled, long *ret)
- static const char *
- dlang_backref (const char *mangled, const char **ret, struct dlang_info *info)
- {
--  (*ret) = NULL;
-+  *ret = NULL;
- 
-   if (mangled == NULL || *mangled != 'Q')
-     return NULL;
-@@ -372,7 +372,7 @@ dlang_backref (const char *mangled, const char **ret, struct dlang_info *info)
-     return NULL;
- 
-   /* Set the position of the back reference.  */
--  (*ret) = qpos - refpos;
-+  *ret = qpos - refpos;
- 
-   return mangled;
- }
-@@ -1666,13 +1666,19 @@ dlang_parse_qualified (string *decl, const char *mangled,
-   size_t n = 0;
-   do
-     {
-+      /* Skip over anonymous symbols.  */
-+      if (*mangled == '0')
-+      {
-+	do
-+	  mangled++;
-+	while (*mangled == '0');
-+
-+	continue;
-+      }
-+
-       if (n++)
- 	string_append (decl, ".");
- 
--      /* Skip over anonymous symbols.  */
--      while (*mangled == '0')
--	mangled++;
--
-       mangled = dlang_identifier (decl, mangled, info);
- 
-       /* Consume the encoded arguments.  However if this is not followed by the
-diff --git a/coregrind/m_demangle/demangle.h b/coregrind/m_demangle/demangle.h
-index 2acb3bd4e..bbce948c5 100644
---- a/coregrind/m_demangle/demangle.h
-+++ b/coregrind/m_demangle/demangle.h
-@@ -1,5 +1,5 @@
- /* Defs for interface to demanglers.
--   Copyright (C) 1992-2021 Free Software Foundation, Inc.
-+   Copyright (C) 1992-2022 Free Software Foundation, Inc.
- 
-    This program is free software; you can redistribute it and/or
-    modify it under the terms of the GNU Library General Public License
-diff --git a/coregrind/m_demangle/dyn-string.c b/coregrind/m_demangle/dyn-string.c
-index 66948debf..89ce8e12c 100644
---- a/coregrind/m_demangle/dyn-string.c
-+++ b/coregrind/m_demangle/dyn-string.c
-@@ -1,5 +1,5 @@
- /* An abstract string datatype.
--   Copyright (C) 1998-2021 Free Software Foundation, Inc.
-+   Copyright (C) 1998-2022 Free Software Foundation, Inc.
-    Contributed by Mark Mitchell (mark@markmitchell.com).
- 
- This file is part of GNU CC.
-diff --git a/coregrind/m_demangle/dyn-string.h b/coregrind/m_demangle/dyn-string.h
-index 6c5e66012..be2184aa9 100644
---- a/coregrind/m_demangle/dyn-string.h
-+++ b/coregrind/m_demangle/dyn-string.h
-@@ -1,5 +1,5 @@
- /* An abstract string datatype.
--   Copyright (C) 1998-2021 Free Software Foundation, Inc.
-+   Copyright (C) 1998-2022 Free Software Foundation, Inc.
-    Contributed by Mark Mitchell (mark@markmitchell.com).
- 
- This file is part of GCC.
-diff --git a/coregrind/m_demangle/rust-demangle.c b/coregrind/m_demangle/rust-demangle.c
-index 0cafa3df9..0a9331ac2 100644
---- a/coregrind/m_demangle/rust-demangle.c
-+++ b/coregrind/m_demangle/rust-demangle.c
-@@ -1,5 +1,5 @@
- /* Demangler for the Rust programming language
--   Copyright (C) 2016-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2016-2022 Free Software Foundation, Inc.
-    Written by David Tolnay (dtolnay@gmail.com).
-    Rewritten by Eduard-Mihai Burtescu (eddyb@lyken.rs) for v0 support.
- 
-@@ -101,6 +101,12 @@ struct rust_demangler
-   /* Rust mangling version, with legacy mangling being -1. */
-   int version;
- 
-+  /* Recursion depth.  */
-+  unsigned int recursion;
-+  /* Maximum number of times demangle_path may be called recursively.  */
-+#define RUST_MAX_RECURSION_COUNT  1024
-+#define RUST_NO_RECURSION_LIMIT   ((unsigned int) -1)
-+
-   uint64_t bound_lifetime_depth;
- };
- 
-@@ -698,6 +704,15 @@ demangle_path (struct rust_demangler *rdm, int in_value)
-   if (rdm->errored)
-     return;
- 
-+  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
-+    {
-+      ++ rdm->recursion;
-+      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
-+	/* FIXME: There ought to be a way to report
-+	   that the recursion limit has been reached.  */
-+	goto fail_return;
-+    }
-+
-   switch (tag = next (rdm))
-     {
-     case 'C':
-@@ -715,10 +730,7 @@ demangle_path (struct rust_demangler *rdm, int in_value)
-     case 'N':
-       ns = next (rdm);
-       if (!ISLOWER (ns) && !ISUPPER (ns))
--        {
--          rdm->errored = 1;
--          return;
--        }
-+	goto fail_return;
- 
-       demangle_path (rdm, in_value);
- 
-@@ -803,9 +815,15 @@ demangle_path (struct rust_demangler *rdm, int in_value)
-         }
-       break;
-     default:
--      rdm->errored = 1;
--      return;
-+      goto fail_return;
-     }
-+  goto pass_return;
-+
-+ fail_return:
-+  rdm->errored = 1;
-+ pass_return:
-+  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
-+    -- rdm->recursion;
- }
- 
- static void
-@@ -897,6 +915,19 @@ demangle_type (struct rust_demangler *rdm)
-       return;
-     }
- 
-+   if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
-+    {
-+      ++ rdm->recursion;
-+      if (rdm->recursion > RUST_MAX_RECURSION_COUNT)
-+	/* FIXME: There ought to be a way to report
-+	   that the recursion limit has been reached.  */
-+	{
-+	  rdm->errored = 1;
-+	  -- rdm->recursion;
-+	  return;
-+	}
-+    }
-+
-   switch (tag)
-     {
-     case 'R':
-@@ -1057,6 +1088,9 @@ demangle_type (struct rust_demangler *rdm)
-       rdm->next--;
-       demangle_path (rdm, 0);
-     }
-+
-+  if (rdm->recursion != RUST_NO_RECURSION_LIMIT)
-+    -- rdm->recursion;
- }
- 
- /* A trait in a trait object may have some "existential projections"
-@@ -1347,6 +1381,7 @@ rust_demangle_callback (const char *mangled, int options,
-   rdm.skipping_printing = 0;
-   rdm.verbose = (options & DMGL_VERBOSE) != 0;
-   rdm.version = 0;
-+  rdm.recursion = (options & DMGL_NO_RECURSE_LIMIT) ? RUST_NO_RECURSION_LIMIT : 0;
-   rdm.bound_lifetime_depth = 0;
- 
-   /* Rust symbols always start with _R (v0) or _ZN (legacy). */
-@@ -1367,13 +1402,19 @@ rust_demangle_callback (const char *mangled, int options,
-   /* Rust symbols (v0) use only [_0-9a-zA-Z] characters. */
-   for (p = rdm.sym; *p; p++)
-     {
-+      /* Rust v0 symbols can have '.' suffixes, ignore those.  */
-+      if (rdm.version == 0 && *p == '.')
-+        break;
-+
-       rdm.sym_len++;
- 
-       if (*p == '_' || ISALNUM (*p))
-         continue;
- 
--      /* Legacy Rust symbols can also contain [.:$] characters. */
--      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'))
-+      /* Legacy Rust symbols can also contain [.:$] characters.
-+         Or @ in the .suffix (which will be skipped, see below). */
-+      if (rdm.version == -1 && (*p == '$' || *p == '.' || *p == ':'
-+                                || *p == '@'))
-         continue;
- 
-       return 0;
-@@ -1382,7 +1423,16 @@ rust_demangle_callback (const char *mangled, int options,
-   /* Legacy Rust symbols need to be handled separately. */
-   if (rdm.version == -1)
-     {
--      /* Legacy Rust symbols always end with E. */
-+      /* Legacy Rust symbols always end with E.  But can be followed by a
-+         .suffix (which we want to ignore).  */
-+      int dot_suffix = 1;
-+      while (rdm.sym_len > 0 &&
-+             !(dot_suffix && rdm.sym[rdm.sym_len - 1] == 'E'))
-+        {
-+          dot_suffix = rdm.sym[rdm.sym_len - 1] == '.';
-+          rdm.sym_len--;
-+        }
-+
-       if (!(rdm.sym_len > 0 && rdm.sym[rdm.sym_len - 1] == 'E'))
-         return 0;
-       rdm.sym_len--;
-diff --git a/coregrind/m_demangle/safe-ctype.c b/coregrind/m_demangle/safe-ctype.c
-index 14da11918..97bc43667 100644
---- a/coregrind/m_demangle/safe-ctype.c
-+++ b/coregrind/m_demangle/safe-ctype.c
-@@ -1,6 +1,6 @@
- /* <ctype.h> replacement macros.
- 
--   Copyright (C) 2000-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2000-2022 Free Software Foundation, Inc.
-    Contributed by Zack Weinberg <zackw@stanford.edu>.
- 
- This file is part of the libiberty library.
-diff --git a/coregrind/m_demangle/safe-ctype.h b/coregrind/m_demangle/safe-ctype.h
-index a7389c32e..86157ed4b 100644
---- a/coregrind/m_demangle/safe-ctype.h
-+++ b/coregrind/m_demangle/safe-ctype.h
-@@ -1,6 +1,6 @@
- /* <ctype.h> replacement macros.
- 
--   Copyright (C) 2000-2021 Free Software Foundation, Inc.
-+   Copyright (C) 2000-2022 Free Software Foundation, Inc.
-    Contributed by Zack Weinberg <zackw@stanford.edu>.
- 
- This file is part of the libiberty library.
diff --git a/valgrind-3.18.1-rust-v0-demangle.patch b/valgrind-3.18.1-rust-v0-demangle.patch
deleted file mode 100644
index e48a106..0000000
--- a/valgrind-3.18.1-rust-v0-demangle.patch
+++ /dev/null
@@ -1,137 +0,0 @@
-commit 4831385c6706b377851284adc4c4545fff4c6564
-Author: Nicholas Nethercote <nnethercote@apple.com>
-Date:   Tue Nov 9 12:30:07 2021 +1100
-
-    Fix Rust v0 demangling.
-    
-    It's currently broken due to a silly test that prevents the v0
-    demangling code from even running.
-    
-    The commit also adds a test, to avoid such problems in the future.
-
-diff --git a/coregrind/m_demangle/demangle.c b/coregrind/m_demangle/demangle.c
-index 16161da2a..3fd7cb75f 100644
---- a/coregrind/m_demangle/demangle.c
-+++ b/coregrind/m_demangle/demangle.c
-@@ -118,8 +118,13 @@ void VG_(demangle) ( Bool do_cxx_demangling, Bool do_z_demangling,
-    }
- 
-    /* Possibly undo (1) */
-+   // - C++ mangled symbols start with "_Z" (possibly with exceptions?)
-+   // - Rust "legacy" mangled symbols start with "_Z".
-+   // - Rust "v0" mangled symbols start with "_R".
-+   // XXX: the Java/Rust/Ada demangling here probably doesn't work. See
-+   // https://bugs.kde.org/show_bug.cgi?id=445235 for details.
-    if (do_cxx_demangling && VG_(clo_demangle)
--       && orig != NULL && orig[0] == '_' && orig[1] == 'Z') {
-+       && orig != NULL && orig[0] == '_' && (orig[1] == 'Z' || orig[1] == 'R')) {
-       /* !!! vvv STATIC vvv !!! */
-       static HChar* demangled = NULL;
-       /* !!! ^^^ STATIC ^^^ !!! */
-diff --git a/memcheck/tests/demangle-rust.c b/memcheck/tests/demangle-rust.c
-new file mode 100644
-index 000000000..f2a458b2a
---- /dev/null
-+++ b/memcheck/tests/demangle-rust.c
-@@ -0,0 +1,31 @@
-+// Valgrind supports demangling Rust symbols (both the "v0" and "legacy"
-+// mangling schemes), but we don't want to add a dependency on the Rust
-+// compiler for a single test. So this is a C program with function names that
-+// are mangled Rust symbols. In the output, they become demangled Rust names.
-+// It's a hack, but a useful one.
-+
-+#include <stdlib.h>
-+
-+// A v0 symbol that demangles to: <rustc_middle::ty::PredicateKind as rustc_middle::ty::fold::TypeFoldable>::fold_with::<rustc_infer::infer::resolve::OpportunisticVarResolver>
-+int _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(int *p)
-+{
-+   return *p ? 1 : 2;
-+}
-+
-+// A v0 symbol that demangles to: rustc_expand::mbe::macro_parser::parse_tt
-+int _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(int* p)
-+{
-+   return _RINvYNtNtCs4uGc65yWeeX_12rustc_middle2ty13PredicateKindNtNtB5_4fold12TypeFoldable9fold_withNtNtNtCsgI90OQiJWEs_11rustc_infer5infer7resolve24OpportunisticVarResolverECsdozMG8X9FIu_21rustc_trait_selection(p);
-+}
-+
-+// A legacy symbol that demangles to: core::str::lossy::Utf8Lossy::from_bytes
-+int _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(int* p)
-+{
-+   return _RNvNtNtCsaqSe1lZGvEL_12rustc_expand3mbe12macro_parser8parse_tt(p);
-+}
-+
-+int main(void)
-+{
-+   return _ZN4core3str5lossy9Utf8Lossy10from_bytes17heb1677c8cb728b0bE(malloc(sizeof(int)));
-+}
-+
-diff --git a/memcheck/tests/demangle-rust.stderr.exp b/memcheck/tests/demangle-rust.stderr.exp
-new file mode 100644
-index 000000000..f04bb625b
---- /dev/null
-+++ b/memcheck/tests/demangle-rust.stderr.exp
-@@ -0,0 +1,6 @@
-+Conditional jump or move depends on uninitialised value(s)
-+   at 0x........: <rustc_middle::ty::PredicateKind as rustc_middle::ty::fold::TypeFoldable>::fold_with::<rustc_infer::infer::resolve::OpportunisticVarResolver> (demangle-rust.c:12)
-+   by 0x........: rustc_expand::mbe::macro_parser::parse_tt (demangle-rust.c:18)
-+   by 0x........: core::str::lossy::Utf8Lossy::from_bytes (demangle-rust.c:24)
-+   by 0x........: main (demangle-rust.c:29)
-+
-diff --git a/memcheck/tests/demangle-rust.vgtest b/memcheck/tests/demangle-rust.vgtest
-new file mode 100644
-index 000000000..d726c6b2e
---- /dev/null
-+++ b/memcheck/tests/demangle-rust.vgtest
-@@ -0,0 +1,2 @@
-+prog: demangle-rust
-+vgopts: -q
-
-commit c1bfa115f985633722f25922d2996c231e8c9d8d
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Wed Nov 10 09:02:36 2021 +0100
-
-    Add demangle-rust.vgtest demangle-rust.stderr.exp to EXTRA_DIST
-
-diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
-index 4d0476e2d..7837d87c7 100644
---- a/memcheck/tests/Makefile.am
-+++ b/memcheck/tests/Makefile.am
-@@ -281,6 +281,7 @@ EXTRA_DIST = \
- 	realloc3.stderr.exp realloc3.vgtest \
- 	recursive-merge.stderr.exp recursive-merge.vgtest \
- 	resvn_stack.stderr.exp resvn_stack.vgtest \
-+	demangle-rust.vgtest demangle-rust.stderr.exp \
- 	sbfragment.stdout.exp sbfragment.stderr.exp sbfragment.vgtest \
- 	sem.stderr.exp sem.vgtest \
- 	sendmsg.stderr.exp sendmsg.stderr.exp-solaris sendmsg.vgtest \
-
-commit d151907e5d8ff393f4fef126c8ae445ea8813661
-Author: Mark Wielaard <mark@klomp.org>
-Date:   Thu Nov 11 18:02:09 2021 +0100
-
-    Add demangle-rust to check_PROGRAMS
-    
-    The demangle-rust.vgtest would fail because the demangle-rust binary
-    wasn't build by default. Add it to check_PROGRAMS and define
-    demangle_rust_SOURCES to make sure it is always build.
-
-diff --git a/memcheck/tests/Makefile.am b/memcheck/tests/Makefile.am
-index 7837d87c7..449710020 100644
---- a/memcheck/tests/Makefile.am
-+++ b/memcheck/tests/Makefile.am
-@@ -392,6 +392,7 @@ check_PROGRAMS = \
- 	custom_alloc \
- 	custom-overlap \
- 	demangle \
-+	demangle-rust \
- 	big_debuginfo_symbol \
- 	deep-backtrace \
- 	describe-block \
-@@ -505,6 +506,7 @@ endif
- leak_cpp_interior_SOURCES	= leak_cpp_interior.cpp
- 
- demangle_SOURCES = demangle.cpp
-+demangle_rust_SOURCES = demangle-rust.c
- 
- # Suppress various gcc warnings which are correct, but for things
- # we are actually testing for at runtime.
diff --git a/valgrind-3.18.1-s390x-EXRL.patch b/valgrind-3.18.1-s390x-EXRL.patch
deleted file mode 100644
index 6927cc3..0000000
--- a/valgrind-3.18.1-s390x-EXRL.patch
+++ /dev/null
@@ -1,549 +0,0 @@
-commit b77dbefe72e4a5c7bcf1576a02c909010bd56991
-Author: Andreas Arnez <arnez@linux.ibm.com>
-Date:   Fri Oct 22 19:55:12 2021 +0200
-
-    Bug 444242 - s390x: Sign-extend "relative long" offset in EXRL
-    
-    In s390_irgen_EXRL, the offset is zero-extended instead of sign-extended,
-    typically causing Valgrind to crash when a negative offset occurs.
-    
-    Fix this with a new helper function that calculates a "relative long"
-    address from a 32-bit offset.  Replace other calculations of "relative
-    long" addresses by invocations of this function as well.  And for
-    consistency, do the same with "relative" (short) addresses.
-
-diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
-index 72222ab04..fffc563d4 100644
---- a/VEX/priv/guest_s390_toIR.c
-+++ b/VEX/priv/guest_s390_toIR.c
-@@ -399,6 +399,22 @@ mkF64i(ULong value)
-    return IRExpr_Const(IRConst_F64i(value));
- }
- 
-+/* Return the 64-bit address with the given 32-bit "relative long" offset from
-+   the current guest instruction being translated. */
-+static __inline__ Addr64
-+addr_rel_long(UInt offset)
-+{
-+   return guest_IA_curr_instr + ((Addr64)(Long)(Int)offset << 1);
-+}
-+
-+/* Return the 64-bit address with the given 16-bit "relative" offset from the
-+   current guest instruction being translated. */
-+static __inline__ Addr64
-+addr_relative(UShort offset)
-+{
-+   return guest_IA_curr_instr + ((Addr64)(Long)(Short)offset << 1);
-+}
-+
- /* Little helper function for my sanity. ITE = if-then-else */
- static IRExpr *
- mkite(IRExpr *condition, IRExpr *iftrue, IRExpr *iffalse)
-@@ -5516,7 +5532,7 @@ static const HChar *
- s390_irgen_BRAS(UChar r1, UShort i2)
- {
-    put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 4ULL));
--   call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+   call_function_and_chase(addr_relative(i2));
- 
-    return "bras";
- }
-@@ -5525,7 +5541,7 @@ static const HChar *
- s390_irgen_BRASL(UChar r1, UInt i2)
- {
-    put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + 6ULL));
--   call_function_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
-+   call_function_and_chase(addr_rel_long(i2));
- 
-    return "brasl";
- }
-@@ -5538,12 +5554,11 @@ s390_irgen_BRC(UChar r1, UShort i2)
-    if (r1 == 0) {
-    } else {
-       if (r1 == 15) {
--         always_goto_and_chase(
--               guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+         always_goto_and_chase(addr_relative(i2));
-       } else {
-          assign(cond, s390_call_calculate_cond(r1));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                           addr_relative(i2));
- 
-       }
-    }
-@@ -5561,11 +5576,11 @@ s390_irgen_BRCL(UChar r1, UInt i2)
-    if (r1 == 0) {
-    } else {
-       if (r1 == 15) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
-+         always_goto_and_chase(addr_rel_long(i2));
-       } else {
-          assign(cond, s390_call_calculate_cond(r1));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1));
-+                           addr_rel_long(i2));
-       }
-    }
-    if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
-@@ -5579,7 +5594,7 @@ s390_irgen_BRCT(UChar r1, UShort i2)
- {
-    put_gpr_w1(r1, binop(Iop_Sub32, get_gpr_w1(r1), mkU32(1)));
-    if_condition_goto(binop(Iop_CmpNE32, get_gpr_w1(r1), mkU32(0)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brct";
- }
-@@ -5589,7 +5604,7 @@ s390_irgen_BRCTH(UChar r1, UInt i2)
- {
-    put_gpr_w0(r1, binop(Iop_Sub32, get_gpr_w0(r1), mkU32(1)));
-    if_condition_goto(binop(Iop_CmpNE32, get_gpr_w0(r1), mkU32(0)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brcth";
- }
-@@ -5599,7 +5614,7 @@ s390_irgen_BRCTG(UChar r1, UShort i2)
- {
-    put_gpr_dw0(r1, binop(Iop_Sub64, get_gpr_dw0(r1), mkU64(1)));
-    if_condition_goto(binop(Iop_CmpNE64, get_gpr_dw0(r1), mkU64(0)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brctg";
- }
-@@ -5612,7 +5627,7 @@ s390_irgen_BRXH(UChar r1, UChar r3, UShort i2)
-    assign(value, get_gpr_w1(r3 | 1));
-    put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
-    if_condition_goto(binop(Iop_CmpLT32S, mkexpr(value), get_gpr_w1(r1)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brxh";
- }
-@@ -5625,7 +5640,7 @@ s390_irgen_BRXHG(UChar r1, UChar r3, UShort i2)
-    assign(value, get_gpr_dw0(r3 | 1));
-    put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
-    if_condition_goto(binop(Iop_CmpLT64S, mkexpr(value), get_gpr_dw0(r1)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brxhg";
- }
-@@ -5638,7 +5653,7 @@ s390_irgen_BRXLE(UChar r1, UChar r3, UShort i2)
-    assign(value, get_gpr_w1(r3 | 1));
-    put_gpr_w1(r1, binop(Iop_Add32, get_gpr_w1(r1), get_gpr_w1(r3)));
-    if_condition_goto(binop(Iop_CmpLE32S, get_gpr_w1(r1), mkexpr(value)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brxle";
- }
-@@ -5651,7 +5666,7 @@ s390_irgen_BRXLG(UChar r1, UChar r3, UShort i2)
-    assign(value, get_gpr_dw0(r3 | 1));
-    put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), get_gpr_dw0(r3)));
-    if_condition_goto(binop(Iop_CmpLE64S, get_gpr_dw0(r1), mkexpr(value)),
--                     guest_IA_curr_instr + ((ULong)(Long)(Short)i2 << 1));
-+                     addr_relative(i2));
- 
-    return "brxlg";
- }
-@@ -5782,8 +5797,7 @@ s390_irgen_CRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I32);
- 
-    assign(op1, get_gpr_w1(r1));
--   assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--          i2 << 1))));
-+   assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2))));
-    s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
- 
-    return "crl";
-@@ -5796,8 +5810,7 @@ s390_irgen_CGRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--          i2 << 1))));
-+   assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2))));
-    s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
- 
-    return "cgrl";
-@@ -5810,8 +5823,7 @@ s390_irgen_CGFRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
- 
-    return "cgfrl";
-@@ -5875,15 +5887,14 @@ s390_irgen_CRJ(UChar r1, UChar r2, UShort i4, UChar m3)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(
--                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_w1(r1));
-          assign(op2, get_gpr_w1(r2));
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE,
-                                               op1, op2));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -5901,15 +5912,14 @@ s390_irgen_CGRJ(UChar r1, UChar r2, UShort i4, UChar m3)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(
--                guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_dw0(r1));
-          assign(op2, get_gpr_dw0(r2));
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE,
-                                               op1, op2));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -5975,14 +5985,14 @@ s390_irgen_CIJ(UChar r1, UChar m3, UShort i4, UChar i2)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_w1(r1));
-          op2 = (Int)(Char)i2;
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, op1,
-                                               mktemp(Ity_I32, mkU32((UInt)op2))));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -6000,14 +6010,14 @@ s390_irgen_CGIJ(UChar r1, UChar m3, UShort i4, UChar i2)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_dw0(r1));
-          op2 = (Long)(Char)i2;
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_SIGNED_COMPARE, op1,
-                                               mktemp(Ity_I64, mkU64((ULong)op2))));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -6131,8 +6141,7 @@ s390_irgen_CHRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I32);
- 
-    assign(op1, get_gpr_w1(r1));
--   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
- 
-    return "chrl";
-@@ -6145,8 +6154,7 @@ s390_irgen_CGHRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_16Sto64, load(Ity_I16, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putSS(S390_CC_OP_SIGNED_COMPARE, op1, op2);
- 
-    return "cghrl";
-@@ -6401,8 +6409,7 @@ s390_irgen_CLRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I32);
- 
-    assign(op1, get_gpr_w1(r1));
--   assign(op2, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--          i2 << 1))));
-+   assign(op2, load(Ity_I32, mkU64(addr_rel_long(i2))));
-    s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
- 
-    return "clrl";
-@@ -6415,8 +6422,7 @@ s390_irgen_CLGRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--          i2 << 1))));
-+   assign(op2, load(Ity_I64, mkU64(addr_rel_long(i2))));
-    s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
- 
-    return "clgrl";
-@@ -6429,8 +6435,7 @@ s390_irgen_CLGFRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
- 
-    return "clgfrl";
-@@ -6443,8 +6448,7 @@ s390_irgen_CLHRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I32);
- 
-    assign(op1, get_gpr_w1(r1));
--   assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
- 
-    return "clhrl";
-@@ -6457,8 +6461,7 @@ s390_irgen_CLGHRL(UChar r1, UInt i2)
-    IRTemp op2 = newTemp(Ity_I64);
- 
-    assign(op1, get_gpr_dw0(r1));
--   assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
--          ((ULong)(Long)(Int)i2 << 1)))));
-+   assign(op2, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2)))));
-    s390_cc_thunk_putZZ(S390_CC_OP_UNSIGNED_COMPARE, op1, op2);
- 
-    return "clghrl";
-@@ -6730,14 +6733,14 @@ s390_irgen_CLRJ(UChar r1, UChar r2, UShort i4, UChar m3)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_w1(r1));
-          assign(op2, get_gpr_w1(r2));
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE,
-                                               op1, op2));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -6755,14 +6758,14 @@ s390_irgen_CLGRJ(UChar r1, UChar r2, UShort i4, UChar m3)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_dw0(r1));
-          assign(op2, get_gpr_dw0(r2));
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE,
-                                               op1, op2));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -6828,14 +6831,14 @@ s390_irgen_CLIJ(UChar r1, UChar m3, UShort i4, UChar i2)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_w1(r1));
-          op2 = (UInt)i2;
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1,
-                                               mktemp(Ity_I32, mkU32(op2))));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -6853,14 +6856,14 @@ s390_irgen_CLGIJ(UChar r1, UChar m3, UShort i4, UChar i2)
-    if (m3 == 0) {
-    } else {
-       if (m3 == 14) {
--         always_goto_and_chase(guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+         always_goto_and_chase(addr_relative(i4));
-       } else {
-          assign(op1, get_gpr_dw0(r1));
-          op2 = (ULong)i2;
-          assign(cond, s390_call_calculate_icc(m3, S390_CC_OP_UNSIGNED_COMPARE, op1,
-                                               mktemp(Ity_I64, mkU64(op2))));
-          if_condition_goto(binop(Iop_CmpNE32, mkexpr(cond), mkU32(0)),
--                           guest_IA_curr_instr + ((ULong)(Long)(Short)i4 << 1));
-+                           addr_relative(i4));
- 
-       }
-    }
-@@ -7539,8 +7542,7 @@ s390_irgen_LGFI(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_LRL(UChar r1, UInt i2)
- {
--   put_gpr_w1(r1, load(Ity_I32, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--              i2 << 1))));
-+   put_gpr_w1(r1, load(Ity_I32, mkU64(addr_rel_long(i2))));
- 
-    return "lrl";
- }
-@@ -7548,8 +7550,7 @@ s390_irgen_LRL(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_LGRL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, load(Ity_I64, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)
--               i2 << 1))));
-+   put_gpr_dw0(r1, load(Ity_I64, mkU64(addr_rel_long(i2))));
- 
-    return "lgrl";
- }
-@@ -7557,8 +7558,7 @@ s390_irgen_LGRL(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_LGFRL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
--               ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_dw0(r1, unop(Iop_32Sto64, load(Ity_I32, mkU64(addr_rel_long(i2)))));
- 
-    return "lgfrl";
- }
-@@ -7598,7 +7598,7 @@ s390_irgen_LAEY(UChar r1, IRTemp op2addr)
- static const HChar *
- s390_irgen_LARL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)));
-+   put_gpr_dw0(r1, mkU64(addr_rel_long(i2)));
- 
-    return "larl";
- }
-@@ -8038,8 +8038,7 @@ s390_irgen_LGHI(UChar r1, UShort i2)
- static const HChar *
- s390_irgen_LHRL(UChar r1, UInt i2)
- {
--   put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
--              ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_w1(r1, unop(Iop_16Sto32, load(Ity_I16, mkU64(addr_rel_long(i2)))));
- 
-    return "lhrl";
- }
-@@ -8047,8 +8046,7 @@ s390_irgen_LHRL(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_LGHRL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
--               ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_dw0(r1, unop(Iop_16Sto64, load(Ity_I16, mkU64(addr_rel_long(i2)))));
- 
-    return "lghrl";
- }
-@@ -8088,8 +8086,7 @@ s390_irgen_LLGF(UChar r1, IRTemp op2addr)
- static const HChar *
- s390_irgen_LLGFRL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(guest_IA_curr_instr +
--               ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_dw0(r1, unop(Iop_32Uto64, load(Ity_I32, mkU64(addr_rel_long(i2)))));
- 
-    return "llgfrl";
- }
-@@ -8169,8 +8166,7 @@ s390_irgen_LLGH(UChar r1, IRTemp op2addr)
- static const HChar *
- s390_irgen_LLHRL(UChar r1, UInt i2)
- {
--   put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(guest_IA_curr_instr +
--              ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_w1(r1, unop(Iop_16Uto32, load(Ity_I16, mkU64(addr_rel_long(i2)))));
- 
-    return "llhrl";
- }
-@@ -8178,8 +8174,7 @@ s390_irgen_LLHRL(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_LLGHRL(UChar r1, UInt i2)
- {
--   put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(guest_IA_curr_instr +
--               ((ULong)(Long)(Int)i2 << 1)))));
-+   put_gpr_dw0(r1, unop(Iop_16Uto64, load(Ity_I16, mkU64(addr_rel_long(i2)))));
- 
-    return "llghrl";
- }
-@@ -10064,8 +10059,7 @@ s390_irgen_STG(UChar r1, IRTemp op2addr)
- static const HChar *
- s390_irgen_STRL(UChar r1, UInt i2)
- {
--   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
--         get_gpr_w1(r1));
-+   store(mkU64(addr_rel_long(i2)), get_gpr_w1(r1));
- 
-    return "strl";
- }
-@@ -10073,8 +10067,7 @@ s390_irgen_STRL(UChar r1, UInt i2)
- static const HChar *
- s390_irgen_STGRL(UChar r1, UInt i2)
- {
--   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
--         get_gpr_dw0(r1));
-+   store(mkU64(addr_rel_long(i2)), get_gpr_dw0(r1));
- 
-    return "stgrl";
- }
-@@ -10203,8 +10196,7 @@ s390_irgen_STHY(UChar r1, IRTemp op2addr)
- static const HChar *
- s390_irgen_STHRL(UChar r1, UInt i2)
- {
--   store(mkU64(guest_IA_curr_instr + ((ULong)(Long)(Int)i2 << 1)),
--         get_gpr_hw3(r1));
-+   store(mkU64(addr_rel_long(i2)), get_gpr_hw3(r1));
- 
-    return "sthrl";
- }
-@@ -13282,7 +13274,7 @@ static const HChar *
- s390_irgen_EXRL(UChar r1, UInt offset)
- {
-    IRTemp addr = newTemp(Ity_I64);
--   Addr64 bytes_addr = guest_IA_curr_instr + offset * 2UL;
-+   Addr64 bytes_addr = addr_rel_long(offset);
-    UChar *bytes = (UChar *)(HWord)bytes_addr;
-    /* we might save one round trip because we know the target */
-    if (!last_execute_target)
-diff --git a/none/tests/s390x/exrl.c b/none/tests/s390x/exrl.c
-index 2c99602d8..e669e484f 100644
---- a/none/tests/s390x/exrl.c
-+++ b/none/tests/s390x/exrl.c
-@@ -54,6 +54,17 @@ int main(void)
-    printf("|\n");
-    printf("\n");
- 
-+   printf("------- EXRL with negative offset\n");
-+   asm volatile( "j    2f\n\t"
-+                 "1:\n\t"
-+                 "mvc  2(1,%0),0(%0)\n\t"
-+                 "2:\n\t"
-+                 "lghi 1,8\n\t"
-+                 ".insn ril,0xc60000000000,1,1b\n\t" // exrl 1, 1b
-+                 : : "a" (target)
-+                 : "1", "2", "3", "4");
-+   printf("        target = |%s|\n", target);
-+
-    return 0;
- }
- 
-diff --git a/none/tests/s390x/exrl.stdout.exp b/none/tests/s390x/exrl.stdout.exp
-index 520919e92..30dcde829 100644
---- a/none/tests/s390x/exrl.stdout.exp
-+++ b/none/tests/s390x/exrl.stdout.exp
-@@ -11,3 +11,5 @@ after:  target = |0123456789aXXXXX|
- ------- EXRL to OR in the syscall number (writes out target)
-         target = |0123456789aXXXXX|
- 
-+------- EXRL with negative offset
-+        target = |01010101010XXXXX|
diff --git a/valgrind-3.18.1-s390x-vdso.patch b/valgrind-3.18.1-s390x-vdso.patch
deleted file mode 100644
index 501fc56..0000000
--- a/valgrind-3.18.1-s390x-vdso.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-commit 99bf5dabf7865aaea7f2192373633e026c6fb16e
-Author: Andreas Arnez <arnez@linux.ibm.com>
-Date:   Thu Dec 9 15:27:41 2021 +0100
-
-    Bug 444481 - Don't unmap the vDSO on s390x
-    
-    Newer Linux kernels on s390x may use the vDSO as a "trampoline" for
-    syscall restart.  This means that the vDSO is no longer optional, and
-    unmapping it may lead to a segmentation fault when a system call restart
-    is performed.
-    
-    So far Valgrind has been unmapping the vDSO on s390x.  Just don't do this
-    anymore.
-
-diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
-index 7d02d5567..95508ad1e 100644
---- a/coregrind/m_initimg/initimg-linux.c
-+++ b/coregrind/m_initimg/initimg-linux.c
-@@ -892,7 +892,8 @@ Addr setup_client_stack( void*  init_sp,
- #        if !defined(VGP_ppc32_linux) && !defined(VGP_ppc64be_linux) \
-             && !defined(VGP_ppc64le_linux) \
-             && !defined(VGP_mips32_linux) && !defined(VGP_mips64_linux) \
--            && !defined(VGP_nanomips_linux)
-+            && !defined(VGP_nanomips_linux) \
-+            && !defined(VGP_s390x_linux)
-          case AT_SYSINFO_EHDR: {
-             /* Trash this, because we don't reproduce it */
-             const NSegment* ehdrseg = VG_(am_find_nsegment)((Addr)auxv->u.a_ptr);
diff --git a/valgrind-3.18.1-s390x-wflrx.patch b/valgrind-3.18.1-s390x-wflrx.patch
deleted file mode 100644
index 6639756..0000000
--- a/valgrind-3.18.1-s390x-wflrx.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From da3b331c63a6aec0ec3206b1d9ca0df9bced3338 Mon Sep 17 00:00:00 2001
-From: Andreas Arnez <arnez@linux.ibm.com>
-Date: Mon, 3 Jan 2022 18:15:05 +0100
-Subject: [PATCH] s390: Fix VFLRX and WFLRX instructions
-
-Due to a typo in s390_irgen_VFLR, the VFLR instruction behaves incorrectly
-when its m3 field contains 4, meaning extended format.  In that case VFLR
-is also written as VFLRX (or WFLRX) and supposed to round down from the
-extended 128-bit format to the long 64-bit format.  However, the typo
-checks for m3 == 2 instead, so the value of 4 is unhandled, causing
-Valgrind to throw a specification exception.
-
-This fixes the typo.
----
- VEX/priv/guest_s390_toIR.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
-index fffc563d4..3ef104fcd 100644
---- a/VEX/priv/guest_s390_toIR.c
-+++ b/VEX/priv/guest_s390_toIR.c
-@@ -19008,7 +19008,7 @@ s390_irgen_VFLL(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
- static const HChar *
- s390_irgen_VFLR(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
- {
--   s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 2));
-+   s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 4));
- 
-    if (m3 == 3)
-       s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, True,
--- 
-2.31.1
-
diff --git a/valgrind.spec b/valgrind.spec
index 6bbee75..fdf40bd 100644
--- a/valgrind.spec
+++ b/valgrind.spec
@@ -1,12 +1,12 @@
 %{?scl:%scl_package valgrind}
 
-Summary: Tool for finding memory management bugs in programs
+Summary: Dynamic analysis tools to detect memory or thread bugs and profile
 Name: %{?scl_prefix}valgrind
-Version: 3.18.1
-Release: 9%{?dist}
+Version: 3.19.0
+Release: 1%{?dist}
 Epoch: 1
 License: GPLv2+
-URL: http://www.valgrind.org/
+URL: https://www.valgrind.org/
 
 # Only necessary for RHEL, will be ignored on Fedora
 
@@ -77,77 +77,13 @@ Source0: https://sourceware.org/pub/valgrind/valgrind-%{version}.tar.bz2
 Patch1: valgrind-3.9.0-cachegrind-improvements.patch
 
 # Make ld.so supressions slightly less specific.
-Patch3: valgrind-3.9.0-ldso-supp.patch
+Patch2: valgrind-3.9.0-ldso-supp.patch
 
 # Add some stack-protector
-Patch4: valgrind-3.16.0-some-stack-protector.patch
+Patch3: valgrind-3.16.0-some-stack-protector.patch
 
 # Add some -Wl,z,now.
-Patch5: valgrind-3.16.0-some-Wl-z-now.patch
-
-# KDE#444495 dhat/tests/copy fails on s390x
-Patch6: valgrind-3.18.1-dhat-tests-copy.patch
-
-# KDE#444242 s390x: Sign-extend "relative long" offset in EXRL
-Patch7: valgrind-3.18.1-s390x-EXRL.patch
-
-# KDE#444571 - PPC, fix lxsibzx and lxsihzx
-Patch8: valgrind-3.18.1-ppc64-lxsibzx-lxsihzx.patch
-
-# commit ae8c6de01417023e78763de145b1c0e6ddd87277
-# commit 3950c5d661ee09526cddcf24daf5fc22bc83f70c
-# Fix for the prefixed stq instruction in PC relative mode.
-# KDE#444836 pstq instruction for R=1 is not storing to the correct address
-Patch9: valgrind-3.18.1-ppc-pstq.patch
-Patch10: valgrind-3.18.1-ppc-pstq-tests.patch
-
-# commit 64ab89162906d5b9e2de6c3afe476fec861ef7ec
-# gdbserver_tests: Filter out glibc hwcaps libc.so
-Patch11: valgrind-3.18.1-gdbserver_tests-hwcap.patch
-
-# KDE#445184 Rust v0 symbol demangling is broken 
-Patch12: valgrind-3.18.1-rust-v0-demangle.patch
-
-# KDE#445354 arm64 backend: incorrect code emitted for doubleword CAS
-Patch13: valgrind-3.18.1-arm64-doubleword-cas.patch
-
-# KDE#444399 arm64: unhandled instruction LD{,A}XP and ST{,L}XP
-Patch14: valgrind-3.18.1-arm64-ldaxp-stlxp.patch
-
-# KDE#445415 arm64 front end: alignment checks missing for atomic instructions.
-Patch15: valgrind-3.18.1-arm64-atomic-align.patch
-
-# commit 595341b150312d2407bd43304449bf39ec3e1fa8
-# amd64 front end: add more spec rules
-Patch16: valgrind-3.18.1-amd64-more-spec-rules.patch
-
-# KDE#445504 Using C++ condition_variable results in bogus
-# "mutex is locked simultaneously by two threads" warning
-Patch17: valgrind-3.18.1-condvar.patch
-
-# KDE#445668 Inline stack frame generation is broken for Rust binaries
-Patch18: valgrind-3.18.1-demangle-namespace.patch
-
-# KDE#405377 Handle new Linux kernel feature: Restartable Sequences ("rseq")
-Patch19: valgrind-3.18.1-rseq-enosys.patch
-
-# KDE#444481  gdb_server test failures on s390x
-Patch20: valgrind-3.18.1-s390x-vdso.patch
-
-# KDE#447995 Valgrind segfault on power10 due to hwcap checking code
-Patch21: valgrind-3.18.1-ppc-hwcaps.patch
-
-# KDE#447991 s390x: Valgrind indicates illegal instruction on wflrx
-Patch22: valgrind-3.18.1-s390x-wflrx.patch
-
-# KDE#449672 ppc64 --track-origins=yes failure because of bad cmov addHRegUse
-Patch23: valgrind-3.18.1-ppc64-cmov.patch
-
-# KDE#449494 arm64: Mismatch detected between RDMA and atomics features
-Patch24: valgrind-3.18.1-arm64-atomics-rdm.patch
-
-# KDE#445916 Demangle Rust v0 symbols with .llvm suffix
-Patch25: valgrind-3.18.1-rust-demangle-suffix.patch
+Patch4: valgrind-3.16.0-some-Wl-z-now.patch
 
 BuildRequires: make
 BuildRequires: glibc-devel
@@ -278,34 +214,14 @@ Valgrind User Manual for details.
 %setup -q -n %{?scl:%{pkg_name}}%{!?scl:%{name}}-%{version}
 
 %patch1 -p1
-%patch3 -p1
+%patch2 -p1
 
 # Old rhel gcc doesn't have -fstack-protector-strong.
 %if 0%{?fedora} || 0%{?rhel} >= 7
+%patch3 -p1
 %patch4 -p1
-%patch5 -p1
 %endif
 
-%patch6 -p1
-%patch7 -p1
-%patch8 -p1
-%patch9 -p1
-%patch10 -p1
-%patch11 -p1
-%patch12 -p1
-%patch13 -p1
-%patch14 -p1
-%patch15 -p1
-%patch16 -p1
-%patch17 -p1
-%patch18 -p1
-%patch19 -p1
-%patch20 -p1
-%patch21 -p1
-%patch22 -p1
-%patch23 -p1
-%patch24 -p1
-%patch25 -p1
 
 %build
 # LTO triggers undefined symbols in valgrind.  Valgrind has a --enable-lto
@@ -535,6 +451,9 @@ fi
 %endif
 
 %changelog
+* Tue Apr 12 2022 Mark Wielaard <mjw@fedoraproject.org> - 3.19.0-1
+- Upgrade to valgrind 3.19.0. Drop old patches.
+
 * Tue Feb  8 2022 Mark Wielaard <mjw@fedoraproject.org>
 - Add valgrind-3.18.1-ppc64-cmov.patch
 - Add valgrind-3.18.1-arm64-atomics-rdm.patch