diff --git a/valgrind-3.17.0-s390-prep.patch b/valgrind-3.17.0-s390-prep.patch
new file mode 100644
index 0000000..8f2dbb1
--- /dev/null
+++ b/valgrind-3.17.0-s390-prep.patch
@@ -0,0 +1,2283 @@
+commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Wed Apr 28 18:52:30 2021 +0200
+
+    Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
+    
+    The fix for bug 429864 - "s390x: C++ atomic test_and_set yields
+    false-positive memcheck diagnostics" changes the memcheck behavior at
+    various compare-and-swap instructions.  The comparison between the old and
+    expected value now always yields a defined result, even if the input
+    values are (partially) undefined.  However, some existing test cases
+    explicitly verify that memcheck complains about the use of uninitialised
+    values here.  These test cases are no longer valid.  Remove them.
+
+diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
+index 67ae8c293..e4e69eb38 100644
+--- a/memcheck/tests/s390x/Makefile.am
++++ b/memcheck/tests/s390x/Makefile.am
+@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
+ 
+ dist_noinst_SCRIPTS = filter_stderr
+ 
+-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe
++INSN_TESTS = cdsg cu21 cu42 ltgjhe
+ 
+ check_PROGRAMS = $(INSN_TESTS) 
+ 
+@@ -14,7 +14,3 @@ EXTRA_DIST = \
+ AM_CFLAGS    += @FLAG_M64@
+ AM_CXXFLAGS  += @FLAG_M64@
+ AM_CCASFLAGS += @FLAG_M64@
+-
+-cs_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
+-csg_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
+-cds_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
+diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c
+deleted file mode 100644
+index ec5c533e0..000000000
+--- a/memcheck/tests/s390x/cds.c
++++ /dev/null
+@@ -1,82 +0,0 @@
+-#include <stdint.h>
+-#include <stdio.h>
+-
+-typedef struct {
+-   uint64_t high;
+-   uint64_t low;
+-} quad_word;
+-
+-void 
+-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init)
+-{
+-   int cc; // unused
+-   quad_word op1 = op1_init;
+-   uint64_t  op2 = op2_init;
+-   quad_word op3 = op3_init;
+-
+-   __asm__ volatile (
+-                     "lmg     %%r0,%%r1,%1\n\t"
+-                     "lmg     %%r2,%%r3,%3\n\t"
+-                     "cds     %%r0,%%r2,%2\n\t"  //  cds 1st,3rd,2nd
+-                     "stmg    %%r0,%%r1,%1\n"    // store r0,r1 to op1
+-                     "stmg    %%r2,%%r3,%3\n"    // store r2,r3 to op3
+-                     : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3)
+-                     :
+-                     : "r0", "r1", "r2", "r3", "cc");
+-
+-}
+-
+-// Return a quad-word that only bits low[32:63] are undefined
+-quad_word
+-make_undefined(void)
+-{
+-   quad_word val;
+-
+-   val.high = 0;
+-   val.low |= 0xFFFFFFFF00000000ull;
+-
+-   return val;
+-}
+-
+-void op1_undefined(void)
+-{
+-   quad_word op1, op3;
+-   uint64_t op2;
+-
+-   // op1 undefined
+-   op1 = make_undefined();
+-   op2 = 42;
+-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
+-   test(op1, op2, op3);  // complaint
+-}
+-
+-void op2_undefined(void)
+-{
+-   quad_word op1, op3;
+-   uint64_t op2;
+-
+-   op1.high = op1.low = 42;
+-   // op2 undefined
+-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
+-   test(op1, op2, op3);  // complaint
+-}
+-
+-void op3_undefined(void)
+-{
+-   quad_word op1, op3;
+-   uint64_t op2;
+-
+-   op1.high = op1.low = 42;
+-   op2 = 100;
+-   op3 = make_undefined();
+-   test(op1, op2, op3);  // no complaint; op3 is just copied around
+-}
+-
+-int main ()
+-{
+-   op1_undefined();
+-   op2_undefined();
+-   op3_undefined();
+-
+-   return 0;
+-}
+diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp
+deleted file mode 100644
+index e72de94c8..000000000
+--- a/memcheck/tests/s390x/cds.stderr.exp
++++ /dev/null
+@@ -1,10 +0,0 @@
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (cds.c:17)
+-   by 0x........: op1_undefined (cds.c:50)
+-   by 0x........: main (cds.c:77)
+-
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (cds.c:17)
+-   by 0x........: op2_undefined (cds.c:61)
+-   by 0x........: main (cds.c:78)
+-
+diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp
+deleted file mode 100644
+index e69de29bb..000000000
+diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest
+deleted file mode 100644
+index 5195887e2..000000000
+--- a/memcheck/tests/s390x/cds.vgtest
++++ /dev/null
+@@ -1,2 +0,0 @@
+-prog: cds
+-vgopts: -q
+diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c
+deleted file mode 100644
+index 9a298cef9..000000000
+--- a/memcheck/tests/s390x/cs.c
++++ /dev/null
+@@ -1,32 +0,0 @@
+-#include <stdint.h>
+-#include <stdio.h>
+-#include <string.h>
+-
+-void 
+-test(int32_t op1_init, int32_t op2_init, int32_t op3_init)
+-{
+-   register int32_t op1 asm("8") = op1_init;
+-   register int32_t op3 asm("9") = op3_init;
+-   
+-   int32_t op2 = op2_init;
+-   int cc = 1; 
+-
+-   __asm__ volatile (
+-           "cs      8,9,%1\n\t"
+-           "ipm     %0\n\t"
+-           "srl     %0,28\n\t"
+-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
+-           : 
+-           : "cc");
+-}
+-
+-int main ()
+-{
+-   int op1, op2, op3;
+-
+-   test(op1, 0x10000000, 0x12345678);   // complaint
+-   test(0x10000000, op2, 0x12345678);   // complaint
+-   test(0x10000000, 0x01000000, op3);   // no complaint
+-
+-   return 0;
+-}
+diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp
+deleted file mode 100644
+index e45dc99cd..000000000
+--- a/memcheck/tests/s390x/cs.stderr.exp
++++ /dev/null
+@@ -1,8 +0,0 @@
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (cs.c:14)
+-   by 0x........: main (cs.c:27)
+-
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (cs.c:14)
+-   by 0x........: main (cs.c:28)
+-
+diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp
+deleted file mode 100644
+index e69de29bb..000000000
+diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest
+deleted file mode 100644
+index 323cce80c..000000000
+--- a/memcheck/tests/s390x/cs.vgtest
++++ /dev/null
+@@ -1,2 +0,0 @@
+-prog: cs
+-vgopts: -q
+diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c
+deleted file mode 100644
+index 7f9d8c88e..000000000
+--- a/memcheck/tests/s390x/csg.c
++++ /dev/null
+@@ -1,32 +0,0 @@
+-#include <stdint.h>
+-#include <stdio.h>
+-#include <string.h>
+-
+-void 
+-test(int64_t op1_init, int64_t op2_init, int64_t op3_init)
+-{
+-   register int64_t op1 asm("8") = op1_init;
+-   register int64_t op3 asm("9") = op3_init;
+-   
+-   int64_t op2 = op2_init;
+-   int cc = 1; 
+-
+-   __asm__ volatile (
+-           "csg     8,9,%1\n\t"
+-           "ipm     %0\n\t"
+-           "srl     %0,28\n\t"
+-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
+-           : 
+-           : "cc");
+-}
+-
+-int main ()
+-{
+-   int64_t op1, op2, op3;
+-
+-   test(op1, 0x1000000000000000ull, 0x1234567887654321ull);  // complaint
+-   test(0x1000000000000000ull, op2, 0x1234567887654321ull);  // complaint
+-   test(0x1000000000000000ull, 0x1000000000000000ull, op3);  // no complaint
+-
+-   return 0;
+-}
+diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp
+deleted file mode 100644
+index fda2021ce..000000000
+--- a/memcheck/tests/s390x/csg.stderr.exp
++++ /dev/null
+@@ -1,8 +0,0 @@
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (csg.c:14)
+-   by 0x........: main (csg.c:27)
+-
+-Conditional jump or move depends on uninitialised value(s)
+-   at 0x........: test (csg.c:14)
+-   by 0x........: main (csg.c:28)
+-
+diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp
+deleted file mode 100644
+index e69de29bb..000000000
+diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest
+deleted file mode 100644
+index 6de75c1d6..000000000
+--- a/memcheck/tests/s390x/csg.vgtest
++++ /dev/null
+@@ -1,2 +0,0 @@
+-prog: csg
+-vgopts: -q
+
+commit 18ddcc47c951427efd3b790ba2481159b9bd1598
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Wed Apr 7 16:48:29 2021 +0200
+
+    s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
+    
+    Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction
+    selector.  Handle them exactly like the "inexpensive" variants Iop_CmpNE32
+    and Iop_CmpNE64.
+
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index 2000ec224..5f79280c0 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond)
+ 
+       case Iop_CmpNE32:
+       case Iop_CmpNE64:
++      case Iop_ExpCmpNE32:
++      case Iop_ExpCmpNE64:
+       case Iop_CasCmpNE32:
+       case Iop_CasCmpNE64:
+          result = S390_CC_NE;
+
+commit 5db3f929c43bf46f4707178706cfe90f43acdd19
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Wed Apr 7 12:30:20 2021 +0200
+
+    s390x: Add convenience function mkV128()
+    
+    Provide mkV128() as a short-hand notation for creating a vector constant from
+    a bit pattern, similar to other such functions like mkU64().
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 339377007..7d54cb551 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -376,6 +376,13 @@ mkU64(ULong value)
+    return IRExpr_Const(IRConst_U64(value));
+ }
+ 
++/* Create an expression node for a 128-bit vector constant */
++static __inline__ IRExpr *
++mkV128(UShort value)
++{
++   return IRExpr_Const(IRConst_V128(value));
++}
++
+ /* Create an expression node for a 32-bit floating point constant
+    whose value is given by a bit pattern. */
+ static __inline__ IRExpr *
+@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4)
+ static const HChar *
+ s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused)))
+ {
+-   put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2)));
++   put_vr_qw(v1, mkV128(i2));
+ 
+    return "vgbm";
+ }
+@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4)
+    switch(type) {
+    case Ity_I8:
+       sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2)));
+-      mask = IRExpr_Const(IRConst_V128(0b0001000100010001));
++      mask = mkV128(0b0001000100010001);
+       break;
+    case Ity_I16:
+       sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2));
+-      mask = IRExpr_Const(IRConst_V128(0b0011001100110011));
++      mask = mkV128(0b0011001100110011);
+       break;
+    default:
+       vpanic("s390_irgen_VSUM: invalid type ");
+@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4)
+    switch(type) {
+    case Ity_I16:
+       sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2)));
+-      mask = IRExpr_Const(IRConst_V128(0b0000001100000011));
++      mask = mkV128(0b0000001100000011);
+       break;
+    case Ity_I32:
+       sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2));
+-      mask = IRExpr_Const(IRConst_V128(0b0000111100001111));
++      mask = mkV128(0b0000111100001111);
+       break;
+    default:
+       vpanic("s390_irgen_VSUMG: invalid type ");
+@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4)
+    switch(type) {
+    case Ity_I32:
+       sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2)));
+-      mask = IRExpr_Const(IRConst_V128(0b0000000000001111));
++      mask = mkV128(0b0000000000001111);
+       break;
+    case Ity_I64:
+       sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2));
+-      mask = IRExpr_Const(IRConst_V128(0b0000000011111111));
++      mask = mkV128(0b0000000011111111);
+       break;
+    default:
+       vpanic("s390_irgen_VSUMQ: invalid type ");
+@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
+          assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
+       }
+       put_vr_qw(v1, mkite(mkexpr(cond),
+-                          IRExpr_Const(IRConst_V128(0xffff)),
+-                          IRExpr_Const(IRConst_V128(0))));
++                          mkV128(0xffff),
++                          mkV128(0)));
+       if (s390_vr_is_cs_set(m6)) {
+          IRTemp cc = newTemp(Ity_I64);
+          assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
+
+commit e78bd78d3043729033b426218ab8c6dae9c51e96
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Thu Mar 18 18:01:10 2021 +0100
+
+    Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
+    
+    The z/Architecture instructions "vector string range compare" (VSTRC),
+    "vector find any element equal" (VFAE), and "vector find element
+    equal" (VFEE) are each implemented with a dirty helper that executes the
+    instruction.  Unfortunately this approach leads to memcheck false
+    positives, because these instructions may yield a defined result even if
+    parts of the input vectors are undefined.  There are multiple ways this
+    can happen: Wherever the flags in the fourth operand to VSTRC indicate
+    "match always" or "match never", the corresponding elements in the third
+    operand don't affect the result.  The same is true for the elements
+    following the first zero-element in the second operand if the ZS flag is
+    set, or for the elements following the first matching element, if any.
+    
+    Re-implement the instructions without dirty helpers and transform into
+    lengthy IR instead.
+
+diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
+index 905429015..49b6cd5dd 100644
+--- a/VEX/priv/guest_s390_defs.h
++++ b/VEX/priv/guest_s390_defs.h
+@@ -265,11 +265,8 @@ typedef enum {
+    S390_VEC_OP_INVALID = 0,
+    S390_VEC_OP_VPKS,
+    S390_VEC_OP_VPKLS,
+-   S390_VEC_OP_VFAE,
+-   S390_VEC_OP_VFEE,
+    S390_VEC_OP_VFENE,
+    S390_VEC_OP_VISTR,
+-   S390_VEC_OP_VSTRC,
+    S390_VEC_OP_VCEQ,
+    S390_VEC_OP_VTM,
+    S390_VEC_OP_VGFM,
+diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
+index b71b621ae..63d2e8ce5 100644
+--- a/VEX/priv/guest_s390_helpers.c
++++ b/VEX/priv/guest_s390_helpers.c
+@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+       {0x00, 0x00}, /* invalid */
+       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
+       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
+-      [S390_VEC_OP_VFAE]  = {0xe7, 0x82},
+-      [S390_VEC_OP_VFEE]  = {0xe7, 0x80},
+       [S390_VEC_OP_VFENE] = {0xe7, 0x81},
+       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
+-      [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
+       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
+       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
+       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
+@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+ 
+    case S390_VEC_OP_VPKS:
+    case S390_VEC_OP_VPKLS:
+-   case S390_VEC_OP_VFAE:
+-   case S390_VEC_OP_VFEE:
+    case S390_VEC_OP_VFENE:
+    case S390_VEC_OP_VCEQ:
+    case S390_VEC_OP_VGFM:
+@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+       the_insn.VRR.m5 = d->m5;
+       break;
+ 
+-   case S390_VEC_OP_VSTRC:
+    case S390_VEC_OP_VGFMA:
+    case S390_VEC_OP_VMAH:
+    case S390_VEC_OP_VMALH:
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 7d54cb551..26a947813 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2)
+    return "ppno";
+ }
+ 
+-static const HChar *
+-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+-{
+-   IRDirty* d;
+-   IRTemp cc = newTemp(Ity_I64);
++enum s390_VStrX {
++   s390_VStrX_VSTRC,
++   s390_VStrX_VFAE,
++   s390_VStrX_VFEE
++};
+ 
+-   /* Check for specification exception */
+-   vassert(m4 < 3);
++#define S390_VEC_OP3(m, op0, op1, op2) /* select op by m = 0, 1, 2 */   \
++   ((m) == 0 ? (op0) : (m) == 1 ? (op1) : (m) == 2 ? (op2) : Iop_INVALID)
+ 
+-   s390x_vec_op_details_t details = { .serialized = 0ULL };
+-   details.op = S390_VEC_OP_VFAE;
+-   details.v1 = v1;
+-   details.v2 = v2;
+-   details.v3 = v3;
+-   details.m4 = m4;
+-   details.m5 = m5;
+-
+-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+-                         &s390x_dirtyhelper_vec_op,
+-                         mkIRExprVec_2(IRExpr_GSPTR(),
+-                                       mkU64(details.serialized)));
++/* Helper function for transforming VSTRC, VFAE, or VFEE.  These instructions
++   share much of the same logic. */
++static void
++s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
++                 UChar m6, enum s390_VStrX which_insn)
++{
++   IRTemp op2 = newTemp(Ity_V128);
++   IRTemp op3 = newTemp(Ity_V128);
++   IRExpr* tmp;
++   IRExpr* match = NULL;
++   UChar bitwidth = 8 << m5;
++   UChar n_elem = 16 >> m5;
++   IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4);
++   IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4);
++   IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4);
++   IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4);
++   IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
++                                    Iop_CmpEQ16x8, Iop_CmpEQ32x4);
++   IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16,
++                                    Iop_CmpGT16Ux8, Iop_CmpGT32Ux4);
++   IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16,
++                                      Iop_GetElem16x8, Iop_GetElem32x4);
++
++   assign(op2, get_vr_qw(v2));
++   assign(op3, get_vr_qw(v3));
++
++   switch (which_insn) {
++
++   case s390_VStrX_VSTRC: {
++      IRTemp op4 = newTemp(Ity_V128);
++      assign(op4, get_vr_qw(v4));
++
++      /* Mask off insignificant range boundaries from op3, i.e., all those for
++         which the corresponding field in op4 has all or no bits set ("match
++         always" / "match never"). */
++      IRTemp bounds = newTemp(Ity_V128);
++      tmp = unop(Iop_NotV128,
++                 binop(cmpeq_op, mkV128(0),
++                       binop(sar_op,
++                             binop(sub_op,
++                                   binop(sar_op, mkexpr(op4),
++                                         mkU8(bitwidth - 3)),
++                                   mkV128(-1)),
++                             mkU8(1))));
++      assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp));
++
++      IRTemp flags_eq = newTemp(Ity_V128);
++      IRTemp flags_lt = newTemp(Ity_V128);
++      IRTemp flags_gt = newTemp(Ity_V128);
++      assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1)));
++      assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)),
++                             mkU8(bitwidth - 1)));
++      assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)),
++                             mkU8(bitwidth - 1)));
++
++      for (UChar idx = 0; idx < n_elem; idx += 2) {
++         /* Match according to the even/odd pairs in op3 and op4 at idx */
++         IRTemp part[2];
++
++         for (UChar j = 0; j < 2; j++) {
++            IRTemp a = newTemp(Ity_V128);
++            assign(a, unop(dup_op,
++                           binop(getelem_op, mkexpr(bounds), mkU8(idx + j))));
++
++            IRExpr* m[] = {
++               binop(cmpeq_op, mkexpr(op2), mkexpr(a)),
++               binop(cmpgt_op, mkexpr(a), mkexpr(op2)),
++               binop(cmpgt_op, mkexpr(op2), mkexpr(a))
++            };
++            IRExpr* f[] = {
++               unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))),
++               unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))),
++               unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j)))
++            };
++            part[j] = newTemp(Ity_V128);
++            assign(part[j], binop(Iop_OrV128,
++                                  binop(Iop_OrV128,
++                                        binop(Iop_AndV128, f[0], m[0]),
++                                        binop(Iop_AndV128, f[1], m[1])),
++                                  binop(Iop_AndV128, f[2], m[2])));
++         }
++         tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1]));
++         match = idx == 0 ? tmp : binop(Iop_OrV128, match, tmp);
++      }
++      break;
++   }
+ 
+-   d->nFxState = 3;
+-   vex_bzero(&d->fxState, sizeof(d->fxState));
+-   d->fxState[0].fx     = Ifx_Read;
+-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+-   d->fxState[0].size   = sizeof(V128);
+-   d->fxState[1].fx     = Ifx_Read;
+-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+-   d->fxState[1].size   = sizeof(V128);
+-   d->fxState[2].fx     = Ifx_Write;
+-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+-   d->fxState[2].size   = sizeof(V128);
++   case s390_VStrX_VFAE:
++      for (UChar idx = 0; idx < n_elem; idx++) {
++         IRTemp a = newTemp(Ity_V128);
++         assign(a, binop(cmpeq_op, mkexpr(op2),
++                         unop(dup_op,
++                              binop(getelem_op, mkexpr(op3), mkU8(idx)))));
++         match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a));
++      }
++      break;
+ 
+-   stmt(IRStmt_Dirty(d));
++   case s390_VStrX_VFEE:
++      match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3));
++      break;
+ 
+-   if (s390_vr_is_cs_set(m5)) {
+-      s390_cc_set(cc);
++   default:
++      vpanic("s390_irgen_VStrX: unknown insn");
+    }
+ 
+-   return "vfae";
+-}
++   /* Invert first intermediate result if requested */
++   if (m6 & 8)
++      match = unop(Iop_NotV128, match);
+ 
+-static const HChar *
+-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+-{
+-   IRDirty* d;
+-   IRTemp cc = newTemp(Ity_I64);
++   IRTemp inter1 = newTemp(Ity_V128);
++   IRTemp inter2 = newTemp(Ity_V128);
++   IRTemp accu = newTemp(Ity_V128);
++   assign(inter1, match);
+ 
+-   /* Check for specification exception */
+-   vassert(m4 < 3);
+-   vassert((m5 & 0b1100) == 0);
++   /* Determine second intermediate and accumulated result */
++   if (s390_vr_is_zs_set(m6)) {
++      assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0)));
++      assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2)));
++   } else {
++      assign(inter2, mkV128(0));
++      assign(accu, mkexpr(inter1));
++   }
+ 
+-   s390x_vec_op_details_t details = { .serialized = 0ULL };
+-   details.op = S390_VEC_OP_VFEE;
+-   details.v1 = v1;
+-   details.v2 = v2;
+-   details.v3 = v3;
+-   details.m4 = m4;
+-   details.m5 = m5;
++   IRTemp accu0 = newTemp(Ity_I64);
++   IRTemp is_match0 = newTemp(Ity_I1);
++   IRTemp mismatch_bits = newTemp(Ity_I64);
+ 
+-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+-                         &s390x_dirtyhelper_vec_op,
+-                         mkIRExprVec_2(IRExpr_GSPTR(),
+-                                       mkU64(details.serialized)));
++   assign(accu0, unop(Iop_V128HIto64, mkexpr(accu)));
++   assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0)));
++   assign(mismatch_bits, unop(Iop_ClzNat64,
++                              mkite(mkexpr(is_match0), mkexpr(accu0),
++                                    unop(Iop_V128to64, mkexpr(accu)))));
+ 
+-   d->nFxState = 3;
+-   vex_bzero(&d->fxState, sizeof(d->fxState));
+-   d->fxState[0].fx     = Ifx_Read;
+-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+-   d->fxState[0].size   = sizeof(V128);
+-   d->fxState[1].fx     = Ifx_Read;
+-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+-   d->fxState[1].size   = sizeof(V128);
+-   d->fxState[2].fx     = Ifx_Write;
+-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+-   d->fxState[2].size   = sizeof(V128);
++   if (m6 & 4) {
++      put_vr_qw(v1, mkexpr(inter1));
++   } else {
++      /* Determine byte position of first match */
++      tmp = binop(Iop_Add64,
++                  binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)),
++                  mkite(mkexpr(is_match0), mkU64(0), mkU64(8)));
++      put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
++   }
+ 
+-   stmt(IRStmt_Dirty(d));
++   if (s390_vr_is_cs_set(m6)) {
++      /* Set condition code depending on...
++                   zero found
++                      n  y
++                    +------
++         match    n | 3  0
++          found   y | 1  2   */
+ 
+-   if (s390_vr_is_cs_set(m5)) {
++      IRTemp cc = newTemp(Ity_I64);
++
++      tmp = binop(Iop_Shr64,
++                  mkite(mkexpr(is_match0),
++                        unop(Iop_V128HIto64, mkexpr(inter1)),
++                        unop(Iop_V128to64, mkexpr(inter1))),
++                  unop(Iop_64to8,
++                       binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits))));
++      tmp = binop(Iop_Shl64, tmp, mkU8(1));
++      if (s390_vr_is_zs_set(m6)) {
++         tmp = binop(Iop_Xor64, tmp,
++                     mkite(binop(Iop_ExpCmpNE64, mkU64(0),
++                                 binop(Iop_Or64,
++                                       unop(Iop_V128HIto64, mkexpr(inter2)),
++                                       unop(Iop_V128to64, mkexpr(inter2)))),
++                           mkU64(0),
++                           mkU64(3)));
++      } else {
++         tmp = binop(Iop_Xor64, tmp, mkU64(3));
++      }
++      assign(cc, tmp);
+       s390_cc_set(cc);
+    }
++   dis_res->hint = Dis_HintVerbose;
++}
+ 
++static const HChar *
++s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
++{
++   s390_insn_assert("vfae", m4 <= 2);
++   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE);
++   return "vfae";
++}
++
++static const HChar *
++s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
++{
++   s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3));
++   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE);
+    return "vfee";
+ }
+ 
+@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
+ static const HChar *
+ s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
+ {
+-   IRDirty* d;
+-   IRTemp cc = newTemp(Ity_I64);
+-
+-   /* Check for specification exception */
+-   vassert(m5 < 3);
+-
+-   s390x_vec_op_details_t details = { .serialized = 0ULL };
+-   details.op = S390_VEC_OP_VSTRC;
+-   details.v1 = v1;
+-   details.v2 = v2;
+-   details.v3 = v3;
+-   details.v4 = v4;
+-   details.m4 = m5;
+-   details.m5 = m6;
+-
+-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+-                         &s390x_dirtyhelper_vec_op,
+-                         mkIRExprVec_2(IRExpr_GSPTR(),
+-                                       mkU64(details.serialized)));
+-
+-   d->nFxState = 4;
+-   vex_bzero(&d->fxState, sizeof(d->fxState));
+-   d->fxState[0].fx     = Ifx_Read;
+-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+-   d->fxState[0].size   = sizeof(V128);
+-   d->fxState[1].fx     = Ifx_Read;
+-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+-   d->fxState[1].size   = sizeof(V128);
+-   d->fxState[2].fx     = Ifx_Read;
+-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
+-   d->fxState[2].size   = sizeof(V128);
+-   d->fxState[3].fx     = Ifx_Write;
+-   d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+-   d->fxState[3].size   = sizeof(V128);
+-
+-   stmt(IRStmt_Dirty(d));
+-
+-   if (s390_vr_is_cs_set(m6)) {
+-      s390_cc_set(cc);
+-   }
+-
++   s390_insn_assert("vstrc", m5 <= 2);
++   s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC);
+    return "vstrc";
+ }
+ 
+
+commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Tue Mar 2 14:12:29 2021 +0100
+
+    Bug 434296 - s390x: Rework IR conversion of VFENE
+    
+    So far the z/Architecture instruction "vector find element not
+    equal" (VFENE) is transformed to a loop.  This can cause spurious
+    "conditional jump or move depends on uninitialised value(s)" messages by
+    memcheck.  Re-implement without a loop.
+
+diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
+index 49b6cd5dd..caec3108e 100644
+--- a/VEX/priv/guest_s390_defs.h
++++ b/VEX/priv/guest_s390_defs.h
+@@ -265,7 +265,6 @@ typedef enum {
+    S390_VEC_OP_INVALID = 0,
+    S390_VEC_OP_VPKS,
+    S390_VEC_OP_VPKLS,
+-   S390_VEC_OP_VFENE,
+    S390_VEC_OP_VISTR,
+    S390_VEC_OP_VCEQ,
+    S390_VEC_OP_VTM,
+diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
+index 63d2e8ce5..2188ce5c1 100644
+--- a/VEX/priv/guest_s390_helpers.c
++++ b/VEX/priv/guest_s390_helpers.c
+@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+       {0x00, 0x00}, /* invalid */
+       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
+       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
+-      [S390_VEC_OP_VFENE] = {0xe7, 0x81},
+       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
+       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
+       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
+@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+ 
+    case S390_VEC_OP_VPKS:
+    case S390_VEC_OP_VPKLS:
+-   case S390_VEC_OP_VFENE:
+    case S390_VEC_OP_VCEQ:
+    case S390_VEC_OP_VGFM:
+    case S390_VEC_OP_VCH:
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 26a947813..c8dc3ec18 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+ static const HChar *
+ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+ {
+-   const Bool negateComparison = True;
+-   const IRType type = s390_vr_get_type(m4);
++   s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3));
+ 
+-   /* Check for specification exception */
+-   vassert(m4 < 3);
+-   vassert((m5 & 0b1100) == 0);
+-
+-   static const IROp elementGetters[] = {
+-      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
++   static const IROp compare_op[3] = {
++      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
+    };
+-   IROp getter = elementGetters[m4];
+-
+-   static const IROp elementComparators[] = {
+-      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
++   static const IROp abs_op[3] = {
++      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4
+    };
+-   IROp comparator = elementComparators[m4];
+-
+-   static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
+-   IROp converter = resultConverter[m4];
+-
+-   IRTemp isZeroElem;
+-
+-   IRTemp counter = newTemp(Ity_I64);
+-   assign(counter, get_counter_dw0());
+-
+-   IRTemp arg1 = newTemp(type);
+-   assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
+-   IRTemp arg2 = newTemp(type);
+-   assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
++   IRTemp op2 = newTemp(Ity_V128);
++   IRTemp op3 = newTemp(Ity_V128);
++   IRTemp op2zero = newTemp(Ity_V128);
++   IRTemp diff = newTemp(Ity_V128);
++   IRTemp diff0 = newTemp(Ity_I64);
++   IRTemp neq0 = newTemp(Ity_I1);
++   IRTemp samebits = newTemp(Ity_I64);
++   IRExpr* tmp;
+ 
+-   IRTemp isGoodPair = newTemp(Ity_I1);
+-   if(negateComparison) {
+-      assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
+-                                              mkexpr(arg2))));
+-   } else {
+-      assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
+-   }
++   assign(op2, get_vr_qw(v2));
++   assign(op3, get_vr_qw(v3));
+ 
+-   if(s390_vr_is_zs_set(m5)) {
+-      isZeroElem = newTemp(Ity_I1);
+-      assign(isZeroElem, binop(comparator, mkexpr(arg1),
+-                               unop(converter, mkU64(0))));
++   tmp = mkV128(0);
++   if (s390_vr_is_zs_set(m5)) {
++      tmp = binop(compare_op[m4], mkexpr(op2), tmp);
++      if (s390_vr_is_cs_set(m5) && v3 != v2) {
++         /* Count leading equal bits in the terminating element too */
++         tmp = unop(abs_op[m4], tmp);
++      }
++      assign(op2zero, tmp);
++      tmp = mkexpr(op2zero);
+    }
+-
+-   static const UChar invalidIndices[] = {16, 8, 4};
+-   const UChar invalidIndex = invalidIndices[m4];
+-   IRTemp endOfVectorIsReached = newTemp(Ity_I1);
+-   assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
+-                                      mkU64(invalidIndex)));
+-
+-   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+-   IRExpr* shouldBreak = binop(Iop_Or32,
+-                               unop(Iop_1Uto32, mkexpr(isGoodPair)),
+-                               unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
+-                              );
+-   if(s390_vr_is_zs_set(m5)) {
+-      shouldBreak = binop(Iop_Or32,
+-                          shouldBreak,
+-                          unop(Iop_1Uto32, mkexpr(isZeroElem)));
+-   }
+-   iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
+-
+-   IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
+-   if(m4 > 0) {
+-      /* We should return index of byte but we found index of element in
+-         general case.
+-            if byte elem (m4 == 0) then indexOfByte = indexOfElement
+-            if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement
+-                                                        = indexOfElement << 1
+-            if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement
+-                                                    = indexOfElement << 2
+-      */
+-      foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4));
++   if (v3 != v2) {
++      tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3));
++      if (s390_vr_is_zs_set(m5))
++         tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero));
+    }
+ 
+-   IRTemp result = newTemp(Ity_I64);
+-   assign(result, mkite(mkexpr(endOfVectorIsReached),
+-                        mkU64(16),
+-                        foundIndex));
+-   put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
++   assign(diff, tmp);
++   assign(diff0, unop(Iop_V128HIto64, mkexpr(diff)));
++   assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0)));
++   assign(samebits, unop(Iop_ClzNat64,
++                         mkite(mkexpr(neq0), mkexpr(diff0),
++                               unop(Iop_V128to64, mkexpr(diff)))));
+ 
++   /* Determine the byte size of the initial equal-elements sequence */
++   tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3));
++   if (m4 != 0)
++      tmp = binop(Iop_Shl64, tmp, mkU8(m4));
++   tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8)));
++   put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
+ 
+    if (s390_vr_is_cs_set(m5)) {
+-      static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64};
+-      IROp to64Converter = to64Converters[m4];
+-
+-      IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U,
+-                                         unop(to64Converter, mkexpr(arg1)),
+-                                         unop(to64Converter, mkexpr(arg2)));
+-
+-      IRExpr* ccexp = mkite(binop(Iop_CmpEQ32,
+-                                  unop(Iop_1Uto32, mkexpr(isGoodPair)),
+-                                  mkU32(1)),
+-                            mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)),
+-                            mkU64(3));
+-
+-      if(s390_vr_is_zs_set(m5)) {
+-         IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2),
+-                                    unop(converter, mkU64(0)));
+-         IRExpr* bothArgsAreZero = binop(Iop_And32,
+-                                         unop(Iop_1Uto32, mkexpr(isZeroElem)),
+-                                         unop(Iop_1Uto32, arg2IsZero));
+-         ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)),
+-                       mkU64(0),
+-                       ccexp);
+-      }
++      /* Set condition code as follows --
++         0: operands equal up to and including zero element
++         1: op2 < op3    2: op2 > op3    3: op2 = op3 */
+       IRTemp cc = newTemp(Ity_I64);
+-      assign(cc, ccexp);
+-
++      if (v3 == v2) {
++         tmp = mkU64(0);
++      } else {
++         IRTemp shift = newTemp(Ity_I8);
++         IRExpr* op2half = mkite(mkexpr(neq0),
++                                 unop(Iop_V128HIto64, mkexpr(op2)),
++                                 unop(Iop_V128to64, mkexpr(op2)));
++         IRExpr* op3half = mkite(mkexpr(neq0),
++                                 unop(Iop_V128HIto64, mkexpr(op3)),
++                                 unop(Iop_V128to64, mkexpr(op3)));
++         assign(shift, unop(Iop_64to8,
++                            binop(Iop_Sub64, mkU64(63), mkexpr(samebits))));
++         tmp = binop(Iop_Or64,
++                     binop(Iop_Shl64,
++                           binop(Iop_And64, mkU64(1),
++                                 binop(Iop_Shr64, op2half, mkexpr(shift))),
++                           mkU8(1)),
++                     binop(Iop_And64, mkU64(1),
++                           binop(Iop_Shr64, op3half, mkexpr(shift))));
++      }
++      assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)),
++                       mkU64(3), tmp));
+       s390_cc_set(cc);
+    }
+-
+-
+-   put_counter_dw0(mkU64(0));
++   dis_res->hint = Dis_HintVerbose;
+    return "vfene";
+ }
+ 
+
+commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Tue Apr 27 20:13:26 2021 +0200
+
+    Bug 434296 - s390x: Rework IR conversion of VISTR
+    
+    The z/Architecture instruction VISTR is currently transformed to a dirty
+    helper that executes the instruction.  This can cause false positives with
+    memcheck if the input string contains undefined characters after the
+    string terminator.  Implement without a dirty helper and emulate the
+    instruction instead.
+
+diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
+index caec3108e..24f3798c1 100644
+--- a/VEX/priv/guest_s390_defs.h
++++ b/VEX/priv/guest_s390_defs.h
+@@ -265,7 +265,6 @@ typedef enum {
+    S390_VEC_OP_INVALID = 0,
+    S390_VEC_OP_VPKS,
+    S390_VEC_OP_VPKLS,
+-   S390_VEC_OP_VISTR,
+    S390_VEC_OP_VCEQ,
+    S390_VEC_OP_VTM,
+    S390_VEC_OP_VGFM,
+diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
+index 2188ce5c1..1e04f601a 100644
+--- a/VEX/priv/guest_s390_helpers.c
++++ b/VEX/priv/guest_s390_helpers.c
+@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+       {0x00, 0x00}, /* invalid */
+       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
+       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
+-      [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
+       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
+       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
+       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
+@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
+    the_insn.VRR.op2 = opcodes[d->op][1];
+ 
+    switch(d->op) {
+-   case S390_VEC_OP_VISTR:
+-      the_insn.VRR.v1 = 1;
+-      the_insn.VRR.v2 = 2;
+-      the_insn.VRR.rxb = 0b1100;
+-      the_insn.VRR.m4 = d->m4;
+-      the_insn.VRR.m5 = d->m5;
+-      break;
+-
+    case S390_VEC_OP_VTM:
+       the_insn.VRR.v1 = 2;
+       the_insn.VRR.v2 = 3;
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index c8dc3ec18..dfea54259 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
+ static const HChar *
+ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
+ {
+-   IRDirty* d;
+-   IRTemp cc = newTemp(Ity_I64);
+-
+-   /* Check for specification exception */
+-   vassert(m3 < 3);
+-   vassert((m5 & 0b1110) == 0);
++   s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1));
+ 
+-   s390x_vec_op_details_t details = { .serialized = 0ULL };
+-   details.op = S390_VEC_OP_VISTR;
+-   details.v1 = v1;
+-   details.v2 = v2;
+-   details.m4 = m3;
+-   details.m5 = m5;
+-
+-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+-                         &s390x_dirtyhelper_vec_op,
+-                         mkIRExprVec_2(IRExpr_GSPTR(),
+-                                       mkU64(details.serialized)));
++   static const IROp compare_op[3] = {
++      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
++   };
++   IRExpr* t;
++   IRTemp op2 = newTemp(Ity_V128);
++   IRTemp op2term = newTemp(Ity_V128);
++   IRTemp mask = newTemp(Ity_V128);
+ 
+-   d->nFxState = 2;
+-   vex_bzero(&d->fxState, sizeof(d->fxState));
+-   d->fxState[0].fx     = Ifx_Read;
+-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+-   d->fxState[0].size   = sizeof(V128);
+-   d->fxState[1].fx     = Ifx_Write;
+-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+-   d->fxState[1].size   = sizeof(V128);
++   assign(op2, get_vr_qw(v2));
++   assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0)));
++   t = mkexpr(op2term);
+ 
+-   stmt(IRStmt_Dirty(d));
++   for (UChar i = m3; i < 4; i++) {
++      IRTemp s = newTemp(Ity_V128);
++      assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i))));
++      t = mkexpr(s);
++   }
++   assign(mask, unop(Iop_NotV128, t));
++   put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask)));
+ 
+    if (s390_vr_is_cs_set(m5)) {
++      IRTemp cc = newTemp(Ity_I64);
++      assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask))));
+       s390_cc_set(cc);
+    }
+-
++   dis_res->hint = Dis_HintVerbose;
+    return "vistr";
+ }
+ 
+
+commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Fri Apr 16 12:44:44 2021 +0200
+
+    Bug 434296 - s390x: Add memcheck test cases for vector string insns
+    
+    Bug 434296 addresses memcheck false positives with the vector string
+    instructions VISTR, VSTRC, VFAE, VFEE, and VFENE.  Add test cases that
+    verify the fix for that bug.  Without the fix, memcheck yields many
+    complaints with these tests, most of which are false positives.
+
+diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
+index e4e69eb38..d183841ef 100644
+--- a/memcheck/tests/s390x/Makefile.am
++++ b/memcheck/tests/s390x/Makefile.am
+@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
+ 
+ dist_noinst_SCRIPTS = filter_stderr
+ 
+-INSN_TESTS = cdsg cu21 cu42 ltgjhe
++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
+ 
+ check_PROGRAMS = $(INSN_TESTS) 
+ 
+@@ -14,3 +14,7 @@ EXTRA_DIST = \
+ AM_CFLAGS    += @FLAG_M64@
+ AM_CXXFLAGS  += @FLAG_M64@
+ AM_CCASFLAGS += @FLAG_M64@
++
++vstrc_CFLAGS  = $(AM_CFLAGS) -march=z13
++vfae_CFLAGS   = $(AM_CFLAGS) -march=z13
++vistr_CFLAGS  = $(AM_CFLAGS) -march=z13
+diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c
+new file mode 100644
+index 000000000..68781e7fb
+--- /dev/null
++++ b/memcheck/tests/s390x/vfae.c
+@@ -0,0 +1,72 @@
++#include <stdio.h>
++#include <string.h>
++
++#define VECTOR __attribute__ ((vector_size (16)))
++
++typedef char VECTOR char_v;
++
++volatile char tmp;
++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
++
++static char_v to_char_vec(const char *str)
++{
++   char_v v;
++   char buf[17];
++   int len = strlen(str);
++
++   memcpy(buf, str, (len && str[len - 1] == '~') ? len - 1 : len + 1);
++   v = *(char_v *) buf;
++   return v;
++}
++
++#define GENERATE_TEST(mnem)                                          \
++static void test_ ## mnem ## _char(const char *str, const char *match, \
++                                   int expect_res, int expect_cc)    \
++{                                                                    \
++   int cc;                                                           \
++   char_v v1;                                                        \
++   char_v v2 = to_char_vec(str);                                     \
++   char_v v3 = to_char_vec(match);                                   \
++                                                                     \
++   __asm__(                                                          \
++      "cr    0,0\n\t"           /* Clear CC */                       \
++      #mnem "  %[v1],%[v2],%[v3],0,3\n\t"                            \
++      "ipm   %[cc]\n\t"                                              \
++      "srl   %[cc],28"                                               \
++      : [v1] "=v" (v1),                                              \
++        [cc] "=d" (cc)                                               \
++      : [v2] "v" (v2),                                               \
++        [v3] "v" (v3)                                                \
++      : "cc");                                                       \
++                                                                     \
++   tmp = hex_digit[v1[7] & 0x1f];                                    \
++   if (expect_res >= 0  && v1[7] != expect_res)                      \
++      printf("result %u != %d\n", v1[7], expect_res);                \
++                                                                     \
++   tmp = hex_digit[cc & 0xf];                                        \
++   if (expect_cc >= 0 && cc != expect_cc)                            \
++      printf("CC %d != %d\n", cc, expect_cc);                        \
++}
++
++GENERATE_TEST(vfae)
++
++GENERATE_TEST(vfee)
++
++GENERATE_TEST(vfene)
++
++int main()
++{
++   test_vfae_char("not found", "................", 9, 0);
++   test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2);
++   test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1);
++
++   test_vfee_char("same char here", "..........here", 10, 2);
++   test_vfee_char("and here too ...", "_________t~", 9, 1);
++   test_vfee_char("equality!~", "========!!~", 8, -1);
++
++   test_vfene_char("strings equal", "strings equal", 13, 0);
++   test_vfene_char(hex_digit, hex_digit, 16, 3);
++   test_vfene_char("undef~", "undefined", -1, -1);
++   test_vfene_char("active~", "actually ok", 3, 1);
++   return 0;
++}
+diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp
+new file mode 100644
+index 000000000..8aad3c87f
+--- /dev/null
++++ b/memcheck/tests/s390x/vfae.stderr.exp
+@@ -0,0 +1,20 @@
++Use of uninitialised value of size 8
++   at 0x........: test_vfae_char (vfae.c:51)
++   by 0x........: main (vfae.c:61)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vfae_char (vfae.c:51)
++   by 0x........: main (vfae.c:61)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vfee_char (vfae.c:53)
++   by 0x........: main (vfae.c:65)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vfene_char (vfae.c:55)
++   by 0x........: main (vfae.c:69)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vfene_char (vfae.c:55)
++   by 0x........: main (vfae.c:69)
++
+diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp
+new file mode 100644
+index 000000000..e69de29bb
+diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest
+new file mode 100644
+index 000000000..ae36c22fe
+--- /dev/null
++++ b/memcheck/tests/s390x/vfae.vgtest
+@@ -0,0 +1,2 @@
++prog: vfae
++vgopts: -q
+diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c
+new file mode 100644
+index 000000000..7ed59b94b
+--- /dev/null
++++ b/memcheck/tests/s390x/vistr.c
+@@ -0,0 +1,76 @@
++#include <stdio.h>
++#include <string.h>
++
++#define VECTOR __attribute__ ((vector_size (16)))
++
++typedef char VECTOR char_v;
++
++volatile char tmp;
++static const char *hex_digit = "0123456789abcdef";
++
++static char_v to_char_vec(const char *str, char_v *maskp)
++{
++   char buf[17];
++   char_v v;
++   char_v mask = {0};
++
++   for (int i = 0; i < sizeof(buf); i++) {
++      char ch = str[i];
++      if (ch == '\0')
++         break;
++      else if (ch == '$') {
++         buf[i] = '\0';
++         mask[i] = -1;
++      } else if (ch != '~') {
++         buf[i] = ch;
++         mask[i] = -1;
++      }
++   }
++   v = *(char_v *) buf;
++   *maskp = mask;
++   return v;
++}
++
++static void test_vistr_char(const char *str, const char *expect_res,
++                            int expect_cc)
++{
++   int cc, count;
++   char_v v1, mask;
++   char_v v2 = to_char_vec(str, &mask);
++   char_v exp_v1 = to_char_vec(expect_res, &mask);
++   char equal[16];
++
++   __asm__(
++      "cr    0,0\n\t"           /* Clear CC */
++      "vistr %[v1],%[v2],0,1\n\t"
++      "ipm   %[cc]\n\t"
++      "srl   %[cc],28"
++      : [v1] "=v" (v1),
++        [cc] "=d" (cc)
++      : [v2] "v" (v2)
++      : "cc");
++
++   *(char_v *) equal = (v1 & mask) == (exp_v1 & mask);
++   if (memchr(equal, 0, sizeof(equal)))
++      printf("Result doesn't match `%s'\n", expect_res);
++
++   count = 0;
++   for (int i = 0; i < 16; i++) {
++      if (v1[i] == 0) count++;
++   }
++   tmp = hex_digit[count];
++
++   tmp = hex_digit[cc & 0xf];
++   if (expect_cc >= 0 && cc != expect_cc)
++      printf("CC %d != %d\n", cc, expect_cc);
++}
++
++int main()
++{
++   test_vistr_char("terminated$====~", "terminated$$$$$$", 0);
++   test_vistr_char("undef~~~~~~~~~~~", "undef", -1);
++   test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1);
++   test_vistr_char("Not. Terminated.", "Not. Terminated.", 3);
++   test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0);
++   return 0;
++}
+diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp
+new file mode 100644
+index 000000000..e4f35fd74
+--- /dev/null
++++ b/memcheck/tests/s390x/vistr.stderr.exp
+@@ -0,0 +1,20 @@
++Conditional jump or move depends on uninitialised value(s)
++   at 0x........: test_vistr_char (vistr.c:59)
++   by 0x........: main (vistr.c:71)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vistr_char (vistr.c:63)
++   by 0x........: main (vistr.c:71)
++
++Conditional jump or move depends on uninitialised value(s)
++   at 0x........: test_vistr_char (vistr.c:59)
++   by 0x........: main (vistr.c:72)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vistr_char (vistr.c:63)
++   by 0x........: main (vistr.c:72)
++
++Conditional jump or move depends on uninitialised value(s)
++   at 0x........: test_vistr_char (vistr.c:59)
++   by 0x........: main (vistr.c:74)
++
+diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest
+new file mode 100644
+index 000000000..f99749d85
+--- /dev/null
++++ b/memcheck/tests/s390x/vistr.vgtest
+@@ -0,0 +1,2 @@
++prog: vistr
++vgopts: -q
+diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c
+new file mode 100644
+index 000000000..268e2f858
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrc.c
+@@ -0,0 +1,92 @@
++#include <stdio.h>
++#include <string.h>
++
++#define VECTOR __attribute__ ((vector_size (16)))
++
++typedef char VECTOR char_v;
++
++struct vstrc_char_rng {
++   unsigned char range[16];
++   unsigned char flags[16];
++};
++
++#define RNG_FLAG_EQ   0x80
++#define RNG_FLAG_LT   0x40
++#define RNG_FLAG_GT   0x20
++#define RNG_FLAG_ANY  0xe0
++#define RNG_FLAG_NONE 0x00
++
++volatile char tmp;
++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
++
++static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng,
++                            int expect_res, int expect_cc)
++{
++   int cc;
++   char_v v1;
++   char_v v2 = *(const char_v *) str;
++   char_v v3 = *(const char_v *) rng->range;
++   char_v v4 = *(const char_v *) rng->flags;
++
++   __asm__(
++      "cr    0,0\n\t"           /* Clear CC */
++      "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t"
++      "ipm   %[cc]\n\t"
++      "srl   %[cc],28"
++      : [v1] "=v" (v1),
++        [cc] "=d" (cc)
++      : [v2] "v" (v2),
++        [v3] "v" (v3),
++        [v4] "v" (v4)
++      : "cc");
++
++   tmp = hex_digit[v1[7] & 0x1f];
++   if (expect_res >= 0  && v1[7] != expect_res)
++      printf("result %u != %d\n", v1[7], expect_res);
++
++   tmp = hex_digit[cc & 0xf];
++   if (expect_cc >= 0 && cc != expect_cc)
++      printf("CC %d != %d\n", cc, expect_cc);
++}
++
++int main()
++{
++   struct vstrc_char_rng rng;
++   char buf[16];
++
++   memset(rng.flags, RNG_FLAG_NONE, 16);
++
++   rng.range[4] = 'z';
++   rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ;
++   rng.flags[5] = RNG_FLAG_ANY;
++   /* OK: match at the 'z' */
++   test_vstrc_char("find the z", &rng, 9, 2);
++
++   rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ;
++   rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ;
++   /* Bad: undefined range */
++   test_vstrc_char("undefined", &rng, -1, -1);
++
++   rng.range[12] = 'a';
++   rng.range[13] = 'c';
++   /* OK: match at the 'a' */
++   test_vstrc_char("get the abc", &rng, 8, 2);
++
++   rng.flags[12] = RNG_FLAG_LT;
++   rng.flags[13] = RNG_FLAG_GT;
++   /* OK: no match up to null terminator */
++   test_vstrc_char("no match", &rng, 8, 0);
++
++   /* OK: no match, no null terminator */
++   test_vstrc_char("0123456789abcdef", &rng, 16, 3);
++
++   buf[0] = 'x';
++   /* Bad: undefined string */
++   test_vstrc_char(buf, &rng, -1, -1);
++
++   buf[1] = 'z';
++   /* Bad: valid match, but CC undefined */
++   test_vstrc_char(buf, &rng, 1, -1);
++
++   return 0;
++}
+diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp
+new file mode 100644
+index 000000000..c1125bea1
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrc.stderr.exp
+@@ -0,0 +1,20 @@
++Use of uninitialised value of size 8
++   at 0x........: test_vstrc_char (vstrc.c:43)
++   by 0x........: main (vstrc.c:68)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrc_char (vstrc.c:47)
++   by 0x........: main (vstrc.c:68)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrc_char (vstrc.c:43)
++   by 0x........: main (vstrc.c:85)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrc_char (vstrc.c:47)
++   by 0x........: main (vstrc.c:85)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrc_char (vstrc.c:47)
++   by 0x........: main (vstrc.c:89)
++
+diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp
+new file mode 100644
+index 000000000..e69de29bb
+diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest
+new file mode 100644
+index 000000000..26f5db99b
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrc.vgtest
+@@ -0,0 +1,2 @@
++prog: vstrc
++vgopts: -q
+
+commit a0bb049ace14ab52d386bb1d49a399f39eec4986
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Tue Mar 23 14:55:09 2021 +0100
+
+    s390x: Improve handling of amodes without base register
+    
+    Addressing modes without a base or index register represent constants.
+    They can occur in some special cases such as shift operations and when
+    accessing individual vector elements.  Perform some minor improvements to
+    the handling of such amodes.
+
+diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
+index 6e0734ae0..2587f81a1 100644
+--- a/VEX/priv/host_s390_defs.c
++++ b/VEX/priv/host_s390_defs.c
+@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am)
+ {
+    switch (am->tag) {
+    case S390_AMODE_B12:
+-      return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
++      return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) &&
++             fits_unsigned_12bit(am->d);
+ 
+    case S390_AMODE_B20:
+       return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
+@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am)
+    }
+ }
+ 
++static Bool
++s390_amode_is_constant(const s390_amode *am)
++{
++   return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0));
++}
++
+ 
+ /* Record the register use of an amode */
+ static void
+ s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
+ {
+-   switch (am->tag) {
+-   case S390_AMODE_B12:
+-   case S390_AMODE_B20:
+-      addHRegUse(u, HRmRead, am->b);
+-      return;
+-
+-   case S390_AMODE_BX12:
+-   case S390_AMODE_BX20:
++   if (!sameHReg(am->b, s390_hreg_gpr(0)))
+       addHRegUse(u, HRmRead, am->b);
++   if (!sameHReg(am->x, s390_hreg_gpr(0)))
+       addHRegUse(u, HRmRead, am->x);
+-      return;
+-
+-   default:
+-      vpanic("s390_amode_get_reg_usage");
+-   }
+ }
+ 
+ 
+ static void
+ s390_amode_map_regs(HRegRemap *m, s390_amode *am)
+ {
+-   switch (am->tag) {
+-   case S390_AMODE_B12:
+-   case S390_AMODE_B20:
+-      am->b = lookupHRegRemap(m, am->b);
+-      return;
+-
+-   case S390_AMODE_BX12:
+-   case S390_AMODE_BX20:
++   if (!sameHReg(am->b, s390_hreg_gpr(0)))
+       am->b = lookupHRegRemap(m, am->b);
++   if (!sameHReg(am->x, s390_hreg_gpr(0)))
+       am->x = lookupHRegRemap(m, am->x);
+-      return;
+-
+-   default:
+-      vpanic("s390_amode_map_regs");
+-   }
+ }
+ 
+ 
+@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
+                            insn->variant.alu.dst, vreg_opnd);
+    }
+ 
++   /* v-vgetelem <reg>,<vreg> */
++   if (insn->tag == S390_INSN_VEC_AMODEOP
++       && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM
++       && insn->size == 8
++       && sameHReg(insn->variant.vec_amodeop.op1, vreg)
++       && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) {
++      vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d;
++      return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am);
++   }
++
+    /* v-<unop> <reg>,<vreg> */
+    if (insn->tag == S390_INSN_UNOP
+        && insn->variant.unop.src.tag == S390_OPND_REG
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index 5f79280c0..ceca6836e 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr,
+                     Bool no_index __attribute__((unused)),
+                     Bool short_displacement)
+ {
+-   if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
++   if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 &&
++       expr->Iex.Unop.arg->tag == Iex_Const) {
++      UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8;
++      return s390_amode_b12((Int)value, s390_hreg_gpr(0));
++
++   } else if (expr->tag == Iex_Const) {
++      ULong value = expr->Iex.Const.con->Ico.U64;
++      if (ulong_fits_unsigned_12bit(value)) {
++         return s390_amode_b12((Int)value, s390_hreg_gpr(0));
++      }
++
++   } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
+       IRExpr *arg1 = expr->Iex.Binop.arg1;
+       IRExpr *arg2 = expr->Iex.Binop.arg2;
+ 
+
+commit fd935e238d907d9c523a311ba795077d95ad6912
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Fri Mar 26 19:27:47 2021 +0100
+
+    s390x: Rework insn "v-vdup" and add "v-vrep"
+    
+    So far the only s390x insn for filling a vector with copies of the same
+    element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first
+    element of its vector argument.  This is fairly restrictive and can lead
+    to unnecessarily long code sequences.
+    
+    Redefine "v-vdup" to replicate any scalar value instead.  And add
+    "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a
+    vector.  Select the latter for suitable expressions like
+    
+      Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i))
+    
+    This improves the generated code for some vector string instructions,
+    where a lot of element replications are performed.
+
+diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
+index 2587f81a1..c764d6ef9 100644
+--- a/VEX/priv/host_s390_defs.c
++++ b/VEX/priv/host_s390_defs.c
+@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
+                             insn->variant.unop.dst, vreg_opnd);
+    }
+ 
++   /* v-vrep <reg>,<vreg>,<idx> */
++   if (insn->tag == S390_INSN_VEC_REPLICATE
++       && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
++      vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
++      return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
++                            insn->variant.vec_replicate.dst, vreg_opnd);
++   }
++
+ no_match:
+    return NULL;
+ }
+@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
+       addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
+       break;
+ 
++   case S390_INSN_VEC_REPLICATE:
++      addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
++      addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
++      break;
++
+    default:
+       vpanic("s390_insn_get_reg_usage");
+    }
+@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
+       insn->variant.vec_triop.op3 =
+          lookupHRegRemap(m, insn->variant.vec_triop.op3);
+       break;
++
++   case S390_INSN_VEC_REPLICATE:
++      insn->variant.vec_replicate.dst =
++         lookupHRegRemap(m, insn->variant.vec_replicate.dst);
++      insn->variant.vec_replicate.op1 =
++         lookupHRegRemap(m, insn->variant.vec_replicate.op1);
++      break;
++
+    default:
+       vpanic("s390_insn_map_regs");
+    }
+@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2)
+ 
+ 
+ static UChar *
+-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
++emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
++{
++   ULong the_insn = op;
++   ULong rxb = s390_update_rxb(0, 1, &v1);
++
++   the_insn |= ((ULong)v1) << 36;
++   the_insn |= ((ULong)i2) << 16;
++   the_insn |= ((ULong)m3) << 12;
++   the_insn |= ((ULong)rxb)<< 8;
++
++   return emit_6bytes(p, the_insn);
++}
++
++
++static UChar *
++emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
++{
++   ULong the_insn = op;
++   ULong rxb = s390_update_rxb(0, 1, &v1);
++   rxb = s390_update_rxb(rxb, 2, &v3);
++
++   the_insn |= ((ULong)v1) << 36;
++   the_insn |= ((ULong)v3) << 32;
++   the_insn |= ((ULong)i2) << 16;
++   the_insn |= ((ULong)m4) << 12;
++   the_insn |= ((ULong)rxb) << 8;
++
++   return emit_6bytes(p, the_insn);
++}
++
++
++static UChar *
++emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
+ {
+    ULong the_insn = op;
+    ULong rxb = s390_update_rxb(0, 1, &v1);
+@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
+    the_insn |= ((ULong)x2) << 32;
+    the_insn |= ((ULong)b2) << 28;
+    the_insn |= ((ULong)d2) << 16;
++   the_insn |= ((ULong)m3) << 12;
+    the_insn |= ((ULong)rxb)<< 8;
+ 
+    return emit_6bytes(p, the_insn);
+@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
+    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+       s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
+ 
+-   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
++   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
+ }
+ 
+ static UChar *
+@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2)
+ }
+ 
+ 
++static UChar *
++s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
++{
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
++      s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
++
++   return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
++}
++
++
+ static UChar *
+ s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
+ {
+    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+       s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
+ 
+-   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
++   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0);
+ }
+ 
+ 
+@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4)
+ 
+ 
+ static UChar *
+-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
++s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4)
+ {
+    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+-      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
++      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4);
+ 
+-   return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
++   return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4);
+ }
+ 
+ 
++static UChar *
++s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
++{
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
++      s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
++
++   return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
++}
++
+ 
+ static UChar *
+ s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
+@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
+    return insn;
+ }
+ 
++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
++                                   UChar idx)
++{
++   s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
++
++   insn->tag  = S390_INSN_VEC_REPLICATE;
++   insn->size = size;
++   insn->variant.vec_replicate.dst = dst;
++   insn->variant.vec_replicate.op1 = op1;
++   insn->variant.vec_replicate.idx = idx;
++
++   return insn;
++}
++
+ /*---------------------------------------------------------------*/
+ /*--- Debug print                                             ---*/
+ /*---------------------------------------------------------------*/
+@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn)
+                    insn->variant.vec_triop.op3);
+       break;
+ 
++   case S390_INSN_VEC_REPLICATE:
++      s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
++                   insn->variant.vec_replicate.dst,
++                   insn->variant.vec_replicate.op1,
++                   insn->variant.vec_replicate.idx);
++      break;
++
+    default: goto fail;
+    }
+ 
+@@ -9386,6 +9480,56 @@ s390_negate_emit(UChar *buf, const s390_insn *insn)
+ }
+ 
+ 
++static UChar *
++s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
++{
++   UChar v1 = hregNumber(insn->variant.unop.dst);
++   s390_opnd_RMI opnd = insn->variant.unop.src;
++   UChar r2;
++
++   switch (opnd.tag) {
++   case S390_OPND_AMODE: {
++      s390_amode* am = opnd.variant.am;
++      UInt b = hregNumber(am->b);
++      UInt x = hregNumber(am->x);
++      UInt d = am->d;
++
++      if (fits_unsigned_12bit(d)) {
++         return s390_emit_VLREP(buf, v1, x, b, d,
++                                s390_getM_from_size(insn->size));
++      }
++      buf = s390_emit_load_mem(buf, insn->size, R0, am);
++      r2 = R0;
++      goto duplicate_from_gpr;
++   }
++
++   case S390_OPND_IMMEDIATE: {
++      ULong val = opnd.variant.imm;
++
++      if (ulong_fits_signed_16bit(val)) {
++         return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
++      }
++      buf = s390_emit_load_64imm(buf, R0, val);
++      r2 = R0;
++      goto duplicate_from_gpr;
++   }
++
++   case S390_OPND_REG:
++      r2 = hregNumber(opnd.variant.reg);
++
++   duplicate_from_gpr:
++      buf = s390_emit_VLVGP(buf, v1, r2, r2);
++      if (insn->size != 8) {
++         buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1,
++                              s390_getM_from_size(insn->size));
++      }
++      return buf;
++   }
++
++   vpanic("s390_vec_duplicate_emit");
++}
++
++
+ static UChar *
+ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
+ {
+@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
+       UShort i2 = insn->variant.unop.src.variant.imm;
+       return s390_emit_VGBM(buf, v1, i2);
+       }
+-   case S390_VEC_DUPLICATE: {
+-      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
+-      UChar v1 = hregNumber(insn->variant.unop.dst);
+-      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
+-      return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
+-      }
++   case S390_VEC_DUPLICATE:  return s390_vec_duplicate_emit(buf, insn);
+    case S390_VEC_UNPACKLOWS: {
+       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
+       vassert(insn->size < 8);
+@@ -11595,6 +11734,16 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
+ }
+ 
+ 
++static UChar *
++s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
++{
++   UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
++   UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
++   UShort idx = (UShort) insn->variant.vec_replicate.idx;
++   return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
++}
++
++
+ Int
+ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
+                Bool mode64, VexEndness endness_host,
+@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
+    case S390_INSN_VEC_TRIOP:
+       end = s390_insn_vec_triop_emit(buf, insn);
+       break;
++
++   case S390_INSN_VEC_REPLICATE:
++      end = s390_insn_vec_replicate_emit(buf, insn);
++      break;
++
+    fail:
+    default:
+       vpanic("emit_S390Instr");
+diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
+index 9b69f4d38..063fd3800 100644
+--- a/VEX/priv/host_s390_defs.h
++++ b/VEX/priv/host_s390_defs.h
+@@ -166,7 +166,8 @@ typedef enum {
+    S390_INSN_VEC_AMODEINTOP,
+    S390_INSN_VEC_UNOP,
+    S390_INSN_VEC_BINOP,
+-   S390_INSN_VEC_TRIOP
++   S390_INSN_VEC_TRIOP,
++   S390_INSN_VEC_REPLICATE
+ } s390_insn_tag;
+ 
+ 
+@@ -738,6 +739,11 @@ typedef struct {
+          HReg          op2;    /* 128-bit second operand */
+          HReg          op3;    /* 128-bit third operand */
+       } vec_triop;
++      struct {
++         HReg          dst;    /* 128-bit result */
++         HReg          op1;    /* 128-bit first operand */
++         UChar         idx;    /* index of element to replicate */
++      } vec_replicate;
+    } variant;
+ } s390_insn;
+ 
+@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1,
+                                HReg op2);
+ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1,
+                                HReg op2, HReg op3);
++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx);
+ 
+ const HChar *s390_insn_as_string(const s390_insn *);
+ 
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index ceca6836e..968122596 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+    }
+    /* --------- UNARY OP --------- */
+    case Iex_Unop: {
+-      UChar size_for_int_arg = 0;
+       HReg dst = INVALID_HREG;
+       HReg reg1 = INVALID_HREG;
+       s390_unop_t vec_unop = S390_UNOP_T_INVALID;
+       s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
+       IROp op = expr->Iex.Unop.op;
++      IROp arg_op = Iop_INVALID;
+       IRExpr* arg = expr->Iex.Unop.arg;
+       switch(op) {
+       case Iop_NotV128:
+@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+       }
+ 
+       case Iop_Dup8x16:
+-         size = size_for_int_arg = 1;
+-         vec_unop = S390_VEC_DUPLICATE;
+-         goto Iop_V_int_wrk;
++         size = 1;
++         arg_op = Iop_GetElem8x16;
++         goto Iop_V_dup_wrk;
+       case Iop_Dup16x8:
+-         size = size_for_int_arg = 2;
+-         vec_unop = S390_VEC_DUPLICATE;
+-         goto Iop_V_int_wrk;
++         size = 2;
++         arg_op = Iop_GetElem16x8;
++         goto Iop_V_dup_wrk;
+       case Iop_Dup32x4:
+-         size = size_for_int_arg = 4;
+-         vec_unop = S390_VEC_DUPLICATE;
+-         goto Iop_V_int_wrk;
++         size = 4;
++         arg_op = Iop_GetElem32x4;
++         goto Iop_V_dup_wrk;
++
++      Iop_V_dup_wrk: {
++         dst = newVRegV(env);
++         if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
++             arg->Iex.Binop.arg2->tag == Iex_Const) {
++            ULong idx;
++            idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
++            reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
++            addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx));
++         } else {
++            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
++            addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
++         }
++         return dst;
++      }
+ 
+       case Iop_Widen8Sto16x8:
+          size = 1;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWS;
+-         goto Iop_V_int_wrk;
++         goto Iop_V_widen_wrk;
+       case Iop_Widen16Sto32x4:
+          size = 2;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWS;
+-         goto Iop_V_int_wrk;
++         goto Iop_V_widen_wrk;
+       case Iop_Widen32Sto64x2:
+          size = 4;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWS;
+-         goto Iop_V_int_wrk;
++         goto Iop_V_widen_wrk;
+       case Iop_Widen8Uto16x8:
+          size = 1;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWU;
+-         goto Iop_V_int_wrk;
++         goto Iop_V_widen_wrk;
+       case Iop_Widen16Uto32x4:
+          size = 2;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWU;
+-         goto Iop_V_int_wrk;
++         goto Iop_V_widen_wrk;
+       case Iop_Widen32Uto64x2:
+          size = 4;
+-         size_for_int_arg = 8;
+          vec_unop = S390_VEC_UNPACKLOWU;
+-         goto Iop_V_int_wrk;
+-
+-      Iop_V_int_wrk: {
+-         HReg vr1 = vec_generate_zeroes(env);
+-         s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
+-         reg1 = s390_isel_int_expr(env, arg);
++         goto Iop_V_widen_wrk;
+ 
++      Iop_V_widen_wrk: {
+          vassert(vec_unop != S390_UNOP_T_INVALID);
+-         addInstr(env,
+-                  s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
+-                                           vr1, amode2, reg1));
+-
++         s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
++         HReg vr1 = newVRegV(env);
++         addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
+          dst = newVRegV(env);
+          addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
+          return dst;
+
+commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Thu Mar 25 18:48:07 2021 +0100
+
+    s390x: Add support for emitting "vector or with complement"
+    
+    In the instruction selector, look out for IR expressions that fit "vector
+    or with complement (VOC)".  Emit when applicable.
+    
+    This slightly reduces the generated code sometimes, such as for certain
+    vector string instructions, where such expressions occur quite frequently.
+
+diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
+index c764d6ef9..239d9d299 100644
+--- a/VEX/priv/host_s390_defs.c
++++ b/VEX/priv/host_s390_defs.c
+@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3)
+    return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3);
+ }
+ 
++static UChar *
++s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3)
++{
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
++      s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3);
++
++   return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3);
++}
++
+ static UChar *
+ s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3)
+ {
+@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn)
+       case S390_VEC_PACK_SATURU:    op = "v-vpacksaturu"; break;
+       case S390_VEC_COMPARE_EQUAL:  op = "v-vcmpeq"; break;
+       case S390_VEC_OR:             op = "v-vor"; break;
++      case S390_VEC_ORC:            op = "v-vorc"; break;
+       case S390_VEC_XOR:            op = "v-vxor";  break;
+       case S390_VEC_AND:            op = "v-vand"; break;
+       case S390_VEC_MERGEL:         op = "v-vmergel"; break;
+@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
+          return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size));
+       case S390_VEC_OR:
+          return s390_emit_VO(buf, v1, v2, v3);
++      case S390_VEC_ORC:
++         return s390_emit_VOC(buf, v1, v2, v3);
+       case S390_VEC_XOR:
+          return s390_emit_VX(buf, v1, v2, v3);
+       case S390_VEC_AND:
+diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
+index 063fd3800..dc116106e 100644
+--- a/VEX/priv/host_s390_defs.h
++++ b/VEX/priv/host_s390_defs.h
+@@ -366,6 +366,7 @@ typedef enum {
+    S390_VEC_PACK_SATURU,
+    S390_VEC_COMPARE_EQUAL,
+    S390_VEC_OR,
++   S390_VEC_ORC,
+    S390_VEC_XOR,
+    S390_VEC_AND,
+    S390_VEC_MERGEL,
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index 968122596..53d76fe8a 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+       case Iop_OrV128:
+          size = 16;
+          vec_binop = S390_VEC_OR;
++         if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
++            IRExpr* orig_arg1 = arg1;
++            arg1 = arg2;
++            arg2 = orig_arg1->Iex.Unop.arg;
++            vec_binop = S390_VEC_ORC;
++         } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
++            arg2 = arg2->Iex.Unop.arg;
++            vec_binop = S390_VEC_ORC;
++         }
+          goto Iop_VV_wrk;
+ 
+       case Iop_XorV128:
+
+commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Tue Mar 30 17:45:20 2021 +0200
+
+    s390x: Fix/optimize Iop_64HLtoV128
+    
+    In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies
+    of a 64-bit value is realized with Iop_64HLtoV128, since there is no such
+    operator as Iop_Dup64x2.  But the two args to Iop_64HLtoV128 use the same
+    expression, referenced twice.  Although this hasn't been seen to cause
+    real trouble yet, it's problematic and potentially inefficient, so change
+    it: Assign to a temp and pass that twice instead.
+    
+    In the instruction selector, if Iop_64HLtoV128 is found to be used for a
+    duplication as above, select "v-vdup" instead of "v-vinitfromgprs".  This
+    mimics the behavior we'd get if there actually was an operator
+    Iop_Dup64x2.
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index dfea54259..a73dcfb14 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2)
+    case Ity_I32:
+       put_vr_qw(v1, unop(Iop_Dup32x4, o2));
+       break;
+-   case Ity_I64:
+-      put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
++   case Ity_I64: {
++      IRTemp val = newTemp(Ity_I64);
++      assign(val, o2);
++      put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val)));
+       break;
++   }
+    default:
+       ppIRType(o2type);
+       vpanic("s390_vr_fill: invalid IRType");
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index 53d76fe8a..ee20c6711 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+       }
+ 
+       case Iop_64HLtoV128:
+-         reg1 = s390_isel_int_expr(env, arg1);
+-         reg2 = s390_isel_int_expr(env, arg2);
+-
+-         addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
+-                  dst, reg1, reg2));
+-
++         if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp &&
++             arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) {
++            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1);
++            addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src));
++         } else {
++            reg1 = s390_isel_int_expr(env, arg1);
++            reg2 = s390_isel_int_expr(env, arg2);
++            addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
++                                              dst, reg1, reg2));
++         }
+          return dst;
+ 
+       default:
+
+commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Fri May 7 18:13:03 2021 +0200
+
+    s390x: Add missing stdout.exp for vector string memcheck test
+    
+    The file vistr.stdout.exp was missing from commit 32312d588.  Add it.
+
+diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp
+new file mode 100644
+index 000000000..e69de29bb
diff --git a/valgrind-3.17.0-s390-z13-vec-fix.patch b/valgrind-3.17.0-s390-z13-vec-fix.patch
new file mode 100644
index 0000000..959e5f8
--- /dev/null
+++ b/valgrind-3.17.0-s390-z13-vec-fix.patch
@@ -0,0 +1,46 @@
+commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb
+Author: Andreas Arnez <arnez@linux.ibm.com>
+Date:   Mon Jun 7 14:01:53 2021 +0200
+
+    s390x: Don't emit "vector or with complement" on z13
+    
+    The z/Architecture instruction "vector or with complement" (VOC) can be
+    used as an optimization to combine "vector or" with "vector nor".  This is
+    exploited in Valgrind since commit 6c1cb1a0128b00858b973e.  However, VOC
+    requires the vector-enhancements facility 1, which is not installed on a
+    z13 CPU.  Thus Valgrind can now run into SIGILL on z13 when trying to
+    execute vector string instructions.
+    
+    Fix this by suppressing the VOC optimization unless the
+    vector-enhancements facility 1 is recognized on the host.
+
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index ee20c6711..15ca92a6b 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -4102,14 +4102,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+       case Iop_OrV128:
+          size = 16;
+          vec_binop = S390_VEC_OR;
+-         if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
+-            IRExpr* orig_arg1 = arg1;
+-            arg1 = arg2;
+-            arg2 = orig_arg1->Iex.Unop.arg;
+-            vec_binop = S390_VEC_ORC;
+-         } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
+-            arg2 = arg2->Iex.Unop.arg;
+-            vec_binop = S390_VEC_ORC;
++         if (s390_host_has_vxe) {
++            if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
++               IRExpr* orig_arg1 = arg1;
++               arg1 = arg2;
++               arg2 = orig_arg1->Iex.Unop.arg;
++               vec_binop = S390_VEC_ORC;
++            } else if (arg2->tag == Iex_Unop &&
++                       arg2->Iex.Unop.op == Iop_NotV128) {
++               arg2 = arg2->Iex.Unop.arg;
++               vec_binop = S390_VEC_ORC;
++            }
+          }
+          goto Iop_VV_wrk;
+ 
diff --git a/valgrind-3.17.0-s390-z15.patch b/valgrind-3.17.0-s390-z15.patch
new file mode 100644
index 0000000..2ec3c2f
--- /dev/null
+++ b/valgrind-3.17.0-s390-z15.patch
@@ -0,0 +1,2413 @@
+From 3fbde55a5696c9273084ee2c44daca752e407597 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 26 Jan 2021 15:06:47 +0100
+Subject: [PATCH 01/13] s390x: Misc-insn-3, bitwise logical 3-way instructions
+
+Add support for the instructions NCRK, NCGRK, NNRK, NNGRK, NORK, NOGRK,
+NXRK, NXGRK, OCRK, and OCGRK.  Introduce a common helper and use it for
+the existing instructions NRK, NGRK, XRK, XGRK, ORK, and OGRK as well.
+---
+ VEX/priv/guest_s390_toIR.c | 154 ++++++++++++++++++++++++++-----------
+ 1 file changed, 109 insertions(+), 45 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index a73dcfb14..f8afd5b96 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -5022,8 +5022,12 @@ s390_irgen_NGR(UChar r1, UChar r2)
+    return "ngr";
+ }
+ 
++/* Helper for bitwise logical instructions with two 32-bit input operands and a
++   32-bit output operand.  `inv3' and `inv' indicate whether to invert (build
++   bitwise complement of) operand 3 or the result, respectively. */
+ static const HChar *
+-s390_irgen_NRK(UChar r3, UChar r1, UChar r2)
++s390_irgen_logicalK32(UChar r3, UChar r1, UChar r2,
++                      const HChar *mnem, IROp op, Bool inv3, Bool inv)
+ {
+    IRTemp op2 = newTemp(Ity_I32);
+    IRTemp op3 = newTemp(Ity_I32);
+@@ -5031,15 +5035,19 @@ s390_irgen_NRK(UChar r3, UChar r1, UChar r2)
+ 
+    assign(op2, get_gpr_w1(r2));
+    assign(op3, get_gpr_w1(r3));
+-   assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3)));
++   IRExpr* tmp = binop(op, mkexpr(op2),
++                       inv3 ? unop(Iop_Not32, mkexpr(op3)) : mkexpr(op3));
++   assign(result, inv ? unop(Iop_Not32, tmp) : tmp);
+    s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+    put_gpr_w1(r1, mkexpr(result));
+ 
+-   return "nrk";
++   return mnem;
+ }
+ 
++/* Same as s390_irgen_logicalK32, but for 64-bit operands. */
+ static const HChar *
+-s390_irgen_NGRK(UChar r3, UChar r1, UChar r2)
++s390_irgen_logicalK64(UChar r3, UChar r1, UChar r2,
++                      const HChar *mnem, IROp op, Bool inv3, Bool inv)
+ {
+    IRTemp op2 = newTemp(Ity_I64);
+    IRTemp op3 = newTemp(Ity_I64);
+@@ -5047,11 +5055,49 @@ s390_irgen_NGRK(UChar r3, UChar r1, UChar r2)
+ 
+    assign(op2, get_gpr_dw0(r2));
+    assign(op3, get_gpr_dw0(r3));
+-   assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3)));
++   IRExpr* tmp = binop(op, mkexpr(op2),
++                       inv3 ? unop(Iop_Not64, mkexpr(op3)) : mkexpr(op3));
++   assign(result, inv ? unop(Iop_Not64, tmp) : tmp);
+    s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+    put_gpr_dw0(r1, mkexpr(result));
+ 
+-   return "ngrk";
++   return mnem;
++}
++
++static const HChar *
++s390_irgen_NRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "nrk", Iop_And32, False, False);
++}
++
++static const HChar *
++s390_irgen_NGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "ngrk", Iop_And64, False, False);
++}
++
++static const HChar *
++s390_irgen_NCRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "ncrk", Iop_And32, True, False);
++}
++
++static const HChar *
++s390_irgen_NCGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "ncgrk", Iop_And64, True, False);
++}
++
++static const HChar *
++s390_irgen_NNRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "nnrk", Iop_And32, False, True);
++}
++
++static const HChar *
++s390_irgen_NNGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "nngrk", Iop_And64, False, True);
+ }
+ 
+ static const HChar *
+@@ -7071,33 +7117,25 @@ s390_irgen_XGR(UChar r1, UChar r2)
+ static const HChar *
+ s390_irgen_XRK(UChar r3, UChar r1, UChar r2)
+ {
+-   IRTemp op2 = newTemp(Ity_I32);
+-   IRTemp op3 = newTemp(Ity_I32);
+-   IRTemp result = newTemp(Ity_I32);
+-
+-   assign(op2, get_gpr_w1(r2));
+-   assign(op3, get_gpr_w1(r3));
+-   assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3)));
+-   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+-   put_gpr_w1(r1, mkexpr(result));
+-
+-   return "xrk";
++   return s390_irgen_logicalK32(r3, r1, r2, "xrk", Iop_Xor32, False, False);
+ }
+ 
+ static const HChar *
+ s390_irgen_XGRK(UChar r3, UChar r1, UChar r2)
+ {
+-   IRTemp op2 = newTemp(Ity_I64);
+-   IRTemp op3 = newTemp(Ity_I64);
+-   IRTemp result = newTemp(Ity_I64);
++   return s390_irgen_logicalK64(r3, r1, r2, "xgrk", Iop_Xor64, False, False);
++}
+ 
+-   assign(op2, get_gpr_dw0(r2));
+-   assign(op3, get_gpr_dw0(r3));
+-   assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3)));
+-   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+-   put_gpr_dw0(r1, mkexpr(result));
++static const HChar *
++s390_irgen_NXRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "nxrk", Iop_Xor32, False, True);
++}
+ 
+-   return "xgrk";
++static const HChar *
++s390_irgen_NXGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "nxgrk", Iop_Xor64, False, True);
+ }
+ 
+ static const HChar *
+@@ -8920,33 +8958,37 @@ s390_irgen_OGR(UChar r1, UChar r2)
+ static const HChar *
+ s390_irgen_ORK(UChar r3, UChar r1, UChar r2)
+ {
+-   IRTemp op2 = newTemp(Ity_I32);
+-   IRTemp op3 = newTemp(Ity_I32);
+-   IRTemp result = newTemp(Ity_I32);
++   return s390_irgen_logicalK32(r3, r1, r2, "ork", Iop_Or32, False, False);
++}
+ 
+-   assign(op2, get_gpr_w1(r2));
+-   assign(op3, get_gpr_w1(r3));
+-   assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3)));
+-   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+-   put_gpr_w1(r1, mkexpr(result));
++static const HChar *
++s390_irgen_OGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "ogrk", Iop_Or64, False, False);
++}
+ 
+-   return "ork";
++static const HChar *
++s390_irgen_OCRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "ocrk", Iop_Or32, True, False);
+ }
+ 
+ static const HChar *
+-s390_irgen_OGRK(UChar r3, UChar r1, UChar r2)
++s390_irgen_OCGRK(UChar r3, UChar r1, UChar r2)
+ {
+-   IRTemp op2 = newTemp(Ity_I64);
+-   IRTemp op3 = newTemp(Ity_I64);
+-   IRTemp result = newTemp(Ity_I64);
++   return s390_irgen_logicalK64(r3, r1, r2, "ocgrk", Iop_Or64, True, False);
++}
+ 
+-   assign(op2, get_gpr_dw0(r2));
+-   assign(op3, get_gpr_dw0(r3));
+-   assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3)));
+-   s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result);
+-   put_gpr_dw0(r1, mkexpr(result));
++static const HChar *
++s390_irgen_NORK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK32(r3, r1, r2, "nork", Iop_Or32, False, True);
++}
+ 
+-   return "ogrk";
++static const HChar *
++s390_irgen_NOGRK(UChar r3, UChar r1, UChar r2)
++{
++   return s390_irgen_logicalK64(r3, r1, r2, "nogrk", Iop_Or64, False, True);
+ }
+ 
+ static const HChar *
+@@ -20031,12 +20073,28 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb961: s390_format_RRF_U0RR(s390_irgen_CLGRT, RRF2_m3(ovl),
+                                      RRF2_r1(ovl), RRF2_r2(ovl),
+                                      S390_XMNM_CAB); goto ok;
++   case 0xb964: s390_format_RRF_R0RR2(s390_irgen_NNGRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb965: s390_format_RRF_R0RR2(s390_irgen_OCGRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb966: s390_format_RRF_R0RR2(s390_irgen_NOGRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb967: s390_format_RRF_R0RR2(s390_irgen_NXGRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
+    case 0xb972: s390_format_RRF_U0RR(s390_irgen_CRT, RRF2_m3(ovl),
+                                      RRF2_r1(ovl), RRF2_r2(ovl),
+                                      S390_XMNM_CAB); goto ok;
+    case 0xb973: s390_format_RRF_U0RR(s390_irgen_CLRT, RRF2_m3(ovl),
+                                      RRF2_r1(ovl), RRF2_r2(ovl),
+                                      S390_XMNM_CAB); goto ok;
++   case 0xb974: s390_format_RRF_R0RR2(s390_irgen_NNRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb975: s390_format_RRF_R0RR2(s390_irgen_OCRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb976: s390_format_RRF_R0RR2(s390_irgen_NORK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
++   case 0xb977: s390_format_RRF_R0RR2(s390_irgen_NXRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl)); goto ok;
+    case 0xb980: s390_format_RRE_RR(s390_irgen_NGR, RRE_r1(ovl),
+                                    RRE_r2(ovl));  goto ok;
+    case 0xb981: s390_format_RRE_RR(s390_irgen_OGR, RRE_r1(ovl),
+@@ -20148,6 +20206,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
++   case 0xb9e5: s390_format_RRF_R0RR2(s390_irgen_NCGRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl));
++                                      goto ok;
+    case 0xb9e6: s390_format_RRF_R0RR2(s390_irgen_OGRK, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
+@@ -20178,6 +20239,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9f4: s390_format_RRF_R0RR2(s390_irgen_NRK, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
++   case 0xb9f5: s390_format_RRF_R0RR2(s390_irgen_NCRK, RRF4_r3(ovl),
++                                      RRF4_r1(ovl), RRF4_r2(ovl));
++                                      goto ok;
+    case 0xb9f6: s390_format_RRF_R0RR2(s390_irgen_ORK, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
+-- 
+2.23.0
+
+From 748421b31ab6b15cc849bd6b9588ad759b807324 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Wed, 27 Jan 2021 18:11:06 +0100
+Subject: [PATCH 02/13] s390x: Misc-insn-3, "select" instructions
+
+Add support for the instructions SELR, SELGR, and SELFHR.
+---
+ VEX/priv/guest_s390_toIR.c | 43 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 43 insertions(+)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index f8afd5b96..41265631b 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -3113,6 +3113,16 @@ s390_format_RRF_FUFF2(const HChar *(*irgen)(UChar, UChar, UChar, UChar),
+       s390_disasm(ENC5(MNM, FPR, FPR, FPR, UINT), mnm, r1, r2, r3, m4);
+ }
+ 
++static void
++s390_format_RRF_RURR(const HChar *(*irgen)(UChar, UChar, UChar, UChar),
++                     UChar r3, UChar m4, UChar r1, UChar r2)
++{
++   const HChar *mnm = irgen(r3, m4, r1, r2);
++
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
++      s390_disasm(ENC5(MNM, GPR, GPR, GPR, UINT), mnm, r1, r3, r2, m4);
++}
++
+ static void
+ s390_format_RRF_R0RR2(const HChar *(*irgen)(UChar r3, UChar r1, UChar r2),
+                       UChar r3, UChar r1, UChar r2)
+@@ -19254,6 +19264,30 @@ s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3)
+    return "vbperm";
+ }
+ 
++static const HChar *
++s390_irgen_SELR(UChar r3, UChar m4, UChar r1, UChar r2)
++{
++   IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0));
++   put_gpr_w1(r1, mkite(cond, get_gpr_w1(r2), get_gpr_w1(r3)));
++   return "selr";
++}
++
++static const HChar *
++s390_irgen_SELGR(UChar r3, UChar m4, UChar r1, UChar r2)
++{
++   IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0));
++   put_gpr_dw0(r1, mkite(cond, get_gpr_dw0(r2), get_gpr_dw0(r3)));
++   return "selgr";
++}
++
++static const HChar *
++s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2)
++{
++   IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0));
++   put_gpr_w0(r1, mkite(cond, get_gpr_w0(r2), get_gpr_w0(r3)));
++   return "selfhr";
++}
++
+ /* New insns are added here.
+    If an insn is contingent on a facility being installed also
+    check whether the list of supported facilities in function
+@@ -20163,6 +20197,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9bd: /* TRTRE */ goto unimplemented;
+    case 0xb9be: /* SRSTU */ goto unimplemented;
+    case 0xb9bf: /* TRTE */ goto unimplemented;
++   case 0xb9c0: s390_format_RRF_RURR(s390_irgen_SELFHR, RRF4_r3(ovl),
++                                     RRF4_m4(ovl), RRF4_r1(ovl),
++                                     RRF4_r2(ovl)); goto ok;
+    case 0xb9c8: s390_format_RRF_R0RR2(s390_irgen_AHHHR, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
+@@ -20203,6 +20240,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl),
+                                      RRF3_r1(ovl), RRF3_r2(ovl),
+                                      S390_XMNM_LOCGR);  goto ok;
++   case 0xb9e3: s390_format_RRF_RURR(s390_irgen_SELGR, RRF4_r3(ovl),
++                                     RRF4_m4(ovl), RRF4_r1(ovl),
++                                     RRF4_r2(ovl)); goto ok;
+    case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
+@@ -20233,6 +20273,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9ed: s390_format_RRF_R0RR2(s390_irgen_MSGRKC, RRF4_r3(ovl),
+                                       RRF4_r1(ovl), RRF4_r2(ovl));
+                                       goto ok;
++   case 0xb9f0: s390_format_RRF_RURR(s390_irgen_SELR, RRF4_r3(ovl),
++                                     RRF4_m4(ovl), RRF4_r1(ovl),
++                                     RRF4_r2(ovl)); goto ok;
+    case 0xb9f2: s390_format_RRF_U0RR(s390_irgen_LOCR, RRF3_r3(ovl),
+                                      RRF3_r1(ovl), RRF3_r2(ovl),
+                                      S390_XMNM_LOCR);  goto ok;
+-- 
+2.23.0
+
+From 31cbd583e858f47a86ada087d21a6abc13ba04f2 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Thu, 28 Jan 2021 19:47:00 +0100
+Subject: [PATCH 03/13] s390x: Misc-insn-3, new POPCNT variant
+
+Add support for the new POPCNT variant that has bit 0 of the M3 field set
+and yields the total number of one bits in its 64-bit operand.
+---
+ VEX/priv/guest_s390_toIR.c | 44 ++++++++++++++++++++++++++------------
+ 1 file changed, 30 insertions(+), 14 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 41265631b..ca9e6dc03 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -3073,6 +3073,20 @@ s390_format_RRF_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2),
+       s390_disasm(ENC3(XMNM, GPR, GPR), xmnm_kind, m3, r1, r2);
+ }
+ 
++static void
++s390_format_RRFa_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2),
++                      UChar m3, UChar r1, UChar r2)
++{
++   const HChar *mnm = irgen(m3, r1, r2);
++
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) {
++      if (m3 != 0)
++         s390_disasm(ENC4(MNM, GPR, GPR, UINT), mnm, r1, r2, m3);
++      else
++         s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2);
++   }
++}
++
+ static void
+ s390_format_RRF_F0FF2(const HChar *(*irgen)(UChar, UChar, UChar),
+                       UChar r3, UChar r1, UChar r2)
+@@ -15112,30 +15126,32 @@ s390_irgen_FLOGR(UChar r1, UChar r2)
+ }
+ 
+ static const HChar *
+-s390_irgen_POPCNT(UChar r1, UChar r2)
++s390_irgen_POPCNT(UChar m3, UChar r1, UChar r2)
+ {
+-   Int i;
++   s390_insn_assert("popcnt", (m3 & 7) == 0);
++
++   static const ULong masks[] = {
++      0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F,
++      0x00FF00FF00FF00FF, 0x0000FFFF0000FFFF, 0x00000000FFFFFFFF,
++   };
++   Int i, n;
+    IRTemp val = newTemp(Ity_I64);
+-   IRTemp mask[3];
+ 
+    assign(val, get_gpr_dw0(r2));
+-   for (i = 0; i < 3; i++) {
+-      mask[i] = newTemp(Ity_I64);
+-   }
+-   assign(mask[0], mkU64(0x5555555555555555ULL));
+-   assign(mask[1], mkU64(0x3333333333333333ULL));
+-   assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL));
+-   for (i = 0; i < 3; i++) {
++   n = (m3 & 8) ? 6 : 3;
++   for (i = 0; i < n; i++) {
++      IRTemp mask = newTemp(Ity_I64);
+       IRTemp tmp = newTemp(Ity_I64);
+ 
++      assign (mask, mkU64(masks[i]));
+       assign(tmp,
+              binop(Iop_Add64,
+                    binop(Iop_And64,
+                          mkexpr(val),
+-                         mkexpr(mask[i])),
++                         mkexpr(mask)),
+                    binop(Iop_And64,
+                          binop(Iop_Shr64, mkexpr(val), mkU8(1 << i)),
+-                         mkexpr(mask[i]))));
++                         mkexpr(mask))));
+       val = tmp;
+    }
+    s390_cc_thunk_putZ(S390_CC_OP_BITWISE, val);
+@@ -20235,8 +20251,8 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+    case 0xb9e0: s390_format_RRF_U0RR(s390_irgen_LOCFHR, RRF3_r3(ovl),
+                                      RRF3_r1(ovl), RRF3_r2(ovl),
+                                      S390_XMNM_LOCFHR);  goto ok;
+-   case 0xb9e1: s390_format_RRE_RR(s390_irgen_POPCNT, RRE_r1(ovl),
+-                                   RRE_r2(ovl));  goto ok;
++   case 0xb9e1: s390_format_RRFa_U0RR(s390_irgen_POPCNT, RRF3_r3(ovl),
++                                      RRF3_r1(ovl), RRF3_r2(ovl));  goto ok;
+    case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl),
+                                      RRF3_r1(ovl), RRF3_r2(ovl),
+                                      S390_XMNM_LOCGR);  goto ok;
+-- 
+2.23.0
+
+From 64352d57f93711ce76fd481558dcf6d65e26b19f Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Fri, 29 Jan 2021 20:13:05 +0100
+Subject: [PATCH 04/13] s390x: Misc-insn-3, MVCRL
+
+Add support for the "move right to left" instruction MVCRL.
+---
+ VEX/priv/guest_s390_toIR.c | 47 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 47 insertions(+)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index ca9e6dc03..9f7d98f8c 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -3562,6 +3562,25 @@ s390_format_SS_L0RDRD(const HChar *(*irgen)(UChar, IRTemp, IRTemp),
+       s390_disasm(ENC3(MNM, UDLB, UDXB), mnm, d1, l, b1, d2, 0, b2);
+ }
+ 
++static void
++s390_format_SSE_RDRD(const HChar *(*irgen)(IRTemp, IRTemp),
++                     UChar b1, UShort d1, UChar b2, UShort d2)
++{
++   const HChar *mnm;
++   IRTemp op1addr = newTemp(Ity_I64);
++   IRTemp op2addr = newTemp(Ity_I64);
++
++   assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) :
++          mkU64(0)));
++   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
++          mkU64(0)));
++
++   mnm = irgen(op1addr, op2addr);
++   /* The encoding must start with MNM so that mnm is taken as the mnemonic */
++   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
++      s390_disasm(ENC3(MNM, UDXB, UDXB), mnm, d1, 0, b1, d2, 0, b2);
++}
++
+ static void
+ s390_format_SIL_RDI(const HChar *(*irgen)(UShort i2, IRTemp op1addr),
+                     UChar b1, UShort d1, UShort i2)
+@@ -13667,6 +13686,31 @@ s390_irgen_MVCIN(UChar length, IRTemp start1, IRTemp start2)
+    return "mvcin";
+ }
+ 
++static const HChar *
++s390_irgen_MVCRL(IRTemp op1addr, IRTemp op2addr)
++{
++   IRTemp counter = newTemp(Ity_I64);
++   IRTemp offset = newTemp(Ity_I64);
++
++   assign(counter, get_counter_dw0());
++   /* offset = length - 1 - counter, where length-1 is specified in r0 */
++   assign(offset,
++          binop(Iop_Sub64,
++                unop(Iop_16Uto64,
++                     binop(Iop_And16, get_gpr_hw3(0), mkU16(0xfff))),
++                mkexpr(counter)));
++
++   store(binop(Iop_Add64, mkexpr(op1addr), mkexpr(offset)),
++         load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkexpr(offset))));
++
++   /* Check for end of field */
++   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
++   iterate_if(binop(Iop_CmpNE64, mkexpr(offset), mkU64(0)));
++   put_counter_dw0(mkU64(0));
++
++   return "mvcrl";
++}
++
+ static const HChar *
+ s390_irgen_MVCL(UChar r1, UChar r2)
+ {
+@@ -22217,6 +22261,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
+    case 0xe500ULL: /* LASP */ goto unimplemented;
+    case 0xe501ULL: /* TPROT */ goto unimplemented;
+    case 0xe502ULL: /* STRAG */ goto unimplemented;
++   case 0xe50aULL: s390_format_SSE_RDRD(s390_irgen_MVCRL,
++                                        SS_b1(ovl), SS_d1(ovl),
++                                        SS_b2(ovl), SS_d2(ovl));  goto ok;
+    case 0xe50eULL: /* MVCSK */ goto unimplemented;
+    case 0xe50fULL: /* MVCDK */ goto unimplemented;
+    case 0xe544ULL: s390_format_SIL_RDI(s390_irgen_MVHHI, SIL_b1(ovl),
+-- 
+2.23.0
+
+From 6cc4d66cc3a999253d9a57e2b5c75aeb67f77918 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 2 Feb 2021 20:15:02 +0100
+Subject: [PATCH 05/13] s390x: Misc-insn-3, test case
+
+Add a test case for the new instructions in the miscellaneous instruction
+extensions facility 3.
+---
+ .gitignore                        |   1 +
+ none/tests/s390x/Makefile.am      |   3 +-
+ none/tests/s390x/misc3.c          | 182 ++++++++++++++++++++++++++++++
+ none/tests/s390x/misc3.stderr.exp |   2 +
+ none/tests/s390x/misc3.stdout.exp | 103 +++++++++++++++++
+ none/tests/s390x/misc3.vgtest     |   1 +
+ 6 files changed, 291 insertions(+), 1 deletion(-)
+ create mode 100644 none/tests/s390x/misc3.c
+ create mode 100644 none/tests/s390x/misc3.stderr.exp
+ create mode 100644 none/tests/s390x/misc3.stdout.exp
+ create mode 100644 none/tests/s390x/misc3.vgtest
+
+diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am
+index a0fb92ef5..2fd45ec1e 100644
+--- a/none/tests/s390x/Makefile.am
++++ b/none/tests/s390x/Makefile.am
+@@ -19,7 +19,8 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \
+ 	     spechelper-ltr spechelper-or   \
+ 	     spechelper-icm-1  spechelper-icm-2 spechelper-tmll \
+ 	     spechelper-tm laa vector lsc2 ppno vector_string vector_integer \
+-	     vector_float add-z14 sub-z14 mul-z14 bic
++	     vector_float add-z14 sub-z14 mul-z14 bic \
++	     misc3
+ 
+ if BUILD_DFP_TESTS
+   INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo
+diff --git a/none/tests/s390x/misc3.c b/none/tests/s390x/misc3.c
+new file mode 100644
+index 000000000..ae6e8d4c2
+--- /dev/null
++++ b/none/tests/s390x/misc3.c
+@@ -0,0 +1,182 @@
++#include <stdio.h>
++
++/* -- Logical instructions -- */
++
++#define TEST_GENERATE(opcode,insn)                              \
++   static void test_##insn(unsigned long a, unsigned long b)    \
++   {                                                            \
++      unsigned long out = 0xdecaffee42424242;                   \
++      int cc;                                                   \
++                                                                \
++      __asm__(                                                  \
++         "cr    0,0\n\t"               /* Clear CC */           \
++         ".insn rrf,0x" #opcode "0000,%[out],%[a],%[b],0\n\t"   \
++         "ipm   %[cc]\n\t"                                      \
++         "srl   %[cc],28\n"                                     \
++         : [out] "+d" (out),                                    \
++           [cc] "=d" (cc)                                       \
++         : [a] "d" (a),                                         \
++           [b] "d" (b)                                          \
++         : "cc");                                               \
++                                                                \
++      printf("\t%016lx %016lx -> %016lx cc=%d\n",               \
++             a, b, out, cc);                                    \
++   }
++
++#define TEST_EXEC(opcode,insn)                             \
++   do {                                                    \
++      puts(#insn);                                         \
++      test_##insn(0, 0);                                   \
++      test_##insn(0, -1);                                  \
++      test_##insn(-1, 0);                                  \
++      test_##insn(-1, -1);                                 \
++      test_##insn(0x012345678abcdef, 0);                   \
++      test_##insn(0x012345678abcdef, -1);                  \
++      test_##insn(0x55555555aaaaaaaa, 0xaaaaaaaa55555555); \
++   } while (0)
++
++#define INSNS                                    \
++   XTEST(b9f5,ncrk);                             \
++   XTEST(b9e5,ncgrk);                            \
++   XTEST(b974,nnrk);                             \
++   XTEST(b964,nngrk);                            \
++   XTEST(b976,nork);                             \
++   XTEST(b966,nogrk);                            \
++   XTEST(b977,nxrk);                             \
++   XTEST(b967,nxgrk);                            \
++   XTEST(b975,ocrk);                             \
++   XTEST(b965,ocgrk);
++
++#define XTEST TEST_GENERATE
++INSNS
++#undef XTEST
++
++static void test_all_logical_insns()
++{
++#define XTEST TEST_EXEC
++   INSNS
++#undef XTEST
++}
++#undef INSNS
++#undef TEST_GENERATE
++#undef TEST_EXEC
++
++
++/* -- Full population count -- */
++
++static void test_popcnt(unsigned long op2)
++{
++   unsigned long result;
++   int cc;
++
++   __asm__(".insn   rrf,0xb9e10000,%[result],%[op2],8,0\n\t"
++           "ipm     %[cc]\n\t"
++           "srl     %[cc],28\n"
++           : [result]"=d" (result),
++             [cc]"=d" (cc)
++           : [op2]"d" (op2)
++           : "cc");
++   printf("\t%016lx -> %2lu cc=%d\n", op2, result, cc);
++}
++
++static int test_all_popcnt()
++{
++   puts("popcnt");
++   test_popcnt(0);
++   test_popcnt(1);
++   test_popcnt(0x8000000000000000);
++   test_popcnt(-1UL);
++   test_popcnt(0xff427e3800556bcd);
++   return 0;
++}
++
++/* -- Select -- */
++
++#define TEST_GENERATE(opcode,insn)                              \
++   static void test_##insn(unsigned long a, unsigned long b)    \
++   {                                                            \
++      unsigned long out0 = 0x0cafebad0badcafe;                  \
++      unsigned long out1 = 0x0badcafe0cafebad;                  \
++                                                                \
++      __asm__(                                                  \
++         "cr    0,0\n\t"               /* Clear CC */           \
++         ".insn rrf,0x" #opcode "0000,%[out0],%[a],%[b],8\n\t"  \
++         ".insn rrf,0x" #opcode "0000,%[out1],%[a],%[b],7\n\t"  \
++         : [out0] "+d" (out0),                                  \
++           [out1] "+d" (out1)                                   \
++         : [a] "d" (a),                                         \
++           [b] "d" (b)                                          \
++         : );                                                   \
++                                                                \
++      printf("\t%016lx %016lx -> %016lx %016lx\n",              \
++             a, b, out0, out1);                                 \
++   }
++
++#define TEST_EXEC(opcode,insn)                             \
++   do {                                                    \
++      puts(#insn);                                         \
++      test_##insn(-1, 0);                                  \
++      test_##insn(0, -1);                                  \
++      test_##insn(0x1234567890abcdef, 0xfedcba9876543210); \
++   } while (0)
++
++#define INSNS                                    \
++   XTEST(b9f0,selr);                             \
++   XTEST(b9e3,selgr);                            \
++   XTEST(b9c0,selfhr);
++
++#define XTEST TEST_GENERATE
++INSNS
++#undef XTEST
++
++static void test_all_select()
++{
++#define XTEST TEST_EXEC
++   INSNS
++#undef XTEST
++}
++#undef INSNS
++#undef TEST_GENERATE
++#undef TEST_EXEC
++
++
++/* -- Move right to left -- */
++
++static void test_mvcrl(void *to, void *from, size_t len)
++{
++   len -= 1;   /* the instruction takes length - 1 in r0 */
++   __asm__("lgr    0,%[len]\n\t"
++           ".insn  sse,0xe50a00000000,%[to],%[from]\n\t"
++           : [to] "+Q" (*(struct { char c[len + 1]; } *) to)
++           : [from] "Q" (*(struct { char c[len + 1]; } *) from),
++             [len] "d" (len)
++           : "0");   /* r0 is overwritten by the lgr above */
++}
++
++static void test_all_mvcrl()
++{
++   static const char pattern[] =
++      "abcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZ";
++   char buf[4 * sizeof(pattern) - 2];
++
++   test_mvcrl(buf, (char *) pattern, sizeof(pattern));
++   test_mvcrl(buf + sizeof(pattern) - 1, buf, sizeof(pattern));
++   test_mvcrl(buf + 2 * sizeof(pattern) - 2, buf, 2 * sizeof(pattern) - 1);
++   test_mvcrl(buf + 32, buf + 10, 63);
++   test_mvcrl(buf + 2, buf + 1, 256);
++   test_mvcrl(buf + 254, buf + 256, 2);
++   puts("mvcrl");
++   for (int i = 0; i < 256; i += 64) {
++      printf("\t%.64s\n", buf + i);
++   }
++}
++
++
++int main()
++{
++   test_all_logical_insns();
++   test_all_popcnt();
++   test_all_select();
++   test_all_mvcrl();
++   return 0;
++}
+diff --git a/none/tests/s390x/misc3.stderr.exp b/none/tests/s390x/misc3.stderr.exp
+new file mode 100644
+index 000000000..139597f9c
+--- /dev/null
++++ b/none/tests/s390x/misc3.stderr.exp
+@@ -0,0 +1,2 @@
++
++
+diff --git a/none/tests/s390x/misc3.stdout.exp b/none/tests/s390x/misc3.stdout.exp
+new file mode 100644
+index 000000000..caaba4960
+--- /dev/null
++++ b/none/tests/s390x/misc3.stdout.exp
+@@ -0,0 +1,103 @@
++ncrk
++	0000000000000000 0000000000000000 -> decaffee00000000 cc=0
++	0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0
++	ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0
++	0012345678abcdef 0000000000000000 -> decaffee78abcdef cc=1
++	0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0
++	55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1
++ncgrk
++	0000000000000000 0000000000000000 -> 0000000000000000 cc=0
++	0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0
++	ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0
++	0012345678abcdef 0000000000000000 -> 0012345678abcdef cc=1
++	0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0
++	55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1
++nnrk
++	0000000000000000 0000000000000000 -> decaffeeffffffff cc=1
++	0000000000000000 ffffffffffffffff -> decaffeeffffffff cc=1
++	ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0
++	0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1
++	0012345678abcdef ffffffffffffffff -> decaffee87543210 cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeffffffff cc=1
++nngrk
++	0000000000000000 0000000000000000 -> ffffffffffffffff cc=1
++	0000000000000000 ffffffffffffffff -> ffffffffffffffff cc=1
++	ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0
++	0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1
++	0012345678abcdef ffffffffffffffff -> ffedcba987543210 cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> ffffffffffffffff cc=1
++nork
++	0000000000000000 0000000000000000 -> decaffeeffffffff cc=1
++	0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0
++	ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0
++	ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0
++	0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1
++	0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0
++	55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0
++nogrk
++	0000000000000000 0000000000000000 -> ffffffffffffffff cc=1
++	0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0
++	ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0
++	ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0
++	0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1
++	0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0
++	55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0
++nxrk
++	0000000000000000 0000000000000000 -> decaffeeffffffff cc=1
++	0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0
++	ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0
++	ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1
++	0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1
++	0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0
++nxgrk
++	0000000000000000 0000000000000000 -> ffffffffffffffff cc=1
++	0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0
++	ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0
++	ffffffffffffffff ffffffffffffffff -> ffffffffffffffff cc=1
++	0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1
++	0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0
++ocrk
++	0000000000000000 0000000000000000 -> decaffeeffffffff cc=1
++	0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0
++	ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1
++	0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1
++	0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1
++ocgrk
++	0000000000000000 0000000000000000 -> ffffffffffffffff cc=1
++	0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0
++	ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1
++	ffffffffffffffff ffffffffffffffff -> ffffffffffffffff cc=1
++	0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1
++	0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1
++	55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1
++popcnt
++	0000000000000000 ->  0 cc=0
++	0000000000000001 ->  1 cc=1
++	8000000000000000 ->  1 cc=1
++	ffffffffffffffff -> 64 cc=1
++	ff427e3800556bcd -> 33 cc=1
++selr
++	ffffffffffffffff 0000000000000000 -> 0cafebadffffffff 0badcafe00000000
++	0000000000000000 ffffffffffffffff -> 0cafebad00000000 0badcafeffffffff
++	1234567890abcdef fedcba9876543210 -> 0cafebad90abcdef 0badcafe76543210
++selgr
++	ffffffffffffffff 0000000000000000 -> ffffffffffffffff 0000000000000000
++	0000000000000000 ffffffffffffffff -> 0000000000000000 ffffffffffffffff
++	1234567890abcdef fedcba9876543210 -> 1234567890abcdef fedcba9876543210
++selfhr
++	ffffffffffffffff 0000000000000000 -> ffffffff0badcafe 000000000cafebad
++	0000000000000000 ffffffffffffffff -> 000000000badcafe ffffffff0cafebad
++	1234567890abcdef fedcba9876543210 -> 123456780badcafe fedcba980cafebad
++mvcrl
++	abbcdefghijklmnopqrstuvwxyz-01234klmnopqrstuvwxyz-0123456789.ABC
++	DEFGHIJKLMNOPQRSTUVWXYZabcdefghi456789.ABCDEFGHIJKLMNOPQRSTUVWXY
++	Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXY
++	Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWZ
+diff --git a/none/tests/s390x/misc3.vgtest b/none/tests/s390x/misc3.vgtest
+new file mode 100644
+index 000000000..d051a06bd
+--- /dev/null
++++ b/none/tests/s390x/misc3.vgtest
+@@ -0,0 +1 @@
++prog: misc3
+-- 
+2.23.0
+
+From 401b51d79886362d1962dc487db45ac91462eaa0 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Wed, 7 Apr 2021 12:29:32 +0200
+Subject: [PATCH 06/13] s390x: Vec-enh-2, extend VSL, VSRA, and VSRL
+
+The vector-enhancements facility 2 extends the existing bitwise vector
+shift instructions VSL, VSRA, and VSRL.  Now they allow the shift
+vector (the third operand) to contain different shift amounts for each
+byte.  Add support for these new forms.
+---
+ VEX/priv/guest_s390_toIR.c | 58 ++++++++++++++++++++++++++++++--------
+ 1 file changed, 47 insertions(+), 11 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 9f7d98f8c..622d5a02e 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -17983,30 +17983,66 @@ s390_irgen_VERLL(UChar v1, IRTemp op2addr, UChar v3, UChar m4)
+ static const HChar *
+ s390_irgen_VSL(UChar v1, UChar v2, UChar v3)
+ {
+-   IRTemp shift_amount = newTemp(Ity_I8);
+-   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
+-
+-   put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount)));
++   IRTemp a = newTemp(Ity_V128);
++   IRTemp b = newTemp(Ity_V128);
++
++   assign(a, get_vr_qw(v2));
++   assign(b, get_vr_qw(v3));
++
++   put_vr_qw(v1,
++             binop(Iop_OrV128,
++                   binop(Iop_Shl8x16, mkexpr(a), mkexpr(b)),
++                   binop(Iop_Shr8x16,
++                         binop(Iop_Shr8x16,
++                               binop(Iop_ShlV128, mkexpr(a), mkU8(8)),
++                               unop(Iop_NotV128, mkexpr(b))),
++                         unop(Iop_Dup8x16, mkU8(1)))));
+    return "vsl";
+ }
+ 
+ static const HChar *
+ s390_irgen_VSRL(UChar v1, UChar v2, UChar v3)
+ {
+-   IRTemp shift_amount = newTemp(Ity_I8);
+-   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
++   IRTemp a = newTemp(Ity_V128);
++   IRTemp b = newTemp(Ity_V128);
+ 
+-   put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount)));
++   assign(a, get_vr_qw(v2));
++   assign(b, get_vr_qw(v3));
++
++   put_vr_qw(v1,
++             binop(Iop_OrV128,
++                   binop(Iop_Shr8x16, mkexpr(a), mkexpr(b)),
++                   binop(Iop_Shl8x16,
++                         binop(Iop_Shl8x16,
++                               binop(Iop_ShrV128, mkexpr(a), mkU8(8)),
++                               unop(Iop_NotV128, mkexpr(b))),
++                         unop(Iop_Dup8x16, mkU8(1)))));
+    return "vsrl";
+ }
+ 
+ static const HChar *
+ s390_irgen_VSRA(UChar v1, UChar v2, UChar v3)
+ {
+-   IRTemp shift_amount = newTemp(Ity_I8);
+-   assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111)));
+-
+-   put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount)));
++   IRTemp a = newTemp(Ity_V128);
++   IRTemp b = newTemp(Ity_V128);
++
++   assign(a, get_vr_qw(v2));
++   assign(b, get_vr_qw(v3));
++
++   /* Shift-right: first byte arithmetically, all others logically */
++   IRExpr* elems_shifted =
++      binop(Iop_Sar8x16,
++            binop(Iop_Shr8x16, mkexpr(a),
++                  binop(Iop_AndV128, mkexpr(b), mkV128(0x7fff))),
++            binop(Iop_AndV128, mkexpr(b), mkV128(0x8000)));
++   /* Then OR the appropriate bits from the byte to the left */
++   put_vr_qw(v1,
++             binop(Iop_OrV128, elems_shifted,
++                   binop(Iop_Shl8x16,
++                         binop(Iop_Shl8x16,
++                               binop(Iop_ShrV128, mkexpr(a), mkU8(8)),
++                               unop(Iop_NotV128, mkexpr(b))),
++                         unop(Iop_Dup8x16, mkU8(1)))));
+    return "vsra";
+ }
+ 
+-- 
+2.23.0
+
+From 3fdf065d0bf26a02d6d93a812a6571a287379c36 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Thu, 11 Feb 2021 20:02:03 +0100
+Subject: [PATCH 07/13] s390x: Vec-enh-2, extend VCDG, VCDLG, VCGD, and VCLGD
+
+The vector-enhancements facility 2 extends the vector floating-point
+conversion instructions VCDG, VCDLG, VCGD, and VCLGD.  In addition to
+64-bit elements, they now also handle 32-bit elements.  Add support for
+these new forms.
+---
+ VEX/priv/guest_s390_toIR.c | 36 ++++++++++++++++++++----------------
+ 1 file changed, 20 insertions(+), 16 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 622d5a02e..11271a1c9 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -18794,44 +18794,48 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding,
+ static const HChar *
+ s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+ {
+-   s390_insn_assert("vcdg", m3 == 3);
+-
+-   s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True,
+-                          v1, v2, m3, m4, m5);
++   s390_insn_assert("vcdg", m3 == 2 || m3 == 3);
+ 
++   s390_vector_fp_convert(m3 == 2 ? Iop_I32StoF32 : Iop_I64StoF64,
++                          m3 == 2 ? Ity_I32       : Ity_I64,
++                          m3 == 2 ? Ity_F32       : Ity_F64,
++                          True, v1, v2, m3, m4, m5);
+    return "vcdg";
+ }
+ 
+ static const HChar *
+ s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+ {
+-   s390_insn_assert("vcdlg", m3 == 3);
+-
+-   s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True,
+-                          v1, v2, m3, m4, m5);
++   s390_insn_assert("vcdlg", m3 == 2 || m3 == 3);
+ 
++   s390_vector_fp_convert(m3 == 2 ? Iop_I32UtoF32 : Iop_I64UtoF64,
++                          m3 == 2 ? Ity_I32       : Ity_I64,
++                          m3 == 2 ? Ity_F32       : Ity_F64,
++                          True, v1, v2, m3, m4, m5);
+    return "vcdlg";
+ }
+ 
+ static const HChar *
+ s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+ {
+-   s390_insn_assert("vcgd", m3 == 3);
+-
+-   s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True,
+-                          v1, v2, m3, m4, m5);
++   s390_insn_assert("vcgd", m3 == 2 || m3 == 3);
+ 
++   s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32S : Iop_F64toI64S,
++                          m3 == 2 ? Ity_F32       : Ity_F64,
++                          m3 == 2 ? Ity_I32       : Ity_I64,
++                          True, v1, v2, m3, m4, m5);
+    return "vcgd";
+ }
+ 
+ static const HChar *
+ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+ {
+-   s390_insn_assert("vclgd", m3 == 3);
+-
+-   s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True,
+-                          v1, v2, m3, m4, m5);
++   s390_insn_assert("vclgd", m3 == 2 || m3 == 3);
+ 
++   s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32U : Iop_F64toI64U,
++                          m3 == 2 ? Ity_F32       : Ity_F64,
++                          m3 == 2 ? Ity_I32       : Ity_I64,
++                          True, v1, v2, m3, m4, m5);
+    return "vclgd";
+ }
+ 
+-- 
+2.23.0
+
+From d195bf17388572e85474c7ded4b5bd0e4774637d Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 16 Feb 2021 16:19:31 +0100
+Subject: [PATCH 08/13] s390x: Vec-enh-2, VLBR and friends
+
+Add support for the new byte- and element-swapping vector load/store
+instructions VLEBRH, VLEBRG, VLEBRF, VLLEBRZ, VLBRREP, VLBR, VLER,
+VSTEBRH, VSTEBRG, VSTEBRF, VSTBR, and VSTER.
+---
+ VEX/priv/guest_s390_toIR.c | 256 +++++++++++++++++++++++++++++++++++++
+ VEX/priv/host_s390_isel.c  |   9 ++
+ 2 files changed, 265 insertions(+)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 11271a1c9..f65b42705 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -19388,6 +19388,209 @@ s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2)
+    return "selfhr";
+ }
+ 
++/* Helper function that byte-swaps each element of its V128 input operand */
++static IRExpr *
++s390_byteswap_elements(IRExpr* v, UChar m)
++{
++   static const ULong perm[4][2] = {
++      { 0x0100030205040706, 0x09080b0a0d0c0f0e }, /* 2-byte elements */
++      { 0x0302010007060504, 0x0b0a09080f0e0d0c }, /* 4-byte elements */
++      { 0x0706050403020100, 0x0f0e0d0c0b0a0908 }, /* 8-byte elements */
++      { 0x0f0e0d0c0b0a0908, 0x0706050403020100 }, /* whole vector */
++   };
++   return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128,
++                                       mkU64(perm[m - 1][0]),
++                                       mkU64(perm[m - 1][1])));
++}
++
++/* Reverse the element order of V128 'v'; 'm' (1..3) selects the perm[] row. */
++static IRExpr *
++s390_reverse_elements(IRExpr* v, UChar m)
++{
++   static const ULong perm[3][2] = {
++      { 0x0e0f0c0d0a0b0809, 0x0607040502030001 }, /* 2-byte elements */
++      { 0x0c0d0e0f08090a0b, 0x0405060700010203 }, /* 4-byte elements */
++      { 0x08090a0b0c0d0e0f, 0x0001020304050607 }, /* 8-byte elements */
++   };
++   return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128,
++                                       mkU64(perm[m - 1][0]), /* unchecked */
++                                       mkU64(perm[m - 1][1])));
++}
++static const HChar *
++s390_irgen_VLBR(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vlbr", m3 >= 1 && m3 <= 4);
++   put_vr_qw(v1, s390_byteswap_elements(load(Ity_V128, mkexpr(op2addr)), m3));
++   return "vlbr";
++}
++
++static const HChar *
++s390_irgen_VSTBR(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vstbr", m3 >= 1 && m3 <= 4);
++   store(mkexpr(op2addr), s390_byteswap_elements(get_vr_qw(v1), m3));
++   return "vstbr";
++}
++
++static const HChar *
++s390_irgen_VLER(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vler", m3 >= 1 && m3 <= 3);
++   put_vr_qw(v1, s390_reverse_elements(load(Ity_V128, mkexpr(op2addr)), m3));
++   return "vler";
++}
++
++static const HChar *
++s390_irgen_VSTER(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vster", m3 >= 1 && m3 <= 3); /* same range as VLER */
++   store(mkexpr(op2addr), s390_reverse_elements(get_vr_qw(v1), m3));
++   return "vster";
++}
++
++/* Helper function that combines its two V128 operands by replacing element 'to'
++   in 'a' by byte-swapped element 'from' in 'b'; 'm' is log2 of the elem size */
++static IRExpr *
++s390_insert_byteswapped(IRExpr* a, IRExpr* b, UChar m, UChar to, UChar from)
++{
++   UInt elem_size = 1U << m;                 /* element size in bytes */
++   UInt start = elem_size * to;              /* first byte of element 'to' */
++   UInt end = start + elem_size - 1;         /* last byte of element 'to' */
++   UInt offs = end + elem_size * from + 16;  /* offs - i: reversed src index */
++   UInt i;
++
++   ULong permH = 0;                          /* selectors for bytes 0..7 */
++   for (i = 0; i < 8; i++) {
++      permH = (permH << 8) | (i >= start && i <= end ? offs - i : i);
++   }
++   ULong permL = 0;                          /* selectors for bytes 8..15 */
++   for (i = 8; i < 16; i++) {
++      permL = (permL << 8) | (i >= start && i <= end ? offs - i : i);
++   }
++   return triop(Iop_Perm8x16x2, a, b, binop(Iop_64HLtoV128,
++                                            mkU64(permH), mkU64(permL)));
++}
++
++static const HChar *
++s390_irgen_VLEBRH(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vlebrh", m3 <= 7);
++   IRTemp op2 = newTemp(Ity_I16);
++   assign(op2, load(Ity_I16, mkexpr(op2addr)));
++   put_vr(v1, Ity_I16, m3, binop(Iop_Or16,
++                                 binop(Iop_Shl16, mkexpr(op2), mkU8(8)),
++                                 binop(Iop_Shr16, mkexpr(op2), mkU8(8))));
++   return "vlebrh";
++}
++
++static const HChar *
++s390_irgen_VLEBRF(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vlebrf", m3 <= 3);
++   IRTemp op1 = newTemp(Ity_V128);
++   assign(op1, get_vr_qw(v1));
++   IRTemp op2 = newTemp(Ity_I64);
++   assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr))));
++   IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2));
++   put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 2, m3, 3));
++   return "vlebrf";
++}
++
++static const HChar *
++s390_irgen_VLEBRG(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vlebrg", m3 <= 1);
++   IRTemp op1 = newTemp(Ity_V128);
++   assign(op1, get_vr_qw(v1));
++   IRTemp op2 = newTemp(Ity_I64);
++   assign(op2, load(Ity_I64, mkexpr(op2addr)));
++   IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2));
++   put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 3, m3, 1));
++   return "vlebrg";
++}
++
++static const HChar *
++s390_irgen_VLBRREP(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vlbrrep", m3 >= 1 && m3 <= 3);
++   static const ULong perm[3] = {
++      0x0f0e0f0e0f0e0f0e,       /* 2-byte element */
++      0x0f0e0d0c0f0e0d0c,       /* 4-byte element */
++      0x0f0e0d0c0b0a0908        /* 8-byte element */
++   };
++   IRExpr* permHL = mkU64(perm[m3 - 1]);
++   IRTemp op2 = newTemp(Ity_I64);
++   if (m3 == 3)
++      assign(op2, load(Ity_I64, mkexpr(op2addr)));
++   else
++      assign(op2, unop(m3 == 2 ? Iop_32Uto64 : Iop_16Uto64,
++                       load(s390_vr_get_type(m3), mkexpr(op2addr))));
++   put_vr_qw(v1, binop(Iop_Perm8x16,
++                       binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)),
++                       binop(Iop_64HLtoV128, permHL, permHL)));
++   return "vlbrrep";
++}
++
++static const HChar *
++s390_irgen_VLLEBRZ(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vllebrz", (m3 >= 1 && m3 <= 3) || m3 == 6);
++   static const ULong perm[6] = {
++      0x0000000000000f0e,       /* 2-byte element */
++      0x000000000f0e0d0c,       /* 4-byte element */
++      0x0f0e0d0c0b0a0908,       /* 8-byte element */
++      0,                        /* invalid (4) */
++      0,                        /* invalid (5) */
++      0x0f0e0d0c00000000,       /* 4-byte element, left-aligned */
++   };
++   IRExpr* permH = mkU64(perm[m3 - 1]);
++   IRTemp op2 = newTemp(Ity_I64);
++   if (m3 == 3)
++      assign(op2, load(Ity_I64, mkexpr(op2addr)));
++   else
++      assign(op2, unop((m3 & 3) == 2 ? Iop_32Uto64 : Iop_16Uto64,
++                       load(s390_vr_get_type(m3 & 3), mkexpr(op2addr))));
++   put_vr_qw(v1, binop(Iop_Perm8x16,
++                       binop(Iop_64HLtoV128, mkU64(0), mkexpr(op2)),
++                       binop(Iop_64HLtoV128, permH, mkU64(0))));
++   return "vllebrz";
++}
++
++static const HChar *
++s390_irgen_VSTEBRH(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vstebrh", m3 <= 7);
++   IRTemp op1 = newTemp(Ity_I16);
++   assign(op1, get_vr(v1, Ity_I16, m3));
++   store(mkexpr(op2addr), binop(Iop_Or16,
++                                binop(Iop_Shl16, mkexpr(op1), mkU8(8)),
++                                binop(Iop_Shr16, mkexpr(op1), mkU8(8))));
++   return "vstebrh";
++}
++
++static const HChar *
++s390_irgen_VSTEBRF(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vstebrf", m3 <= 3);
++   IRTemp op1 = newTemp(Ity_V128);
++   assign(op1, get_vr_qw(v1));
++   IRExpr* b = s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 2, 3, m3);
++   store(mkexpr(op2addr), unop(Iop_V128to32, b));
++   return "vstebrf";
++}
++
++static const HChar *
++s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3)
++{
++   s390_insn_assert("vstebrg", m3 <= 1);
++   IRTemp op1 = newTemp(Ity_V128);
++   assign(op1, get_vr_qw(v1));
++   IRExpr* b = s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 3, 1, m3);
++   store(mkexpr(op2addr), unop(Iop_V128to64, b));
++   return "vstebrg";
++}
++
+ /* New insns are added here.
+    If an insn is contingent on a facility being installed also
+    check whether the list of supported facilities in function
+@@ -21003,6 +21206,59 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
+                                                 RXY_x2(ovl), RXY_b2(ovl),
+                                                 RXY_dl2(ovl),
+                                                 RXY_dh2(ovl));  goto ok;
++   case 0xe60000000001ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRH, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000002ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRG, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000003ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRF, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000004ULL: s390_format_VRX_VRRDM(s390_irgen_VLLEBRZ,
++                                                 VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000005ULL: s390_format_VRX_VRRDM(s390_irgen_VLBRREP,
++                                                 VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000006ULL: s390_format_VRX_VRRDM(s390_irgen_VLBR, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000007ULL: s390_format_VRX_VRRDM(s390_irgen_VLER, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe60000000009ULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRH,
++                                                 VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe6000000000aULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRG,
++                                                 VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe6000000000bULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRF,
++                                                 VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe6000000000eULL: s390_format_VRX_VRRDM(s390_irgen_VSTBR, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
++   case 0xe6000000000fULL: s390_format_VRX_VRRDM(s390_irgen_VSTER, VRX_v1(ovl),
++                                                 VRX_x2(ovl), VRX_b2(ovl),
++                                                 VRX_d2(ovl), VRX_m3(ovl),
++                                                 VRX_rxb(ovl));  goto ok;
+    case 0xe60000000034ULL: /* VPKZ */ goto unimplemented;
+    case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl),
+                                                 VSI_b2(ovl), VSI_d2(ovl),
+diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
+index ee20c6711..06e195957 100644
+--- a/VEX/priv/host_s390_isel.c
++++ b/VEX/priv/host_s390_isel.c
+@@ -4189,6 +4189,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
+          return dst;
+       }
+ 
++      case Iop_Perm8x16:
++         size = 16;
++         reg1 = s390_isel_vec_expr(env, arg1);
++         reg2 = s390_isel_vec_expr(env, arg2);
++
++         addInstr(env, s390_insn_vec_triop(size, S390_VEC_PERM,
++                                           dst, reg1, reg1, reg2));
++         return dst;
++
+       case Iop_CmpEQ8x16:
+          size = 1;
+          vec_binop = S390_VEC_COMPARE_EQUAL;
+-- 
+2.23.0
+
+From f7447f4c73b2d0fb4eb3827c3709f378f6c9c656 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 23 Feb 2021 19:10:37 +0100
+Subject: [PATCH 09/13] s390x: Vec-enh-2, VSLD and VSRD
+
+Support the new "vector shift left/right double by bit" instructions VSLD
+and VSRD.
+---
+ VEX/priv/guest_s390_toIR.c | 50 ++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 50 insertions(+)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index f65b42705..aa429d085 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -18228,6 +18228,48 @@ s390_irgen_VSLDB(UChar v1, UChar v2, UChar v3, UChar i4)
+    return "vsldb";
+ }
+ 
++static const HChar *
++s390_irgen_VSLD(UChar v1, UChar v2, UChar v3, UChar i4)
++{
++   s390_insn_assert("vsld", i4 <= 7);
++
++   if (i4 == 0) {
++      /* Just copy v2. */
++      put_vr_qw(v1, get_vr_qw(v2));
++   } else {
++      /* Concatenate v2's tail with v3's head. */
++      put_vr_qw(v1,
++                binop(Iop_OrV128,
++                      binop(Iop_ShlV128, get_vr_qw(v2), mkU8(i4)),
++                      binop(Iop_ShrV128, get_vr_qw(v3), mkU8(128 - i4))
++                     )
++               );
++   }
++
++   return "vsld";
++}
++
++static const HChar *
++s390_irgen_VSRD(UChar v1, UChar v2, UChar v3, UChar i4)
++{
++   s390_insn_assert("vsrd", i4 <= 7);
++
++   if (i4 == 0) {
++      /* Just copy v3. */
++      put_vr_qw(v1, get_vr_qw(v3));
++   } else {
++      /* Concatenate v2's tail with v3's head. */
++      put_vr_qw(v1,
++                binop(Iop_OrV128,
++                      binop(Iop_ShlV128, get_vr_qw(v2), mkU8(128 - i4)),
++                      binop(Iop_ShrV128, get_vr_qw(v3), mkU8(i4))
++                     )
++               );
++   }
++
++   return "vsrd";
++}
++
+ static const HChar *
+ s390_irgen_VMO(UChar v1, UChar v2, UChar v3, UChar m4)
+ {
+@@ -21541,6 +21583,14 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
+    case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl),
+                                                VRR_v2(ovl), VRR_r3(ovl),
+                                                VRR_rxb(ovl));  goto ok;
++   case 0xe70000000086ULL: s390_format_VRId_VVVI(s390_irgen_VSLD, VRId_v1(ovl),
++                                                 VRId_v2(ovl), VRId_v3(ovl),
++                                                 VRId_i4(ovl),
++                                                 VRId_rxb(ovl));  goto ok;
++   case 0xe70000000087ULL: s390_format_VRId_VVVI(s390_irgen_VSRD, VRId_v1(ovl),
++                                                 VRId_v2(ovl), VRId_v3(ovl),
++                                                 VRId_i4(ovl),
++                                                 VRId_rxb(ovl));  goto ok;
+    case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl),
+                                                   VRRd_v2(ovl), VRRd_v3(ovl),
+                                                   VRRd_v4(ovl), VRRd_m5(ovl),
+-- 
+2.23.0
+
+From 388082bca7146f8a15814798dbfe570af2aab2a9 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Wed, 10 Mar 2021 19:22:51 +0100
+Subject: [PATCH 10/13] s390x: Vec-enh-2, VSTRS
+
+Support the new "vector string search" instruction VSTRS.  The
+implementation is a full emulation and follows a similar approach as for
+the other vector string instructions.
+---
+ VEX/priv/guest_s390_toIR.c | 104 +++++++++++++++++++++++++++++++++++++
+ 1 file changed, 104 insertions(+)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index aa429d085..46a867475 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -17601,6 +17601,105 @@ s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
+    return "vstrc";
+ }
+ 
++static const HChar *
++s390_irgen_VSTRS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
++{
++   s390_insn_assert("vstrs", m5 <= 2 && m6 == (m6 & 2));
++
++   IRTemp op2 = newTemp(Ity_V128);
++   IRTemp op3 = newTemp(Ity_V128);
++   IRTemp op4 = newTemp(Ity_I8);
++   IRTemp op2clean = newTemp(Ity_V128);
++   IRTemp op3mask = newTemp(Ity_V128);
++   IRTemp result = newTemp(Ity_I64);       /* only used as an I64 below */
++   IRTemp ccnomatch = newTemp(Ity_I64);
++   IRExpr* tmp;
++   IRExpr* match = NULL;
++   UChar elem_bits = 8 << m5;
++   IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
++                                Iop_CmpEQ16x8, Iop_CmpEQ32x4);
++
++   assign(op2, get_vr_qw(v2));
++   assign(op3, get_vr_qw(v3));
++   assign(op4, get_vr_b7(v4));
++
++   tmp = unop(Iop_Dup32x4,
++              unop(Iop_1Sto32, binop(Iop_CmpNE8, mkexpr(op4), mkU8(16))));
++   tmp = binop(Iop_ShrV128, tmp, binop(Iop_Shl8, mkexpr(op4), mkU8(3)));
++
++   if (s390_vr_is_zs_set(m6)) {
++      IRTemp op2eos = newTemp(Ity_V128);
++      IRExpr* t;
++      t = binop(cmpeq_op, mkexpr(op2), mkV128(0));
++      for (UChar i = m5; i < 4; i++) {
++         IRTemp s = newTemp(Ity_V128);
++         assign(s, t);
++         t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s),
++                                                mkU8(8 << i)));
++      }
++      assign(op2eos, t);
++      assign(op2clean, binop(Iop_AndV128, mkexpr(op2),
++                             unop(Iop_NotV128, mkexpr(op2eos))));
++      assign(ccnomatch, binop(Iop_And64, mkU64(1),
++                              unop(Iop_V128to64, mkexpr(op2eos))));
++
++      t = binop(cmpeq_op, mkexpr(op3), mkV128(0));
++      for (UChar i = m5; i < 4; i++) {
++         IRTemp s = newTemp(Ity_V128);
++         assign(s, t);
++         t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s),
++                                                mkU8(8 << i)));
++      }
++      tmp = binop(Iop_OrV128, tmp, t);
++   } else {
++      assign(op2clean, mkexpr(op2));
++   }
++   assign(op3mask, unop(Iop_NotV128, tmp));
++
++   for (UChar shift = 0; shift < 128; shift += elem_bits) {
++      IRTemp s = newTemp(Ity_V128);
++      tmp = unop(Iop_NotV128,
++                 binop(cmpeq_op, mkexpr(op2clean),
++                       binop(Iop_ShrV128, mkexpr(op3), mkU8(shift))));
++      assign(s, binop(Iop_CmpEQ64x2, mkV128(0),
++                      binop(Iop_AndV128, mkexpr(op3mask),
++                            binop(Iop_ShlV128, tmp, mkU8(shift)))));
++      tmp = mkexpr(s);
++      if (shift < 64) {
++         tmp = binop(Iop_AndV128, tmp,
++                     unop(Iop_Dup16x8, binop(Iop_GetElem16x8, tmp, mkU8(4))));
++      }
++      tmp = binop(Iop_AndV128, tmp,
++                  unop(Iop_Dup16x8, mkU16(1 << (15 - shift / 8))));
++      if (shift)
++         match = binop(Iop_OrV128, mkexpr(mktemp(Ity_V128, match)), tmp);
++      else
++         match = tmp;
++   }
++   assign(result, unop(Iop_ClzNat64,
++                       binop(Iop_Or64,
++                             unop(Iop_V128HIto64, match),
++                             mkU64((1ULL << 48) - 1))));
++   put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
++
++   /* Set condition code.
++      0: no match, no string terminator in op2
++      1: no match, string terminator found
++      2: full match
++      3: partial match */
++   IRTemp cc = newTemp(Ity_I64);
++   tmp = binop(Iop_CmpLE64U,
++               binop(Iop_Add64, mkexpr(result), unop(Iop_8Uto64, mkexpr(op4))),
++               mkU64(16));
++   assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(result), mkU64(16)),
++                    s390_vr_is_zs_set(m6) ? mkexpr(ccnomatch) : mkU64(0),
++                    mkite(tmp, mkU64(2), mkU64(3))));
++   s390_cc_set(cc);
++
++   dis_res->hint = Dis_HintVerbose;
++   return "vstrs";
++}
++
+ static const HChar *
+ s390_irgen_VNC(UChar v1, UChar v2, UChar v3)
+ {
+@@ -21596,6 +21695,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
+                                                   VRRd_v4(ovl), VRRd_m5(ovl),
+                                                   VRRd_m6(ovl),
+                                                   VRRd_rxb(ovl));  goto ok;
++   case 0xe7000000008bULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRS, VRRd_v1(ovl),
++                                                  VRRd_v2(ovl), VRRd_v3(ovl),
++                                                  VRRd_v4(ovl), VRRd_m5(ovl),
++                                                  VRRd_m6(ovl),
++                                                  VRRd_rxb(ovl));  goto ok;
+    case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, VRR_v1(ovl),
+                                                VRR_v2(ovl), VRR_r3(ovl),
+                                                VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
+-- 
+2.23.0
+
+From 8a079b405467fa127c6c311d7ae3c649e76106c6 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 16 Feb 2021 17:52:09 +0100
+Subject: [PATCH 11/13] s390x: Mark arch13 features as supported
+
+Make the STFLE instruction report the miscellaneous-instruction-extensions
+facility 3 and the vector-enhancements facility 2 as supported.  Indicate
+support for the latter in the HWCAP vector as well.
+---
+ VEX/priv/guest_s390_helpers.c       | 9 +++------
+ coregrind/m_initimg/initimg-linux.c | 3 ++-
+ include/vki/vki-s390x-linux.h       | 1 +
+ 3 files changed, 6 insertions(+), 7 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
+index 1e04f601a..804b92a29 100644
+--- a/VEX/priv/guest_s390_helpers.c
++++ b/VEX/priv/guest_s390_helpers.c
+@@ -356,9 +356,7 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
+        | s390_stfle_range(51, 55)
+        /* 56: unassigned */
+        /* 57: MSA5, not supported */
+-       | s390_stfle_range(58, 60)
+-       /* 61: miscellaneous-instruction 3, not supported */
+-       | s390_stfle_range(62, 63)),
++       | s390_stfle_range(58, 63)),
+ 
+       /* ===  64 .. 127  === */
+       (s390_stfle_range(64, 72)
+@@ -384,11 +382,10 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
+        /* 143: unassigned */
+        | s390_stfle_range(144, 145)
+        /* 146: MSA8, not supported */
+-       | s390_stfle_range(147, 147)
+-       /* 148: vector-enhancements 2, not supported */
+-       | s390_stfle_range(149, 149)
++       | s390_stfle_range(147, 149)
+        /* 150: unassigned */
+        /* 151: DEFLATE-conversion, not supported */
++       /* 152: vector packed decimal enhancement, not supported */
+        /* 153: unassigned */
+        /* 154: unassigned */
+        /* 155: MSA9, not supported */
+diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
+index fc1a32ecf..37d005168 100644
+--- a/coregrind/m_initimg/initimg-linux.c
++++ b/coregrind/m_initimg/initimg-linux.c
+@@ -703,7 +703,8 @@ Addr setup_client_stack( void*  init_sp,
+                   itself, is not supported by Valgrind. */
+                auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1)
+                                  | VKI_HWCAP_S390_VXRS
+-                                 | VKI_HWCAP_S390_VXRS_EXT);
++                                 | VKI_HWCAP_S390_VXRS_EXT
++                                 | VKI_HWCAP_S390_VXRS_EXT2);
+             }
+ #           elif defined(VGP_arm64_linux)
+             {
+diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h
+index 4ab2d3334..71b363029 100644
+--- a/include/vki/vki-s390x-linux.h
++++ b/include/vki/vki-s390x-linux.h
+@@ -807,6 +807,7 @@ typedef vki_s390_regs vki_elf_gregset_t;
+ #define VKI_HWCAP_S390_TE           1024
+ #define VKI_HWCAP_S390_VXRS         2048
+ #define VKI_HWCAP_S390_VXRS_EXT     8192
++#define VKI_HWCAP_S390_VXRS_EXT2   32768
+ 
+ 
+ //----------------------------------------------------------------------
+-- 
+2.23.0
+
+From 1461d9b8d0b12e55b648fbf50c5dcee30785afa2 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Mon, 17 May 2021 15:34:15 +0200
+Subject: [PATCH 12/13] s390x: Vec-enh-2, test cases
+
+Add test cases for verifying the new/enhanced instructions in the
+vector-enhancements facility 2.  For "vector string search" VSTRS add a
+memcheck test case.
+---
+ .gitignore                            |   2 +
+ memcheck/tests/s390x/Makefile.am      |   3 +-
+ memcheck/tests/s390x/vstrs.c          |  68 ++++++
+ memcheck/tests/s390x/vstrs.stderr.exp |  16 ++
+ memcheck/tests/s390x/vstrs.stdout.exp |   0
+ memcheck/tests/s390x/vstrs.vgtest     |   2 +
+ none/tests/s390x/Makefile.am          |   3 +-
+ none/tests/s390x/vec2.c               | 314 ++++++++++++++++++++++++++
+ none/tests/s390x/vec2.stderr.exp      |   2 +
+ none/tests/s390x/vec2.stdout.exp      | 168 ++++++++++++++
+ none/tests/s390x/vec2.vgtest          |   2 +
+ tests/s390x_features.c                |   4 +
+ 12 files changed, 582 insertions(+), 2 deletions(-)
+ create mode 100644 memcheck/tests/s390x/vstrs.c
+ create mode 100644 memcheck/tests/s390x/vstrs.stderr.exp
+ create mode 100644 memcheck/tests/s390x/vstrs.stdout.exp
+ create mode 100644 memcheck/tests/s390x/vstrs.vgtest
+ create mode 100644 none/tests/s390x/vec2.c
+ create mode 100644 none/tests/s390x/vec2.stderr.exp
+ create mode 100644 none/tests/s390x/vec2.stdout.exp
+ create mode 100644 none/tests/s390x/vec2.vgtest
+
+diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
+index d183841ef..668fd9933 100644
+--- a/memcheck/tests/s390x/Makefile.am
++++ b/memcheck/tests/s390x/Makefile.am
+@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
+ 
+ dist_noinst_SCRIPTS = filter_stderr
+ 
+-INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr vstrs
+ 
+ check_PROGRAMS = $(INSN_TESTS) 
+ 
+@@ -18,3 +18,4 @@ AM_CCASFLAGS += @FLAG_M64@
+ vstrc_CFLAGS  = $(AM_CFLAGS) -march=z13
+ vfae_CFLAGS   = $(AM_CFLAGS) -march=z13
+ vistr_CFLAGS  = $(AM_CFLAGS) -march=z13
++vstrs_CFLAGS  = $(AM_CFLAGS) -march=z13
+diff --git a/memcheck/tests/s390x/vstrs.c b/memcheck/tests/s390x/vstrs.c
+new file mode 100644
+index 000000000..3354c2e53
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrs.c
+@@ -0,0 +1,68 @@
++#include <stdio.h>
++#include <string.h>
++
++#define VECTOR __attribute__ ((vector_size (16)))
++
++typedef char VECTOR char_v;
++
++volatile char tmp;
++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
++
++static char_v to_char_vec(const char *str)
++{
++   char buf[17];
++   char_v v;
++
++   for (int i = 0; i < sizeof(buf); i++) {
++      char ch = str[i];
++      if (ch == '\0')
++         break;
++      else if (ch == '$')
++         buf[i] = '\0';
++      else if (ch != '~')
++         buf[i] = ch;
++   }
++   v = *(char_v *) buf;
++   return v;
++}
++
++static void test_vstrs_char(const char *haystack, const char *needle,
++                            int expect_res, int expect_cc)
++{
++   int cc;
++   char_v v2val = to_char_vec(haystack);
++   char_v v3val = to_char_vec(needle);
++
++   register unsigned long VECTOR v4 __asm__("v4") = { strlen(needle), 0 };
++   register char_v v1 __asm__("v1");
++   register char_v v2 __asm__("v2") = v2val;
++   register char_v v3 __asm__("v3") = v3val;
++
++   __asm__(
++      "cr     0,0\n\t"                  /* Clear CC */
++      ".short 0xe712,0x3020,0x408b\n\t" /* vstrs %v1,%v2,%v3,%v4,0,2 */
++      "ipm    %[cc]\n\t"
++      "srl    %[cc],28"
++      : "=v" (v1), [cc] "=d" (cc)
++      : "v" (v2), "v" (v3), "v" (v4)
++      : "cc");
++
++   tmp = hex_digit[v1[7] & 0x1f];
++   if (expect_res >= 0  && v1[7] != expect_res)
++      printf("result %u != %d\n", v1[7], expect_res);
++
++   tmp = hex_digit[cc & 0xf];
++   if (expect_cc >= 0 && cc != expect_cc)
++      printf("CC %d != %d\n", cc, expect_cc);
++}
++
++int main()
++{
++   test_vstrs_char("haystack$needle", "needle$haystack", 16, 1);
++   test_vstrs_char("haystack, needle", "needle, haystack", 10, 3);
++   test_vstrs_char("ABCDEFGH", "DEFGHI", -1, -1);
++   test_vstrs_char("match in UNDEF", "UN", 9, 2);
++   test_vstrs_char("after ~ UNDEF", "DEF", -1, -1);
++   test_vstrs_char("", "", 0, 2);
++   return 0;
++}
+diff --git a/memcheck/tests/s390x/vstrs.stderr.exp b/memcheck/tests/s390x/vstrs.stderr.exp
+new file mode 100644
+index 000000000..c5c3ef705
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrs.stderr.exp
+@@ -0,0 +1,16 @@
++Use of uninitialised value of size 8
++   at 0x........: test_vstrs_char (vstrs.c:50)
++   by 0x........: main (vstrs.c:63)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrs_char (vstrs.c:54)
++   by 0x........: main (vstrs.c:63)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrs_char (vstrs.c:50)
++   by 0x........: main (vstrs.c:65)
++
++Use of uninitialised value of size 8
++   at 0x........: test_vstrs_char (vstrs.c:54)
++   by 0x........: main (vstrs.c:65)
++
+diff --git a/memcheck/tests/s390x/vstrs.stdout.exp b/memcheck/tests/s390x/vstrs.stdout.exp
+new file mode 100644
+index 000000000..e69de29bb
+diff --git a/memcheck/tests/s390x/vstrs.vgtest b/memcheck/tests/s390x/vstrs.vgtest
+new file mode 100644
+index 000000000..fd2a29873
+--- /dev/null
++++ b/memcheck/tests/s390x/vstrs.vgtest
+@@ -0,0 +1,2 @@
++prog: vstrs
++vgopts: -q
+diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am
+index 2fd45ec1e..ca38db935 100644
+--- a/none/tests/s390x/Makefile.am
++++ b/none/tests/s390x/Makefile.am
+@@ -20,7 +20,7 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \
+ 	     spechelper-icm-1  spechelper-icm-2 spechelper-tmll \
+ 	     spechelper-tm laa vector lsc2 ppno vector_string vector_integer \
+ 	     vector_float add-z14 sub-z14 mul-z14 bic \
+-	     misc3
++	     misc3 vec2
+ 
+ if BUILD_DFP_TESTS
+   INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo
+@@ -74,3 +74,4 @@ lsc2_CFLAGS       = -march=z13 -DS390_TESTS_NOCOLOR
+ vector_string_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=5
+ vector_integer_CFLAGS    = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4
+ vector_float_CFLAGS    = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4
++vec2_CFLAGS      = $(AM_CFLAGS) -march=z13
+diff --git a/none/tests/s390x/vec2.c b/none/tests/s390x/vec2.c
+new file mode 100644
+index 000000000..73b04dee4
+--- /dev/null
++++ b/none/tests/s390x/vec2.c
+@@ -0,0 +1,314 @@
++#include <stdio.h>
++
++#define VECTOR __attribute__ ((vector_size (16)))
++
++typedef unsigned long VECTOR ulong_v;
++typedef float VECTOR float_v;
++
++static const ulong_v vec_a   = { 0x0123456789abcdef, 0xfedcba9876543210 };
++static const ulong_v vec_b   = { 0xfedcba9876543210, 0x0123456789abcdef };
++static const ulong_v vec_c   = { 0x8040201008040201, 0x7fbfdfeff7fbfdfe };
++static const ulong_v vec_one = { -1, -1 };
++static const ulong_v vec_ini = { 0x0112233445566778, 0x899aabbccddeeff0 };
++
++static const float_v vec_fa  = { 16777215., -16777215., 42.5, 10000. };
++static const float_v vec_fb  = { 4., 3., 2., 1. };
++
++/* -- Vector shift: test_<insn>(a, b) executes <insn> with a and b in vector registers and prints the result as two 16-digit hex words -- */
++
++#define TEST_GENERATE(insn)                             \
++   static void test_##insn(ulong_v a, ulong_v b)        \
++   {                                                    \
++      ulong_v out;                                      \
++      __asm__(                                          \
++         #insn " %[out],%[a],%[b]"                      \
++         : [out] "=v" (out)                             \
++         : [a] "v" (a),                                 \
++           [b] "v" (b)                                  \
++         : );                                           \
++      printf("\t%016lx %016lx\n", out[0], out[1]);      \
++   }
++
++#define TEST_EXEC(insn)                         \
++   do {                                         \
++      puts(#insn);                              \
++      test_##insn(vec_a, vec_b);                \
++      test_##insn(vec_b, vec_a);                \
++      test_##insn(vec_c, vec_a);                \
++      test_##insn(vec_one, vec_b);              \
++   } while (0)
++
++#define INSNS                                   \
++   XTEST(vsl);                                  \
++   XTEST(vsrl);                                 \
++   XTEST(vsra);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_single_bitshifts()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print the mnemonic and call each generated test */
++   INSNS
++#undef XTEST
++}
++#undef INSNS
++#undef TEST_EXEC
++#undef TEST_GENERATE
++
++/* -- Vector load element-/byte-swapped: each insn is emitted with .insn from opcode bytes opc1/opc2, with a as memory ("R") operand, out as read-write vector register preset to vec_ini, and m3 as last field (NOTE(review): presumably .insn is used because the mnemonics are unknown to the assembler at -march=z13 -- confirm) -- */
++
++#define TEST_EXEC(opc1,opc2,insn,m3)            \
++   do {                                         \
++      puts(#insn " " #m3);                      \
++      test_##insn##_##m3(vec_a);                \
++      test_##insn##_##m3(vec_b);                \
++   } while (0)
++
++#define TEST_GENERATE(opc1,opc2,insn,m3)                                \
++   static void test_##insn##_##m3(ulong_v a)                            \
++   {                                                                    \
++      ulong_v out = vec_ini;                                            \
++      __asm__(                                                          \
++         ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[out],%[a]," #m3      \
++         : [out] "+v" (out)                                             \
++         : [a] "R" (a)                                                  \
++         : );                                                           \
++      printf("\t%016lx %016lx\n", out[0], out[1]);                      \
++   }
++
++#define INSNS                                   \
++   XTEST(e6,01, vlebrh, 0);                     \
++   XTEST(e6,01, vlebrh, 7);                     \
++   XTEST(e6,01, vlebrh, 2);                     \
++   XTEST(e6,03, vlebrf, 0);                     \
++   XTEST(e6,03, vlebrf, 3);                     \
++   XTEST(e6,03, vlebrf, 1);                     \
++   XTEST(e6,02, vlebrg, 0);                     \
++   XTEST(e6,02, vlebrg, 1);                     \
++   XTEST(e6,04, vllebrz, 1);                    \
++   XTEST(e6,04, vllebrz, 2);                    \
++   XTEST(e6,04, vllebrz, 3);                    \
++   XTEST(e6,04, vllebrz, 6);                    \
++   XTEST(e6,05, vlbrrep, 1);                    \
++   XTEST(e6,05, vlbrrep, 2);                    \
++   XTEST(e6,05, vlbrrep, 3);                    \
++   XTEST(e6,06, vlbr, 1);                       \
++   XTEST(e6,06, vlbr, 2);                       \
++   XTEST(e6,06, vlbr, 3);                       \
++   XTEST(e6,06, vlbr, 4);                       \
++   XTEST(e6,07, vler, 1);                       \
++   XTEST(e6,07, vler, 2);                       \
++   XTEST(e6,07, vler, 3);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>_<m3>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_swapped_loads()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print "<insn> <m3>" and call each generated test */
++   INSNS
++#undef XTEST
++}
++
++#undef INSNS
++#undef TEST_GENERATE   /* TEST_EXEC is left defined here: the store tests that follow reuse it unchanged */
++
++/* -- Vector store element-/byte-swapped: mirror image of the load tests; a is the vector-register source, out (preset to vec_ini) is the read-write memory ("+R") operand, and the TEST_EXEC defined for the load tests is reused -- */
++
++#define TEST_GENERATE(opc1,opc2,insn,m3)                                \
++   static void test_##insn##_##m3(ulong_v a)                            \
++   {                                                                    \
++      ulong_v out = vec_ini;                                            \
++      __asm__(                                                          \
++         ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[a],%[out]," #m3      \
++         : [out] "+R" (out)                                             \
++         : [a] "v" (a)                                                  \
++         : );                                                           \
++      printf("\t%016lx %016lx\n", out[0], out[1]);                      \
++   }
++
++#define INSNS                                   \
++   XTEST(e6,09, vstebrh, 0);                    \
++   XTEST(e6,09, vstebrh, 7);                    \
++   XTEST(e6,09, vstebrh, 2);                    \
++   XTEST(e6,0b, vstebrf, 0);                    \
++   XTEST(e6,0b, vstebrf, 3);                    \
++   XTEST(e6,0b, vstebrf, 1);                    \
++   XTEST(e6,0a, vstebrg, 0);                    \
++   XTEST(e6,0a, vstebrg, 1);                    \
++   XTEST(e6,0e, vstbr, 1);                      \
++   XTEST(e6,0e, vstbr, 2);                      \
++   XTEST(e6,0e, vstbr, 3);                      \
++   XTEST(e6,0e, vstbr, 4);                      \
++   XTEST(e6,0f, vster, 1);                      \
++   XTEST(e6,0f, vster, 2);                      \
++   XTEST(e6,0f, vster, 3);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>_<m3>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_swapped_stores()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print "<insn> <m3>" and run each test on vec_a and vec_b */
++   INSNS
++#undef XTEST
++}
++
++#undef INSNS
++#undef TEST_EXEC
++#undef TEST_GENERATE
++
++/* -- Vector shift double by bit: test_<insn>_<i4>(a, b) emits the insn via .insn from opcode bytes opc1/opc2 with a and b in vector registers and i4 as an assembly-time constant; out is a read-write operand preset to vec_ini -- */
++
++#define TEST_GENERATE(opc1,opc2,insn,i4)                \
++   static void test_##insn##_##i4(ulong_v a, ulong_v b) \
++   {                                                    \
++      ulong_v out = vec_ini;                            \
++      __asm__(                                          \
++         ".insn vrr,0x" #opc1 "00000000" #opc2          \
++         ",%[out],%[a],%[b],0," #i4 ",0"                \
++         : [out] "+v" (out)                             \
++         : [a] "v" (a),                                 \
++           [b] "v" (b)                                  \
++         : );                                           \
++      printf("\t%016lx %016lx\n", out[0], out[1]);      \
++   }
++
++#define TEST_EXEC(opc1,opc2,insn,i4)            \
++   do {                                         \
++      puts(#insn " " #i4);                      \
++      test_##insn##_##i4(vec_a, vec_one);       \
++      test_##insn##_##i4(vec_b, vec_a);         \
++   } while (0)
++
++#define INSNS                                   \
++   XTEST(e7,86,vsld,0);                         \
++   XTEST(e7,86,vsld,7);                         \
++   XTEST(e7,86,vsld,4);                         \
++   XTEST(e7,87,vsrd,0);                         \
++   XTEST(e7,87,vsrd,7);                         \
++   XTEST(e7,87,vsrd,4);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>_<i4>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_double_bitshifts()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print "<insn> <i4>" and call each generated test */
++   INSNS
++#undef XTEST
++}
++
++#undef INSNS
++#undef TEST_EXEC
++#undef TEST_GENERATE
++
++/* -- Vector integer -> FP conversions: the float result is printed with %a, all four elements normally but only element 0 when m4 has bit value 8 set (NOTE(review): presumably the single-element-control flag -- confirm against the ISA manual) -- */
++
++#define TEST_GENERATE(opc1,opc2,insn,m4)                                \
++   static void test_##insn##_##m4(ulong_v a)                            \
++   {                                                                    \
++      float_v out;                                                      \
++      __asm__(                                                          \
++         ".insn vrr,0x" #opc1 "00000000" #opc2                          \
++         ",%[out],%[a],0,2," #m4 ",0"                                   \
++         : [out] "=v" (out)                                             \
++         : [a] "v" (a)                                                  \
++         : );                                                           \
++      if (m4 & 8)                                                       \
++         printf("\t%a - - -\n", out[0]);                                \
++      else                                                              \
++         printf("\t%a %a %a %a\n", out[0], out[1], out[2], out[3]);     \
++   }
++
++#define TEST_EXEC(opc1,opc2,insn,m4)            \
++   do {                                         \
++      puts(#insn " " #m4);                      \
++      test_##insn##_##m4(vec_a);                \
++      test_##insn##_##m4(vec_c);                \
++   } while (0)
++
++#define INSNS                                   \
++   XTEST(e7,c1,vcfpl,0);                        \
++   XTEST(e7,c1,vcfpl,8);                        \
++   XTEST(e7,c3,vcfps,0);                        \
++   XTEST(e7,c3,vcfps,8);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>_<m4>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_int_fp_conversions()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print "<insn> <m4>" and run each test on vec_a and vec_c */
++   INSNS
++#undef XTEST
++}
++
++#undef INSNS
++#undef TEST_EXEC
++#undef TEST_GENERATE
++
++/* -- Vector FP -> integer conversions: the result is printed as 8-digit hex words, all four elements normally but only element 0 when m4 has bit value 8 set (NOTE(review): presumably the single-element-control flag -- confirm against the ISA manual) -- */
++
++#define TEST_GENERATE(opc1,opc2,insn,m4)                                \
++   static void test_##insn##_##m4(float_v a)                            \
++   {                                                                    \
++      unsigned int VECTOR out;                                          \
++      __asm__(                                                          \
++         ".insn vrr,0x" #opc1 "00000000" #opc2                          \
++         ",%[out],%[a],0,2," #m4 ",0"                                   \
++         : [out] "=v" (out)                                             \
++         : [a] "v" (a)                                                  \
++         : );                                                           \
++      if (m4 & 8)                                                       \
++         printf("\t%08x - - -\n", out[0]);                              \
++      else                                                              \
++         printf("\t%08x %08x %08x %08x\n",                              \
++                out[0], out[1], out[2], out[3]);                        \
++   }
++
++#define TEST_EXEC(opc1,opc2,insn,m4)            \
++   do {                                         \
++      puts(#insn " " #m4);                      \
++      test_##insn##_##m4(vec_fa);               \
++      test_##insn##_##m4(vec_fb);               \
++   } while (0)
++
++#define INSNS                                   \
++   XTEST(e7,c0,vclfp,0);                        \
++   XTEST(e7,c0,vclfp,8);                        \
++   XTEST(e7,c2,vcsfp,0);                        \
++   XTEST(e7,c2,vcsfp,8);
++
++#define XTEST TEST_GENERATE   /* first expansion of INSNS: define one test_<insn>_<m4>() per entry */
++INSNS
++#undef XTEST
++
++static void test_all_fp_int_conversions()
++{
++#define XTEST TEST_EXEC   /* second expansion of INSNS: print "<insn> <m4>" and run each test on vec_fa and vec_fb */
++   INSNS
++#undef XTEST
++}
++
++#undef INSNS
++#undef TEST_EXEC
++#undef TEST_GENERATE
++
++
++int main()   /* Runs every test group once; the call order below is the order of the sections in vec2.stdout.exp. */
++{
++   test_all_single_bitshifts();
++   test_all_swapped_loads();
++   test_all_swapped_stores();
++   test_all_double_bitshifts();
++   test_all_int_fp_conversions();
++   test_all_fp_int_conversions();
++   return 0;
++}
+diff --git a/none/tests/s390x/vec2.stderr.exp b/none/tests/s390x/vec2.stderr.exp
+new file mode 100644
+index 000000000..139597f9c
+--- /dev/null
++++ b/none/tests/s390x/vec2.stderr.exp
+@@ -0,0 +1,2 @@
++
++
+diff --git a/none/tests/s390x/vec2.stdout.exp b/none/tests/s390x/vec2.stdout.exp
+new file mode 100644
+index 000000000..b32cbe1bc
+--- /dev/null
++++ b/none/tests/s390x/vec2.stdout.exp
+@@ -0,0 +1,168 @@
++vsl
++	483415676abc37ef fde5533beca14200
++	fde5533beca14200 483415676abc37ef
++	00010204102040bf effd7feffebff7fe
++	ffffffffffffffff ffffffffffffff80
++vsrl
++	0012d1679e9af3ef ffdbe5753bcaa164
++	7fdbe5753bcaa164 4012d1679e9af3ef
++	4008014004002004 05fbf7efbf7ffffe
++	03ffffffffffffff ffffffffffffffff
++vsra
++	0012d1679e9af3ef ffdbe5753bcaa164
++	ffdbe5753bcaa164 4012d1679e9af3ef
++	c008014004002004 05fbf7efbf7ffffe
++	ffffffffffffffff ffffffffffffffff
++vlebrh 0
++	2301233445566778 899aabbccddeeff0
++	dcfe233445566778 899aabbccddeeff0
++vlebrh 7
++	0112233445566778 899aabbccdde2301
++	0112233445566778 899aabbccddedcfe
++vlebrh 2
++	0112233423016778 899aabbccddeeff0
++	01122334dcfe6778 899aabbccddeeff0
++vlebrf 0
++	6745230145566778 899aabbccddeeff0
++	98badcfe45566778 899aabbccddeeff0
++vlebrf 3
++	0112233445566778 899aabbc67452301
++	0112233445566778 899aabbc98badcfe
++vlebrf 1
++	0112233467452301 899aabbccddeeff0
++	0112233498badcfe 899aabbccddeeff0
++vlebrg 0
++	efcdab8967452301 899aabbccddeeff0
++	1032547698badcfe 899aabbccddeeff0
++vlebrg 1
++	0112233445566778 efcdab8967452301
++	0112233445566778 1032547698badcfe
++vllebrz 1
++	0000000000002301 0000000000000000
++	000000000000dcfe 0000000000000000
++vllebrz 2
++	0000000067452301 0000000000000000
++	0000000098badcfe 0000000000000000
++vllebrz 3
++	efcdab8967452301 0000000000000000
++	1032547698badcfe 0000000000000000
++vllebrz 6
++	6745230100000000 0000000000000000
++	98badcfe00000000 0000000000000000
++vlbrrep 1
++	2301230123012301 2301230123012301
++	dcfedcfedcfedcfe dcfedcfedcfedcfe
++vlbrrep 2
++	6745230167452301 6745230167452301
++	98badcfe98badcfe 98badcfe98badcfe
++vlbrrep 3
++	efcdab8967452301 efcdab8967452301
++	1032547698badcfe 1032547698badcfe
++vlbr 1
++	23016745ab89efcd dcfe98ba54761032
++	dcfe98ba54761032 23016745ab89efcd
++vlbr 2
++	67452301efcdab89 98badcfe10325476
++	98badcfe10325476 67452301efcdab89
++vlbr 3
++	efcdab8967452301 1032547698badcfe
++	1032547698badcfe efcdab8967452301
++vlbr 4
++	1032547698badcfe efcdab8967452301
++	efcdab8967452301 1032547698badcfe
++vler 1
++	32107654ba98fedc cdef89ab45670123
++	cdef89ab45670123 32107654ba98fedc
++vler 2
++	76543210fedcba98 89abcdef01234567
++	89abcdef01234567 76543210fedcba98
++vler 3
++	fedcba9876543210 0123456789abcdef
++	0123456789abcdef fedcba9876543210
++vstebrh 0
++	2301233445566778 899aabbccddeeff0
++	dcfe233445566778 899aabbccddeeff0
++vstebrh 7
++	1032233445566778 899aabbccddeeff0
++	efcd233445566778 899aabbccddeeff0
++vstebrh 2
++	ab89233445566778 899aabbccddeeff0
++	5476233445566778 899aabbccddeeff0
++vstebrf 0
++	6745230145566778 899aabbccddeeff0
++	98badcfe45566778 899aabbccddeeff0
++vstebrf 3
++	1032547645566778 899aabbccddeeff0
++	efcdab8945566778 899aabbccddeeff0
++vstebrf 1
++	efcdab8945566778 899aabbccddeeff0
++	1032547645566778 899aabbccddeeff0
++vstebrg 0
++	efcdab8967452301 899aabbccddeeff0
++	1032547698badcfe 899aabbccddeeff0
++vstebrg 1
++	1032547698badcfe 899aabbccddeeff0
++	efcdab8967452301 899aabbccddeeff0
++vstbr 1
++	23016745ab89efcd dcfe98ba54761032
++	dcfe98ba54761032 23016745ab89efcd
++vstbr 2
++	67452301efcdab89 98badcfe10325476
++	98badcfe10325476 67452301efcdab89
++vstbr 3
++	efcdab8967452301 1032547698badcfe
++	1032547698badcfe efcdab8967452301
++vstbr 4
++	1032547698badcfe efcdab8967452301
++	efcdab8967452301 1032547698badcfe
++vster 1
++	32107654ba98fedc cdef89ab45670123
++	cdef89ab45670123 32107654ba98fedc
++vster 2
++	76543210fedcba98 89abcdef01234567
++	89abcdef01234567 76543210fedcba98
++vster 3
++	fedcba9876543210 0123456789abcdef
++	0123456789abcdef fedcba9876543210
++vsld 0
++	0123456789abcdef fedcba9876543210
++	fedcba9876543210 0123456789abcdef
++vsld 7
++	91a2b3c4d5e6f7ff 6e5d4c3b2a19087f
++	6e5d4c3b2a190800 91a2b3c4d5e6f780
++vsld 4
++	123456789abcdeff edcba9876543210f
++	edcba98765432100 123456789abcdef0
++vsrd 0
++	ffffffffffffffff ffffffffffffffff
++	0123456789abcdef fedcba9876543210
++vsrd 7
++	21ffffffffffffff ffffffffffffffff
++	de02468acf13579b dffdb97530eca864
++vsrd 4
++	0fffffffffffffff ffffffffffffffff
++	f0123456789abcde ffedcba987654321
++vcfpl 0
++	0x1.234568p+24 0x1.13579cp+31 0x1.fdb976p+31 0x1.d950c8p+30
++	0x1.00804p+31 0x1.00804p+27 0x1.feff8p+30 0x1.eff7fcp+31
++vcfpl 8
++	0x1.234568p+24 - - -
++	0x1.00804p+31 - - -
++vcfps 0
++	0x1.234568p+24 -0x1.d950c8p+30 -0x1.234568p+24 0x1.d950c8p+30
++	-0x1.feff8p+30 0x1.00804p+27 0x1.feff8p+30 -0x1.00804p+27
++vcfps 8
++	0x1.234568p+24 - - -
++	-0x1.feff8p+30 - - -
++vclfp 0
++	00ffffff 00000000 0000002a 00002710
++	00000004 00000003 00000002 00000001
++vclfp 8
++	00ffffff - - -
++	00000004 - - -
++vcsfp 0
++	00ffffff ff000001 0000002a 00002710
++	00000004 00000003 00000002 00000001
++vcsfp 8
++	00ffffff - - -
++	00000004 - - -
+diff --git a/none/tests/s390x/vec2.vgtest b/none/tests/s390x/vec2.vgtest
+new file mode 100644
+index 000000000..45e942e64
+--- /dev/null
++++ b/none/tests/s390x/vec2.vgtest
+@@ -0,0 +1,2 @@
++prog: vec2
++prereq: test -e vec2 && ../../../tests/s390x_features s390x-vx
+diff --git a/tests/s390x_features.c b/tests/s390x_features.c
+index 25b98f3a3..e7939c463 100644
+--- a/tests/s390x_features.c
++++ b/tests/s390x_features.c
+@@ -270,6 +270,10 @@ static int go(char *feature, char *cpu)
+       match = facilities[0] & FAC_BIT(57); /* message security assist 5 facility */
+    } else if (strcmp(feature, "s390x-mi2") == 0 ) {
+       match = facilities[0] & FAC_BIT(58);
++   } else if (strcmp(feature, "s390x-mi3") == 0 ) {
++      match = facilities[0] & FAC_BIT(61);
++   } else if (strcmp(feature, "s390x-vx2") == 0 ) {
++      match = facilities[2] & FAC_BIT(20);
+    } else {
+       return 2;          // Unrecognised feature.
+    }
+-- 
+2.23.0
+
+From d9364bc90ee894c43ee742840f806571edc08ab3 Mon Sep 17 00:00:00 2001
+From: Andreas Arnez <arnez@linux.ibm.com>
+Date: Tue, 18 May 2021 19:59:32 +0200
+Subject: [PATCH 13/13] s390x: Wrap up misc-insn-3 and vec-enh-2 support
+
+Wrap up support for the miscellaneous-instruction-extensions facility 3
+and the vector-enhancements facility 2: Add 'case' statements for the
+remaining unhandled arch13 instructions to 'guest_s390_toIR.c', document
+the new support in 's390-opcodes.csv', adjust 's390-check-opcodes.pl', and
+announce the new feature in 'NEWS'.
+---
+ NEWS                            |  5 ++
+ VEX/priv/guest_s390_toIR.c      |  5 +-
+ auxprogs/s390-check-opcodes.pl  | 22 ++++++++-
+ docs/internals/s390-opcodes.csv | 81 +++++++++++++++++++++++++++++++--
+ 4 files changed, 108 insertions(+), 5 deletions(-)
+
+diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
+index 46a867475..1bd18f760 100644
+--- a/VEX/priv/guest_s390_toIR.c
++++ b/VEX/priv/guest_s390_toIR.c
+@@ -8,7 +8,7 @@
+    This file is part of Valgrind, a dynamic binary instrumentation
+    framework.
+ 
+-   Copyright IBM Corp. 2010-2020
++   Copyright IBM Corp. 2010-2021
+ 
+    This program is free software; you can redistribute it and/or
+    modify it under the terms of the GNU General Public License as
+@@ -20503,6 +20503,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes)
+                                    RRE_r2(ovl));  goto ok;
+    case 0xb931: s390_format_RRE_RR(s390_irgen_CLGFR, RRE_r1(ovl),
+                                    RRE_r2(ovl));  goto ok;
++   case 0xb938: /* SORTL */ goto unimplemented;
++   case 0xb939: /* DFLTCC */ goto unimplemented;
++   case 0xb93a: /* KDSA */ goto unimplemented;
+    case 0xb93c: s390_format_RRE_RR(s390_irgen_PPNO, RRE_r1(ovl),
+                                    RRE_r2(ovl));  goto ok;
+    case 0xb93e: /* KIMD */ goto unimplemented;
+-- 
+2.23.0
+
diff --git a/valgrind.spec b/valgrind.spec
index df311de..e9a3c40 100644
--- a/valgrind.spec
+++ b/valgrind.spec
@@ -3,7 +3,7 @@
 Summary: Tool for finding memory management bugs in programs
 Name: %{?scl_prefix}valgrind
 Version: 3.17.0
-Release: 4%{?dist}
+Release: 5%{?dist}
 Epoch: 1
 License: GPLv2+
 URL: http://www.valgrind.org/
@@ -116,6 +116,39 @@ Patch9: valgrind-3.17.0-debuginfod.patch
 # KDE#423963 Only process clone results in the parent thread
 Patch10: valgrind-3.17.0-clone-parent-res.patch
 
+# commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
+# Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
+# commit 18ddcc47c951427efd3b790ba2481159b9bd1598
+# s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
+# commit 5db3f929c43bf46f4707178706cfe90f43acdd19
+# s390x: Add convenience function mkV128()
+# commit e78bd78d3043729033b426218ab8c6dae9c51e96
+# Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
+# commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
+# Bug 434296 - s390x: Rework IR conversion of VFENE
+# commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
+# Bug 434296 - s390x: Rework IR conversion of VISTR
+# commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
+# Bug 434296 - s390x: Add memcheck test cases for vector string insns
+# commit a0bb049ace14ab52d386bb1d49a399f39eec4986
+# s390x: Improve handling of amodes without base register
+# commit fd935e238d907d9c523a311ba795077d95ad6912
+# s390x: Rework insn "v-vdup" and add "v-vrep"
+# commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
+# s390x: Add support for emitting "vector or with complement"
+# commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
+# s390x: Fix/optimize Iop_64HLtoV128
+# commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
+# s390x: Add missing stdout.exp for vector string memcheck test
+Patch11: valgrind-3.17.0-s390-prep.patch
+
+# KDE#432387 - s390x: z15 instructions support
+Patch12: valgrind-3.17.0-s390-z15.patch
+
+# commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb
+# s390x: Don't emit "vector or with complement" on z13
+Patch13: valgrind-3.17.0-s390-z13-vec-fix.patch
+
 BuildRequires: make
 BuildRequires: glibc-devel
 
@@ -264,6 +297,11 @@ Valgrind User Manual for details.
 %patch9 -p1
 %patch10 -p1
 
+%patch11 -p1
+touch memcheck/tests/s390x/vistr.stdout.exp
+%patch12 -p1
+%patch13 -p1
+
 %build
 # LTO triggers undefined symbols in valgrind.  Valgrind has a --enable-lto
 # configure time option, but that doesn't seem to help.
@@ -488,6 +526,11 @@ fi
 %endif
 
 %changelog
+* Fri Jun 18 2021 Mark Wielaard <mjw@fedoraproject.org> - 3.17.0-5
+- Add valgrind-3.17.0-s390-prep.patch
+- Add valgrind-3.17.0-s390-z15.patch
+- Add valgrind-3.17.0-s390-z13-vec-fix.patch
+
 * Thu Jun  3 2021 Mark Wielaard <mjw@fedoraproject.org> - 3.17.0-4
 - Add valgrind-3.17.0-s390_insn_as_string.patch
 - Add valgrind-3.17.0-debuginfod.patch