b7dbaa
commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Wed Apr 28 18:52:30 2021 +0200
b7dbaa
b7dbaa
    Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
b7dbaa
    
b7dbaa
    The fix for bug 429864 - "s390x: C++ atomic test_and_set yields
b7dbaa
    false-positive memcheck diagnostics" changes the memcheck behavior at
b7dbaa
    various compare-and-swap instructions.  The comparison between the old and
b7dbaa
    expected value now always yields a defined result, even if the input
b7dbaa
    values are (partially) undefined.  However, some existing test cases
b7dbaa
    explicitly verify that memcheck complains about the use of uninitialised
b7dbaa
    values here.  These test cases are no longer valid.  Remove them.
b7dbaa
b7dbaa
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
b7dbaa
index 67ae8c293..e4e69eb38 100644
b7dbaa
--- a/memcheck/tests/s390x/Makefile.am
b7dbaa
+++ b/memcheck/tests/s390x/Makefile.am
b7dbaa
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
b7dbaa
 
b7dbaa
 dist_noinst_SCRIPTS = filter_stderr
b7dbaa
 
b7dbaa
-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe
b7dbaa
+INSN_TESTS = cdsg cu21 cu42 ltgjhe
b7dbaa
 
b7dbaa
 check_PROGRAMS = $(INSN_TESTS) 
b7dbaa
 
b7dbaa
@@ -14,7 +14,3 @@ EXTRA_DIST = \
b7dbaa
 AM_CFLAGS    += @FLAG_M64@
b7dbaa
 AM_CXXFLAGS  += @FLAG_M64@
b7dbaa
 AM_CCASFLAGS += @FLAG_M64@
b7dbaa
-
b7dbaa
-cs_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
b7dbaa
-csg_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
b7dbaa
-cds_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
b7dbaa
diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c
b7dbaa
deleted file mode 100644
b7dbaa
index ec5c533e0..000000000
b7dbaa
--- a/memcheck/tests/s390x/cds.c
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,82 +0,0 @@
b7dbaa
-#include <stdint.h>
b7dbaa
-#include <stdio.h>
b7dbaa
-
b7dbaa
-typedef struct {
b7dbaa
-   uint64_t high;
b7dbaa
-   uint64_t low;
b7dbaa
-} quad_word;
b7dbaa
-
b7dbaa
-void 
b7dbaa
-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init)
b7dbaa
-{
b7dbaa
-   int cc; // unused
b7dbaa
-   quad_word op1 = op1_init;
b7dbaa
-   uint64_t  op2 = op2_init;
b7dbaa
-   quad_word op3 = op3_init;
b7dbaa
-
b7dbaa
-   __asm__ volatile (
b7dbaa
-                     "lmg     %%r0,%%r1,%1\n\t"
b7dbaa
-                     "lmg     %%r2,%%r3,%3\n\t"
b7dbaa
-                     "cds     %%r0,%%r2,%2\n\t"  //  cds 1st,3rd,2nd
b7dbaa
-                     "stmg    %%r0,%%r1,%1\n"    // store r0,r1 to op1
b7dbaa
-                     "stmg    %%r2,%%r3,%3\n"    // store r2,r3 to op3
b7dbaa
-                     : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3)
b7dbaa
-                     :
b7dbaa
-                     : "r0", "r1", "r2", "r3", "cc");
b7dbaa
-
b7dbaa
-}
b7dbaa
-
b7dbaa
-// Return a quad-word that only bits low[32:63] are undefined
b7dbaa
-quad_word
b7dbaa
-make_undefined(void)
b7dbaa
-{
b7dbaa
-   quad_word val;
b7dbaa
-
b7dbaa
-   val.high = 0;
b7dbaa
-   val.low |= 0xFFFFFFFF00000000ull;
b7dbaa
-
b7dbaa
-   return val;
b7dbaa
-}
b7dbaa
-
b7dbaa
-void op1_undefined(void)
b7dbaa
-{
b7dbaa
-   quad_word op1, op3;
b7dbaa
-   uint64_t op2;
b7dbaa
-
b7dbaa
-   // op1 undefined
b7dbaa
-   op1 = make_undefined();
b7dbaa
-   op2 = 42;
b7dbaa
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
b7dbaa
-   test(op1, op2, op3);  // complaint
b7dbaa
-}
b7dbaa
-
b7dbaa
-void op2_undefined(void)
b7dbaa
-{
b7dbaa
-   quad_word op1, op3;
b7dbaa
-   uint64_t op2;
b7dbaa
-
b7dbaa
-   op1.high = op1.low = 42;
b7dbaa
-   // op2 undefined
b7dbaa
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
b7dbaa
-   test(op1, op2, op3);  // complaint
b7dbaa
-}
b7dbaa
-
b7dbaa
-void op3_undefined(void)
b7dbaa
-{
b7dbaa
-   quad_word op1, op3;
b7dbaa
-   uint64_t op2;
b7dbaa
-
b7dbaa
-   op1.high = op1.low = 42;
b7dbaa
-   op2 = 100;
b7dbaa
-   op3 = make_undefined();
b7dbaa
-   test(op1, op2, op3);  // no complaint; op3 is just copied around
b7dbaa
-}
b7dbaa
-
b7dbaa
-int main ()
b7dbaa
-{
b7dbaa
-   op1_undefined();
b7dbaa
-   op2_undefined();
b7dbaa
-   op3_undefined();
b7dbaa
-
b7dbaa
-   return 0;
b7dbaa
-}
b7dbaa
diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp
b7dbaa
deleted file mode 100644
b7dbaa
index e72de94c8..000000000
b7dbaa
--- a/memcheck/tests/s390x/cds.stderr.exp
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,10 +0,0 @@
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (cds.c:17)
b7dbaa
-   by 0x........: op1_undefined (cds.c:50)
b7dbaa
-   by 0x........: main (cds.c:77)
b7dbaa
-
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (cds.c:17)
b7dbaa
-   by 0x........: op2_undefined (cds.c:61)
b7dbaa
-   by 0x........: main (cds.c:78)
b7dbaa
-
b7dbaa
diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp
b7dbaa
deleted file mode 100644
b7dbaa
index e69de29bb..000000000
b7dbaa
diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest
b7dbaa
deleted file mode 100644
b7dbaa
index 5195887e2..000000000
b7dbaa
--- a/memcheck/tests/s390x/cds.vgtest
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,2 +0,0 @@
b7dbaa
-prog: cds
b7dbaa
-vgopts: -q
b7dbaa
diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c
b7dbaa
deleted file mode 100644
b7dbaa
index 9a298cef9..000000000
b7dbaa
--- a/memcheck/tests/s390x/cs.c
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,32 +0,0 @@
b7dbaa
-#include <stdint.h>
b7dbaa
-#include <stdio.h>
b7dbaa
-#include <string.h>
b7dbaa
-
b7dbaa
-void 
b7dbaa
-test(int32_t op1_init, int32_t op2_init, int32_t op3_init)
b7dbaa
-{
b7dbaa
-   register int32_t op1 asm("8") = op1_init;
b7dbaa
-   register int32_t op3 asm("9") = op3_init;
b7dbaa
-   
b7dbaa
-   int32_t op2 = op2_init;
b7dbaa
-   int cc = 1; 
b7dbaa
-
b7dbaa
-   __asm__ volatile (
b7dbaa
-           "cs      8,9,%1\n\t"
b7dbaa
-           "ipm     %0\n\t"
b7dbaa
-           "srl     %0,28\n\t"
b7dbaa
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
b7dbaa
-           : 
b7dbaa
-           : "cc");
b7dbaa
-}
b7dbaa
-
b7dbaa
-int main ()
b7dbaa
-{
b7dbaa
-   int op1, op2, op3;
b7dbaa
-
b7dbaa
-   test(op1, 0x10000000, 0x12345678);   // complaint
b7dbaa
-   test(0x10000000, op2, 0x12345678);   // complaint
b7dbaa
-   test(0x10000000, 0x01000000, op3);   // no complaint
b7dbaa
-
b7dbaa
-   return 0;
b7dbaa
-}
b7dbaa
diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp
b7dbaa
deleted file mode 100644
b7dbaa
index e45dc99cd..000000000
b7dbaa
--- a/memcheck/tests/s390x/cs.stderr.exp
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,8 +0,0 @@
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (cs.c:14)
b7dbaa
-   by 0x........: main (cs.c:27)
b7dbaa
-
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (cs.c:14)
b7dbaa
-   by 0x........: main (cs.c:28)
b7dbaa
-
b7dbaa
diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp
b7dbaa
deleted file mode 100644
b7dbaa
index e69de29bb..000000000
b7dbaa
diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest
b7dbaa
deleted file mode 100644
b7dbaa
index 323cce80c..000000000
b7dbaa
--- a/memcheck/tests/s390x/cs.vgtest
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,2 +0,0 @@
b7dbaa
-prog: cs
b7dbaa
-vgopts: -q
b7dbaa
diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c
b7dbaa
deleted file mode 100644
b7dbaa
index 7f9d8c88e..000000000
b7dbaa
--- a/memcheck/tests/s390x/csg.c
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,32 +0,0 @@
b7dbaa
-#include <stdint.h>
b7dbaa
-#include <stdio.h>
b7dbaa
-#include <string.h>
b7dbaa
-
b7dbaa
-void 
b7dbaa
-test(int64_t op1_init, int64_t op2_init, int64_t op3_init)
b7dbaa
-{
b7dbaa
-   register int64_t op1 asm("8") = op1_init;
b7dbaa
-   register int64_t op3 asm("9") = op3_init;
b7dbaa
-   
b7dbaa
-   int64_t op2 = op2_init;
b7dbaa
-   int cc = 1; 
b7dbaa
-
b7dbaa
-   __asm__ volatile (
b7dbaa
-           "csg     8,9,%1\n\t"
b7dbaa
-           "ipm     %0\n\t"
b7dbaa
-           "srl     %0,28\n\t"
b7dbaa
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
b7dbaa
-           : 
b7dbaa
-           : "cc");
b7dbaa
-}
b7dbaa
-
b7dbaa
-int main ()
b7dbaa
-{
b7dbaa
-   int64_t op1, op2, op3;
b7dbaa
-
b7dbaa
-   test(op1, 0x1000000000000000ull, 0x1234567887654321ull);  // complaint
b7dbaa
-   test(0x1000000000000000ull, op2, 0x1234567887654321ull);  // complaint
b7dbaa
-   test(0x1000000000000000ull, 0x1000000000000000ull, op3);  // no complaint
b7dbaa
-
b7dbaa
-   return 0;
b7dbaa
-}
b7dbaa
diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp
b7dbaa
deleted file mode 100644
b7dbaa
index fda2021ce..000000000
b7dbaa
--- a/memcheck/tests/s390x/csg.stderr.exp
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,8 +0,0 @@
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (csg.c:14)
b7dbaa
-   by 0x........: main (csg.c:27)
b7dbaa
-
b7dbaa
-Conditional jump or move depends on uninitialised value(s)
b7dbaa
-   at 0x........: test (csg.c:14)
b7dbaa
-   by 0x........: main (csg.c:28)
b7dbaa
-
b7dbaa
diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp
b7dbaa
deleted file mode 100644
b7dbaa
index e69de29bb..000000000
b7dbaa
diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest
b7dbaa
deleted file mode 100644
b7dbaa
index 6de75c1d6..000000000
b7dbaa
--- a/memcheck/tests/s390x/csg.vgtest
b7dbaa
+++ /dev/null
b7dbaa
@@ -1,2 +0,0 @@
b7dbaa
-prog: csg
b7dbaa
-vgopts: -q
b7dbaa
b7dbaa
commit 18ddcc47c951427efd3b790ba2481159b9bd1598
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Wed Apr 7 16:48:29 2021 +0200
b7dbaa
b7dbaa
    s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
b7dbaa
    
b7dbaa
    Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction
b7dbaa
    selector.  Handle them exactly like the "inexpensive" variants Iop_CmpNE32
b7dbaa
    and Iop_CmpNE64.
b7dbaa
b7dbaa
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
b7dbaa
index 2000ec224..5f79280c0 100644
b7dbaa
--- a/VEX/priv/host_s390_isel.c
b7dbaa
+++ b/VEX/priv/host_s390_isel.c
b7dbaa
@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond)
b7dbaa
 
b7dbaa
       case Iop_CmpNE32:
b7dbaa
       case Iop_CmpNE64:
b7dbaa
+      case Iop_ExpCmpNE32:
b7dbaa
+      case Iop_ExpCmpNE64:
b7dbaa
       case Iop_CasCmpNE32:
b7dbaa
       case Iop_CasCmpNE64:
b7dbaa
          result = S390_CC_NE;
b7dbaa
b7dbaa
commit 5db3f929c43bf46f4707178706cfe90f43acdd19
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Wed Apr 7 12:30:20 2021 +0200
b7dbaa
b7dbaa
    s390x: Add convenience function mkV128()
b7dbaa
    
b7dbaa
    Provide mkV128() as a short-hand notation for creating a vector constant from
b7dbaa
    a bit pattern, similar to other such functions like mkU64().
b7dbaa
b7dbaa
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
b7dbaa
index 339377007..7d54cb551 100644
b7dbaa
--- a/VEX/priv/guest_s390_toIR.c
b7dbaa
+++ b/VEX/priv/guest_s390_toIR.c
b7dbaa
@@ -376,6 +376,13 @@ mkU64(ULong value)
b7dbaa
    return IRExpr_Const(IRConst_U64(value));
b7dbaa
 }
b7dbaa
 
b7dbaa
+/* Create an expression node for a 128-bit vector constant */
b7dbaa
+static __inline__ IRExpr *
b7dbaa
+mkV128(UShort value)
b7dbaa
+{
b7dbaa
+   return IRExpr_Const(IRConst_V128(value));
b7dbaa
+}
b7dbaa
+
b7dbaa
 /* Create an expression node for a 32-bit floating point constant
b7dbaa
    whose value is given by a bit pattern. */
b7dbaa
 static __inline__ IRExpr *
b7dbaa
@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4)
b7dbaa
 static const HChar *
b7dbaa
 s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused)))
b7dbaa
 {
b7dbaa
-   put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2)));
b7dbaa
+   put_vr_qw(v1, mkV128(i2));
b7dbaa
 
b7dbaa
    return "vgbm";
b7dbaa
 }
b7dbaa
@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4)
b7dbaa
    switch(type) {
b7dbaa
    case Ity_I8:
b7dbaa
       sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2)));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0001000100010001));
b7dbaa
+      mask = mkV128(0b0001000100010001);
b7dbaa
       break;
b7dbaa
    case Ity_I16:
b7dbaa
       sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0011001100110011));
b7dbaa
+      mask = mkV128(0b0011001100110011);
b7dbaa
       break;
b7dbaa
    default:
b7dbaa
       vpanic("s390_irgen_VSUM: invalid type ");
b7dbaa
@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4)
b7dbaa
    switch(type) {
b7dbaa
    case Ity_I16:
b7dbaa
       sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2)));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0000001100000011));
b7dbaa
+      mask = mkV128(0b0000001100000011);
b7dbaa
       break;
b7dbaa
    case Ity_I32:
b7dbaa
       sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0000111100001111));
b7dbaa
+      mask = mkV128(0b0000111100001111);
b7dbaa
       break;
b7dbaa
    default:
b7dbaa
       vpanic("s390_irgen_VSUMG: invalid type ");
b7dbaa
@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4)
b7dbaa
    switch(type) {
b7dbaa
    case Ity_I32:
b7dbaa
       sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2)));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0000000000001111));
b7dbaa
+      mask = mkV128(0b0000000000001111);
b7dbaa
       break;
b7dbaa
    case Ity_I64:
b7dbaa
       sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2));
b7dbaa
-      mask = IRExpr_Const(IRConst_V128(0b0000000011111111));
b7dbaa
+      mask = mkV128(0b0000000011111111);
b7dbaa
       break;
b7dbaa
    default:
b7dbaa
       vpanic("s390_irgen_VSUMQ: invalid type ");
b7dbaa
@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
b7dbaa
          assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
b7dbaa
       }
b7dbaa
       put_vr_qw(v1, mkite(mkexpr(cond),
b7dbaa
-                          IRExpr_Const(IRConst_V128(0xffff)),
b7dbaa
-                          IRExpr_Const(IRConst_V128(0))));
b7dbaa
+                          mkV128(0xffff),
b7dbaa
+                          mkV128(0)));
b7dbaa
       if (s390_vr_is_cs_set(m6)) {
b7dbaa
          IRTemp cc = newTemp(Ity_I64);
b7dbaa
          assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
b7dbaa
b7dbaa
commit e78bd78d3043729033b426218ab8c6dae9c51e96
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Thu Mar 18 18:01:10 2021 +0100
b7dbaa
b7dbaa
    Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
b7dbaa
    
b7dbaa
    The z/Architecture instructions "vector string range compare" (VSTRC),
b7dbaa
    "vector find any element equal" (VFAE), and "vector find element
b7dbaa
    equal" (VFEE) are each implemented with a dirty helper that executes the
b7dbaa
    instruction.  Unfortunately this approach leads to memcheck false
b7dbaa
    positives, because these instructions may yield a defined result even if
b7dbaa
    parts of the input vectors are undefined.  There are multiple ways this
b7dbaa
    can happen: Wherever the flags in the fourth operand to VSTRC indicate
b7dbaa
    "match always" or "match never", the corresponding elements in the third
b7dbaa
    operand don't affect the result.  The same is true for the elements
b7dbaa
    following the first zero-element in the second operand if the ZS flag is
b7dbaa
    set, or for the elements following the first matching element, if any.
b7dbaa
    
b7dbaa
    Re-implement the instructions without dirty helpers and transform into
b7dbaa
    lengthy IR instead.
b7dbaa
b7dbaa
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
b7dbaa
index 905429015..49b6cd5dd 100644
b7dbaa
--- a/VEX/priv/guest_s390_defs.h
b7dbaa
+++ b/VEX/priv/guest_s390_defs.h
b7dbaa
@@ -265,11 +265,8 @@ typedef enum {
b7dbaa
    S390_VEC_OP_INVALID = 0,
b7dbaa
    S390_VEC_OP_VPKS,
b7dbaa
    S390_VEC_OP_VPKLS,
b7dbaa
-   S390_VEC_OP_VFAE,
b7dbaa
-   S390_VEC_OP_VFEE,
b7dbaa
    S390_VEC_OP_VFENE,
b7dbaa
    S390_VEC_OP_VISTR,
b7dbaa
-   S390_VEC_OP_VSTRC,
b7dbaa
    S390_VEC_OP_VCEQ,
b7dbaa
    S390_VEC_OP_VTM,
b7dbaa
    S390_VEC_OP_VGFM,
b7dbaa
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
b7dbaa
index b71b621ae..63d2e8ce5 100644
b7dbaa
--- a/VEX/priv/guest_s390_helpers.c
b7dbaa
+++ b/VEX/priv/guest_s390_helpers.c
b7dbaa
@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
       {0x00, 0x00}, /* invalid */
b7dbaa
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
b7dbaa
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
b7dbaa
-      [S390_VEC_OP_VFAE]  = {0xe7, 0x82},
b7dbaa
-      [S390_VEC_OP_VFEE]  = {0xe7, 0x80},
b7dbaa
       [S390_VEC_OP_VFENE] = {0xe7, 0x81},
b7dbaa
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
b7dbaa
-      [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
b7dbaa
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
b7dbaa
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
b7dbaa
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
b7dbaa
@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
 
b7dbaa
    case S390_VEC_OP_VPKS:
b7dbaa
    case S390_VEC_OP_VPKLS:
b7dbaa
-   case S390_VEC_OP_VFAE:
b7dbaa
-   case S390_VEC_OP_VFEE:
b7dbaa
    case S390_VEC_OP_VFENE:
b7dbaa
    case S390_VEC_OP_VCEQ:
b7dbaa
    case S390_VEC_OP_VGFM:
b7dbaa
@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
       the_insn.VRR.m5 = d->m5;
b7dbaa
       break;
b7dbaa
 
b7dbaa
-   case S390_VEC_OP_VSTRC:
b7dbaa
    case S390_VEC_OP_VGFMA:
b7dbaa
    case S390_VEC_OP_VMAH:
b7dbaa
    case S390_VEC_OP_VMALH:
b7dbaa
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
b7dbaa
index 7d54cb551..26a947813 100644
b7dbaa
--- a/VEX/priv/guest_s390_toIR.c
b7dbaa
+++ b/VEX/priv/guest_s390_toIR.c
b7dbaa
@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2)
b7dbaa
    return "ppno";
b7dbaa
 }
b7dbaa
 
b7dbaa
-static const HChar *
b7dbaa
-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
-{
b7dbaa
-   IRDirty* d;
b7dbaa
-   IRTemp cc = newTemp(Ity_I64);
b7dbaa
+enum s390_VStrX {
b7dbaa
+   s390_VStrX_VSTRC,
b7dbaa
+   s390_VStrX_VFAE,
b7dbaa
+   s390_VStrX_VFEE
b7dbaa
+};
b7dbaa
 
b7dbaa
-   /* Check for specification exception */
b7dbaa
-   vassert(m4 < 3);
b7dbaa
+#define S390_VEC_OP3(m, op0, op1, op2) /* pick op by m = 0/1/2 */       \
b7dbaa
+   ((m) == 0 ? (op0) : (m) == 1 ? (op1) : (m) == 2 ? (op2) : Iop_INVALID)
b7dbaa
 
b7dbaa
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
b7dbaa
-   details.op = S390_VEC_OP_VFAE;
b7dbaa
-   details.v1 = v1;
b7dbaa
-   details.v2 = v2;
b7dbaa
-   details.v3 = v3;
b7dbaa
-   details.m4 = m4;
b7dbaa
-   details.m5 = m5;
b7dbaa
-
b7dbaa
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
b7dbaa
-                         &s390x_dirtyhelper_vec_op,
b7dbaa
-                         mkIRExprVec_2(IRExpr_GSPTR(),
b7dbaa
-                                       mkU64(details.serialized)));
b7dbaa
+/* Helper function for transforming VSTRC, VFAE, or VFEE.  These instructions
b7dbaa
+   share much of the same logic. */
b7dbaa
+static void
b7dbaa
+s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
b7dbaa
+                 UChar m6, enum s390_VStrX which_insn)
b7dbaa
+{
b7dbaa
+   IRTemp op2 = newTemp(Ity_V128);
b7dbaa
+   IRTemp op3 = newTemp(Ity_V128);
b7dbaa
+   IRExpr* tmp;
b7dbaa
+   IRExpr* match = NULL;
b7dbaa
+   UChar bitwidth = 8 << m5;
b7dbaa
+   UChar n_elem = 16 >> m5;
b7dbaa
+   IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4);
b7dbaa
+   IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4);
b7dbaa
+   IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4);
b7dbaa
+   IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4);
b7dbaa
+   IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
b7dbaa
+                                    Iop_CmpEQ16x8, Iop_CmpEQ32x4);
b7dbaa
+   IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16,
b7dbaa
+                                    Iop_CmpGT16Ux8, Iop_CmpGT32Ux4);
b7dbaa
+   IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16,
b7dbaa
+                                      Iop_GetElem16x8, Iop_GetElem32x4);
b7dbaa
+
b7dbaa
+   assign(op2, get_vr_qw(v2));
b7dbaa
+   assign(op3, get_vr_qw(v3));
b7dbaa
+
b7dbaa
+   switch (which_insn) {
b7dbaa
+
b7dbaa
+   case s390_VStrX_VSTRC: {
b7dbaa
+      IRTemp op4 = newTemp(Ity_V128);
b7dbaa
+      assign(op4, get_vr_qw(v4));
b7dbaa
+
b7dbaa
+      /* Mask off insignificant range boundaries from op3, i.e., all those for
b7dbaa
+         which the corresponding field in op4 has all or no bits set ("match
b7dbaa
+         always" / "match never"). */
b7dbaa
+      IRTemp bounds = newTemp(Ity_V128);
b7dbaa
+      tmp = unop(Iop_NotV128,
b7dbaa
+                 binop(cmpeq_op, mkV128(0),
b7dbaa
+                       binop(sar_op,
b7dbaa
+                             binop(sub_op,
b7dbaa
+                                   binop(sar_op, mkexpr(op4),
b7dbaa
+                                         mkU8(bitwidth - 3)),
b7dbaa
+                                   mkV128(-1)),
b7dbaa
+                             mkU8(1))));
b7dbaa
+      assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp));
b7dbaa
+
b7dbaa
+      IRTemp flags_eq = newTemp(Ity_V128);
b7dbaa
+      IRTemp flags_lt = newTemp(Ity_V128);
b7dbaa
+      IRTemp flags_gt = newTemp(Ity_V128);
b7dbaa
+      assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1)));
b7dbaa
+      assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)),
b7dbaa
+                             mkU8(bitwidth - 1)));
b7dbaa
+      assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)),
b7dbaa
+                             mkU8(bitwidth - 1)));
b7dbaa
+
b7dbaa
+      for (UChar idx = 0; idx < n_elem; idx += 2) {
b7dbaa
+         /* Match according to the even/odd pairs in op3 and op4 at idx */
b7dbaa
+         IRTemp part[2];
b7dbaa
+
b7dbaa
+         for (UChar j = 0; j < 2; j++) {
b7dbaa
+            IRTemp a = newTemp(Ity_V128);
b7dbaa
+            assign(a, unop(dup_op,
b7dbaa
+                           binop(getelem_op, mkexpr(bounds), mkU8(idx + j))));
b7dbaa
+
b7dbaa
+            IRExpr* m[] = {
b7dbaa
+               binop(cmpeq_op, mkexpr(op2), mkexpr(a)),
b7dbaa
+               binop(cmpgt_op, mkexpr(a), mkexpr(op2)),
b7dbaa
+               binop(cmpgt_op, mkexpr(op2), mkexpr(a))
b7dbaa
+            };
b7dbaa
+            IRExpr* f[] = {
b7dbaa
+               unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))),
b7dbaa
+               unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))),
b7dbaa
+               unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j)))
b7dbaa
+            };
b7dbaa
+            part[j] = newTemp(Ity_V128);
b7dbaa
+            assign(part[j], binop(Iop_OrV128,
b7dbaa
+                                  binop(Iop_OrV128,
b7dbaa
+                                        binop(Iop_AndV128, f[0], m[0]),
b7dbaa
+                                        binop(Iop_AndV128, f[1], m[1])),
b7dbaa
+                                  binop(Iop_AndV128, f[2], m[2])));
b7dbaa
+         }
b7dbaa
+         tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1]));
b7dbaa
+         match = idx == 0 ? tmp : binop(Iop_OrV128, match, tmp);
b7dbaa
+      }
b7dbaa
+      break;
b7dbaa
+   }
b7dbaa
 
b7dbaa
-   d->nFxState = 3;
b7dbaa
-   vex_bzero(&d->fxState, sizeof(d->fxState));
b7dbaa
-   d->fxState[0].fx     = Ifx_Read;
b7dbaa
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
b7dbaa
-   d->fxState[0].size   = sizeof(V128);
b7dbaa
-   d->fxState[1].fx     = Ifx_Read;
b7dbaa
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
b7dbaa
-   d->fxState[1].size   = sizeof(V128);
b7dbaa
-   d->fxState[2].fx     = Ifx_Write;
b7dbaa
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
b7dbaa
-   d->fxState[2].size   = sizeof(V128);
b7dbaa
+   case s390_VStrX_VFAE:
b7dbaa
+      for (UChar idx = 0; idx < n_elem; idx++) {
b7dbaa
+         IRTemp a = newTemp(Ity_V128);
b7dbaa
+         assign(a, binop(cmpeq_op, mkexpr(op2),
b7dbaa
+                         unop(dup_op,
b7dbaa
+                              binop(getelem_op, mkexpr(op3), mkU8(idx)))));
b7dbaa
+         match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a));
b7dbaa
+      }
b7dbaa
+      break;
b7dbaa
 
b7dbaa
-   stmt(IRStmt_Dirty(d));
b7dbaa
+   case s390_VStrX_VFEE:
b7dbaa
+      match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3));
b7dbaa
+      break;
b7dbaa
 
b7dbaa
-   if (s390_vr_is_cs_set(m5)) {
b7dbaa
-      s390_cc_set(cc);
b7dbaa
+   default:
b7dbaa
+      vpanic("s390_irgen_VStrX: unknown insn");
b7dbaa
    }
b7dbaa
 
b7dbaa
-   return "vfae";
b7dbaa
-}
b7dbaa
+   /* Invert first intermediate result if requested */
b7dbaa
+   if (m6 & 8)
b7dbaa
+      match = unop(Iop_NotV128, match);
b7dbaa
 
b7dbaa
-static const HChar *
b7dbaa
-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
-{
b7dbaa
-   IRDirty* d;
b7dbaa
-   IRTemp cc = newTemp(Ity_I64);
b7dbaa
+   IRTemp inter1 = newTemp(Ity_V128);
b7dbaa
+   IRTemp inter2 = newTemp(Ity_V128);
b7dbaa
+   IRTemp accu = newTemp(Ity_V128);
b7dbaa
+   assign(inter1, match);
b7dbaa
 
b7dbaa
-   /* Check for specification exception */
b7dbaa
-   vassert(m4 < 3);
b7dbaa
-   vassert((m5 & 0b1100) == 0);
b7dbaa
+   /* Determine second intermediate and accumulated result */
b7dbaa
+   if (s390_vr_is_zs_set(m6)) {
b7dbaa
+      assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0)));
b7dbaa
+      assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2)));
b7dbaa
+   } else {
b7dbaa
+      assign(inter2, mkV128(0));
b7dbaa
+      assign(accu, mkexpr(inter1));
b7dbaa
+   }
b7dbaa
 
b7dbaa
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
b7dbaa
-   details.op = S390_VEC_OP_VFEE;
b7dbaa
-   details.v1 = v1;
b7dbaa
-   details.v2 = v2;
b7dbaa
-   details.v3 = v3;
b7dbaa
-   details.m4 = m4;
b7dbaa
-   details.m5 = m5;
b7dbaa
+   IRTemp accu0 = newTemp(Ity_I64);
b7dbaa
+   IRTemp is_match0 = newTemp(Ity_I1);
b7dbaa
+   IRTemp mismatch_bits = newTemp(Ity_I64);
b7dbaa
 
b7dbaa
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
b7dbaa
-                         &s390x_dirtyhelper_vec_op,
b7dbaa
-                         mkIRExprVec_2(IRExpr_GSPTR(),
b7dbaa
-                                       mkU64(details.serialized)));
b7dbaa
+   assign(accu0, unop(Iop_V128HIto64, mkexpr(accu)));
b7dbaa
+   assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0)));
b7dbaa
+   assign(mismatch_bits, unop(Iop_ClzNat64,
b7dbaa
+                              mkite(mkexpr(is_match0), mkexpr(accu0),
b7dbaa
+                                    unop(Iop_V128to64, mkexpr(accu)))));
b7dbaa
 
b7dbaa
-   d->nFxState = 3;
b7dbaa
-   vex_bzero(&d->fxState, sizeof(d->fxState));
b7dbaa
-   d->fxState[0].fx     = Ifx_Read;
b7dbaa
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
b7dbaa
-   d->fxState[0].size   = sizeof(V128);
b7dbaa
-   d->fxState[1].fx     = Ifx_Read;
b7dbaa
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
b7dbaa
-   d->fxState[1].size   = sizeof(V128);
b7dbaa
-   d->fxState[2].fx     = Ifx_Write;
b7dbaa
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
b7dbaa
-   d->fxState[2].size   = sizeof(V128);
b7dbaa
+   if (m6 & 4) {
b7dbaa
+      put_vr_qw(v1, mkexpr(inter1));
b7dbaa
+   } else {
b7dbaa
+      /* Determine byte position of first match */
b7dbaa
+      tmp = binop(Iop_Add64,
b7dbaa
+                  binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)),
b7dbaa
+                  mkite(mkexpr(is_match0), mkU64(0), mkU64(8)));
b7dbaa
+      put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
b7dbaa
+   }
b7dbaa
 
b7dbaa
-   stmt(IRStmt_Dirty(d));
b7dbaa
+   if (s390_vr_is_cs_set(m6)) {
b7dbaa
+      /* Set condition code depending on...
b7dbaa
+                   zero found
b7dbaa
+                      n  y
b7dbaa
+                    +------
b7dbaa
+         match    n | 3  0
b7dbaa
+          found   y | 1  2   */
b7dbaa
 
b7dbaa
-   if (s390_vr_is_cs_set(m5)) {
b7dbaa
+      IRTemp cc = newTemp(Ity_I64);
b7dbaa
+
b7dbaa
+      tmp = binop(Iop_Shr64,
b7dbaa
+                  mkite(mkexpr(is_match0),
b7dbaa
+                        unop(Iop_V128HIto64, mkexpr(inter1)),
b7dbaa
+                        unop(Iop_V128to64, mkexpr(inter1))),
b7dbaa
+                  unop(Iop_64to8,
b7dbaa
+                       binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits))));
b7dbaa
+      tmp = binop(Iop_Shl64, tmp, mkU8(1));
b7dbaa
+      if (s390_vr_is_zs_set(m6)) {
b7dbaa
+         tmp = binop(Iop_Xor64, tmp,
b7dbaa
+                     mkite(binop(Iop_ExpCmpNE64, mkU64(0),
b7dbaa
+                                 binop(Iop_Or64,
b7dbaa
+                                       unop(Iop_V128HIto64, mkexpr(inter2)),
b7dbaa
+                                       unop(Iop_V128to64, mkexpr(inter2)))),
b7dbaa
+                           mkU64(0),
b7dbaa
+                           mkU64(3)));
b7dbaa
+      } else {
b7dbaa
+         tmp = binop(Iop_Xor64, tmp, mkU64(3));
b7dbaa
+      }
b7dbaa
+      assign(cc, tmp);
b7dbaa
       s390_cc_set(cc);
b7dbaa
    }
b7dbaa
+   dis_res->hint = Dis_HintVerbose;
b7dbaa
+}
b7dbaa
 
b7dbaa
+static const HChar *
b7dbaa
+s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
+{
b7dbaa
+   s390_insn_assert("vfae", m4 <= 2);
b7dbaa
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE);
b7dbaa
+   return "vfae";
b7dbaa
+}
b7dbaa
+
b7dbaa
+static const HChar *
b7dbaa
+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
+{
b7dbaa
+   s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3));
b7dbaa
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE);
b7dbaa
    return "vfee";
b7dbaa
 }
b7dbaa
 
b7dbaa
@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
b7dbaa
 static const HChar *
b7dbaa
 s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
b7dbaa
 {
b7dbaa
-   IRDirty* d;
b7dbaa
-   IRTemp cc = newTemp(Ity_I64);
b7dbaa
-
b7dbaa
-   /* Check for specification exception */
b7dbaa
-   vassert(m5 < 3);
b7dbaa
-
b7dbaa
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
b7dbaa
-   details.op = S390_VEC_OP_VSTRC;
b7dbaa
-   details.v1 = v1;
b7dbaa
-   details.v2 = v2;
b7dbaa
-   details.v3 = v3;
b7dbaa
-   details.v4 = v4;
b7dbaa
-   details.m4 = m5;
b7dbaa
-   details.m5 = m6;
b7dbaa
-
b7dbaa
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
b7dbaa
-                         &s390x_dirtyhelper_vec_op,
b7dbaa
-                         mkIRExprVec_2(IRExpr_GSPTR(),
b7dbaa
-                                       mkU64(details.serialized)));
b7dbaa
-
b7dbaa
-   d->nFxState = 4;
b7dbaa
-   vex_bzero(&d->fxState, sizeof(d->fxState));
b7dbaa
-   d->fxState[0].fx     = Ifx_Read;
b7dbaa
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
b7dbaa
-   d->fxState[0].size   = sizeof(V128);
b7dbaa
-   d->fxState[1].fx     = Ifx_Read;
b7dbaa
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
b7dbaa
-   d->fxState[1].size   = sizeof(V128);
b7dbaa
-   d->fxState[2].fx     = Ifx_Read;
b7dbaa
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
b7dbaa
-   d->fxState[2].size   = sizeof(V128);
b7dbaa
-   d->fxState[3].fx     = Ifx_Write;
b7dbaa
-   d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
b7dbaa
-   d->fxState[3].size   = sizeof(V128);
b7dbaa
-
b7dbaa
-   stmt(IRStmt_Dirty(d));
b7dbaa
-
b7dbaa
-   if (s390_vr_is_cs_set(m6)) {
b7dbaa
-      s390_cc_set(cc);
b7dbaa
-   }
b7dbaa
-
b7dbaa
+   s390_insn_assert("vstrc", m5 <= 2);
b7dbaa
+   s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC);
b7dbaa
    return "vstrc";
b7dbaa
 }
b7dbaa
 
b7dbaa
b7dbaa
commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Tue Mar 2 14:12:29 2021 +0100
b7dbaa
b7dbaa
    Bug 434296 - s390x: Rework IR conversion of VFENE
b7dbaa
    
b7dbaa
    So far the z/Architecture instruction "vector find element not
b7dbaa
    equal" (VFENE) is transformed to a loop.  This can cause spurious
b7dbaa
    "conditional jump or move depends on uninitialised value(s)" messages by
b7dbaa
    memcheck.  Re-implement without a loop.
b7dbaa
b7dbaa
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
b7dbaa
index 49b6cd5dd..caec3108e 100644
b7dbaa
--- a/VEX/priv/guest_s390_defs.h
b7dbaa
+++ b/VEX/priv/guest_s390_defs.h
b7dbaa
@@ -265,7 +265,6 @@ typedef enum {
b7dbaa
    S390_VEC_OP_INVALID = 0,
b7dbaa
    S390_VEC_OP_VPKS,
b7dbaa
    S390_VEC_OP_VPKLS,
b7dbaa
-   S390_VEC_OP_VFENE,
b7dbaa
    S390_VEC_OP_VISTR,
b7dbaa
    S390_VEC_OP_VCEQ,
b7dbaa
    S390_VEC_OP_VTM,
b7dbaa
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
b7dbaa
index 63d2e8ce5..2188ce5c1 100644
b7dbaa
--- a/VEX/priv/guest_s390_helpers.c
b7dbaa
+++ b/VEX/priv/guest_s390_helpers.c
b7dbaa
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
       {0x00, 0x00}, /* invalid */
b7dbaa
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
b7dbaa
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
b7dbaa
-      [S390_VEC_OP_VFENE] = {0xe7, 0x81},
b7dbaa
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
b7dbaa
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
b7dbaa
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
b7dbaa
@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
 
b7dbaa
    case S390_VEC_OP_VPKS:
b7dbaa
    case S390_VEC_OP_VPKLS:
b7dbaa
-   case S390_VEC_OP_VFENE:
b7dbaa
    case S390_VEC_OP_VCEQ:
b7dbaa
    case S390_VEC_OP_VGFM:
b7dbaa
    case S390_VEC_OP_VCH:
b7dbaa
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
b7dbaa
index 26a947813..c8dc3ec18 100644
b7dbaa
--- a/VEX/priv/guest_s390_toIR.c
b7dbaa
+++ b/VEX/priv/guest_s390_toIR.c
b7dbaa
@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
 static const HChar *
b7dbaa
 s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
 {
b7dbaa
-   const Bool negateComparison = True;
b7dbaa
-   const IRType type = s390_vr_get_type(m4);
b7dbaa
+   s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3));
b7dbaa
 
b7dbaa
-   /* Check for specification exception */
b7dbaa
-   vassert(m4 < 3);
b7dbaa
-   vassert((m5 & 0b1100) == 0);
b7dbaa
-
b7dbaa
-   static const IROp elementGetters[] = {
b7dbaa
-      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
b7dbaa
+   static const IROp compare_op[3] = {
b7dbaa
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
b7dbaa
    };
b7dbaa
-   IROp getter = elementGetters[m4];
b7dbaa
-
b7dbaa
-   static const IROp elementComparators[] = {
b7dbaa
-      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
b7dbaa
+   static const IROp abs_op[3] = {
b7dbaa
+      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4
b7dbaa
    };
b7dbaa
-   IROp comparator = elementComparators[m4];
b7dbaa
-
b7dbaa
-   static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
b7dbaa
-   IROp converter = resultConverter[m4];
b7dbaa
-
b7dbaa
-   IRTemp isZeroElem;
b7dbaa
-
b7dbaa
-   IRTemp counter = newTemp(Ity_I64);
b7dbaa
-   assign(counter, get_counter_dw0());
b7dbaa
-
b7dbaa
-   IRTemp arg1 = newTemp(type);
b7dbaa
-   assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
b7dbaa
-   IRTemp arg2 = newTemp(type);
b7dbaa
-   assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
b7dbaa
+   IRTemp op2 = newTemp(Ity_V128);
b7dbaa
+   IRTemp op3 = newTemp(Ity_V128);
b7dbaa
+   IRTemp op2zero = newTemp(Ity_V128);
b7dbaa
+   IRTemp diff = newTemp(Ity_V128);
b7dbaa
+   IRTemp diff0 = newTemp(Ity_I64);
b7dbaa
+   IRTemp neq0 = newTemp(Ity_I1);
b7dbaa
+   IRTemp samebits = newTemp(Ity_I64);
b7dbaa
+   IRExpr* tmp;
b7dbaa
 
b7dbaa
-   IRTemp isGoodPair = newTemp(Ity_I1);
b7dbaa
-   if(negateComparison) {
b7dbaa
-      assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
b7dbaa
-                                              mkexpr(arg2))));
b7dbaa
-   } else {
b7dbaa
-      assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
b7dbaa
-   }
b7dbaa
+   assign(op2, get_vr_qw(v2));
b7dbaa
+   assign(op3, get_vr_qw(v3));
b7dbaa
 
b7dbaa
-   if(s390_vr_is_zs_set(m5)) {
b7dbaa
-      isZeroElem = newTemp(Ity_I1);
b7dbaa
-      assign(isZeroElem, binop(comparator, mkexpr(arg1),
b7dbaa
-                               unop(converter, mkU64(0))));
b7dbaa
+   tmp = mkV128(0);
b7dbaa
+   if (s390_vr_is_zs_set(m5)) {
b7dbaa
+      tmp = binop(compare_op[m4], mkexpr(op2), tmp);
b7dbaa
+      if (s390_vr_is_cs_set(m5) && v3 != v2) {
b7dbaa
+         /* Count leading equal bits in the terminating element too */
b7dbaa
+         tmp = unop(abs_op[m4], tmp);
b7dbaa
+      }
b7dbaa
+      assign(op2zero, tmp);
b7dbaa
+      tmp = mkexpr(op2zero);
b7dbaa
    }
b7dbaa
-
b7dbaa
-   static const UChar invalidIndices[] = {16, 8, 4};
b7dbaa
-   const UChar invalidIndex = invalidIndices[m4];
b7dbaa
-   IRTemp endOfVectorIsReached = newTemp(Ity_I1);
b7dbaa
-   assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
b7dbaa
-                                      mkU64(invalidIndex)));
b7dbaa
-
b7dbaa
-   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
b7dbaa
-   IRExpr* shouldBreak = binop(Iop_Or32,
b7dbaa
-                               unop(Iop_1Uto32, mkexpr(isGoodPair)),
b7dbaa
-                               unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
b7dbaa
-                              );
b7dbaa
-   if(s390_vr_is_zs_set(m5)) {
b7dbaa
-      shouldBreak = binop(Iop_Or32,
b7dbaa
-                          shouldBreak,
b7dbaa
-                          unop(Iop_1Uto32, mkexpr(isZeroElem)));
b7dbaa
-   }
b7dbaa
-   iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
b7dbaa
-
b7dbaa
-   IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
b7dbaa
-   if(m4 > 0) {
b7dbaa
-      /* We should return index of byte but we found index of element in
b7dbaa
-         general case.
b7dbaa
-            if byte elem (m4 == 0) then indexOfByte = indexOfElement
b7dbaa
-            if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement
b7dbaa
-                                                        = indexOfElement << 1
b7dbaa
-            if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement
b7dbaa
-                                                    = indexOfElement << 2
b7dbaa
-      */
b7dbaa
-      foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4));
b7dbaa
+   if (v3 != v2) {
b7dbaa
+      tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3));
b7dbaa
+      if (s390_vr_is_zs_set(m5))
b7dbaa
+         tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero));
b7dbaa
    }
b7dbaa
 
b7dbaa
-   IRTemp result = newTemp(Ity_I64);
b7dbaa
-   assign(result, mkite(mkexpr(endOfVectorIsReached),
b7dbaa
-                        mkU64(16),
b7dbaa
-                        foundIndex));
b7dbaa
-   put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
b7dbaa
+   assign(diff, tmp);
b7dbaa
+   assign(diff0, unop(Iop_V128HIto64, mkexpr(diff)));
b7dbaa
+   assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0)));
b7dbaa
+   assign(samebits, unop(Iop_ClzNat64,
b7dbaa
+                         mkite(mkexpr(neq0), mkexpr(diff0),
b7dbaa
+                               unop(Iop_V128to64, mkexpr(diff)))));
b7dbaa
 
b7dbaa
+   /* Determine the byte size of the initial equal-elements sequence */
b7dbaa
+   tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3));
b7dbaa
+   if (m4 != 0)
b7dbaa
+      tmp = binop(Iop_Shl64, tmp, mkU8(m4));
b7dbaa
+   tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8)));
b7dbaa
+   put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
b7dbaa
 
b7dbaa
    if (s390_vr_is_cs_set(m5)) {
b7dbaa
-      static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64};
b7dbaa
-      IROp to64Converter = to64Converters[m4];
b7dbaa
-
b7dbaa
-      IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U,
b7dbaa
-                                         unop(to64Converter, mkexpr(arg1)),
b7dbaa
-                                         unop(to64Converter, mkexpr(arg2)));
b7dbaa
-
b7dbaa
-      IRExpr* ccexp = mkite(binop(Iop_CmpEQ32,
b7dbaa
-                                  unop(Iop_1Uto32, mkexpr(isGoodPair)),
b7dbaa
-                                  mkU32(1)),
b7dbaa
-                            mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)),
b7dbaa
-                            mkU64(3));
b7dbaa
-
b7dbaa
-      if(s390_vr_is_zs_set(m5)) {
b7dbaa
-         IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2),
b7dbaa
-                                    unop(converter, mkU64(0)));
b7dbaa
-         IRExpr* bothArgsAreZero = binop(Iop_And32,
b7dbaa
-                                         unop(Iop_1Uto32, mkexpr(isZeroElem)),
b7dbaa
-                                         unop(Iop_1Uto32, arg2IsZero));
b7dbaa
-         ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)),
b7dbaa
-                       mkU64(0),
b7dbaa
-                       ccexp);
b7dbaa
-      }
b7dbaa
+      /* Set condition code like follows --
b7dbaa
+         0: operands equal up to and including zero element
b7dbaa
+         1: op2 < op3    2: op2 > op3    3: op2 = op3 */
b7dbaa
       IRTemp cc = newTemp(Ity_I64);
b7dbaa
-      assign(cc, ccexp);
b7dbaa
-
b7dbaa
+      if (v3 == v2) {
b7dbaa
+         tmp = mkU64(0);
b7dbaa
+      } else {
b7dbaa
+         IRTemp shift = newTemp(Ity_I8);
b7dbaa
+         IRExpr* op2half = mkite(mkexpr(neq0),
b7dbaa
+                                 unop(Iop_V128HIto64, mkexpr(op2)),
b7dbaa
+                                 unop(Iop_V128to64, mkexpr(op2)));
b7dbaa
+         IRExpr* op3half = mkite(mkexpr(neq0),
b7dbaa
+                                 unop(Iop_V128HIto64, mkexpr(op3)),
b7dbaa
+                                 unop(Iop_V128to64, mkexpr(op3)));
b7dbaa
+         assign(shift, unop(Iop_64to8,
b7dbaa
+                            binop(Iop_Sub64, mkU64(63), mkexpr(samebits))));
b7dbaa
+         tmp = binop(Iop_Or64,
b7dbaa
+                     binop(Iop_Shl64,
b7dbaa
+                           binop(Iop_And64, mkU64(1),
b7dbaa
+                                 binop(Iop_Shr64, op2half, mkexpr(shift))),
b7dbaa
+                           mkU8(1)),
b7dbaa
+                     binop(Iop_And64, mkU64(1),
b7dbaa
+                           binop(Iop_Shr64, op3half, mkexpr(shift))));
b7dbaa
+      }
b7dbaa
+      assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)),
b7dbaa
+                       mkU64(3), tmp));
b7dbaa
       s390_cc_set(cc);
b7dbaa
    }
b7dbaa
-
b7dbaa
-
b7dbaa
-   put_counter_dw0(mkU64(0));
b7dbaa
+   dis_res->hint = Dis_HintVerbose;
b7dbaa
    return "vfene";
b7dbaa
 }
b7dbaa
 
b7dbaa
b7dbaa
commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Tue Apr 27 20:13:26 2021 +0200
b7dbaa
b7dbaa
    Bug 434296 - s390x: Rework IR conversion of VISTR
b7dbaa
    
b7dbaa
    The z/Architecture instruction VISTR is currently transformed to a dirty
b7dbaa
    helper that executes the instruction.  This can cause false positives with
b7dbaa
    memcheck if the input string contains undefined characters after the
b7dbaa
    string terminator.  Implement without a dirty helper and emulate the
b7dbaa
    instruction instead.
b7dbaa
b7dbaa
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
b7dbaa
index caec3108e..24f3798c1 100644
b7dbaa
--- a/VEX/priv/guest_s390_defs.h
b7dbaa
+++ b/VEX/priv/guest_s390_defs.h
b7dbaa
@@ -265,7 +265,6 @@ typedef enum {
b7dbaa
    S390_VEC_OP_INVALID = 0,
b7dbaa
    S390_VEC_OP_VPKS,
b7dbaa
    S390_VEC_OP_VPKLS,
b7dbaa
-   S390_VEC_OP_VISTR,
b7dbaa
    S390_VEC_OP_VCEQ,
b7dbaa
    S390_VEC_OP_VTM,
b7dbaa
    S390_VEC_OP_VGFM,
b7dbaa
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
b7dbaa
index 2188ce5c1..1e04f601a 100644
b7dbaa
--- a/VEX/priv/guest_s390_helpers.c
b7dbaa
+++ b/VEX/priv/guest_s390_helpers.c
b7dbaa
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
       {0x00, 0x00}, /* invalid */
b7dbaa
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
b7dbaa
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
b7dbaa
-      [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
b7dbaa
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
b7dbaa
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
b7dbaa
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
b7dbaa
@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
b7dbaa
    the_insn.VRR.op2 = opcodes[d->op][1];
b7dbaa
 
b7dbaa
    switch(d->op) {
b7dbaa
-   case S390_VEC_OP_VISTR:
b7dbaa
-      the_insn.VRR.v1 = 1;
b7dbaa
-      the_insn.VRR.v2 = 2;
b7dbaa
-      the_insn.VRR.rxb = 0b1100;
b7dbaa
-      the_insn.VRR.m4 = d->m4;
b7dbaa
-      the_insn.VRR.m5 = d->m5;
b7dbaa
-      break;
b7dbaa
-
b7dbaa
    case S390_VEC_OP_VTM:
b7dbaa
       the_insn.VRR.v1 = 2;
b7dbaa
       the_insn.VRR.v2 = 3;
b7dbaa
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
b7dbaa
index c8dc3ec18..dfea54259 100644
b7dbaa
--- a/VEX/priv/guest_s390_toIR.c
b7dbaa
+++ b/VEX/priv/guest_s390_toIR.c
b7dbaa
@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
b7dbaa
 static const HChar *
b7dbaa
 s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
b7dbaa
 {
b7dbaa
-   IRDirty* d;
b7dbaa
-   IRTemp cc = newTemp(Ity_I64);
b7dbaa
-
b7dbaa
-   /* Check for specification exception */
b7dbaa
-   vassert(m3 < 3);
b7dbaa
-   vassert((m5 & 0b1110) == 0);
b7dbaa
+   s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1));
b7dbaa
 
b7dbaa
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
b7dbaa
-   details.op = S390_VEC_OP_VISTR;
b7dbaa
-   details.v1 = v1;
b7dbaa
-   details.v2 = v2;
b7dbaa
-   details.m4 = m3;
b7dbaa
-   details.m5 = m5;
b7dbaa
-
b7dbaa
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
b7dbaa
-                         &s390x_dirtyhelper_vec_op,
b7dbaa
-                         mkIRExprVec_2(IRExpr_GSPTR(),
b7dbaa
-                                       mkU64(details.serialized)));
b7dbaa
+   static const IROp compare_op[3] = {
b7dbaa
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
b7dbaa
+   };
b7dbaa
+   IRExpr* t;
b7dbaa
+   IRTemp op2 = newTemp(Ity_V128);
b7dbaa
+   IRTemp op2term = newTemp(Ity_V128);
b7dbaa
+   IRTemp mask = newTemp(Ity_V128);
b7dbaa
 
b7dbaa
-   d->nFxState = 2;
b7dbaa
-   vex_bzero(&d->fxState, sizeof(d->fxState));
b7dbaa
-   d->fxState[0].fx     = Ifx_Read;
b7dbaa
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
b7dbaa
-   d->fxState[0].size   = sizeof(V128);
b7dbaa
-   d->fxState[1].fx     = Ifx_Write;
b7dbaa
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
b7dbaa
-   d->fxState[1].size   = sizeof(V128);
b7dbaa
+   assign(op2, get_vr_qw(v2));
b7dbaa
+   assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0)));
b7dbaa
+   t = mkexpr(op2term);
b7dbaa
 
b7dbaa
-   stmt(IRStmt_Dirty(d));
b7dbaa
+   for (UChar i = m3; i < 4; i++) {
b7dbaa
+      IRTemp s = newTemp(Ity_V128);
b7dbaa
+      assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i))));
b7dbaa
+      t = mkexpr(s);
b7dbaa
+   }
b7dbaa
+   assign(mask, unop(Iop_NotV128, t));
b7dbaa
+   put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask)));
b7dbaa
 
b7dbaa
    if (s390_vr_is_cs_set(m5)) {
b7dbaa
+      IRTemp cc = newTemp(Ity_I64);
b7dbaa
+      assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask))));
b7dbaa
       s390_cc_set(cc);
b7dbaa
    }
b7dbaa
-
b7dbaa
+   dis_res->hint = Dis_HintVerbose;
b7dbaa
    return "vistr";
b7dbaa
 }
b7dbaa
 
b7dbaa
b7dbaa
commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Fri Apr 16 12:44:44 2021 +0200
b7dbaa
b7dbaa
    Bug 434296 - s390x: Add memcheck test cases for vector string insns
b7dbaa
    
b7dbaa
    Bug 434296 addresses memcheck false positives with the vector string
b7dbaa
    instructions VISTR, VSTRC, VFAE, VFEE, and VFENE.  Add test cases that
b7dbaa
    verify the fix for that bug.  Without the fix, memcheck yields many
b7dbaa
    complaints with these tests, most of which are false positives.
b7dbaa
b7dbaa
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
b7dbaa
index e4e69eb38..d183841ef 100644
b7dbaa
--- a/memcheck/tests/s390x/Makefile.am
b7dbaa
+++ b/memcheck/tests/s390x/Makefile.am
b7dbaa
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
b7dbaa
 
b7dbaa
 dist_noinst_SCRIPTS = filter_stderr
b7dbaa
 
b7dbaa
-INSN_TESTS = cdsg cu21 cu42 ltgjhe
b7dbaa
+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
b7dbaa
 
b7dbaa
 check_PROGRAMS = $(INSN_TESTS) 
b7dbaa
 
b7dbaa
@@ -14,3 +14,7 @@ EXTRA_DIST = \
b7dbaa
 AM_CFLAGS    += @FLAG_M64@
b7dbaa
 AM_CXXFLAGS  += @FLAG_M64@
b7dbaa
 AM_CCASFLAGS += @FLAG_M64@
b7dbaa
+
b7dbaa
+vstrc_CFLAGS  = $(AM_CFLAGS) -march=z13
b7dbaa
+vfae_CFLAGS   = $(AM_CFLAGS) -march=z13
b7dbaa
+vistr_CFLAGS  = $(AM_CFLAGS) -march=z13
b7dbaa
diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c
b7dbaa
new file mode 100644
b7dbaa
index 000000000..68781e7fb
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vfae.c
b7dbaa
@@ -0,0 +1,72 @@
b7dbaa
+#include <stdio.h>
b7dbaa
+#include <string.h>
b7dbaa
+
b7dbaa
+#define VECTOR __attribute__ ((vector_size (16)))
b7dbaa
+
b7dbaa
+typedef char VECTOR char_v;
b7dbaa
+
b7dbaa
+volatile char tmp;
b7dbaa
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
b7dbaa
+
b7dbaa
+static char_v to_char_vec(const char *str)
b7dbaa
+{
b7dbaa
+   char_v v;
b7dbaa
+   char buf[17];
b7dbaa
+   int len = strlen(str);
b7dbaa
+
b7dbaa
+   memcpy(buf, str, (len && str[len - 1] == '~') ? len - 1 : len + 1);
b7dbaa
+   v = *(char_v *) buf;
b7dbaa
+   return v;
b7dbaa
+}
b7dbaa
+
b7dbaa
+#define GENERATE_TEST(mnem)                                          \
b7dbaa
+static void test_ ## mnem ## _char(const char *str, const char *match, \
b7dbaa
+                                   int expect_res, int expect_cc)    \
b7dbaa
+{                                                                    \
b7dbaa
+   int cc;                                                           \
b7dbaa
+   char_v v1;                                                        \
b7dbaa
+   char_v v2 = to_char_vec(str);                                     \
b7dbaa
+   char_v v3 = to_char_vec(match);                                   \
b7dbaa
+                                                                     \
b7dbaa
+   __asm__(                                                          \
b7dbaa
+      "cr    0,0\n\t"           /* Clear CC */                       \
b7dbaa
+      #mnem "  %[v1],%[v2],%[v3],0,3\n\t"                            \
b7dbaa
+      "ipm   %[cc]\n\t"                                              \
b7dbaa
+      "srl   %[cc],28"                                               \
b7dbaa
+      : [v1] "=v" (v1),                                              \
b7dbaa
+        [cc] "=d" (cc)                                               \
b7dbaa
+      : [v2] "v" (v2),                                               \
b7dbaa
+        [v3] "v" (v3)                                                \
b7dbaa
+      : "cc");                                                       \
b7dbaa
+                                                                     \
b7dbaa
+   tmp = hex_digit[v1[7] & 0x1f];                                    \
b7dbaa
+   if (expect_res >= 0  && v1[7] != expect_res)                      \
b7dbaa
+      printf("result %u != %d\n", v1[7], expect_res);                \
b7dbaa
+                                                                     \
b7dbaa
+   tmp = hex_digit[cc & 0xf];                                        \
b7dbaa
+   if (expect_cc >= 0 && cc != expect_cc)                            \
b7dbaa
+      printf("CC %d != %d\n", cc, expect_cc);                        \
b7dbaa
+}
b7dbaa
+
b7dbaa
+GENERATE_TEST(vfae)
b7dbaa
+
b7dbaa
+GENERATE_TEST(vfee)
b7dbaa
+
b7dbaa
+GENERATE_TEST(vfene)
b7dbaa
+
b7dbaa
+int main()
b7dbaa
+{
b7dbaa
+   test_vfae_char("not found", "................", 9, 0);
b7dbaa
+   test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2);
b7dbaa
+   test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1);
b7dbaa
+
b7dbaa
+   test_vfee_char("same char here", "..........here", 10, 2);
b7dbaa
+   test_vfee_char("and here too ...", "_________t~", 9, 1);
b7dbaa
+   test_vfee_char("equality!~", "========!!~", 8, -1);
b7dbaa
+
b7dbaa
+   test_vfene_char("strings equal", "strings equal", 13, 0);
b7dbaa
+   test_vfene_char(hex_digit, hex_digit, 16, 3);
b7dbaa
+   test_vfene_char("undef~", "undefined", -1, -1);
b7dbaa
+   test_vfene_char("active~", "actually ok", 3, 1);
b7dbaa
+   return 0;
b7dbaa
+}
b7dbaa
diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..8aad3c87f
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vfae.stderr.exp
b7dbaa
@@ -0,0 +1,20 @@
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vfae_char (vfae.c:51)
b7dbaa
+   by 0x........: main (vfae.c:61)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vfae_char (vfae.c:51)
b7dbaa
+   by 0x........: main (vfae.c:61)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vfee_char (vfae.c:53)
b7dbaa
+   by 0x........: main (vfae.c:65)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vfene_char (vfae.c:55)
b7dbaa
+   by 0x........: main (vfae.c:69)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vfene_char (vfae.c:55)
b7dbaa
+   by 0x........: main (vfae.c:69)
b7dbaa
+
b7dbaa
diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..e69de29bb
b7dbaa
diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest
b7dbaa
new file mode 100644
b7dbaa
index 000000000..ae36c22fe
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vfae.vgtest
b7dbaa
@@ -0,0 +1,2 @@
b7dbaa
+prog: vfae
b7dbaa
+vgopts: -q
b7dbaa
diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c
b7dbaa
new file mode 100644
b7dbaa
index 000000000..7ed59b94b
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vistr.c
b7dbaa
@@ -0,0 +1,76 @@
b7dbaa
+#include <stdio.h>
b7dbaa
+#include <string.h>
b7dbaa
+
b7dbaa
+#define VECTOR __attribute__ ((vector_size (16)))
b7dbaa
+
b7dbaa
+typedef char VECTOR char_v;
b7dbaa
+
b7dbaa
+volatile char tmp;
b7dbaa
+static const char *hex_digit = "0123456789abcdef";
b7dbaa
+
b7dbaa
+static char_v to_char_vec(const char *str, char_v *maskp)
b7dbaa
+{
b7dbaa
+   char buf[17];
b7dbaa
+   char_v v;
b7dbaa
+   char_v mask = {0};
b7dbaa
+
b7dbaa
+   for (int i = 0; i < sizeof(buf); i++) {
b7dbaa
+      char ch = str[i];
b7dbaa
+      if (ch == '\0')
b7dbaa
+         break;
b7dbaa
+      else if (ch == '$') {
b7dbaa
+         buf[i] = '\0';
b7dbaa
+         mask[i] = -1;
b7dbaa
+      } else if (ch != '~') {
b7dbaa
+         buf[i] = ch;
b7dbaa
+         mask[i] = -1;
b7dbaa
+      }
b7dbaa
+   }
b7dbaa
+   v = *(char_v *) buf;
b7dbaa
+   *maskp = mask;
b7dbaa
+   return v;
b7dbaa
+}
b7dbaa
+
b7dbaa
+static void test_vistr_char(const char *str, const char *expect_res,
b7dbaa
+                            int expect_cc)
b7dbaa
+{
b7dbaa
+   int cc, count;
b7dbaa
+   char_v v1, mask;
b7dbaa
+   char_v v2 = to_char_vec(str, &mask);
b7dbaa
+   char_v exp_v1 = to_char_vec(expect_res, &mask);
b7dbaa
+   char equal[16];
b7dbaa
+
b7dbaa
+   __asm__(
b7dbaa
+      "cr    0,0\n\t"           /* Clear CC */
b7dbaa
+      "vistr %[v1],%[v2],0,1\n\t"
b7dbaa
+      "ipm   %[cc]\n\t"
b7dbaa
+      "srl   %[cc],28"
b7dbaa
+      : [v1] "=v" (v1),
b7dbaa
+        [cc] "=d" (cc)
b7dbaa
+      : [v2] "v" (v2)
b7dbaa
+      : "cc");
b7dbaa
+
b7dbaa
+   *(char_v *) equal = (v1 & mask) == (exp_v1 & mask);
b7dbaa
+   if (memchr(equal, 0, sizeof(equal)))
b7dbaa
+      printf("Result doesn't match `%s'\n", expect_res);
b7dbaa
+
b7dbaa
+   count = 0;
b7dbaa
+   for (int i = 0; i < 16; i++) {
b7dbaa
+      if (v1[i] == 0) count++;
b7dbaa
+   }
b7dbaa
+   tmp = hex_digit[count];
b7dbaa
+
b7dbaa
+   tmp = hex_digit[cc & 0xf];
b7dbaa
+   if (expect_cc >= 0 && cc != expect_cc)
b7dbaa
+      printf("CC %d != %d\n", cc, expect_cc);
b7dbaa
+}
b7dbaa
+
b7dbaa
+int main()
b7dbaa
+{
b7dbaa
+   test_vistr_char("terminated$====~", "terminated$$$$$$", 0);
b7dbaa
+   test_vistr_char("undef~~~~~~~~~~~", "undef", -1);
b7dbaa
+   test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1);
b7dbaa
+   test_vistr_char("Not. Terminated.", "Not. Terminated.", 3);
b7dbaa
+   test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0);
b7dbaa
+   return 0;
b7dbaa
+}
b7dbaa
diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..e4f35fd74
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vistr.stderr.exp
b7dbaa
@@ -0,0 +1,20 @@
b7dbaa
+Conditional jump or move depends on uninitialised value(s)
b7dbaa
+   at 0x........: test_vistr_char (vistr.c:59)
b7dbaa
+   by 0x........: main (vistr.c:71)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vistr_char (vistr.c:63)
b7dbaa
+   by 0x........: main (vistr.c:71)
b7dbaa
+
b7dbaa
+Conditional jump or move depends on uninitialised value(s)
b7dbaa
+   at 0x........: test_vistr_char (vistr.c:59)
b7dbaa
+   by 0x........: main (vistr.c:72)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vistr_char (vistr.c:63)
b7dbaa
+   by 0x........: main (vistr.c:72)
b7dbaa
+
b7dbaa
+Conditional jump or move depends on uninitialised value(s)
b7dbaa
+   at 0x........: test_vistr_char (vistr.c:59)
b7dbaa
+   by 0x........: main (vistr.c:74)
b7dbaa
+
b7dbaa
diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest
b7dbaa
new file mode 100644
b7dbaa
index 000000000..f99749d85
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vistr.vgtest
b7dbaa
@@ -0,0 +1,2 @@
b7dbaa
+prog: vistr
b7dbaa
+vgopts: -q
b7dbaa
diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c
b7dbaa
new file mode 100644
b7dbaa
index 000000000..268e2f858
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vstrc.c
b7dbaa
@@ -0,0 +1,92 @@
b7dbaa
+#include <stdio.h>
b7dbaa
+#include <string.h>
b7dbaa
+
b7dbaa
+#define VECTOR __attribute__ ((vector_size (16)))
b7dbaa
+
b7dbaa
+typedef char VECTOR char_v;
b7dbaa
+
b7dbaa
+struct vstrc_char_rng {
b7dbaa
+   unsigned char range[16];
b7dbaa
+   unsigned char flags[16];
b7dbaa
+};
b7dbaa
+
b7dbaa
+#define RNG_FLAG_EQ   0x80
b7dbaa
+#define RNG_FLAG_LT   0x40
b7dbaa
+#define RNG_FLAG_GT   0x20
b7dbaa
+#define RNG_FLAG_ANY  0xe0
b7dbaa
+#define RNG_FLAG_NONE 0x00
b7dbaa
+
b7dbaa
+volatile char tmp;
b7dbaa
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
b7dbaa
+
b7dbaa
+static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng,
b7dbaa
+                            int expect_res, int expect_cc)
b7dbaa
+{
b7dbaa
+   int cc;
b7dbaa
+   char_v v1;
b7dbaa
+   char_v v2 = *(const char_v *) str;
b7dbaa
+   char_v v3 = *(const char_v *) rng->range;
b7dbaa
+   char_v v4 = *(const char_v *) rng->flags;
b7dbaa
+
b7dbaa
+   __asm__(
b7dbaa
+      "cr    0,0\n\t"           /* Clear CC */
b7dbaa
+      "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t"
b7dbaa
+      "ipm   %[cc]\n\t"
b7dbaa
+      "srl   %[cc],28"
b7dbaa
+      : [v1] "=v" (v1),
b7dbaa
+        [cc] "=d" (cc)
b7dbaa
+      : [v2] "v" (v2),
b7dbaa
+        [v3] "v" (v3),
b7dbaa
+        [v4] "v" (v4)
b7dbaa
+      : "cc");
b7dbaa
+
b7dbaa
+   tmp = hex_digit[v1[7] & 0x1f];
b7dbaa
+   if (expect_res >= 0  && v1[7] != expect_res)
b7dbaa
+      printf("result %u != %d\n", v1[7], expect_res);
b7dbaa
+
b7dbaa
+   tmp = hex_digit[cc & 0xf];
b7dbaa
+   if (expect_cc >= 0 && cc != expect_cc)
b7dbaa
+      printf("CC %d != %d\n", cc, expect_cc);
b7dbaa
+}
b7dbaa
+
b7dbaa
+int main()
b7dbaa
+{
b7dbaa
+   struct vstrc_char_rng rng;
b7dbaa
+   char buf[16];
b7dbaa
+
b7dbaa
+   memset(rng.flags, RNG_FLAG_NONE, 16);
b7dbaa
+
b7dbaa
+   rng.range[4] = 'z';
b7dbaa
+   rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ;
b7dbaa
+   rng.flags[5] = RNG_FLAG_ANY;
b7dbaa
+   /* OK: match at the 'z' */
b7dbaa
+   test_vstrc_char("find the z", &rng, 9, 2);
b7dbaa
+
b7dbaa
+   rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ;
b7dbaa
+   rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ;
b7dbaa
+   /* Bad: undefined range */
b7dbaa
+   test_vstrc_char("undefined", &rng, -1, -1);
b7dbaa
+
b7dbaa
+   rng.range[12] = 'a';
b7dbaa
+   rng.range[13] = 'c';
b7dbaa
+   /* OK: match at the 'a' */
b7dbaa
+   test_vstrc_char("get the abc", &rng, 8, 2);
b7dbaa
+
b7dbaa
+   rng.flags[12] = RNG_FLAG_LT;
b7dbaa
+   rng.flags[13] = RNG_FLAG_GT;
b7dbaa
+   /* OK: no match up to null terminator */
b7dbaa
+   test_vstrc_char("no match", &rng, 8, 0);
b7dbaa
+
b7dbaa
+   /* OK: no match, no null terminator */
b7dbaa
+   test_vstrc_char("0123456789abcdef", &rng, 16, 3);
b7dbaa
+
b7dbaa
+   buf[0] = 'x';
b7dbaa
+   /* Bad: undefined string */
b7dbaa
+   test_vstrc_char(buf, &rng, -1, -1);
b7dbaa
+
b7dbaa
+   buf[1] = 'z';
b7dbaa
+   /* Bad: valid match, but CC undefined */
b7dbaa
+   test_vstrc_char(buf, &rng, 1, -1);
b7dbaa
+
b7dbaa
+   return 0;
b7dbaa
+}
b7dbaa
diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..c1125bea1
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vstrc.stderr.exp
b7dbaa
@@ -0,0 +1,20 @@
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vstrc_char (vstrc.c:43)
b7dbaa
+   by 0x........: main (vstrc.c:68)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vstrc_char (vstrc.c:47)
b7dbaa
+   by 0x........: main (vstrc.c:68)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vstrc_char (vstrc.c:43)
b7dbaa
+   by 0x........: main (vstrc.c:85)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vstrc_char (vstrc.c:47)
b7dbaa
+   by 0x........: main (vstrc.c:85)
b7dbaa
+
b7dbaa
+Use of uninitialised value of size 8
b7dbaa
+   at 0x........: test_vstrc_char (vstrc.c:47)
b7dbaa
+   by 0x........: main (vstrc.c:89)
b7dbaa
+
b7dbaa
diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..e69de29bb
b7dbaa
diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest
b7dbaa
new file mode 100644
b7dbaa
index 000000000..26f5db99b
b7dbaa
--- /dev/null
b7dbaa
+++ b/memcheck/tests/s390x/vstrc.vgtest
b7dbaa
@@ -0,0 +1,2 @@
b7dbaa
+prog: vstrc
b7dbaa
+vgopts: -q
b7dbaa
b7dbaa
commit a0bb049ace14ab52d386bb1d49a399f39eec4986
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Tue Mar 23 14:55:09 2021 +0100
b7dbaa
b7dbaa
    s390x: Improve handling of amodes without base register
b7dbaa
    
b7dbaa
    Addressing modes without a base or index register represent constants.
b7dbaa
    They can occur in some special cases such as shift operations and when
b7dbaa
    accessing individual vector elements.  Perform some minor improvements to
b7dbaa
    the handling of such amodes.
b7dbaa
b7dbaa
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
b7dbaa
index 6e0734ae0..2587f81a1 100644
b7dbaa
--- a/VEX/priv/host_s390_defs.c
b7dbaa
+++ b/VEX/priv/host_s390_defs.c
b7dbaa
@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am)
b7dbaa
 {
b7dbaa
    switch (am->tag) {
b7dbaa
    case S390_AMODE_B12:
b7dbaa
-      return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
b7dbaa
+      return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) &&
b7dbaa
+             fits_unsigned_12bit(am->d);
b7dbaa
 
b7dbaa
    case S390_AMODE_B20:
b7dbaa
       return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
b7dbaa
@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am)
b7dbaa
    }
b7dbaa
 }
b7dbaa
 
b7dbaa
+static Bool
b7dbaa
+s390_amode_is_constant(const s390_amode *am)
b7dbaa
+{
b7dbaa
+   return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0));
b7dbaa
+}
b7dbaa
+
b7dbaa
 
b7dbaa
 /* Record the register use of an amode */
b7dbaa
 static void
b7dbaa
 s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
b7dbaa
 {
b7dbaa
-   switch (am->tag) {
b7dbaa
-   case S390_AMODE_B12:
b7dbaa
-   case S390_AMODE_B20:
b7dbaa
-      addHRegUse(u, HRmRead, am->b);
b7dbaa
-      return;
b7dbaa
-
b7dbaa
-   case S390_AMODE_BX12:
b7dbaa
-   case S390_AMODE_BX20:
b7dbaa
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
b7dbaa
       addHRegUse(u, HRmRead, am->b);
b7dbaa
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
b7dbaa
       addHRegUse(u, HRmRead, am->x);
b7dbaa
-      return;
b7dbaa
-
b7dbaa
-   default:
b7dbaa
-      vpanic("s390_amode_get_reg_usage");
b7dbaa
-   }
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
 static void
b7dbaa
 s390_amode_map_regs(HRegRemap *m, s390_amode *am)
b7dbaa
 {
b7dbaa
-   switch (am->tag) {
b7dbaa
-   case S390_AMODE_B12:
b7dbaa
-   case S390_AMODE_B20:
b7dbaa
-      am->b = lookupHRegRemap(m, am->b);
b7dbaa
-      return;
b7dbaa
-
b7dbaa
-   case S390_AMODE_BX12:
b7dbaa
-   case S390_AMODE_BX20:
b7dbaa
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
b7dbaa
       am->b = lookupHRegRemap(m, am->b);
b7dbaa
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
b7dbaa
       am->x = lookupHRegRemap(m, am->x);
b7dbaa
-      return;
b7dbaa
-
b7dbaa
-   default:
b7dbaa
-      vpanic("s390_amode_map_regs");
b7dbaa
-   }
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
b7dbaa
                            insn->variant.alu.dst, vreg_opnd);
b7dbaa
    }
b7dbaa
 
b7dbaa
+   /* v-vgetelem <reg>,<vreg> */
b7dbaa
+   if (insn->tag == S390_INSN_VEC_AMODEOP
b7dbaa
+       && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM
b7dbaa
+       && insn->size == 8
b7dbaa
+       && sameHReg(insn->variant.vec_amodeop.op1, vreg)
b7dbaa
+       && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) {
b7dbaa
+      vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d;
b7dbaa
+      return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am);
b7dbaa
+   }
b7dbaa
+
b7dbaa
    /* v-<unop> <reg>,<vreg> */
b7dbaa
    if (insn->tag == S390_INSN_UNOP
b7dbaa
        && insn->variant.unop.src.tag == S390_OPND_REG
b7dbaa
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
b7dbaa
index 5f79280c0..ceca6836e 100644
b7dbaa
--- a/VEX/priv/host_s390_isel.c
b7dbaa
+++ b/VEX/priv/host_s390_isel.c
b7dbaa
@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr,
b7dbaa
                     Bool no_index __attribute__((unused)),
b7dbaa
                     Bool short_displacement)
b7dbaa
 {
b7dbaa
-   if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
b7dbaa
+   if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 &&
b7dbaa
+       expr->Iex.Unop.arg->tag == Iex_Const) {
b7dbaa
+      UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8;
b7dbaa
+      return s390_amode_b12((Int)value, s390_hreg_gpr(0));
b7dbaa
+
b7dbaa
+   } else if (expr->tag == Iex_Const) {
b7dbaa
+      ULong value = expr->Iex.Const.con->Ico.U64;
b7dbaa
+      if (ulong_fits_unsigned_12bit(value)) {
b7dbaa
+         return s390_amode_b12((Int)value, s390_hreg_gpr(0));
b7dbaa
+      }
b7dbaa
+
b7dbaa
+   } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
b7dbaa
       IRExpr *arg1 = expr->Iex.Binop.arg1;
b7dbaa
       IRExpr *arg2 = expr->Iex.Binop.arg2;
b7dbaa
 
b7dbaa
b7dbaa
commit fd935e238d907d9c523a311ba795077d95ad6912
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Fri Mar 26 19:27:47 2021 +0100
b7dbaa
b7dbaa
    s390x: Rework insn "v-vdup" and add "v-vrep"
b7dbaa
    
b7dbaa
    So far the only s390x insn for filling a vector with copies of the same
b7dbaa
    element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first
b7dbaa
    element of its vector argument.  This is fairly restrictive and can lead
b7dbaa
    to unnecessarily long code sequences.
b7dbaa
    
b7dbaa
    Redefine "v-vdup" to replicate any scalar value instead.  And add
b7dbaa
    "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a
b7dbaa
    vector.  Select the latter for suitable expressions like
b7dbaa
    
b7dbaa
      Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i))
b7dbaa
    
b7dbaa
    This improves the generated code for some vector string instructions,
b7dbaa
    where a lot of element replications are performed.
b7dbaa
b7dbaa
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
b7dbaa
index 2587f81a1..c764d6ef9 100644
b7dbaa
--- a/VEX/priv/host_s390_defs.c
b7dbaa
+++ b/VEX/priv/host_s390_defs.c
b7dbaa
@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
b7dbaa
                             insn->variant.unop.dst, vreg_opnd);
b7dbaa
    }
b7dbaa
 
b7dbaa
+   /* v-vrep <reg>,<vreg>,<idx> */
b7dbaa
+   if (insn->tag == S390_INSN_VEC_REPLICATE
b7dbaa
+       && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
b7dbaa
+      vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
b7dbaa
+      return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
b7dbaa
+                            insn->variant.vec_replicate.dst, vreg_opnd);
b7dbaa
+   }
b7dbaa
+
b7dbaa
 no_match:
b7dbaa
    return NULL;
b7dbaa
 }
b7dbaa
@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
b7dbaa
       addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
b7dbaa
       break;
b7dbaa
 
b7dbaa
+   case S390_INSN_VEC_REPLICATE:
b7dbaa
+      addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
b7dbaa
+      addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
b7dbaa
+      break;
b7dbaa
+
b7dbaa
    default:
b7dbaa
       vpanic("s390_insn_get_reg_usage");
b7dbaa
    }
b7dbaa
@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
b7dbaa
       insn->variant.vec_triop.op3 =
b7dbaa
          lookupHRegRemap(m, insn->variant.vec_triop.op3);
b7dbaa
       break;
b7dbaa
+
b7dbaa
+   case S390_INSN_VEC_REPLICATE:
b7dbaa
+      insn->variant.vec_replicate.dst =
b7dbaa
+         lookupHRegRemap(m, insn->variant.vec_replicate.dst);
b7dbaa
+      insn->variant.vec_replicate.op1 =
b7dbaa
+         lookupHRegRemap(m, insn->variant.vec_replicate.op1);
b7dbaa
+      break;
b7dbaa
+
b7dbaa
    default:
b7dbaa
       vpanic("s390_insn_map_regs");
b7dbaa
    }
b7dbaa
@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2)
b7dbaa
 
b7dbaa
 
b7dbaa
 static UChar *
b7dbaa
-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
b7dbaa
+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
b7dbaa
+{
b7dbaa
+   ULong the_insn = op;
b7dbaa
+   ULong rxb = s390_update_rxb(0, 1, &v1);
b7dbaa
+
b7dbaa
+   the_insn |= ((ULong)v1) << 36;
b7dbaa
+   the_insn |= ((ULong)i2) << 16;
b7dbaa
+   the_insn |= ((ULong)m3) << 12;
b7dbaa
+   the_insn |= ((ULong)rxb)<< 8;
b7dbaa
+
b7dbaa
+   return emit_6bytes(p, the_insn);
b7dbaa
+}
b7dbaa
+
b7dbaa
+
b7dbaa
+static UChar *
b7dbaa
+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
b7dbaa
+{
b7dbaa
+   ULong the_insn = op;
b7dbaa
+   ULong rxb = s390_update_rxb(0, 1, &v1);
b7dbaa
+   rxb = s390_update_rxb(rxb, 2, &v3);
b7dbaa
+
b7dbaa
+   the_insn |= ((ULong)v1) << 36;
b7dbaa
+   the_insn |= ((ULong)v3) << 32;
b7dbaa
+   the_insn |= ((ULong)i2) << 16;
b7dbaa
+   the_insn |= ((ULong)m4) << 12;
b7dbaa
+   the_insn |= ((ULong)rxb) << 8;
b7dbaa
+
b7dbaa
+   return emit_6bytes(p, the_insn);
b7dbaa
+}
b7dbaa
+
b7dbaa
+
b7dbaa
+static UChar *
b7dbaa
+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
b7dbaa
 {
b7dbaa
    ULong the_insn = op;
b7dbaa
    ULong rxb = s390_update_rxb(0, 1, &v1);
b7dbaa
@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
b7dbaa
    the_insn |= ((ULong)x2) << 32;
b7dbaa
    the_insn |= ((ULong)b2) << 28;
b7dbaa
    the_insn |= ((ULong)d2) << 16;
b7dbaa
+   the_insn |= ((ULong)m3) << 12;
b7dbaa
    the_insn |= ((ULong)rxb)<< 8;
b7dbaa
 
b7dbaa
    return emit_6bytes(p, the_insn);
b7dbaa
@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
b7dbaa
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
       s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
b7dbaa
 
b7dbaa
-   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
b7dbaa
+   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
b7dbaa
 }
b7dbaa
 
b7dbaa
 static UChar *
b7dbaa
@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2)
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
+static UChar *
b7dbaa
+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
b7dbaa
+{
b7dbaa
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
+      s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
b7dbaa
+
b7dbaa
+   return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
b7dbaa
+}
b7dbaa
+
b7dbaa
+
b7dbaa
 static UChar *
b7dbaa
 s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
b7dbaa
 {
b7dbaa
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
       s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
b7dbaa
 
b7dbaa
-   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
b7dbaa
+   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0);
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4)
b7dbaa
 
b7dbaa
 
b7dbaa
 static UChar *
b7dbaa
-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
b7dbaa
+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4)
b7dbaa
 {
b7dbaa
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
-      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
b7dbaa
+      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4);
b7dbaa
 
b7dbaa
-   return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
b7dbaa
+   return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4);
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
+static UChar *
b7dbaa
+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
b7dbaa
+{
b7dbaa
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
+      s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
b7dbaa
+
b7dbaa
+   return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
b7dbaa
+}
b7dbaa
+
b7dbaa
 
b7dbaa
 static UChar *
b7dbaa
 s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
b7dbaa
@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
b7dbaa
    return insn;
b7dbaa
 }
b7dbaa
 
b7dbaa
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
b7dbaa
+                                   UChar idx)
b7dbaa
+{
b7dbaa
+   s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
b7dbaa
+
b7dbaa
+   insn->tag  = S390_INSN_VEC_REPLICATE;
b7dbaa
+   insn->size = size;
b7dbaa
+   insn->variant.vec_replicate.dst = dst;
b7dbaa
+   insn->variant.vec_replicate.op1 = op1;
b7dbaa
+   insn->variant.vec_replicate.idx = idx;
b7dbaa
+
b7dbaa
+   return insn;
b7dbaa
+}
b7dbaa
+
b7dbaa
 /*---------------------------------------------------------------*/
b7dbaa
 /*--- Debug print                                             ---*/
b7dbaa
 /*---------------------------------------------------------------*/
b7dbaa
@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn)
b7dbaa
                    insn->variant.vec_triop.op3);
b7dbaa
       break;
b7dbaa
 
b7dbaa
+   case S390_INSN_VEC_REPLICATE:
b7dbaa
+      s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
b7dbaa
+                   insn->variant.vec_replicate.dst,
b7dbaa
+                   insn->variant.vec_replicate.op1,
b7dbaa
+                   insn->variant.vec_replicate.idx);
b7dbaa
+      break;
b7dbaa
+
b7dbaa
    default: goto fail;
b7dbaa
    }
b7dbaa
 
b7dbaa
@@ -9386,6 +9480,56 @@ s390_negate_emit(UChar *buf, const s390_insn *insn)
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
+static UChar *
b7dbaa
+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
b7dbaa
+{
b7dbaa
+   UChar v1 = hregNumber(insn->variant.unop.dst);
b7dbaa
+   s390_opnd_RMI opnd = insn->variant.unop.src;
b7dbaa
+   UChar r2;
b7dbaa
+
b7dbaa
+   switch (opnd.tag) {
b7dbaa
+   case S390_OPND_AMODE: {
b7dbaa
+      s390_amode* am = opnd.variant.am;
b7dbaa
+      UInt b = hregNumber(am->b);
b7dbaa
+      UInt x = hregNumber(am->x);
b7dbaa
+      UInt d = am->d;
b7dbaa
+
b7dbaa
+      if (fits_unsigned_12bit(d)) {
b7dbaa
+         return s390_emit_VLREP(buf, v1, x, b, d,
b7dbaa
+                                s390_getM_from_size(insn->size));
b7dbaa
+      }
b7dbaa
+      buf = s390_emit_load_mem(buf, insn->size, R0, am);
b7dbaa
+      r2 = R0;
b7dbaa
+      goto duplicate_from_gpr;
b7dbaa
+   }
b7dbaa
+
b7dbaa
+   case S390_OPND_IMMEDIATE: {
b7dbaa
+      ULong val = opnd.variant.imm;
b7dbaa
+
b7dbaa
+      if (ulong_fits_signed_16bit(val)) {
b7dbaa
+         return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
b7dbaa
+      }
b7dbaa
+      buf = s390_emit_load_64imm(buf, R0, val);
b7dbaa
+      r2 = R0;
b7dbaa
+      goto duplicate_from_gpr;
b7dbaa
+   }
b7dbaa
+
b7dbaa
+   case S390_OPND_REG:
b7dbaa
+      r2 = hregNumber(opnd.variant.reg);
b7dbaa
+
b7dbaa
+   duplicate_from_gpr:
b7dbaa
+      buf = s390_emit_VLVGP(buf, v1, r2, r2);
b7dbaa
+      if (insn->size != 8) {
b7dbaa
+         buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1,
b7dbaa
+                              s390_getM_from_size(insn->size));
b7dbaa
+      }
b7dbaa
+      return buf;
b7dbaa
+   }
b7dbaa
+
b7dbaa
+   vpanic("s390_vec_duplicate_emit");
b7dbaa
+}
b7dbaa
+
b7dbaa
+
b7dbaa
 static UChar *
b7dbaa
 s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
b7dbaa
 {
b7dbaa
@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
b7dbaa
       UShort i2 = insn->variant.unop.src.variant.imm;
b7dbaa
       return s390_emit_VGBM(buf, v1, i2);
b7dbaa
       }
b7dbaa
-   case S390_VEC_DUPLICATE: {
b7dbaa
-      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
b7dbaa
-      UChar v1 = hregNumber(insn->variant.unop.dst);
b7dbaa
-      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
b7dbaa
-      return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
b7dbaa
-      }
b7dbaa
+   case S390_VEC_DUPLICATE:  return s390_vec_duplicate_emit(buf, insn);
b7dbaa
    case S390_VEC_UNPACKLOWS: {
b7dbaa
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
b7dbaa
       vassert(insn->size < 8);
b7dbaa
@@ -11595,6 +11734,16 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
b7dbaa
 }
b7dbaa
 
b7dbaa
 
b7dbaa
+static UChar *
b7dbaa
+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
b7dbaa
+{
b7dbaa
+   UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
b7dbaa
+   UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
b7dbaa
+   UShort idx = (UShort) insn->variant.vec_replicate.idx;
b7dbaa
+   return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
b7dbaa
+}
b7dbaa
+
b7dbaa
+
b7dbaa
 Int
b7dbaa
 emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
b7dbaa
                Bool mode64, VexEndness endness_host,
b7dbaa
@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
b7dbaa
    case S390_INSN_VEC_TRIOP:
b7dbaa
       end = s390_insn_vec_triop_emit(buf, insn);
b7dbaa
       break;
b7dbaa
+
b7dbaa
+   case S390_INSN_VEC_REPLICATE:
b7dbaa
+      end = s390_insn_vec_replicate_emit(buf, insn);
b7dbaa
+      break;
b7dbaa
+
b7dbaa
    fail:
b7dbaa
    default:
b7dbaa
       vpanic("emit_S390Instr");
b7dbaa
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
b7dbaa
index 9b69f4d38..063fd3800 100644
b7dbaa
--- a/VEX/priv/host_s390_defs.h
b7dbaa
+++ b/VEX/priv/host_s390_defs.h
b7dbaa
@@ -166,7 +166,8 @@ typedef enum {
b7dbaa
    S390_INSN_VEC_AMODEINTOP,
b7dbaa
    S390_INSN_VEC_UNOP,
b7dbaa
    S390_INSN_VEC_BINOP,
b7dbaa
-   S390_INSN_VEC_TRIOP
b7dbaa
+   S390_INSN_VEC_TRIOP,
b7dbaa
+   S390_INSN_VEC_REPLICATE
b7dbaa
 } s390_insn_tag;
b7dbaa
 
b7dbaa
 
b7dbaa
@@ -738,6 +739,11 @@ typedef struct {
b7dbaa
          HReg          op2;    /* 128-bit second operand */
b7dbaa
          HReg          op3;    /* 128-bit third operand */
b7dbaa
       } vec_triop;
b7dbaa
+      struct {
b7dbaa
+         HReg          dst;    /* 128-bit result */
b7dbaa
+         HReg          op1;    /* 128-bit first operand */
b7dbaa
+         UChar         idx;    /* index of element to replicate */
b7dbaa
+      } vec_replicate;
b7dbaa
    } variant;
b7dbaa
 } s390_insn;
b7dbaa
 
b7dbaa
@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1,
b7dbaa
                                HReg op2);
b7dbaa
 s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1,
b7dbaa
                                HReg op2, HReg op3);
b7dbaa
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx);
b7dbaa
 
b7dbaa
 const HChar *s390_insn_as_string(const s390_insn *);
b7dbaa
 
b7dbaa
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
b7dbaa
index ceca6836e..968122596 100644
b7dbaa
--- a/VEX/priv/host_s390_isel.c
b7dbaa
+++ b/VEX/priv/host_s390_isel.c
b7dbaa
@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
b7dbaa
    }
b7dbaa
    /* --------- UNARY OP --------- */
b7dbaa
    case Iex_Unop: {
b7dbaa
-      UChar size_for_int_arg = 0;
b7dbaa
       HReg dst = INVALID_HREG;
b7dbaa
       HReg reg1 = INVALID_HREG;
b7dbaa
       s390_unop_t vec_unop = S390_UNOP_T_INVALID;
b7dbaa
       s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
b7dbaa
       IROp op = expr->Iex.Unop.op;
b7dbaa
+      IROp arg_op = Iop_INVALID;
b7dbaa
       IRExpr* arg = expr->Iex.Unop.arg;
b7dbaa
       switch(op) {
b7dbaa
       case Iop_NotV128:
b7dbaa
@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
b7dbaa
       }
b7dbaa
 
b7dbaa
       case Iop_Dup8x16:
b7dbaa
-         size = size_for_int_arg = 1;
b7dbaa
-         vec_unop = S390_VEC_DUPLICATE;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         size = 1;
b7dbaa
+         arg_op = Iop_GetElem8x16;
b7dbaa
+         goto Iop_V_dup_wrk;
b7dbaa
       case Iop_Dup16x8:
b7dbaa
-         size = size_for_int_arg = 2;
b7dbaa
-         vec_unop = S390_VEC_DUPLICATE;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         size = 2;
b7dbaa
+         arg_op = Iop_GetElem16x8;
b7dbaa
+         goto Iop_V_dup_wrk;
b7dbaa
       case Iop_Dup32x4:
b7dbaa
-         size = size_for_int_arg = 4;
b7dbaa
-         vec_unop = S390_VEC_DUPLICATE;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         size = 4;
b7dbaa
+         arg_op = Iop_GetElem32x4;
b7dbaa
+         goto Iop_V_dup_wrk;
b7dbaa
+
b7dbaa
+      Iop_V_dup_wrk: {
b7dbaa
+         dst = newVRegV(env);
b7dbaa
+         if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
b7dbaa
+             arg->Iex.Binop.arg2->tag == Iex_Const) {
b7dbaa
+            ULong idx;
b7dbaa
+            idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
b7dbaa
+            reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
b7dbaa
+            addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx));
b7dbaa
+         } else {
b7dbaa
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
b7dbaa
+            addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
b7dbaa
+         }
b7dbaa
+         return dst;
b7dbaa
+      }
b7dbaa
 
b7dbaa
       case Iop_Widen8Sto16x8:
b7dbaa
          size = 1;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWS;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
       case Iop_Widen16Sto32x4:
b7dbaa
          size = 2;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWS;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
       case Iop_Widen32Sto64x2:
b7dbaa
          size = 4;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWS;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
       case Iop_Widen8Uto16x8:
b7dbaa
          size = 1;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWU;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
       case Iop_Widen16Uto32x4:
b7dbaa
          size = 2;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWU;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
       case Iop_Widen32Uto64x2:
b7dbaa
          size = 4;
b7dbaa
-         size_for_int_arg = 8;
b7dbaa
          vec_unop = S390_VEC_UNPACKLOWU;
b7dbaa
-         goto Iop_V_int_wrk;
b7dbaa
-
b7dbaa
-      Iop_V_int_wrk: {
b7dbaa
-         HReg vr1 = vec_generate_zeroes(env);
b7dbaa
-         s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
b7dbaa
-         reg1 = s390_isel_int_expr(env, arg);
b7dbaa
+         goto Iop_V_widen_wrk;
b7dbaa
 
b7dbaa
+      Iop_V_widen_wrk: {
b7dbaa
          vassert(vec_unop != S390_UNOP_T_INVALID);
b7dbaa
-         addInstr(env,
b7dbaa
-                  s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
b7dbaa
-                                           vr1, amode2, reg1));
b7dbaa
-
b7dbaa
+         s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
b7dbaa
+         HReg vr1 = newVRegV(env);
b7dbaa
+         addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
b7dbaa
          dst = newVRegV(env);
b7dbaa
          addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
b7dbaa
          return dst;
b7dbaa
b7dbaa
commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Thu Mar 25 18:48:07 2021 +0100
b7dbaa
b7dbaa
    s390x: Add support for emitting "vector or with complement"
b7dbaa
    
b7dbaa
    In the instruction selector, look out for IR expressions that fit "vector
b7dbaa
    or with complement (VOC)".  Emit when applicable.
b7dbaa
    
b7dbaa
    This slightly reduces the generated code sometimes, such as for certain
b7dbaa
    vector string instructions, where such expressions occur quite frequently.
b7dbaa
b7dbaa
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
b7dbaa
index c764d6ef9..239d9d299 100644
b7dbaa
--- a/VEX/priv/host_s390_defs.c
b7dbaa
+++ b/VEX/priv/host_s390_defs.c
b7dbaa
@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3)
b7dbaa
    return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3);
b7dbaa
 }
b7dbaa
 
b7dbaa
+static UChar *
b7dbaa
+s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3)
b7dbaa
+{
b7dbaa
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
b7dbaa
+      s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3);
b7dbaa
+
b7dbaa
+   return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3);
b7dbaa
+}
b7dbaa
+
b7dbaa
 static UChar *
b7dbaa
 s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3)
b7dbaa
 {
b7dbaa
@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn)
b7dbaa
       case S390_VEC_PACK_SATURU:    op = "v-vpacksaturu"; break;
b7dbaa
       case S390_VEC_COMPARE_EQUAL:  op = "v-vcmpeq"; break;
b7dbaa
       case S390_VEC_OR:             op = "v-vor"; break;
b7dbaa
+      case S390_VEC_ORC:            op = "v-vorc"; break;
b7dbaa
       case S390_VEC_XOR:            op = "v-vxor";  break;
b7dbaa
       case S390_VEC_AND:            op = "v-vand"; break;
b7dbaa
       case S390_VEC_MERGEL:         op = "v-vmergel"; break;
b7dbaa
@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
b7dbaa
          return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size));
b7dbaa
       case S390_VEC_OR:
b7dbaa
          return s390_emit_VO(buf, v1, v2, v3);
b7dbaa
+      case S390_VEC_ORC:
b7dbaa
+         return s390_emit_VOC(buf, v1, v2, v3);
b7dbaa
       case S390_VEC_XOR:
b7dbaa
          return s390_emit_VX(buf, v1, v2, v3);
b7dbaa
       case S390_VEC_AND:
b7dbaa
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
b7dbaa
index 063fd3800..dc116106e 100644
b7dbaa
--- a/VEX/priv/host_s390_defs.h
b7dbaa
+++ b/VEX/priv/host_s390_defs.h
b7dbaa
@@ -366,6 +366,7 @@ typedef enum {
b7dbaa
    S390_VEC_PACK_SATURU,
b7dbaa
    S390_VEC_COMPARE_EQUAL,
b7dbaa
    S390_VEC_OR,
b7dbaa
+   S390_VEC_ORC,
b7dbaa
    S390_VEC_XOR,
b7dbaa
    S390_VEC_AND,
b7dbaa
    S390_VEC_MERGEL,
b7dbaa
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
b7dbaa
index 968122596..53d76fe8a 100644
b7dbaa
--- a/VEX/priv/host_s390_isel.c
b7dbaa
+++ b/VEX/priv/host_s390_isel.c
b7dbaa
@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
b7dbaa
       case Iop_OrV128:
b7dbaa
          size = 16;
b7dbaa
          vec_binop = S390_VEC_OR;
b7dbaa
+         if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
b7dbaa
+            IRExpr* orig_arg1 = arg1;
b7dbaa
+            arg1 = arg2;
b7dbaa
+            arg2 = orig_arg1->Iex.Unop.arg;
b7dbaa
+            vec_binop = S390_VEC_ORC;
b7dbaa
+         } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
b7dbaa
+            arg2 = arg2->Iex.Unop.arg;
b7dbaa
+            vec_binop = S390_VEC_ORC;
b7dbaa
+         }
b7dbaa
          goto Iop_VV_wrk;
b7dbaa
 
b7dbaa
       case Iop_XorV128:
b7dbaa
b7dbaa
commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Tue Mar 30 17:45:20 2021 +0200
b7dbaa
b7dbaa
    s390x: Fix/optimize Iop_64HLtoV128
b7dbaa
    
b7dbaa
    In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies
b7dbaa
    of a 64-bit value is realized with Iop_64HLtoV128, since there is no such
b7dbaa
    operator as Iop_Dup64x2.  But the two args to Iop_64HLtoV128 use the same
b7dbaa
    expression, referenced twice.  Although this hasn't been seen to cause
b7dbaa
    real trouble yet, it's problematic and potentially inefficient, so change
b7dbaa
    it: Assign to a temp and pass that twice instead.
b7dbaa
    
b7dbaa
    In the instruction selector, if Iop_64HLtoV128 is found to be used for a
b7dbaa
    duplication as above, select "v-vdup" instead of "v-vinitfromgprs".  This
b7dbaa
    mimicks the behavior we'd get if there actually was an operator
b7dbaa
    Iop_Dup64x2.
b7dbaa
b7dbaa
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
b7dbaa
index dfea54259..a73dcfb14 100644
b7dbaa
--- a/VEX/priv/guest_s390_toIR.c
b7dbaa
+++ b/VEX/priv/guest_s390_toIR.c
b7dbaa
@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2)
b7dbaa
    case Ity_I32:
b7dbaa
       put_vr_qw(v1, unop(Iop_Dup32x4, o2));
b7dbaa
       break;
b7dbaa
-   case Ity_I64:
b7dbaa
-      put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
b7dbaa
+   case Ity_I64: {
b7dbaa
+      IRTemp val = newTemp(Ity_I64);
b7dbaa
+      assign(val, o2);
b7dbaa
+      put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val)));
b7dbaa
       break;
b7dbaa
+   }
b7dbaa
    default:
b7dbaa
       ppIRType(o2type);
b7dbaa
       vpanic("s390_vr_fill: invalid IRType");
b7dbaa
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
b7dbaa
index 53d76fe8a..ee20c6711 100644
b7dbaa
--- a/VEX/priv/host_s390_isel.c
b7dbaa
+++ b/VEX/priv/host_s390_isel.c
b7dbaa
@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
b7dbaa
       }
b7dbaa
 
b7dbaa
       case Iop_64HLtoV128:
b7dbaa
-         reg1 = s390_isel_int_expr(env, arg1);
b7dbaa
-         reg2 = s390_isel_int_expr(env, arg2);
b7dbaa
-
b7dbaa
-         addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
b7dbaa
-                  dst, reg1, reg2));
b7dbaa
-
b7dbaa
+         if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp &&
b7dbaa
+             arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) {
b7dbaa
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1);
b7dbaa
+            addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src));
b7dbaa
+         } else {
b7dbaa
+            reg1 = s390_isel_int_expr(env, arg1);
b7dbaa
+            reg2 = s390_isel_int_expr(env, arg2);
b7dbaa
+            addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
b7dbaa
+                                              dst, reg1, reg2));
b7dbaa
+         }
b7dbaa
          return dst;
b7dbaa
 
b7dbaa
       default:
b7dbaa
b7dbaa
commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
b7dbaa
Author: Andreas Arnez <arnez@linux.ibm.com>
b7dbaa
Date:   Fri May 7 18:13:03 2021 +0200
b7dbaa
b7dbaa
    s390x: Add missing stdout.exp for vector string memcheck test
b7dbaa
    
b7dbaa
    The file vistr.stdout.exp was missing from commit 32312d588.  Add it.
b7dbaa
b7dbaa
diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp
b7dbaa
new file mode 100644
b7dbaa
index 000000000..e69de29bb