90dfed
commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Wed Apr 28 18:52:30 2021 +0200
90dfed
90dfed
    Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
90dfed
    
90dfed
    The fix for bug 429864 - "s390x: C++ atomic test_and_set yields
90dfed
    false-positive memcheck diagnostics" changes the memcheck behavior at
90dfed
    various compare-and-swap instructions.  The comparison between the old and
90dfed
    expected value now always yields a defined result, even if the input
90dfed
    values are (partially) undefined.  However, some existing test cases
90dfed
    explicitly verify that memcheck complains about the use of uninitialised
90dfed
    values here.  These test cases are no longer valid.  Remove them.
90dfed
90dfed
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
90dfed
index 67ae8c293..e4e69eb38 100644
90dfed
--- a/memcheck/tests/s390x/Makefile.am
90dfed
+++ b/memcheck/tests/s390x/Makefile.am
90dfed
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
90dfed
 
90dfed
 dist_noinst_SCRIPTS = filter_stderr
90dfed
 
90dfed
-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe
90dfed
+INSN_TESTS = cdsg cu21 cu42 ltgjhe
90dfed
 
90dfed
 check_PROGRAMS = $(INSN_TESTS) 
90dfed
 
90dfed
@@ -14,7 +14,3 @@ EXTRA_DIST = \
90dfed
 AM_CFLAGS    += @FLAG_M64@
90dfed
 AM_CXXFLAGS  += @FLAG_M64@
90dfed
 AM_CCASFLAGS += @FLAG_M64@
90dfed
-
90dfed
-cs_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
90dfed
-csg_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
90dfed
-cds_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
90dfed
diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c
90dfed
deleted file mode 100644
90dfed
index ec5c533e0..000000000
90dfed
--- a/memcheck/tests/s390x/cds.c
90dfed
+++ /dev/null
90dfed
@@ -1,82 +0,0 @@
90dfed
-#include <stdint.h>
90dfed
-#include <stdio.h>
90dfed
-
90dfed
-typedef struct {
90dfed
-   uint64_t high;
90dfed
-   uint64_t low;
90dfed
-} quad_word;
90dfed
-
90dfed
-void 
90dfed
-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init)
90dfed
-{
90dfed
-   int cc; // unused
90dfed
-   quad_word op1 = op1_init;
90dfed
-   uint64_t  op2 = op2_init;
90dfed
-   quad_word op3 = op3_init;
90dfed
-
90dfed
-   __asm__ volatile (
90dfed
-                     "lmg     %%r0,%%r1,%1\n\t"
90dfed
-                     "lmg     %%r2,%%r3,%3\n\t"
90dfed
-                     "cds     %%r0,%%r2,%2\n\t"  //  cds 1st,3rd,2nd
90dfed
-                     "stmg    %%r0,%%r1,%1\n"    // store r0,r1 to op1
90dfed
-                     "stmg    %%r2,%%r3,%3\n"    // store r2,r3 to op3
90dfed
-                     : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3)
90dfed
-                     :
90dfed
-                     : "r0", "r1", "r2", "r3", "cc");
90dfed
-
90dfed
-}
90dfed
-
90dfed
-// Return a quad-word that only bits low[32:63] are undefined
90dfed
-quad_word
90dfed
-make_undefined(void)
90dfed
-{
90dfed
-   quad_word val;
90dfed
-
90dfed
-   val.high = 0;
90dfed
-   val.low |= 0xFFFFFFFF00000000ull;
90dfed
-
90dfed
-   return val;
90dfed
-}
90dfed
-
90dfed
-void op1_undefined(void)
90dfed
-{
90dfed
-   quad_word op1, op3;
90dfed
-   uint64_t op2;
90dfed
-
90dfed
-   // op1 undefined
90dfed
-   op1 = make_undefined();
90dfed
-   op2 = 42;
90dfed
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
90dfed
-   test(op1, op2, op3);  // complaint
90dfed
-}
90dfed
-
90dfed
-void op2_undefined(void)
90dfed
-{
90dfed
-   quad_word op1, op3;
90dfed
-   uint64_t op2;
90dfed
-
90dfed
-   op1.high = op1.low = 42;
90dfed
-   // op2 undefined
90dfed
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
90dfed
-   test(op1, op2, op3);  // complaint
90dfed
-}
90dfed
-
90dfed
-void op3_undefined(void)
90dfed
-{
90dfed
-   quad_word op1, op3;
90dfed
-   uint64_t op2;
90dfed
-
90dfed
-   op1.high = op1.low = 42;
90dfed
-   op2 = 100;
90dfed
-   op3 = make_undefined();
90dfed
-   test(op1, op2, op3);  // no complaint; op3 is just copied around
90dfed
-}
90dfed
-
90dfed
-int main ()
90dfed
-{
90dfed
-   op1_undefined();
90dfed
-   op2_undefined();
90dfed
-   op3_undefined();
90dfed
-
90dfed
-   return 0;
90dfed
-}
90dfed
diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp
90dfed
deleted file mode 100644
90dfed
index e72de94c8..000000000
90dfed
--- a/memcheck/tests/s390x/cds.stderr.exp
90dfed
+++ /dev/null
90dfed
@@ -1,10 +0,0 @@
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (cds.c:17)
90dfed
-   by 0x........: op1_undefined (cds.c:50)
90dfed
-   by 0x........: main (cds.c:77)
90dfed
-
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (cds.c:17)
90dfed
-   by 0x........: op2_undefined (cds.c:61)
90dfed
-   by 0x........: main (cds.c:78)
90dfed
-
90dfed
diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp
90dfed
deleted file mode 100644
90dfed
index e69de29bb..000000000
90dfed
diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest
90dfed
deleted file mode 100644
90dfed
index 5195887e2..000000000
90dfed
--- a/memcheck/tests/s390x/cds.vgtest
90dfed
+++ /dev/null
90dfed
@@ -1,2 +0,0 @@
90dfed
-prog: cds
90dfed
-vgopts: -q
90dfed
diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c
90dfed
deleted file mode 100644
90dfed
index 9a298cef9..000000000
90dfed
--- a/memcheck/tests/s390x/cs.c
90dfed
+++ /dev/null
90dfed
@@ -1,32 +0,0 @@
90dfed
-#include <stdint.h>
90dfed
-#include <stdio.h>
90dfed
-#include <string.h>
90dfed
-
90dfed
-void 
90dfed
-test(int32_t op1_init, int32_t op2_init, int32_t op3_init)
90dfed
-{
90dfed
-   register int32_t op1 asm("8") = op1_init;
90dfed
-   register int32_t op3 asm("9") = op3_init;
90dfed
-   
90dfed
-   int32_t op2 = op2_init;
90dfed
-   int cc = 1; 
90dfed
-
90dfed
-   __asm__ volatile (
90dfed
-           "cs      8,9,%1\n\t"
90dfed
-           "ipm     %0\n\t"
90dfed
-           "srl     %0,28\n\t"
90dfed
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
90dfed
-           : 
90dfed
-           : "cc");
90dfed
-}
90dfed
-
90dfed
-int main ()
90dfed
-{
90dfed
-   int op1, op2, op3;
90dfed
-
90dfed
-   test(op1, 0x10000000, 0x12345678);   // complaint
90dfed
-   test(0x10000000, op2, 0x12345678);   // complaint
90dfed
-   test(0x10000000, 0x01000000, op3);   // no complaint
90dfed
-
90dfed
-   return 0;
90dfed
-}
90dfed
diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp
90dfed
deleted file mode 100644
90dfed
index e45dc99cd..000000000
90dfed
--- a/memcheck/tests/s390x/cs.stderr.exp
90dfed
+++ /dev/null
90dfed
@@ -1,8 +0,0 @@
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (cs.c:14)
90dfed
-   by 0x........: main (cs.c:27)
90dfed
-
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (cs.c:14)
90dfed
-   by 0x........: main (cs.c:28)
90dfed
-
90dfed
diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp
90dfed
deleted file mode 100644
90dfed
index e69de29bb..000000000
90dfed
diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest
90dfed
deleted file mode 100644
90dfed
index 323cce80c..000000000
90dfed
--- a/memcheck/tests/s390x/cs.vgtest
90dfed
+++ /dev/null
90dfed
@@ -1,2 +0,0 @@
90dfed
-prog: cs
90dfed
-vgopts: -q
90dfed
diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c
90dfed
deleted file mode 100644
90dfed
index 7f9d8c88e..000000000
90dfed
--- a/memcheck/tests/s390x/csg.c
90dfed
+++ /dev/null
90dfed
@@ -1,32 +0,0 @@
90dfed
-#include <stdint.h>
90dfed
-#include <stdio.h>
90dfed
-#include <string.h>
90dfed
-
90dfed
-void 
90dfed
-test(int64_t op1_init, int64_t op2_init, int64_t op3_init)
90dfed
-{
90dfed
-   register int64_t op1 asm("8") = op1_init;
90dfed
-   register int64_t op3 asm("9") = op3_init;
90dfed
-   
90dfed
-   int64_t op2 = op2_init;
90dfed
-   int cc = 1; 
90dfed
-
90dfed
-   __asm__ volatile (
90dfed
-           "csg     8,9,%1\n\t"
90dfed
-           "ipm     %0\n\t"
90dfed
-           "srl     %0,28\n\t"
90dfed
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
90dfed
-           : 
90dfed
-           : "cc");
90dfed
-}
90dfed
-
90dfed
-int main ()
90dfed
-{
90dfed
-   int64_t op1, op2, op3;
90dfed
-
90dfed
-   test(op1, 0x1000000000000000ull, 0x1234567887654321ull);  // complaint
90dfed
-   test(0x1000000000000000ull, op2, 0x1234567887654321ull);  // complaint
90dfed
-   test(0x1000000000000000ull, 0x1000000000000000ull, op3);  // no complaint
90dfed
-
90dfed
-   return 0;
90dfed
-}
90dfed
diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp
90dfed
deleted file mode 100644
90dfed
index fda2021ce..000000000
90dfed
--- a/memcheck/tests/s390x/csg.stderr.exp
90dfed
+++ /dev/null
90dfed
@@ -1,8 +0,0 @@
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (csg.c:14)
90dfed
-   by 0x........: main (csg.c:27)
90dfed
-
90dfed
-Conditional jump or move depends on uninitialised value(s)
90dfed
-   at 0x........: test (csg.c:14)
90dfed
-   by 0x........: main (csg.c:28)
90dfed
-
90dfed
diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp
90dfed
deleted file mode 100644
90dfed
index e69de29bb..000000000
90dfed
diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest
90dfed
deleted file mode 100644
90dfed
index 6de75c1d6..000000000
90dfed
--- a/memcheck/tests/s390x/csg.vgtest
90dfed
+++ /dev/null
90dfed
@@ -1,2 +0,0 @@
90dfed
-prog: csg
90dfed
-vgopts: -q
90dfed
90dfed
commit 18ddcc47c951427efd3b790ba2481159b9bd1598
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Wed Apr 7 16:48:29 2021 +0200
90dfed
90dfed
    s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
90dfed
    
90dfed
    Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction
90dfed
    selector.  Handle them exactly like the "inexpensive" variants Iop_CmpNE32
90dfed
    and Iop_CmpNE64.
90dfed
90dfed
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
90dfed
index 2000ec224..5f79280c0 100644
90dfed
--- a/VEX/priv/host_s390_isel.c
90dfed
+++ b/VEX/priv/host_s390_isel.c
90dfed
@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond)
90dfed
 
90dfed
       case Iop_CmpNE32:
90dfed
       case Iop_CmpNE64:
90dfed
+      case Iop_ExpCmpNE32:
90dfed
+      case Iop_ExpCmpNE64:
90dfed
       case Iop_CasCmpNE32:
90dfed
       case Iop_CasCmpNE64:
90dfed
          result = S390_CC_NE;
90dfed
90dfed
commit 5db3f929c43bf46f4707178706cfe90f43acdd19
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Wed Apr 7 12:30:20 2021 +0200
90dfed
90dfed
    s390x: Add convenience function mkV128()
90dfed
    
90dfed
    Provide mkV128() as a short-hand notation for creating a vector constant from
90dfed
    a bit pattern, similar to other such functions like mkU64().
90dfed
90dfed
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
90dfed
index 339377007..7d54cb551 100644
90dfed
--- a/VEX/priv/guest_s390_toIR.c
90dfed
+++ b/VEX/priv/guest_s390_toIR.c
90dfed
@@ -376,6 +376,13 @@ mkU64(ULong value)
90dfed
    return IRExpr_Const(IRConst_U64(value));
90dfed
 }
90dfed
 
90dfed
+/* Create an expression node for a 128-bit vector constant */
90dfed
+static __inline__ IRExpr *
90dfed
+mkV128(UShort value)
90dfed
+{
90dfed
+   return IRExpr_Const(IRConst_V128(value));
90dfed
+}
90dfed
+
90dfed
 /* Create an expression node for a 32-bit floating point constant
90dfed
    whose value is given by a bit pattern. */
90dfed
 static __inline__ IRExpr *
90dfed
@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4)
90dfed
 static const HChar *
90dfed
 s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused)))
90dfed
 {
90dfed
-   put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2)));
90dfed
+   put_vr_qw(v1, mkV128(i2));
90dfed
 
90dfed
    return "vgbm";
90dfed
 }
90dfed
@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4)
90dfed
    switch(type) {
90dfed
    case Ity_I8:
90dfed
       sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2)));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0001000100010001));
90dfed
+      mask = mkV128(0b0001000100010001);
90dfed
       break;
90dfed
    case Ity_I16:
90dfed
       sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0011001100110011));
90dfed
+      mask = mkV128(0b0011001100110011);
90dfed
       break;
90dfed
    default:
90dfed
       vpanic("s390_irgen_VSUM: invalid type ");
90dfed
@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4)
90dfed
    switch(type) {
90dfed
    case Ity_I16:
90dfed
       sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2)));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0000001100000011));
90dfed
+      mask = mkV128(0b0000001100000011);
90dfed
       break;
90dfed
    case Ity_I32:
90dfed
       sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0000111100001111));
90dfed
+      mask = mkV128(0b0000111100001111);
90dfed
       break;
90dfed
    default:
90dfed
       vpanic("s390_irgen_VSUMG: invalid type ");
90dfed
@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4)
90dfed
    switch(type) {
90dfed
    case Ity_I32:
90dfed
       sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2)));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0000000000001111));
90dfed
+      mask = mkV128(0b0000000000001111);
90dfed
       break;
90dfed
    case Ity_I64:
90dfed
       sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2));
90dfed
-      mask = IRExpr_Const(IRConst_V128(0b0000000011111111));
90dfed
+      mask = mkV128(0b0000000011111111);
90dfed
       break;
90dfed
    default:
90dfed
       vpanic("s390_irgen_VSUMQ: invalid type ");
90dfed
@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
90dfed
          assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
90dfed
       }
90dfed
       put_vr_qw(v1, mkite(mkexpr(cond),
90dfed
-                          IRExpr_Const(IRConst_V128(0xffff)),
90dfed
-                          IRExpr_Const(IRConst_V128(0))));
90dfed
+                          mkV128(0xffff),
90dfed
+                          mkV128(0)));
90dfed
       if (s390_vr_is_cs_set(m6)) {
90dfed
          IRTemp cc = newTemp(Ity_I64);
90dfed
          assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
90dfed
90dfed
commit e78bd78d3043729033b426218ab8c6dae9c51e96
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Thu Mar 18 18:01:10 2021 +0100
90dfed
90dfed
    Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
90dfed
    
90dfed
    The z/Architecture instructions "vector string range compare" (VSTRC),
90dfed
    "vector find any element equal" (VFAE), and "vector find element
90dfed
    equal" (VFEE) are each implemented with a dirty helper that executes the
90dfed
    instruction.  Unfortunately this approach leads to memcheck false
90dfed
    positives, because these instructions may yield a defined result even if
90dfed
    parts of the input vectors are undefined.  There are multiple ways this
90dfed
    can happen: Wherever the flags in the fourth operand to VSTRC indicate
90dfed
    "match always" or "match never", the corresponding elements in the third
90dfed
    operand don't affect the result.  The same is true for the elements
90dfed
    following the first zero-element in the second operand if the ZS flag is
90dfed
    set, or for the elements following the first matching element, if any.
90dfed
    
90dfed
    Re-implement the instructions without dirty helpers and transform into
90dfed
    lengthy IR instead.
90dfed
90dfed
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
90dfed
index 905429015..49b6cd5dd 100644
90dfed
--- a/VEX/priv/guest_s390_defs.h
90dfed
+++ b/VEX/priv/guest_s390_defs.h
90dfed
@@ -265,11 +265,8 @@ typedef enum {
90dfed
    S390_VEC_OP_INVALID = 0,
90dfed
    S390_VEC_OP_VPKS,
90dfed
    S390_VEC_OP_VPKLS,
90dfed
-   S390_VEC_OP_VFAE,
90dfed
-   S390_VEC_OP_VFEE,
90dfed
    S390_VEC_OP_VFENE,
90dfed
    S390_VEC_OP_VISTR,
90dfed
-   S390_VEC_OP_VSTRC,
90dfed
    S390_VEC_OP_VCEQ,
90dfed
    S390_VEC_OP_VTM,
90dfed
    S390_VEC_OP_VGFM,
90dfed
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
90dfed
index b71b621ae..63d2e8ce5 100644
90dfed
--- a/VEX/priv/guest_s390_helpers.c
90dfed
+++ b/VEX/priv/guest_s390_helpers.c
90dfed
@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
       {0x00, 0x00}, /* invalid */
90dfed
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
90dfed
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
90dfed
-      [S390_VEC_OP_VFAE]  = {0xe7, 0x82},
90dfed
-      [S390_VEC_OP_VFEE]  = {0xe7, 0x80},
90dfed
       [S390_VEC_OP_VFENE] = {0xe7, 0x81},
90dfed
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
90dfed
-      [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
90dfed
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
90dfed
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
90dfed
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
90dfed
@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
 
90dfed
    case S390_VEC_OP_VPKS:
90dfed
    case S390_VEC_OP_VPKLS:
90dfed
-   case S390_VEC_OP_VFAE:
90dfed
-   case S390_VEC_OP_VFEE:
90dfed
    case S390_VEC_OP_VFENE:
90dfed
    case S390_VEC_OP_VCEQ:
90dfed
    case S390_VEC_OP_VGFM:
90dfed
@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
       the_insn.VRR.m5 = d->m5;
90dfed
       break;
90dfed
 
90dfed
-   case S390_VEC_OP_VSTRC:
90dfed
    case S390_VEC_OP_VGFMA:
90dfed
    case S390_VEC_OP_VMAH:
90dfed
    case S390_VEC_OP_VMALH:
90dfed
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
90dfed
index 7d54cb551..26a947813 100644
90dfed
--- a/VEX/priv/guest_s390_toIR.c
90dfed
+++ b/VEX/priv/guest_s390_toIR.c
90dfed
@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2)
90dfed
    return "ppno";
90dfed
 }
90dfed
 
90dfed
-static const HChar *
90dfed
-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
-{
90dfed
-   IRDirty* d;
90dfed
-   IRTemp cc = newTemp(Ity_I64);
90dfed
+enum s390_VStrX {
90dfed
+   s390_VStrX_VSTRC,
90dfed
+   s390_VStrX_VFAE,
90dfed
+   s390_VStrX_VFEE
90dfed
+};
90dfed
 
90dfed
-   /* Check for specification exception */
90dfed
-   vassert(m4 < 3);
90dfed
+#define S390_VEC_OP3(m, op0, op1, op2)                                  \
90dfed
+   ((m) == 0 ? (op0) : (m) == 1 ? (op1) : (m) == 2 ? (op2) : Iop_INVALID)
90dfed
 
90dfed
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
90dfed
-   details.op = S390_VEC_OP_VFAE;
90dfed
-   details.v1 = v1;
90dfed
-   details.v2 = v2;
90dfed
-   details.v3 = v3;
90dfed
-   details.m4 = m4;
90dfed
-   details.m5 = m5;
90dfed
-
90dfed
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
90dfed
-                         &s390x_dirtyhelper_vec_op,
90dfed
-                         mkIRExprVec_2(IRExpr_GSPTR(),
90dfed
-                                       mkU64(details.serialized)));
90dfed
+/* Helper function for transforming VSTRC, VFAE, or VFEE.  These instructions
90dfed
+   share much of the same logic. */
90dfed
+static void
90dfed
+s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
90dfed
+                 UChar m6, enum s390_VStrX which_insn)
90dfed
+{
90dfed
+   IRTemp op2 = newTemp(Ity_V128);
90dfed
+   IRTemp op3 = newTemp(Ity_V128);
90dfed
+   IRExpr* tmp;
90dfed
+   IRExpr* match = NULL;
90dfed
+   UChar bitwidth = 8 << m5;
90dfed
+   UChar n_elem = 16 >> m5;
90dfed
+   IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4);
90dfed
+   IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4);
90dfed
+   IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4);
90dfed
+   IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4);
90dfed
+   IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
90dfed
+                                    Iop_CmpEQ16x8, Iop_CmpEQ32x4);
90dfed
+   IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16,
90dfed
+                                    Iop_CmpGT16Ux8, Iop_CmpGT32Ux4);
90dfed
+   IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16,
90dfed
+                                      Iop_GetElem16x8, Iop_GetElem32x4);
90dfed
+
90dfed
+   assign(op2, get_vr_qw(v2));
90dfed
+   assign(op3, get_vr_qw(v3));
90dfed
+
90dfed
+   switch (which_insn) {
90dfed
+
90dfed
+   case s390_VStrX_VSTRC: {
90dfed
+      IRTemp op4 = newTemp(Ity_V128);
90dfed
+      assign(op4, get_vr_qw(v4));
90dfed
+
90dfed
+      /* Mask off insignificant range boundaries from op3, i.e., all those for
90dfed
+         which the corresponding field in op4 has all or no bits set ("match
90dfed
+         always" / "match never"). */
90dfed
+      IRTemp bounds = newTemp(Ity_V128);
90dfed
+      tmp = unop(Iop_NotV128,
90dfed
+                 binop(cmpeq_op, mkV128(0),
90dfed
+                       binop(sar_op,
90dfed
+                             binop(sub_op,
90dfed
+                                   binop(sar_op, mkexpr(op4),
90dfed
+                                         mkU8(bitwidth - 3)),
90dfed
+                                   mkV128(-1)),
90dfed
+                             mkU8(1))));
90dfed
+      assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp));
90dfed
+
90dfed
+      IRTemp flags_eq = newTemp(Ity_V128);
90dfed
+      IRTemp flags_lt = newTemp(Ity_V128);
90dfed
+      IRTemp flags_gt = newTemp(Ity_V128);
90dfed
+      assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1)));
90dfed
+      assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)),
90dfed
+                             mkU8(bitwidth - 1)));
90dfed
+      assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)),
90dfed
+                             mkU8(bitwidth - 1)));
90dfed
+
90dfed
+      for (UChar idx = 0; idx < n_elem; idx += 2) {
90dfed
+         /* Match according to the even/odd pairs in op3 and op4 at idx */
90dfed
+         IRTemp part[2];
90dfed
+
90dfed
+         for (UChar j = 0; j < 2; j++) {
90dfed
+            IRTemp a = newTemp(Ity_V128);
90dfed
+            assign(a, unop(dup_op,
90dfed
+                           binop(getelem_op, mkexpr(bounds), mkU8(idx + j))));
90dfed
+
90dfed
+            IRExpr* m[] = {
90dfed
+               binop(cmpeq_op, mkexpr(op2), mkexpr(a)),
90dfed
+               binop(cmpgt_op, mkexpr(a), mkexpr(op2)),
90dfed
+               binop(cmpgt_op, mkexpr(op2), mkexpr(a))
90dfed
+            };
90dfed
+            IRExpr* f[] = {
90dfed
+               unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))),
90dfed
+               unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))),
90dfed
+               unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j)))
90dfed
+            };
90dfed
+            part[j] = newTemp(Ity_V128);
90dfed
+            assign(part[j], binop(Iop_OrV128,
90dfed
+                                  binop(Iop_OrV128,
90dfed
+                                        binop(Iop_AndV128, f[0], m[0]),
90dfed
+                                        binop(Iop_AndV128, f[1], m[1])),
90dfed
+                                  binop(Iop_AndV128, f[2], m[2])));
90dfed
+         }
90dfed
+         tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1]));
90dfed
+         match = idx == 0 ? tmp : binop(Iop_OrV128, match, tmp);
90dfed
+      }
90dfed
+      break;
90dfed
+   }
90dfed
 
90dfed
-   d->nFxState = 3;
90dfed
-   vex_bzero(&d->fxState, sizeof(d->fxState));
90dfed
-   d->fxState[0].fx     = Ifx_Read;
90dfed
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
90dfed
-   d->fxState[0].size   = sizeof(V128);
90dfed
-   d->fxState[1].fx     = Ifx_Read;
90dfed
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
90dfed
-   d->fxState[1].size   = sizeof(V128);
90dfed
-   d->fxState[2].fx     = Ifx_Write;
90dfed
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
90dfed
-   d->fxState[2].size   = sizeof(V128);
90dfed
+   case s390_VStrX_VFAE:
90dfed
+      for (UChar idx = 0; idx < n_elem; idx++) {
90dfed
+         IRTemp a = newTemp(Ity_V128);
90dfed
+         assign(a, binop(cmpeq_op, mkexpr(op2),
90dfed
+                         unop(dup_op,
90dfed
+                              binop(getelem_op, mkexpr(op3), mkU8(idx)))));
90dfed
+         match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a));
90dfed
+      }
90dfed
+      break;
90dfed
 
90dfed
-   stmt(IRStmt_Dirty(d));
90dfed
+   case s390_VStrX_VFEE:
90dfed
+      match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3));
90dfed
+      break;
90dfed
 
90dfed
-   if (s390_vr_is_cs_set(m5)) {
90dfed
-      s390_cc_set(cc);
90dfed
+   default:
90dfed
+      vpanic("s390_irgen_VStrX: unknown insn");
90dfed
    }
90dfed
 
90dfed
-   return "vfae";
90dfed
-}
90dfed
+   /* Invert first intermediate result if requested */
90dfed
+   if (m6 & 8)
90dfed
+      match = unop(Iop_NotV128, match);
90dfed
 
90dfed
-static const HChar *
90dfed
-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
-{
90dfed
-   IRDirty* d;
90dfed
-   IRTemp cc = newTemp(Ity_I64);
90dfed
+   IRTemp inter1 = newTemp(Ity_V128);
90dfed
+   IRTemp inter2 = newTemp(Ity_V128);
90dfed
+   IRTemp accu = newTemp(Ity_V128);
90dfed
+   assign(inter1, match);
90dfed
 
90dfed
-   /* Check for specification exception */
90dfed
-   vassert(m4 < 3);
90dfed
-   vassert((m5 & 0b1100) == 0);
90dfed
+   /* Determine second intermediate and accumulated result */
90dfed
+   if (s390_vr_is_zs_set(m6)) {
90dfed
+      assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0)));
90dfed
+      assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2)));
90dfed
+   } else {
90dfed
+      assign(inter2, mkV128(0));
90dfed
+      assign(accu, mkexpr(inter1));
90dfed
+   }
90dfed
 
90dfed
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
90dfed
-   details.op = S390_VEC_OP_VFEE;
90dfed
-   details.v1 = v1;
90dfed
-   details.v2 = v2;
90dfed
-   details.v3 = v3;
90dfed
-   details.m4 = m4;
90dfed
-   details.m5 = m5;
90dfed
+   IRTemp accu0 = newTemp(Ity_I64);
90dfed
+   IRTemp is_match0 = newTemp(Ity_I1);
90dfed
+   IRTemp mismatch_bits = newTemp(Ity_I64);
90dfed
 
90dfed
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
90dfed
-                         &s390x_dirtyhelper_vec_op,
90dfed
-                         mkIRExprVec_2(IRExpr_GSPTR(),
90dfed
-                                       mkU64(details.serialized)));
90dfed
+   assign(accu0, unop(Iop_V128HIto64, mkexpr(accu)));
90dfed
+   assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0)));
90dfed
+   assign(mismatch_bits, unop(Iop_ClzNat64,
90dfed
+                              mkite(mkexpr(is_match0), mkexpr(accu0),
90dfed
+                                    unop(Iop_V128to64, mkexpr(accu)))));
90dfed
 
90dfed
-   d->nFxState = 3;
90dfed
-   vex_bzero(&d->fxState, sizeof(d->fxState));
90dfed
-   d->fxState[0].fx     = Ifx_Read;
90dfed
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
90dfed
-   d->fxState[0].size   = sizeof(V128);
90dfed
-   d->fxState[1].fx     = Ifx_Read;
90dfed
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
90dfed
-   d->fxState[1].size   = sizeof(V128);
90dfed
-   d->fxState[2].fx     = Ifx_Write;
90dfed
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
90dfed
-   d->fxState[2].size   = sizeof(V128);
90dfed
+   if (m6 & 4) {
90dfed
+      put_vr_qw(v1, mkexpr(inter1));
90dfed
+   } else {
90dfed
+      /* Determine byte position of first match */
90dfed
+      tmp = binop(Iop_Add64,
90dfed
+                  binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)),
90dfed
+                  mkite(mkexpr(is_match0), mkU64(0), mkU64(8)));
90dfed
+      put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
90dfed
+   }
90dfed
 
90dfed
-   stmt(IRStmt_Dirty(d));
90dfed
+   if (s390_vr_is_cs_set(m6)) {
90dfed
+      /* Set condition code depending on...
90dfed
+                   zero found
90dfed
+                      n  y
90dfed
+                    +------
90dfed
+         match    n | 3  0
90dfed
+          found   y | 1  2   */
90dfed
 
90dfed
-   if (s390_vr_is_cs_set(m5)) {
90dfed
+      IRTemp cc = newTemp(Ity_I64);
90dfed
+
90dfed
+      tmp = binop(Iop_Shr64,
90dfed
+                  mkite(mkexpr(is_match0),
90dfed
+                        unop(Iop_V128HIto64, mkexpr(inter1)),
90dfed
+                        unop(Iop_V128to64, mkexpr(inter1))),
90dfed
+                  unop(Iop_64to8,
90dfed
+                       binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits))));
90dfed
+      tmp = binop(Iop_Shl64, tmp, mkU8(1));
90dfed
+      if (s390_vr_is_zs_set(m6)) {
90dfed
+         tmp = binop(Iop_Xor64, tmp,
90dfed
+                     mkite(binop(Iop_ExpCmpNE64, mkU64(0),
90dfed
+                                 binop(Iop_Or64,
90dfed
+                                       unop(Iop_V128HIto64, mkexpr(inter2)),
90dfed
+                                       unop(Iop_V128to64, mkexpr(inter2)))),
90dfed
+                           mkU64(0),
90dfed
+                           mkU64(3)));
90dfed
+      } else {
90dfed
+         tmp = binop(Iop_Xor64, tmp, mkU64(3));
90dfed
+      }
90dfed
+      assign(cc, tmp);
90dfed
       s390_cc_set(cc);
90dfed
    }
90dfed
+   dis_res->hint = Dis_HintVerbose;
90dfed
+}
90dfed
 
90dfed
+static const HChar *
90dfed
+s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
+{
90dfed
+   s390_insn_assert("vfae", m4 <= 2);
90dfed
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE);
90dfed
+   return "vfae";
90dfed
+}
90dfed
+
90dfed
+static const HChar *
90dfed
+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
+{
90dfed
+   s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3));
90dfed
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE);
90dfed
    return "vfee";
90dfed
 }
90dfed
 
90dfed
@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
90dfed
 static const HChar *
90dfed
 s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
90dfed
 {
90dfed
-   IRDirty* d;
90dfed
-   IRTemp cc = newTemp(Ity_I64);
90dfed
-
90dfed
-   /* Check for specification exception */
90dfed
-   vassert(m5 < 3);
90dfed
-
90dfed
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
90dfed
-   details.op = S390_VEC_OP_VSTRC;
90dfed
-   details.v1 = v1;
90dfed
-   details.v2 = v2;
90dfed
-   details.v3 = v3;
90dfed
-   details.v4 = v4;
90dfed
-   details.m4 = m5;
90dfed
-   details.m5 = m6;
90dfed
-
90dfed
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
90dfed
-                         &s390x_dirtyhelper_vec_op,
90dfed
-                         mkIRExprVec_2(IRExpr_GSPTR(),
90dfed
-                                       mkU64(details.serialized)));
90dfed
-
90dfed
-   d->nFxState = 4;
90dfed
-   vex_bzero(&d->fxState, sizeof(d->fxState));
90dfed
-   d->fxState[0].fx     = Ifx_Read;
90dfed
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
90dfed
-   d->fxState[0].size   = sizeof(V128);
90dfed
-   d->fxState[1].fx     = Ifx_Read;
90dfed
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
90dfed
-   d->fxState[1].size   = sizeof(V128);
90dfed
-   d->fxState[2].fx     = Ifx_Read;
90dfed
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
90dfed
-   d->fxState[2].size   = sizeof(V128);
90dfed
-   d->fxState[3].fx     = Ifx_Write;
90dfed
-   d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
90dfed
-   d->fxState[3].size   = sizeof(V128);
90dfed
-
90dfed
-   stmt(IRStmt_Dirty(d));
90dfed
-
90dfed
-   if (s390_vr_is_cs_set(m6)) {
90dfed
-      s390_cc_set(cc);
90dfed
-   }
90dfed
-
90dfed
+   s390_insn_assert("vstrc", m5 <= 2);
90dfed
+   s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC);
90dfed
    return "vstrc";
90dfed
 }
90dfed
 
90dfed
90dfed
commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Tue Mar 2 14:12:29 2021 +0100
90dfed
90dfed
    Bug 434296 - s390x: Rework IR conversion of VFENE
90dfed
    
90dfed
    So far the z/Architecture instruction "vector find element not
90dfed
    equal" (VFENE) is transformed to a loop.  This can cause spurious
90dfed
    "conditional jump or move depends on uninitialised value(s)" messages by
90dfed
    memcheck.  Re-implement without a loop.
90dfed
90dfed
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
90dfed
index 49b6cd5dd..caec3108e 100644
90dfed
--- a/VEX/priv/guest_s390_defs.h
90dfed
+++ b/VEX/priv/guest_s390_defs.h
90dfed
@@ -265,7 +265,6 @@ typedef enum {
90dfed
    S390_VEC_OP_INVALID = 0,
90dfed
    S390_VEC_OP_VPKS,
90dfed
    S390_VEC_OP_VPKLS,
90dfed
-   S390_VEC_OP_VFENE,
90dfed
    S390_VEC_OP_VISTR,
90dfed
    S390_VEC_OP_VCEQ,
90dfed
    S390_VEC_OP_VTM,
90dfed
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
90dfed
index 63d2e8ce5..2188ce5c1 100644
90dfed
--- a/VEX/priv/guest_s390_helpers.c
90dfed
+++ b/VEX/priv/guest_s390_helpers.c
90dfed
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
       {0x00, 0x00}, /* invalid */
90dfed
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
90dfed
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
90dfed
-      [S390_VEC_OP_VFENE] = {0xe7, 0x81},
90dfed
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
90dfed
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
90dfed
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
90dfed
@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
 
90dfed
    case S390_VEC_OP_VPKS:
90dfed
    case S390_VEC_OP_VPKLS:
90dfed
-   case S390_VEC_OP_VFENE:
90dfed
    case S390_VEC_OP_VCEQ:
90dfed
    case S390_VEC_OP_VGFM:
90dfed
    case S390_VEC_OP_VCH:
90dfed
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
90dfed
index 26a947813..c8dc3ec18 100644
90dfed
--- a/VEX/priv/guest_s390_toIR.c
90dfed
+++ b/VEX/priv/guest_s390_toIR.c
90dfed
@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
 static const HChar *
90dfed
 s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
 {
90dfed
-   const Bool negateComparison = True;
90dfed
-   const IRType type = s390_vr_get_type(m4);
90dfed
+   s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3));
90dfed
 
90dfed
-   /* Check for specification exception */
90dfed
-   vassert(m4 < 3);
90dfed
-   vassert((m5 & 0b1100) == 0);
90dfed
-
90dfed
-   static const IROp elementGetters[] = {
90dfed
-      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
90dfed
+   static const IROp compare_op[3] = {
90dfed
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
90dfed
    };
90dfed
-   IROp getter = elementGetters[m4];
90dfed
-
90dfed
-   static const IROp elementComparators[] = {
90dfed
-      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
90dfed
+   static const IROp abs_op[3] = {
90dfed
+      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4
90dfed
    };
90dfed
-   IROp comparator = elementComparators[m4];
90dfed
-
90dfed
-   static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
90dfed
-   IROp converter = resultConverter[m4];
90dfed
-
90dfed
-   IRTemp isZeroElem;
90dfed
-
90dfed
-   IRTemp counter = newTemp(Ity_I64);
90dfed
-   assign(counter, get_counter_dw0());
90dfed
-
90dfed
-   IRTemp arg1 = newTemp(type);
90dfed
-   assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
90dfed
-   IRTemp arg2 = newTemp(type);
90dfed
-   assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
90dfed
+   IRTemp op2 = newTemp(Ity_V128);
90dfed
+   IRTemp op3 = newTemp(Ity_V128);
90dfed
+   IRTemp op2zero = newTemp(Ity_V128);
90dfed
+   IRTemp diff = newTemp(Ity_V128);
90dfed
+   IRTemp diff0 = newTemp(Ity_I64);
90dfed
+   IRTemp neq0 = newTemp(Ity_I1);
90dfed
+   IRTemp samebits = newTemp(Ity_I64);
90dfed
+   IRExpr* tmp;
90dfed
 
90dfed
-   IRTemp isGoodPair = newTemp(Ity_I1);
90dfed
-   if(negateComparison) {
90dfed
-      assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
90dfed
-                                              mkexpr(arg2))));
90dfed
-   } else {
90dfed
-      assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
90dfed
-   }
90dfed
+   assign(op2, get_vr_qw(v2));
90dfed
+   assign(op3, get_vr_qw(v3));
90dfed
 
90dfed
-   if(s390_vr_is_zs_set(m5)) {
90dfed
-      isZeroElem = newTemp(Ity_I1);
90dfed
-      assign(isZeroElem, binop(comparator, mkexpr(arg1),
90dfed
-                               unop(converter, mkU64(0))));
90dfed
+   tmp = mkV128(0);
90dfed
+   if (s390_vr_is_zs_set(m5)) {
90dfed
+      tmp = binop(compare_op[m4], mkexpr(op2), tmp);
90dfed
+      if (s390_vr_is_cs_set(m5) && v3 != v2) {
90dfed
+         /* Count leading equal bits in the terminating element too */
90dfed
+         tmp = unop(abs_op[m4], tmp);
90dfed
+      }
90dfed
+      assign(op2zero, tmp);
90dfed
+      tmp = mkexpr(op2zero);
90dfed
    }
90dfed
-
90dfed
-   static const UChar invalidIndices[] = {16, 8, 4};
90dfed
-   const UChar invalidIndex = invalidIndices[m4];
90dfed
-   IRTemp endOfVectorIsReached = newTemp(Ity_I1);
90dfed
-   assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
90dfed
-                                      mkU64(invalidIndex)));
90dfed
-
90dfed
-   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
90dfed
-   IRExpr* shouldBreak = binop(Iop_Or32,
90dfed
-                               unop(Iop_1Uto32, mkexpr(isGoodPair)),
90dfed
-                               unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
90dfed
-                              );
90dfed
-   if(s390_vr_is_zs_set(m5)) {
90dfed
-      shouldBreak = binop(Iop_Or32,
90dfed
-                          shouldBreak,
90dfed
-                          unop(Iop_1Uto32, mkexpr(isZeroElem)));
90dfed
-   }
90dfed
-   iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
90dfed
-
90dfed
-   IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
90dfed
-   if(m4 > 0) {
90dfed
-      /* We should return index of byte but we found index of element in
90dfed
-         general case.
90dfed
-            if byte elem (m4 == 0) then indexOfByte = indexOfElement
90dfed
-            if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement
90dfed
-                                                        = indexOfElement << 1
90dfed
-            if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement
90dfed
-                                                    = indexOfElement << 2
90dfed
-      */
90dfed
-      foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4));
90dfed
+   if (v3 != v2) {
90dfed
+      tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3));
90dfed
+      if (s390_vr_is_zs_set(m5))
90dfed
+         tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero));
90dfed
    }
90dfed
 
90dfed
-   IRTemp result = newTemp(Ity_I64);
90dfed
-   assign(result, mkite(mkexpr(endOfVectorIsReached),
90dfed
-                        mkU64(16),
90dfed
-                        foundIndex));
90dfed
-   put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
90dfed
+   assign(diff, tmp);
90dfed
+   assign(diff0, unop(Iop_V128HIto64, mkexpr(diff)));
90dfed
+   assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0)));
90dfed
+   assign(samebits, unop(Iop_ClzNat64,
90dfed
+                         mkite(mkexpr(neq0), mkexpr(diff0),
90dfed
+                               unop(Iop_V128to64, mkexpr(diff)))));
90dfed
 
90dfed
+   /* Determine the byte size of the initial equal-elements sequence */
90dfed
+   tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3));
90dfed
+   if (m4 != 0)
90dfed
+      tmp = binop(Iop_Shl64, tmp, mkU8(m4));
90dfed
+   tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8)));
90dfed
+   put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
90dfed
 
90dfed
    if (s390_vr_is_cs_set(m5)) {
90dfed
-      static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64};
90dfed
-      IROp to64Converter = to64Converters[m4];
90dfed
-
90dfed
-      IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U,
90dfed
-                                         unop(to64Converter, mkexpr(arg1)),
90dfed
-                                         unop(to64Converter, mkexpr(arg2)));
90dfed
-
90dfed
-      IRExpr* ccexp = mkite(binop(Iop_CmpEQ32,
90dfed
-                                  unop(Iop_1Uto32, mkexpr(isGoodPair)),
90dfed
-                                  mkU32(1)),
90dfed
-                            mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)),
90dfed
-                            mkU64(3));
90dfed
-
90dfed
-      if(s390_vr_is_zs_set(m5)) {
90dfed
-         IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2),
90dfed
-                                    unop(converter, mkU64(0)));
90dfed
-         IRExpr* bothArgsAreZero = binop(Iop_And32,
90dfed
-                                         unop(Iop_1Uto32, mkexpr(isZeroElem)),
90dfed
-                                         unop(Iop_1Uto32, arg2IsZero));
90dfed
-         ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)),
90dfed
-                       mkU64(0),
90dfed
-                       ccexp);
90dfed
-      }
90dfed
+      /* Set condition code like follows --
90dfed
+         0: operands equal up to and including zero element
90dfed
+         1: op2 < op3    2: op2 > op3    3: op2 = op3 */
90dfed
       IRTemp cc = newTemp(Ity_I64);
90dfed
-      assign(cc, ccexp);
90dfed
-
90dfed
+      if (v3 == v2) {
90dfed
+         tmp = mkU64(0);
90dfed
+      } else {
90dfed
+         IRTemp shift = newTemp(Ity_I8);
90dfed
+         IRExpr* op2half = mkite(mkexpr(neq0),
90dfed
+                                 unop(Iop_V128HIto64, mkexpr(op2)),
90dfed
+                                 unop(Iop_V128to64, mkexpr(op2)));
90dfed
+         IRExpr* op3half = mkite(mkexpr(neq0),
90dfed
+                                 unop(Iop_V128HIto64, mkexpr(op3)),
90dfed
+                                 unop(Iop_V128to64, mkexpr(op3)));
90dfed
+         assign(shift, unop(Iop_64to8,
90dfed
+                            binop(Iop_Sub64, mkU64(63), mkexpr(samebits))));
90dfed
+         tmp = binop(Iop_Or64,
90dfed
+                     binop(Iop_Shl64,
90dfed
+                           binop(Iop_And64, mkU64(1),
90dfed
+                                 binop(Iop_Shr64, op2half, mkexpr(shift))),
90dfed
+                           mkU8(1)),
90dfed
+                     binop(Iop_And64, mkU64(1),
90dfed
+                           binop(Iop_Shr64, op3half, mkexpr(shift))));
90dfed
+      }
90dfed
+      assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)),
90dfed
+                       mkU64(3), tmp));
90dfed
       s390_cc_set(cc);
90dfed
    }
90dfed
-
90dfed
-
90dfed
-   put_counter_dw0(mkU64(0));
90dfed
+   dis_res->hint = Dis_HintVerbose;
90dfed
    return "vfene";
90dfed
 }
90dfed
 
90dfed
90dfed
commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Tue Apr 27 20:13:26 2021 +0200
90dfed
90dfed
    Bug 434296 - s390x: Rework IR conversion of VISTR
90dfed
    
90dfed
    The z/Architecture instruction VISTR is currently transformed to a dirty
90dfed
    helper that executes the instruction.  This can cause false positives with
90dfed
    memcheck if the input string contains undefined characters after the
90dfed
    string terminator.  Implement without a dirty helper and emulate the
90dfed
    instruction instead.
90dfed
90dfed
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
90dfed
index caec3108e..24f3798c1 100644
90dfed
--- a/VEX/priv/guest_s390_defs.h
90dfed
+++ b/VEX/priv/guest_s390_defs.h
90dfed
@@ -265,7 +265,6 @@ typedef enum {
90dfed
    S390_VEC_OP_INVALID = 0,
90dfed
    S390_VEC_OP_VPKS,
90dfed
    S390_VEC_OP_VPKLS,
90dfed
-   S390_VEC_OP_VISTR,
90dfed
    S390_VEC_OP_VCEQ,
90dfed
    S390_VEC_OP_VTM,
90dfed
    S390_VEC_OP_VGFM,
90dfed
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
90dfed
index 2188ce5c1..1e04f601a 100644
90dfed
--- a/VEX/priv/guest_s390_helpers.c
90dfed
+++ b/VEX/priv/guest_s390_helpers.c
90dfed
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
       {0x00, 0x00}, /* invalid */
90dfed
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
90dfed
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
90dfed
-      [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
90dfed
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
90dfed
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
90dfed
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
90dfed
@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
90dfed
    the_insn.VRR.op2 = opcodes[d->op][1];
90dfed
 
90dfed
    switch(d->op) {
90dfed
-   case S390_VEC_OP_VISTR:
90dfed
-      the_insn.VRR.v1 = 1;
90dfed
-      the_insn.VRR.v2 = 2;
90dfed
-      the_insn.VRR.rxb = 0b1100;
90dfed
-      the_insn.VRR.m4 = d->m4;
90dfed
-      the_insn.VRR.m5 = d->m5;
90dfed
-      break;
90dfed
-
90dfed
    case S390_VEC_OP_VTM:
90dfed
       the_insn.VRR.v1 = 2;
90dfed
       the_insn.VRR.v2 = 3;
90dfed
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
90dfed
index c8dc3ec18..dfea54259 100644
90dfed
--- a/VEX/priv/guest_s390_toIR.c
90dfed
+++ b/VEX/priv/guest_s390_toIR.c
90dfed
@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
90dfed
 static const HChar *
90dfed
 s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
90dfed
 {
90dfed
-   IRDirty* d;
90dfed
-   IRTemp cc = newTemp(Ity_I64);
90dfed
-
90dfed
-   /* Check for specification exception */
90dfed
-   vassert(m3 < 3);
90dfed
-   vassert((m5 & 0b1110) == 0);
90dfed
+   s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1));
90dfed
 
90dfed
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
90dfed
-   details.op = S390_VEC_OP_VISTR;
90dfed
-   details.v1 = v1;
90dfed
-   details.v2 = v2;
90dfed
-   details.m4 = m3;
90dfed
-   details.m5 = m5;
90dfed
-
90dfed
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
90dfed
-                         &s390x_dirtyhelper_vec_op,
90dfed
-                         mkIRExprVec_2(IRExpr_GSPTR(),
90dfed
-                                       mkU64(details.serialized)));
90dfed
+   static const IROp compare_op[3] = {
90dfed
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
90dfed
+   };
90dfed
+   IRExpr* t;
90dfed
+   IRTemp op2 = newTemp(Ity_V128);
90dfed
+   IRTemp op2term = newTemp(Ity_V128);
90dfed
+   IRTemp mask = newTemp(Ity_V128);
90dfed
 
90dfed
-   d->nFxState = 2;
90dfed
-   vex_bzero(&d->fxState, sizeof(d->fxState));
90dfed
-   d->fxState[0].fx     = Ifx_Read;
90dfed
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
90dfed
-   d->fxState[0].size   = sizeof(V128);
90dfed
-   d->fxState[1].fx     = Ifx_Write;
90dfed
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
90dfed
-   d->fxState[1].size   = sizeof(V128);
90dfed
+   assign(op2, get_vr_qw(v2));
90dfed
+   assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0)));
90dfed
+   t = mkexpr(op2term);
90dfed
 
90dfed
-   stmt(IRStmt_Dirty(d));
90dfed
+   for (UChar i = m3; i < 4; i++) {
90dfed
+      IRTemp s = newTemp(Ity_V128);
90dfed
+      assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i))));
90dfed
+      t = mkexpr(s);
90dfed
+   }
90dfed
+   assign(mask, unop(Iop_NotV128, t));
90dfed
+   put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask)));
90dfed
 
90dfed
    if (s390_vr_is_cs_set(m5)) {
90dfed
+      IRTemp cc = newTemp(Ity_I64);
90dfed
+      assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask))));
90dfed
       s390_cc_set(cc);
90dfed
    }
90dfed
-
90dfed
+   dis_res->hint = Dis_HintVerbose;
90dfed
    return "vistr";
90dfed
 }
90dfed
 
90dfed
90dfed
commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Fri Apr 16 12:44:44 2021 +0200
90dfed
90dfed
    Bug 434296 - s390x: Add memcheck test cases for vector string insns
90dfed
    
90dfed
    Bug 434296 addresses memcheck false positives with the vector string
90dfed
    instructions VISTR, VSTRC, VFAE, VFEE, and VFENE.  Add test cases that
90dfed
    verify the fix for that bug.  Without the fix, memcheck yields many
90dfed
    complaints with these tests, most of which are false positives.
90dfed
90dfed
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
90dfed
index e4e69eb38..d183841ef 100644
90dfed
--- a/memcheck/tests/s390x/Makefile.am
90dfed
+++ b/memcheck/tests/s390x/Makefile.am
90dfed
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
90dfed
 
90dfed
 dist_noinst_SCRIPTS = filter_stderr
90dfed
 
90dfed
-INSN_TESTS = cdsg cu21 cu42 ltgjhe
90dfed
+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
90dfed
 
90dfed
 check_PROGRAMS = $(INSN_TESTS) 
90dfed
 
90dfed
@@ -14,3 +14,7 @@ EXTRA_DIST = \
90dfed
 AM_CFLAGS    += @FLAG_M64@
90dfed
 AM_CXXFLAGS  += @FLAG_M64@
90dfed
 AM_CCASFLAGS += @FLAG_M64@
90dfed
+
90dfed
+vstrc_CFLAGS  = $(AM_CFLAGS) -march=z13
90dfed
+vfae_CFLAGS   = $(AM_CFLAGS) -march=z13
90dfed
+vistr_CFLAGS  = $(AM_CFLAGS) -march=z13
90dfed
diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c
90dfed
new file mode 100644
90dfed
index 000000000..68781e7fb
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vfae.c
90dfed
@@ -0,0 +1,72 @@
90dfed
+#include <stdio.h>
90dfed
+#include <string.h>
90dfed
+
90dfed
+#define VECTOR __attribute__ ((vector_size (16)))
90dfed
+
90dfed
+typedef char VECTOR char_v;
90dfed
+
90dfed
+volatile char tmp;
90dfed
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
90dfed
+
90dfed
+static char_v to_char_vec(const char *str)
90dfed
+{
90dfed
+   char_v v;
90dfed
+   char buf[17];
90dfed
+   int len = strlen(str);
90dfed
+
90dfed
+   memcpy(buf, str, (len && str[len - 1] == '~') ? len - 1 : len + 1);
90dfed
+   v = *(char_v *) buf;
90dfed
+   return v;
90dfed
+}
90dfed
+
90dfed
+#define GENERATE_TEST(mnem)                                          \
90dfed
+static void test_ ## mnem ## _char(const char *str, const char *match, \
90dfed
+                                   int expect_res, int expect_cc)    \
90dfed
+{                                                                    \
90dfed
+   int cc;                                                           \
90dfed
+   char_v v1;                                                        \
90dfed
+   char_v v2 = to_char_vec(str);                                     \
90dfed
+   char_v v3 = to_char_vec(match);                                   \
90dfed
+                                                                     \
90dfed
+   __asm__(                                                          \
90dfed
+      "cr    0,0\n\t"           /* Clear CC */                       \
90dfed
+      #mnem "  %[v1],%[v2],%[v3],0,3\n\t"                            \
90dfed
+      "ipm   %[cc]\n\t"                                              \
90dfed
+      "srl   %[cc],28"                                               \
90dfed
+      : [v1] "=v" (v1),                                              \
90dfed
+        [cc] "=d" (cc)                                               \
90dfed
+      : [v2] "v" (v2),                                               \
90dfed
+        [v3] "v" (v3)                                                \
90dfed
+      : "cc");                                                       \
90dfed
+                                                                     \
90dfed
+   tmp = hex_digit[v1[7] & 0x1f];                                    \
90dfed
+   if (expect_res >= 0  && v1[7] != expect_res)                      \
90dfed
+      printf("result %u != %d\n", v1[7], expect_res);                \
90dfed
+                                                                     \
90dfed
+   tmp = hex_digit[cc & 0xf];                                        \
90dfed
+   if (expect_cc >= 0 && cc != expect_cc)                            \
90dfed
+      printf("CC %d != %d\n", cc, expect_cc);                        \
90dfed
+}
90dfed
+
90dfed
+GENERATE_TEST(vfae)
90dfed
+
90dfed
+GENERATE_TEST(vfee)
90dfed
+
90dfed
+GENERATE_TEST(vfene)
90dfed
+
90dfed
+int main()
90dfed
+{
90dfed
+   test_vfae_char("not found", "................", 9, 0);
90dfed
+   test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2);
90dfed
+   test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1);
90dfed
+
90dfed
+   test_vfee_char("same char here", "..........here", 10, 2);
90dfed
+   test_vfee_char("and here too ...", "_________t~", 9, 1);
90dfed
+   test_vfee_char("equality!~", "========!!~", 8, -1);
90dfed
+
90dfed
+   test_vfene_char("strings equal", "strings equal", 13, 0);
90dfed
+   test_vfene_char(hex_digit, hex_digit, 16, 3);
90dfed
+   test_vfene_char("undef~", "undefined", -1, -1);
90dfed
+   test_vfene_char("active~", "actually ok", 3, 1);
90dfed
+   return 0;
90dfed
+}
90dfed
diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp
90dfed
new file mode 100644
90dfed
index 000000000..8aad3c87f
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vfae.stderr.exp
90dfed
@@ -0,0 +1,20 @@
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vfae_char (vfae.c:51)
90dfed
+   by 0x........: main (vfae.c:61)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vfae_char (vfae.c:51)
90dfed
+   by 0x........: main (vfae.c:61)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vfee_char (vfae.c:53)
90dfed
+   by 0x........: main (vfae.c:65)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vfene_char (vfae.c:55)
90dfed
+   by 0x........: main (vfae.c:69)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vfene_char (vfae.c:55)
90dfed
+   by 0x........: main (vfae.c:69)
90dfed
+
90dfed
diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp
90dfed
new file mode 100644
90dfed
index 000000000..e69de29bb
90dfed
diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest
90dfed
new file mode 100644
90dfed
index 000000000..ae36c22fe
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vfae.vgtest
90dfed
@@ -0,0 +1,2 @@
90dfed
+prog: vfae
90dfed
+vgopts: -q
90dfed
diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c
90dfed
new file mode 100644
90dfed
index 000000000..7ed59b94b
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vistr.c
90dfed
@@ -0,0 +1,76 @@
90dfed
+#include <stdio.h>
90dfed
+#include <string.h>
90dfed
+
90dfed
+#define VECTOR __attribute__ ((vector_size (16)))
90dfed
+
90dfed
+typedef char VECTOR char_v;
90dfed
+
90dfed
+volatile char tmp;
90dfed
+static const char *hex_digit = "0123456789abcdef";
90dfed
+
90dfed
+static char_v to_char_vec(const char *str, char_v *maskp)
90dfed
+{
90dfed
+   char buf[17];
90dfed
+   char_v v;
90dfed
+   char_v mask = {0};
90dfed
+
90dfed
+   for (int i = 0; i < sizeof(buf); i++) {
90dfed
+      char ch = str[i];
90dfed
+      if (ch == '\0')
90dfed
+         break;
90dfed
+      else if (ch == '$') {
90dfed
+         buf[i] = '\0';
90dfed
+         mask[i] = -1;
90dfed
+      } else if (ch != '~') {
90dfed
+         buf[i] = ch;
90dfed
+         mask[i] = -1;
90dfed
+      }
90dfed
+   }
90dfed
+   v = *(char_v *) buf;
90dfed
+   *maskp = mask;
90dfed
+   return v;
90dfed
+}
90dfed
+
90dfed
+static void test_vistr_char(const char *str, const char *expect_res,
90dfed
+                            int expect_cc)
90dfed
+{
90dfed
+   int cc, count;
90dfed
+   char_v v1, mask;
90dfed
+   char_v v2 = to_char_vec(str, &mask);
90dfed
+   char_v exp_v1 = to_char_vec(expect_res, &mask);
90dfed
+   char equal[16];
90dfed
+
90dfed
+   __asm__(
90dfed
+      "cr    0,0\n\t"           /* Clear CC */
90dfed
+      "vistr %[v1],%[v2],0,1\n\t"
90dfed
+      "ipm   %[cc]\n\t"
90dfed
+      "srl   %[cc],28"
90dfed
+      : [v1] "=v" (v1),
90dfed
+        [cc] "=d" (cc)
90dfed
+      : [v2] "v" (v2)
90dfed
+      : "cc");
90dfed
+
90dfed
+   *(char_v *) equal = (v1 & mask) == (exp_v1 & mask);
90dfed
+   if (memchr(equal, 0, sizeof(equal)))
90dfed
+      printf("Result doesn't match `%s'\n", expect_res);
90dfed
+
90dfed
+   count = 0;
90dfed
+   for (int i = 0; i < 16; i++) {
90dfed
+      if (v1[i] == 0) count++;
90dfed
+   }
90dfed
+   tmp = hex_digit[count];
90dfed
+
90dfed
+   tmp = hex_digit[cc & 0xf];
90dfed
+   if (expect_cc >= 0 && cc != expect_cc)
90dfed
+      printf("CC %d != %d\n", cc, expect_cc);
90dfed
+}
90dfed
+
90dfed
+int main()
90dfed
+{
90dfed
+   test_vistr_char("terminated$====~", "terminated$$$$$$", 0);
90dfed
+   test_vistr_char("undef~~~~~~~~~~~", "undef", -1);
90dfed
+   test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1);
90dfed
+   test_vistr_char("Not. Terminated.", "Not. Terminated.", 3);
90dfed
+   test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0);
90dfed
+   return 0;
90dfed
+}
90dfed
diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp
90dfed
new file mode 100644
90dfed
index 000000000..e4f35fd74
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vistr.stderr.exp
90dfed
@@ -0,0 +1,20 @@
90dfed
+Conditional jump or move depends on uninitialised value(s)
90dfed
+   at 0x........: test_vistr_char (vistr.c:59)
90dfed
+   by 0x........: main (vistr.c:71)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vistr_char (vistr.c:63)
90dfed
+   by 0x........: main (vistr.c:71)
90dfed
+
90dfed
+Conditional jump or move depends on uninitialised value(s)
90dfed
+   at 0x........: test_vistr_char (vistr.c:59)
90dfed
+   by 0x........: main (vistr.c:72)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vistr_char (vistr.c:63)
90dfed
+   by 0x........: main (vistr.c:72)
90dfed
+
90dfed
+Conditional jump or move depends on uninitialised value(s)
90dfed
+   at 0x........: test_vistr_char (vistr.c:59)
90dfed
+   by 0x........: main (vistr.c:74)
90dfed
+
90dfed
diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest
90dfed
new file mode 100644
90dfed
index 000000000..f99749d85
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vistr.vgtest
90dfed
@@ -0,0 +1,2 @@
90dfed
+prog: vistr
90dfed
+vgopts: -q
90dfed
diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c
90dfed
new file mode 100644
90dfed
index 000000000..268e2f858
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vstrc.c
90dfed
@@ -0,0 +1,92 @@
90dfed
+#include <stdio.h>
90dfed
+#include <string.h>
90dfed
+
90dfed
+#define VECTOR __attribute__ ((vector_size (16)))
90dfed
+
90dfed
+typedef char VECTOR char_v;
90dfed
+
90dfed
+struct vstrc_char_rng {
90dfed
+   unsigned char range[16];
90dfed
+   unsigned char flags[16];
90dfed
+};
90dfed
+
90dfed
+#define RNG_FLAG_EQ   0x80
90dfed
+#define RNG_FLAG_LT   0x40
90dfed
+#define RNG_FLAG_GT   0x20
90dfed
+#define RNG_FLAG_ANY  0xe0
90dfed
+#define RNG_FLAG_NONE 0x00
90dfed
+
90dfed
+volatile char tmp;
90dfed
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
90dfed
+
90dfed
+static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng,
90dfed
+                            int expect_res, int expect_cc)
90dfed
+{
90dfed
+   int cc;
90dfed
+   char_v v1;
90dfed
+   char_v v2 = *(const char_v *) str;
90dfed
+   char_v v3 = *(const char_v *) rng->range;
90dfed
+   char_v v4 = *(const char_v *) rng->flags;
90dfed
+
90dfed
+   __asm__(
90dfed
+      "cr    0,0\n\t"           /* Clear CC */
90dfed
+      "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t"
90dfed
+      "ipm   %[cc]\n\t"
90dfed
+      "srl   %[cc],28"
90dfed
+      : [v1] "=v" (v1),
90dfed
+        [cc] "=d" (cc)
90dfed
+      : [v2] "v" (v2),
90dfed
+        [v3] "v" (v3),
90dfed
+        [v4] "v" (v4)
90dfed
+      : "cc");
90dfed
+
90dfed
+   tmp = hex_digit[v1[7] & 0x1f];
90dfed
+   if (expect_res >= 0  && v1[7] != expect_res)
90dfed
+      printf("result %u != %d\n", v1[7], expect_res);
90dfed
+
90dfed
+   tmp = hex_digit[cc & 0xf];
90dfed
+   if (expect_cc >= 0 && cc != expect_cc)
90dfed
+      printf("CC %d != %d\n", cc, expect_cc);
90dfed
+}
90dfed
+
90dfed
+int main()
90dfed
+{
90dfed
+   struct vstrc_char_rng rng;
90dfed
+   char buf[16];
90dfed
+
90dfed
+   memset(rng.flags, RNG_FLAG_NONE, 16);
90dfed
+
90dfed
+   rng.range[4] = 'z';
90dfed
+   rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ;
90dfed
+   rng.flags[5] = RNG_FLAG_ANY;
90dfed
+   /* OK: match at the 'z' */
90dfed
+   test_vstrc_char("find the z", &rng, 9, 2);
90dfed
+
90dfed
+   rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ;
90dfed
+   rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ;
90dfed
+   /* Bad: undefined range */
90dfed
+   test_vstrc_char("undefined", &rng, -1, -1);
90dfed
+
90dfed
+   rng.range[12] = 'a';
90dfed
+   rng.range[13] = 'c';
90dfed
+   /* OK: match at the 'a' */
90dfed
+   test_vstrc_char("get the abc", &rng, 8, 2);
90dfed
+
90dfed
+   rng.flags[12] = RNG_FLAG_LT;
90dfed
+   rng.flags[13] = RNG_FLAG_GT;
90dfed
+   /* OK: no match up to null terminator */
90dfed
+   test_vstrc_char("no match", &rng, 8, 0);
90dfed
+
90dfed
+   /* OK: no match, no null terminator */
90dfed
+   test_vstrc_char("0123456789abcdef", &rng, 16, 3);
90dfed
+
90dfed
+   buf[0] = 'x';
90dfed
+   /* Bad: undefined string */
90dfed
+   test_vstrc_char(buf, &rng, -1, -1);
90dfed
+
90dfed
+   buf[1] = 'z';
90dfed
+   /* Bad: valid match, but CC undefined */
90dfed
+   test_vstrc_char(buf, &rng, 1, -1);
90dfed
+
90dfed
+   return 0;
90dfed
+}
90dfed
diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp
90dfed
new file mode 100644
90dfed
index 000000000..c1125bea1
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vstrc.stderr.exp
90dfed
@@ -0,0 +1,20 @@
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vstrc_char (vstrc.c:43)
90dfed
+   by 0x........: main (vstrc.c:68)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vstrc_char (vstrc.c:47)
90dfed
+   by 0x........: main (vstrc.c:68)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vstrc_char (vstrc.c:43)
90dfed
+   by 0x........: main (vstrc.c:85)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vstrc_char (vstrc.c:47)
90dfed
+   by 0x........: main (vstrc.c:85)
90dfed
+
90dfed
+Use of uninitialised value of size 8
90dfed
+   at 0x........: test_vstrc_char (vstrc.c:47)
90dfed
+   by 0x........: main (vstrc.c:89)
90dfed
+
90dfed
diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp
90dfed
new file mode 100644
90dfed
index 000000000..e69de29bb
90dfed
diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest
90dfed
new file mode 100644
90dfed
index 000000000..26f5db99b
90dfed
--- /dev/null
90dfed
+++ b/memcheck/tests/s390x/vstrc.vgtest
90dfed
@@ -0,0 +1,2 @@
90dfed
+prog: vstrc
90dfed
+vgopts: -q
90dfed
90dfed
commit a0bb049ace14ab52d386bb1d49a399f39eec4986
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Tue Mar 23 14:55:09 2021 +0100
90dfed
90dfed
    s390x: Improve handling of amodes without base register
90dfed
    
90dfed
    Addressing modes without a base or index register represent constants.
90dfed
    They can occur in some special cases such as shift operations and when
90dfed
    accessing individual vector elements.  Perform some minor improvements to
90dfed
    the handling of such amodes.
90dfed
90dfed
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
90dfed
index 6e0734ae0..2587f81a1 100644
90dfed
--- a/VEX/priv/host_s390_defs.c
90dfed
+++ b/VEX/priv/host_s390_defs.c
90dfed
@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am)
90dfed
 {
90dfed
    switch (am->tag) {
90dfed
    case S390_AMODE_B12:
90dfed
-      return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
90dfed
+      return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) &&
90dfed
+             fits_unsigned_12bit(am->d);
90dfed
 
90dfed
    case S390_AMODE_B20:
90dfed
       return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
90dfed
@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am)
90dfed
    }
90dfed
 }
90dfed
 
90dfed
+static Bool
90dfed
+s390_amode_is_constant(const s390_amode *am)
90dfed
+{
90dfed
+   return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0));
90dfed
+}
90dfed
+
90dfed
 
90dfed
 /* Record the register use of an amode */
90dfed
 static void
90dfed
 s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
90dfed
 {
90dfed
-   switch (am->tag) {
90dfed
-   case S390_AMODE_B12:
90dfed
-   case S390_AMODE_B20:
90dfed
-      addHRegUse(u, HRmRead, am->b);
90dfed
-      return;
90dfed
-
90dfed
-   case S390_AMODE_BX12:
90dfed
-   case S390_AMODE_BX20:
90dfed
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
90dfed
       addHRegUse(u, HRmRead, am->b);
90dfed
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
90dfed
       addHRegUse(u, HRmRead, am->x);
90dfed
-      return;
90dfed
-
90dfed
-   default:
90dfed
-      vpanic("s390_amode_get_reg_usage");
90dfed
-   }
90dfed
 }
90dfed
 
90dfed
 
90dfed
 static void
90dfed
 s390_amode_map_regs(HRegRemap *m, s390_amode *am)
90dfed
 {
90dfed
-   switch (am->tag) {
90dfed
-   case S390_AMODE_B12:
90dfed
-   case S390_AMODE_B20:
90dfed
-      am->b = lookupHRegRemap(m, am->b);
90dfed
-      return;
90dfed
-
90dfed
-   case S390_AMODE_BX12:
90dfed
-   case S390_AMODE_BX20:
90dfed
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
90dfed
       am->b = lookupHRegRemap(m, am->b);
90dfed
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
90dfed
       am->x = lookupHRegRemap(m, am->x);
90dfed
-      return;
90dfed
-
90dfed
-   default:
90dfed
-      vpanic("s390_amode_map_regs");
90dfed
-   }
90dfed
 }
90dfed
 
90dfed
 
90dfed
@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
90dfed
                            insn->variant.alu.dst, vreg_opnd);
90dfed
    }
90dfed
 
90dfed
+   /* v-vgetelem <reg>,<vreg> */
90dfed
+   if (insn->tag == S390_INSN_VEC_AMODEOP
90dfed
+       && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM
90dfed
+       && insn->size == 8
90dfed
+       && sameHReg(insn->variant.vec_amodeop.op1, vreg)
90dfed
+       && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) {
90dfed
+      vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d;
90dfed
+      return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am);
90dfed
+   }
90dfed
+
90dfed
    /* v-<unop> <reg>,<vreg> */
90dfed
    if (insn->tag == S390_INSN_UNOP
90dfed
        && insn->variant.unop.src.tag == S390_OPND_REG
90dfed
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
90dfed
index 5f79280c0..ceca6836e 100644
90dfed
--- a/VEX/priv/host_s390_isel.c
90dfed
+++ b/VEX/priv/host_s390_isel.c
90dfed
@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr,
90dfed
                     Bool no_index __attribute__((unused)),
90dfed
                     Bool short_displacement)
90dfed
 {
90dfed
-   if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
90dfed
+   if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 &&
90dfed
+       expr->Iex.Unop.arg->tag == Iex_Const) {
90dfed
+      UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8;
90dfed
+      return s390_amode_b12((Int)value, s390_hreg_gpr(0));
90dfed
+
90dfed
+   } else if (expr->tag == Iex_Const) {
90dfed
+      ULong value = expr->Iex.Const.con->Ico.U64;
90dfed
+      if (ulong_fits_unsigned_12bit(value)) {
90dfed
+         return s390_amode_b12((Int)value, s390_hreg_gpr(0));
90dfed
+      }
90dfed
+
90dfed
+   } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
90dfed
       IRExpr *arg1 = expr->Iex.Binop.arg1;
90dfed
       IRExpr *arg2 = expr->Iex.Binop.arg2;
90dfed
 
90dfed
90dfed
commit fd935e238d907d9c523a311ba795077d95ad6912
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Fri Mar 26 19:27:47 2021 +0100
90dfed
90dfed
    s390x: Rework insn "v-vdup" and add "v-vrep"
90dfed
    
90dfed
    So far the only s390x insn for filling a vector with copies of the same
90dfed
    element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first
90dfed
    element of its vector argument.  This is fairly restrictive and can lead
90dfed
    to unnecessarily long code sequences.
90dfed
    
90dfed
    Redefine "v-vdup" to replicate any scalar value instead.  And add
90dfed
    "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a
90dfed
    vector.  Select the latter for suitable expressions like
90dfed
    
90dfed
      Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i))
90dfed
    
90dfed
    This improves the generated code for some vector string instructions,
90dfed
    where a lot of element replications are performed.
90dfed
90dfed
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
90dfed
index 2587f81a1..c764d6ef9 100644
90dfed
--- a/VEX/priv/host_s390_defs.c
90dfed
+++ b/VEX/priv/host_s390_defs.c
90dfed
@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
90dfed
                             insn->variant.unop.dst, vreg_opnd);
90dfed
    }
90dfed
 
90dfed
+   /* v-vrep <reg>,<vreg>,<idx> */
90dfed
+   if (insn->tag == S390_INSN_VEC_REPLICATE
90dfed
+       && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
90dfed
+      vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
90dfed
+      return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
90dfed
+                            insn->variant.vec_replicate.dst, vreg_opnd);
90dfed
+   }
90dfed
+
90dfed
 no_match:
90dfed
    return NULL;
90dfed
 }
90dfed
@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
90dfed
       addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
90dfed
       break;
90dfed
 
90dfed
+   case S390_INSN_VEC_REPLICATE:
90dfed
+      addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
90dfed
+      addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
90dfed
+      break;
90dfed
+
90dfed
    default:
90dfed
       vpanic("s390_insn_get_reg_usage");
90dfed
    }
90dfed
@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
90dfed
       insn->variant.vec_triop.op3 =
90dfed
          lookupHRegRemap(m, insn->variant.vec_triop.op3);
90dfed
       break;
90dfed
+
90dfed
+   case S390_INSN_VEC_REPLICATE:
90dfed
+      insn->variant.vec_replicate.dst =
90dfed
+         lookupHRegRemap(m, insn->variant.vec_replicate.dst);
90dfed
+      insn->variant.vec_replicate.op1 =
90dfed
+         lookupHRegRemap(m, insn->variant.vec_replicate.op1);
90dfed
+      break;
90dfed
+
90dfed
    default:
90dfed
       vpanic("s390_insn_map_regs");
90dfed
    }
90dfed
@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2)
90dfed
 
90dfed
 
90dfed
 static UChar *
90dfed
-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
90dfed
+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
90dfed
+{
90dfed
+   ULong the_insn = op;
90dfed
+   ULong rxb = s390_update_rxb(0, 1, &v1);
90dfed
+
90dfed
+   the_insn |= ((ULong)v1) << 36;
90dfed
+   the_insn |= ((ULong)i2) << 16;
90dfed
+   the_insn |= ((ULong)m3) << 12;
90dfed
+   the_insn |= ((ULong)rxb)<< 8;
90dfed
+
90dfed
+   return emit_6bytes(p, the_insn);
90dfed
+}
90dfed
+
90dfed
+
90dfed
+static UChar *
90dfed
+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
90dfed
+{
90dfed
+   ULong the_insn = op;
90dfed
+   ULong rxb = s390_update_rxb(0, 1, &v1);
90dfed
+   rxb = s390_update_rxb(rxb, 2, &v3);
90dfed
+
90dfed
+   the_insn |= ((ULong)v1) << 36;
90dfed
+   the_insn |= ((ULong)v3) << 32;
90dfed
+   the_insn |= ((ULong)i2) << 16;
90dfed
+   the_insn |= ((ULong)m4) << 12;
90dfed
+   the_insn |= ((ULong)rxb) << 8;
90dfed
+
90dfed
+   return emit_6bytes(p, the_insn);
90dfed
+}
90dfed
+
90dfed
+
90dfed
+static UChar *
90dfed
+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
90dfed
 {
90dfed
    ULong the_insn = op;
90dfed
    ULong rxb = s390_update_rxb(0, 1, &v1);
90dfed
@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
90dfed
    the_insn |= ((ULong)x2) << 32;
90dfed
    the_insn |= ((ULong)b2) << 28;
90dfed
    the_insn |= ((ULong)d2) << 16;
90dfed
+   the_insn |= ((ULong)m3) << 12;
90dfed
    the_insn |= ((ULong)rxb)<< 8;
90dfed
 
90dfed
    return emit_6bytes(p, the_insn);
90dfed
@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
90dfed
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
       s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
90dfed
 
90dfed
-   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
90dfed
+   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
90dfed
 }
90dfed
 
90dfed
 static UChar *
90dfed
@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2)
90dfed
 }
90dfed
 
90dfed
 
90dfed
+static UChar *
90dfed
+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
90dfed
+{
90dfed
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
+      s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
90dfed
+
90dfed
+   return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
90dfed
+}
90dfed
+
90dfed
+
90dfed
 static UChar *
90dfed
 s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
90dfed
 {
90dfed
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
       s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
90dfed
 
90dfed
-   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
90dfed
+   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0);
90dfed
 }
90dfed
 
90dfed
 
90dfed
@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4)
90dfed
 
90dfed
 
90dfed
 static UChar *
90dfed
-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
90dfed
+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4)
90dfed
 {
90dfed
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
-      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
90dfed
+      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4);
90dfed
 
90dfed
-   return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
90dfed
+   return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4);
90dfed
 }
90dfed
 
90dfed
 
90dfed
+static UChar *
90dfed
+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
90dfed
+{
90dfed
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
+      s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
90dfed
+
90dfed
+   return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
90dfed
+}
90dfed
+
90dfed
 
90dfed
 static UChar *
90dfed
 s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
90dfed
@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
90dfed
    return insn;
90dfed
 }
90dfed
 
90dfed
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
90dfed
+                                   UChar idx)
90dfed
+{
90dfed
+   s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
90dfed
+
90dfed
+   insn->tag  = S390_INSN_VEC_REPLICATE;
90dfed
+   insn->size = size;
90dfed
+   insn->variant.vec_replicate.dst = dst;
90dfed
+   insn->variant.vec_replicate.op1 = op1;
90dfed
+   insn->variant.vec_replicate.idx = idx;
90dfed
+
90dfed
+   return insn;
90dfed
+}
90dfed
+
90dfed
 /*---------------------------------------------------------------*/
90dfed
 /*--- Debug print                                             ---*/
90dfed
 /*---------------------------------------------------------------*/
90dfed
@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn)
90dfed
                    insn->variant.vec_triop.op3);
90dfed
       break;
90dfed
 
90dfed
+   case S390_INSN_VEC_REPLICATE:
90dfed
+      s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
90dfed
+                   insn->variant.vec_replicate.dst,
90dfed
+                   insn->variant.vec_replicate.op1,
90dfed
+                   insn->variant.vec_replicate.idx);
90dfed
+      break;
90dfed
+
90dfed
    default: goto fail;
90dfed
    }
90dfed
 
90dfed
@@ -9386,6 +9480,56 @@ s390_negate_emit(UChar *buf, const s390_insn *insn)
90dfed
 }
90dfed
 
90dfed
 
90dfed
+static UChar *
90dfed
+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
90dfed
+{
90dfed
+   UChar v1 = hregNumber(insn->variant.unop.dst);
90dfed
+   s390_opnd_RMI opnd = insn->variant.unop.src;
90dfed
+   UChar r2;
90dfed
+
90dfed
+   switch (opnd.tag) {
90dfed
+   case S390_OPND_AMODE: {
90dfed
+      s390_amode* am = opnd.variant.am;
90dfed
+      UInt b = hregNumber(am->b);
90dfed
+      UInt x = hregNumber(am->x);
90dfed
+      UInt d = am->d;
90dfed
+
90dfed
+      if (fits_unsigned_12bit(d)) {
90dfed
+         return s390_emit_VLREP(buf, v1, x, b, d,
90dfed
+                                s390_getM_from_size(insn->size));
90dfed
+      }
90dfed
+      buf = s390_emit_load_mem(buf, insn->size, R0, am);
90dfed
+      r2 = R0;
90dfed
+      goto duplicate_from_gpr;
90dfed
+   }
90dfed
+
90dfed
+   case S390_OPND_IMMEDIATE: {
90dfed
+      ULong val = opnd.variant.imm;
90dfed
+
90dfed
+      if (ulong_fits_signed_16bit(val)) {
90dfed
+         return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
90dfed
+      }
90dfed
+      buf = s390_emit_load_64imm(buf, R0, val);
90dfed
+      r2 = R0;
90dfed
+      goto duplicate_from_gpr;
90dfed
+   }
90dfed
+
90dfed
+   case S390_OPND_REG:
90dfed
+      r2 = hregNumber(opnd.variant.reg);
90dfed
+
90dfed
+   duplicate_from_gpr:
90dfed
+      buf = s390_emit_VLVGP(buf, v1, r2, r2);
90dfed
+      if (insn->size != 8) {
90dfed
+         buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1,
90dfed
+                              s390_getM_from_size(insn->size));
90dfed
+      }
90dfed
+      return buf;
90dfed
+   }
90dfed
+
90dfed
+   vpanic("s390_vec_duplicate_emit");
90dfed
+}
90dfed
+
90dfed
+
90dfed
 static UChar *
90dfed
 s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
90dfed
 {
90dfed
@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
90dfed
       UShort i2 = insn->variant.unop.src.variant.imm;
90dfed
       return s390_emit_VGBM(buf, v1, i2);
90dfed
       }
90dfed
-   case S390_VEC_DUPLICATE: {
90dfed
-      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
90dfed
-      UChar v1 = hregNumber(insn->variant.unop.dst);
90dfed
-      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
90dfed
-      return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
90dfed
-      }
90dfed
+   case S390_VEC_DUPLICATE:  return s390_vec_duplicate_emit(buf, insn);
90dfed
    case S390_VEC_UNPACKLOWS: {
90dfed
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
90dfed
       vassert(insn->size < 8);
90dfed
@@ -11595,6 +11734,16 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
90dfed
 }
90dfed
 
90dfed
 
90dfed
+static UChar *
90dfed
+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
90dfed
+{
90dfed
+   UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
90dfed
+   UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
90dfed
+   UShort idx = (UShort) insn->variant.vec_replicate.idx;
90dfed
+   return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
90dfed
+}
90dfed
+
90dfed
+
90dfed
 Int
90dfed
 emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
90dfed
                Bool mode64, VexEndness endness_host,
90dfed
@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
90dfed
    case S390_INSN_VEC_TRIOP:
90dfed
       end = s390_insn_vec_triop_emit(buf, insn);
90dfed
       break;
90dfed
+
90dfed
+   case S390_INSN_VEC_REPLICATE:
90dfed
+      end = s390_insn_vec_replicate_emit(buf, insn);
90dfed
+      break;
90dfed
+
90dfed
    fail:
90dfed
    default:
90dfed
       vpanic("emit_S390Instr");
90dfed
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
90dfed
index 9b69f4d38..063fd3800 100644
90dfed
--- a/VEX/priv/host_s390_defs.h
90dfed
+++ b/VEX/priv/host_s390_defs.h
90dfed
@@ -166,7 +166,8 @@ typedef enum {
90dfed
    S390_INSN_VEC_AMODEINTOP,
90dfed
    S390_INSN_VEC_UNOP,
90dfed
    S390_INSN_VEC_BINOP,
90dfed
-   S390_INSN_VEC_TRIOP
90dfed
+   S390_INSN_VEC_TRIOP,
90dfed
+   S390_INSN_VEC_REPLICATE
90dfed
 } s390_insn_tag;
90dfed
 
90dfed
 
90dfed
@@ -738,6 +739,11 @@ typedef struct {
90dfed
          HReg          op2;    /* 128-bit second operand */
90dfed
          HReg          op3;    /* 128-bit third operand */
90dfed
       } vec_triop;
90dfed
+      struct {
90dfed
+         HReg          dst;    /* 128-bit result */
90dfed
+         HReg          op1;    /* 128-bit first operand */
90dfed
+         UChar         idx;    /* index of element to replicate */
90dfed
+      } vec_replicate;
90dfed
    } variant;
90dfed
 } s390_insn;
90dfed
 
90dfed
@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1,
90dfed
                                HReg op2);
90dfed
 s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1,
90dfed
                                HReg op2, HReg op3);
90dfed
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx);
90dfed
 
90dfed
 const HChar *s390_insn_as_string(const s390_insn *);
90dfed
 
90dfed
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
90dfed
index ceca6836e..968122596 100644
90dfed
--- a/VEX/priv/host_s390_isel.c
90dfed
+++ b/VEX/priv/host_s390_isel.c
90dfed
@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
90dfed
    }
90dfed
    /* --------- UNARY OP --------- */
90dfed
    case Iex_Unop: {
90dfed
-      UChar size_for_int_arg = 0;
90dfed
       HReg dst = INVALID_HREG;
90dfed
       HReg reg1 = INVALID_HREG;
90dfed
       s390_unop_t vec_unop = S390_UNOP_T_INVALID;
90dfed
       s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
90dfed
       IROp op = expr->Iex.Unop.op;
90dfed
+      IROp arg_op = Iop_INVALID;
90dfed
       IRExpr* arg = expr->Iex.Unop.arg;
90dfed
       switch(op) {
90dfed
       case Iop_NotV128:
90dfed
@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
90dfed
       }
90dfed
 
90dfed
       case Iop_Dup8x16:
90dfed
-         size = size_for_int_arg = 1;
90dfed
-         vec_unop = S390_VEC_DUPLICATE;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         size = 1;
90dfed
+         arg_op = Iop_GetElem8x16;
90dfed
+         goto Iop_V_dup_wrk;
90dfed
       case Iop_Dup16x8:
90dfed
-         size = size_for_int_arg = 2;
90dfed
-         vec_unop = S390_VEC_DUPLICATE;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         size = 2;
90dfed
+         arg_op = Iop_GetElem16x8;
90dfed
+         goto Iop_V_dup_wrk;
90dfed
       case Iop_Dup32x4:
90dfed
-         size = size_for_int_arg = 4;
90dfed
-         vec_unop = S390_VEC_DUPLICATE;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         size = 4;
90dfed
+         arg_op = Iop_GetElem32x4;
90dfed
+         goto Iop_V_dup_wrk;
90dfed
+
90dfed
+      Iop_V_dup_wrk: {
90dfed
+         dst = newVRegV(env);
90dfed
+         if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
90dfed
+             arg->Iex.Binop.arg2->tag == Iex_Const) {
90dfed
+            ULong idx;
90dfed
+            idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
90dfed
+            reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
90dfed
+            addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx));
90dfed
+         } else {
90dfed
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
90dfed
+            addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
90dfed
+         }
90dfed
+         return dst;
90dfed
+      }
90dfed
 
90dfed
       case Iop_Widen8Sto16x8:
90dfed
          size = 1;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWS;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         goto Iop_V_widen_wrk;
90dfed
       case Iop_Widen16Sto32x4:
90dfed
          size = 2;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWS;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         goto Iop_V_widen_wrk;
90dfed
       case Iop_Widen32Sto64x2:
90dfed
          size = 4;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWS;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         goto Iop_V_widen_wrk;
90dfed
       case Iop_Widen8Uto16x8:
90dfed
          size = 1;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWU;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         goto Iop_V_widen_wrk;
90dfed
       case Iop_Widen16Uto32x4:
90dfed
          size = 2;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWU;
90dfed
-         goto Iop_V_int_wrk;
90dfed
+         goto Iop_V_widen_wrk;
90dfed
       case Iop_Widen32Uto64x2:
90dfed
          size = 4;
90dfed
-         size_for_int_arg = 8;
90dfed
          vec_unop = S390_VEC_UNPACKLOWU;
90dfed
-         goto Iop_V_int_wrk;
90dfed
-
90dfed
-      Iop_V_int_wrk: {
90dfed
-         HReg vr1 = vec_generate_zeroes(env);
90dfed
-         s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
90dfed
-         reg1 = s390_isel_int_expr(env, arg);
90dfed
+         goto Iop_V_widen_wrk;
90dfed
 
90dfed
+      Iop_V_widen_wrk: {
90dfed
          vassert(vec_unop != S390_UNOP_T_INVALID);
90dfed
-         addInstr(env,
90dfed
-                  s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
90dfed
-                                           vr1, amode2, reg1));
90dfed
-
90dfed
+         s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
90dfed
+         HReg vr1 = newVRegV(env);
90dfed
+         addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
90dfed
          dst = newVRegV(env);
90dfed
          addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
90dfed
          return dst;
90dfed
90dfed
commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Thu Mar 25 18:48:07 2021 +0100
90dfed
90dfed
    s390x: Add support for emitting "vector or with complement"
90dfed
    
90dfed
    In the instruction selector, look out for IR expressions that fit "vector
90dfed
    or with complement (VOC)".  Emit when applicable.
90dfed
    
90dfed
    This slightly reduces the generated code sometimes, such as for certain
90dfed
    vector string instructions, where such expressions occur quite frequently.
90dfed
90dfed
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
90dfed
index c764d6ef9..239d9d299 100644
90dfed
--- a/VEX/priv/host_s390_defs.c
90dfed
+++ b/VEX/priv/host_s390_defs.c
90dfed
@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3)
90dfed
    return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3);
90dfed
 }
90dfed
 
90dfed
+static UChar *
90dfed
+s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3)
90dfed
+{
90dfed
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
90dfed
+      s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3);
90dfed
+
90dfed
+   return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3);
90dfed
+}
90dfed
+
90dfed
 static UChar *
90dfed
 s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3)
90dfed
 {
90dfed
@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn)
90dfed
       case S390_VEC_PACK_SATURU:    op = "v-vpacksaturu"; break;
90dfed
       case S390_VEC_COMPARE_EQUAL:  op = "v-vcmpeq"; break;
90dfed
       case S390_VEC_OR:             op = "v-vor"; break;
90dfed
+      case S390_VEC_ORC:            op = "v-vorc"; break;
90dfed
       case S390_VEC_XOR:            op = "v-vxor";  break;
90dfed
       case S390_VEC_AND:            op = "v-vand"; break;
90dfed
       case S390_VEC_MERGEL:         op = "v-vmergel"; break;
90dfed
@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
90dfed
          return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size));
90dfed
       case S390_VEC_OR:
90dfed
          return s390_emit_VO(buf, v1, v2, v3);
90dfed
+      case S390_VEC_ORC:
90dfed
+         return s390_emit_VOC(buf, v1, v2, v3);
90dfed
       case S390_VEC_XOR:
90dfed
          return s390_emit_VX(buf, v1, v2, v3);
90dfed
       case S390_VEC_AND:
90dfed
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
90dfed
index 063fd3800..dc116106e 100644
90dfed
--- a/VEX/priv/host_s390_defs.h
90dfed
+++ b/VEX/priv/host_s390_defs.h
90dfed
@@ -366,6 +366,7 @@ typedef enum {
90dfed
    S390_VEC_PACK_SATURU,
90dfed
    S390_VEC_COMPARE_EQUAL,
90dfed
    S390_VEC_OR,
90dfed
+   S390_VEC_ORC,
90dfed
    S390_VEC_XOR,
90dfed
    S390_VEC_AND,
90dfed
    S390_VEC_MERGEL,
90dfed
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
90dfed
index 968122596..53d76fe8a 100644
90dfed
--- a/VEX/priv/host_s390_isel.c
90dfed
+++ b/VEX/priv/host_s390_isel.c
90dfed
@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
90dfed
       case Iop_OrV128:
90dfed
          size = 16;
90dfed
          vec_binop = S390_VEC_OR;
90dfed
+         if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
90dfed
+            IRExpr* orig_arg1 = arg1;
90dfed
+            arg1 = arg2;
90dfed
+            arg2 = orig_arg1->Iex.Unop.arg;
90dfed
+            vec_binop = S390_VEC_ORC;
90dfed
+         } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
90dfed
+            arg2 = arg2->Iex.Unop.arg;
90dfed
+            vec_binop = S390_VEC_ORC;
90dfed
+         }
90dfed
          goto Iop_VV_wrk;
90dfed
 
90dfed
       case Iop_XorV128:
90dfed
90dfed
commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Tue Mar 30 17:45:20 2021 +0200
90dfed
90dfed
    s390x: Fix/optimize Iop_64HLtoV128
90dfed
    
90dfed
    In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies
90dfed
    of a 64-bit value is realized with Iop_64HLtoV128, since there is no such
90dfed
    operator as Iop_Dup64x2.  But the two args to Iop_64HLtoV128 use the same
90dfed
    expression, referenced twice.  Although this hasn't been seen to cause
90dfed
    real trouble yet, it's problematic and potentially inefficient, so change
90dfed
    it: Assign to a temp and pass that twice instead.
90dfed
    
90dfed
    In the instruction selector, if Iop_64HLtoV128 is found to be used for a
90dfed
    duplication as above, select "v-vdup" instead of "v-vinitfromgprs".  This
90dfed
    mimicks the behavior we'd get if there actually was an operator
90dfed
    Iop_Dup64x2.
90dfed
90dfed
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
90dfed
index dfea54259..a73dcfb14 100644
90dfed
--- a/VEX/priv/guest_s390_toIR.c
90dfed
+++ b/VEX/priv/guest_s390_toIR.c
90dfed
@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2)
90dfed
    case Ity_I32:
90dfed
       put_vr_qw(v1, unop(Iop_Dup32x4, o2));
90dfed
       break;
90dfed
-   case Ity_I64:
90dfed
-      put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
90dfed
+   case Ity_I64: {
90dfed
+      IRTemp val = newTemp(Ity_I64);
90dfed
+      assign(val, o2);
90dfed
+      put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val)));
90dfed
       break;
90dfed
+   }
90dfed
    default:
90dfed
       ppIRType(o2type);
90dfed
       vpanic("s390_vr_fill: invalid IRType");
90dfed
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
90dfed
index 53d76fe8a..ee20c6711 100644
90dfed
--- a/VEX/priv/host_s390_isel.c
90dfed
+++ b/VEX/priv/host_s390_isel.c
90dfed
@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
90dfed
       }
90dfed
 
90dfed
       case Iop_64HLtoV128:
90dfed
-         reg1 = s390_isel_int_expr(env, arg1);
90dfed
-         reg2 = s390_isel_int_expr(env, arg2);
90dfed
-
90dfed
-         addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
90dfed
-                  dst, reg1, reg2));
90dfed
-
90dfed
+         if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp &&
90dfed
+             arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) {
90dfed
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1);
90dfed
+            addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src));
90dfed
+         } else {
90dfed
+            reg1 = s390_isel_int_expr(env, arg1);
90dfed
+            reg2 = s390_isel_int_expr(env, arg2);
90dfed
+            addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
90dfed
+                                              dst, reg1, reg2));
90dfed
+         }
90dfed
          return dst;
90dfed
 
90dfed
       default:
90dfed
90dfed
commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
90dfed
Author: Andreas Arnez <arnez@linux.ibm.com>
90dfed
Date:   Fri May 7 18:13:03 2021 +0200
90dfed
90dfed
    s390x: Add missing stdout.exp for vector string memcheck test
90dfed
    
90dfed
    The file vistr.stdout.exp was missing from commit 32312d588.  Add it.
90dfed
90dfed
diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp
90dfed
new file mode 100644
90dfed
index 000000000..e69de29bb