Mark Wielaard 75fe92
commit d74a637206ef5532ccd2ccb2e31ee2762f184e60
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Wed Apr 28 18:52:30 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    The fix for bug 429864 - "s390x: C++ atomic test_and_set yields
Mark Wielaard 75fe92
    false-positive memcheck diagnostics" changes the memcheck behavior at
Mark Wielaard 75fe92
    various compare-and-swap instructions.  The comparison between the old and
Mark Wielaard 75fe92
    expected value now always yields a defined result, even if the input
Mark Wielaard 75fe92
    values are (partially) undefined.  However, some existing test cases
Mark Wielaard 75fe92
    explicitly verify that memcheck complains about the use of uninitialised
Mark Wielaard 75fe92
    values here.  These test cases are no longer valid.  Remove them.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
index 67ae8c293..e4e69eb38 100644
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 dist_noinst_SCRIPTS = filter_stderr
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe
Mark Wielaard 75fe92
+INSN_TESTS = cdsg cu21 cu42 ltgjhe
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 check_PROGRAMS = $(INSN_TESTS) 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -14,7 +14,3 @@ EXTRA_DIST = \
Mark Wielaard 75fe92
 AM_CFLAGS    += @FLAG_M64@
Mark Wielaard 75fe92
 AM_CXXFLAGS  += @FLAG_M64@
Mark Wielaard 75fe92
 AM_CCASFLAGS += @FLAG_M64@
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-cs_CFLAGS     = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
Mark Wielaard 75fe92
-csg_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
Mark Wielaard 75fe92
-cds_CFLAGS    = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index ec5c533e0..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cds.c
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,82 +0,0 @@
Mark Wielaard 75fe92
-#include <stdint.h>
Mark Wielaard 75fe92
-#include <stdio.h>
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-typedef struct {
Mark Wielaard 75fe92
-   uint64_t high;
Mark Wielaard 75fe92
-   uint64_t low;
Mark Wielaard 75fe92
-} quad_word;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void 
Mark Wielaard 75fe92
-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   int cc; // unused
Mark Wielaard 75fe92
-   quad_word op1 = op1_init;
Mark Wielaard 75fe92
-   uint64_t  op2 = op2_init;
Mark Wielaard 75fe92
-   quad_word op3 = op3_init;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   __asm__ volatile (
Mark Wielaard 75fe92
-                     "lmg     %%r0,%%r1,%1\n\t"
Mark Wielaard 75fe92
-                     "lmg     %%r2,%%r3,%3\n\t"
Mark Wielaard 75fe92
-                     "cds     %%r0,%%r2,%2\n\t"  //  cds 1st,3rd,2nd
Mark Wielaard 75fe92
-                     "stmg    %%r0,%%r1,%1\n"    // store r0,r1 to op1
Mark Wielaard 75fe92
-                     "stmg    %%r2,%%r3,%3\n"    // store r2,r3 to op3
Mark Wielaard 75fe92
-                     : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3)
Mark Wielaard 75fe92
-                     :
Mark Wielaard 75fe92
-                     : "r0", "r1", "r2", "r3", "cc");
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-// Return a quad-word that only bits low[32:63] are undefined
Mark Wielaard 75fe92
-quad_word
Mark Wielaard 75fe92
-make_undefined(void)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   quad_word val;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   val.high = 0;
Mark Wielaard 75fe92
-   val.low |= 0xFFFFFFFF00000000ull;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   return val;
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void op1_undefined(void)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   quad_word op1, op3;
Mark Wielaard 75fe92
-   uint64_t op2;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   // op1 undefined
Mark Wielaard 75fe92
-   op1 = make_undefined();
Mark Wielaard 75fe92
-   op2 = 42;
Mark Wielaard 75fe92
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
Mark Wielaard 75fe92
-   test(op1, op2, op3);  // complaint
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void op2_undefined(void)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   quad_word op1, op3;
Mark Wielaard 75fe92
-   uint64_t op2;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   op1.high = op1.low = 42;
Mark Wielaard 75fe92
-   // op2 undefined
Mark Wielaard 75fe92
-   op3.high = op3.low = 0xdeadbeefdeadbabeull;
Mark Wielaard 75fe92
-   test(op1, op2, op3);  // complaint
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void op3_undefined(void)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   quad_word op1, op3;
Mark Wielaard 75fe92
-   uint64_t op2;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   op1.high = op1.low = 42;
Mark Wielaard 75fe92
-   op2 = 100;
Mark Wielaard 75fe92
-   op3 = make_undefined();
Mark Wielaard 75fe92
-   test(op1, op2, op3);  // no complaint; op3 is just copied around
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-int main ()
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   op1_undefined();
Mark Wielaard 75fe92
-   op2_undefined();
Mark Wielaard 75fe92
-   op3_undefined();
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   return 0;
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index e72de94c8..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cds.stderr.exp
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,10 +0,0 @@
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (cds.c:17)
Mark Wielaard 75fe92
-   by 0x........: op1_undefined (cds.c:50)
Mark Wielaard 75fe92
-   by 0x........: main (cds.c:77)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (cds.c:17)
Mark Wielaard 75fe92
-   by 0x........: op2_undefined (cds.c:61)
Mark Wielaard 75fe92
-   by 0x........: main (cds.c:78)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index e69de29bb..000000000
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index 5195887e2..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cds.vgtest
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,2 +0,0 @@
Mark Wielaard 75fe92
-prog: cds
Mark Wielaard 75fe92
-vgopts: -q
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index 9a298cef9..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cs.c
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,32 +0,0 @@
Mark Wielaard 75fe92
-#include <stdint.h>
Mark Wielaard 75fe92
-#include <stdio.h>
Mark Wielaard 75fe92
-#include <string.h>
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void 
Mark Wielaard 75fe92
-test(int32_t op1_init, int32_t op2_init, int32_t op3_init)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   register int32_t op1 asm("8") = op1_init;
Mark Wielaard 75fe92
-   register int32_t op3 asm("9") = op3_init;
Mark Wielaard 75fe92
-   
Mark Wielaard 75fe92
-   int32_t op2 = op2_init;
Mark Wielaard 75fe92
-   int cc = 1; 
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   __asm__ volatile (
Mark Wielaard 75fe92
-           "cs      8,9,%1\n\t"
Mark Wielaard 75fe92
-           "ipm     %0\n\t"
Mark Wielaard 75fe92
-           "srl     %0,28\n\t"
Mark Wielaard 75fe92
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
Mark Wielaard 75fe92
-           : 
Mark Wielaard 75fe92
-           : "cc");
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-int main ()
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   int op1, op2, op3;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   test(op1, 0x10000000, 0x12345678);   // complaint
Mark Wielaard 75fe92
-   test(0x10000000, op2, 0x12345678);   // complaint
Mark Wielaard 75fe92
-   test(0x10000000, 0x01000000, op3);   // no complaint
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   return 0;
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index e45dc99cd..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cs.stderr.exp
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,8 +0,0 @@
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (cs.c:14)
Mark Wielaard 75fe92
-   by 0x........: main (cs.c:27)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (cs.c:14)
Mark Wielaard 75fe92
-   by 0x........: main (cs.c:28)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index e69de29bb..000000000
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index 323cce80c..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/cs.vgtest
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,2 +0,0 @@
Mark Wielaard 75fe92
-prog: cs
Mark Wielaard 75fe92
-vgopts: -q
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index 7f9d8c88e..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/csg.c
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,32 +0,0 @@
Mark Wielaard 75fe92
-#include <stdint.h>
Mark Wielaard 75fe92
-#include <stdio.h>
Mark Wielaard 75fe92
-#include <string.h>
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-void 
Mark Wielaard 75fe92
-test(int64_t op1_init, int64_t op2_init, int64_t op3_init)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   register int64_t op1 asm("8") = op1_init;
Mark Wielaard 75fe92
-   register int64_t op3 asm("9") = op3_init;
Mark Wielaard 75fe92
-   
Mark Wielaard 75fe92
-   int64_t op2 = op2_init;
Mark Wielaard 75fe92
-   int cc = 1; 
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   __asm__ volatile (
Mark Wielaard 75fe92
-           "csg     8,9,%1\n\t"
Mark Wielaard 75fe92
-           "ipm     %0\n\t"
Mark Wielaard 75fe92
-           "srl     %0,28\n\t"
Mark Wielaard 75fe92
-           : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3)
Mark Wielaard 75fe92
-           : 
Mark Wielaard 75fe92
-           : "cc");
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-int main ()
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   int64_t op1, op2, op3;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   test(op1, 0x1000000000000000ull, 0x1234567887654321ull);  // complaint
Mark Wielaard 75fe92
-   test(0x1000000000000000ull, op2, 0x1234567887654321ull);  // complaint
Mark Wielaard 75fe92
-   test(0x1000000000000000ull, 0x1000000000000000ull, op3);  // no complaint
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   return 0;
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index fda2021ce..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/csg.stderr.exp
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,8 +0,0 @@
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (csg.c:14)
Mark Wielaard 75fe92
-   by 0x........: main (csg.c:27)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
-   at 0x........: test (csg.c:14)
Mark Wielaard 75fe92
-   by 0x........: main (csg.c:28)
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index e69de29bb..000000000
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest
Mark Wielaard 75fe92
deleted file mode 100644
Mark Wielaard 75fe92
index 6de75c1d6..000000000
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/csg.vgtest
Mark Wielaard 75fe92
+++ /dev/null
Mark Wielaard 75fe92
@@ -1,2 +0,0 @@
Mark Wielaard 75fe92
-prog: csg
Mark Wielaard 75fe92
-vgopts: -q
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 18ddcc47c951427efd3b790ba2481159b9bd1598
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Wed Apr 7 16:48:29 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction
Mark Wielaard 75fe92
    selector.  Handle them exactly like the "inexpensive" variants Iop_CmpNE32
Mark Wielaard 75fe92
    and Iop_CmpNE64.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
index 2000ec224..5f79280c0 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond)
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       case Iop_CmpNE32:
Mark Wielaard 75fe92
       case Iop_CmpNE64:
Mark Wielaard 75fe92
+      case Iop_ExpCmpNE32:
Mark Wielaard 75fe92
+      case Iop_ExpCmpNE64:
Mark Wielaard 75fe92
       case Iop_CasCmpNE32:
Mark Wielaard 75fe92
       case Iop_CasCmpNE64:
Mark Wielaard 75fe92
          result = S390_CC_NE;
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 5db3f929c43bf46f4707178706cfe90f43acdd19
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Wed Apr 7 12:30:20 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Add convenience function mkV128()
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Provide mkV128() as a short-hand notation for creating a vector constant from
Mark Wielaard 75fe92
    a bit pattern, similar to other such functions like mkU64().
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
index 339377007..7d54cb551 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
@@ -376,6 +376,13 @@ mkU64(ULong value)
Mark Wielaard 75fe92
    return IRExpr_Const(IRConst_U64(value));
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+/* Create an expression node for a 128-bit vector constant */
Mark Wielaard 75fe92
+static __inline__ IRExpr *
Mark Wielaard 75fe92
+mkV128(UShort value)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   return IRExpr_Const(IRConst_V128(value));
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 /* Create an expression node for a 32-bit floating point constant
Mark Wielaard 75fe92
    whose value is given by a bit pattern. */
Mark Wielaard 75fe92
 static __inline__ IRExpr *
Mark Wielaard 75fe92
@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4)
Mark Wielaard 75fe92
 static const HChar *
Mark Wielaard 75fe92
 s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused)))
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2)));
Mark Wielaard 75fe92
+   put_vr_qw(v1, mkV128(i2));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    return "vgbm";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4)
Mark Wielaard 75fe92
    switch(type) {
Mark Wielaard 75fe92
    case Ity_I8:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2)));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0001000100010001));
Mark Wielaard 75fe92
+      mask = mkV128(0b0001000100010001);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    case Ity_I16:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0011001100110011));
Mark Wielaard 75fe92
+      mask = mkV128(0b0011001100110011);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("s390_irgen_VSUM: invalid type ");
Mark Wielaard 75fe92
@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4)
Mark Wielaard 75fe92
    switch(type) {
Mark Wielaard 75fe92
    case Ity_I16:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2)));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0000001100000011));
Mark Wielaard 75fe92
+      mask = mkV128(0b0000001100000011);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    case Ity_I32:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0000111100001111));
Mark Wielaard 75fe92
+      mask = mkV128(0b0000111100001111);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("s390_irgen_VSUMG: invalid type ");
Mark Wielaard 75fe92
@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4)
Mark Wielaard 75fe92
    switch(type) {
Mark Wielaard 75fe92
    case Ity_I32:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2)));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0000000000001111));
Mark Wielaard 75fe92
+      mask = mkV128(0b0000000000001111);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    case Ity_I64:
Mark Wielaard 75fe92
       sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2));
Mark Wielaard 75fe92
-      mask = IRExpr_Const(IRConst_V128(0b0000000011111111));
Mark Wielaard 75fe92
+      mask = mkV128(0b0000000011111111);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("s390_irgen_VSUMQ: invalid type ");
Mark Wielaard 75fe92
@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
Mark Wielaard 75fe92
          assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
Mark Wielaard 75fe92
       }
Mark Wielaard 75fe92
       put_vr_qw(v1, mkite(mkexpr(cond),
Mark Wielaard 75fe92
-                          IRExpr_Const(IRConst_V128(0xffff)),
Mark Wielaard 75fe92
-                          IRExpr_Const(IRConst_V128(0))));
Mark Wielaard 75fe92
+                          mkV128(0xffff),
Mark Wielaard 75fe92
+                          mkV128(0)));
Mark Wielaard 75fe92
       if (s390_vr_is_cs_set(m6)) {
Mark Wielaard 75fe92
          IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
          assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit e78bd78d3043729033b426218ab8c6dae9c51e96
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Thu Mar 18 18:01:10 2021 +0100
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    The z/Architecture instructions "vector string range compare" (VSTRC),
Mark Wielaard 75fe92
    "vector find any element equal" (VFAE), and "vector find element
Mark Wielaard 75fe92
    equal" (VFEE) are each implemented with a dirty helper that executes the
Mark Wielaard 75fe92
    instruction.  Unfortunately this approach leads to memcheck false
Mark Wielaard 75fe92
    positives, because these instructions may yield a defined result even if
Mark Wielaard 75fe92
    parts of the input vectors are undefined.  There are multiple ways this
Mark Wielaard 75fe92
    can happen: Wherever the flags in the fourth operand to VSTRC indicate
Mark Wielaard 75fe92
    "match always" or "match never", the corresponding elements in the third
Mark Wielaard 75fe92
    operand don't affect the result.  The same is true for the elements
Mark Wielaard 75fe92
    following the first zero-element in the second operand if the ZS flag is
Mark Wielaard 75fe92
    set, or for the elements following the first matching element, if any.
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Re-implement the instructions without dirty helpers and transform into
Mark Wielaard 75fe92
    lengthy IR instead.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
index 905429015..49b6cd5dd 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
@@ -265,11 +265,8 @@ typedef enum {
Mark Wielaard 75fe92
    S390_VEC_OP_INVALID = 0,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKS,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKLS,
Mark Wielaard 75fe92
-   S390_VEC_OP_VFAE,
Mark Wielaard 75fe92
-   S390_VEC_OP_VFEE,
Mark Wielaard 75fe92
    S390_VEC_OP_VFENE,
Mark Wielaard 75fe92
    S390_VEC_OP_VISTR,
Mark Wielaard 75fe92
-   S390_VEC_OP_VSTRC,
Mark Wielaard 75fe92
    S390_VEC_OP_VCEQ,
Mark Wielaard 75fe92
    S390_VEC_OP_VTM,
Mark Wielaard 75fe92
    S390_VEC_OP_VGFM,
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
index b71b621ae..63d2e8ce5 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
       {0x00, 0x00}, /* invalid */
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
Mark Wielaard 75fe92
-      [S390_VEC_OP_VFAE]  = {0xe7, 0x82},
Mark Wielaard 75fe92
-      [S390_VEC_OP_VFEE]  = {0xe7, 0x80},
Mark Wielaard 75fe92
       [S390_VEC_OP_VFENE] = {0xe7, 0x81},
Mark Wielaard 75fe92
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
Mark Wielaard 75fe92
-      [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
Mark Wielaard 75fe92
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
Mark Wielaard 75fe92
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
Mark Wielaard 75fe92
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
Mark Wielaard 75fe92
@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    case S390_VEC_OP_VPKS:
Mark Wielaard 75fe92
    case S390_VEC_OP_VPKLS:
Mark Wielaard 75fe92
-   case S390_VEC_OP_VFAE:
Mark Wielaard 75fe92
-   case S390_VEC_OP_VFEE:
Mark Wielaard 75fe92
    case S390_VEC_OP_VFENE:
Mark Wielaard 75fe92
    case S390_VEC_OP_VCEQ:
Mark Wielaard 75fe92
    case S390_VEC_OP_VGFM:
Mark Wielaard 75fe92
@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
       the_insn.VRR.m5 = d->m5;
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   case S390_VEC_OP_VSTRC:
Mark Wielaard 75fe92
    case S390_VEC_OP_VGFMA:
Mark Wielaard 75fe92
    case S390_VEC_OP_VMAH:
Mark Wielaard 75fe92
    case S390_VEC_OP_VMALH:
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
index 7d54cb551..26a947813 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2)
Mark Wielaard 75fe92
    return "ppno";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-static const HChar *
Mark Wielaard 75fe92
-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   IRDirty* d;
Mark Wielaard 75fe92
-   IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
+enum s390_VStrX {
Mark Wielaard 75fe92
+   s390_VStrX_VSTRC,
Mark Wielaard 75fe92
+   s390_VStrX_VFAE,
Mark Wielaard 75fe92
+   s390_VStrX_VFEE
Mark Wielaard 75fe92
+};
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   /* Check for specification exception */
Mark Wielaard 75fe92
-   vassert(m4 < 3);
Mark Wielaard 75fe92
+#define S390_VEC_OP3(m, op0, op1, op2) /* IROp for lane size 8 << m */  \
Mark Wielaard 75fe92
+   ((m) == 0 ? (op0) : (m) == 1 ? (op1) : (m) == 2 ? (op2) : Iop_INVALID)
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
Mark Wielaard 75fe92
-   details.op = S390_VEC_OP_VFAE;
Mark Wielaard 75fe92
-   details.v1 = v1;
Mark Wielaard 75fe92
-   details.v2 = v2;
Mark Wielaard 75fe92
-   details.v3 = v3;
Mark Wielaard 75fe92
-   details.m4 = m4;
Mark Wielaard 75fe92
-   details.m5 = m5;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
Mark Wielaard 75fe92
-                         &s390x_dirtyhelper_vec_op,
Mark Wielaard 75fe92
-                         mkIRExprVec_2(IRExpr_GSPTR(),
Mark Wielaard 75fe92
-                                       mkU64(details.serialized)));
Mark Wielaard 75fe92
+/* Helper function for transforming VSTRC, VFAE, or VFEE.  These instructions
Mark Wielaard 75fe92
+   share much of the same logic. */
Mark Wielaard 75fe92
+static void
Mark Wielaard 75fe92
+s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5,
Mark Wielaard 75fe92
+                 UChar m6, enum s390_VStrX which_insn)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   IRTemp op2 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp op3 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRExpr* tmp;
Mark Wielaard 75fe92
+   IRExpr* match = NULL;
Mark Wielaard 75fe92
+   UChar bitwidth = 8 << m5;
Mark Wielaard 75fe92
+   UChar n_elem = 16 >> m5;
Mark Wielaard 75fe92
+   IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4);
Mark Wielaard 75fe92
+   IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4);
Mark Wielaard 75fe92
+   IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4);
Mark Wielaard 75fe92
+   IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4);
Mark Wielaard 75fe92
+   IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16,
Mark Wielaard 75fe92
+                                    Iop_CmpEQ16x8, Iop_CmpEQ32x4);
Mark Wielaard 75fe92
+   IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16,
Mark Wielaard 75fe92
+                                    Iop_CmpGT16Ux8, Iop_CmpGT32Ux4);
Mark Wielaard 75fe92
+   IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16,
Mark Wielaard 75fe92
+                                      Iop_GetElem16x8, Iop_GetElem32x4);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   assign(op2, get_vr_qw(v2));
Mark Wielaard 75fe92
+   assign(op3, get_vr_qw(v3));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   switch (which_insn) {
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   case s390_VStrX_VSTRC: {
Mark Wielaard 75fe92
+      IRTemp op4 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      assign(op4, get_vr_qw(v4));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      /* Mask off insignificant range boundaries from op3, i.e., all those for
Mark Wielaard 75fe92
+         which the corresponding field in op4 has all or no bits set ("match
Mark Wielaard 75fe92
+         always" / "match never"). */
Mark Wielaard 75fe92
+      IRTemp bounds = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      tmp = unop(Iop_NotV128,
Mark Wielaard 75fe92
+                 binop(cmpeq_op, mkV128(0),
Mark Wielaard 75fe92
+                       binop(sar_op,
Mark Wielaard 75fe92
+                             binop(sub_op,
Mark Wielaard 75fe92
+                                   binop(sar_op, mkexpr(op4),
Mark Wielaard 75fe92
+                                         mkU8(bitwidth - 3)),
Mark Wielaard 75fe92
+                                   mkV128(-1)),
Mark Wielaard 75fe92
+                             mkU8(1))));
Mark Wielaard 75fe92
+      assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      IRTemp flags_eq = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      IRTemp flags_lt = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      IRTemp flags_gt = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1)));
Mark Wielaard 75fe92
+      assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)),
Mark Wielaard 75fe92
+                             mkU8(bitwidth - 1)));
Mark Wielaard 75fe92
+      assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)),
Mark Wielaard 75fe92
+                             mkU8(bitwidth - 1)));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      for (UChar idx = 0; idx < n_elem; idx += 2) {
Mark Wielaard 75fe92
+         /* Match according to the even/odd pairs in op3 and op4 at idx */
Mark Wielaard 75fe92
+         IRTemp part[2];
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+         for (UChar j = 0; j < 2; j++) {
Mark Wielaard 75fe92
+            IRTemp a = newTemp(Ity_V128);
Mark Wielaard 75fe92
+            assign(a, unop(dup_op,
Mark Wielaard 75fe92
+                           binop(getelem_op, mkexpr(bounds), mkU8(idx + j))));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+            IRExpr* m[] = {
Mark Wielaard 75fe92
+               binop(cmpeq_op, mkexpr(op2), mkexpr(a)),
Mark Wielaard 75fe92
+               binop(cmpgt_op, mkexpr(a), mkexpr(op2)),
Mark Wielaard 75fe92
+               binop(cmpgt_op, mkexpr(op2), mkexpr(a))
Mark Wielaard 75fe92
+            };
Mark Wielaard 75fe92
+            IRExpr* f[] = {
Mark Wielaard 75fe92
+               unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))),
Mark Wielaard 75fe92
+               unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))),
Mark Wielaard 75fe92
+               unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j)))
Mark Wielaard 75fe92
+            };
Mark Wielaard 75fe92
+            part[j] = newTemp(Ity_V128);
Mark Wielaard 75fe92
+            assign(part[j], binop(Iop_OrV128,
Mark Wielaard 75fe92
+                                  binop(Iop_OrV128,
Mark Wielaard 75fe92
+                                        binop(Iop_AndV128, f[0], m[0]),
Mark Wielaard 75fe92
+                                        binop(Iop_AndV128, f[1], m[1])),
Mark Wielaard 75fe92
+                                  binop(Iop_AndV128, f[2], m[2])));
Mark Wielaard 75fe92
+         }
Mark Wielaard 75fe92
+         tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1]));
Mark Wielaard 75fe92
+         match = idx == 0 ? tmp : binop(Iop_OrV128, match, tmp);
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   d->nFxState = 3;
Mark Wielaard 75fe92
-   vex_bzero(&d->fxState, sizeof(d->fxState));
Mark Wielaard 75fe92
-   d->fxState[0].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[0].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].fx     = Ifx_Write;
Mark Wielaard 75fe92
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].size   = sizeof(V128);
Mark Wielaard 75fe92
+   case s390_VStrX_VFAE:
Mark Wielaard 75fe92
+      for (UChar idx = 0; idx < n_elem; idx++) {
Mark Wielaard 75fe92
+         IRTemp a = newTemp(Ity_V128);
Mark Wielaard 75fe92
+         assign(a, binop(cmpeq_op, mkexpr(op2),
Mark Wielaard 75fe92
+                         unop(dup_op,
Mark Wielaard 75fe92
+                              binop(getelem_op, mkexpr(op3), mkU8(idx)))));
Mark Wielaard 75fe92
+         match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   stmt(IRStmt_Dirty(d));
Mark Wielaard 75fe92
+   case s390_VStrX_VFEE:
Mark Wielaard 75fe92
+      match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3));
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   if (s390_vr_is_cs_set(m5)) {
Mark Wielaard 75fe92
-      s390_cc_set(cc);
Mark Wielaard 75fe92
+   default:
Mark Wielaard 75fe92
+      vpanic("s390_irgen_VStrX: unknown insn");
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   return "vfae";
Mark Wielaard 75fe92
-}
Mark Wielaard 75fe92
+   /* Invert first intermediate result if requested */
Mark Wielaard 75fe92
+   if (m6 & 8)
Mark Wielaard 75fe92
+      match = unop(Iop_NotV128, match);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-static const HChar *
Mark Wielaard 75fe92
-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
-{
Mark Wielaard 75fe92
-   IRDirty* d;
Mark Wielaard 75fe92
-   IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
+   IRTemp inter1 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp inter2 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp accu = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   assign(inter1, match);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   /* Check for specification exception */
Mark Wielaard 75fe92
-   vassert(m4 < 3);
Mark Wielaard 75fe92
-   vassert((m5 & 0b1100) == 0);
Mark Wielaard 75fe92
+   /* Determine second intermediate and accumulated result */
Mark Wielaard 75fe92
+   if (s390_vr_is_zs_set(m6)) {
Mark Wielaard 75fe92
+      assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0)));
Mark Wielaard 75fe92
+      assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2)));
Mark Wielaard 75fe92
+   } else {
Mark Wielaard 75fe92
+      assign(inter2, mkV128(0));
Mark Wielaard 75fe92
+      assign(accu, mkexpr(inter1));
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
Mark Wielaard 75fe92
-   details.op = S390_VEC_OP_VFEE;
Mark Wielaard 75fe92
-   details.v1 = v1;
Mark Wielaard 75fe92
-   details.v2 = v2;
Mark Wielaard 75fe92
-   details.v3 = v3;
Mark Wielaard 75fe92
-   details.m4 = m4;
Mark Wielaard 75fe92
-   details.m5 = m5;
Mark Wielaard 75fe92
+   IRTemp accu0 = newTemp(Ity_I64);
Mark Wielaard 75fe92
+   IRTemp is_match0 = newTemp(Ity_I1);
Mark Wielaard 75fe92
+   IRTemp mismatch_bits = newTemp(Ity_I64);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
Mark Wielaard 75fe92
-                         &s390x_dirtyhelper_vec_op,
Mark Wielaard 75fe92
-                         mkIRExprVec_2(IRExpr_GSPTR(),
Mark Wielaard 75fe92
-                                       mkU64(details.serialized)));
Mark Wielaard 75fe92
+   assign(accu0, unop(Iop_V128HIto64, mkexpr(accu)));
Mark Wielaard 75fe92
+   assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0)));
Mark Wielaard 75fe92
+   assign(mismatch_bits, unop(Iop_ClzNat64,
Mark Wielaard 75fe92
+                              mkite(mkexpr(is_match0), mkexpr(accu0),
Mark Wielaard 75fe92
+                                    unop(Iop_V128to64, mkexpr(accu)))));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   d->nFxState = 3;
Mark Wielaard 75fe92
-   vex_bzero(&d->fxState, sizeof(d->fxState));
Mark Wielaard 75fe92
-   d->fxState[0].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[0].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].fx     = Ifx_Write;
Mark Wielaard 75fe92
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].size   = sizeof(V128);
Mark Wielaard 75fe92
+   if (m6 & 4) {
Mark Wielaard 75fe92
+      put_vr_qw(v1, mkexpr(inter1));
Mark Wielaard 75fe92
+   } else {
Mark Wielaard 75fe92
+      /* Determine byte position of first match */
Mark Wielaard 75fe92
+      tmp = binop(Iop_Add64,
Mark Wielaard 75fe92
+                  binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)),
Mark Wielaard 75fe92
+                  mkite(mkexpr(is_match0), mkU64(0), mkU64(8)));
Mark Wielaard 75fe92
+      put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   stmt(IRStmt_Dirty(d));
Mark Wielaard 75fe92
+   if (s390_vr_is_cs_set(m6)) {
Mark Wielaard 75fe92
+      /* Set condition code depending on...
Mark Wielaard 75fe92
+                   zero found
Mark Wielaard 75fe92
+                      n  y
Mark Wielaard 75fe92
+                    +------
Mark Wielaard 75fe92
+         match    n | 3  0
Mark Wielaard 75fe92
+          found   y | 1  2   */
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   if (s390_vr_is_cs_set(m5)) {
Mark Wielaard 75fe92
+      IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      tmp = binop(Iop_Shr64,
Mark Wielaard 75fe92
+                  mkite(mkexpr(is_match0),
Mark Wielaard 75fe92
+                        unop(Iop_V128HIto64, mkexpr(inter1)),
Mark Wielaard 75fe92
+                        unop(Iop_V128to64, mkexpr(inter1))),
Mark Wielaard 75fe92
+                  unop(Iop_64to8,
Mark Wielaard 75fe92
+                       binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits))));
Mark Wielaard 75fe92
+      tmp = binop(Iop_Shl64, tmp, mkU8(1));
Mark Wielaard 75fe92
+      if (s390_vr_is_zs_set(m6)) {
Mark Wielaard 75fe92
+         tmp = binop(Iop_Xor64, tmp,
Mark Wielaard 75fe92
+                     mkite(binop(Iop_ExpCmpNE64, mkU64(0),
Mark Wielaard 75fe92
+                                 binop(Iop_Or64,
Mark Wielaard 75fe92
+                                       unop(Iop_V128HIto64, mkexpr(inter2)),
Mark Wielaard 75fe92
+                                       unop(Iop_V128to64, mkexpr(inter2)))),
Mark Wielaard 75fe92
+                           mkU64(0),
Mark Wielaard 75fe92
+                           mkU64(3)));
Mark Wielaard 75fe92
+      } else {
Mark Wielaard 75fe92
+         tmp = binop(Iop_Xor64, tmp, mkU64(3));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      assign(cc, tmp);
Mark Wielaard 75fe92
       s390_cc_set(cc);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
+   dis_res->hint = Dis_HintVerbose;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static const HChar *
Mark Wielaard 75fe92
+s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   s390_insn_assert("vfae", m4 <= 2);
Mark Wielaard 75fe92
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE);
Mark Wielaard 75fe92
+   return "vfae";
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static const HChar *
Mark Wielaard 75fe92
+s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3));
Mark Wielaard 75fe92
+   s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE);
Mark Wielaard 75fe92
    return "vfee";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
Mark Wielaard 75fe92
 static const HChar *
Mark Wielaard 75fe92
 s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   IRDirty* d;
Mark Wielaard 75fe92
-   IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   /* Check for specification exception */
Mark Wielaard 75fe92
-   vassert(m5 < 3);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
Mark Wielaard 75fe92
-   details.op = S390_VEC_OP_VSTRC;
Mark Wielaard 75fe92
-   details.v1 = v1;
Mark Wielaard 75fe92
-   details.v2 = v2;
Mark Wielaard 75fe92
-   details.v3 = v3;
Mark Wielaard 75fe92
-   details.v4 = v4;
Mark Wielaard 75fe92
-   details.m4 = m5;
Mark Wielaard 75fe92
-   details.m5 = m6;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
Mark Wielaard 75fe92
-                         &s390x_dirtyhelper_vec_op,
Mark Wielaard 75fe92
-                         mkIRExprVec_2(IRExpr_GSPTR(),
Mark Wielaard 75fe92
-                                       mkU64(details.serialized)));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   d->nFxState = 4;
Mark Wielaard 75fe92
-   vex_bzero(&d->fxState, sizeof(d->fxState));
Mark Wielaard 75fe92
-   d->fxState[0].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[0].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[2].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[3].fx     = Ifx_Write;
Mark Wielaard 75fe92
-   d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[3].size   = sizeof(V128);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   stmt(IRStmt_Dirty(d));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   if (s390_vr_is_cs_set(m6)) {
Mark Wielaard 75fe92
-      s390_cc_set(cc);
Mark Wielaard 75fe92
-   }
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
+   s390_insn_assert("vstrc", m5 <= 2);
Mark Wielaard 75fe92
+   s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC);
Mark Wielaard 75fe92
    return "vstrc";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 4f17a067c4f8245c05611d6e8aa36e8841bab376
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Tue Mar 2 14:12:29 2021 +0100
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    Bug 434296 - s390x: Rework IR conversion of VFENE
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    So far the z/Architecture instruction "vector find element not
Mark Wielaard 75fe92
    equal" (VFENE) is transformed to a loop.  This can cause spurious
Mark Wielaard 75fe92
    "conditional jump or move depends on uninitialised value(s)" messages by
Mark Wielaard 75fe92
    memcheck.  Re-implement without a loop.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
index 49b6cd5dd..caec3108e 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
@@ -265,7 +265,6 @@ typedef enum {
Mark Wielaard 75fe92
    S390_VEC_OP_INVALID = 0,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKS,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKLS,
Mark Wielaard 75fe92
-   S390_VEC_OP_VFENE,
Mark Wielaard 75fe92
    S390_VEC_OP_VISTR,
Mark Wielaard 75fe92
    S390_VEC_OP_VCEQ,
Mark Wielaard 75fe92
    S390_VEC_OP_VTM,
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
index 63d2e8ce5..2188ce5c1 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
       {0x00, 0x00}, /* invalid */
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
Mark Wielaard 75fe92
-      [S390_VEC_OP_VFENE] = {0xe7, 0x81},
Mark Wielaard 75fe92
       [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
Mark Wielaard 75fe92
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
Mark Wielaard 75fe92
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
Mark Wielaard 75fe92
@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    case S390_VEC_OP_VPKS:
Mark Wielaard 75fe92
    case S390_VEC_OP_VPKLS:
Mark Wielaard 75fe92
-   case S390_VEC_OP_VFENE:
Mark Wielaard 75fe92
    case S390_VEC_OP_VCEQ:
Mark Wielaard 75fe92
    case S390_VEC_OP_VGFM:
Mark Wielaard 75fe92
    case S390_VEC_OP_VCH:
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
index 26a947813..c8dc3ec18 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
 static const HChar *
Mark Wielaard 75fe92
 s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   const Bool negateComparison = True;
Mark Wielaard 75fe92
-   const IRType type = s390_vr_get_type(m4);
Mark Wielaard 75fe92
+   s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   /* Check for specification exception */
Mark Wielaard 75fe92
-   vassert(m4 < 3);
Mark Wielaard 75fe92
-   vassert((m5 & 0b1100) == 0);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   static const IROp elementGetters[] = {
Mark Wielaard 75fe92
-      Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4
Mark Wielaard 75fe92
+   static const IROp compare_op[3] = {
Mark Wielaard 75fe92
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
Mark Wielaard 75fe92
    };
Mark Wielaard 75fe92
-   IROp getter = elementGetters[m4];
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   static const IROp elementComparators[] = {
Mark Wielaard 75fe92
-      Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32
Mark Wielaard 75fe92
+   static const IROp abs_op[3] = {
Mark Wielaard 75fe92
+      Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4
Mark Wielaard 75fe92
    };
Mark Wielaard 75fe92
-   IROp comparator = elementComparators[m4];
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32};
Mark Wielaard 75fe92
-   IROp converter = resultConverter[m4];
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   IRTemp isZeroElem;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   IRTemp counter = newTemp(Ity_I64);
Mark Wielaard 75fe92
-   assign(counter, get_counter_dw0());
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   IRTemp arg1 = newTemp(type);
Mark Wielaard 75fe92
-   assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter))));
Mark Wielaard 75fe92
-   IRTemp arg2 = newTemp(type);
Mark Wielaard 75fe92
-   assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter))));
Mark Wielaard 75fe92
+   IRTemp op2 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp op3 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp op2zero = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp diff = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp diff0 = newTemp(Ity_I64);
Mark Wielaard 75fe92
+   IRTemp neq0 = newTemp(Ity_I1);
Mark Wielaard 75fe92
+   IRTemp samebits = newTemp(Ity_I64);
Mark Wielaard 75fe92
+   IRExpr* tmp;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   IRTemp isGoodPair = newTemp(Ity_I1);
Mark Wielaard 75fe92
-   if(negateComparison) {
Mark Wielaard 75fe92
-      assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1),
Mark Wielaard 75fe92
-                                              mkexpr(arg2))));
Mark Wielaard 75fe92
-   } else {
Mark Wielaard 75fe92
-      assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2)));
Mark Wielaard 75fe92
-   }
Mark Wielaard 75fe92
+   assign(op2, get_vr_qw(v2));
Mark Wielaard 75fe92
+   assign(op3, get_vr_qw(v3));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   if(s390_vr_is_zs_set(m5)) {
Mark Wielaard 75fe92
-      isZeroElem = newTemp(Ity_I1);
Mark Wielaard 75fe92
-      assign(isZeroElem, binop(comparator, mkexpr(arg1),
Mark Wielaard 75fe92
-                               unop(converter, mkU64(0))));
Mark Wielaard 75fe92
+   tmp = mkV128(0);
Mark Wielaard 75fe92
+   if (s390_vr_is_zs_set(m5)) {
Mark Wielaard 75fe92
+      tmp = binop(compare_op[m4], mkexpr(op2), tmp);
Mark Wielaard 75fe92
+      if (s390_vr_is_cs_set(m5) && v3 != v2) {
Mark Wielaard 75fe92
+         /* Count leading equal bits in the terminating element too */
Mark Wielaard 75fe92
+         tmp = unop(abs_op[m4], tmp);
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      assign(op2zero, tmp);
Mark Wielaard 75fe92
+      tmp = mkexpr(op2zero);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   static const UChar invalidIndices[] = {16, 8, 4};
Mark Wielaard 75fe92
-   const UChar invalidIndex = invalidIndices[m4];
Mark Wielaard 75fe92
-   IRTemp endOfVectorIsReached = newTemp(Ity_I1);
Mark Wielaard 75fe92
-   assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter),
Mark Wielaard 75fe92
-                                      mkU64(invalidIndex)));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
Mark Wielaard 75fe92
-   IRExpr* shouldBreak = binop(Iop_Or32,
Mark Wielaard 75fe92
-                               unop(Iop_1Uto32, mkexpr(isGoodPair)),
Mark Wielaard 75fe92
-                               unop(Iop_1Uto32, mkexpr(endOfVectorIsReached))
Mark Wielaard 75fe92
-                              );
Mark Wielaard 75fe92
-   if(s390_vr_is_zs_set(m5)) {
Mark Wielaard 75fe92
-      shouldBreak = binop(Iop_Or32,
Mark Wielaard 75fe92
-                          shouldBreak,
Mark Wielaard 75fe92
-                          unop(Iop_1Uto32, mkexpr(isZeroElem)));
Mark Wielaard 75fe92
-   }
Mark Wielaard 75fe92
-   iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0)));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1));
Mark Wielaard 75fe92
-   if(m4 > 0) {
Mark Wielaard 75fe92
-      /* We should return index of byte but we found index of element in
Mark Wielaard 75fe92
-         general case.
Mark Wielaard 75fe92
-            if byte elem (m4 == 0) then indexOfByte = indexOfElement
Mark Wielaard 75fe92
-            if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement
Mark Wielaard 75fe92
-                                                        = indexOfElement << 1
Mark Wielaard 75fe92
-            if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement
Mark Wielaard 75fe92
-                                                    = indexOfElement << 2
Mark Wielaard 75fe92
-      */
Mark Wielaard 75fe92
-      foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4));
Mark Wielaard 75fe92
+   if (v3 != v2) {
Mark Wielaard 75fe92
+      tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3));
Mark Wielaard 75fe92
+      if (s390_vr_is_zs_set(m5))
Mark Wielaard 75fe92
+         tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero));
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   IRTemp result = newTemp(Ity_I64);
Mark Wielaard 75fe92
-   assign(result, mkite(mkexpr(endOfVectorIsReached),
Mark Wielaard 75fe92
-                        mkU64(16),
Mark Wielaard 75fe92
-                        foundIndex));
Mark Wielaard 75fe92
-   put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0)));
Mark Wielaard 75fe92
+   assign(diff, tmp);
Mark Wielaard 75fe92
+   assign(diff0, unop(Iop_V128HIto64, mkexpr(diff)));
Mark Wielaard 75fe92
+   assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0)));
Mark Wielaard 75fe92
+   assign(samebits, unop(Iop_ClzNat64,
Mark Wielaard 75fe92
+                         mkite(mkexpr(neq0), mkexpr(diff0),
Mark Wielaard 75fe92
+                               unop(Iop_V128to64, mkexpr(diff)))));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+   /* Determine the byte size of the initial equal-elements sequence */
Mark Wielaard 75fe92
+   tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3));
Mark Wielaard 75fe92
+   if (m4 != 0)
Mark Wielaard 75fe92
+      tmp = binop(Iop_Shl64, tmp, mkU8(m4));
Mark Wielaard 75fe92
+   tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8)));
Mark Wielaard 75fe92
+   put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0)));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    if (s390_vr_is_cs_set(m5)) {
Mark Wielaard 75fe92
-      static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64};
Mark Wielaard 75fe92
-      IROp to64Converter = to64Converters[m4];
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-      IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U,
Mark Wielaard 75fe92
-                                         unop(to64Converter, mkexpr(arg1)),
Mark Wielaard 75fe92
-                                         unop(to64Converter, mkexpr(arg2)));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-      IRExpr* ccexp = mkite(binop(Iop_CmpEQ32,
Mark Wielaard 75fe92
-                                  unop(Iop_1Uto32, mkexpr(isGoodPair)),
Mark Wielaard 75fe92
-                                  mkU32(1)),
Mark Wielaard 75fe92
-                            mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)),
Mark Wielaard 75fe92
-                            mkU64(3));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-      if(s390_vr_is_zs_set(m5)) {
Mark Wielaard 75fe92
-         IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2),
Mark Wielaard 75fe92
-                                    unop(converter, mkU64(0)));
Mark Wielaard 75fe92
-         IRExpr* bothArgsAreZero = binop(Iop_And32,
Mark Wielaard 75fe92
-                                         unop(Iop_1Uto32, mkexpr(isZeroElem)),
Mark Wielaard 75fe92
-                                         unop(Iop_1Uto32, arg2IsZero));
Mark Wielaard 75fe92
-         ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)),
Mark Wielaard 75fe92
-                       mkU64(0),
Mark Wielaard 75fe92
-                       ccexp);
Mark Wielaard 75fe92
-      }
Mark Wielaard 75fe92
+      /* Set condition code as follows --
Mark Wielaard 75fe92
+         0: operands equal up to and including zero element
Mark Wielaard 75fe92
+         1: op2 < op3    2: op2 > op3    3: op2 = op3 */
Mark Wielaard 75fe92
       IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
-      assign(cc, ccexp);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
+      if (v3 == v2) {
Mark Wielaard 75fe92
+         tmp = mkU64(0);
Mark Wielaard 75fe92
+      } else {
Mark Wielaard 75fe92
+         IRTemp shift = newTemp(Ity_I8);
Mark Wielaard 75fe92
+         IRExpr* op2half = mkite(mkexpr(neq0),
Mark Wielaard 75fe92
+                                 unop(Iop_V128HIto64, mkexpr(op2)),
Mark Wielaard 75fe92
+                                 unop(Iop_V128to64, mkexpr(op2)));
Mark Wielaard 75fe92
+         IRExpr* op3half = mkite(mkexpr(neq0),
Mark Wielaard 75fe92
+                                 unop(Iop_V128HIto64, mkexpr(op3)),
Mark Wielaard 75fe92
+                                 unop(Iop_V128to64, mkexpr(op3)));
Mark Wielaard 75fe92
+         assign(shift, unop(Iop_64to8,
Mark Wielaard 75fe92
+                            binop(Iop_Sub64, mkU64(63), mkexpr(samebits))));
Mark Wielaard 75fe92
+         tmp = binop(Iop_Or64,
Mark Wielaard 75fe92
+                     binop(Iop_Shl64,
Mark Wielaard 75fe92
+                           binop(Iop_And64, mkU64(1),
Mark Wielaard 75fe92
+                                 binop(Iop_Shr64, op2half, mkexpr(shift))),
Mark Wielaard 75fe92
+                           mkU8(1)),
Mark Wielaard 75fe92
+                     binop(Iop_And64, mkU64(1),
Mark Wielaard 75fe92
+                           binop(Iop_Shr64, op3half, mkexpr(shift))));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)),
Mark Wielaard 75fe92
+                       mkU64(3), tmp));
Mark Wielaard 75fe92
       s390_cc_set(cc);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   put_counter_dw0(mkU64(0));
Mark Wielaard 75fe92
+   dis_res->hint = Dis_HintVerbose;
Mark Wielaard 75fe92
    return "vfene";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Tue Apr 27 20:13:26 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    Bug 434296 - s390x: Rework IR conversion of VISTR
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    The z/Architecture instruction VISTR is currently transformed to a dirty
Mark Wielaard 75fe92
    helper that executes the instruction.  This can cause false positives with
Mark Wielaard 75fe92
    memcheck if the input string contains undefined characters after the
Mark Wielaard 75fe92
    string terminator.  Implement without a dirty helper and emulate the
Mark Wielaard 75fe92
    instruction instead.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
index caec3108e..24f3798c1 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_defs.h
Mark Wielaard 75fe92
@@ -265,7 +265,6 @@ typedef enum {
Mark Wielaard 75fe92
    S390_VEC_OP_INVALID = 0,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKS,
Mark Wielaard 75fe92
    S390_VEC_OP_VPKLS,
Mark Wielaard 75fe92
-   S390_VEC_OP_VISTR,
Mark Wielaard 75fe92
    S390_VEC_OP_VCEQ,
Mark Wielaard 75fe92
    S390_VEC_OP_VTM,
Mark Wielaard 75fe92
    S390_VEC_OP_VGFM,
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
index 2188ce5c1..1e04f601a 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_helpers.c
Mark Wielaard 75fe92
@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
       {0x00, 0x00}, /* invalid */
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
Mark Wielaard 75fe92
       [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
Mark Wielaard 75fe92
-      [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
Mark Wielaard 75fe92
       [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
Mark Wielaard 75fe92
       [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
Mark Wielaard 75fe92
       [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
Mark Wielaard 75fe92
@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
Mark Wielaard 75fe92
    the_insn.VRR.op2 = opcodes[d->op][1];
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    switch(d->op) {
Mark Wielaard 75fe92
-   case S390_VEC_OP_VISTR:
Mark Wielaard 75fe92
-      the_insn.VRR.v1 = 1;
Mark Wielaard 75fe92
-      the_insn.VRR.v2 = 2;
Mark Wielaard 75fe92
-      the_insn.VRR.rxb = 0b1100;
Mark Wielaard 75fe92
-      the_insn.VRR.m4 = d->m4;
Mark Wielaard 75fe92
-      the_insn.VRR.m5 = d->m5;
Mark Wielaard 75fe92
-      break;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
    case S390_VEC_OP_VTM:
Mark Wielaard 75fe92
       the_insn.VRR.v1 = 2;
Mark Wielaard 75fe92
       the_insn.VRR.v2 = 3;
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
index c8dc3ec18..dfea54259 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
Mark Wielaard 75fe92
 static const HChar *
Mark Wielaard 75fe92
 s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   IRDirty* d;
Mark Wielaard 75fe92
-   IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   /* Check for specification exception */
Mark Wielaard 75fe92
-   vassert(m3 < 3);
Mark Wielaard 75fe92
-   vassert((m5 & 0b1110) == 0);
Mark Wielaard 75fe92
+   s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   s390x_vec_op_details_t details = { .serialized = 0ULL };
Mark Wielaard 75fe92
-   details.op = S390_VEC_OP_VISTR;
Mark Wielaard 75fe92
-   details.v1 = v1;
Mark Wielaard 75fe92
-   details.v2 = v2;
Mark Wielaard 75fe92
-   details.m4 = m3;
Mark Wielaard 75fe92
-   details.m5 = m5;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
Mark Wielaard 75fe92
-                         &s390x_dirtyhelper_vec_op,
Mark Wielaard 75fe92
-                         mkIRExprVec_2(IRExpr_GSPTR(),
Mark Wielaard 75fe92
-                                       mkU64(details.serialized)));
Mark Wielaard 75fe92
+   static const IROp compare_op[3] = {
Mark Wielaard 75fe92
+      Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4
Mark Wielaard 75fe92
+   };
Mark Wielaard 75fe92
+   IRExpr* t;
Mark Wielaard 75fe92
+   IRTemp op2 = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp op2term = newTemp(Ity_V128);
Mark Wielaard 75fe92
+   IRTemp mask = newTemp(Ity_V128);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   d->nFxState = 2;
Mark Wielaard 75fe92
-   vex_bzero(&d->fxState, sizeof(d->fxState));
Mark Wielaard 75fe92
-   d->fxState[0].fx     = Ifx_Read;
Mark Wielaard 75fe92
-   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[0].size   = sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].fx     = Ifx_Write;
Mark Wielaard 75fe92
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
Mark Wielaard 75fe92
-   d->fxState[1].size   = sizeof(V128);
Mark Wielaard 75fe92
+   assign(op2, get_vr_qw(v2));
Mark Wielaard 75fe92
+   assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0)));
Mark Wielaard 75fe92
+   t = mkexpr(op2term);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   stmt(IRStmt_Dirty(d));
Mark Wielaard 75fe92
+   for (UChar i = m3; i < 4; i++) {
Mark Wielaard 75fe92
+      IRTemp s = newTemp(Ity_V128);
Mark Wielaard 75fe92
+      assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i))));
Mark Wielaard 75fe92
+      t = mkexpr(s);
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+   assign(mask, unop(Iop_NotV128, t));
Mark Wielaard 75fe92
+   put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask)));
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    if (s390_vr_is_cs_set(m5)) {
Mark Wielaard 75fe92
+      IRTemp cc = newTemp(Ity_I64);
Mark Wielaard 75fe92
+      assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask))));
Mark Wielaard 75fe92
       s390_cc_set(cc);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
+   dis_res->hint = Dis_HintVerbose;
Mark Wielaard 75fe92
    return "vistr";
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 32312d588b77c5b5b5a0145bb0cc6f795b447790
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Fri Apr 16 12:44:44 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    Bug 434296 - s390x: Add memcheck test cases for vector string insns
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Bug 434296 addresses memcheck false positives with the vector string
Mark Wielaard 75fe92
    instructions VISTR, VSTRC, VFAE, VFEE, and VFENE.  Add test cases that
Mark Wielaard 75fe92
    verify the fix for that bug.  Without the fix, memcheck yields many
Mark Wielaard 75fe92
    complaints with these tests, most of which are false positives.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
index e4e69eb38..d183841ef 100644
Mark Wielaard 75fe92
--- a/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/Makefile.am
Mark Wielaard 75fe92
@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 dist_noinst_SCRIPTS = filter_stderr
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-INSN_TESTS = cdsg cu21 cu42 ltgjhe
Mark Wielaard 75fe92
+INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 check_PROGRAMS = $(INSN_TESTS) 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -14,3 +14,7 @@ EXTRA_DIST = \
Mark Wielaard 75fe92
 AM_CFLAGS    += @FLAG_M64@
Mark Wielaard 75fe92
 AM_CXXFLAGS  += @FLAG_M64@
Mark Wielaard 75fe92
 AM_CCASFLAGS += @FLAG_M64@
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+vstrc_CFLAGS  = $(AM_CFLAGS) -march=z13
Mark Wielaard 75fe92
+vfae_CFLAGS   = $(AM_CFLAGS) -march=z13
Mark Wielaard 75fe92
+vistr_CFLAGS  = $(AM_CFLAGS) -march=z13
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..68781e7fb
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vfae.c
Mark Wielaard 75fe92
@@ -0,0 +1,72 @@
Mark Wielaard 75fe92
+#include <stdio.h>
Mark Wielaard 75fe92
+#include <string.h>
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+#define VECTOR __attribute__ ((vector_size (16)))
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+typedef char VECTOR char_v;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+volatile char tmp;
Mark Wielaard 75fe92
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static char_v to_char_vec(const char *str)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   char_v v;
Mark Wielaard 75fe92
+   char buf[17];
Mark Wielaard 75fe92
+   int len = strlen(str);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   memcpy(buf, str, (len && str[len - 1] == '~') ? len - 1 : len + 1);
Mark Wielaard 75fe92
+   v = *(char_v *) buf;
Mark Wielaard 75fe92
+   return v;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+#define GENERATE_TEST(mnem)                                          \
Mark Wielaard 75fe92
+static void test_ ## mnem ## _char(const char *str, const char *match, \
Mark Wielaard 75fe92
+                                   int expect_res, int expect_cc)    \
Mark Wielaard 75fe92
+{                                                                    \
Mark Wielaard 75fe92
+   int cc;                                                           \
Mark Wielaard 75fe92
+   char_v v1;                                                        \
Mark Wielaard 75fe92
+   char_v v2 = to_char_vec(str);                                     \
Mark Wielaard 75fe92
+   char_v v3 = to_char_vec(match);                                   \
Mark Wielaard 75fe92
+                                                                     \
Mark Wielaard 75fe92
+   __asm__(                                                          \
Mark Wielaard 75fe92
+      "cr    0,0\n\t"           /* Clear CC */                       \
Mark Wielaard 75fe92
+      #mnem "  %[v1],%[v2],%[v3],0,3\n\t"                            \
Mark Wielaard 75fe92
+      "ipm   %[cc]\n\t"                                              \
Mark Wielaard 75fe92
+      "srl   %[cc],28"                                               \
Mark Wielaard 75fe92
+      : [v1] "=v" (v1),                                              \
Mark Wielaard 75fe92
+        [cc] "=d" (cc)                                               \
Mark Wielaard 75fe92
+      : [v2] "v" (v2),                                               \
Mark Wielaard 75fe92
+        [v3] "v" (v3)                                                \
Mark Wielaard 75fe92
+      : "cc");                                                       \
Mark Wielaard 75fe92
+                                                                     \
Mark Wielaard 75fe92
+   tmp = hex_digit[v1[7] & 0x1f];                                    \
Mark Wielaard 75fe92
+   if (expect_res >= 0  && v1[7] != expect_res)                      \
Mark Wielaard 75fe92
+      printf("result %u != %d\n", v1[7], expect_res);                \
Mark Wielaard 75fe92
+                                                                     \
Mark Wielaard 75fe92
+   tmp = hex_digit[cc & 0xf];                                        \
Mark Wielaard 75fe92
+   if (expect_cc >= 0 && cc != expect_cc)                            \
Mark Wielaard 75fe92
+      printf("CC %d != %d\n", cc, expect_cc);                        \
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+GENERATE_TEST(vfae)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+GENERATE_TEST(vfee)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+GENERATE_TEST(vfene)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+int main()
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   test_vfae_char("not found", "................", 9, 0);
Mark Wielaard 75fe92
+   test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2);
Mark Wielaard 75fe92
+   test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   test_vfee_char("same char here", "..........here", 10, 2);
Mark Wielaard 75fe92
+   test_vfee_char("and here too ...", "_________t~", 9, 1);
Mark Wielaard 75fe92
+   test_vfee_char("equality!~", "========!!~", 8, -1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   test_vfene_char("strings equal", "strings equal", 13, 0);
Mark Wielaard 75fe92
+   test_vfene_char(hex_digit, hex_digit, 16, 3);
Mark Wielaard 75fe92
+   test_vfene_char("undef~", "undefined", -1, -1);
Mark Wielaard 75fe92
+   test_vfene_char("active~", "actually ok", 3, 1);
Mark Wielaard 75fe92
+   return 0;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..8aad3c87f
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vfae.stderr.exp
Mark Wielaard 75fe92
@@ -0,0 +1,20 @@
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vfae_char (vfae.c:51)
Mark Wielaard 75fe92
+   by 0x........: main (vfae.c:61)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vfae_char (vfae.c:51)
Mark Wielaard 75fe92
+   by 0x........: main (vfae.c:61)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vfee_char (vfae.c:53)
Mark Wielaard 75fe92
+   by 0x........: main (vfae.c:65)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vfene_char (vfae.c:55)
Mark Wielaard 75fe92
+   by 0x........: main (vfae.c:69)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vfene_char (vfae.c:55)
Mark Wielaard 75fe92
+   by 0x........: main (vfae.c:69)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..e69de29bb
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..ae36c22fe
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vfae.vgtest
Mark Wielaard 75fe92
@@ -0,0 +1,2 @@
Mark Wielaard 75fe92
+prog: vfae
Mark Wielaard 75fe92
+vgopts: -q
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..7ed59b94b
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vistr.c
Mark Wielaard 75fe92
@@ -0,0 +1,76 @@
Mark Wielaard 75fe92
+#include <stdio.h>
Mark Wielaard 75fe92
+#include <string.h>
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+#define VECTOR __attribute__ ((vector_size (16)))
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+typedef char VECTOR char_v;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+volatile char tmp;
Mark Wielaard 75fe92
+static const char *hex_digit = "0123456789abcdef";
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static char_v to_char_vec(const char *str, char_v *maskp)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   char buf[17];
Mark Wielaard 75fe92
+   char_v v;
Mark Wielaard 75fe92
+   char_v mask = {0};
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   for (int i = 0; i < sizeof(buf); i++) {
Mark Wielaard 75fe92
+      char ch = str[i];
Mark Wielaard 75fe92
+      if (ch == '\0')
Mark Wielaard 75fe92
+         break;
Mark Wielaard 75fe92
+      else if (ch == '$') {
Mark Wielaard 75fe92
+         buf[i] = '\0';
Mark Wielaard 75fe92
+         mask[i] = -1;
Mark Wielaard 75fe92
+      } else if (ch != '~') {
Mark Wielaard 75fe92
+         buf[i] = ch;
Mark Wielaard 75fe92
+         mask[i] = -1;
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+   v = *(char_v *) buf;
Mark Wielaard 75fe92
+   *maskp = mask;
Mark Wielaard 75fe92
+   return v;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static void test_vistr_char(const char *str, const char *expect_res,
Mark Wielaard 75fe92
+                            int expect_cc)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   int cc, count;
Mark Wielaard 75fe92
+   char_v v1, mask;
Mark Wielaard 75fe92
+   char_v v2 = to_char_vec(str, &mask);
Mark Wielaard 75fe92
+   char_v exp_v1 = to_char_vec(expect_res, &mask);
Mark Wielaard 75fe92
+   char equal[16];
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   __asm__(
Mark Wielaard 75fe92
+      "cr    0,0\n\t"           /* Clear CC */
Mark Wielaard 75fe92
+      "vistr %[v1],%[v2],0,1\n\t"
Mark Wielaard 75fe92
+      "ipm   %[cc]\n\t"
Mark Wielaard 75fe92
+      "srl   %[cc],28"
Mark Wielaard 75fe92
+      : [v1] "=v" (v1),
Mark Wielaard 75fe92
+        [cc] "=d" (cc)
Mark Wielaard 75fe92
+      : [v2] "v" (v2)
Mark Wielaard 75fe92
+      : "cc");
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   *(char_v *) equal = (v1 & mask) == (exp_v1 & mask);
Mark Wielaard 75fe92
+   if (memchr(equal, 0, sizeof(equal)))
Mark Wielaard 75fe92
+      printf("Result doesn't match `%s'\n", expect_res);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   count = 0;
Mark Wielaard 75fe92
+   for (int i = 0; i < 16; i++) {
Mark Wielaard 75fe92
+      if (v1[i] == 0) count++;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+   tmp = hex_digit[count];
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   tmp = hex_digit[cc & 0xf];
Mark Wielaard 75fe92
+   if (expect_cc >= 0 && cc != expect_cc)
Mark Wielaard 75fe92
+      printf("CC %d != %d\n", cc, expect_cc);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+int main()
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   test_vistr_char("terminated$====~", "terminated$$$$$$", 0);
Mark Wielaard 75fe92
+   test_vistr_char("undef~~~~~~~~~~~", "undef", -1);
Mark Wielaard 75fe92
+   test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1);
Mark Wielaard 75fe92
+   test_vistr_char("Not. Terminated.", "Not. Terminated.", 3);
Mark Wielaard 75fe92
+   test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0);
Mark Wielaard 75fe92
+   return 0;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..e4f35fd74
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vistr.stderr.exp
Mark Wielaard 75fe92
@@ -0,0 +1,20 @@
Mark Wielaard 75fe92
+Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
+   at 0x........: test_vistr_char (vistr.c:59)
Mark Wielaard 75fe92
+   by 0x........: main (vistr.c:71)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vistr_char (vistr.c:63)
Mark Wielaard 75fe92
+   by 0x........: main (vistr.c:71)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
+   at 0x........: test_vistr_char (vistr.c:59)
Mark Wielaard 75fe92
+   by 0x........: main (vistr.c:72)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vistr_char (vistr.c:63)
Mark Wielaard 75fe92
+   by 0x........: main (vistr.c:72)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Conditional jump or move depends on uninitialised value(s)
Mark Wielaard 75fe92
+   at 0x........: test_vistr_char (vistr.c:59)
Mark Wielaard 75fe92
+   by 0x........: main (vistr.c:74)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..f99749d85
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vistr.vgtest
Mark Wielaard 75fe92
@@ -0,0 +1,2 @@
Mark Wielaard 75fe92
+prog: vistr
Mark Wielaard 75fe92
+vgopts: -q
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..268e2f858
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vstrc.c
Mark Wielaard 75fe92
@@ -0,0 +1,92 @@
Mark Wielaard 75fe92
+#include <stdio.h>
Mark Wielaard 75fe92
+#include <string.h>
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+#define VECTOR __attribute__ ((vector_size (16)))
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+typedef char VECTOR char_v;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+struct vstrc_char_rng {
Mark Wielaard 75fe92
+   unsigned char range[16];
Mark Wielaard 75fe92
+   unsigned char flags[16];
Mark Wielaard 75fe92
+};
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+#define RNG_FLAG_EQ   0x80
Mark Wielaard 75fe92
+#define RNG_FLAG_LT   0x40
Mark Wielaard 75fe92
+#define RNG_FLAG_GT   0x20
Mark Wielaard 75fe92
+#define RNG_FLAG_ANY  0xe0
Mark Wielaard 75fe92
+#define RNG_FLAG_NONE 0x00
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+volatile char tmp;
Mark Wielaard 75fe92
+static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV";
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng,
Mark Wielaard 75fe92
+                            int expect_res, int expect_cc)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   int cc;
Mark Wielaard 75fe92
+   char_v v1;
Mark Wielaard 75fe92
+   char_v v2 = *(const char_v *) str;
Mark Wielaard 75fe92
+   char_v v3 = *(const char_v *) rng->range;
Mark Wielaard 75fe92
+   char_v v4 = *(const char_v *) rng->flags;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   __asm__(
Mark Wielaard 75fe92
+      "cr    0,0\n\t"           /* Clear CC */
Mark Wielaard 75fe92
+      "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t"
Mark Wielaard 75fe92
+      "ipm   %[cc]\n\t"
Mark Wielaard 75fe92
+      "srl   %[cc],28"
Mark Wielaard 75fe92
+      : [v1] "=v" (v1),
Mark Wielaard 75fe92
+        [cc] "=d" (cc)
Mark Wielaard 75fe92
+      : [v2] "v" (v2),
Mark Wielaard 75fe92
+        [v3] "v" (v3),
Mark Wielaard 75fe92
+        [v4] "v" (v4)
Mark Wielaard 75fe92
+      : "cc");
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   tmp = hex_digit[v1[7] & 0x1f];
Mark Wielaard 75fe92
+   if (expect_res >= 0  && v1[7] != expect_res)
Mark Wielaard 75fe92
+      printf("result %u != %d\n", v1[7], expect_res);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   tmp = hex_digit[cc & 0xf];
Mark Wielaard 75fe92
+   if (expect_cc >= 0 && cc != expect_cc)
Mark Wielaard 75fe92
+      printf("CC %d != %d\n", cc, expect_cc);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+int main()
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   struct vstrc_char_rng rng;
Mark Wielaard 75fe92
+   char buf[16];
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   memset(rng.flags, RNG_FLAG_NONE, 16);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   rng.range[4] = 'z';
Mark Wielaard 75fe92
+   rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ;
Mark Wielaard 75fe92
+   rng.flags[5] = RNG_FLAG_ANY;
Mark Wielaard 75fe92
+   /* OK: match at the 'z' */
Mark Wielaard 75fe92
+   test_vstrc_char("find the z", &rng, 9, 2);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ;
Mark Wielaard 75fe92
+   rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ;
Mark Wielaard 75fe92
+   /* Bad: undefined range */
Mark Wielaard 75fe92
+   test_vstrc_char("undefined", &rng, -1, -1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   rng.range[12] = 'a';
Mark Wielaard 75fe92
+   rng.range[13] = 'c';
Mark Wielaard 75fe92
+   /* OK: match at the 'a' */
Mark Wielaard 75fe92
+   test_vstrc_char("get the abc", &rng, 8, 2);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   rng.flags[12] = RNG_FLAG_LT;
Mark Wielaard 75fe92
+   rng.flags[13] = RNG_FLAG_GT;
Mark Wielaard 75fe92
+   /* OK: no match up to null terminator */
Mark Wielaard 75fe92
+   test_vstrc_char("no match", &rng, 8, 0);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   /* OK: no match, no null terminator */
Mark Wielaard 75fe92
+   test_vstrc_char("0123456789abcdef", &rng, 16, 3);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   buf[0] = 'x';
Mark Wielaard 75fe92
+   /* Bad: undefined string */
Mark Wielaard 75fe92
+   test_vstrc_char(buf, &rng, -1, -1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   buf[1] = 'z';
Mark Wielaard 75fe92
+   /* Bad: valid match, but CC undefined */
Mark Wielaard 75fe92
+   test_vstrc_char(buf, &rng, 1, -1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return 0;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..c1125bea1
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vstrc.stderr.exp
Mark Wielaard 75fe92
@@ -0,0 +1,20 @@
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vstrc_char (vstrc.c:43)
Mark Wielaard 75fe92
+   by 0x........: main (vstrc.c:68)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vstrc_char (vstrc.c:47)
Mark Wielaard 75fe92
+   by 0x........: main (vstrc.c:68)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vstrc_char (vstrc.c:43)
Mark Wielaard 75fe92
+   by 0x........: main (vstrc.c:85)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vstrc_char (vstrc.c:47)
Mark Wielaard 75fe92
+   by 0x........: main (vstrc.c:85)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+Use of uninitialised value of size 8
Mark Wielaard 75fe92
+   at 0x........: test_vstrc_char (vstrc.c:47)
Mark Wielaard 75fe92
+   by 0x........: main (vstrc.c:89)
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..e69de29bb
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..26f5db99b
Mark Wielaard 75fe92
--- /dev/null
Mark Wielaard 75fe92
+++ b/memcheck/tests/s390x/vstrc.vgtest
Mark Wielaard 75fe92
@@ -0,0 +1,2 @@
Mark Wielaard 75fe92
+prog: vstrc
Mark Wielaard 75fe92
+vgopts: -q
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit a0bb049ace14ab52d386bb1d49a399f39eec4986
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Tue Mar 23 14:55:09 2021 +0100
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Improve handling of amodes without base register
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Addressing modes without a base or index register represent constants.
Mark Wielaard 75fe92
    They can occur in some special cases such as shift operations and when
Mark Wielaard 75fe92
    accessing individual vector elements.  Perform some minor improvements to
Mark Wielaard 75fe92
    the handling of such amodes.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
index 6e0734ae0..2587f81a1 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
    switch (am->tag) {
Mark Wielaard 75fe92
    case S390_AMODE_B12:
Mark Wielaard 75fe92
-      return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d);
Mark Wielaard 75fe92
+      return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) &&
Mark Wielaard 75fe92
+             fits_unsigned_12bit(am->d);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    case S390_AMODE_B20:
Mark Wielaard 75fe92
       return is_virtual_gpr(am->b) && fits_signed_20bit(am->d);
Mark Wielaard 75fe92
@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am)
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static Bool
Mark Wielaard 75fe92
+s390_amode_is_constant(const s390_amode *am)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0));
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 /* Record the register use of an amode */
Mark Wielaard 75fe92
 static void
Mark Wielaard 75fe92
 s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   switch (am->tag) {
Mark Wielaard 75fe92
-   case S390_AMODE_B12:
Mark Wielaard 75fe92
-   case S390_AMODE_B20:
Mark Wielaard 75fe92
-      addHRegUse(u, HRmRead, am->b);
Mark Wielaard 75fe92
-      return;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   case S390_AMODE_BX12:
Mark Wielaard 75fe92
-   case S390_AMODE_BX20:
Mark Wielaard 75fe92
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
Mark Wielaard 75fe92
       addHRegUse(u, HRmRead, am->b);
Mark Wielaard 75fe92
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
Mark Wielaard 75fe92
       addHRegUse(u, HRmRead, am->x);
Mark Wielaard 75fe92
-      return;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   default:
Mark Wielaard 75fe92
-      vpanic("s390_amode_get_reg_usage");
Mark Wielaard 75fe92
-   }
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 static void
Mark Wielaard 75fe92
 s390_amode_map_regs(HRegRemap *m, s390_amode *am)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   switch (am->tag) {
Mark Wielaard 75fe92
-   case S390_AMODE_B12:
Mark Wielaard 75fe92
-   case S390_AMODE_B20:
Mark Wielaard 75fe92
-      am->b = lookupHRegRemap(m, am->b);
Mark Wielaard 75fe92
-      return;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   case S390_AMODE_BX12:
Mark Wielaard 75fe92
-   case S390_AMODE_BX20:
Mark Wielaard 75fe92
+   if (!sameHReg(am->b, s390_hreg_gpr(0)))
Mark Wielaard 75fe92
       am->b = lookupHRegRemap(m, am->b);
Mark Wielaard 75fe92
+   if (!sameHReg(am->x, s390_hreg_gpr(0)))
Mark Wielaard 75fe92
       am->x = lookupHRegRemap(m, am->x);
Mark Wielaard 75fe92
-      return;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-   default:
Mark Wielaard 75fe92
-      vpanic("s390_amode_map_regs");
Mark Wielaard 75fe92
-   }
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
Mark Wielaard 75fe92
                            insn->variant.alu.dst, vreg_opnd);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+   /* v-vgetelem <reg>,<vreg> */
Mark Wielaard 75fe92
+   if (insn->tag == S390_INSN_VEC_AMODEOP
Mark Wielaard 75fe92
+       && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM
Mark Wielaard 75fe92
+       && insn->size == 8
Mark Wielaard 75fe92
+       && sameHReg(insn->variant.vec_amodeop.op1, vreg)
Mark Wielaard 75fe92
+       && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) {
Mark Wielaard 75fe92
+      vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d;
Mark Wielaard 75fe92
+      return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am);
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
    /* v-<unop> <reg>,<vreg> */
Mark Wielaard 75fe92
    if (insn->tag == S390_INSN_UNOP
Mark Wielaard 75fe92
        && insn->variant.unop.src.tag == S390_OPND_REG
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
index 5f79280c0..ceca6836e 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr,
Mark Wielaard 75fe92
                     Bool no_index __attribute__((unused)),
Mark Wielaard 75fe92
                     Bool short_displacement)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
-   if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
Mark Wielaard 75fe92
+   if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 &&
Mark Wielaard 75fe92
+       expr->Iex.Unop.arg->tag == Iex_Const) {
Mark Wielaard 75fe92
+      UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8;
Mark Wielaard 75fe92
+      return s390_amode_b12((Int)value, s390_hreg_gpr(0));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   } else if (expr->tag == Iex_Const) {
Mark Wielaard 75fe92
+      ULong value = expr->Iex.Const.con->Ico.U64;
Mark Wielaard 75fe92
+      if (ulong_fits_unsigned_12bit(value)) {
Mark Wielaard 75fe92
+         return s390_amode_b12((Int)value, s390_hreg_gpr(0));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) {
Mark Wielaard 75fe92
       IRExpr *arg1 = expr->Iex.Binop.arg1;
Mark Wielaard 75fe92
       IRExpr *arg2 = expr->Iex.Binop.arg2;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit fd935e238d907d9c523a311ba795077d95ad6912
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Fri Mar 26 19:27:47 2021 +0100
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Rework insn "v-vdup" and add "v-vrep"
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    So far the only s390x insn for filling a vector with copies of the same
Mark Wielaard 75fe92
    element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first
Mark Wielaard 75fe92
    element of its vector argument.  This is fairly restrictive and can lead
Mark Wielaard 75fe92
    to unnecessarily long code sequences.
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    Redefine "v-vdup" to replicate any scalar value instead.  And add
Mark Wielaard 75fe92
    "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a
Mark Wielaard 75fe92
    vector.  Select the latter for suitable expressions like
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
      Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i))
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    This improves the generated code for some vector string instructions,
Mark Wielaard 75fe92
    where a lot of element replications are performed.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
index 2587f81a1..c764d6ef9 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off)
Mark Wielaard 75fe92
                             insn->variant.unop.dst, vreg_opnd);
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+   /* v-vrep <reg>,<vreg>,<idx> */
Mark Wielaard 75fe92
+   if (insn->tag == S390_INSN_VEC_REPLICATE
Mark Wielaard 75fe92
+       && sameHReg(insn->variant.vec_replicate.op1, vreg)) {
Mark Wielaard 75fe92
+      vreg_am->d += insn->size * insn->variant.vec_replicate.idx;
Mark Wielaard 75fe92
+      return s390_insn_unop(insn->size, S390_VEC_DUPLICATE,
Mark Wielaard 75fe92
+                            insn->variant.vec_replicate.dst, vreg_opnd);
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 no_match:
Mark Wielaard 75fe92
    return NULL;
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn)
Mark Wielaard 75fe92
       addHRegUse(u, HRmRead, insn->variant.vec_triop.op3);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+   case S390_INSN_VEC_REPLICATE:
Mark Wielaard 75fe92
+      addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst);
Mark Wielaard 75fe92
+      addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1);
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("s390_insn_get_reg_usage");
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn)
Mark Wielaard 75fe92
       insn->variant.vec_triop.op3 =
Mark Wielaard 75fe92
          lookupHRegRemap(m, insn->variant.vec_triop.op3);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   case S390_INSN_VEC_REPLICATE:
Mark Wielaard 75fe92
+      insn->variant.vec_replicate.dst =
Mark Wielaard 75fe92
+         lookupHRegRemap(m, insn->variant.vec_replicate.dst);
Mark Wielaard 75fe92
+      insn->variant.vec_replicate.op1 =
Mark Wielaard 75fe92
+         lookupHRegRemap(m, insn->variant.vec_replicate.op1);
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("s390_insn_map_regs");
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2)
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
Mark Wielaard 75fe92
+emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   ULong the_insn = op;
Mark Wielaard 75fe92
+   ULong rxb = s390_update_rxb(0, 1, &v1);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   the_insn |= ((ULong)v1) << 36;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)i2) << 16;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)m3) << 12;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)rxb)<< 8;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return emit_6bytes(p, the_insn);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   ULong the_insn = op;
Mark Wielaard 75fe92
+   ULong rxb = s390_update_rxb(0, 1, &v1);
Mark Wielaard 75fe92
+   rxb = s390_update_rxb(rxb, 2, &v3);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   the_insn |= ((ULong)v1) << 36;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)v3) << 32;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)i2) << 16;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)m4) << 12;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)rxb) << 8;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return emit_6bytes(p, the_insn);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
    ULong the_insn = op;
Mark Wielaard 75fe92
    ULong rxb = s390_update_rxb(0, 1, &v1);
Mark Wielaard 75fe92
@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2)
Mark Wielaard 75fe92
    the_insn |= ((ULong)x2) << 32;
Mark Wielaard 75fe92
    the_insn |= ((ULong)b2) << 28;
Mark Wielaard 75fe92
    the_insn |= ((ULong)d2) << 16;
Mark Wielaard 75fe92
+   the_insn |= ((ULong)m3) << 12;
Mark Wielaard 75fe92
    the_insn |= ((ULong)rxb)<< 8;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
    return emit_6bytes(p, the_insn);
Mark Wielaard 75fe92
@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
Mark Wielaard 75fe92
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
       s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2);
Mark Wielaard 75fe92
+   return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0);
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2)
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
+      s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
 s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
       s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2);
Mark Wielaard 75fe92
+   return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0);
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4)
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3)
Mark Wielaard 75fe92
+s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
    if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
-      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3);
Mark Wielaard 75fe92
+      s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
-   return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3);
Mark Wielaard 75fe92
+   return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4);
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
+      s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
 s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3)
Mark Wielaard 75fe92
@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst,
Mark Wielaard 75fe92
    return insn;
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1,
Mark Wielaard 75fe92
+                                   UChar idx)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn));
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   insn->tag  = S390_INSN_VEC_REPLICATE;
Mark Wielaard 75fe92
+   insn->size = size;
Mark Wielaard 75fe92
+   insn->variant.vec_replicate.dst = dst;
Mark Wielaard 75fe92
+   insn->variant.vec_replicate.op1 = op1;
Mark Wielaard 75fe92
+   insn->variant.vec_replicate.idx = idx;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return insn;
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 /*---------------------------------------------------------------*/
Mark Wielaard 75fe92
 /*--- Debug print                                             ---*/
Mark Wielaard 75fe92
 /*---------------------------------------------------------------*/
Mark Wielaard 75fe92
@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn)
Mark Wielaard 75fe92
                    insn->variant.vec_triop.op3);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+   case S390_INSN_VEC_REPLICATE:
Mark Wielaard 75fe92
+      s390_sprintf(buf, "%M %R, %R, %I", "v-vrep",
Mark Wielaard 75fe92
+                   insn->variant.vec_replicate.dst,
Mark Wielaard 75fe92
+                   insn->variant.vec_replicate.op1,
Mark Wielaard 75fe92
+                   insn->variant.vec_replicate.idx);
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
    default: goto fail;
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -9386,6 +9480,56 @@ s390_negate_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   UChar v1 = hregNumber(insn->variant.unop.dst);
Mark Wielaard 75fe92
+   s390_opnd_RMI opnd = insn->variant.unop.src;
Mark Wielaard 75fe92
+   UChar r2;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   switch (opnd.tag) {
Mark Wielaard 75fe92
+   case S390_OPND_AMODE: {
Mark Wielaard 75fe92
+      s390_amode* am = opnd.variant.am;
Mark Wielaard 75fe92
+      UInt b = hregNumber(am->b);
Mark Wielaard 75fe92
+      UInt x = hregNumber(am->x);
Mark Wielaard 75fe92
+      UInt d = am->d;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      if (fits_unsigned_12bit(d)) {
Mark Wielaard 75fe92
+         return s390_emit_VLREP(buf, v1, x, b, d,
Mark Wielaard 75fe92
+                                s390_getM_from_size(insn->size));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      buf = s390_emit_load_mem(buf, insn->size, R0, am);
Mark Wielaard 75fe92
+      r2 = R0;
Mark Wielaard 75fe92
+      goto duplicate_from_gpr;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   case S390_OPND_IMMEDIATE: {
Mark Wielaard 75fe92
+      ULong val = opnd.variant.imm;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      if (ulong_fits_signed_16bit(val)) {
Mark Wielaard 75fe92
+         return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      buf = s390_emit_load_64imm(buf, R0, val);
Mark Wielaard 75fe92
+      r2 = R0;
Mark Wielaard 75fe92
+      goto duplicate_from_gpr;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   case S390_OPND_REG:
Mark Wielaard 75fe92
+      r2 = hregNumber(opnd.variant.reg);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   duplicate_from_gpr:
Mark Wielaard 75fe92
+      buf = s390_emit_VLVGP(buf, v1, r2, r2);
Mark Wielaard 75fe92
+      if (insn->size != 8) {
Mark Wielaard 75fe92
+         buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1,
Mark Wielaard 75fe92
+                              s390_getM_from_size(insn->size));
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
+      return buf;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   vpanic("s390_vec_duplicate_emit");
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
 s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
       UShort i2 = insn->variant.unop.src.variant.imm;
Mark Wielaard 75fe92
       return s390_emit_VGBM(buf, v1, i2);
Mark Wielaard 75fe92
       }
Mark Wielaard 75fe92
-   case S390_VEC_DUPLICATE: {
Mark Wielaard 75fe92
-      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
Mark Wielaard 75fe92
-      UChar v1 = hregNumber(insn->variant.unop.dst);
Mark Wielaard 75fe92
-      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
Mark Wielaard 75fe92
-      return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size));
Mark Wielaard 75fe92
-      }
Mark Wielaard 75fe92
+   case S390_VEC_DUPLICATE:  return s390_vec_duplicate_emit(buf, insn);
Mark Wielaard 75fe92
    case S390_VEC_UNPACKLOWS: {
Mark Wielaard 75fe92
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
Mark Wielaard 75fe92
       vassert(insn->size < 8);
Mark Wielaard 75fe92
@@ -11595,6 +11734,16 @@ s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   UChar v1 = hregNumber(insn->variant.vec_replicate.dst);
Mark Wielaard 75fe92
+   UChar v2 = hregNumber(insn->variant.vec_replicate.op1);
Mark Wielaard 75fe92
+   UShort idx = (UShort) insn->variant.vec_replicate.idx;
Mark Wielaard 75fe92
+   return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size));
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 Int
Mark Wielaard 75fe92
 emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
Mark Wielaard 75fe92
                Bool mode64, VexEndness endness_host,
Mark Wielaard 75fe92
@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn,
Mark Wielaard 75fe92
    case S390_INSN_VEC_TRIOP:
Mark Wielaard 75fe92
       end = s390_insn_vec_triop_emit(buf, insn);
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   case S390_INSN_VEC_REPLICATE:
Mark Wielaard 75fe92
+      end = s390_insn_vec_replicate_emit(buf, insn);
Mark Wielaard 75fe92
+      break;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
    fail:
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       vpanic("emit_S390Instr");
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
index 9b69f4d38..063fd3800 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
@@ -166,7 +166,8 @@ typedef enum {
Mark Wielaard 75fe92
    S390_INSN_VEC_AMODEINTOP,
Mark Wielaard 75fe92
    S390_INSN_VEC_UNOP,
Mark Wielaard 75fe92
    S390_INSN_VEC_BINOP,
Mark Wielaard 75fe92
-   S390_INSN_VEC_TRIOP
Mark Wielaard 75fe92
+   S390_INSN_VEC_TRIOP,
Mark Wielaard 75fe92
+   S390_INSN_VEC_REPLICATE
Mark Wielaard 75fe92
 } s390_insn_tag;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -738,6 +739,11 @@ typedef struct {
Mark Wielaard 75fe92
          HReg          op2;    /* 128-bit second operand */
Mark Wielaard 75fe92
          HReg          op3;    /* 128-bit third operand */
Mark Wielaard 75fe92
       } vec_triop;
Mark Wielaard 75fe92
+      struct {
Mark Wielaard 75fe92
+         HReg          dst;    /* 128-bit result */
Mark Wielaard 75fe92
+         HReg          op1;    /* 128-bit first operand */
Mark Wielaard 75fe92
+         UChar         idx;    /* index of element to replicate */
Mark Wielaard 75fe92
+      } vec_replicate;
Mark Wielaard 75fe92
    } variant;
Mark Wielaard 75fe92
 } s390_insn;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1,
Mark Wielaard 75fe92
                                HReg op2);
Mark Wielaard 75fe92
 s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1,
Mark Wielaard 75fe92
                                HReg op2, HReg op3);
Mark Wielaard 75fe92
+s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
 const HChar *s390_insn_as_string(const s390_insn *);
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
index ceca6836e..968122596 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
Mark Wielaard 75fe92
    }
Mark Wielaard 75fe92
    /* --------- UNARY OP --------- */
Mark Wielaard 75fe92
    case Iex_Unop: {
Mark Wielaard 75fe92
-      UChar size_for_int_arg = 0;
Mark Wielaard 75fe92
       HReg dst = INVALID_HREG;
Mark Wielaard 75fe92
       HReg reg1 = INVALID_HREG;
Mark Wielaard 75fe92
       s390_unop_t vec_unop = S390_UNOP_T_INVALID;
Mark Wielaard 75fe92
       s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID;
Mark Wielaard 75fe92
       IROp op = expr->Iex.Unop.op;
Mark Wielaard 75fe92
+      IROp arg_op = Iop_INVALID;
Mark Wielaard 75fe92
       IRExpr* arg = expr->Iex.Unop.arg;
Mark Wielaard 75fe92
       switch(op) {
Mark Wielaard 75fe92
       case Iop_NotV128:
Mark Wielaard 75fe92
@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
Mark Wielaard 75fe92
       }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       case Iop_Dup8x16:
Mark Wielaard 75fe92
-         size = size_for_int_arg = 1;
Mark Wielaard 75fe92
-         vec_unop = S390_VEC_DUPLICATE;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         size = 1;
Mark Wielaard 75fe92
+         arg_op = Iop_GetElem8x16;
Mark Wielaard 75fe92
+         goto Iop_V_dup_wrk;
Mark Wielaard 75fe92
       case Iop_Dup16x8:
Mark Wielaard 75fe92
-         size = size_for_int_arg = 2;
Mark Wielaard 75fe92
-         vec_unop = S390_VEC_DUPLICATE;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         size = 2;
Mark Wielaard 75fe92
+         arg_op = Iop_GetElem16x8;
Mark Wielaard 75fe92
+         goto Iop_V_dup_wrk;
Mark Wielaard 75fe92
       case Iop_Dup32x4:
Mark Wielaard 75fe92
-         size = size_for_int_arg = 4;
Mark Wielaard 75fe92
-         vec_unop = S390_VEC_DUPLICATE;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         size = 4;
Mark Wielaard 75fe92
+         arg_op = Iop_GetElem32x4;
Mark Wielaard 75fe92
+         goto Iop_V_dup_wrk;
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+      Iop_V_dup_wrk: {
Mark Wielaard 75fe92
+         dst = newVRegV(env);
Mark Wielaard 75fe92
+         if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op &&
Mark Wielaard 75fe92
+             arg->Iex.Binop.arg2->tag == Iex_Const) {
Mark Wielaard 75fe92
+            ULong idx;
Mark Wielaard 75fe92
+            idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con);
Mark Wielaard 75fe92
+            reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1);
Mark Wielaard 75fe92
+            addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx));
Mark Wielaard 75fe92
+         } else {
Mark Wielaard 75fe92
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
Mark Wielaard 75fe92
+            addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src));
Mark Wielaard 75fe92
+         }
Mark Wielaard 75fe92
+         return dst;
Mark Wielaard 75fe92
+      }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       case Iop_Widen8Sto16x8:
Mark Wielaard 75fe92
          size = 1;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWS;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
       case Iop_Widen16Sto32x4:
Mark Wielaard 75fe92
          size = 2;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWS;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
       case Iop_Widen32Sto64x2:
Mark Wielaard 75fe92
          size = 4;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWS;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
       case Iop_Widen8Uto16x8:
Mark Wielaard 75fe92
          size = 1;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWU;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
       case Iop_Widen16Uto32x4:
Mark Wielaard 75fe92
          size = 2;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWU;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
       case Iop_Widen32Uto64x2:
Mark Wielaard 75fe92
          size = 4;
Mark Wielaard 75fe92
-         size_for_int_arg = 8;
Mark Wielaard 75fe92
          vec_unop = S390_VEC_UNPACKLOWU;
Mark Wielaard 75fe92
-         goto Iop_V_int_wrk;
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-      Iop_V_int_wrk: {
Mark Wielaard 75fe92
-         HReg vr1 = vec_generate_zeroes(env);
Mark Wielaard 75fe92
-         s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0)));
Mark Wielaard 75fe92
-         reg1 = s390_isel_int_expr(env, arg);
Mark Wielaard 75fe92
+         goto Iop_V_widen_wrk;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+      Iop_V_widen_wrk: {
Mark Wielaard 75fe92
          vassert(vec_unop != S390_UNOP_T_INVALID);
Mark Wielaard 75fe92
-         addInstr(env,
Mark Wielaard 75fe92
-                  s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM,
Mark Wielaard 75fe92
-                                           vr1, amode2, reg1));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
+         s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg);
Mark Wielaard 75fe92
+         HReg vr1 = newVRegV(env);
Mark Wielaard 75fe92
+         addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src));
Mark Wielaard 75fe92
          dst = newVRegV(env);
Mark Wielaard 75fe92
          addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1)));
Mark Wielaard 75fe92
          return dst;
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Thu Mar 25 18:48:07 2021 +0100
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Add support for emitting "vector or with complement"
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    In the instruction selector, look out for IR expressions that fit "vector
Mark Wielaard 75fe92
    or with complement (VOC)".  Emit when applicable.
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    This slightly reduces the generated code sometimes, such as for certain
Mark Wielaard 75fe92
    vector string instructions, where such expressions occur quite frequently.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
index c764d6ef9..239d9d299 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_defs.c
Mark Wielaard 75fe92
@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3)
Mark Wielaard 75fe92
    return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3);
Mark Wielaard 75fe92
 }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
+static UChar *
Mark Wielaard 75fe92
+s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3)
Mark Wielaard 75fe92
+{
Mark Wielaard 75fe92
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
Mark Wielaard 75fe92
+      s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3);
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
+   return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3);
Mark Wielaard 75fe92
+}
Mark Wielaard 75fe92
+
Mark Wielaard 75fe92
 static UChar *
Mark Wielaard 75fe92
 s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3)
Mark Wielaard 75fe92
 {
Mark Wielaard 75fe92
@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn)
Mark Wielaard 75fe92
       case S390_VEC_PACK_SATURU:    op = "v-vpacksaturu"; break;
Mark Wielaard 75fe92
       case S390_VEC_COMPARE_EQUAL:  op = "v-vcmpeq"; break;
Mark Wielaard 75fe92
       case S390_VEC_OR:             op = "v-vor"; break;
Mark Wielaard 75fe92
+      case S390_VEC_ORC:            op = "v-vorc"; break;
Mark Wielaard 75fe92
       case S390_VEC_XOR:            op = "v-vxor";  break;
Mark Wielaard 75fe92
       case S390_VEC_AND:            op = "v-vand"; break;
Mark Wielaard 75fe92
       case S390_VEC_MERGEL:         op = "v-vmergel"; break;
Mark Wielaard 75fe92
@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn)
Mark Wielaard 75fe92
          return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size));
Mark Wielaard 75fe92
       case S390_VEC_OR:
Mark Wielaard 75fe92
          return s390_emit_VO(buf, v1, v2, v3);
Mark Wielaard 75fe92
+      case S390_VEC_ORC:
Mark Wielaard 75fe92
+         return s390_emit_VOC(buf, v1, v2, v3);
Mark Wielaard 75fe92
       case S390_VEC_XOR:
Mark Wielaard 75fe92
          return s390_emit_VX(buf, v1, v2, v3);
Mark Wielaard 75fe92
       case S390_VEC_AND:
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
index 063fd3800..dc116106e 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_defs.h
Mark Wielaard 75fe92
@@ -366,6 +366,7 @@ typedef enum {
Mark Wielaard 75fe92
    S390_VEC_PACK_SATURU,
Mark Wielaard 75fe92
    S390_VEC_COMPARE_EQUAL,
Mark Wielaard 75fe92
    S390_VEC_OR,
Mark Wielaard 75fe92
+   S390_VEC_ORC,
Mark Wielaard 75fe92
    S390_VEC_XOR,
Mark Wielaard 75fe92
    S390_VEC_AND,
Mark Wielaard 75fe92
    S390_VEC_MERGEL,
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
index 968122596..53d76fe8a 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
Mark Wielaard 75fe92
       case Iop_OrV128:
Mark Wielaard 75fe92
          size = 16;
Mark Wielaard 75fe92
          vec_binop = S390_VEC_OR;
Mark Wielaard 75fe92
+         if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) {
Mark Wielaard 75fe92
+            IRExpr* orig_arg1 = arg1;
Mark Wielaard 75fe92
+            arg1 = arg2;
Mark Wielaard 75fe92
+            arg2 = orig_arg1->Iex.Unop.arg;
Mark Wielaard 75fe92
+            vec_binop = S390_VEC_ORC;
Mark Wielaard 75fe92
+         } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) {
Mark Wielaard 75fe92
+            arg2 = arg2->Iex.Unop.arg;
Mark Wielaard 75fe92
+            vec_binop = S390_VEC_ORC;
Mark Wielaard 75fe92
+         }
Mark Wielaard 75fe92
          goto Iop_VV_wrk;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       case Iop_XorV128:
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Tue Mar 30 17:45:20 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Fix/optimize Iop_64HLtoV128
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies
Mark Wielaard 75fe92
    of a 64-bit value is realized with Iop_64HLtoV128, since there is no such
Mark Wielaard 75fe92
    operator as Iop_Dup64x2.  But the two args to Iop_64HLtoV128 use the same
Mark Wielaard 75fe92
    expression, referenced twice.  Although this hasn't been seen to cause
Mark Wielaard 75fe92
    real trouble yet, it's problematic and potentially inefficient, so change
Mark Wielaard 75fe92
    it: Assign to a temp and pass that twice instead.
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    In the instruction selector, if Iop_64HLtoV128 is found to be used for a
Mark Wielaard 75fe92
    duplication as above, select "v-vdup" instead of "v-vinitfromgprs".  This
Mark Wielaard 75fe92
    mimics the behavior we'd get if there actually was an operator
Mark Wielaard 75fe92
    Iop_Dup64x2.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
index dfea54259..a73dcfb14 100644
Mark Wielaard 75fe92
--- a/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
+++ b/VEX/priv/guest_s390_toIR.c
Mark Wielaard 75fe92
@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2)
Mark Wielaard 75fe92
    case Ity_I32:
Mark Wielaard 75fe92
       put_vr_qw(v1, unop(Iop_Dup32x4, o2));
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
-   case Ity_I64:
Mark Wielaard 75fe92
-      put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2));
Mark Wielaard 75fe92
+   case Ity_I64: {
Mark Wielaard 75fe92
+      IRTemp val = newTemp(Ity_I64);
Mark Wielaard 75fe92
+      assign(val, o2);
Mark Wielaard 75fe92
+      put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val)));
Mark Wielaard 75fe92
       break;
Mark Wielaard 75fe92
+   }
Mark Wielaard 75fe92
    default:
Mark Wielaard 75fe92
       ppIRType(o2type);
Mark Wielaard 75fe92
       vpanic("s390_vr_fill: invalid IRType");
Mark Wielaard 75fe92
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
index 53d76fe8a..ee20c6711 100644
Mark Wielaard 75fe92
--- a/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
+++ b/VEX/priv/host_s390_isel.c
Mark Wielaard 75fe92
@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
Mark Wielaard 75fe92
       }
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       case Iop_64HLtoV128:
Mark Wielaard 75fe92
-         reg1 = s390_isel_int_expr(env, arg1);
Mark Wielaard 75fe92
-         reg2 = s390_isel_int_expr(env, arg2);
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
-         addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
Mark Wielaard 75fe92
-                  dst, reg1, reg2));
Mark Wielaard 75fe92
-
Mark Wielaard 75fe92
+         if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp &&
Mark Wielaard 75fe92
+             arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) {
Mark Wielaard 75fe92
+            s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1);
Mark Wielaard 75fe92
+            addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src));
Mark Wielaard 75fe92
+         } else {
Mark Wielaard 75fe92
+            reg1 = s390_isel_int_expr(env, arg1);
Mark Wielaard 75fe92
+            reg2 = s390_isel_int_expr(env, arg2);
Mark Wielaard 75fe92
+            addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS,
Mark Wielaard 75fe92
+                                              dst, reg1, reg2));
Mark Wielaard 75fe92
+         }
Mark Wielaard 75fe92
          return dst;
Mark Wielaard 75fe92
 
Mark Wielaard 75fe92
       default:
Mark Wielaard 75fe92
Mark Wielaard 75fe92
commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a
Mark Wielaard 75fe92
Author: Andreas Arnez <arnez@linux.ibm.com>
Mark Wielaard 75fe92
Date:   Fri May 7 18:13:03 2021 +0200
Mark Wielaard 75fe92
Mark Wielaard 75fe92
    s390x: Add missing stdout.exp for vector string memcheck test
Mark Wielaard 75fe92
    
Mark Wielaard 75fe92
    The file vistr.stdout.exp was missing from commit 32312d588.  Add it.
Mark Wielaard 75fe92
Mark Wielaard 75fe92
diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp
Mark Wielaard 75fe92
new file mode 100644
Mark Wielaard 75fe92
index 000000000..e69de29bb