diff --git a/.gitignore b/.gitignore index dcde9c4..8036c6c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/valgrind-3.16.0.tar.bz2 +SOURCES/valgrind-3.17.0.tar.bz2 diff --git a/.valgrind.metadata b/.valgrind.metadata index 364b313..c47f2af 100644 --- a/.valgrind.metadata +++ b/.valgrind.metadata @@ -1 +1 @@ -cf9fba00b597d9baa3f673cc0960b23a40473ff1 SOURCES/valgrind-3.16.0.tar.bz2 +7770912c7465f93a90c5a9d5c1b1b036ebec04fd SOURCES/valgrind-3.17.0.tar.bz2 diff --git a/SOURCES/valgrind-3.16.0-387-float.patch b/SOURCES/valgrind-3.16.0-387-float.patch deleted file mode 100644 index b64866e..0000000 --- a/SOURCES/valgrind-3.16.0-387-float.patch +++ /dev/null @@ -1,64 +0,0 @@ -commit 6aa4f7e7e76b40c183fb29650540d119ce1b4a4a -Author: Julian Seward -Date: Thu Jun 11 09:01:52 2020 +0200 - - expr_is_guardable, stmt_is_guardable, add_guarded_stmt_to_end_of: handle GetI/PutI cases. - - This fixes #422715. - -diff --git a/VEX/priv/guest_generic_bb_to_IR.c b/VEX/priv/guest_generic_bb_to_IR.c -index 2f204c5b0..0cee970e4 100644 ---- a/VEX/priv/guest_generic_bb_to_IR.c -+++ b/VEX/priv/guest_generic_bb_to_IR.c -@@ -425,6 +425,7 @@ static Bool expr_is_guardable ( const IRExpr* e ) - case Iex_ITE: - case Iex_CCall: - case Iex_Get: -+ case Iex_GetI: - case Iex_Const: - case Iex_RdTmp: - return True; -@@ -450,6 +451,7 @@ static Bool stmt_is_guardable ( const IRStmt* st ) - case Ist_NoOp: - case Ist_IMark: - case Ist_Put: -+ case Ist_PutI: - return True; - // These are definitely not guardable, or at least it's way too much - // hassle to do so. 
-@@ -506,7 +508,7 @@ static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb, - // Put(offs, e) ==> Put(offs, ITE(guard, e, Get(offs, sizeof(e)))) - // Which when flattened out is: - // t1 = Get(offs, sizeof(e)) -- // t2 = ITE(guard, e, t2) -+ // t2 = ITE(guard, e, t1) - // Put(offs, t2) - Int offset = st->Ist.Put.offset; - IRExpr* e = st->Ist.Put.data; -@@ -519,6 +521,26 @@ static void add_guarded_stmt_to_end_of ( /*MOD*/IRSB* bb, - addStmtToIRSB(bb, IRStmt_Put(offset, IRExpr_RdTmp(t2))); - break; - } -+ case Ist_PutI: { -+ // PutI(descr,ix,bias, e) ==> Put(descr,ix,bias, ITE(guard, e, GetI(descr,ix,bias))) -+ // Which when flattened out is: -+ // t1 = GetI(descr,ix,bias) -+ // t2 = ITE(guard, e, t1) -+ // PutI(descr,ix,bias, t2) -+ IRPutI* details = st->Ist.PutI.details; -+ IRRegArray* descr = details->descr; -+ IRExpr* ix = details->ix; -+ Int bias = details->bias; -+ IRExpr* e = details->data; -+ IRType ty = typeOfIRExpr(bb->tyenv, e); -+ IRTemp t1 = newIRTemp(bb->tyenv, ty); -+ IRTemp t2 = newIRTemp(bb->tyenv, ty); -+ addStmtToIRSB(bb, IRStmt_WrTmp(t1, IRExpr_GetI(descr,ix,bias))); -+ addStmtToIRSB(bb, IRStmt_WrTmp(t2, IRExpr_ITE(IRExpr_RdTmp(guard), -+ e, IRExpr_RdTmp(t1)))); -+ addStmtToIRSB(bb, IRStmt_PutI(mkIRPutI(descr,ix,bias, IRExpr_RdTmp(t2)))); -+ break; -+ } - case Ist_Exit: { - // Exit(xguard, dst, jk, offsIP) - // ==> t1 = And1(xguard, guard) diff --git a/SOURCES/valgrind-3.16.0-pkglibexecdir.patch b/SOURCES/valgrind-3.16.0-pkglibexecdir.patch deleted file mode 100644 index cd52729..0000000 --- a/SOURCES/valgrind-3.16.0-pkglibexecdir.patch +++ /dev/null @@ -1,117 +0,0 @@ -diff --git a/Makefile.all.am b/Makefile.all.am -index 3786e34..1befef5 100644 ---- a/Makefile.all.am -+++ b/Makefile.all.am -@@ -50,20 +50,20 @@ inplace-noinst_DSYMS: build-noinst_DSYMS - done - - # This is used by coregrind/Makefile.am and by /Makefile.am for doing --# "make install". It copies $(noinst_PROGRAMS) into $prefix/lib/valgrind/. -+# "make install". 
It copies $(noinst_PROGRAMS) into $prefix/libexec/valgrind/. - # It needs to be depended on by an 'install-exec-local' rule. - install-noinst_PROGRAMS: $(noinst_PROGRAMS) -- $(mkinstalldirs) $(DESTDIR)$(pkglibdir); \ -+ $(mkinstalldirs) $(DESTDIR)$(pkglibexecdir); \ - for f in $(noinst_PROGRAMS); do \ -- $(INSTALL_PROGRAM) $$f $(DESTDIR)$(pkglibdir); \ -+ $(INSTALL_PROGRAM) $$f $(DESTDIR)$(pkglibexecdir); \ - done - - # This is used by coregrind/Makefile.am and by /Makefile.am for doing --# "make uninstall". It removes $(noinst_PROGRAMS) from $prefix/lib/valgrind/. -+# "make uninstall". It removes $(noinst_PROGRAMS) from $prefix/libexec/valgrind/. - # It needs to be depended on by an 'uninstall-local' rule. - uninstall-noinst_PROGRAMS: - for f in $(noinst_PROGRAMS); do \ -- rm -f $(DESTDIR)$(pkglibdir)/$$f; \ -+ rm -f $(DESTDIR)$(pkglibexecdir)/$$f; \ - done - - # Similar to install-noinst_PROGRAMS. -@@ -71,15 +71,15 @@ uninstall-noinst_PROGRAMS: - # directories. XXX: not sure whether the resulting permissions will be - # correct when using 'cp -R'... - install-noinst_DSYMS: build-noinst_DSYMS -- $(mkinstalldirs) $(DESTDIR)$(pkglibdir); \ -+ $(mkinstalldirs) $(DESTDIR)$(pkglibexecdir); \ - for f in $(noinst_DSYMS); do \ -- cp -R $$f.dSYM $(DESTDIR)$(pkglibdir); \ -+ cp -R $$f.dSYM $(DESTDIR)$(pkglibexecdir); \ - done - - # Similar to uninstall-noinst_PROGRAMS. - uninstall-noinst_DSYMS: - for f in $(noinst_DSYMS); do \ -- rm -f $(DESTDIR)$(pkglibdir)/$$f.dSYM; \ -+ rm -f $(DESTDIR)$(pkglibexecdir)/$$f.dSYM; \ - done - - # This needs to be depended on by a 'clean-local' rule. -diff --git a/Makefile.am b/Makefile.am -index 242b38a..3b7c806 100644 ---- a/Makefile.am -+++ b/Makefile.am -@@ -58,7 +58,7 @@ DEFAULT_SUPP_FILES = @DEFAULT_SUPP@ - # default.supp, as it is built from the base .supp files at compile-time. 
- dist_noinst_DATA = $(SUPP_FILES) - --vglibdir = $(pkglibdir) -+vglibdir = $(pkglibexecdir) - vglib_DATA = default.supp - - pkgconfigdir = $(libdir)/pkgconfig -diff --git a/coregrind/Makefile.am b/coregrind/Makefile.am -index 94030fd..f09763a 100644 ---- a/coregrind/Makefile.am -+++ b/coregrind/Makefile.am -@@ -11,12 +11,12 @@ include $(top_srcdir)/Makefile.all.am - - AM_CPPFLAGS_@VGCONF_PLATFORM_PRI_CAPS@ += \ - -I$(top_srcdir)/coregrind \ -- -DVG_LIBDIR="\"$(pkglibdir)"\" \ -+ -DVG_LIBDIR="\"$(pkglibexecdir)"\" \ - -DVG_PLATFORM="\"@VGCONF_ARCH_PRI@-@VGCONF_OS@\"" - if VGCONF_HAVE_PLATFORM_SEC - AM_CPPFLAGS_@VGCONF_PLATFORM_SEC_CAPS@ += \ - -I$(top_srcdir)/coregrind \ -- -DVG_LIBDIR="\"$(pkglibdir)"\" \ -+ -DVG_LIBDIR="\"$(pkglibexecdir)"\" \ - -DVG_PLATFORM="\"@VGCONF_ARCH_SEC@-@VGCONF_OS@\"" - endif - -@@ -714,7 +714,7 @@ GDBSERVER_XML_FILES = \ - m_gdbserver/mips64-fpu.xml - - # so as to make sure these get copied into the install tree --vglibdir = $(pkglibdir) -+vglibdir = $(pkglibexecdir) - vglib_DATA = $(GDBSERVER_XML_FILES) - - # so as to make sure these get copied into the tarball -diff --git a/mpi/Makefile.am b/mpi/Makefile.am -index 7ad9a25..471fee0 100644 ---- a/mpi/Makefile.am -+++ b/mpi/Makefile.am -@@ -18,16 +18,18 @@ EXTRA_DIST = \ - # libmpiwrap-.so - #---------------------------------------------------------------------------- - --noinst_PROGRAMS = -+# These are really real libraries, so they should go to libdir, not libexec. 
-+mpidir = $(pkglibdir) -+mpi_PROGRAMS = - if BUILD_MPIWRAP_PRI --noinst_PROGRAMS += libmpiwrap-@VGCONF_ARCH_PRI@-@VGCONF_OS@.so -+mpi_PROGRAMS += libmpiwrap-@VGCONF_ARCH_PRI@-@VGCONF_OS@.so - endif - if BUILD_MPIWRAP_SEC --noinst_PROGRAMS += libmpiwrap-@VGCONF_ARCH_SEC@-@VGCONF_OS@.so -+mpi_PROGRAMS += libmpiwrap-@VGCONF_ARCH_SEC@-@VGCONF_OS@.so - endif - - if VGCONF_OS_IS_DARWIN --noinst_DSYMS = $(noinst_PROGRAMS) -+mpi_DSYMS = $(mpi_PROGRAMS) - endif - - diff --git a/SOURCES/valgrind-3.16.0-ppc-L-field.patch b/SOURCES/valgrind-3.16.0-ppc-L-field.patch deleted file mode 100644 index ee1d225..0000000 --- a/SOURCES/valgrind-3.16.0-ppc-L-field.patch +++ /dev/null @@ -1,29 +0,0 @@ -commit fb6f7abcbc92506d302fb18a2c5fc853d2929248 -Author: Carl Love -Date: Tue Jun 9 10:42:03 2020 -0500 - - Power PC Fix extraction of the L field for sync instruction - - The L field is currently a two bit[22:21] field in ISA 3.0. The size of the - L field has changed over time. - - Currently the ISA 3.0 Valgrind sync instruction support code sets the - flag_L for the instruction L field to a five bit value that includes bits - that are marked reserved the sync instruction. This patch fixes the issue for ISA 3.0 - to only setting flag_L the specified two bits. 
- - Valgrind bugzilla: https://bugs.kde.org/show_bug.cgi?id=422677 - -diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c -index 582c59ec0..c4965a19e 100644 ---- a/VEX/priv/guest_ppc_toIR.c -+++ b/VEX/priv/guest_ppc_toIR.c -@@ -8777,7 +8777,7 @@ static Bool dis_memsync ( UInt theInstr ) - /* X-Form, XL-Form */ - UChar opc1 = ifieldOPC(theInstr); - UInt b11to25 = IFIELD(theInstr, 11, 15); -- UChar flag_L = ifieldRegDS(theInstr); -+ UChar flag_L = IFIELD(theInstr, 21, 2); //ISA 3.0 - UInt b11to20 = IFIELD(theInstr, 11, 10); - UInt M0 = IFIELD(theInstr, 11, 5); - UChar rD_addr = ifieldRegDS(theInstr); diff --git a/SOURCES/valgrind-3.16.0-some-stack-protector.patch b/SOURCES/valgrind-3.16.0-some-stack-protector.patch index bde2aa5..324c7d3 100644 --- a/SOURCES/valgrind-3.16.0-some-stack-protector.patch +++ b/SOURCES/valgrind-3.16.0-some-stack-protector.patch @@ -52,8 +52,8 @@ diff --git a/configure.ac b/configure.ac index f8c798b..ccc8f52 100755 --- a/configure.ac +++ b/configure.ac -@@ -2188,24 +2188,24 @@ AC_LANG(C) - AC_SUBST(FLAG_FALIGNED_NEW) +@@ -2352,24 +2352,24 @@ + AM_CONDITIONAL([HAVE_ALIGNED_CXX_ALLOC], [test x$ac_have_aligned_cxx_alloc = xyes]) # does this compiler support -fno-stack-protector ? -AC_MSG_CHECKING([if gcc accepts -fno-stack-protector]) diff --git a/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch b/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch deleted file mode 100644 index f780fb1..0000000 --- a/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch +++ /dev/null @@ -1,38 +0,0 @@ -commit e2dec0ff9b1e071779bee2c4e6fc82f8194b1c1d -Author: Mark Wielaard -Date: Sun Jul 26 21:17:23 2020 +0200 - - Handle REX prefixed JMP instruction. - - The NET Core runtime might generate a JMP with a REX prefix. - For Jv (32bit offset) and Jb (8bit offset) this is valid. - Prefixes that change operand size are ignored for such JMPs. - So remove the check for sz == 4 and force sz = 4 for Jv. 
- - https://bugs.kde.org/show_bug.cgi?id=422174 - -diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c -index fadf47d41..7888132eb 100644 ---- a/VEX/priv/guest_amd64_toIR.c -+++ b/VEX/priv/guest_amd64_toIR.c -@@ -21392,8 +21392,8 @@ Long dis_ESC_NONE ( - - case 0xE9: /* Jv (jump, 16/32 offset) */ - if (haveF3(pfx)) goto decode_failure; -- if (sz != 4) -- goto decode_failure; /* JRS added 2004 July 11 */ -+ sz = 4; /* Prefixes that change operand size are ignored for this -+ instruction. Operand size is forced to 32bit. */ - if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ - d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); - delta += sz; -@@ -21404,8 +21404,7 @@ Long dis_ESC_NONE ( - - case 0xEB: /* Jb (jump, byte offset) */ - if (haveF3(pfx)) goto decode_failure; -- if (sz != 4) -- goto decode_failure; /* JRS added 2004 July 11 */ -+ /* Prefixes that change operand size are ignored for this instruction. */ - if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ - d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); - delta++; diff --git a/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch b/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch deleted file mode 100644 index 0a34759..0000000 --- a/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch +++ /dev/null @@ -1,206 +0,0 @@ -commit f4abcc05fdba3f25890a9b30b71d511ccc906d46 -Author: Mark Wielaard -Date: Mon Jul 27 22:43:28 2020 +0200 - - Incorrect call-graph tracking due to new _dl_runtime_resolve_xsave* - - Newer glibc have alternate ld.so _ld_runtime_resolve functions. - Namely _dl_runtime_resolve_xsave and _dl_runtime_resolve_xsave'2 - - This patch recognizes the xsave, xsvec and fxsave variants and - changes callgrind so that any variant counts as _dl_runtime_resolve. 
- - Original patch by paulo.cesar.pereira.de.andrade@gmail.com - https://bugs.kde.org/show_bug.cgi?id=415293 - -diff --git a/callgrind/fn.c b/callgrind/fn.c -index e9d8dd214..7cce1a0c7 100644 ---- a/callgrind/fn.c -+++ b/callgrind/fn.c -@@ -30,8 +30,11 @@ - - static fn_array current_fn_active; - --static Addr runtime_resolve_addr = 0; --static int runtime_resolve_length = 0; -+/* x86_64 defines 4 variants. */ -+#define MAX_RESOLVE_ADDRS 4 -+static int runtime_resolve_addrs = 0; -+static Addr runtime_resolve_addr[MAX_RESOLVE_ADDRS]; -+static int runtime_resolve_length[MAX_RESOLVE_ADDRS]; - - // a code pattern is a list of tuples (start offset, length) - struct chunk_t { int start, len; }; -@@ -56,6 +59,9 @@ static Bool check_code(obj_node* obj, - /* first chunk of pattern should always start at offset 0 and - * have at least 3 bytes */ - CLG_ASSERT((pat->chunk[0].start == 0) && (pat->chunk[0].len >2)); -+ -+ /* and we cannot be called more than MAX_RESOLVE_ADDRS times */ -+ CLG_ASSERT(runtime_resolve_addrs < MAX_RESOLVE_ADDRS); - - CLG_DEBUG(1, "check_code: %s, pattern %s, check %d bytes of [%x %x %x...]\n", - obj->name, pat->name, pat->chunk[0].len, code[0], code[1], code[2]); -@@ -93,8 +99,9 @@ static Bool check_code(obj_node* obj, - pat->name, obj->name + obj->last_slash_pos, - addr - obj->start, addr, pat->len); - -- runtime_resolve_addr = addr; -- runtime_resolve_length = pat->len; -+ runtime_resolve_addr[runtime_resolve_addrs] = addr; -+ runtime_resolve_length[runtime_resolve_addrs] = pat->len; -+ runtime_resolve_addrs++; - return True; - } - } -@@ -138,8 +145,9 @@ static Bool search_runtime_resolve(obj_node* obj) - "x86-glibc2.8", 30, {{ 0,12 }, { 16,14 }, { 30,0}} }; - - if (VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) return False; -- if (check_code(obj, code, &pat)) return True; -- if (check_code(obj, code_28, &pat_28)) return True; -+ Bool pat_p = check_code(obj, code, &pat); -+ Bool pat_28_p = check_code(obj, code_28, &pat_28); -+ if (pat_p || pat_28_p) 
return True; - return False; - #endif - -@@ -186,9 +194,98 @@ static Bool search_runtime_resolve(obj_node* obj) - static struct pattern pat = { - "amd64-def", 110, {{ 0,62 }, { 66,44 }, { 110,0 }} }; - -+ static UChar code_xsavec[] = { -+ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xc0, -+ /* 8*/ 0x48, 0x2b, 0x25, 0x00, 0x00, 0x00, 0x00, /* sub (%rip),%rsp */ -+ /*15*/ 0x48, -+ /*16*/ 0x89, 0x04, 0x24, 0x48, 0x89, 0x4c, 0x24, 0x08, -+ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, -+ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, -+ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, -+ /*48*/ 0x30, 0xb8, 0xee, 0x00, 0x00, 0x00, 0x31, 0xd2, -+ /*56*/ 0x48, 0x89, 0x94, 0x24, 0x50, 0x02, 0x00, 0x00, -+ /*64*/ 0x48, 0x89, 0x94, 0x24, 0x58, 0x02, 0x00, 0x00, -+ /*72*/ 0x48, 0x89, 0x94, 0x24, 0x60, 0x02, 0x00, 0x00, -+ /*80*/ 0x48, 0x89, 0x94, 0x24, 0x68, 0x02, 0x00, 0x00, -+ /*88*/ 0x48, 0x89, 0x94, 0x24, 0x70, 0x02, 0x00, 0x00, -+ /*96*/ 0x48, 0x89, 0x94, 0x24, 0x78, 0x02, 0x00, 0x00, -+ /*04*/ 0x0f, 0xc7, 0x64, 0x24, 0x40, 0x48, 0x8b, 0x73, -+ /*112*/0x10, 0x48, 0x8b, 0x7b, 0x08, -+ /*117*/0xe8, 0x00, 0x00, 0x00, 0x00, /* callq <_dl_fixup> */ -+ /*122*/0x49, 0x89, 0xc3, 0xb8, 0xee, 0x00, -+ /*128*/0x00, 0x00, 0x31, 0xd2, 0x0f, 0xae, 0x6c, 0x24, -+ /*136*/0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, 0x4c, 0x8b, -+ /*144*/0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20, -+ /*152*/0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, -+ /*160*/0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, -+ /*168*/0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, 0x48, 0x8b, -+ /*176*/0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, 0xf2, 0x41, -+ /*184*/0xff, 0xe3 }; -+ static struct pattern pat_xsavec = { -+ "amd64-xsavec", 186, {{ 0,11 }, { 15,103 }, {122,64}, { 186,0 }} }; -+ -+ static UChar code_xsave[] = { -+ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xc0, -+ /* 8*/ 0x48, 0x2b, 0x25, 0x00, 0x00, 0x00, 0x00, /* sub (%rip),%rsp */ -+ /*15*/ 0x48, -+ /*16*/ 0x89, 0x04, 0x24, 0x48, 0x89, 
0x4c, 0x24, 0x08, -+ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, -+ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, -+ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, -+ /*48*/ 0x30, 0xb8, 0xee, 0x00, 0x00, 0x00, 0x31, 0xd2, -+ /*56*/ 0x48, 0x89, 0x94, 0x24, 0x40, 0x02, 0x00, 0x00, -+ /*64*/ 0x48, 0x89, 0x94, 0x24, 0x48, 0x02, 0x00, 0x00, -+ /*72*/ 0x48, 0x89, 0x94, 0x24, 0x50, 0x02, 0x00, 0x00, -+ /*80*/ 0x48, 0x89, 0x94, 0x24, 0x58, 0x02, 0x00, 0x00, -+ /*88*/ 0x48, 0x89, 0x94, 0x24, 0x60, 0x02, 0x00, 0x00, -+ /*96*/ 0x48, 0x89, 0x94, 0x24, 0x68, 0x02, 0x00, 0x00, -+ /*104*/0x48, 0x89, 0x94, 0x24, 0x70, 0x02, 0x00, 0x00, -+ /*112*/0x48, 0x89, 0x94, 0x24, 0x78, 0x02, 0x00, 0x00, -+ /*120*/0x0f, 0xae, 0x64, 0x24, 0x40, 0x48, 0x8b, 0x73, -+ /*128*/0x10, 0x48, 0x8b, 0x7b, 0x08, -+ /*133*/0xe8, 0x00, 0x00, 0x00, 0x00, /* callq <_dl_fixup> */ -+ /*138*/0x49, 0x89, 0xc3, 0xb8, 0xee, 0x00, -+ /*144*/0x00, 0x00, 0x31, 0xd2, 0x0f, 0xae, 0x6c, 0x24, -+ /*152*/0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, 0x4c, 0x8b, -+ /*160*/0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20, -+ /*168*/0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, -+ /*176*/0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, -+ /*184*/0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, 0x48, 0x8b, -+ /*192*/0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, 0xf2, 0x41, -+ /*200*/0xff, 0xe3 }; -+ static struct pattern pat_xsave = { -+ "amd64-xsave", 202, {{ 0,11 }, { 15,119 }, {138,64}, { 202,0 }} }; -+ -+ static UChar code_fxsave[] = { -+ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xf0, -+ /* 8*/ 0x48, 0x81, 0xec, 0x40, 0x02, 0x00, 0x00, 0x48, -+ /*16*/ 0x89, 0x04, 0x24, 0x48, 0x89, 0x4c, 0x24, 0x08, -+ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, -+ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, -+ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, -+ /*48*/ 0x30, 0x0f, 0xae, 0x44, 0x24, 0x40, 0x48, 0x8b, -+ /*56*/ 0x73, 0x10, 0x48, 0x8b, 0x7b, 0x08, -+ /*62*/ 0xe8, 0x00, 0x00, 0x00, 0x00, /* callq 
<_dl_fixup> */ -+ /*67*/ 0x49, 0x89, 0xc3, 0x0f, 0xae, -+ /*72*/ 0x4c, 0x24, 0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, -+ /*80*/ 0x4c, 0x8b, 0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, -+ /*88*/ 0x24, 0x20, 0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, -+ /*96*/ 0x8b, 0x54, 0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, -+ /*104*/0x08, 0x48, 0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, -+ /*112*/0x48, 0x8b, 0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, -+ /*120*/0xf2, 0x41, 0xff, 0xe3 }; -+ static struct pattern pat_fxsave = { -+ "amd64-fxsave", 124, {{ 0,63 }, { 67,57 }, { 124,0 }} }; -+ - if ((VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) && -- (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0)) return False; -- return check_code(obj, code, &pat); -+ (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0) && -+ (VG_(strncmp)(obj->name, "/usr/lib/ld", 11) != 0) && -+ (VG_(strncmp)(obj->name, "/usr/lib64/ld", 13) != 0)) return False; -+ Bool pat_p = check_code(obj, code, &pat); -+ Bool pat_xsavec_p = check_code(obj, code_xsavec, &pat_xsavec); -+ Bool pat_xsave_p = check_code(obj, code_xsave, &pat_xsave); -+ Bool pat_fxsave_p = check_code(obj, code_fxsave, &pat_fxsave); -+ if (pat_p || pat_xsavec_p || pat_xsave_p || pat_fxsave_p) return True; - #endif - - /* For other platforms, no patterns known */ -@@ -254,7 +351,7 @@ obj_node* new_obj_node(DebugInfo* di, obj_node* next) - i++; - } - -- if (runtime_resolve_addr == 0) search_runtime_resolve(obj); -+ if (runtime_resolve_addrs == 0) search_runtime_resolve(obj); - - return obj; - } -@@ -490,6 +587,7 @@ fn_node* CLG_(get_fn_node)(BB* bb) - DebugInfo* di; - UInt line_num; - fn_node* fn; -+ Int i; - - /* fn from debug info is idempotent for a BB */ - if (bb->fn) return bb->fn; -@@ -538,12 +636,14 @@ fn_node* CLG_(get_fn_node)(BB* bb) - } - if (0 == VG_(strcmp)(fnname, "_exit") && !exit_bb) - exit_bb = bb; -- -- if (runtime_resolve_addr && -- (bb_addr(bb) >= runtime_resolve_addr) && -- (bb_addr(bb) < runtime_resolve_addr + runtime_resolve_length)) { -- /* BB in runtime_resolve found by 
code check; use this name */ -- fnname = "_dl_runtime_resolve"; -+ -+ for (i = 0; i < runtime_resolve_addrs; i++) { -+ if ((bb_addr(bb) >= runtime_resolve_addr[i]) && -+ (bb_addr(bb) < runtime_resolve_addr[i] + runtime_resolve_length[i])) { -+ /* BB in runtime_resolve found by code check; use this name */ -+ fnname = "_dl_runtime_resolve"; -+ break; -+ } - } - - /* get fn_node struct for this function */ diff --git a/SOURCES/valgrind-3.16.1-epoll.patch b/SOURCES/valgrind-3.16.1-epoll.patch deleted file mode 100644 index c6a0411..0000000 --- a/SOURCES/valgrind-3.16.1-epoll.patch +++ /dev/null @@ -1,117 +0,0 @@ -commit f326d68d762edf4b0e9604daa446b6f8ca25725a -Author: Mark Wielaard -Date: Sun Jul 26 22:40:22 2020 +0200 - - epoll_ctl warns for uninitialized padding on non-amd64 64bit arches - - struct vki_epoll_event is packed on x86_64, but not on other 64bit - arches. This means that on 64bit arches there can be padding in the - epoll_event struct. Seperately the data field is only used by user - space (which might not set the data field if it doesn't need to). - - Only check the events field on epoll_ctl. But assume both events - and data are both written to by epoll_[p]wait (exclude padding). - - https://bugs.kde.org/show_bug.cgi?id=422623 - -diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c -index 5b5b7eee6..929a4d9af 100644 ---- a/coregrind/m_syswrap/syswrap-linux.c -+++ b/coregrind/m_syswrap/syswrap-linux.c -@@ -2099,8 +2099,29 @@ PRE(sys_epoll_ctl) - SARG1, ( ARG2<3 ? epoll_ctl_s[ARG2] : "?" ), SARG3, ARG4); - PRE_REG_READ4(long, "epoll_ctl", - int, epfd, int, op, int, fd, struct vki_epoll_event *, event); -- if (ARG2 != VKI_EPOLL_CTL_DEL) -- PRE_MEM_READ( "epoll_ctl(event)", ARG4, sizeof(struct vki_epoll_event) ); -+ if (ARG2 != VKI_EPOLL_CTL_DEL) { -+ /* Just check the events field, the data field is for user space and -+ unused by the kernel. 
*/ -+ struct vki_epoll_event *event = (struct vki_epoll_event *) ARG4; -+ PRE_MEM_READ( "epoll_ctl(event)", (Addr) &event->events, -+ sizeof(__vki_u32) ); -+ } -+} -+ -+/* RES event records have been written (exclude padding). */ -+static void epoll_post_helper ( ThreadId tid, SyscallArgs* arrghs, -+ SyscallStatus* status ) -+{ -+ vg_assert(SUCCESS); -+ if (RES > 0) { -+ Int i; -+ struct vki_epoll_event **events = (struct vki_epoll_event**)(Addr)ARG2; -+ for (i = 0; i < RES; i++) { -+ /* Assume both events and data are set (data is user space only). */ -+ POST_FIELD_WRITE(events[i]->events); -+ POST_FIELD_WRITE(events[i]->data); -+ } -+ } - } - - PRE(sys_epoll_wait) -@@ -2111,13 +2132,12 @@ PRE(sys_epoll_wait) - PRE_REG_READ4(long, "epoll_wait", - int, epfd, struct vki_epoll_event *, events, - int, maxevents, int, timeout); -+ /* Assume all (maxevents) events records should be (fully) writable. */ - PRE_MEM_WRITE( "epoll_wait(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); - } - POST(sys_epoll_wait) - { -- vg_assert(SUCCESS); -- if (RES > 0) -- POST_MEM_WRITE( ARG2, sizeof(struct vki_epoll_event)*RES ) ; -+ epoll_post_helper (tid, arrghs, status); - } - - PRE(sys_epoll_pwait) -@@ -2130,15 +2150,14 @@ PRE(sys_epoll_pwait) - int, epfd, struct vki_epoll_event *, events, - int, maxevents, int, timeout, vki_sigset_t *, sigmask, - vki_size_t, sigsetsize); -+ /* Assume all (maxevents) events records should be (fully) writable. */ - PRE_MEM_WRITE( "epoll_pwait(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); - if (ARG5) - PRE_MEM_READ( "epoll_pwait(sigmask)", ARG5, sizeof(vki_sigset_t) ); - } - POST(sys_epoll_pwait) - { -- vg_assert(SUCCESS); -- if (RES > 0) -- POST_MEM_WRITE( ARG2, sizeof(struct vki_epoll_event)*RES ) ; -+ epoll_post_helper (tid, arrghs, status); - } - - PRE(sys_eventfd) -commit b74f9f23c8758c77367f18368ea95baa858544cb -Author: Mark Wielaard -Date: Tue Aug 18 23:58:55 2020 +0200 - - Fix epoll_ctl setting of array event and data fields. 
- - Fix for https://bugs.kde.org/show_bug.cgi?id=422623 in commit ecf5ba119 - epoll_ctl warns for uninitialized padding on non-amd64 64bit arches - contained a bug. A pointer to an array is not a pointer to a pointer to - an array. Found by a Fedora user: - https://bugzilla.redhat.com/show_bug.cgi?id=1844778#c10 - -diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c -index 0850487e9..3f488795a 100644 ---- a/coregrind/m_syswrap/syswrap-linux.c -+++ b/coregrind/m_syswrap/syswrap-linux.c -@@ -2115,11 +2115,11 @@ static void epoll_post_helper ( ThreadId tid, SyscallArgs* arrghs, - vg_assert(SUCCESS); - if (RES > 0) { - Int i; -- struct vki_epoll_event **events = (struct vki_epoll_event**)(Addr)ARG2; -+ struct vki_epoll_event *events = (struct vki_epoll_event*)(Addr)ARG2; - for (i = 0; i < RES; i++) { - /* Assume both events and data are set (data is user space only). */ -- POST_FIELD_WRITE(events[i]->events); -- POST_FIELD_WRITE(events[i]->data); -+ POST_FIELD_WRITE(events[i].events); -+ POST_FIELD_WRITE(events[i].data); - } - } - } diff --git a/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch b/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch deleted file mode 100644 index 95da59f..0000000 --- a/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch +++ /dev/null @@ -1,27 +0,0 @@ -commit ba73f8d2ebe4b5fe8163ee5ab806f0e50961ebdf -Author: Andreas Arnez -Date: Tue Nov 3 18:17:30 2020 +0100 - - Bug 428648 - s390x: Force 12-bit amode for vector loads in isel - - Similar to Bug 417452, where the instruction selector sometimes attempted - to generate vector stores with a 20-bit displacement, the same problem has - now been reported with vector loads. - - The problem is caused in s390_isel_vec_expr_wrk(), where the addressing - mode is generated with s390_isel_amode() instead of - s390_isel_amode_short(). This is fixed. 
- -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 2f80dd850..134f3eb6f 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -3741,7 +3741,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - /* --------- LOAD --------- */ - case Iex_Load: { - HReg dst = newVRegV(env); -- s390_amode *am = s390_isel_amode(env, expr->Iex.Load.addr); -+ s390_amode *am = s390_isel_amode_short(env, expr->Iex.Load.addr); - - if (expr->Iex.Load.end != Iend_BE) - goto irreducible; diff --git a/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch b/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch deleted file mode 100644 index 747c8a8..0000000 --- a/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch +++ /dev/null @@ -1,2977 +0,0 @@ -diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h -index 9f93cff19..905429015 100644 ---- a/VEX/priv/guest_s390_defs.h -+++ b/VEX/priv/guest_s390_defs.h -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 2010-2020 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as -@@ -263,26 +263,27 @@ extern ULong last_execute_target; - before S390_VEC_OP_LAST. 
*/ - typedef enum { - S390_VEC_OP_INVALID = 0, -- S390_VEC_OP_VPKS = 1, -- S390_VEC_OP_VPKLS = 2, -- S390_VEC_OP_VFAE = 3, -- S390_VEC_OP_VFEE = 4, -- S390_VEC_OP_VFENE = 5, -- S390_VEC_OP_VISTR = 6, -- S390_VEC_OP_VSTRC = 7, -- S390_VEC_OP_VCEQ = 8, -- S390_VEC_OP_VTM = 9, -- S390_VEC_OP_VGFM = 10, -- S390_VEC_OP_VGFMA = 11, -- S390_VEC_OP_VMAH = 12, -- S390_VEC_OP_VMALH = 13, -- S390_VEC_OP_VCH = 14, -- S390_VEC_OP_VCHL = 15, -- S390_VEC_OP_VFCE = 16, -- S390_VEC_OP_VFCH = 17, -- S390_VEC_OP_VFCHE = 18, -- S390_VEC_OP_VFTCI = 19, -- S390_VEC_OP_LAST = 20 // supposed to be the last element in enum -+ S390_VEC_OP_VPKS, -+ S390_VEC_OP_VPKLS, -+ S390_VEC_OP_VFAE, -+ S390_VEC_OP_VFEE, -+ S390_VEC_OP_VFENE, -+ S390_VEC_OP_VISTR, -+ S390_VEC_OP_VSTRC, -+ S390_VEC_OP_VCEQ, -+ S390_VEC_OP_VTM, -+ S390_VEC_OP_VGFM, -+ S390_VEC_OP_VGFMA, -+ S390_VEC_OP_VMAH, -+ S390_VEC_OP_VMALH, -+ S390_VEC_OP_VCH, -+ S390_VEC_OP_VCHL, -+ S390_VEC_OP_VFTCI, -+ S390_VEC_OP_VFMIN, -+ S390_VEC_OP_VFMAX, -+ S390_VEC_OP_VBPERM, -+ S390_VEC_OP_VMSL, -+ S390_VEC_OP_LAST // supposed to be the last element in enum - } s390x_vec_op_t; - - /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one -diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c -index a470d9f8d..b71b621ae 100644 ---- a/VEX/priv/guest_s390_helpers.c -+++ b/VEX/priv/guest_s390_helpers.c -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 
2010-2020 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as -@@ -314,20 +314,11 @@ ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;} - /*--- Dirty helper for Store Facility instruction ---*/ - /*------------------------------------------------------------*/ - #if defined(VGA_s390x) --static void --s390_set_facility_bit(ULong *addr, UInt bitno, UInt value) --{ -- addr += bitno / 64; -- bitno = bitno % 64; -- -- ULong mask = 1; -- mask <<= (63 - bitno); - -- if (value == 1) { -- *addr |= mask; // set -- } else { -- *addr &= ~mask; // clear -- } -+static ULong -+s390_stfle_range(UInt lo, UInt hi) -+{ -+ return ((1UL << (hi + 1 - lo)) - 1) << (63 - (hi % 64)); - } - - ULong -@@ -336,6 +327,77 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) - ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i; - register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF; /* r0[56:63] */ - -+ /* Restrict to facilities that we know about and that we assume to be -+ compatible with Valgrind. Of course, in this way we may reject features -+ that Valgrind is not really involved in (and thus would be compatible -+ with), but quering for such features doesn't seem like a typical use -+ case. */ -+ ULong accepted_facility[S390_NUM_FACILITY_DW] = { -+ /* === 0 .. 
63 === */ -+ (s390_stfle_range(0, 16) -+ /* 17: message-security-assist, not supported */ -+ | s390_stfle_range(18, 19) -+ /* 20: HFP-multiply-and-add/subtract, not supported */ -+ | s390_stfle_range(21, 22) -+ /* 23: HFP-unnormalized-extension, not supported */ -+ | s390_stfle_range(24, 25) -+ /* 26: parsing-enhancement, not supported */ -+ | s390_stfle_range(27, 28) -+ /* 29: unassigned */ -+ | s390_stfle_range(30, 30) -+ /* 31: extract-CPU-time, not supported */ -+ | s390_stfle_range(32, 41) -+ /* 42-43: DFP, not fully supported */ -+ /* 44: PFPO, not fully supported */ -+ | s390_stfle_range(45, 47) -+ /* 48: DFP zoned-conversion, not supported */ -+ /* 49: includes PPA, not supported */ -+ /* 50: constrained transactional-execution, not supported */ -+ | s390_stfle_range(51, 55) -+ /* 56: unassigned */ -+ /* 57: MSA5, not supported */ -+ | s390_stfle_range(58, 60) -+ /* 61: miscellaneous-instruction 3, not supported */ -+ | s390_stfle_range(62, 63)), -+ -+ /* === 64 .. 127 === */ -+ (s390_stfle_range(64, 72) -+ /* 73: transactional-execution, not supported */ -+ | s390_stfle_range(74, 75) -+ /* 76: MSA3, not supported */ -+ /* 77: MSA4, not supported */ -+ | s390_stfle_range(78, 78) -+ /* 80: DFP packed-conversion, not supported */ -+ /* 81: PPA-in-order, not supported */ -+ | s390_stfle_range(82, 82) -+ /* 83-127: unassigned */ ), -+ -+ /* === 128 .. 
191 === */ -+ (s390_stfle_range(128, 131) -+ /* 132: unassigned */ -+ /* 133: guarded-storage, not supported */ -+ /* 134: vector packed decimal, not supported */ -+ | s390_stfle_range(135, 135) -+ /* 136: unassigned */ -+ /* 137: unassigned */ -+ | s390_stfle_range(138, 142) -+ /* 143: unassigned */ -+ | s390_stfle_range(144, 145) -+ /* 146: MSA8, not supported */ -+ | s390_stfle_range(147, 147) -+ /* 148: vector-enhancements 2, not supported */ -+ | s390_stfle_range(149, 149) -+ /* 150: unassigned */ -+ /* 151: DEFLATE-conversion, not supported */ -+ /* 153: unassigned */ -+ /* 154: unassigned */ -+ /* 155: MSA9, not supported */ -+ | s390_stfle_range(156, 156) -+ /* 157-167: unassigned */ -+ | s390_stfle_range(168, 168) -+ /* 168-191: unassigned */ ), -+ }; -+ - /* We cannot store more than S390_NUM_FACILITY_DW - (and it makes not much sense to do so anyhow) */ - if (reg0 > S390_NUM_FACILITY_DW - 1) -@@ -351,35 +413,9 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) - /* Update guest register 0 with what STFLE set r0 to */ - guest_state->guest_r0 = reg0; - -- /* Set default: VM facilities = host facilities */ -+ /* VM facilities = host facilities, filtered by acceptance */ - for (i = 0; i < num_dw; ++i) -- addr[i] = hoststfle[i]; -- -- /* Now adjust the VM facilities according to what the VM supports */ -- s390_set_facility_bit(addr, S390_FAC_LDISP, 1); -- s390_set_facility_bit(addr, S390_FAC_EIMM, 1); -- s390_set_facility_bit(addr, S390_FAC_ETF2, 1); -- s390_set_facility_bit(addr, S390_FAC_ETF3, 1); -- s390_set_facility_bit(addr, S390_FAC_GIE, 1); -- s390_set_facility_bit(addr, S390_FAC_EXEXT, 1); -- s390_set_facility_bit(addr, S390_FAC_HIGHW, 1); -- s390_set_facility_bit(addr, S390_FAC_LSC2, 1); -- -- s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0); -- s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0); -- s390_set_facility_bit(addr, S390_FAC_XCPUT, 0); -- s390_set_facility_bit(addr, S390_FAC_MSA, 0); -- s390_set_facility_bit(addr, 
S390_FAC_PENH, 0); -- s390_set_facility_bit(addr, S390_FAC_DFP, 0); -- s390_set_facility_bit(addr, S390_FAC_PFPO, 0); -- s390_set_facility_bit(addr, S390_FAC_DFPZC, 0); -- s390_set_facility_bit(addr, S390_FAC_MISC, 0); -- s390_set_facility_bit(addr, S390_FAC_CTREXE, 0); -- s390_set_facility_bit(addr, S390_FAC_TREXE, 0); -- s390_set_facility_bit(addr, S390_FAC_MSA4, 0); -- s390_set_facility_bit(addr, S390_FAC_VXE, 0); -- s390_set_facility_bit(addr, S390_FAC_VXE2, 0); -- s390_set_facility_bit(addr, S390_FAC_DFLT, 0); -+ addr[i] = hoststfle[i] & accepted_facility[i]; - - return cc; - } -@@ -2500,25 +2536,26 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST); - static const UChar opcodes[][2] = { - {0x00, 0x00}, /* invalid */ -- {0xe7, 0x97}, /* VPKS */ -- {0xe7, 0x95}, /* VPKLS */ -- {0xe7, 0x82}, /* VFAE */ -- {0xe7, 0x80}, /* VFEE */ -- {0xe7, 0x81}, /* VFENE */ -- {0xe7, 0x5c}, /* VISTR */ -- {0xe7, 0x8a}, /* VSTRC */ -- {0xe7, 0xf8}, /* VCEQ */ -- {0xe7, 0xd8}, /* VTM */ -- {0xe7, 0xb4}, /* VGFM */ -- {0xe7, 0xbc}, /* VGFMA */ -- {0xe7, 0xab}, /* VMAH */ -- {0xe7, 0xa9}, /* VMALH */ -- {0xe7, 0xfb}, /* VCH */ -- {0xe7, 0xf9}, /* VCHL */ -- {0xe7, 0xe8}, /* VFCE */ -- {0xe7, 0xeb}, /* VFCH */ -- {0xe7, 0xea}, /* VFCHE */ -- {0xe7, 0x4a} /* VFTCI */ -+ [S390_VEC_OP_VPKS] = {0xe7, 0x97}, -+ [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, -+ [S390_VEC_OP_VFAE] = {0xe7, 0x82}, -+ [S390_VEC_OP_VFEE] = {0xe7, 0x80}, -+ [S390_VEC_OP_VFENE] = {0xe7, 0x81}, -+ [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, -+ [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, -+ [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, -+ [S390_VEC_OP_VTM] = {0xe7, 0xd8}, -+ [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, -+ [S390_VEC_OP_VGFMA] = {0xe7, 0xbc}, -+ [S390_VEC_OP_VMAH] = {0xe7, 0xab}, -+ [S390_VEC_OP_VMALH] = {0xe7, 0xa9}, -+ [S390_VEC_OP_VCH] = {0xe7, 0xfb}, -+ [S390_VEC_OP_VCHL] = {0xe7, 0xf9}, -+ [S390_VEC_OP_VFTCI] = {0xe7, 0x4a}, -+ [S390_VEC_OP_VFMIN] = {0xe7, 0xee}, 
-+ [S390_VEC_OP_VFMAX] = {0xe7, 0xef}, -+ [S390_VEC_OP_VBPERM]= {0xe7, 0x85}, -+ [S390_VEC_OP_VMSL] = {0xe7, 0xb8}, - }; - - union { -@@ -2612,6 +2649,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - case S390_VEC_OP_VGFMA: - case S390_VEC_OP_VMAH: - case S390_VEC_OP_VMALH: -+ case S390_VEC_OP_VMSL: - the_insn.VRRd.v1 = 1; - the_insn.VRRd.v2 = 2; - the_insn.VRRd.v3 = 3; -@@ -2621,9 +2659,9 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, - the_insn.VRRd.m6 = d->m5; - break; - -- case S390_VEC_OP_VFCE: -- case S390_VEC_OP_VFCH: -- case S390_VEC_OP_VFCHE: -+ case S390_VEC_OP_VFMIN: -+ case S390_VEC_OP_VFMAX: -+ case S390_VEC_OP_VBPERM: - the_insn.VRRc.v1 = 1; - the_insn.VRRc.v2 = 2; - the_insn.VRRc.v3 = 3; -diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c -index c27a8d3fe..5f2c5ce98 100644 ---- a/VEX/priv/guest_s390_toIR.c -+++ b/VEX/priv/guest_s390_toIR.c -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 
2010-2020 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as -@@ -248,6 +248,13 @@ typedef enum { - #define VRS_d2(insn) (((insn) >> 32) & 0xfff) - #define VRS_m4(insn) (((insn) >> 28) & 0xf) - #define VRS_rxb(insn) (((insn) >> 24) & 0xf) -+#define VRSd_v1(insn) (((insn) >> 28) & 0xf) -+#define VRSd_r3(insn) (((insn) >> 48) & 0xf) -+#define VSI_i3(insn) (((insn) >> 48) & 0xff) -+#define VSI_b2(insn) (((insn) >> 44) & 0xf) -+#define VSI_d2(insn) (((insn) >> 32) & 0xfff) -+#define VSI_v1(insn) (((insn) >> 28) & 0xf) -+#define VSI_rxb(insn) (((insn) >> 24) & 0xf) - - - /*------------------------------------------------------------*/ -@@ -1934,6 +1941,26 @@ s390_vr_get_type(const UChar m) - return results[m]; - } - -+/* Determine IRType from instruction's floating-point format field */ -+static IRType -+s390_vr_get_ftype(const UChar m) -+{ -+ static const IRType results[] = {Ity_F32, Ity_F64, Ity_F128}; -+ if (m >= 2 && m <= 4) -+ return results[m - 2]; -+ return Ity_INVALID; -+} -+ -+/* Determine number of elements from instruction's floating-point format -+ field */ -+static UChar -+s390_vr_get_n_elem(const UChar m) -+{ -+ if (m >= 2 && m <= 4) -+ return 1 << (4 - m); -+ return 0; -+} -+ - /* Determine if Condition Code Set (CS) flag is set in m field */ - #define s390_vr_is_cs_set(m) (((m) & 0x1) != 0) - -@@ -2188,12 +2215,15 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) - goto invalidIndex; - } - return vr_offset(archreg) + sizeof(ULong) * index; -+ - case Ity_V128: -+ case Ity_F128: - if(index == 0) { - return vr_qw_offset(archreg); - } else { - goto invalidIndex; - } -+ - default: - vpanic("s390_vr_offset_by_index: unknown type"); - } -@@ -2211,7 +2241,14 @@ put_vr(UInt archreg, IRType type, UChar index, IRExpr *expr) - UInt offset = s390_vr_offset_by_index(archreg, type, index); - vassert(typeOfIRExpr(irsb->tyenv, expr) == type); - -- stmt(IRStmt_Put(offset, 
expr)); -+ if (type == Ity_F128) { -+ IRTemp val = newTemp(Ity_F128); -+ assign(val, expr); -+ stmt(IRStmt_Put(offset, unop(Iop_F128HItoF64, mkexpr(val)))); -+ stmt(IRStmt_Put(offset + 8, unop(Iop_F128LOtoF64, mkexpr(val)))); -+ } else { -+ stmt(IRStmt_Put(offset, expr)); -+ } - } - - /* Read type sized part specified by index of a vr register. */ -@@ -2219,6 +2256,11 @@ static IRExpr * - get_vr(UInt archreg, IRType type, UChar index) - { - UInt offset = s390_vr_offset_by_index(archreg, type, index); -+ if (type == Ity_F128) { -+ return binop(Iop_F64HLtoF128, -+ IRExpr_Get(offset, Ity_F64), -+ IRExpr_Get(offset + 8, Ity_F64)); -+ } - return IRExpr_Get(offset, type); - } - -@@ -2294,11 +2336,11 @@ s390_getCountToBlockBoundary(IRTemp op2addr, UChar m) - return mkexpr(output); - } - --/* Load bytes into v1. -- maxIndex specifies max index to load and must be Ity_I32. -- If maxIndex >= 15, all 16 bytes are loaded. -- All bytes after maxIndex are zeroed. */ --static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) -+/* Starting from addr, load at most maxIndex + 1 bytes into v1. Fill the -+ leftmost or rightmost bytes of v1, depending on whether `rightmost' is set. -+ If maxIndex >= 15, load all 16 bytes; otherwise clear the remaining bytes. */ -+static void -+s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) - { - IRTemp maxIdx = newTemp(Ity_I32); - IRTemp cappedMax = newTemp(Ity_I64); -@@ -2311,8 +2353,8 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) - crossed if and only if the real insn would have crossed it as well. - Thus, if the bytes to load are fully contained in an aligned 16-byte - chunk, load the whole 16-byte aligned chunk, and otherwise load 16 bytes -- from the unaligned address. Then shift the loaded data left-aligned -- into the target vector register. */ -+ from the unaligned address. Then shift the loaded data left- or -+ right-aligned into the target vector register. 
*/ - - assign(maxIdx, maxIndex); - assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), -@@ -2325,20 +2367,60 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) - assign(back, mkite(binop(Iop_CmpLE64U, mkexpr(offset), mkexpr(zeroed)), - mkexpr(offset), mkU64(0))); - -- /* How much to shift the loaded 16-byte vector to the right, and then to -- the left. Since both 'zeroed' and 'back' range from 0 to 15, the shift -- amounts range from 0 to 120. */ -- IRExpr *shrAmount = binop(Iop_Shl64, -- binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), -- mkU8(3)); -- IRExpr *shlAmount = binop(Iop_Shl64, mkexpr(zeroed), mkU8(3)); -+ IRExpr* chunk = load(Ity_V128, binop(Iop_Sub64, mkexpr(addr), mkexpr(back))); -+ -+ /* Shift the loaded 16-byte vector to the right, then to the left, or vice -+ versa, where each shift amount ranges from 0 to 120. */ -+ IRExpr* shift1; -+ IRExpr* shift2 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(zeroed), mkU8(3))); -+ -+ if (rightmost) { -+ shift1 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(back), mkU8(3))); -+ put_vr_qw(v1, binop(Iop_ShrV128, -+ binop(Iop_ShlV128, chunk, shift1), -+ shift2)); -+ } else { -+ shift1 = unop(Iop_64to8, -+ binop(Iop_Shl64, -+ binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), -+ mkU8(3))); -+ put_vr_qw(v1, binop(Iop_ShlV128, -+ binop(Iop_ShrV128, chunk, shift1), -+ shift2)); -+ } -+} -+ -+/* Store at most maxIndex + 1 bytes from v1 to addr. Store the leftmost or -+ rightmost bytes of v1, depending on whether `rightmost' is set. If maxIndex -+ >= 15, store all 16 bytes. 
*/ -+static void -+s390_vr_storeWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) -+{ -+ IRTemp maxIdx = newTemp(Ity_I32); -+ IRTemp cappedMax = newTemp(Ity_I64); -+ IRTemp counter = newTemp(Ity_I64); -+ IRExpr* offset; -+ -+ assign(maxIdx, maxIndex); -+ assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), -+ unop(Iop_32Uto64, mkexpr(maxIdx)), mkU64(15))); -+ -+ assign(counter, get_counter_dw0()); -+ -+ if (rightmost) -+ offset = binop(Iop_Add64, -+ binop(Iop_Sub64, mkU64(15), mkexpr(cappedMax)), -+ mkexpr(counter)); -+ else -+ offset = mkexpr(counter); -+ -+ store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), -+ binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, offset))); - -- put_vr_qw(v1, binop(Iop_ShlV128, -- binop(Iop_ShrV128, -- load(Ity_V128, -- binop(Iop_Sub64, mkexpr(addr), mkexpr(back))), -- unop(Iop_64to8, shrAmount)), -- unop(Iop_64to8, shlAmount))); -+ /* Check for end of field */ -+ put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); -+ iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(cappedMax))); -+ put_counter_dw0(mkU64(0)); - } - - /* Bitwise vCond ? v1 : v2 -@@ -3749,6 +3831,28 @@ s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, - s390_disasm(ENC5(MNM, GPR, UDXB, VR, UINT), mnm, r1, d2, 0, b2, v3, m4); - } - -+static void -+s390_format_VRS_RRDV(const HChar *(*irgen)(UChar v1, UChar r3, IRTemp op2addr), -+ UChar v1, UChar r3, UChar b2, UShort d2, UChar rxb) -+{ -+ const HChar *mnm; -+ IRTemp op2addr = newTemp(Ity_I64); -+ -+ if (! s390_host_has_vx) { -+ emulation_failure(EmFail_S390X_vx); -+ return; -+ } -+ -+ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? 
get_gpr_dw0(b2) : -+ mkU64(0))); -+ -+ v1 = s390_vr_getVRindex(v1, 4, rxb); -+ mnm = irgen(v1, r3, op2addr); -+ -+ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) -+ s390_disasm(ENC4(MNM, VR, GPR, UDXB), mnm, v1, r3, d2, 0, b2); -+} -+ - - static void - s390_format_VRS_VRDVM(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3, -@@ -4081,6 +4185,29 @@ s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, - mnm, v1, v2, v3, m4, m5, m6); - } - -+static void -+s390_format_VSI_URDV(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar i3), -+ UChar v1, UChar b2, UChar d2, UChar i3, UChar rxb) -+{ -+ const HChar *mnm; -+ IRTemp op2addr = newTemp(Ity_I64); -+ -+ if (!s390_host_has_vx) { -+ emulation_failure(EmFail_S390X_vx); -+ return; -+ } -+ -+ v1 = s390_vr_getVRindex(v1, 4, rxb); -+ -+ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : -+ mkU64(0))); -+ -+ mnm = irgen(v1, op2addr, i3); -+ -+ if (vex_traceflags & VEX_TRACE_FE) -+ s390_disasm(ENC4(MNM, VR, UDXB, UINT), mnm, v1, d2, 0, b2, i3); -+} -+ - /*------------------------------------------------------------*/ - /*--- Build IR for opcodes ---*/ - /*------------------------------------------------------------*/ -@@ -16186,7 +16313,9 @@ s390_irgen_VGM(UChar v1, UShort i2, UChar m3) - static const HChar * - s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) - { -- IRType type = s390_vr_get_type(m3); -+ s390_insn_assert("vllez", m3 <= 3 || m3 == 6); -+ -+ IRType type = s390_vr_get_type(m3 & 3); - IRExpr* op2 = load(type, mkexpr(op2addr)); - IRExpr* op2as64bit; - switch (type) { -@@ -16206,7 +16335,13 @@ s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) - vpanic("s390_irgen_VLLEZ: unknown type"); - } - -- put_vr_dw0(v1, op2as64bit); -+ if (m3 == 6) { -+ /* left-aligned */ -+ put_vr_dw0(v1, binop(Iop_Shl64, op2as64bit, mkU8(32))); -+ } else { -+ /* right-aligned */ -+ put_vr_dw0(v1, op2as64bit); -+ } - put_vr_dw1(v1, mkU64(0)); - return "vllez"; - } -@@ -16615,7 +16750,7 
@@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) - s390_getCountToBlockBoundary(addr, m3), - mkU32(1)); - -- s390_vr_loadWithLength(v1, addr, maxIndex); -+ s390_vr_loadWithLength(v1, addr, maxIndex, False); - - return "vlbb"; - } -@@ -16623,41 +16758,50 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) - static const HChar * - s390_irgen_VLL(UChar v1, IRTemp addr, UChar r3) - { -- s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3)); -+ s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), False); - - return "vll"; - } - - static const HChar * --s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) -+s390_irgen_VLRL(UChar v1, IRTemp addr, UChar i3) - { -- IRTemp counter = newTemp(Ity_I64); -- IRTemp maxIndexToStore = newTemp(Ity_I64); -- IRTemp gpr3 = newTemp(Ity_I64); -- -- assign(gpr3, unop(Iop_32Uto64, get_gpr_w1(r3))); -- assign(maxIndexToStore, mkite(binop(Iop_CmpLE64U, -- mkexpr(gpr3), -- mkU64(16) -- ), -- mkexpr(gpr3), -- mkU64(16) -- ) -- ); -+ s390_insn_assert("vlrl", (i3 & 0xf0) == 0); -+ s390_vr_loadWithLength(v1, addr, mkU32((UInt) i3), True); - -- assign(counter, get_counter_dw0()); -+ return "vlrl"; -+} - -- store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), -- binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, mkexpr(counter)))); -+static const HChar * -+s390_irgen_VLRLR(UChar v1, UChar r3, IRTemp addr) -+{ -+ s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), True); - -- /* Check for end of field */ -- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); -- iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(maxIndexToStore))); -- put_counter_dw0(mkU64(0)); -+ return "vlrlr"; -+} - -+static const HChar * -+s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) -+{ -+ s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), False); - return "vstl"; - } - -+static const HChar * -+s390_irgen_VSTRL(UChar v1, IRTemp addr, UChar i3) -+{ -+ s390_insn_assert("vstrl", (i3 & 0xf0) == 0); -+ s390_vr_storeWithLength(v1, addr, mkU32((UInt) i3), True); -+ return 
"vstrl"; -+} -+ -+static const HChar * -+s390_irgen_VSTRLR(UChar v1, UChar r3, IRTemp addr) -+{ -+ s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), True); -+ return "vstrlr"; -+} -+ - static const HChar * - s390_irgen_VX(UChar v1, UChar v2, UChar v3) - { -@@ -16682,6 +16826,24 @@ s390_irgen_VO(UChar v1, UChar v2, UChar v3) - return "vo"; - } - -+static const HChar * -+s390_irgen_VOC(UChar v1, UChar v2, UChar v3) -+{ -+ put_vr_qw(v1, binop(Iop_OrV128, get_vr_qw(v2), -+ unop(Iop_NotV128, get_vr_qw(v3)))); -+ -+ return "voc"; -+} -+ -+static const HChar * -+s390_irgen_VNN(UChar v1, UChar v2, UChar v3) -+{ -+ put_vr_qw(v1, unop(Iop_NotV128, -+ binop(Iop_AndV128, get_vr_qw(v2), get_vr_qw(v3)))); -+ -+ return "vnn"; -+} -+ - static const HChar * - s390_irgen_VNO(UChar v1, UChar v2, UChar v3) - { -@@ -16691,6 +16853,15 @@ s390_irgen_VNO(UChar v1, UChar v2, UChar v3) - return "vno"; - } - -+static const HChar * -+s390_irgen_VNX(UChar v1, UChar v2, UChar v3) -+{ -+ put_vr_qw(v1, unop(Iop_NotV128, -+ binop(Iop_XorV128, get_vr_qw(v2), get_vr_qw(v3)))); -+ -+ return "vnx"; -+} -+ - static const HChar * - s390_irgen_LZRF(UChar r1, IRTemp op2addr) - { -@@ -17499,9 +17670,19 @@ s390_irgen_VCTZ(UChar v1, UChar v2, UChar m3) - static const HChar * - s390_irgen_VPOPCT(UChar v1, UChar v2, UChar m3) - { -- vassert(m3 == 0); -+ s390_insn_assert("vpopct", m3 <= 3); -+ -+ IRExpr* cnt = unop(Iop_Cnt8x16, get_vr_qw(v2)); - -- put_vr_qw(v1, unop(Iop_Cnt8x16, get_vr_qw(v2))); -+ if (m3 >= 1) { -+ cnt = unop(Iop_PwAddL8Ux16, cnt); -+ if (m3 >= 2) { -+ cnt = unop(Iop_PwAddL16Ux8, cnt); -+ if (m3 == 3) -+ cnt = unop(Iop_PwAddL32Ux4, cnt); -+ } -+ } -+ put_vr_qw(v1, cnt); - - return "vpopct"; - } -@@ -18335,12 +18516,53 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) - return "vmalh"; - } - -+static const HChar * -+s390_irgen_VMSL(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) -+{ -+ s390_insn_assert("vmsl", m5 == 3 && (m6 & 3) == 0); -+ -+ IRDirty* d; -+ 
IRTemp cc = newTemp(Ity_I64); -+ -+ s390x_vec_op_details_t details = { .serialized = 0ULL }; -+ details.op = S390_VEC_OP_VMSL; -+ details.v1 = v1; -+ details.v2 = v2; -+ details.v3 = v3; -+ details.v4 = v4; -+ details.m4 = m5; -+ details.m5 = m6; -+ -+ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -+ &s390x_dirtyhelper_vec_op, -+ mkIRExprVec_2(IRExpr_GSPTR(), -+ mkU64(details.serialized))); -+ -+ d->nFxState = 4; -+ vex_bzero(&d->fxState, sizeof(d->fxState)); -+ d->fxState[0].fx = Ifx_Read; -+ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -+ d->fxState[0].size = sizeof(V128); -+ d->fxState[1].fx = Ifx_Read; -+ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -+ d->fxState[1].size = sizeof(V128); -+ d->fxState[2].fx = Ifx_Read; -+ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); -+ d->fxState[2].size = sizeof(V128); -+ d->fxState[3].fx = Ifx_Write; -+ d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -+ d->fxState[3].size = sizeof(V128); -+ -+ stmt(IRStmt_Dirty(d)); -+ -+ return "vmsl"; -+} -+ - static void --s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, -+s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, - UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4); -- UChar maxIndex = isSingleElementOp ? 0 : 1; - - /* For Iop_F32toF64 we do this: - f32[0] -> f64[0] -@@ -18353,14 +18575,21 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, - The magic below with scaling factors is used to achieve the logic - described above. - */ -- const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; -- const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 
2 : 1; -- -- const Bool isUnary = (op == Iop_F32toF64); -- for (UChar i = 0; i <= maxIndex; i++) { -+ Int size_diff = sizeofIRType(toType) - sizeofIRType(fromType); -+ const UChar sourceIndexScaleFactor = size_diff > 0 ? 2 : 1; -+ const UChar destinationIndexScaleFactor = size_diff < 0 ? 2 : 1; -+ UChar n_elem = (isSingleElementOp ? 1 : -+ 16 / (size_diff > 0 ? -+ sizeofIRType(toType) : sizeofIRType(fromType))); -+ -+ for (UChar i = 0; i < n_elem; i++) { - IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); - IRExpr* result; -- if (!isUnary) { -+ if (rounding) { -+ if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -+ emulation_warning(EmWarn_S390X_fpext_rounding); -+ m5 = S390_BFP_ROUND_PER_FPC; -+ } - result = binop(op, - mkexpr(encode_bfp_rounding_mode(m5)), - argument); -@@ -18369,10 +18598,6 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, - } - put_vr(v1, toType, i * destinationIndexScaleFactor, result); - } -- -- if (isSingleElementOp) { -- put_vr_dw1(v1, mkU64(0)); -- } - } - - static const HChar * -@@ -18380,12 +18605,8 @@ s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { - s390_insn_assert("vcdg", m3 == 3); - -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- emulation_warning(EmWarn_S390X_fpext_rounding); -- m5 = S390_BFP_ROUND_PER_FPC; -- } -- -- s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); -+ s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, -+ v1, v2, m3, m4, m5); - - return "vcdg"; - } -@@ -18395,12 +18616,8 @@ s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { - s390_insn_assert("vcdlg", m3 == 3); - -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- emulation_warning(EmWarn_S390X_fpext_rounding); -- m5 = S390_BFP_ROUND_PER_FPC; -- } -- -- s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); -+ s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, -+ v1, v2, m3, 
m4, m5); - - return "vcdlg"; - } -@@ -18410,12 +18627,8 @@ s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { - s390_insn_assert("vcgd", m3 == 3); - -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- emulation_warning(EmWarn_S390X_fpext_rounding); -- m5 = S390_BFP_ROUND_PER_FPC; -- } -- -- s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); -+ s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True, -+ v1, v2, m3, m4, m5); - - return "vcgd"; - } -@@ -18425,12 +18638,8 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { - s390_insn_assert("vclgd", m3 == 3); - -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- emulation_warning(EmWarn_S390X_fpext_rounding); -- m5 = S390_BFP_ROUND_PER_FPC; -- } -- -- s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); -+ s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, -+ v1, v2, m3, m4, m5); - - return "vclgd"; - } -@@ -18438,246 +18647,262 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - static const HChar * - s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vfi", m3 == 3); -+ s390_insn_assert("vfi", -+ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); - -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- emulation_warning(EmWarn_S390X_fpext_rounding); -- m5 = S390_BFP_ROUND_PER_FPC; -+ switch (m3) { -+ case 2: s390_vector_fp_convert(Iop_RoundF32toInt, Ity_F32, Ity_F32, True, -+ v1, v2, m3, m4, m5); break; -+ case 3: s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, True, -+ v1, v2, m3, m4, m5); break; -+ case 4: s390_vector_fp_convert(Iop_RoundF128toInt, Ity_F128, Ity_F128, True, -+ v1, v2, m3, m4, m5); break; - } - -- s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, -- v1, v2, m3, m4, m5); -- -- return "vcgld"; -+ return "vfi"; - } - - static const HChar * --s390_irgen_VLDE(UChar v1, UChar 
v2, UChar m3, UChar m4, UChar m5) -+s390_irgen_VFLL(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vlde", m3 == 2); -+ s390_insn_assert("vfll", m3 == 2 || (s390_host_has_vxe && m3 == 3)); - -- s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); -+ if (m3 == 2) -+ s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, False, -+ v1, v2, m3, m4, m5); -+ else -+ s390_vector_fp_convert(Iop_F64toF128, Ity_F64, Ity_F128, False, -+ v1, v2, m3, m4, m5); - -- return "vlde"; -+ return "vfll"; - } - - static const HChar * --s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) -+s390_irgen_VFLR(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vled", m3 == 3); -- -- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { -- m5 = S390_BFP_ROUND_PER_FPC; -- } -+ s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 2)); - -- s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); -+ if (m3 == 3) -+ s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, True, -+ v1, v2, m3, m4, m5); -+ else -+ s390_vector_fp_convert(Iop_F128toF64, Ity_F128, Ity_F64, True, -+ v1, v2, m3, m4, m5); - -- return "vled"; -+ return "vflr"; - } - - static const HChar * - s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) - { -- s390_insn_assert("vfpso", m3 == 3); -- -- IRExpr* result; -- switch (m5) { -- case 0: { -- /* Invert sign */ -- if (!s390_vr_is_single_element_control_set(m4)) { -- result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); -- } -- else { -- result = binop(Iop_64HLtoV128, -- unop(Iop_ReinterpF64asI64, -- unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), -- mkU64(0)); -- } -- break; -- } -+ s390_insn_assert("vfpso", m5 <= 2 && -+ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); - -- case 1: { -- /* Set sign to negative */ -- IRExpr* highHalf = mkU64(0x8000000000000000ULL); -- if (!s390_vr_is_single_element_control_set(m4)) { -- IRExpr* lowHalf = highHalf; -- 
IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); -- result = binop(Iop_OrV128, get_vr_qw(v2), mask); -- } -- else { -- result = binop(Iop_64HLtoV128, -- binop(Iop_Or64, get_vr_dw0(v2), highHalf), -- mkU64(0ULL)); -- } -+ Bool single = s390_vr_is_single_element_control_set(m4) || m3 == 4; -+ IRType type = single ? s390_vr_get_ftype(m3) : Ity_V128; -+ int idx = 2 * (m3 - 2) + (single ? 0 : 1); - -- break; -- } -- -- case 2: { -- /* Set sign to positive */ -- if (!s390_vr_is_single_element_control_set(m4)) { -- result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); -- } -- else { -- result = binop(Iop_64HLtoV128, -- unop(Iop_ReinterpF64asI64, -- unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), -- mkU64(0)); -- } -- -- break; -- } -- -- default: -- vpanic("s390_irgen_VFPSO: Invalid m5 value"); -- } -+ static const IROp negate_ops[] = { -+ Iop_NegF32, Iop_Neg32Fx4, -+ Iop_NegF64, Iop_Neg64Fx2, -+ Iop_NegF128 -+ }; -+ static const IROp abs_ops[] = { -+ Iop_AbsF32, Iop_Abs32Fx4, -+ Iop_AbsF64, Iop_Abs64Fx2, -+ Iop_AbsF128 -+ }; - -- put_vr_qw(v1, result); -- if (s390_vr_is_single_element_control_set(m4)) { -- put_vr_dw1(v1, mkU64(0ULL)); -+ if (m5 == 1) { -+ /* Set sign to negative */ -+ put_vr(v1, type, 0, -+ unop(negate_ops[idx], -+ unop(abs_ops[idx], get_vr(v2, type, 0)))); -+ } else { -+ /* m5 == 0: invert sign; m5 == 2: set sign to positive */ -+ const IROp *ops = m5 == 2 ? 
abs_ops : negate_ops; -+ put_vr(v1, type, 0, unop(ops[idx], get_vr(v2, type, 0))); - } - - return "vfpso"; - } - --static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, -- UChar v1, UChar v2, UChar v3, UChar m4, -- UChar m5) -+static const HChar * -+s390x_vec_fp_binary_op(const HChar* mnm, const IROp ops[], -+ UChar v1, UChar v2, UChar v3, -+ UChar m4, UChar m5) - { -- IRExpr* result; -- if (!s390_vr_is_single_element_control_set(m5)) { -- result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), -- get_vr_qw(v2), get_vr_qw(v3)); -+ s390_insn_assert(mnm, (m5 & 7) == 0 && -+ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); -+ -+ int idx = 2 * (m4 - 2); -+ -+ if (m4 == 4 || s390_vr_is_single_element_control_set(m5)) { -+ IRType type = s390_vr_get_ftype(m4); -+ put_vr(v1, type, 0, -+ triop(ops[idx], get_bfp_rounding_mode_from_fpc(), -+ get_vr(v2, type, 0), get_vr(v3, type, 0))); - } else { -- IRExpr* highHalf = triop(singleElementOp, -- get_bfp_rounding_mode_from_fpc(), -- get_vr(v2, Ity_F64, 0), -- get_vr(v3, Ity_F64, 0)); -- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), -- mkU64(0ULL)); -+ put_vr_qw(v1, triop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), -+ get_vr_qw(v2), get_vr_qw(v3))); - } - -- put_vr_qw(v1, result); -+ return mnm; - } - --static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, -- UChar v1, UChar v2, UChar m3, UChar m4) -+static const HChar * -+s390x_vec_fp_unary_op(const HChar* mnm, const IROp ops[], -+ UChar v1, UChar v2, UChar m3, UChar m4) - { -- IRExpr* result; -- if (!s390_vr_is_single_element_control_set(m4)) { -- result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), -- get_vr_qw(v2)); -+ s390_insn_assert(mnm, (m4 & 7) == 0 && -+ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); -+ -+ int idx = 2 * (m3 - 2); -+ -+ if (m3 == 4 || s390_vr_is_single_element_control_set(m4)) { -+ IRType type = s390_vr_get_ftype(m3); -+ put_vr(v1, type, 0, -+ binop(ops[idx], 
get_bfp_rounding_mode_from_fpc(), -+ get_vr(v2, type, 0))); - } - else { -- IRExpr* highHalf = binop(singleElementOp, -- get_bfp_rounding_mode_from_fpc(), -- get_vr(v2, Ity_F64, 0)); -- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), -- mkU64(0ULL)); -+ put_vr_qw(v1, binop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), -+ get_vr_qw(v2))); - } - -- put_vr_qw(v1, result); -+ return mnm; - } - - --static void --s390_vector_fp_mulAddOrSub(IROp singleElementOp, -- UChar v1, UChar v2, UChar v3, UChar v4, -- UChar m5, UChar m6) -+static const HChar * -+s390_vector_fp_mulAddOrSub(UChar v1, UChar v2, UChar v3, UChar v4, -+ UChar m5, UChar m6, -+ const HChar* mnm, const IROp single_ops[], -+ Bool negate) - { -- Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); -+ s390_insn_assert(mnm, m6 == 3 || (s390_host_has_vxe && m6 >= 2 && m6 <= 4)); -+ -+ static const IROp negate_ops[] = { Iop_NegF32, Iop_NegF64, Iop_NegF128 }; -+ IRType type = s390_vr_get_ftype(m6); -+ Bool single = s390_vr_is_single_element_control_set(m5) || m6 == 4; -+ UChar n_elem = single ? 
1 : s390_vr_get_n_elem(m6); - IRTemp irrm_temp = newTemp(Ity_I32); - assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); - IRExpr* irrm = mkexpr(irrm_temp); -- IRExpr* result; -- IRExpr* highHalf = qop(singleElementOp, -- irrm, -- get_vr(v2, Ity_F64, 0), -- get_vr(v3, Ity_F64, 0), -- get_vr(v4, Ity_F64, 0)); -- -- if (isSingleElementOp) { -- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), -- mkU64(0ULL)); -- } else { -- IRExpr* lowHalf = qop(singleElementOp, -- irrm, -- get_vr(v2, Ity_F64, 1), -- get_vr(v3, Ity_F64, 1), -- get_vr(v4, Ity_F64, 1)); -- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), -- unop(Iop_ReinterpF64asI64, lowHalf)); -- } - -- put_vr_qw(v1, result); -+ for (UChar idx = 0; idx < n_elem; idx++) { -+ IRExpr* result = qop(single_ops[m6 - 2], -+ irrm, -+ get_vr(v2, type, idx), -+ get_vr(v3, type, idx), -+ get_vr(v4, type, idx)); -+ put_vr(v1, type, idx, negate ? unop(negate_ops[m6 - 2], result) : result); -+ } -+ return mnm; - } - - static const HChar * - s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - { -- s390_insn_assert("vfa", m4 == 3); -- s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); -- return "vfa"; -+ static const IROp vfa_ops[] = { -+ Iop_AddF32, Iop_Add32Fx4, -+ Iop_AddF64, Iop_Add64Fx2, -+ Iop_AddF128, -+ }; -+ return s390x_vec_fp_binary_op("vfa", vfa_ops, v1, v2, v3, m4, m5); - } - - static const HChar * - s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - { -- s390_insn_assert("vfs", m4 == 3); -- s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); -- return "vfs"; -+ static const IROp vfs_ops[] = { -+ Iop_SubF32, Iop_Sub32Fx4, -+ Iop_SubF64, Iop_Sub64Fx2, -+ Iop_SubF128, -+ }; -+ return s390x_vec_fp_binary_op("vfs", vfs_ops, v1, v2, v3, m4, m5); - } - - static const HChar * - s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - { -- s390_insn_assert("vfm", m4 == 3); -- s390x_vec_fp_binary_op(Iop_Mul64Fx2, 
Iop_MulF64, v1, v2, v3, m4, m5); -- return "vfm"; -+ static const IROp vfm_ops[] = { -+ Iop_MulF32, Iop_Mul32Fx4, -+ Iop_MulF64, Iop_Mul64Fx2, -+ Iop_MulF128, -+ }; -+ return s390x_vec_fp_binary_op("vfm", vfm_ops, v1, v2, v3, m4, m5); - } - - static const HChar * - s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) - { -- s390_insn_assert("vfd", m4 == 3); -- s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); -- return "vfd"; -+ static const IROp vfd_ops[] = { -+ Iop_DivF32, Iop_Div32Fx4, -+ Iop_DivF64, Iop_Div64Fx2, -+ Iop_DivF128, -+ }; -+ return s390x_vec_fp_binary_op("vfd", vfd_ops, v1, v2, v3, m4, m5); - } - - static const HChar * - s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) - { -- s390_insn_assert("vfsq", m3 == 3); -- s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); -- -- return "vfsq"; -+ static const IROp vfsq_ops[] = { -+ Iop_SqrtF32, Iop_Sqrt32Fx4, -+ Iop_SqrtF64, Iop_Sqrt64Fx2, -+ Iop_SqrtF128 -+ }; -+ return s390x_vec_fp_unary_op("vfsq", vfsq_ops, v1, v2, m3, m4); - } - -+static const IROp FMA_single_ops[] = { -+ Iop_MAddF32, Iop_MAddF64, Iop_MAddF128 -+}; -+ - static const HChar * - s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) - { -- s390_insn_assert("vfma", m6 == 3); -- s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6); -- return "vfma"; -+ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, -+ "vfma", FMA_single_ops, False); - } - -+static const HChar * -+s390_irgen_VFNMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) -+{ -+ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, -+ "vfnma", FMA_single_ops, True); -+} -+ -+static const IROp FMS_single_ops[] = { -+ Iop_MSubF32, Iop_MSubF64, Iop_MSubF128 -+}; -+ - static const HChar * - s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) - { -- s390_insn_assert("vfms", m6 == 3); -- s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6); -- 
return "vfms"; -+ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, -+ "vfms", FMS_single_ops, False); -+} -+ -+static const HChar * -+s390_irgen_VFNMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) -+{ -+ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, -+ "vfnms", FMS_single_ops, True); - } - - static const HChar * - s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) - { -- s390_insn_assert("wfc", m3 == 3); -- s390_insn_assert("wfc", m4 == 0); -+ s390_insn_assert("wfc", m4 == 0 && -+ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); -+ -+ static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; -+ IRType type = s390_vr_get_ftype(m3); - - IRTemp cc_vex = newTemp(Ity_I32); -- assign(cc_vex, binop(Iop_CmpF64, -- get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); -+ assign(cc_vex, binop(ops[m3 - 2], get_vr(v1, type, 0), get_vr(v2, type, 0))); - - IRTemp cc_s390 = newTemp(Ity_I32); - assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); -@@ -18695,213 +18920,253 @@ s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) - } - - static const HChar * --s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, -+ const HChar *mnem, IRCmpFResult cmp, Bool equal_ok, -+ IROp cmp32, IROp cmp64) - { -- s390_insn_assert("vfce", m4 == 3); -+ s390_insn_assert(mnem, (m5 & 3) == 0 && (m6 & 14) == 0 && -+ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); - -- Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); -- if (!s390_vr_is_cs_set(m6)) { -- if (!isSingleElementOp) { -- put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); -+ Bool single = s390_vr_is_single_element_control_set(m5) || m4 == 4; -+ -+ if (single) { -+ static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; -+ IRType type = s390_vr_get_ftype(m4); -+ IRTemp result = newTemp(Ity_I32); -+ IRTemp cond = newTemp(Ity_I1); -+ -+ 
assign(result, binop(ops[m4 - 2], -+ get_vr(v2, type, 0), get_vr(v3, type, 0))); -+ if (equal_ok) { -+ assign(cond, -+ binop(Iop_Or1, -+ binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)), -+ binop(Iop_CmpEQ32, mkexpr(result), mkU32(Ircr_EQ)))); - } else { -- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), -- get_vr(v3, Ity_F64, 0)); -- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, -- mkU32(Ircr_EQ)), -- mkU64(0xffffffffffffffffULL), -- mkU64(0ULL)); -- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); -+ assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp))); -+ } -+ put_vr_qw(v1, mkite(mkexpr(cond), -+ IRExpr_Const(IRConst_V128(0xffff)), -+ IRExpr_Const(IRConst_V128(0)))); -+ if (s390_vr_is_cs_set(m6)) { -+ IRTemp cc = newTemp(Ity_I64); -+ assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); -+ s390_cc_set(cc); - } - } else { -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); -- -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VFCE; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.m4 = m4; -- details.m5 = m5; -- details.m6 = m6; -+ IRTemp result = newTemp(Ity_V128); -+ -+ assign(result, binop(m4 == 2 ? 
cmp32 : cmp64, -+ get_vr_qw(v2), get_vr_qw(v3))); -+ put_vr_qw(v1, mkexpr(result)); -+ if (s390_vr_is_cs_set(m6)) { -+ IRTemp cc = newTemp(Ity_I64); -+ assign(cc, -+ mkite(binop(Iop_CmpEQ64, -+ binop(Iop_And64, -+ unop(Iop_V128to64, mkexpr(result)), -+ unop(Iop_V128HIto64, mkexpr(result))), -+ mkU64(-1ULL)), -+ mkU64(0), /* all comparison results are true */ -+ mkite(binop(Iop_CmpEQ64, -+ binop(Iop_Or64, -+ unop(Iop_V128to64, mkexpr(result)), -+ unop(Iop_V128HIto64, mkexpr(result))), -+ mkU64(0)), -+ mkU64(3), /* all false */ -+ mkU64(1)))); /* mixed true/false */ -+ s390_cc_set(cc); -+ } -+ } - -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+ return mnem; -+} - -- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); -- d->nFxState = 3; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = elementSize; -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = elementSize; -- d->fxState[2].fx = Ifx_Write; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -+static const HChar * -+s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+{ -+ return s390_irgen_VFCx(v1, v2, v3, m4, m5, m6, "vfce", Ircr_EQ, -+ False, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2); -+} - -- stmt(IRStmt_Dirty(d)); -- s390_cc_set(cc); -- } -+static const HChar * -+s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+{ -+ /* Swap arguments and compare "low" instead. 
*/ -+ return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfch", Ircr_LT, -+ False, Iop_CmpLT32Fx4, Iop_CmpLT64Fx2); -+} - -- return "vfce"; -+static const HChar * -+s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+{ -+ /* Swap arguments and compare "low or equal" instead. */ -+ return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfche", Ircr_LT, -+ True, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2); - } - - static const HChar * --s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) - { -- vassert(m4 == 3); -+ s390_insn_assert("vftci", -+ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); - - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); -- if (!s390_vr_is_cs_set(m6)) { -- if (!isSingleElementOp) { -- put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); -- } else { -- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), -- get_vr(v3, Ity_F64, 0)); -- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, -- mkU32(Ircr_GT)), -- mkU64(0xffffffffffffffffULL), -- mkU64(0ULL)); -- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); -- } -- } -- else { -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); - -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VFCH; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.m4 = m4; -- details.m5 = m5; -- details.m6 = m6; -+ IRDirty* d; -+ IRTemp cc = newTemp(Ity_I64); - -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+ s390x_vec_op_details_t details = { .serialized = 0ULL }; -+ details.op = S390_VEC_OP_VFTCI; -+ details.v1 = v1; -+ details.v2 = v2; -+ details.i3 = i3; -+ details.m4 = m4; -+ details.m5 = m5; - -- const UChar elementSize = isSingleElementOp ? 
sizeof(ULong) : sizeof(V128); -- d->nFxState = 3; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = elementSize; -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = elementSize; -- d->fxState[2].fx = Ifx_Write; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -+ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -+ &s390x_dirtyhelper_vec_op, -+ mkIRExprVec_2(IRExpr_GSPTR(), -+ mkU64(details.serialized))); - -- stmt(IRStmt_Dirty(d)); -- s390_cc_set(cc); -- } -+ const UChar elementSize = isSingleElementOp ? -+ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); -+ d->nFxState = 2; -+ vex_bzero(&d->fxState, sizeof(d->fxState)); -+ d->fxState[0].fx = Ifx_Read; -+ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -+ d->fxState[0].size = elementSize; -+ d->fxState[1].fx = Ifx_Write; -+ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -+ d->fxState[1].size = sizeof(V128); -+ -+ stmt(IRStmt_Dirty(d)); -+ s390_cc_set(cc); - -- return "vfch"; -+ return "vftci"; - } - - static const HChar * --s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) -+s390_irgen_VFMIN(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) - { -- s390_insn_assert("vfche", m4 == 3); -+ s390_insn_assert("vfmin", -+ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); - - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); -- if (!s390_vr_is_cs_set(m6)) { -- if (!isSingleElementOp) { -- put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); -- } -- else { -- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), -- get_vr(v2, Ity_F64, 0)); -- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, -- 
mkU32(Ircr_LT)), -- mkU64(0xffffffffffffffffULL), -- mkU64(0ULL)); -- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); -- } -- } -- else { -- IRDirty* d; -- IRTemp cc = newTemp(Ity_I64); -- -- s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VFCHE; -- details.v1 = v1; -- details.v2 = v2; -- details.v3 = v3; -- details.m4 = m4; -- details.m5 = m5; -- details.m6 = m6; -+ IRDirty* d; -+ IRTemp cc = newTemp(Ity_I64); - -- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -- &s390x_dirtyhelper_vec_op, -- mkIRExprVec_2(IRExpr_GSPTR(), -- mkU64(details.serialized))); -+ s390x_vec_op_details_t details = { .serialized = 0ULL }; -+ details.op = S390_VEC_OP_VFMIN; -+ details.v1 = v1; -+ details.v2 = v2; -+ details.v3 = v3; -+ details.m4 = m4; -+ details.m5 = m5; -+ details.m6 = m6; - -- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); -- d->nFxState = 3; -- vex_bzero(&d->fxState, sizeof(d->fxState)); -- d->fxState[0].fx = Ifx_Read; -- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -- d->fxState[0].size = elementSize; -- d->fxState[1].fx = Ifx_Read; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -- d->fxState[1].size = elementSize; -- d->fxState[2].fx = Ifx_Write; -- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[2].size = sizeof(V128); -+ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -+ &s390x_dirtyhelper_vec_op, -+ mkIRExprVec_2(IRExpr_GSPTR(), -+ mkU64(details.serialized))); - -- stmt(IRStmt_Dirty(d)); -- s390_cc_set(cc); -- } -+ const UChar elementSize = isSingleElementOp ? 
-+ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); -+ d->nFxState = 3; -+ vex_bzero(&d->fxState, sizeof(d->fxState)); -+ d->fxState[0].fx = Ifx_Read; -+ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -+ d->fxState[0].size = elementSize; -+ d->fxState[1].fx = Ifx_Read; -+ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -+ d->fxState[1].size = elementSize; -+ d->fxState[2].fx = Ifx_Write; -+ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -+ d->fxState[2].size = sizeof(V128); - -- return "vfche"; -+ stmt(IRStmt_Dirty(d)); -+ s390_cc_set(cc); -+ return "vfmin"; - } - - static const HChar * --s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) -+s390_irgen_VFMAX(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) - { -- s390_insn_assert("vftci", m4 == 3); -+ s390_insn_assert("vfmax", -+ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); - - Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); -- - IRDirty* d; - IRTemp cc = newTemp(Ity_I64); - - s390x_vec_op_details_t details = { .serialized = 0ULL }; -- details.op = S390_VEC_OP_VFTCI; -+ details.op = S390_VEC_OP_VFMAX; - details.v1 = v1; - details.v2 = v2; -- details.i3 = i3; -+ details.v3 = v3; - details.m4 = m4; - details.m5 = m5; -+ details.m6 = m6; - - d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", - &s390x_dirtyhelper_vec_op, - mkIRExprVec_2(IRExpr_GSPTR(), - mkU64(details.serialized))); - -- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); -- d->nFxState = 2; -+ const UChar elementSize = isSingleElementOp ? 
-+ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); -+ d->nFxState = 3; - vex_bzero(&d->fxState, sizeof(d->fxState)); - d->fxState[0].fx = Ifx_Read; - d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); - d->fxState[0].size = elementSize; -- d->fxState[1].fx = Ifx_Write; -- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -- d->fxState[1].size = sizeof(V128); -+ d->fxState[1].fx = Ifx_Read; -+ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -+ d->fxState[1].size = elementSize; -+ d->fxState[2].fx = Ifx_Write; -+ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -+ d->fxState[2].size = sizeof(V128); - - stmt(IRStmt_Dirty(d)); - s390_cc_set(cc); -+ return "vfmax"; -+} - -- return "vftci"; -+static const HChar * -+s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3) -+{ -+ IRDirty* d; -+ IRTemp cc = newTemp(Ity_I64); -+ -+ s390x_vec_op_details_t details = { .serialized = 0ULL }; -+ details.op = S390_VEC_OP_VBPERM; -+ details.v1 = v1; -+ details.v2 = v2; -+ details.v3 = v3; -+ details.m4 = 0; -+ details.m5 = 0; -+ details.m6 = 0; -+ -+ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", -+ &s390x_dirtyhelper_vec_op, -+ mkIRExprVec_2(IRExpr_GSPTR(), -+ mkU64(details.serialized))); -+ -+ d->nFxState = 3; -+ vex_bzero(&d->fxState, sizeof(d->fxState)); -+ d->fxState[0].fx = Ifx_Read; -+ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); -+ d->fxState[0].size = sizeof(V128); -+ d->fxState[1].fx = Ifx_Read; -+ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); -+ d->fxState[1].size = sizeof(V128); -+ d->fxState[2].fx = Ifx_Write; -+ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); -+ d->fxState[2].size = sizeof(V128); -+ -+ stmt(IRStmt_Dirty(d)); -+ s390_cc_set(cc); -+ return "vbperm"; - } - - /* New insns are added here. 
-@@ -20489,11 +20754,23 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - RXY_dl2(ovl), - RXY_dh2(ovl)); goto ok; - case 0xe60000000034ULL: /* VPKZ */ goto unimplemented; -- case 0xe60000000035ULL: /* VLRL */ goto unimplemented; -- case 0xe60000000037ULL: /* VLRLR */ goto unimplemented; -+ case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl), -+ VSI_b2(ovl), VSI_d2(ovl), -+ VSI_i3(ovl), -+ VSI_rxb(ovl)); goto ok; -+ case 0xe60000000037ULL: s390_format_VRS_RRDV(s390_irgen_VLRLR, VRSd_v1(ovl), -+ VRSd_r3(ovl), VRS_b2(ovl), -+ VRS_d2(ovl), -+ VRS_rxb(ovl)); goto ok; - case 0xe6000000003cULL: /* VUPKZ */ goto unimplemented; -- case 0xe6000000003dULL: /* VSTRL */ goto unimplemented; -- case 0xe6000000003fULL: /* VSTRLR */ goto unimplemented; -+ case 0xe6000000003dULL: s390_format_VSI_URDV(s390_irgen_VSTRL, VSI_v1(ovl), -+ VSI_b2(ovl), VSI_d2(ovl), -+ VSI_i3(ovl), -+ VSI_rxb(ovl)); goto ok; -+ case 0xe6000000003fULL: s390_format_VRS_RRDV(s390_irgen_VSTRLR, VRSd_v1(ovl), -+ VRSd_r3(ovl), VRS_b2(ovl), -+ VRS_d2(ovl), -+ VRS_rxb(ovl)); goto ok; - case 0xe60000000049ULL: /* VLIP */ goto unimplemented; - case 0xe60000000050ULL: /* VCVB */ goto unimplemented; - case 0xe60000000052ULL: /* VCVBG */ goto unimplemented; -@@ -20691,12 +20968,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe7000000006bULL: s390_format_VRR_VVV(s390_irgen_VNO, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_rxb(ovl)); goto ok; -- case 0xe7000000006cULL: /* VNX */ goto unimplemented; -+ case 0xe7000000006cULL: s390_format_VRR_VVV(s390_irgen_VNX, VRR_v1(ovl), -+ VRR_v2(ovl), VRR_r3(ovl), -+ VRR_rxb(ovl)); goto ok; - case 0xe7000000006dULL: s390_format_VRR_VVV(s390_irgen_VX, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_rxb(ovl)); goto ok; -- case 0xe7000000006eULL: /* VNN */ goto unimplemented; -- case 0xe7000000006fULL: /* VOC */ goto unimplemented; -+ case 0xe7000000006eULL: s390_format_VRR_VVV(s390_irgen_VNN, VRR_v1(ovl), -+ VRR_v2(ovl), VRR_r3(ovl), -+ 
VRR_rxb(ovl)); goto ok; -+ case 0xe7000000006fULL: s390_format_VRR_VVV(s390_irgen_VOC, VRR_v1(ovl), -+ VRR_v2(ovl), VRR_r3(ovl), -+ VRR_rxb(ovl)); goto ok; - case 0xe70000000070ULL: s390_format_VRR_VVVM(s390_irgen_VESLV, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; -@@ -20749,7 +21032,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe70000000084ULL: s390_format_VRR_VVVM(s390_irgen_VPDI, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; -- case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; -+ case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl), -+ VRR_v2(ovl), VRR_r3(ovl), -+ VRR_rxb(ovl)); goto ok; - case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl), - VRRd_v2(ovl), VRRd_v3(ovl), - VRRd_v4(ovl), VRRd_m5(ovl), -@@ -20780,8 +21065,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe70000000097ULL: s390_format_VRR_VVVMM(s390_irgen_VPKS, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_m5(ovl), VRR_rxb(ovl)); goto ok; -- case 0xe7000000009eULL: /* VFNMS */ goto unimplemented; -- case 0xe7000000009fULL: /* VFNMA */ goto unimplemented; -+ case 0xe7000000009eULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMS, VRRe_v1(ovl), -+ VRRe_v2(ovl), VRRe_v3(ovl), -+ VRRe_v4(ovl), VRRe_m5(ovl), -+ VRRe_m6(ovl), -+ VRRe_rxb(ovl)); goto ok; -+ case 0xe7000000009fULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMA, VRRe_v1(ovl), -+ VRRe_v2(ovl), VRRe_v3(ovl), -+ VRRe_v4(ovl), VRRe_m5(ovl), -+ VRRe_m6(ovl), -+ VRRe_rxb(ovl)); goto ok; - case 0xe700000000a1ULL: s390_format_VRR_VVVM(s390_irgen_VMLH, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; -@@ -20834,7 +21127,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - case 0xe700000000b4ULL: s390_format_VRR_VVVM(s390_irgen_VGFM, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; -- case 0xe700000000b8ULL: /* VMSL */ goto 
unimplemented; -+ case 0xe700000000b8ULL: s390_format_VRR_VVVVMM(s390_irgen_VMSL, VRRd_v1(ovl), -+ VRRd_v2(ovl), VRRd_v3(ovl), -+ VRRd_v4(ovl), VRRd_m5(ovl), -+ VRRd_m6(ovl), -+ VRRd_rxb(ovl)); goto ok; - case 0xe700000000b9ULL: s390_format_VRRd_VVVVM(s390_irgen_VACCC, VRRd_v1(ovl), - VRRd_v2(ovl), VRRd_v3(ovl), - VRRd_v4(ovl), VRRd_m5(ovl), -@@ -20871,11 +21168,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - VRRa_v2(ovl), VRRa_m3(ovl), - VRRa_m4(ovl), VRRa_m5(ovl), - VRRa_rxb(ovl)); goto ok; -- case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, VRRa_v1(ovl), -+ case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLL, VRRa_v1(ovl), - VRRa_v2(ovl), VRRa_m3(ovl), - VRRa_m4(ovl), VRRa_m5(ovl), - VRRa_rxb(ovl)); goto ok; -- case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, VRRa_v1(ovl), -+ case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLR, VRRa_v1(ovl), - VRRa_v2(ovl), VRRa_m3(ovl), - VRRa_m4(ovl), VRRa_m5(ovl), - VRRa_rxb(ovl)); goto ok; -@@ -20956,8 +21253,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) - VRRa_m3(ovl), VRRa_m4(ovl), - VRRa_m5(ovl), - VRRa_rxb(ovl)); goto ok; -- case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; -- case 0xe700000000efULL: /* VFMAX */ goto unimplemented; -+ case 0xe700000000eeULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMIN, VRRa_v1(ovl), -+ VRRa_v2(ovl), VRRa_v3(ovl), -+ VRRa_m3(ovl), VRRa_m4(ovl), -+ VRRa_m5(ovl), -+ VRRa_rxb(ovl)); goto ok; -+ case 0xe700000000efULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMAX, VRRa_v1(ovl), -+ VRRa_v2(ovl), VRRa_v3(ovl), -+ VRRa_m3(ovl), VRRa_m4(ovl), -+ VRRa_m5(ovl), -+ VRRa_rxb(ovl)); goto ok; - case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, VRR_v1(ovl), - VRR_v2(ovl), VRR_r3(ovl), - VRR_m4(ovl), VRR_rxb(ovl)); goto ok; -diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c -index 3b6121fec..8762975b2 100644 ---- a/VEX/priv/host_s390_defs.c -+++ b/VEX/priv/host_s390_defs.c -@@ -8,7 +8,7 @@ - This file is part 
of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 2010-2020 - Copyright (C) 2012-2017 Florian Krohm (britzel@acm.org) - - This program is free software; you can redistribute it and/or -@@ -684,6 +684,8 @@ s390_insn* genMove_S390(HReg from, HReg to, Bool mode64) - switch (hregClass(from)) { - case HRcInt64: - return s390_insn_move(sizeofIRType(Ity_I64), to, from); -+ case HRcFlt64: -+ return s390_insn_move(sizeofIRType(Ity_F64), to, from); - case HRcVec128: - return s390_insn_move(sizeofIRType(Ity_V128), to, from); - default: -@@ -7870,6 +7872,10 @@ s390_insn_as_string(const s390_insn *insn) - op = "v-vfloatabs"; - break; - -+ case S390_VEC_FLOAT_NABS: -+ op = "v-vfloatnabs"; -+ break; -+ - default: - goto fail; - } -@@ -9439,21 +9445,28 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) - - case S390_VEC_FLOAT_NEG: { - vassert(insn->variant.unop.src.tag == S390_OPND_REG); -- vassert(insn->size == 8); -+ vassert(insn->size >= 4); - UChar v1 = hregNumber(insn->variant.unop.dst); - UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); - return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0); - } - case S390_VEC_FLOAT_ABS: { - vassert(insn->variant.unop.src.tag == S390_OPND_REG); -- vassert(insn->size == 8); -+ vassert(insn->size >= 4); - UChar v1 = hregNumber(insn->variant.unop.dst); - UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); - return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2); - } -+ case S390_VEC_FLOAT_NABS: { -+ vassert(insn->variant.unop.src.tag == S390_OPND_REG); -+ vassert(insn->size >= 4); -+ UChar v1 = hregNumber(insn->variant.unop.dst); -+ UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); -+ return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 1); -+ } - case S390_VEC_FLOAT_SQRT: { - vassert(insn->variant.unop.src.tag == S390_OPND_REG); -- vassert(insn->size == 8); -+ vassert(insn->size >= 
4); - UChar v1 = hregNumber(insn->variant.unop.dst); - UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); - return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0); -diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h -index 3f6473e10..9b69f4d38 100644 ---- a/VEX/priv/host_s390_defs.h -+++ b/VEX/priv/host_s390_defs.h -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 2010-2020 - - This program is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as -@@ -205,6 +205,7 @@ typedef enum { - S390_VEC_COUNT_ONES, - S390_VEC_FLOAT_NEG, - S390_VEC_FLOAT_ABS, -+ S390_VEC_FLOAT_NABS, - S390_VEC_FLOAT_SQRT, - S390_UNOP_T_INVALID - } s390_unop_t; -@@ -931,6 +932,8 @@ extern UInt s390_host_hwcaps; - (s390_host_hwcaps & (VEX_HWCAPS_S390X_MSA5)) - #define s390_host_has_lsc2 \ - (s390_host_hwcaps & (VEX_HWCAPS_S390X_LSC2)) -+#define s390_host_has_vxe \ -+ (s390_host_hwcaps & (VEX_HWCAPS_S390X_VXE)) - #endif /* ndef __VEX_HOST_S390_DEFS_H */ - - /*---------------------------------------------------------------*/ -diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c -index 134f3eb6f..2f9854038 100644 ---- a/VEX/priv/host_s390_isel.c -+++ b/VEX/priv/host_s390_isel.c -@@ -8,7 +8,7 @@ - This file is part of Valgrind, a dynamic binary instrumentation - framework. - -- Copyright IBM Corp. 2010-2017 -+ Copyright IBM Corp. 
2010-2020 - Copyright (C) 2012-2017 Florian Krohm (britzel@acm.org) - - This program is free software; you can redistribute it and/or -@@ -2362,9 +2362,10 @@ s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env, - case Iop_NegF128: - if (left->tag == Iex_Unop && - (left->Iex.Unop.op == Iop_AbsF32 || -- left->Iex.Unop.op == Iop_AbsF64)) -+ left->Iex.Unop.op == Iop_AbsF64)) { - bfpop = S390_BFP_NABS; -- else -+ left = left->Iex.Unop.arg; -+ } else - bfpop = S390_BFP_NEG; - goto float128_opnd; - case Iop_AbsF128: bfpop = S390_BFP_ABS; goto float128_opnd; -@@ -2726,9 +2727,10 @@ s390_isel_float_expr_wrk(ISelEnv *env, IRExpr *expr) - case Iop_NegF64: - if (left->tag == Iex_Unop && - (left->Iex.Unop.op == Iop_AbsF32 || -- left->Iex.Unop.op == Iop_AbsF64)) -+ left->Iex.Unop.op == Iop_AbsF64)) { - bfpop = S390_BFP_NABS; -- else -+ left = left->Iex.Unop.arg; -+ } else - bfpop = S390_BFP_NEG; - break; - -@@ -3944,11 +3946,27 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - vec_unop = S390_VEC_COUNT_ONES; - goto Iop_V_wrk; - -+ case Iop_Neg32Fx4: -+ size = 4; -+ vec_unop = S390_VEC_FLOAT_NEG; -+ if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs32Fx4) { -+ vec_unop = S390_VEC_FLOAT_NABS; -+ arg = arg->Iex.Unop.arg; -+ } -+ goto Iop_V_wrk; - case Iop_Neg64Fx2: - size = 8; - vec_unop = S390_VEC_FLOAT_NEG; -+ if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs64Fx2) { -+ vec_unop = S390_VEC_FLOAT_NABS; -+ arg = arg->Iex.Unop.arg; -+ } - goto Iop_V_wrk; - -+ case Iop_Abs32Fx4: -+ size = 4; -+ vec_unop = S390_VEC_FLOAT_ABS; -+ goto Iop_V_wrk; - case Iop_Abs64Fx2: - size = 8; - vec_unop = S390_VEC_FLOAT_ABS; -@@ -4474,17 +4492,29 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - vec_binop = S390_VEC_ELEM_ROLL_V; - goto Iop_VV_wrk; - -+ case Iop_CmpEQ32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL; -+ goto Iop_VV_wrk; - case Iop_CmpEQ64Fx2: - size = 8; - vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL; - goto Iop_VV_wrk; - -+ case 
Iop_CmpLE32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; -+ goto Iop_VV_wrk; - case Iop_CmpLE64Fx2: { - size = 8; - vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; - goto Iop_VV_wrk; - } - -+ case Iop_CmpLT32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_COMPARE_LESS; -+ goto Iop_VV_wrk; - case Iop_CmpLT64Fx2: { - size = 8; - vec_binop = S390_VEC_FLOAT_COMPARE_LESS; -@@ -4671,20 +4701,41 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) - dst, reg1, reg2, reg3)); - return dst; - -+ case Iop_Add32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_ADD; -+ goto Iop_irrm_VV_wrk; -+ - case Iop_Add64Fx2: - size = 8; - vec_binop = S390_VEC_FLOAT_ADD; - goto Iop_irrm_VV_wrk; - -+ case Iop_Sub32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_SUB; -+ goto Iop_irrm_VV_wrk; -+ - case Iop_Sub64Fx2: - size = 8; - vec_binop = S390_VEC_FLOAT_SUB; - goto Iop_irrm_VV_wrk; - -+ case Iop_Mul32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_MUL; -+ goto Iop_irrm_VV_wrk; -+ - case Iop_Mul64Fx2: - size = 8; - vec_binop = S390_VEC_FLOAT_MUL; - goto Iop_irrm_VV_wrk; -+ -+ case Iop_Div32Fx4: -+ size = 4; -+ vec_binop = S390_VEC_FLOAT_DIV; -+ goto Iop_irrm_VV_wrk; -+ - case Iop_Div64Fx2: - size = 8; - vec_binop = S390_VEC_FLOAT_DIV; -diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c -index 72f419988..12f521d8c 100644 ---- a/VEX/priv/main_main.c -+++ b/VEX/priv/main_main.c -@@ -1795,6 +1795,7 @@ static const HChar* show_hwcaps_s390x ( UInt hwcaps ) - { VEX_HWCAPS_S390X_MSA5, "msa5" }, - { VEX_HWCAPS_S390X_MI2, "mi2" }, - { VEX_HWCAPS_S390X_LSC2, "lsc2" }, -+ { VEX_HWCAPS_S390X_LSC2, "vxe" }, - }; - /* Allocate a large enough buffer */ - static HChar buf[sizeof prefix + -diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h -index 53e3705da..2ffed0ad0 100644 ---- a/VEX/pub/libvex.h -+++ b/VEX/pub/libvex.h -@@ -171,7 +171,7 @@ typedef - #define VEX_HWCAPS_S390X_MSA5 (1<<19) /* message security assistance facility */ - #define VEX_HWCAPS_S390X_MI2 (1<<20) /* 
miscellaneous-instruction-extensions facility 2 */ - #define VEX_HWCAPS_S390X_LSC2 (1<<21) /* Conditional load/store facility2 */ -- -+#define VEX_HWCAPS_S390X_VXE (1<<22) /* Vector-enhancements facility */ - - /* Special value representing all available s390x hwcaps */ - #define VEX_HWCAPS_S390X_ALL (VEX_HWCAPS_S390X_LDISP | \ -@@ -189,7 +189,8 @@ typedef - VEX_HWCAPS_S390X_VX | \ - VEX_HWCAPS_S390X_MSA5 | \ - VEX_HWCAPS_S390X_MI2 | \ -- VEX_HWCAPS_S390X_LSC2) -+ VEX_HWCAPS_S390X_LSC2 | \ -+ VEX_HWCAPS_S390X_VXE) - - #define VEX_HWCAPS_S390X(x) ((x) & ~VEX_S390X_MODEL_MASK) - #define VEX_S390X_MODEL(x) ((x) & VEX_S390X_MODEL_MASK) -diff --git a/VEX/pub/libvex_emnote.h b/VEX/pub/libvex_emnote.h -index be033b4da..77880a270 100644 ---- a/VEX/pub/libvex_emnote.h -+++ b/VEX/pub/libvex_emnote.h -@@ -124,6 +124,10 @@ typedef - /* ppno insn is not supported on this host */ - EmFail_S390X_ppno, - -+ /* insn needs vector-enhancements facility which is not available on this -+ host */ -+ EmFail_S390X_vxe, -+ - EmNote_NUMBER - } - VexEmNote; -diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c -index 365942c4f..ba84fa6e9 100644 ---- a/coregrind/m_initimg/initimg-linux.c -+++ b/coregrind/m_initimg/initimg-linux.c -@@ -697,9 +697,13 @@ Addr setup_client_stack( void* init_sp, - } - # elif defined(VGP_s390x_linux) - { -- /* Advertise hardware features "below" TE and VXRS. TE itself -- and anything above VXRS is not supported by Valgrind. */ -- auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS; -+ /* Out of the hardware features available on the platform, -+ advertise those "below" TE, as well as the ones explicitly -+ ORed in the expression below. Anything else, such as TE -+ itself, is not supported by Valgrind. 
*/ -+ auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1) -+ | VKI_HWCAP_S390_VXRS -+ | VKI_HWCAP_S390_VXRS_EXT); - } - # elif defined(VGP_arm64_linux) - { -diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c -index e7877e636..228ae2554 100644 ---- a/coregrind/m_machine.c -+++ b/coregrind/m_machine.c -@@ -1555,6 +1555,7 @@ Bool VG_(machine_get_hwcaps)( void ) - { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" }, - { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" }, - { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" }, -+ { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" }, - }; - - /* Set hwcaps according to the detected facilities */ -diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h -index 7b863a324..4ab2d3334 100644 ---- a/include/vki/vki-s390x-linux.h -+++ b/include/vki/vki-s390x-linux.h -@@ -806,6 +806,7 @@ typedef vki_s390_regs vki_elf_gregset_t; - - #define VKI_HWCAP_S390_TE 1024 - #define VKI_HWCAP_S390_VXRS 2048 -+#define VKI_HWCAP_S390_VXRS_EXT 8192 - - - //---------------------------------------------------------------------- -diff --git a/none/tests/s390x/vector.h b/none/tests/s390x/vector.h -index de2391480..632c2cb9c 100644 ---- a/none/tests/s390x/vector.h -+++ b/none/tests/s390x/vector.h -@@ -86,6 +86,13 @@ void print_hex(const V128 value) { - printf("%016lx | %016lx\n", value.u64[0], value.u64[1]); - } - -+void print_hex64(const V128 value, int zero_only) { -+ if (zero_only) -+ printf("%016lx | --\n", value.u64[0]); -+ else -+ printf("%016lx | %016lx\n", value.u64[0], value.u64[1]); -+} -+ - void print_f32(const V128 value, int even_only, int zero_only) { - if (zero_only) - printf("%a | -- | -- | --\n", value.f32[0]); -@@ -222,8 +229,10 @@ static void test_##insn##_selective(const s390x_test_usageInfo info) \ - {printf(" v_arg2 = "); print_hex(v_arg2);} \ - if (info & V128_V_ARG3_AS_INT) \ - {printf(" v_arg3 = "); print_hex(v_arg3);} \ -- if (info & V128_V_RES_AS_INT) \ -- {printf(" v_result = "); 
print_hex(v_result);} \ -+ if (info & V128_V_RES_AS_INT) { \ -+ printf(" v_result = "); \ -+ print_hex64(v_result, info & V128_V_RES_ZERO_ONLY); \ -+ } \ - \ - if (info & V128_V_ARG1_AS_FLOAT64) \ - {printf(" v_arg1 = "); print_f64(v_arg1, 0);} \ -diff --git a/none/tests/s390x/vector_float.c b/none/tests/s390x/vector_float.c -index 52f3a296f..20853f381 100644 ---- a/none/tests/s390x/vector_float.c -+++ b/none/tests/s390x/vector_float.c -@@ -114,50 +114,59 @@ int main() - test_with_selective_printing(vldeb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64)); - test_with_selective_printing(wldeb, (V128_V_RES_AS_FLOAT64 | -- V128_V_ARG1_AS_FLOAT64)); -+ V128_V_ARG1_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vflcdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64)); - test_with_selective_printing(wflcdb, (V128_V_RES_AS_FLOAT64 | -- V128_V_ARG1_AS_FLOAT64)); -+ V128_V_ARG1_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vflndb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64)); - test_with_selective_printing(wflndb, (V128_V_RES_AS_FLOAT64 | -- V128_V_ARG1_AS_FLOAT64)); -+ V128_V_ARG1_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vflpdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64)); - test_with_selective_printing(wflpdb, (V128_V_RES_AS_FLOAT64 | -- V128_V_ARG1_AS_FLOAT64)); -+ V128_V_ARG1_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfadb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfadb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfsdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfsdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ 
V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfmdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfmdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfddb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfddb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfsqdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64)); - test_with_selective_printing(wfsqdb, (V128_V_RES_AS_FLOAT64 | -- V128_V_ARG1_AS_FLOAT64)); -+ V128_V_ARG1_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfmadb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | -@@ -166,7 +175,8 @@ int main() - test_with_selective_printing(wfmadb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_V_ARG3_AS_FLOAT64)); -+ V128_V_ARG3_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfmsdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -@@ -174,21 +184,25 @@ int main() - test_with_selective_printing(wfmsdb, (V128_V_RES_AS_FLOAT64 | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_V_ARG3_AS_FLOAT64)); -+ V128_V_ARG3_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(wfcdb, (V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_R_RES)); -+ V128_R_RES | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(wfkdb, (V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_R_RES)); -+ V128_R_RES | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfcedb, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfcedb, 
(V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfcedbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -@@ -196,14 +210,16 @@ int main() - test_with_selective_printing(wfcedbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_R_RES)); -+ V128_R_RES | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfchdb, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfchdb, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfchdbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -@@ -211,14 +227,16 @@ int main() - test_with_selective_printing(wfchdbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_R_RES)); -+ V128_R_RES | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vfchedb, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64)); - test_with_selective_printing(wfchedb, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | -- V128_V_ARG2_AS_FLOAT64)); -+ V128_V_ARG2_AS_FLOAT64 | -+ V128_V_RES_ZERO_ONLY)); - test_with_selective_printing(vfchedbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -@@ -226,7 +244,8 @@ int main() - test_with_selective_printing(wfchedbs, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | - V128_V_ARG2_AS_FLOAT64 | -- V128_R_RES)); -+ V128_R_RES | -+ V128_V_RES_ZERO_ONLY)); - - test_with_selective_printing(vftcidb0, (V128_V_RES_AS_INT | - V128_V_ARG1_AS_FLOAT64 | -diff --git a/none/tests/s390x/vector_float.stdout.exp b/none/tests/s390x/vector_float.stdout.exp -index eac525041..a330ac832 100644 ---- a/none/tests/s390x/vector_float.stdout.exp -+++ 
b/none/tests/s390x/vector_float.stdout.exp -@@ -419,88 +419,88 @@ insn vcgdb07: - v_result = 7fffffffffffffff | 7fffffffffffffff - v_arg1 = 0x1.fed2f087c21p+341 | 0x1.180e4c1d87fc4p+682 - insn wcgdb00: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.d7fd9222e8b86p+670 | 0x1.c272612672a3p+798 - insn wcgdb00: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.745cd360987e5p-496 | -0x1.f3b404919f358p-321 - insn wcgdb00: -- v_result = 8000000000000000 | 0000000000000000 -+ v_result = 8000000000000000 | -- - v_arg1 = -0x1.9523565cd92d5p+643 | 0x1.253677d6d3be2p-556 - insn wcgdb00: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.b6eb576ec3e6ap+845 | -0x1.c7e102c503d91p+266 - insn wcgdb01: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.3d4319841f4d6p-1011 | -0x1.2feabf7dfc506p-680 - insn wcgdb01: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.6fb8d1cd8b32cp-843 | -0x1.50f6a6922f97ep+33 - insn wcgdb01: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.64a673daccf1ap-566 | -0x1.69ef9b1d01499p+824 - insn wcgdb01: -- v_result = 8000000000000000 | 0000000000000000 -+ v_result = 8000000000000000 | -- - v_arg1 = -0x1.3e2ddd862b4adp+1005 | -0x1.312466410271p+184 - insn wcgdb03: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.d594c3412a11p-953 | -0x1.a07393d34d77cp-224 - insn wcgdb03: -- v_result = 8000000000000000 | 0000000000000000 -+ v_result = 8000000000000000 | -- - v_arg1 = -0x1.f7a0dbcfd6e4cp+104 | -0x1.40f7cde7f2214p-702 - insn wcgdb03: -- v_result = 8000000000000000 | 0000000000000000 -+ v_result = 8000000000000000 | -- - v_arg1 = -0x1.40739c1574808p+560 | -0x1.970328ddf1b6ep-374 - insn wcgdb03: -- v_result 
= 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.477653afd7048p-38 | 0x1.1eac2f8b2a93cp-384 - insn wcgdb04: -- v_result = ffffffffe9479a7d | 0000000000000000 -+ v_result = ffffffffe9479a7d | -- - v_arg1 = -0x1.6b865833eff3p+28 | 0x1.06e8cf1834d0ep-722 - insn wcgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.eef0b2294a5cp-544 | -0x1.8e8b133ccda15p+752 - insn wcgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.f34e77e6b6698p-894 | -0x1.9f7ce1cb53bddp-896 - insn wcgdb04: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.95707a6d75db5p+1018 | -0x1.3b0c072d23011p-224 - insn wcgdb05: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.a9fb71160793p-968 | 0x1.05f601fe8123ap-986 - insn wcgdb05: -- v_result = 8000000000000000 | 0000000000000000 -+ v_result = 8000000000000000 | -- - v_arg1 = -0x1.0864159b94305p+451 | -0x1.d4647f5a78b7ep-599 - insn wcgdb05: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.37eadff8397c8p+432 | -0x1.15d896b6f6063p+464 - insn wcgdb05: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.eb0812b0d677p-781 | 0x1.3117c5e0e288cp-202 - insn wcgdb06: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.6b88069167c0fp-662 | -0x1.70571d27e1279p+254 - insn wcgdb06: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.f6a6d6e883596p+260 | 0x1.0d578afaaa34ap+604 - insn wcgdb06: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.d91c7d13c4694p-475 | -0x1.ecf1f8529767bp+830 - insn wcgdb06: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 
0000000000000001 | -- - v_arg1 = 0x1.fac8dd3bb7af6p-101 | 0x1.fb8324a00fba8p+959 - insn wcgdb07: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.4b0fa18fa73c7p+111 | -0x1.08e7b17633a49p+61 - insn wcgdb07: -- v_result = e636b693e39a1100 | 0000000000000000 -+ v_result = e636b693e39a1100 | -- - v_arg1 = -0x1.9c9496c1c65efp+60 | 0x1.c4182ee728d76p-572 - insn wcgdb07: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = -0x1.819718032dff7p-303 | 0x1.a784c77ff6aa2p-622 - insn wcgdb07: -- v_result = 7fffffffffffffff | 0000000000000000 -+ v_result = 7fffffffffffffff | -- - v_arg1 = 0x1.978e8abfd83c2p+152 | 0x1.2531ebf451762p+315 - insn vclgdb00: - v_result = 0000000000000000 | 0000000000000000 -@@ -587,88 +587,88 @@ insn vclgdb07: - v_result = 0000000000000000 | 0000000000000000 - v_arg1 = -0x1.137bbb51f08bdp+306 | 0x1.18d2a1063356p-795 - insn wclgdb00: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.e66f55dcc2639p-1013 | -0x1.733ee56929f3bp-304 - insn wclgdb00: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.8802fd9ab740cp-986 | -0x1.64d4d2c7c145fp-1015 - insn wclgdb00: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.a67209b8c407bp-645 | -0x1.6410ff9b1c801p+487 - insn wclgdb00: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.cb2febaefeb2dp+49 | 0x1.dee368b2ec375p-502 - insn wclgdb01: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.5703db3c1b0e2p-728 | 0x1.068c4d51ea4ebp+617 - insn wclgdb01: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.ae350291e5b3ep+291 | 0x1.1b87bb09b6032p+376 - insn wclgdb01: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = 
ffffffffffffffff | -- - v_arg1 = 0x1.c4666a710127ep+424 | -0x1.19e969b6c0076p+491 - insn wclgdb01: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.c892c5a4d103fp+105 | -0x1.d4f937cc76704p+749 - insn wclgdb03: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.81090d8fc663dp-111 | 0x1.337ec5e0f0904p+1 - insn wclgdb03: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.e787adc70b91p-593 | 0x1.db8d83196b53cp-762 - insn wclgdb03: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.6529307e907efp+389 | -0x1.3ea0d8d5b4dd2p+589 - insn wclgdb03: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.be701a158637p-385 | 0x1.c5a7f70cb8a09p+107 - insn wclgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.2f328571ab445p+21 | -0x1.dcc21fc82ba01p-930 - insn wclgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.06b69fcbb7bffp-415 | 0x1.6f9a13a0a827ap+915 - insn wclgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.738e549b38bcdp+479 | 0x1.a522edb999c9p-45 - insn wclgdb04: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.7f9399d2bcf3bp-215 | -0x1.7bc35f2d69a7fp+818 - insn wclgdb05: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.fc542bdb707f6p+880 | -0x1.8521ebc93a25fp-969 - insn wclgdb05: -- v_result = 1ce8d9951b8c8600 | 0000000000000000 -+ v_result = 1ce8d9951b8c8600 | -- - v_arg1 = 0x1.ce8d9951b8c86p+60 | 0x1.92712589230e7p+475 - insn wclgdb05: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 
-0x1.8a297f60a0811p-156 | 0x1.102b79043d82cp-204 - insn wclgdb05: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.beb9057e1401dp-196 | -0x1.820f18f830262p+15 - insn wclgdb06: -- v_result = 0000000000000001 | 0000000000000000 -+ v_result = 0000000000000001 | -- - v_arg1 = 0x1.c321a966ecb4dp-430 | -0x1.2f6a1a95ead99p-943 - insn wclgdb06: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.f1a86b4aed821p-56 | -0x1.1ee6717cc2d7fp-899 - insn wclgdb06: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.73ce49d89ecb9p-302 | 0x1.52663b975ed23p-716 - insn wclgdb06: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.3e9c2de97a292p+879 | 0x1.d34eed36f2eafp+960 - insn wclgdb07: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.4e6ec6ddc6a45p-632 | -0x1.6e564d0fec72bp+369 - insn wclgdb07: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.42e2c658e4c4dp+459 | -0x1.9f9dc0252e44p+85 - insn wclgdb07: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.fb40ac8cda3c1p-762 | 0x1.0e9ed614bc8f1p-342 - insn wclgdb07: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.c1f8b3c68e214p+118 | -0x1.1a26a49368b61p+756 - insn vfidb00: - v_arg1 = -0x1.38df4cf9d52dbp-545 | -0x1.049253d90dd92p+94 -@@ -1020,16 +1020,16 @@ insn vldeb: - v_result = -0x1.6f5fb2p+70 | -0x1.0d2df6p-107 - insn wldeb: - v_arg1 = -0x1.d26169729db2ap-435 | 0x1.d6fd080793e8cp+767 -- v_result = -0x1.9a4c2cp-54 | 0x0p+0 -+ v_result = -0x1.9a4c2cp-54 | -- - insn wldeb: - v_arg1 = -0x1.f4b59107fce61p-930 | 0x1.cdf2816e253f4p-168 -- v_result = -0x1.be96b2p-116 | 0x0p+0 -+ v_result = -0x1.be96b2p-116 | -- - insn wldeb: - v_arg1 = 
-0x1.9603a2997928cp-441 | -0x1.aada85e355a11p-767 -- v_result = -0x1.d2c074p-55 | 0x0p+0 -+ v_result = -0x1.d2c074p-55 | -- - insn wldeb: - v_arg1 = 0x1.25ccf5bd0e83p+620 | 0x1.e1635864ebb17p-88 -- v_result = 0x1.64b99ep+78 | 0x0p+0 -+ v_result = 0x1.64b99ep+78 | -- - insn vflcdb: - v_arg1 = 0x1.0ae6d82f76afp-166 | -0x1.e8fb1e03a7415p-191 - v_result = -0x1.0ae6d82f76afp-166 | 0x1.e8fb1e03a7415p-191 -@@ -1044,16 +1044,16 @@ insn vflcdb: - v_result = -0x1.19520153d35b4p-301 | -0x1.ac5325cd23253p+396 - insn wflcdb: - v_arg1 = 0x1.ffd3eecfd54d7p-831 | -0x1.97854fa523a77p+146 -- v_result = -0x1.ffd3eecfd54d7p-831 | 0x0p+0 -+ v_result = -0x1.ffd3eecfd54d7p-831 | -- - insn wflcdb: - v_arg1 = -0x1.508ea45606447p-442 | 0x1.ae7f0e6cf9d2bp+583 -- v_result = 0x1.508ea45606447p-442 | 0x0p+0 -+ v_result = 0x1.508ea45606447p-442 | -- - insn wflcdb: - v_arg1 = 0x1.da8ab2188c21ap+94 | 0x1.78a9c152aa074p-808 -- v_result = -0x1.da8ab2188c21ap+94 | 0x0p+0 -+ v_result = -0x1.da8ab2188c21ap+94 | -- - insn wflcdb: - v_arg1 = -0x1.086882645e0c5p-1001 | -0x1.54e2de5af5a74p-262 -- v_result = 0x1.086882645e0c5p-1001 | 0x0p+0 -+ v_result = 0x1.086882645e0c5p-1001 | -- - insn vflndb: - v_arg1 = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935 - v_result = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935 -@@ -1068,16 +1068,16 @@ insn vflndb: - v_result = -0x1.c5bc39a06d4e2p-259 | -0x1.c5e61ad849e77p-833 - insn wflndb: - v_arg1 = -0x1.e9f3e6d1beffap-117 | -0x1.d58cc8bf123b3p-714 -- v_result = -0x1.e9f3e6d1beffap-117 | 0x0p+0 -+ v_result = -0x1.e9f3e6d1beffap-117 | -- - insn wflndb: - v_arg1 = -0x1.3fc4ef2e7485ep-691 | 0x1.eb328986081efp-775 -- v_result = -0x1.3fc4ef2e7485ep-691 | 0x0p+0 -+ v_result = -0x1.3fc4ef2e7485ep-691 | -- - insn wflndb: - v_arg1 = -0x1.7146c5afdec16p+23 | -0x1.597fcfa1fab2p-708 -- v_result = -0x1.7146c5afdec16p+23 | 0x0p+0 -+ v_result = -0x1.7146c5afdec16p+23 | -- - insn wflndb: - v_arg1 = 0x1.03f8d7e9afe84p-947 | 0x1.9a10c3feb6b57p-118 -- v_result = 
-0x1.03f8d7e9afe84p-947 | 0x0p+0 -+ v_result = -0x1.03f8d7e9afe84p-947 | -- - insn vflpdb: - v_arg1 = 0x1.64ae59b6c762ep-407 | -0x1.fa7191ab21e86p+533 - v_result = 0x1.64ae59b6c762ep-407 | 0x1.fa7191ab21e86p+533 -@@ -1092,16 +1092,16 @@ insn vflpdb: - v_result = 0x1.85fa2de1d492ap+170 | 0x1.ac36828822c11p-968 - insn wflpdb: - v_arg1 = 0x1.a6cf677640a73p-871 | 0x1.b6f1792385922p-278 -- v_result = 0x1.a6cf677640a73p-871 | 0x0p+0 -+ v_result = 0x1.a6cf677640a73p-871 | -- - insn wflpdb: - v_arg1 = -0x1.b886774f6d888p-191 | -0x1.6a2b08d735d22p-643 -- v_result = 0x1.b886774f6d888p-191 | 0x0p+0 -+ v_result = 0x1.b886774f6d888p-191 | -- - insn wflpdb: - v_arg1 = 0x1.5045d37d46f5fp+943 | -0x1.333a86ef2dcf6p-1013 -- v_result = 0x1.5045d37d46f5fp+943 | 0x0p+0 -+ v_result = 0x1.5045d37d46f5fp+943 | -- - insn wflpdb: - v_arg1 = 0x1.1e7bec6ada14dp+252 | 0x1.a70b3f3e24dap-153 -- v_result = 0x1.1e7bec6ada14dp+252 | 0x0p+0 -+ v_result = 0x1.1e7bec6ada14dp+252 | -- - insn vfadb: - v_arg1 = 0x1.5b1ad8e9f17c6p-294 | -0x1.ddd8300a0bf02p+122 - v_arg2 = -0x1.9b49c31ca8ac6p+926 | 0x1.fdbc992926268p+677 -@@ -1121,19 +1121,19 @@ insn vfadb: - insn wfadb: - v_arg1 = 0x1.3c5466cb80722p+489 | -0x1.11e1770053ca2p+924 - v_arg2 = 0x1.d876cd721a726p-946 | 0x1.5c04ceb79c9bcp+1001 -- v_result = 0x1.3c5466cb80722p+489 | 0x0p+0 -+ v_result = 0x1.3c5466cb80722p+489 | -- - insn wfadb: - v_arg1 = 0x1.b0b142d6b76a3p+577 | 0x1.3146824e993a2p+432 - v_arg2 = -0x1.f7f3b7582925fp-684 | -0x1.9700143c2b935p-837 -- v_result = 0x1.b0b142d6b76a2p+577 | 0x0p+0 -+ v_result = 0x1.b0b142d6b76a2p+577 | -- - insn wfadb: - v_arg1 = -0x1.8d65e15edabd6p+244 | 0x1.3be7fd08492d6p-141 - v_arg2 = -0x1.5eef86490fb0ap+481 | 0x1.7b26c897cb6dfp+810 -- v_result = -0x1.5eef86490fb0ap+481 | 0x0p+0 -+ v_result = -0x1.5eef86490fb0ap+481 | -- - insn wfadb: - v_arg1 = -0x1.2dffa5b5f29p+34 | 0x1.71a026274602fp-881 - v_arg2 = 0x1.4dad707287289p+756 | -0x1.1500d55807247p-616 -- v_result = 0x1.4dad707287288p+756 | 0x0p+0 -+ v_result = 
0x1.4dad707287288p+756 | -- - insn vfsdb: - v_arg1 = 0x1.054fd9c4d4883p+644 | 0x1.45c90ed85bd7fp-780 - v_arg2 = 0x1.f3bc7a611dadap+494 | -0x1.7c9e1e858ba5bp-301 -@@ -1153,19 +1153,19 @@ insn vfsdb: - insn wfsdb: - v_arg1 = 0x1.9090dabf846e7p-648 | 0x1.1c4ab843a2d15p+329 - v_arg2 = -0x1.a7ceb293690dep+316 | 0x1.22245954a20cp+42 -- v_result = 0x1.a7ceb293690dep+316 | 0x0p+0 -+ v_result = 0x1.a7ceb293690dep+316 | -- - insn wfsdb: - v_arg1 = 0x1.4e5347c27819p-933 | -0x1.56a30bda28351p-64 - v_arg2 = -0x1.dedb9f3935b56p-155 | 0x1.8c5b6ed76816cp-522 -- v_result = 0x1.dedb9f3935b56p-155 | 0x0p+0 -+ v_result = 0x1.dedb9f3935b56p-155 | -- - insn wfsdb: - v_arg1 = 0x1.0ec4e562a015bp-491 | 0x1.3996381b52d9fp-686 - v_arg2 = 0x1.1dcce4e81819p+960 | -0x1.32fa425e8fc08p-263 -- v_result = -0x1.1dcce4e81818fp+960 | 0x0p+0 -+ v_result = -0x1.1dcce4e81818fp+960 | -- - insn wfsdb: - v_arg1 = -0x1.587229f90f77dp-19 | 0x1.100d8eb8105e4p-784 - v_arg2 = -0x1.afb4cce4c43ddp+530 | -0x1.6da7f05e7f512p-869 -- v_result = 0x1.afb4cce4c43dcp+530 | 0x0p+0 -+ v_result = 0x1.afb4cce4c43dcp+530 | -- - insn vfmdb: - v_arg1 = 0x1.892b425556c47p-124 | 0x1.38222404079dfp-656 - v_arg2 = 0x1.af612ed2c342dp-267 | -0x1.1f735fd6ce768p-877 -@@ -1185,19 +1185,19 @@ insn vfmdb: - insn wfmdb: - v_arg1 = -0x1.b992d950126a1p-683 | -0x1.9c1b22eb58c59p-497 - v_arg2 = 0x1.b557a7d8e32c3p-25 | -0x1.f746b2ddafccep+227 -- v_result = -0x1.792f6fb13894ap-707 | 0x0p+0 -+ v_result = -0x1.792f6fb13894ap-707 | -- - insn wfmdb: - v_arg1 = -0x1.677a8c20a5a2fp+876 | 0x1.c03e7b97e8c0dp-645 - v_arg2 = 0x1.dab44be430937p-1011 | -0x1.3f51352c67be9p-916 -- v_result = -0x1.4d4b0a1827064p-134 | 0x0p+0 -+ v_result = -0x1.4d4b0a1827064p-134 | -- - insn wfmdb: - v_arg1 = -0x1.da60f596ad0cep+254 | 0x1.52332e0650e33p+966 - v_arg2 = 0x1.a042c52ed993cp+215 | 0x1.8f380c84aa133p+204 -- v_result = -0x1.81aca4bbcbd24p+470 | 0x0p+0 -+ v_result = -0x1.81aca4bbcbd24p+470 | -- - insn wfmdb: - v_arg1 = -0x1.83d17f11f6aa3p-469 | -0x1.98117efe89b9ep-361 - 
v_arg2 = 0x1.8c445fd46d214p-701 | -0x1.f98118821821cp+596 -- v_result = -0x0p+0 | 0x0p+0 -+ v_result = -0x0p+0 | -- - insn vfddb: - v_arg1 = -0x1.ecbb48899e0f1p+969 | 0x1.caf175ab352p-20 - v_arg2 = -0x1.9455d67f9f79dp+208 | 0x1.bc4a431b04a6fp+482 -@@ -1217,19 +1217,19 @@ insn vfddb: - insn wfddb: - v_arg1 = 0x1.bd48489b60731p-114 | 0x1.a760dcf57b74fp-51 - v_arg2 = -0x1.171f83409eeb6p-402 | -0x1.e159d1409bdc6p-972 -- v_result = -0x1.9864f1511f8cp+288 | 0x0p+0 -+ v_result = -0x1.9864f1511f8cp+288 | -- - insn wfddb: - v_arg1 = -0x1.120505ef4606p-637 | -0x1.83f6f775c0eb7p+272 - v_arg2 = -0x1.d18ba3872fde1p+298 | 0x1.c60f8d191068cp-454 -- v_result = 0x1.2d5cdb15a686cp-936 | 0x0p+0 -+ v_result = 0x1.2d5cdb15a686cp-936 | -- - insn wfddb: - v_arg1 = 0x1.f637f7f8c790fp-97 | -0x1.7bdce4d74947p+189 - v_arg2 = -0x1.1c8f2d1b3a2edp-218 | -0x1.55fdfd1840241p-350 -- v_result = -0x1.c3d0799c1420fp+121 | 0x0p+0 -+ v_result = -0x1.c3d0799c1420fp+121 | -- - insn wfddb: - v_arg1 = -0x1.c63b7b2eee253p+250 | 0x1.dfd9dcd8b823fp-125 - v_arg2 = 0x1.094a1f1f87e0cp+629 | 0x1.eeaa23c0d7843p-814 -- v_result = -0x1.b653a10ebdeccp-379 | 0x0p+0 -+ v_result = -0x1.b653a10ebdeccp-379 | -- - insn vfsqdb: - v_arg1 = 0x1.f60db25f7066p-703 | -0x1.d43509abca8c3p+631 - v_result = 0x1.fb009ab25ec11p-352 | nan -@@ -1244,16 +1244,16 @@ insn vfsqdb: - v_result = 0x1.833dba0954bccp+249 | nan - insn wfsqdb: - v_arg1 = 0x1.71af4e7f64978p+481 | -0x1.3429dc60011d7p-879 -- v_result = 0x1.b30fc65551133p+240 | 0x0p+0 -+ v_result = 0x1.b30fc65551133p+240 | -- - insn wfsqdb: - v_arg1 = 0x1.5410db1c5f403p+173 | 0x1.97fa6581e692fp+108 -- v_result = 0x1.a144f43a592c1p+86 | 0x0p+0 -+ v_result = 0x1.a144f43a592c1p+86 | -- - insn wfsqdb: - v_arg1 = -0x1.5838027725afep+6 | 0x1.ac61529c11f38p+565 -- v_result = nan | 0x0p+0 -+ v_result = nan | -- - insn wfsqdb: - v_arg1 = -0x1.159e341dcc06ep-439 | 0x1.ed54ce5481ba5p-574 -- v_result = nan | 0x0p+0 -+ v_result = nan | -- - insn vfmadb: - v_arg1 = -0x1.eb00a5c503d75p+538 | 
0x1.89fae603ddc07p+767 - v_arg2 = -0x1.71c72712c3957p+715 | 0x1.1bd5773442feap+762 -@@ -1278,22 +1278,22 @@ insn wfmadb: - v_arg1 = 0x1.1cc5b10a14d54p+668 | -0x1.686407390f7d1p+616 - v_arg2 = -0x1.bf34549e73246p+676 | -0x1.dc5a34cc470f3p+595 - v_arg3 = -0x1.95e0fdcf13974p-811 | -0x1.79c7cc1a8ec83p-558 -- v_result = -0x1.fffffffffffffp+1023 | 0x0p+0 -+ v_result = -0x1.fffffffffffffp+1023 | -- - insn wfmadb: - v_arg1 = 0x1.138bc1a5d75f8p+713 | -0x1.e226ebba2fe54p+381 - v_arg2 = -0x1.081ebb7cc3414p-772 | 0x1.369d99e174fc3p+922 - v_arg3 = -0x1.0671c682a5d0cp-1016 | 0x1.03c9530dd0377p+378 -- v_result = -0x1.1c4933e117d95p-59 | 0x0p+0 -+ v_result = -0x1.1c4933e117d95p-59 | -- - insn wfmadb: - v_arg1 = -0x1.166f0b1fad67bp+64 | -0x1.e9ee8d32e1069p-452 - v_arg2 = -0x1.4a235bdd109e2p-65 | 0x1.bacaa96fc7e81p-403 - v_arg3 = -0x1.d2e19acf7c4bdp+99 | 0x1.f901130f685adp-963 -- v_result = -0x1.d2e19acf7c4bcp+99 | 0x0p+0 -+ v_result = -0x1.d2e19acf7c4bcp+99 | -- - insn wfmadb: - v_arg1 = -0x1.77d7bfec863d2p-988 | -0x1.b68029700c6b1p-206 - v_arg2 = -0x1.aca05ad00aec1p+737 | 0x1.ac746bd7e216bp+51 - v_arg3 = 0x1.17342292078b4p+188 | -0x1.49efaf9392301p+555 -- v_result = 0x1.17342292078b4p+188 | 0x0p+0 -+ v_result = 0x1.17342292078b4p+188 | -- - insn vfmsdb: - v_arg1 = -0x1.a1b218e84e61p+34 | 0x1.b220f0d144daep-111 - v_arg2 = 0x1.564fcc2527961p-265 | 0x1.ea85a4154721ep+733 -@@ -1318,22 +1318,22 @@ insn wfmsdb: - v_arg1 = -0x1.7499a639673a6p-100 | -0x1.2a0d737e6cb1cp-207 - v_arg2 = -0x1.01ad4670a7aa3p-911 | 0x1.f94385e1021e8p+317 - v_arg3 = 0x1.aa42b2bb17af9p+982 | 0x1.c550e471711p+786 -- v_result = -0x1.aa42b2bb17af8p+982 | 0x0p+0 -+ v_result = -0x1.aa42b2bb17af8p+982 | -- - insn wfmsdb: - v_arg1 = 0x1.76840f99b431ep+500 | -0x1.989a500c92c08p+594 - v_arg2 = 0x1.33c657cb8385cp-84 | -0x1.2c795ad92ce17p+807 - v_arg3 = -0x1.ee58a39f02d54p-351 | -0x1.18695ed9a280ap+48 -- v_result = 0x1.c242894a0068p+416 | 0x0p+0 -+ v_result = 0x1.c242894a0068p+416 | -- - insn wfmsdb: - v_arg1 = 
-0x1.16db07e054a65p-469 | -0x1.3a627ab99c6e4p+689 - v_arg2 = 0x1.17872eae826e5p-538 | 0x1.44ed513fb5873p-929 - v_arg3 = 0x1.5ca912008e077p-217 | -0x1.982a6f7359876p-23 -- v_result = -0x1.5ca912008e077p-217 | 0x0p+0 -+ v_result = -0x1.5ca912008e077p-217 | -- - insn wfmsdb: - v_arg1 = -0x1.d315f4a932c6p+122 | 0x1.616a04493e143p+513 - v_arg2 = -0x1.cf1cd3516f23fp+552 | 0x1.7121749c3932cp-750 - v_arg3 = 0x1.dc26d92304d7fp-192 | -0x1.1fc3cca9ec20ep+371 -- v_result = 0x1.a67ca6ba395bcp+675 | 0x0p+0 -+ v_result = 0x1.a67ca6ba395bcp+675 | -- - insn wfcdb: - v_arg1 = 0x1.302001b736011p-633 | -0x1.72d5300225c97p-468 - v_arg2 = -0x1.8c007c5aba108p-17 | -0x1.bb3f9ae136acdp+569 -@@ -1383,19 +1383,19 @@ insn vfcedb: - v_arg1 = 0x1.d8e5c9930c19dp+623 | -0x1.cf1facff4e194p-605 - v_arg2 = -0x1.ed6ba02646d0dp+441 | -0x1.2d677e710620bp+810 - insn wfcedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.a252009e1a12cp-442 | 0x1.4dc608268bb29p-513 - v_arg2 = -0x1.81020aa1a36e6p-687 | -0x1.300e64ce414f1p-899 - insn wfcedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.cec439a8d4781p-175 | -0x1.d20e3b281d599p+893 - v_arg2 = 0x1.ca17cf16cf0aap-879 | 0x1.61506f8596092p+545 - insn wfcedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.0659f5f24a004p+877 | 0x1.fc46867ed0338p-680 - v_arg2 = -0x1.1d6849587155ep-1010 | -0x1.f68171edc235fp+575 - insn wfcedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.dc88a0d46ad79p-816 | 0x1.245140dcaed79p+851 - v_arg2 = 0x1.b33e977c7b3ep-818 | -0x1.04319d7c69367p+787 - insn vfcedbs: -@@ -1419,22 +1419,22 @@ insn vfcedbs: - v_arg2 = 0x1.ae2c06ea88ff4p+332 | -0x1.f668ce4f8ef9ap+821 - r_result = 0000000000000003 - insn wfcedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.645261bf86b1fp-996 | 
0x1.abd13c95397aap+992 - v_arg2 = -0x1.ba09e8fc66a8cp+113 | 0x1.75dbfe92c16c4p-786 - r_result = 0000000000000003 - insn wfcedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.d02831d003e7dp+415 | -0x1.611a9dfd10f36p-80 - v_arg2 = -0x1.10bda62f4647p+723 | 0x1.cc47af6653378p-614 - r_result = 0000000000000003 - insn wfcedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 0x1.f168f32f84178p-321 | -0x1.79a2a0b9549d1p-136 - v_arg2 = 0x1.41e19d1cfa692p+11 | -0x1.2a0ed6e7fd517p-453 - r_result = 0000000000000003 - insn wfcedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.76a9144ee26c5p+188 | -0x1.386aaea2d9cddp-542 - v_arg2 = 0x1.810fcf222efc4p-999 | -0x1.ce90a9a43e2a1p+80 - r_result = 0000000000000003 -@@ -1455,19 +1455,19 @@ insn vfchdb: - v_arg1 = 0x1.82be31fb88a2dp+946 | -0x1.7ca9e9ff31953p-931 - v_arg2 = 0x1.fe75a1052beccp+490 | 0x1.179d18543d678p-255 - insn wfchdb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.0af85d8d8d609p-464 | -0x1.9f639a686e0fep+203 - v_arg2 = -0x1.3142b77b55761p-673 | 0x1.ca9c474339da1p+472 - insn wfchdb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = -0x1.6cf16959a022bp+213 | 0x1.445606e4363e1p+942 - v_arg2 = -0x1.8c343201bbd2p+939 | -0x1.e5095ad0c37a4p-434 - insn wfchdb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.36b4fc9cf5bdap-52 | -0x1.f1fd95cbcd533p+540 - v_arg2 = 0x1.5a2362891c9edp-175 | -0x1.e1f68c319e5d2p+58 - insn wfchdb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.11c6489f544bbp+811 | 0x1.262a740ec3d47p+456 - v_arg2 = -0x1.d9394d354e989p-154 | 0x1.cc21b3094391ap-972 - insn vfchdbs: -@@ -1491,22 +1491,22 @@ insn vfchdbs: - v_arg2 = 0x1.e426748435a76p+370 | 
0x1.8702527d17783p-871 - r_result = 0000000000000003 - insn wfchdbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.6c51b9f6442c8p+639 | 0x1.1e6b37adff703p+702 - v_arg2 = 0x1.0cba9c1c75e43p+520 | -0x1.145d44ed90967p+346 - r_result = 0000000000000000 - insn wfchdbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.7b3dd643bf36bp+816 | -0x1.61ce7bfb9307ap-683 - v_arg2 = -0x1.f2c998dc15c9ap-776 | 0x1.e16397f2dcdf5p+571 - r_result = 0000000000000000 - insn wfchdbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.cc3be81884e0ap-865 | -0x1.8b353bd41064p+820 - v_arg2 = -0x1.2c1bafaafdd4ep-34 | -0x1.24666808ab16ep-435 - r_result = 0000000000000000 - insn wfchdbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.c3de33d3b673ap+554 | 0x1.d39ed71e53096p-798 - v_arg2 = -0x1.c1e8f7b3c001p-828 | 0x1.22e2cf797fabp-787 - r_result = 0000000000000000 -@@ -1527,19 +1527,19 @@ insn vfchedb: - v_arg1 = -0x1.6c5599e7ba923p+829 | -0x1.5d1a1191ed6eap-994 - v_arg2 = -0x1.555c8775bc4d2p-478 | -0x1.4aa6a2c82319cp+493 - insn wfchedb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.ae6cad07b0f3ep-232 | -0x1.2ed61a43f3b99p-74 - v_arg2 = -0x1.226f7cddbde13p-902 | -0x1.790d1d6febbf8p+336 - insn wfchedb: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.20eb8eac3711dp-385 | 0x1.ef71d3312d7e1p+739 - v_arg2 = 0x1.7a3ba08c5a0bdp-823 | -0x1.a7845ccaa544dp-129 - insn wfchedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.97ebdbc057be8p+824 | 0x1.2b7798b063cd6p+237 - v_arg2 = 0x1.cdb87a6074294p-81 | -0x1.074c902b19bccp-416 - insn wfchedb: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = 
-0x1.82deebf9ff023p+937 | 0x1.56c5adcf9d4abp-672 - v_arg2 = -0x1.311ce49bc9439p+561 | 0x1.c8e1c512d8544p+103 - insn vfchedbs: -@@ -1563,22 +1563,22 @@ insn vfchedbs: - v_arg2 = -0x1.47f5dfc7a5bcp-569 | 0x1.5877ef33664a3p-758 - r_result = 0000000000000003 - insn wfchedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.a7370ccfd9e49p+505 | 0x1.c6b2385850ca2p-591 - v_arg2 = 0x1.984f4fcd338b1p+675 | -0x1.feb996c821232p-39 - r_result = 0000000000000003 - insn wfchedbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.641878612dd2p+207 | 0x1.b35e3292db7f6p+567 - v_arg2 = -0x1.18a87f209e96bp+299 | -0x1.3d598f3612d8ap+1016 - r_result = 0000000000000000 - insn wfchedbs: -- v_result = ffffffffffffffff | 0000000000000000 -+ v_result = ffffffffffffffff | -- - v_arg1 = 0x1.cfc2cda244153p+404 | 0x1.d8b2b28e9d8d7p+276 - v_arg2 = 0x1.3517b8c7a59a1p-828 | 0x1.6096fab7003ccp-415 - r_result = 0000000000000000 - insn wfchedbs: -- v_result = 0000000000000000 | 0000000000000000 -+ v_result = 0000000000000000 | -- - v_arg1 = -0x1.54d656f033e56p-603 | -0x1.95ad0e2088967p+254 - v_arg2 = 0x1.4cb319db206e4p-614 | 0x1.b41cd9e3739b6p-862 - r_result = 0000000000000003 diff --git a/SOURCES/valgrind-3.16.1-sched_getsetattr.patch b/SOURCES/valgrind-3.16.1-sched_getsetattr.patch deleted file mode 100644 index b95267d..0000000 --- a/SOURCES/valgrind-3.16.1-sched_getsetattr.patch +++ /dev/null @@ -1,201 +0,0 @@ -commit a53adb79711ccfc76a4ee32b20253045cdab55c7 -Author: Mark Wielaard -Date: Mon Jul 27 16:36:17 2020 +0200 - - Handle linux syscalls sched_getattr and sched_setattr - - The only "special" thing about these syscalls is that the given - struct sched_attr determines its own size for future expansion. 
- - Original fix by "ISHIKAWA,chiaki" - - https://bugs.kde.org/show_bug.cgi?id=369029 - -diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h -index cdc73c1e6..eb0b320ca 100644 ---- a/coregrind/m_syswrap/priv_syswrap-linux.h -+++ b/coregrind/m_syswrap/priv_syswrap-linux.h -@@ -227,6 +227,8 @@ DECL_TEMPLATE(linux, sys_fremovexattr); - // syscalls. - DECL_TEMPLATE(linux, sys_sched_setparam); - DECL_TEMPLATE(linux, sys_sched_getparam); -+DECL_TEMPLATE(linux, sys_sched_setattr); -+DECL_TEMPLATE(linux, sys_sched_getattr); - DECL_TEMPLATE(linux, sys_sched_setscheduler); - DECL_TEMPLATE(linux, sys_sched_getscheduler); - DECL_TEMPLATE(linux, sys_sched_yield); -diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c -index 28d90135a..d6f3eb910 100644 ---- a/coregrind/m_syswrap/syswrap-amd64-linux.c -+++ b/coregrind/m_syswrap/syswrap-amd64-linux.c -@@ -846,9 +846,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 311 - LINX_(__NR_kcmp, sys_kcmp), // 312 - LINX_(__NR_finit_module, sys_finit_module), // 313 --// LIN__(__NR_sched_setattr, sys_ni_syscall), // 314 -- --// LIN__(__NR_sched_getattr, sys_ni_syscall), // 315 -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 314 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 315 - LINX_(__NR_renameat2, sys_renameat2), // 316 - // LIN__(__NR_seccomp, sys_ni_syscall), // 317 - LINXY(__NR_getrandom, sys_getrandom), // 318 -diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c -index 579542785..70700e53f 100644 ---- a/coregrind/m_syswrap/syswrap-arm-linux.c -+++ b/coregrind/m_syswrap/syswrap-arm-linux.c -@@ -1009,6 +1009,8 @@ static SyscallTableEntry syscall_main_table[] = { - LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 376 - LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 377 - -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 
380 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 381 - LINX_(__NR_renameat2, sys_renameat2), // 382 - - LINXY(__NR_getrandom, sys_getrandom), // 384 -diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c -index 81e01456f..acca02442 100644 ---- a/coregrind/m_syswrap/syswrap-arm64-linux.c -+++ b/coregrind/m_syswrap/syswrap-arm64-linux.c -@@ -806,8 +806,8 @@ static SyscallTableEntry syscall_main_table[] = { - LINX_(__NR_process_vm_writev, sys_process_vm_writev), // 271 - LINX_(__NR_kcmp, sys_kcmp), // 272 - LINX_(__NR_finit_module, sys_finit_module), // 273 -- // (__NR_sched_setattr, sys_ni_syscall), // 274 -- // (__NR_sched_getattr, sys_ni_syscall), // 275 -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 274 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 275 - LINX_(__NR_renameat2, sys_renameat2), // 276 - // (__NR_seccomp, sys_ni_syscall), // 277 - LINXY(__NR_getrandom, sys_getrandom), // 278 -diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c -index 5b5b7eee6..56be3032d 100644 ---- a/coregrind/m_syswrap/syswrap-linux.c -+++ b/coregrind/m_syswrap/syswrap-linux.c -@@ -3677,6 +3677,41 @@ POST(sys_sched_getparam) - POST_MEM_WRITE( ARG2, sizeof(struct vki_sched_param) ); - } - -+PRE(sys_sched_setattr) -+{ -+ struct vki_sched_attr *attr; -+ PRINT("sched_setattr ( %ld, %#" FMT_REGWORD "x, %#" -+ FMT_REGWORD "x )", SARG1, ARG2, ARG3 ); -+ PRE_REG_READ3(long, "sched_setattr", -+ vki_pid_t, pid, struct sched_attr *, p, unsigned int, flags); -+ /* We need to be able to read at least the size field. 
*/ -+ PRE_MEM_READ( "sched_setattr(attr->size)", ARG2, sizeof(vki_uint32_t) ); -+ attr = (struct vki_sched_attr *)(Addr)ARG2; -+ if (ML_(safe_to_deref)(attr,sizeof(vki_uint32_t))) -+ PRE_MEM_READ( "sched_setattr(attr)", (Addr)attr, attr->size); -+} -+ -+PRE(sys_sched_getattr) -+{ -+ struct vki_sched_attr *attr; -+ PRINT("sched_getattr ( %ld, %#" FMT_REGWORD "x, %ld, %#" -+ FMT_REGWORD "x )", SARG1, ARG2, ARG3, ARG4 ); -+ PRE_REG_READ4(long, "sched_getattr", -+ vki_pid_t, pid, struct sched_attr *, p, -+ unsigned int, size, unsigned int, flags); -+ /* We need to be able to read at least the size field. */ -+ PRE_MEM_READ( "sched_setattr(attr->size)", ARG2, sizeof(vki_uint32_t) ); -+ /* And the kernel needs to be able to write to the whole struct size. */ -+ attr = (struct vki_sched_attr *)(Addr)ARG2; -+ if (ML_(safe_to_deref)(attr,sizeof(vki_uint32_t))) -+ PRE_MEM_WRITE( "sched_setattr(attr)", (Addr)attr, attr->size); -+} -+POST(sys_sched_getattr) -+{ -+ struct vki_sched_attr *attr = (struct vki_sched_attr *)(Addr)ARG2; -+ POST_MEM_WRITE( (Addr)attr, attr->size ); -+} -+ - PRE(sys_sched_getscheduler) - { - PRINT("sys_sched_getscheduler ( %ld )", SARG1); -diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c -index eed12a1bc..c19cb9e0e 100644 ---- a/coregrind/m_syswrap/syswrap-ppc32-linux.c -+++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c -@@ -1016,6 +1016,9 @@ static SyscallTableEntry syscall_table[] = { - LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 351 - LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 352 - -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 355 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 356 -+ - LINXY(__NR_getrandom, sys_getrandom), // 359 - LINXY(__NR_memfd_create, sys_memfd_create), // 360 - -diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c -index d58200b49..b6422a765 100644 ---- a/coregrind/m_syswrap/syswrap-ppc64-linux.c 
-+++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c -@@ -998,6 +998,8 @@ static SyscallTableEntry syscall_table[] = { - LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 351 - LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 352 - -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 355 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 356 - LINX_(__NR_renameat2, sys_renameat2), // 357 - - LINXY(__NR_getrandom, sys_getrandom), // 359 -diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c -index a0a330aa2..3427fee16 100644 ---- a/coregrind/m_syswrap/syswrap-s390x-linux.c -+++ b/coregrind/m_syswrap/syswrap-s390x-linux.c -@@ -825,8 +825,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_kcmp, sys_kcmp), // 343 - // ?????(__NR_finit_module, ), // 344 - --// ?????(__NR_sched_setattr, ), // 345 --// ?????(__NR_sched_getattr, ), // 346 -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 345 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 346 - LINX_(__NR_renameat2, sys_renameat2), // 347 - // ?????(__NR_seccomp, ), // 348 - LINXY(__NR_getrandom, sys_getrandom), // 349 -diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c -index 332ed0bf2..b59d96f37 100644 ---- a/coregrind/m_syswrap/syswrap-x86-linux.c -+++ b/coregrind/m_syswrap/syswrap-x86-linux.c -@@ -1580,8 +1580,8 @@ static SyscallTableEntry syscall_table[] = { - LINX_(__NR_kcmp, sys_kcmp), // 349 - - // LIN__(__NR_finit_module, sys_ni_syscall), // 350 --// LIN__(__NR_sched_setattr, sys_ni_syscall), // 351 --// LIN__(__NR_sched_getattr, sys_ni_syscall), // 352 -+ LINX_(__NR_sched_setattr, sys_sched_setattr), // 351 -+ LINXY(__NR_sched_getattr, sys_sched_getattr), // 352 - LINX_(__NR_renameat2, sys_renameat2), // 353 - // LIN__(__NR_seccomp, sys_ni_syscall), // 354 - -diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h -index 75b583165..ef93b9258 100644 ---- a/include/vki/vki-linux.h -+++ 
b/include/vki/vki-linux.h -@@ -410,6 +410,23 @@ struct vki_sched_param { - int sched_priority; - }; - -+struct vki_sched_attr { -+ vki_uint32_t size; -+ vki_uint32_t sched_policy; -+ vki_uint64_t sched_flags; -+ -+ /* SCHED_NORMAL, SCHED_BATCH */ -+ vki_int32_t sched_nice; -+ -+ /* SCHED_FIFO, SCHED_RR */ -+ vki_uint32_t sched_priority; -+ -+ /* SCHED_DEADLINE */ -+ vki_uint64_t sched_runtime; -+ vki_uint64_t sched_deadline; -+ vki_uint64_t sched_period; -+}; -+ - #define VKI_TASK_COMM_LEN 16 - - //---------------------------------------------------------------------- diff --git a/SOURCES/valgrind-3.17.0-clone-parent-res.patch b/SOURCES/valgrind-3.17.0-clone-parent-res.patch new file mode 100644 index 0000000..8d03cea --- /dev/null +++ b/SOURCES/valgrind-3.17.0-clone-parent-res.patch @@ -0,0 +1,21 @@ +commit e08a82991a9b9dc87c13f2b89273f25f97d14baf +Author: Tom Hughes +Date: Tue Apr 6 22:44:36 2021 +0100 + + Only process clone results in the parent thread + + Fixes BZ#423963 + +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 5ae4e6613..c59d8ee26 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -940,7 +940,7 @@ PRE(sys_clone) + ("Valgrind does not support general clone()."); + } + +- if (SUCCESS) { ++ if (SUCCESS && RES != 0) { + if (ARG_FLAGS & (VKI_CLONE_PARENT_SETTID | VKI_CLONE_PIDFD)) + POST_MEM_WRITE(ARG3, sizeof(Int)); + if (ARG_FLAGS & (VKI_CLONE_CHILD_SETTID | VKI_CLONE_CHILD_CLEARTID)) diff --git a/SOURCES/valgrind-3.17.0-debuginfod.patch b/SOURCES/valgrind-3.17.0-debuginfod.patch new file mode 100644 index 0000000..5109a4b --- /dev/null +++ b/SOURCES/valgrind-3.17.0-debuginfod.patch @@ -0,0 +1,30 @@ +commit 93104368952c37268da724231487058ea3eaf1dc +Author: Tom Hughes +Date: Thu May 20 17:16:06 2021 +0100 + + Don't look for separate debuginfo if the image has a .debug_info section + + Fixes BZ#435908 + +diff --git a/coregrind/m_debuginfo/readelf.c 
b/coregrind/m_debuginfo/readelf.c +index b0f062ddc..e424e3e7e 100644 +--- a/coregrind/m_debuginfo/readelf.c ++++ b/coregrind/m_debuginfo/readelf.c +@@ -2879,13 +2879,15 @@ Bool ML_(read_elf_debug_info) ( struct _DebugInfo* di ) + /* Look for a build-id */ + HChar* buildid = find_buildid(mimg, False, False); + +- /* Look for a debug image that matches either the build-id or ++ /* If we don't have a .debug_info section in the main image then ++ look for a debug image that matches either the build-id or + the debuglink-CRC32 in the main image. If the main image + doesn't contain either of those then this won't even bother + to try looking. This looks in all known places, including + the --extra-debuginfo-path if specified and on the + --debuginfo-server if specified. */ +- if (buildid != NULL || debuglink_escn.img != NULL) { ++ if (debug_info_escn.img == NULL && ++ (buildid != NULL || debuglink_escn.img != NULL)) { + /* Do have a debuglink section? */ + if (debuglink_escn.img != NULL) { + UInt crc_offset diff --git a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch new file mode 100644 index 0000000..b7f1a6c --- /dev/null +++ b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1-tests.patch @@ -0,0 +1,1712 @@ +commit 4bcc6c8a97c10c4dd41b35bd3b3035ec4037d524 +Author: Carl Love +Date: Mon Nov 16 19:09:47 2020 -0600 + + VSX Permute Control Vector Generate Operation tests. + +diff --git a/NEWS b/NEWS +index aa398cf54..2e42e74b2 100644 +--- a/NEWS ++++ b/NEWS +@@ -154,6 +154,7 @@ where XXXXXX is the bug number as listed below. 
+ 428909 helgrind: need to intercept duplicate libc definitions for Fedora 33 + 429352 PPC ISA 3.1 support is missing, part 7 + 429354 PPC ISA 3.1 support is missing, part 8 ++429375 PPC ISA 3.1 support is missing, part 9 + 429692 unhandled ppc64le-linux syscall: 147 (getsid) + 429864 s390x: C++ atomic test_and_set yields false-positive memcheck + diagnostics +diff --git a/none/tests/ppc64/test_isa_3_1_XT.c b/none/tests/ppc64/test_isa_3_1_XT.c +index c16ddedac..a54e8763a 100644 +--- a/none/tests/ppc64/test_isa_3_1_XT.c ++++ b/none/tests/ppc64/test_isa_3_1_XT.c +@@ -491,6 +491,54 @@ static void test_pstxv_4 (void) { + static void test_pstxv_0 (void) { + __asm__ __volatile__ ("pstxv %x0, 0(%1), 0" :: "wa" (vec_xs), "r" (ra) ); + } ++static void test_xxgenpcvbm_imm0 (void) { ++ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvbm_imm1 (void) { ++ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvbm_imm2 (void) { ++ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvbm_imm3 (void) { ++ __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvhm_imm0 (void) { ++ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvhm_imm1 (void) { ++ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvhm_imm2 (void) { ++ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvhm_imm3 (void) { ++ __asm__ __volatile__ ("xxgenpcvhm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvwm_imm0 (void) { ++ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvwm_imm1 (void) { ++ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 1" : 
"=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvwm_imm2 (void) { ++ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvwm_imm3 (void) { ++ __asm__ __volatile__ ("xxgenpcvwm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvdm_imm0 (void) { ++ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvdm_imm1 (void) { ++ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 1" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvdm_imm2 (void) { ++ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 2" : "=wa" (vec_xt) : "v" (vrb) ); ++} ++static void test_xxgenpcvdm_imm3 (void) { ++ __asm__ __volatile__ ("xxgenpcvdm %x0, %1, 3" : "=wa" (vec_xt) : "v" (vrb) ); ++} + + static test_list_t testgroup_generic[] = { + { &test_lxvkq_imm1, "lxvkq imm1", "XT,UIM"}, /* bcwp */ +@@ -592,6 +640,22 @@ static test_list_t testgroup_generic[] = { + { &test_xxblendvw, "xxblendvw", "XT,XA,XB,XC"}, /* bcs */ + { &test_xxeval_imm0, "xxeval imm0", "XT,XA,XB,XC,IMM"}, /* bcwp */ + { &test_xxeval_imm3, "xxeval imm3", "XT,XA,XB,XC,IMM"}, /* bcwp */ ++ { &test_xxgenpcvbm_imm0, "xxgenpcvbm imm0", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvbm_imm1, "xxgenpcvbm imm1", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvbm_imm2, "xxgenpcvbm imm2", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvbm_imm3, "xxgenpcvbm imm3", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvdm_imm0, "xxgenpcvdm imm0", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvdm_imm1, "xxgenpcvdm imm1", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvdm_imm2, "xxgenpcvdm imm2", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvdm_imm3, "xxgenpcvdm imm3", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvhm_imm0, "xxgenpcvhm imm0", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvhm_imm1, "xxgenpcvhm imm1", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvhm_imm2, "xxgenpcvhm imm2", "XT,VRB,IMM"}, /* bcwp */ ++ { 
&test_xxgenpcvhm_imm3, "xxgenpcvhm imm3", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvwm_imm0, "xxgenpcvwm imm0", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvwm_imm1, "xxgenpcvwm imm1", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvwm_imm2, "xxgenpcvwm imm2", "XT,VRB,IMM"}, /* bcwp */ ++ { &test_xxgenpcvwm_imm3, "xxgenpcvwm imm3", "XT,VRB,IMM"}, /* bcwp */ + { &test_xxpermx_imm0, "xxpermx imm0", "XT,XA,XB,XC,UIM"}, /* bcwp */ + { &test_xxpermx_imm3, "xxpermx imm3", "XT,XA,XB,XC,UIM"}, /* bcwp */ + { &test_xxsplti32dx_ix0_imm0xa5a5a5a5, "xxsplti32dx ix0_imm0xa5a5a5a5", "XT,IX,IMM32"}, /* bcwp */ +diff --git a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp +index efa95884e..8b5f1d1a3 100644 +--- a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp ++++ b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp +@@ -4644,6 +4644,230 @@ xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 + xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 8000000000000000 7f800000ff800000 + xxeval imm3 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => 8000000000000000 7f800000ff800000 + ++xxgenpcvbm imm0 7f800000ff800000,ff8000007f800000 => 18031a1b04051e1f 1121314021617 ++xxgenpcvbm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 5061a1b1c071e1f 11010214150304 ++xxgenpcvbm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 31904051c1d0607 1000121301021617 ++xxgenpcvbm imm0 0080000e8080000e,0180055e0180077e => 18021a1b03041e1f 1000121314011617 ++xxgenpcvbm imm0 0180055e0180077e,0000111e8000222e => 18011a1b1c021e1f 1011121300151617 ++xxgenpcvbm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b011d1e1f 1000121314151617 ++xxgenpcvbm imm0 7ff0000000000000,fff0000000000000 => 18021a1b1c1d1e1f 1121314151617 ++xxgenpcvbm imm0 fff0000000000000,2208400000000000 => 11a1b1c1d1e1f 1011121314151617 ++xxgenpcvbm imm0 
2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvbm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1121302151617 ++xxgenpcvbm imm0 ffff000180000001,0000000000000000 => 11a1b021d1e1f 1011121314151617 ++xxgenpcvbm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 11121314151617 ++xxgenpcvbm imm0 8000000000000000,7f800000ff800000 => 3191a1b1c1d1e1f 1000121301021617 ++ ++xxgenpcvbm imm1 7f800000ff800000,ff8000007f800000 => 0 105090c0d0000 ++xxgenpcvbm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 203060708090d ++xxgenpcvbm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 10405080a0b0e0f ++xxgenpcvbm imm1 0080000e8080000e,0180055e0180077e => 0 105090c0d000000 ++xxgenpcvbm imm1 0180055e0180077e,0000111e8000222e => 0 4090d0000000000 ++xxgenpcvbm imm1 0000111e8000222e,7ff0000000000000 => 0 10c000000000000 ++xxgenpcvbm imm1 7ff0000000000000,fff0000000000000 => 0 1090000000000 ++xxgenpcvbm imm1 fff0000000000000,2208400000000000 => 0 809000000000000 ++xxgenpcvbm imm1 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvbm imm1 0000000000000009,ffff000180000001 => 0 1040000000000 ++xxgenpcvbm imm1 ffff000180000001,0000000000000000 => 0 8090c0000000000 ++xxgenpcvbm imm1 0000000000000000,8000000000000000 => 0 0 ++xxgenpcvbm imm1 8000000000000000,7f800000ff800000 => 0 104050800000000 ++ ++xxgenpcvbm imm2 7f800000ff800000,ff8000007f800000 => 1702151401001110 5041d1c1b031918 ++xxgenpcvbm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 201151413001110 71e06051b1a0403 ++xxgenpcvbm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 416030213120100 1f071d1c06051918 ++xxgenpcvbm imm2 0080000e8080000e,0180055e0180077e => 1702151401001110 1f041d1c1b031918 ++xxgenpcvbm imm2 0180055e0180077e,0000111e8000222e => 1701151413001110 1f1e1d1c021a1918 ++xxgenpcvbm imm2 0000111e8000222e,7ff0000000000000 => 1716151400121110 1f011d1c1b1a1918 ++xxgenpcvbm imm2 7ff0000000000000,fff0000000000000 => 1700151413121110 2011d1c1b1a1918 ++xxgenpcvbm imm2 
fff0000000000000,2208400000000000 => 100151413121110 1f1e1d1c1b1a1918 ++xxgenpcvbm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvbm imm2 0000000000000009,ffff000180000001 => 1716151413121110 2011d1c001a1918 ++xxgenpcvbm imm2 ffff000180000001,0000000000000000 => 201151400121110 1f1e1d1c1b1a1918 ++xxgenpcvbm imm2 0000000000000000,8000000000000000 => 1716151413121110 1e1d1c1b1a1918 ++xxgenpcvbm imm2 8000000000000000,7f800000ff800000 => 16151413121110 1f031d1c02011918 ++ ++xxgenpcvbm imm3 7f800000ff800000,ff8000007f800000 => f0e0a060302 0 ++xxgenpcvbm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0d0c0908070602 0 ++xxgenpcvbm imm3 ff7ffffe7f7ffffe,0080000e8080000e => e0b0a0705040100 0 ++xxgenpcvbm imm3 0080000e8080000e,0180055e0180077e => e0a060302 0 ++xxgenpcvbm imm3 0180055e0180077e,0000111e8000222e => b0602 0 ++xxgenpcvbm imm3 0000111e8000222e,7ff0000000000000 => e03 0 ++xxgenpcvbm imm3 7ff0000000000000,fff0000000000000 => f0e06 0 ++xxgenpcvbm imm3 fff0000000000000,2208400000000000 => 706 0 ++xxgenpcvbm imm3 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvbm imm3 0000000000000009,ffff000180000001 => f0e0b 0 ++xxgenpcvbm imm3 ffff000180000001,0000000000000000 => 70603 0 ++xxgenpcvbm imm3 0000000000000000,8000000000000000 => f 0 ++xxgenpcvbm imm3 8000000000000000,7f800000ff800000 => e0b0a07 0 ++ ++xxgenpcvdm imm0 7f800000ff800000,ff8000007f800000 => 18191a1b1c1d1e1f 1020304050607 ++xxgenpcvdm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 8090a0b0c0d0e0f 1020304050607 ++xxgenpcvdm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 1020304050607 1011121314151617 ++xxgenpcvdm imm0 0080000e8080000e,0180055e0180077e => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvdm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvdm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvdm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1020304050607 ++xxgenpcvdm imm0 
fff0000000000000,2208400000000000 => 1020304050607 1011121314151617 ++xxgenpcvdm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvdm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1020304050607 ++xxgenpcvdm imm0 ffff000180000001,0000000000000000 => 1020304050607 1011121314151617 ++xxgenpcvdm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1020304050607 ++xxgenpcvdm imm0 8000000000000000,7f800000ff800000 => 1020304050607 1011121314151617 ++ ++xxgenpcvdm imm1 7f800000ff800000,ff8000007f800000 => 0 1020304050607 ++xxgenpcvdm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 8090a0b0c0d0e0f 1020304050607 ++xxgenpcvdm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 8090a0b0c0d0e0f ++xxgenpcvdm imm1 0080000e8080000e,0180055e0180077e => 0 0 ++xxgenpcvdm imm1 0180055e0180077e,0000111e8000222e => 0 0 ++xxgenpcvdm imm1 0000111e8000222e,7ff0000000000000 => 0 0 ++xxgenpcvdm imm1 7ff0000000000000,fff0000000000000 => 0 1020304050607 ++xxgenpcvdm imm1 fff0000000000000,2208400000000000 => 0 8090a0b0c0d0e0f ++xxgenpcvdm imm1 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvdm imm1 0000000000000009,ffff000180000001 => 0 1020304050607 ++xxgenpcvdm imm1 ffff000180000001,0000000000000000 => 0 8090a0b0c0d0e0f ++xxgenpcvdm imm1 0000000000000000,8000000000000000 => 0 1020304050607 ++xxgenpcvdm imm1 8000000000000000,7f800000ff800000 => 0 8090a0b0c0d0e0f ++ ++xxgenpcvdm imm2 7f800000ff800000,ff8000007f800000 => 1716151413121110 706050403020100 ++xxgenpcvdm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 706050403020100 f0e0d0c0b0a0908 ++xxgenpcvdm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 706050403020100 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 0080000e8080000e,0180055e0180077e => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 0000111e8000222e,7ff0000000000000 => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 7ff0000000000000,fff0000000000000 => 
1716151413121110 706050403020100 ++xxgenpcvdm imm2 fff0000000000000,2208400000000000 => 706050403020100 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 0000000000000009,ffff000180000001 => 1716151413121110 706050403020100 ++xxgenpcvdm imm2 ffff000180000001,0000000000000000 => 706050403020100 1f1e1d1c1b1a1918 ++xxgenpcvdm imm2 0000000000000000,8000000000000000 => 1716151413121110 706050403020100 ++xxgenpcvdm imm2 8000000000000000,7f800000ff800000 => 706050403020100 1f1e1d1c1b1a1918 ++ ++xxgenpcvdm imm3 7f800000ff800000,ff8000007f800000 => f0e0d0c0b0a0908 0 ++xxgenpcvdm imm3 ff8000007f800000,ff7ffffe7f7ffffe => 706050403020100 f0e0d0c0b0a0908 ++xxgenpcvdm imm3 ff7ffffe7f7ffffe,0080000e8080000e => 706050403020100 0 ++xxgenpcvdm imm3 0080000e8080000e,0180055e0180077e => 0 0 ++xxgenpcvdm imm3 0180055e0180077e,0000111e8000222e => 0 0 ++xxgenpcvdm imm3 0000111e8000222e,7ff0000000000000 => 0 0 ++xxgenpcvdm imm3 7ff0000000000000,fff0000000000000 => f0e0d0c0b0a0908 0 ++xxgenpcvdm imm3 fff0000000000000,2208400000000000 => 706050403020100 0 ++xxgenpcvdm imm3 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvdm imm3 0000000000000009,ffff000180000001 => f0e0d0c0b0a0908 0 ++xxgenpcvdm imm3 ffff000180000001,0000000000000000 => 706050403020100 0 ++xxgenpcvdm imm3 0000000000000000,8000000000000000 => f0e0d0c0b0a0908 0 ++xxgenpcvdm imm3 8000000000000000,7f800000ff800000 => 706050403020100 0 ++ ++xxgenpcvhm imm0 7f800000ff800000,ff8000007f800000 => 18191a1b02031e1f 1121314151617 ++xxgenpcvhm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 6071a1b1c1d1e1f 1020314150405 ++xxgenpcvhm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 20304051c1d0607 1011121300011617 ++xxgenpcvhm imm0 0080000e8080000e,0180055e0180077e => 18191a1b00011e1f 1011121314151617 ++xxgenpcvhm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121300011617 ++xxgenpcvhm imm0 0000111e8000222e,7ff0000000000000 => 18191a1b00011e1f 
1011121314151617 ++xxgenpcvhm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1121314151617 ++xxgenpcvhm imm0 fff0000000000000,2208400000000000 => 11a1b1c1d1e1f 1011121314151617 ++xxgenpcvhm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvhm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1121302031617 ++xxgenpcvhm imm0 ffff000180000001,0000000000000000 => 11a1b02031e1f 1011121314151617 ++xxgenpcvhm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1121314151617 ++xxgenpcvhm imm0 8000000000000000,7f800000ff800000 => 2031a1b1c1d1e1f 1011121300011617 ++ ++xxgenpcvhm imm1 7f800000ff800000,ff8000007f800000 => 0 10c0d00000000 ++xxgenpcvhm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 1020306070809 ++xxgenpcvhm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 40508090a0b0e0f ++xxgenpcvhm imm1 0080000e8080000e,0180055e0180077e => 0 c0d000000000000 ++xxgenpcvhm imm1 0180055e0180077e,0000111e8000222e => 0 405000000000000 ++xxgenpcvhm imm1 0000111e8000222e,7ff0000000000000 => 0 c0d000000000000 ++xxgenpcvhm imm1 7ff0000000000000,fff0000000000000 => 0 1000000000000 ++xxgenpcvhm imm1 fff0000000000000,2208400000000000 => 0 809000000000000 ++xxgenpcvhm imm1 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvhm imm1 0000000000000009,ffff000180000001 => 0 1040500000000 ++xxgenpcvhm imm1 ffff000180000001,0000000000000000 => 0 8090c0d00000000 ++xxgenpcvhm imm1 0000000000000000,8000000000000000 => 0 1000000000000 ++xxgenpcvhm imm1 8000000000000000,7f800000ff800000 => 0 405080900000000 ++ ++xxgenpcvhm imm2 7f800000ff800000,ff8000007f800000 => 1716151401001110 3021d1c1b1a1918 ++xxgenpcvhm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 100151413121110 70605041b1a0302 ++xxgenpcvhm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 504030213120100 1f1e1d1c07061918 ++xxgenpcvhm imm2 0080000e8080000e,0180055e0180077e => 1716151401001110 1f1e1d1c1b1a1918 ++xxgenpcvhm imm2 0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c01001918 
++xxgenpcvhm imm2 0000111e8000222e,7ff0000000000000 => 1716151401001110 1f1e1d1c1b1a1918 ++xxgenpcvhm imm2 7ff0000000000000,fff0000000000000 => 1716151413121110 1001d1c1b1a1918 ++xxgenpcvhm imm2 fff0000000000000,2208400000000000 => 100151413121110 1f1e1d1c1b1a1918 ++xxgenpcvhm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvhm imm2 0000000000000009,ffff000180000001 => 1716151413121110 3021d1c01001918 ++xxgenpcvhm imm2 ffff000180000001,0000000000000000 => 302151401001110 1f1e1d1c1b1a1918 ++xxgenpcvhm imm2 0000000000000000,8000000000000000 => 1716151413121110 1001d1c1b1a1918 ++xxgenpcvhm imm2 8000000000000000,7f800000ff800000 => 100151413121110 1f1e1d1c03021918 ++ ++xxgenpcvhm imm3 7f800000ff800000,ff8000007f800000 => f0e0302 0 ++xxgenpcvhm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0e0d0c09080706 0 ++xxgenpcvhm imm3 ff7ffffe7f7ffffe,0080000e8080000e => b0a070605040100 0 ++xxgenpcvhm imm3 0080000e8080000e,0180055e0180077e => 302 0 ++xxgenpcvhm imm3 0180055e0180077e,0000111e8000222e => b0a 0 ++xxgenpcvhm imm3 0000111e8000222e,7ff0000000000000 => 302 0 ++xxgenpcvhm imm3 7ff0000000000000,fff0000000000000 => f0e 0 ++xxgenpcvhm imm3 fff0000000000000,2208400000000000 => 706 0 ++xxgenpcvhm imm3 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvhm imm3 0000000000000009,ffff000180000001 => f0e0b0a 0 ++xxgenpcvhm imm3 ffff000180000001,0000000000000000 => 7060302 0 ++xxgenpcvhm imm3 0000000000000000,8000000000000000 => f0e 0 ++xxgenpcvhm imm3 8000000000000000,7f800000ff800000 => b0a0706 0 ++ ++xxgenpcvwm imm0 7f800000ff800000,ff8000007f800000 => 18191a1b04050607 1020314151617 ++xxgenpcvwm imm0 ff8000007f800000,ff7ffffe7f7ffffe => 40506071c1d1e1f 1020314151617 ++xxgenpcvwm imm0 ff7ffffe7f7ffffe,0080000e8080000e => 40506071c1d1e1f 1011121300010203 ++xxgenpcvwm imm0 0080000e8080000e,0180055e0180077e => 18191a1b00010203 1011121314151617 ++xxgenpcvwm imm0 0180055e0180077e,0000111e8000222e => 18191a1b1c1d1e1f 1011121300010203 ++xxgenpcvwm imm0 
0000111e8000222e,7ff0000000000000 => 18191a1b00010203 1011121314151617 ++xxgenpcvwm imm0 7ff0000000000000,fff0000000000000 => 18191a1b1c1d1e1f 1020314151617 ++xxgenpcvwm imm0 fff0000000000000,2208400000000000 => 102031c1d1e1f 1011121314151617 ++xxgenpcvwm imm0 2208400000000000,0000000000000009 => 18191a1b1c1d1e1f 1011121314151617 ++xxgenpcvwm imm0 0000000000000009,ffff000180000001 => 18191a1b1c1d1e1f 1020304050607 ++xxgenpcvwm imm0 ffff000180000001,0000000000000000 => 1020304050607 1011121314151617 ++xxgenpcvwm imm0 0000000000000000,8000000000000000 => 18191a1b1c1d1e1f 1020314151617 ++xxgenpcvwm imm0 8000000000000000,7f800000ff800000 => 40506071c1d1e1f 1011121300010203 ++ ++xxgenpcvwm imm1 7f800000ff800000,ff8000007f800000 => 0 102030c0d0e0f ++xxgenpcvwm imm1 ff8000007f800000,ff7ffffe7f7ffffe => 0 1020308090a0b ++xxgenpcvwm imm1 ff7ffffe7f7ffffe,0080000e8080000e => 0 405060708090a0b ++xxgenpcvwm imm1 0080000e8080000e,0180055e0180077e => 0 c0d0e0f00000000 ++xxgenpcvwm imm1 0180055e0180077e,0000111e8000222e => 0 405060700000000 ++xxgenpcvwm imm1 0000111e8000222e,7ff0000000000000 => 0 c0d0e0f00000000 ++xxgenpcvwm imm1 7ff0000000000000,fff0000000000000 => 0 1020300000000 ++xxgenpcvwm imm1 fff0000000000000,2208400000000000 => 0 8090a0b00000000 ++xxgenpcvwm imm1 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvwm imm1 0000000000000009,ffff000180000001 => 0 1020304050607 ++xxgenpcvwm imm1 ffff000180000001,0000000000000000 => 0 8090a0b0c0d0e0f ++xxgenpcvwm imm1 0000000000000000,8000000000000000 => 0 1020300000000 ++xxgenpcvwm imm1 8000000000000000,7f800000ff800000 => 0 405060708090a0b ++ ++xxgenpcvwm imm2 7f800000ff800000,ff8000007f800000 => 1716151403020100 70605041b1a1918 ++xxgenpcvwm imm2 ff8000007f800000,ff7ffffe7f7ffffe => 302010013121110 70605041b1a1918 ++xxgenpcvwm imm2 ff7ffffe7f7ffffe,0080000e8080000e => 302010013121110 1f1e1d1c07060504 ++xxgenpcvwm imm2 0080000e8080000e,0180055e0180077e => 1716151403020100 1f1e1d1c1b1a1918 ++xxgenpcvwm imm2 
0180055e0180077e,0000111e8000222e => 1716151413121110 1f1e1d1c03020100 ++xxgenpcvwm imm2 0000111e8000222e,7ff0000000000000 => 1716151403020100 1f1e1d1c1b1a1918 ++xxgenpcvwm imm2 7ff0000000000000,fff0000000000000 => 1716151413121110 30201001b1a1918 ++xxgenpcvwm imm2 fff0000000000000,2208400000000000 => 302010013121110 1f1e1d1c1b1a1918 ++xxgenpcvwm imm2 2208400000000000,0000000000000009 => 1716151413121110 1f1e1d1c1b1a1918 ++xxgenpcvwm imm2 0000000000000009,ffff000180000001 => 1716151413121110 706050403020100 ++xxgenpcvwm imm2 ffff000180000001,0000000000000000 => 706050403020100 1f1e1d1c1b1a1918 ++xxgenpcvwm imm2 0000000000000000,8000000000000000 => 1716151413121110 30201001b1a1918 ++xxgenpcvwm imm2 8000000000000000,7f800000ff800000 => 302010013121110 1f1e1d1c07060504 ++ ++xxgenpcvwm imm3 7f800000ff800000,ff8000007f800000 => f0e0d0c03020100 0 ++xxgenpcvwm imm3 ff8000007f800000,ff7ffffe7f7ffffe => f0e0d0c07060504 0 ++xxgenpcvwm imm3 ff7ffffe7f7ffffe,0080000e8080000e => b0a090807060504 0 ++xxgenpcvwm imm3 0080000e8080000e,0180055e0180077e => 3020100 0 ++xxgenpcvwm imm3 0180055e0180077e,0000111e8000222e => b0a0908 0 ++xxgenpcvwm imm3 0000111e8000222e,7ff0000000000000 => 3020100 0 ++xxgenpcvwm imm3 7ff0000000000000,fff0000000000000 => f0e0d0c 0 ++xxgenpcvwm imm3 fff0000000000000,2208400000000000 => 7060504 0 ++xxgenpcvwm imm3 2208400000000000,0000000000000009 => 0 0 ++xxgenpcvwm imm3 0000000000000009,ffff000180000001 => f0e0d0c0b0a0908 0 ++xxgenpcvwm imm3 ffff000180000001,0000000000000000 => 706050403020100 0 ++xxgenpcvwm imm3 0000000000000000,8000000000000000 => f0e0d0c 0 ++xxgenpcvwm imm3 8000000000000000,7f800000ff800000 => b0a090807060504 0 ++ + xxpermx imm0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff + xxpermx imm0 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff + xxpermx imm0 
7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ffff0000ffff ffff0000ffff +@@ -7094,4 +7318,4 @@ xxspltiw imm3 => 300000003 300000003 + + xxspltiw imm8 => 800000008 800000008 + +-All done. Tested 142 different instruction groups ++All done. Tested 158 different instruction groups + +commit c8fa838be405d7ac43035dcf675bf490800c26ec +Author: root +Date: Fri Feb 12 15:59:32 2021 -0500 + + Reduced Precision bfloat16 outer product tests + +diff --git a/none/tests/ppc64/test_isa_3_1_AT.c b/none/tests/ppc64/test_isa_3_1_AT.c +index 1d6d42c61..fee76f8f4 100644 +--- a/none/tests/ppc64/test_isa_3_1_AT.c ++++ b/none/tests/ppc64/test_isa_3_1_AT.c +@@ -626,8 +626,228 @@ static void test_pmxvf64gernn_XM11_YM0 (void) { + static void test_pmxvf64gernn_XM11_YM1 (void) { + __asm__ __volatile__ ("pmxvf64gernn 4, 22, %x0, 11, 1" :: "wa" (vec_xa) ); + } ++static void test_xvbf16ger2 (void) { ++ __asm__ __volatile__ ("xvbf16ger2 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvbf16ger2nn (void) { ++ __asm__ __volatile__ ("xvbf16ger2nn 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvbf16ger2np (void) { ++ __asm__ __volatile__ ("xvbf16ger2np 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvbf16ger2pn (void) { ++ __asm__ __volatile__ ("xvbf16ger2pn 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvbf16ger2pp (void) { ++ __asm__ __volatile__ ("xvbf16ger2pp 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 13, 0" 
++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2nn_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void 
test_pmxvbf16ger2nn_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2nn 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2np_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2np 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM0_YM13_PM1 (void) { ++ __asm__ 
__volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pn_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pn 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvbf16ger2pp_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 
1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} + + static test_list_t testgroup_generic[] = { ++ { &test_pmxvbf16ger2nn_XM0_YM0_PM0, "pmxvbf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM0_YM0_PM1, "pmxvbf16ger2nn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM0_YM13_PM0, "pmxvbf16ger2nn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM0_YM13_PM1, "pmxvbf16ger2nn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM11_YM0_PM0, "pmxvbf16ger2nn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM11_YM0_PM1, "pmxvbf16ger2nn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM11_YM13_PM0, "pmxvbf16ger2nn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2nn_XM11_YM13_PM1, "pmxvbf16ger2nn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM0_YM0_PM0, "pmxvbf16ger2np XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM0_YM0_PM1, "pmxvbf16ger2np XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM0_YM13_PM0, "pmxvbf16ger2np XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM0_YM13_PM1, "pmxvbf16ger2np XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM11_YM0_PM0, "pmxvbf16ger2np XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM11_YM0_PM1, "pmxvbf16ger2np XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM11_YM13_PM0, "pmxvbf16ger2np XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2np_XM11_YM13_PM1, "pmxvbf16ger2np XM11_YM13_PM1", 
"AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM0_YM0_PM0, "pmxvbf16ger2pn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM0_YM0_PM1, "pmxvbf16ger2pn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM0_YM13_PM0, "pmxvbf16ger2pn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM0_YM13_PM1, "pmxvbf16ger2pn XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM11_YM0_PM0, "pmxvbf16ger2pn XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM11_YM0_PM1, "pmxvbf16ger2pn XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM11_YM13_PM0, "pmxvbf16ger2pn XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pn_XM11_YM13_PM1, "pmxvbf16ger2pn XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM0_YM0_PM0, "pmxvbf16ger2pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM0_YM0_PM1, "pmxvbf16ger2pp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM0_YM13_PM0, "pmxvbf16ger2pp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM0_YM13_PM1, "pmxvbf16ger2pp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM11_YM0_PM0, "pmxvbf16ger2pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM11_YM0_PM1, "pmxvbf16ger2pp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM11_YM13_PM0, "pmxvbf16ger2pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2pp_XM11_YM13_PM1, "pmxvbf16ger2pp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp 
*/ ++ { &test_pmxvbf16ger2_XM0_YM0_PM0, "pmxvbf16ger2 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM0_YM0_PM1, "pmxvbf16ger2 XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM0_YM13_PM0, "pmxvbf16ger2 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM0_YM13_PM1, "pmxvbf16ger2 XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM11_YM0_PM0, "pmxvbf16ger2 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM11_YM0_PM1, "pmxvbf16ger2 XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM11_YM13_PM0, "pmxvbf16ger2 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ ++ { &test_pmxvbf16ger2_XM11_YM13_PM1, "pmxvbf16ger2 XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM0_PM0, "pmxvf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM0_PM1, "pmxvf16ger2nn XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ + { &test_pmxvf16ger2nn_XM0_YM13_PM0, "pmxvf16ger2nn XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ +@@ -756,6 +976,11 @@ static test_list_t testgroup_generic[] = { + { &test_pmxvi16ger2s_XM11_YM0_PM1, "pmxvi16ger2s XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM0, "pmxvi16ger2s XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM1, "pmxvi16ger2s XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_xvbf16ger2nn, "xvbf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ ++ { &test_xvbf16ger2np, "xvbf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ ++ { &test_xvbf16ger2pn, "xvbf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ ++ { &test_xvbf16ger2pp, "xvbf16ger2pp", "AT,XA,XB", 0b00001111}, /* bcs */ ++ { 
&test_xvbf16ger2, "xvbf16ger2", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2nn, "xvf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2np, "xvf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvf16ger2pn, "xvf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ +diff --git a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp +index 5ea998563..be3f17ec3 100644 +--- a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp ++++ b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp +@@ -1,3 +1,403 @@ ++pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 
0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM0_PM1 
0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn 
XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
++pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2nn XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2nn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++ ++pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 
-1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -3.63717e-28 -1.14437e-28 +Zero -1.14437e-28 -4.71628e-18 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2nn XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 
-1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++ ++pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 
0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np 
XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
++pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2np XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np 
XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++ ++pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 -1.34843e-28 1.14437e-28 +Zero 1.14437e-28 -4.71628e-18 1.14437e-28) ++pmxvbf16ger2np XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 
0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2np XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++ ++pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 
0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM0_PM1 
0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn 
XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
++pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 
+Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pn XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++ ++pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 
-1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 1.34843e-28 -1.14437e-28 +Zero -1.14437e-28 4.71628e-18 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++pmxvbf16ger2pn XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 
+Zero +Zero +Zero +Zero +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28 +Zero -1.14437e-28 -1.14437e-28 -1.14437e-28) ++ ++pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 
0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp 
XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) 
++pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp 
XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++ ++pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 3.63717e-28 1.14437e-28 +Zero 1.14437e-28 4.71628e-18 1.14437e-28) ++pmxvbf16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 
0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++pmxvbf16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero 1.14437e-28 * * +Zero +Zero +Zero +Zero +Zero 1.14437e-28 * * +Zero 1.14437e-28 * *) ++ ++pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 
0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 
0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 
0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 
0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ ++pmxvbf16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 
fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 2.49280e-28 +Zero +Zero +Zero 4.71628e-18 +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => ( +Zero +Zero +Den +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Den +Zero) ++pmxvbf16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++ + pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) + pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) + pmxvf16ger2nn XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) +@@ -1558,6 +1958,91 @@ pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,01 + pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] + pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + ++xvbf16ger2nn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) ++xvbf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) ++xvbf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) ++xvbf16ger2nn 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf NaN NaN -Inf +Inf NaN NaN +Inf -Inf) ++xvbf16ger2nn 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf) ++xvbf16ger2nn 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( -3.63717e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14438e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 -Inf +Inf -1.14437e-28 -1.14437e-28 -Inf +Inf) ++xvbf16ger2nn 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf NaN NaN NaN NaN) ++xvbf16ger2nn 2208400000000000,0000000000000009 
0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14438e-28 -3.63717e-28 -4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -4.00000e+00 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 -Inf +Inf -1.14437e-28 -1.14437e-28 NaN NaN) ++xvbf16ger2nn 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf NaN NaN NaN NaN NaN NaN NaN NaN) ++xvbf16ger2nn 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2nn 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN) ++ ++xvbf16ger2np 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) ++xvbf16ger2np 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) ++xvbf16ger2np 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 
=> ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) ++xvbf16ger2np 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf NaN NaN -Inf +Inf NaN NaN +Inf -Inf) ++xvbf16ger2np 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf) ++xvbf16ger2np 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( * * * * * * * * * * * * * * * *) ++xvbf16ger2np 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( -1.34843e-28 1.14437e-28 * * -4.71628e-18 1.14437e-28 * * 1.14437e-28 1.14437e-28 * * 1.14437e-28 1.14437e-28 * *) ++xvbf16ger2np 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * -Inf +Inf * * -Inf +Inf) ++xvbf16ger2np 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN -Inf +Inf +Inf -Inf NaN NaN NaN NaN) ++xvbf16ger2np 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( * * * * * * * * 1.14437e-28 1.14437e-28 -1.34843e-28 -4.71628e-18 * * * *) ++xvbf16ger2np 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( * * * * * * * * -4.00000e+00 1.14437e-28 * * * * * *) ++xvbf16ger2np 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * -Inf +Inf * * NaN NaN) ++xvbf16ger2np 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf NaN NaN NaN NaN NaN NaN NaN NaN) ++xvbf16ger2np 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN * * * * * * * *) ++xvbf16ger2np 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN * * * * * * * *) ++xvbf16ger2np 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN 
-Inf +Inf NaN NaN +Inf -Inf * * NaN NaN * * NaN NaN) ++ ++xvbf16ger2pn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) ++xvbf16ger2pn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) ++xvbf16ger2pn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) ++xvbf16ger2pn 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) ++xvbf16ger2pn 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) ++xvbf16ger2pn 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 1.34843e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 +Inf -Inf -1.14437e-28 -1.14437e-28 +Inf -Inf) ++xvbf16ger2pn 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) ++xvbf16ger2pn 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 
-1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 1.34843e-28 4.71628e-18 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 4.00000e+00 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 +Inf -Inf -1.14437e-28 -1.14437e-28 NaN NaN) ++xvbf16ger2pn 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) ++xvbf16ger2pn 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28 -1.14437e-28) ++xvbf16ger2pn 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf -1.14437e-28 -1.14437e-28 NaN NaN -1.14437e-28 -1.14437e-28 NaN NaN) ++ ++xvbf16ger2pp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) ++xvbf16ger2pp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) ++xvbf16ger2pp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) ++xvbf16ger2pp 
7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) ++xvbf16ger2pp 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) ++xvbf16ger2pp 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( * * * * * * * * * * * * * * * *) ++xvbf16ger2pp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 3.63717e-28 1.14437e-28 * * 4.71628e-18 1.14437e-28 * * 1.14437e-28 1.14437e-28 * * 1.14438e-28 1.14437e-28 * *) ++xvbf16ger2pp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * +Inf -Inf * * +Inf -Inf) ++xvbf16ger2pp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) ++xvbf16ger2pp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( * * * * * * * * 1.14437e-28 1.14438e-28 3.63717e-28 4.71628e-18 * * * *) ++xvbf16ger2pp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( * * * * * * * * 4.00000e+00 1.14437e-28 * * * * * *) ++xvbf16ger2pp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( * * NaN NaN * * NaN NaN * * +Inf -Inf * * NaN NaN) ++xvbf16ger2pp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) ++xvbf16ger2pp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN * * * * * * * *) ++xvbf16ger2pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN * * * * * * * *) ++xvbf16ger2pp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf * * NaN NaN * * NaN NaN) ++ ++xvbf16ger2 
7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf) ++xvbf16ger2 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( -Inf -Inf NaN NaN +Inf +Inf NaN NaN +Inf +Inf NaN NaN -Inf -Inf NaN NaN) ++xvbf16ger2 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( -Inf NaN NaN NaN +Inf NaN NaN NaN +Inf NaN NaN NaN -Inf NaN NaN NaN) ++xvbf16ger2 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => ( NaN NaN -Inf +Inf NaN NaN +Inf -Inf NaN NaN +Inf -Inf NaN NaN -Inf +Inf) ++xvbf16ger2 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf +Inf -Inf -Inf +Inf) ++xvbf16ger2 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero 5.56082e-36 +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++xvbf16ger2 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => ( 2.49280e-28 +Zero +Zero +Zero 4.71628e-18 +Zero +Zero +Zero 2.08768e-35 +Zero +Zero +Zero 3.82177e-34 +Zero +Zero +Zero) ++xvbf16ger2 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => ( +Zero +Zero NaN NaN +Zero +Zero NaN NaN +Zero +Zero +Inf -Inf +Zero +Zero +Inf -Inf) ++xvbf16ger2 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN +Inf -Inf -Inf +Inf NaN NaN NaN NaN) ++xvbf16ger2 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => ( +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero 2.08768e-35 3.82177e-34 2.49280e-28 4.71628e-18 +Zero +Zero +Zero +Zero) ++xvbf16ger2 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => ( +Zero +Zero +Zero +Zero +Den +Zero +Zero +Zero 4.00000e+00 +Zero +Zero +Den +Zero +Zero +Zero +Zero) ++xvbf16ger2 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => ( +Zero +Zero 
NaN NaN +Zero +Zero NaN NaN +Zero +Zero +Inf -Inf +Zero +Zero NaN NaN) ++xvbf16ger2 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf NaN NaN NaN NaN NaN NaN NaN NaN) ++xvbf16ger2 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++xvbf16ger2 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN +Zero +Zero +Zero +Zero +Zero +Zero +Zero +Zero) ++xvbf16ger2 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => ( NaN NaN +Inf -Inf NaN NaN -Inf +Inf +Zero +Zero NaN NaN +Zero +Zero NaN NaN) ++ + xvf16ger2nn 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) + xvf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) + xvf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN) +@@ -1921,4 +2406,4 @@ xxmtacc => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e + + xxsetaccz => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + +-All done. Tested 152 different instruction groups ++All done. 
Tested 197 different instruction groups +diff --git a/none/tests/ppc64/test_isa_3_1_XT.c b/none/tests/ppc64/test_isa_3_1_XT.c +index a54e8763a..905c17951 100644 +--- a/none/tests/ppc64/test_isa_3_1_XT.c ++++ b/none/tests/ppc64/test_isa_3_1_XT.c +@@ -491,6 +491,16 @@ static void test_pstxv_4 (void) { + static void test_pstxv_0 (void) { + __asm__ __volatile__ ("pstxv %x0, 0(%1), 0" :: "wa" (vec_xs), "r" (ra) ); + } ++static void test_xvcvspbf16 (void) { ++SET_FPSCR_ZERO; ++ __asm__ __volatile__ ("xvcvspbf16 %x0, %x1" ++ : "=wa" (vec_xt) : "wa" (vec_xb) ); ++GET_FPSCR(current_fpscr); ++} ++static void test_xvcvbf16spn (void) { ++ __asm__ __volatile__ ("xvcvbf16spn %x0, %x1" ++ : "=wa" (vec_xt) : "wa" (vec_xb) ); ++} + static void test_xxgenpcvbm_imm0 (void) { + __asm__ __volatile__ ("xxgenpcvbm %x0, %1, 0" : "=wa" (vec_xt) : "v" (vrb) ); + } +@@ -634,6 +644,8 @@ static test_list_t testgroup_generic[] = { + { &test_stxvrdx, "stxvrdx", "XS,RA,RB"}, /* bcs */ + { &test_stxvrhx, "stxvrhx", "XS,RA,RB"}, /* bcs */ + { &test_stxvrwx, "stxvrwx", "XS,RA,RB"}, /* bcs */ ++ { &test_xvcvbf16spn, "xvcvbf16spn", "XT,XB"}, /* bcs */ ++ { &test_xvcvspbf16, "xvcvspbf16", "XT,XB", 0b0101010100000000}, /* bcs */ + { &test_xxblendvb, "xxblendvb", "XT,XA,XB,XC"}, /* bcs */ + { &test_xxblendvd, "xxblendvd", "XT,XA,XB,XC"}, /* bcs */ + { &test_xxblendvh, "xxblendvh", "XT,XA,XB,XC"}, /* bcs */ +diff --git a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp +index 8b5f1d1a3..b9d6cb7a8 100644 +--- a/none/tests/ppc64/test_isa_3_1_XT.stdout.exp ++++ b/none/tests/ppc64/test_isa_3_1_XT.stdout.exp +@@ -244,6 +244,34 @@ stxvrwx 18 (&buffer) 0000111e8000222e,0180055e0180077e => [ - - - 7f0000008 + stxvrwx 20 (&buffer) 7ff0000000000000,0000111e8000222e => [ - - - - 5a05a05a00000000 - - - ] + stxvrwx 28 (&buffer) fff0000000000000,7ff0000000000000 => [ - - - - - 0102030400000000 - - ] + ++xvcvbf16spn 7f800000ff800000,ff8000007f800000 => 0 0 ++xvcvbf16spn 
ff8000007f800000,ff7ffffe7f7ffffe => 0 fffe0000fffe0000 ++xvcvbf16spn ff7ffffe7f7ffffe,0080000e8080000e => fffe0000fffe0000 e0000000e0000 ++xvcvbf16spn 0080000e8080000e,0180055e0180077e => e0000000e0000 55e0000077e0000 ++xvcvbf16spn 0180055e0180077e,0000111e8000222e => 55e0000077e0000 111e0000222e0000 ++xvcvbf16spn 0000111e8000222e,7ff0000000000000 => 111e0000222e0000 0 ++xvcvbf16spn 7ff0000000000000,fff0000000000000 => 0 0 ++xvcvbf16spn fff0000000000000,2208400000000000 => 0 4000000000000000 ++xvcvbf16spn 2208400000000000,0000000000000009 => 4000000000000000 90000 ++xvcvbf16spn 0000000000000009,ffff000180000001 => 90000 1000000010000 ++xvcvbf16spn ffff000180000001,0000000000000000 => 1000000010000 0 ++xvcvbf16spn 0000000000000000,8000000000000000 => 0 0 ++xvcvbf16spn 8000000000000000,7f800000ff800000 => 0 0 ++ ++xvcvspbf16 7f800000ff800000,ff8000007f800000 => +Inf -Inf -Inf +Inf ++xvcvspbf16 ff8000007f800000,ff7ffffe7f7ffffe => -Inf +Inf -Inf +Inf ++xvcvspbf16 ff7ffffe7f7ffffe,0080000e8080000e => -Inf +Inf 0x0080 0x8080 ++xvcvspbf16 0080000e8080000e,0180055e0180077e => 0x0080 0x8080 0x0180 0x0180 ++xvcvspbf16 0180055e0180077e,0000111e8000222e => 0x0180 0x0180 +Zero -Zero ++xvcvspbf16 0000111e8000222e,7ff0000000000000 => +Zero -Zero NaN +Zero ++xvcvspbf16 7ff0000000000000,fff0000000000000 => NaN +Zero NaN +Zero ++xvcvspbf16 fff0000000000000,2208400000000000 => NaN +Zero 0x2208 +Zero ++xvcvspbf16 2208400000000000,0000000000000009 => 0x2208 +Zero +Zero +Zero ++xvcvspbf16 0000000000000009,ffff000180000001 => +Zero +Zero NaN -Zero ++xvcvspbf16 ffff000180000001,0000000000000000 => NaN -Zero +Zero +Zero ++xvcvspbf16 0000000000000000,8000000000000000 => +Zero +Zero -Zero +Zero ++xvcvspbf16 8000000000000000,7f800000ff800000 => -Zero +Zero +Inf -Inf ++ + xxblendvb 7f800000ff800000,ff8000007f800000 0000000000000000,00000000ffffffff 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 + xxblendvb 7f800000ff800000,ff8000007f800000 
ffffffff55555555,5555aaaaaaaa5555 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 + xxblendvb 7f800000ff800000,ff8000007f800000 aaaa00000000aaaa,0000000000000000 7f800000ff800000,ff8000007f800000 => 7f800000ff800000 ff8000007f800000 +@@ -7318,4 +7346,4 @@ xxspltiw imm3 => 300000003 300000003 + + xxspltiw imm8 => 800000008 800000008 + +-All done. Tested 158 different instruction groups ++All done. Tested 160 different instruction groups + +commit c589b652939655090c005a982a71f50c489fb5ce +Author: root +Date: Fri Feb 12 16:00:53 2021 -0500 + + Reduced precision Missing Integer based outer tests + +diff --git a/NEWS b/NEWS +index 2e42e74b2..52a51fd9e 100644 +--- a/NEWS ++++ b/NEWS +@@ -176,6 +176,7 @@ where XXXXXX is the bug number as listed below. + 433500 DRD regtest faulures when libstdc++ and libgcc debuginfo are installed + 433629 valgrind/README has type "abd" instead of "and" + 433641 Rust std::sys::unix::fs::try_statx Syscall param fstatat(file_name) ++433801 PPC ISA 3.1 support is missing, part 10 (ISA 3.1 support complete) + 433898 arm64: Handle sp, lr, fp as DwReg in CfiExpr + 434193 GCC 9+ inlined strcmp causes "Conditional jump or move [..] value" report + n-i-bz helgrind: If hg_cli__realloc fails, return NULL. 
+diff --git a/none/tests/ppc64/test_isa_3_1_AT.c b/none/tests/ppc64/test_isa_3_1_AT.c +index fee76f8f4..e9db9cc9a 100644 +--- a/none/tests/ppc64/test_isa_3_1_AT.c ++++ b/none/tests/ppc64/test_isa_3_1_AT.c +@@ -806,6 +806,114 @@ static void test_pmxvbf16ger2pp_XM11_YM13_PM1 (void) { + __asm__ __volatile__ ("pmxvbf16ger2pp 4, %x0, %x1, 11, 13, 1" + :: "wa" (vec_xa), "wa" (vec_xb) ); + } ++static void test_xvi8ger4spp (void) { ++ __asm__ __volatile__ ("xvi8ger4spp 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM0_YM0_PM5 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 0, 5" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM0_YM13_PM5 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 0, 13, 5" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM11_YM0_PM5 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 0, 5" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi8ger4spp_XM11_YM13_PM5 (void) { ++ __asm__ __volatile__ ("pmxvi8ger4spp 4, %x0, %x1, 11, 13, 5" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvi16ger2 (void) { ++ __asm__ __volatile__ ("xvi16ger2 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_xvi16ger2pp (void) { ++ __asm__ __volatile__ 
("xvi16ger2pp 4, %x0, %x1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM11_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM0_YM0_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM0_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM0_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM0_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 0, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM11_YM0_PM0 
(void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 0, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM11_YM0_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 0, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM11_YM13_PM0 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 13, 0" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} ++static void test_pmxvi16ger2pp_XM11_YM13_PM1 (void) { ++ __asm__ __volatile__ ("pmxvi16ger2pp 4, %x0, %x1, 11, 13, 1" ++ :: "wa" (vec_xa), "wa" (vec_xb) ); ++} + + static test_list_t testgroup_generic[] = { + { &test_pmxvbf16ger2nn_XM0_YM0_PM0, "pmxvbf16ger2nn XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK", 0b00001111}, /* bcwp */ +@@ -952,6 +1060,14 @@ static test_list_t testgroup_generic[] = { + { &test_pmxvi8ger4pp_XM11_YM0_PM5, "pmxvi8ger4pp XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM13_PM0, "pmxvi8ger4pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4pp_XM11_YM13_PM5, "pmxvi8ger4pp XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM0_YM0_PM0, "pmxvi8ger4spp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM0_YM0_PM5, "pmxvi8ger4spp XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM0_YM13_PM0, "pmxvi8ger4spp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM0_YM13_PM5, "pmxvi8ger4spp XM0_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM11_YM0_PM0, "pmxvi8ger4spp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM11_YM0_PM5, "pmxvi8ger4spp XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM11_YM13_PM0, "pmxvi8ger4spp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi8ger4spp_XM11_YM13_PM5, "pmxvi8ger4spp XM11_YM13_PM5", 
"AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM0_PM0, "pmxvi8ger4 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM0_PM5, "pmxvi8ger4 XM0_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM0_YM13_PM0, "pmxvi8ger4 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ +@@ -960,6 +1076,14 @@ static test_list_t testgroup_generic[] = { + { &test_pmxvi8ger4_XM11_YM0_PM5, "pmxvi8ger4 XM11_YM0_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM13_PM0, "pmxvi8ger4 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi8ger4_XM11_YM13_PM5, "pmxvi8ger4 XM11_YM13_PM5", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM0_YM0_PM0, "pmxvi16ger2pp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM0_YM0_PM1, "pmxvi16ger2pp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM0_YM13_PM0, "pmxvi16ger2pp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM0_YM13_PM1, "pmxvi16ger2pp XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM11_YM0_PM0, "pmxvi16ger2pp XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM11_YM0_PM1, "pmxvi16ger2pp XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM11_YM13_PM0, "pmxvi16ger2pp XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2pp_XM11_YM13_PM1, "pmxvi16ger2pp XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM0_PM0, "pmxvi16ger2spp XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM0_PM1, "pmxvi16ger2spp XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2spp_XM0_YM13_PM0, "pmxvi16ger2spp XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ +@@ -976,6 +1100,14 @@ static test_list_t testgroup_generic[] = { + { 
&test_pmxvi16ger2s_XM11_YM0_PM1, "pmxvi16ger2s XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM0, "pmxvi16ger2s XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_pmxvi16ger2s_XM11_YM13_PM1, "pmxvi16ger2s XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM0_YM0_PM0, "pmxvi16ger2 XM0_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM0_YM0_PM1, "pmxvi16ger2 XM0_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM0_YM13_PM0, "pmxvi16ger2 XM0_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM0_YM13_PM1, "pmxvi16ger2 XM0_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM11_YM0_PM0, "pmxvi16ger2 XM11_YM0_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM11_YM0_PM1, "pmxvi16ger2 XM11_YM0_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM11_YM13_PM0, "pmxvi16ger2 XM11_YM13_PM0", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ ++ { &test_pmxvi16ger2_XM11_YM13_PM1, "pmxvi16ger2 XM11_YM13_PM1", "AT,XA,XB,XMSK,YMSK,PMSK"}, /* bcwp */ + { &test_xvbf16ger2nn, "xvbf16ger2nn", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvbf16ger2np, "xvbf16ger2np", "AT,XA,XB", 0b00001111}, /* bcs */ + { &test_xvbf16ger2pn, "xvbf16ger2pn", "AT,XA,XB", 0b00001111}, /* bcs */ +@@ -999,9 +1131,12 @@ static test_list_t testgroup_generic[] = { + { &test_xvi4ger8pp, "xvi4ger8pp", "AT,XA,XB"}, /* bcs */ + { &test_xvi4ger8, "xvi4ger8", "AT,XA,XB"}, /* bcs */ + { &test_xvi8ger4pp, "xvi8ger4pp", "AT,XA,XB"}, /* bcs */ ++ { &test_xvi8ger4spp, "xvi8ger4spp", "AT,XA,XB"}, /* bcs */ + { &test_xvi8ger4, "xvi8ger4", "AT,XA,XB"}, /* bcs */ ++ { &test_xvi16ger2pp, "xvi16ger2pp", "AT,XA,XB"}, /* bcs */ + { &test_xvi16ger2spp, "xvi16ger2spp", "AT,XA,XB"}, /* bcs */ + { &test_xvi16ger2s, "xvi16ger2s", "AT,XA,XB"}, /* bcs */ ++ { &test_xvi16ger2, "xvi16ger2", "AT,XA,XB"}, /* bcs */ + { &test_xxmfacc, "xxmfacc", 
"AS"}, /* bcs */ + { &test_xxmtacc, "xxmtacc", "AT"}, /* bcs */ + { &test_xxsetaccz, "xxsetaccz", "AT"}, /* bcs */ +diff --git a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp +index be3f17ec3..8d1be35b1 100644 +--- a/none/tests/ppc64/test_isa_3_1_AT.stdout.exp ++++ b/none/tests/ppc64/test_isa_3_1_AT.stdout.exp +@@ -1718,6 +1718,86 @@ pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,01 + pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111110911111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] + pmxvi8ger4pp XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] + ++pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp 
XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM0_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM0_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi8ger4spp XM11_YM0_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 
0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++ ++pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011115075 11114fd511114f95 0000000000000000 0000000000000000 000000001110d111 1110d1111110d111 000000001110d111 1110d1111110d111] ++pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 1111150911111111 0000000000000000 0000000000000000 
0000000011111111 11110d1111111111 0000000011111111 11110d1111111111] ++pmxvi8ger4spp XM11_YM13_PM5 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011110911 1111091111110911 0000000000000000 0000000000000000 00000000111112b5 11111c1511111fd5 0000000011111395 111121f5111127b5] ++pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 1111109111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM5 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000001111109f 111110ef1111110f 0000000000000000 0000000000000000 0000000011111111 * 000000001111118f 1111145f1111157f] ++pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111110911111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi8ger4spp XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++ + pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000] + pmxvi8ger4 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] +@@ -1798,6 +1878,86 @@ pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 0080000e8080000e,0180 + pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 fffffff800000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + pmxvi8ger4 XM11_YM13_PM5 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + ++pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 
0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2pp XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 
0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++ ++pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 00000000111110f5 1111065511110215 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000011111111 1110911111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 
*] ++pmxvi16ger2pp XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000011111111 * 0000000000000000 0000000000000000 00000000111200b5 116cf01511914fd5 000000001112ef95 11c883f5121125b5] ++pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 1558911111111111 0000000011111111 199c911111111111] ++pmxvi16ger2pp XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000001111111f 1111166f1111188f 0000000000000000 0000000000000000 0000000011111111 * 000000001111118f 1111415f1111547f] ++pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000011111111 1111511111111111 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 1113511111111111] ++pmxvi16ger2pp XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000011111111 * 0000000000000000 0000000000000000 0000000011111111 * 0000000011111111 *] ++ + pmxvi16ger2spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + pmxvi16ger2spp XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + pmxvi16ger2spp XM0_YM0_PM0 
ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] +@@ -1958,6 +2118,86 @@ pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,01 + pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] + pmxvi16ger2s XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + ++pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 
XM0_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 
0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 
0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM0_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 
XM11_YM0_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM0_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 
0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM0 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ ++pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0080000e8080000e,0180055e0180077e => [ 00000000ffffffe4 fffff544fffff104 0000000000000000 0000000000000000 0000000000000000 
0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe fff0000000000000,2208400000000000 => [ 0000000000000000 ffff800000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM1 ff8000007f800000,ff7ffffe7f7ffffe 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0080000e8080000e,0180055e0180077e => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 000000000000efa4 005bdf0400803ec4 000000000001de84 00b772e4010014a4] ++pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 fff0000000000000,2208400000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0447800000000000 0000000000000000 088b800000000000] ++pmxvi16ger2 XM11_YM13_PM1 0000111e8000222e,7ff0000000000000 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0080000e8080000e,0180055e0180077e => [ 000000000000000e 0000055e0000077e 0000000000000000 0000000000000000 0000000000000000 0000000000000000 000000000000007e 0000304e0000436e] ++pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 fff0000000000000,2208400000000000 => [ 0000000000000000 0000400000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0002400000000000] ++pmxvi16ger2 XM11_YM13_PM1 0000000000000009,ffff000180000001 0000000000000000,8000000000000000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] ++ + xvbf16ger2nn 
7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => ( +Inf -Inf -Inf +Inf -Inf +Inf +Inf -Inf -Inf +Inf +Inf -Inf +Inf -Inf -Inf +Inf) + xvbf16ger2nn 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => ( +Inf +Inf NaN NaN -Inf -Inf NaN NaN -Inf -Inf NaN NaN +Inf +Inf NaN NaN) + xvbf16ger2nn 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => ( +Inf NaN NaN NaN -Inf NaN NaN NaN -Inf NaN NaN NaN +Inf NaN NaN NaN) +@@ -2349,6 +2589,23 @@ xvi8ger4pp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e = + xvi8ger4pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 11111def11111111 * 11110cef11111111 * 1111001111111111 * * *] + xvi8ger4pp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012 1110d11111111111 1110d19111109191 * *] + ++xvi8ger4spp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 1110d0921110d012 1110d0121110d092 1111101211114f92 11114f9211111012 1111101211114f92 11114f9211111012 1110d0921110d012 1110d0121110d092] ++xvi8ger4spp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110d1101110d110 1111111111111091 1110d1901110d190 1111111111115091 1110d1901110d190 1111111111115091 1110d1101110d110 1111111111111091] ++xvi8ger4spp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11110cef11111111 * 11111def11111111 * 11111def11111111 * 11110cef11111111 *] ++xvi8ger4spp 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 1111109111111111 1110d0921110d012 1111509111111111 1111101211114f92 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012] ++xvi8ger4spp 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => [ * * 1110d19111109191 111091911110d191 1110d1901110d210 1110d2101110d190 1110d1901110d210 1110d2101110d190] ++xvi8ger4spp 0180055e0180077e,0000111e8000222e 
0180055e0180077e,0000111e8000222e => [ 11111c6a1111204c 111115b6111118b7 1111221f11112823 111118b71110ddd9 1110f3af1110ff79 11111c6a1111231f 1110ff7911110f47 1111204c11112923] ++xvi8ger4spp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 1111155111111111 111111111111121f 1111089111111111 11111111111112af 11110e7311111111 111111111111145f 11110ef311111111 111111111111157f] ++xvi8ger4spp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ * * 1110d11111111111 1110d19111109191 1111119111111111 1110d1901110d210 1111119111111111 1110d1901110d210] ++xvi8ger4spp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ * * * * 111125ef111136ef 111136ef111125ef * *] ++xvi8ger4spp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ * * 1111145f1111157f 1111121f111112af 11111673111116f3 1111155111112a91 * *] ++xvi8ger4spp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ * * * 1111111111111162 111125d511111111 * * *] ++xvi8ger4spp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ * * * * 1111221111111111 111125ef111136ef * *] ++xvi8ger4spp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 1111101211114f92 11114f9211111012 1110d0921110d012 1110d0121110d092 1110d19111109191 111091911110d191 * *] ++xvi8ger4spp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 1110d1901110d190 1111111111115091 1110d1101110d110 1111111111111091 1111109111111091 111111111110d111 * *] ++xvi8ger4spp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 11111def11111111 * 11110cef11111111 * 1111001111111111 * * *] ++xvi8ger4spp 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 1111509111111111 1111101211114f92 1111109111111111 1110d0921110d012 1110d11111111111 1110d19111109191 * *] ++ + xvi8ger4 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ ffffbf81ffffbf01 
ffffbf01ffffbf81 ffffff0100003e81 00003e81ffffff01 ffffff0100003e81 00003e81ffffff01 ffffbf81ffffbf01 ffffbf01ffffbf81] + xvi8ger4 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ ffffbfffffffbfff 00000000ffffff80 ffffc07fffffc07f 0000000000003f80 ffffc07fffffc07f 0000000000003f80 ffffbfffffffbfff 00000000ffffff80] + xvi8ger4 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ fffffbde00000000 0000000000000000 00000cde00000000 0000000000000000 00000cde00000000 0000000000000000 fffffbde00000000 0000000000000000] +@@ -2366,6 +2623,23 @@ xvi8ger4 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => + xvi8ger4 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 00000cde00000000 0000000000000000 fffffbde00000000 0000000000000000 ffffef0000000000 0000000000000000 0000000000000000 0000000000000000] + xvi8ger4 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ 00003f8000000000 ffffff0100003e81 ffffff8000000000 ffffbf81ffffbf01 ffffc00000000000 ffffc080ffff8080 0000000000000000 0000000000000000] + ++xvi16ger2pp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 10d1511111115111 1111511110d15111 5091511110d15111 10d1511150915111 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111] ++xvi16ger2pp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110511111105111 1111111111511111 11d0511111d05111 11111111d1511111 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111] ++xvi16ger2pp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11000d1111111111 * 22040d1111111111 * 22040d1111111111 * 11000d1111111111 *] ++xvi16ger2pp 7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 1151111111111111 10d1511111115111 d151111111111111 5091511110d15111 d151111111111111 5091511110d15111 1151111111111111 10d1511111115111] ++xvi16ger2pp 0180055e0180077e,0000111e8000222e 
7f800000ff800000,ff8000007f800000 => [ * * d151111111511111 11511111d1511111 11d0511111105111 1110511111d05111 11d0511111105111 1110511111d05111] ++xvi16ger2pp 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => [ 116cf01511914fd5 12361095135a2075 110883f5115125b5 135a207555a15155 11301f95113b8755 116cf015110883f5 113b8755114b7315 11914fd5115125b5] ++xvi16ger2pp 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 1558911111111111 111111111111ab1f 0898911111111111 11111111111244af 129b9d1111111111 111111111111415f 13239d1111111111 111111111111547f] ++xvi16ger2pp 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ * * 5111111111111111 d151111111511111 1051111111111111 11d0511111105111 1051111111111111 11d0511111105111] ++xvi16ger2pp 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ * * * * 22040d1111000d11 11000d1122040d11 * *] ++xvi16ger2pp 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ * * 1111415f1111547f 1111ab1f111244af 129b9d1113239d11 1558911108989111 * *] ++xvi16ger2pp 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ * * 1113511111111111 1111111111111162 2597315111111111 1111111111135111 * *] ++xvi16ger2pp 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ * * * * 000d111111111111 22040d1111000d11 * *] ++xvi16ger2pp 8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111 d151111111511111 11511111d1511111 * *] ++xvi16ger2pp 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111 1051111110511111 1111111151111111 * *] ++xvi16ger2pp 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 22040d1111111111 * 11000d1111111111 * 000d111111111111 * * *] ++xvi16ger2pp 8000000000000000,7f800000ff800000 
8000000000000000,7f800000ff800000 => [ d151111111111111 5091511110d15111 1151111111111111 10d1511111115111 5111111111111111 d151111111511111 * *] ++ + xvi16ger2spp 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ 10d1511111115111 1111511110d15111 5091511110d15111 10d1511150915111 5091511110d15111 10d1511150915111 10d1511111115111 1111511110d15111] + xvi16ger2spp 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ 1110511111105111 1111111111511111 11d0511111d05111 11111111d1511111 11d0511111d05111 11111111d1511111 1110511111105111 1111111111511111] + xvi16ger2spp 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ 11000d1111111111 * 22040d1111111111 * 22040d1111111111 * 11000d1111111111 *] +@@ -2400,10 +2674,27 @@ xvi16ger2s 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e = + xvi16ger2s 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000 eefc000000000000 0000000000000000 0000000000000000 0000000000000000] + xvi16ger2s 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000 4000000000000000 c040000000400000 0000000000000000 0000000000000000] + ++xvi16ger2 7f800000ff800000,ff8000007f800000 7f800000ff800000,ff8000007f800000 => [ ffc0400000004000 00004000ffc04000 3f804000ffc04000 ffc040003f804000 3f804000ffc04000 ffc040003f804000 ffc0400000004000 00004000ffc04000] ++xvi16ger2 7f800000ff800000,ff8000007f800000 0180055e0180077e,0000111e8000222e => [ ffff4000ffff4000 0000000000400000 00bf400000bf4000 00000000c0400000 00bf400000bf4000 00000000c0400000 ffff4000ffff4000 0000000000400000] ++xvi16ger2 7f800000ff800000,ff8000007f800000 2208400000000000,0000000000000009 => [ ffeefc0000000000 0000000000000000 10f2fc0000000000 0000000000000000 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000] ++xvi16ger2 
7f800000ff800000,ff8000007f800000 8000000000000000,7f800000ff800000 => [ 0040000000000000 ffc0400000004000 c040000000000000 3f804000ffc04000 c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000] ++xvi16ger2 0180055e0180077e,0000111e8000222e 7f800000ff800000,ff8000007f800000 => [ 0000000000000000 0000000000000000 c040000000400000 00400000c0400000 00bf4000ffff4000 ffff400000bf4000 00bf4000ffff4000 ffff400000bf4000] ++xvi16ger2 0180055e0180077e,0000111e8000222e 0180055e0180077e,0000111e8000222e => [ 005bdf0400803ec4 0124ff8402490f64 fff772e4004014a4 02490f6444904044 001f0e84002a7644 005bdf04fff772e4 002a7644003a6204 00803ec4004014a4] ++xvi16ger2 0180055e0180077e,0000111e8000222e 2208400000000000,0000000000000009 => [ 0447800000000000 0000000000009a0e f787800000000000 000000000001339e 018a8c0000000000 000000000000304e 02128c0000000000 000000000000436e] ++xvi16ger2 0180055e0180077e,0000111e8000222e 8000000000000000,7f800000ff800000 => [ 0000000000000000 0000000000000000 4000000000000000 c040000000400000 ff40000000000000 00bf4000ffff4000 ff40000000000000 00bf4000ffff4000] ++xvi16ger2 2208400000000000,0000000000000009 7f800000ff800000,ff8000007f800000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 10f2fc00ffeefc00 ffeefc0010f2fc00 0000000000000000 0000000000000000] ++xvi16ger2 2208400000000000,0000000000000009 0180055e0180077e,0000111e8000222e => [ 0000000000000000 0000000000000000 0000304e0000436e 00009a0e0001339e 018a8c0002128c00 04478000f7878000 0000000000000000 0000000000000000] ++xvi16ger2 2208400000000000,0000000000000009 2208400000000000,0000000000000009 => [ 0000000000000000 0000000000000000 0002400000000000 0000000000000051 1486204000000000 0000000000024000 0000000000000000 0000000000000000] ++xvi16ger2 2208400000000000,0000000000000009 8000000000000000,7f800000ff800000 => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 eefc000000000000 10f2fc00ffeefc00 0000000000000000 0000000000000000] ++xvi16ger2 
8000000000000000,7f800000ff800000 7f800000ff800000,ff8000007f800000 => [ 3f804000ffc04000 ffc040003f804000 ffc0400000004000 00004000ffc04000 c040000000400000 00400000c0400000 0000000000000000 0000000000000000] ++xvi16ger2 8000000000000000,7f800000ff800000 0180055e0180077e,0000111e8000222e => [ 00bf400000bf4000 00000000c0400000 ffff4000ffff4000 0000000000400000 ff400000ff400000 0000000040000000 0000000000000000 0000000000000000] ++xvi16ger2 8000000000000000,7f800000ff800000 2208400000000000,0000000000000009 => [ 10f2fc0000000000 0000000000000000 ffeefc0000000000 0000000000000000 eefc000000000000 0000000000000000 0000000000000000 0000000000000000] ++xvi16ger2 8000000000000000,7f800000ff800000 8000000000000000,7f800000ff800000 => [ c040000000000000 3f804000ffc04000 0040000000000000 ffc0400000004000 4000000000000000 c040000000400000 0000000000000000 0000000000000000] ++ + xxmfacc [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] + + xxmtacc => [ 7f800000ff800000 ff8000007f800000 ff7ffffe7f7ffffe 0080000e8080000e 0180055e0180077e 0000111e8000222e 7ff0000000000000 fff0000000000000] + + xxsetaccz => [ 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000 0000000000000000] + +-All done. Tested 197 different instruction groups ++All done. 
Tested 224 different instruction groups diff --git a/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch new file mode 100644 index 0000000..fb8fa50 --- /dev/null +++ b/SOURCES/valgrind-3.17.0-ppc64-isa-3.1.patch @@ -0,0 +1,1720 @@ +commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5 +Author: Carl Love +Date: Mon Jan 11 16:00:57 2021 -0600 + + PPC64: ISA 3.1 VSX PCV Generate Operations + + xgenpcvbm VSX Vector Generate PCV from Byte Mask + xxgenpcvdmVSX Vector Generate PCV from Doubleword Mask + xxgenpcvhmVSX Vector Generate PCV from Halfword Mask + xxgenpcvwmVSX Vector Generate PCV from Word Mask + +diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h +index deda4dfce..54ce923a9 100644 +--- a/VEX/priv/guest_ppc_defs.h ++++ b/VEX/priv/guest_ppc_defs.h +@@ -169,6 +169,23 @@ void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc, + UInt reg, UInt *result); + ++extern void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ++ ULong src_lo, ++ UInt rtn_val, UInt IMM ); ++extern void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ++ ULong src_lo, ++ UInt rtn_val, UInt IMM ); ++extern void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ++ ULong src_lo, ++ UInt rtn_val, UInt IMM ); ++extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ++ ULong src_lo, ++ UInt rtn_val, UInt IMM ); ++ + /* 8-bit XO value from instruction description */ + #define XVI4GER8 0b00100011 + #define XVI4GER8PP 0b00100010 +diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c +index c24191ef3..75497abb9 100644 +--- a/VEX/priv/guest_ppc_helpers.c ++++ b/VEX/priv/guest_ppc_helpers.c +@@ -701,6 +701,738 @@ ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC, + #undef MAX_IMM_BITS + } + 
++/*--------------------------------------------------*/ ++/*---- VSX Vector Generate PCV from Mask helpers ---*/ ++/*--------------------------------------------------*/ ++static void write_VSX_entry (VexGuestPPC64State* gst, UInt reg_offset, ++ ULong *vsx_entry) ++{ ++ U128* pU128_dst; ++ pU128_dst = (U128*) (((UChar*) gst) + reg_offset); ++ ++ /* The U128 type is defined as an array of unsigned integers. */ ++ /* Writing in LE order */ ++ (*pU128_dst)[0] = (UInt)(vsx_entry[1] & 0xFFFFFFFF); ++ (*pU128_dst)[1] = (UInt)(vsx_entry[1] >> 32); ++ (*pU128_dst)[2] = (UInt)(vsx_entry[0] & 0xFFFFFFFF); ++ (*pU128_dst)[3] = (UInt)(vsx_entry[0] >> 32); ++ return; ++} ++ ++/* CALLED FROM GENERATED CODE */ ++void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ULong src_lo, ++ UInt reg_offset, UInt imm ) { ++ /* The function computes the 128-bit result then writes it directly ++ into the guest state VSX register. */ ++ ++ UInt i, shift_by, sel_shift_by, half_sel; ++ ULong index, src, result[2]; ++ ULong j; ++ ++ result[0] = 0; ++ result[1] = 0; ++ j = 0; ++ ++ /* The algorithm in the ISA is written with IBM numbering zero on left and ++ N-1 on right. The loop index is converted to "i" to match the algorithm ++ for clarity of matching the C code to the algorithm in the ISA.
*/ ++ ++ if (imm == 0b00) { // big endian expansion ++ for( index = 0; index < 16; index++) { ++ i = 15 - index; ++ ++ shift_by = i*8; ++ ++ if ( i >= 8) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 7; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= j << shift_by; ++ j++; ++ } else { ++ result[half_sel] |= (index + (unsigned long long)0x10) << shift_by; ++ } ++ } ++ ++ ++ } else if (imm == 0b01) { // big endian compression ++ /* If IMM=0b00001, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement a ++ compression of the sparse byte elements in a source vector specified ++ by the byte-element mask in VSR[VRB+32] into the leftmost byte ++ elements of a result vector. ++ */ ++ for( index = 0; index < 16; index++) { ++ i = 15 - index; ++ shift_by = i*8; ++ ++ if ( i >= 8) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 7; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 8) ++ result[1] |= (index) << (15 - j)*8; ++ else ++ result[0] |= (index) << (7 - j)*8; ++ j++; ++ } ++ } ++ /* The algorithm says set to undefined, leave as 0 ++ for( index = 3 - j; index < 4; index++) { ++ result |= (0 << (index*8)); ++ } ++ */ ++ ++ } else if (imm == 0b10) { //little-endian expansion ++ /* If IMM=0b00010, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement an ++ expansion of the rightmost byte elements of a source vector into the ++ byte elements of a result vector specified by the byte-element mask ++ in VSR[VRB+32].
*/ ++ for( index = 0; index < 16; index++) { ++ i = index; ++ ++ shift_by = i*8; ++ ++ if ( i >= 8) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 7; ++ ++ /* mod shift amount by 8 since src is either the upper or lower ++ 64-bits. */ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= j << shift_by; ++ j++; ++ } else { ++ result[half_sel] |= (index + (unsigned long long)0x10) << shift_by; ++ } ++ } ++ ++ } else if (imm == 0b11) { //little-endian compression ++ /* If IMM=0b00011, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement a ++ compression of the sparse byte elements in a source vector specified ++ by the byte-element mask in VSR[VRB+32] into the rightmost byte ++ elements of a result vector. */ ++ ++ for( index = 0; index < 16; index++) { ++ i = index; ++ ++ shift_by = i*8; ++ ++ if ( i >= 8) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 7; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 8) ++ result[0] |= (index) << (j-8)*8; ++ else ++ result[1] |= (index) << j*8; ++ j++; ++ } ++ } ++ ++ /* The algorithm says set to undefined, leave as 0 ++ for( index = 3 - j; index < 4; index++) { ++ result |= (0 << (index*8)); ++ } ++ */ ++ ++ } else { ++ vex_printf("ERROR, vector_gen_pvc_byte_mask_dirty_helper, imm value %u not supported.\n", ++ imm); ++ vassert(0); ++ } ++ write_VSX_entry( gst, reg_offset, result); ++} ++ ++/* CALLED FROM GENERATED CODE */ ++void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ULong src_lo, ++ UInt reg_offset, ++ UInt imm ) { ++ /* The function computes the 128-bit result then writes it directly ++ into the guest state VSX register.
*/ ++ UInt i, shift_by, sel_shift_by, half_sel; ++ ULong index, src, result[2]; ++ ULong j; ++ ++ result[0] = 0; ++ result[1] = 0; ++ j = 0; ++ ++ /* The algorithm in the ISA is written with IBM numbering zero on left and ++ N-1 on right. The loop index is converted to "i" to match the algorithm ++ for clarity of matching the C code to the algorithm in the ISA. */ ++ ++ if (imm == 0b00) { // big endian expansion ++ /* If IMM=0b00000, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement an ++ expansion of the leftmost halfword elements of a source vector into ++ the halfword elements of a result vector specified by the halfword- ++ element mask in VSR[VRB+32]. ++ */ ++ for( index = 0; index < 8; index++) { ++ i = 7 - index; ++ ++ shift_by = i*16; ++ ++ if ( i >= 4) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 15; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ // half-word i, byte 0 ++ result[half_sel] |= (2*j + 0x0) << (shift_by+8); ++ // half-word i, byte 1 ++ result[half_sel] |= (2*j + 0x1) << shift_by; ++ j++; ++ } else { ++ result[half_sel] |= (2*index + 0x10) << (shift_by+8); ++ result[half_sel] |= (2*index + 0x11) << shift_by; ++ } ++ } ++ ++ } else if (imm == 0b01) { // big endian compression ++ /* If IMM=0b00001, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement a ++ compression of the sparse halfword elements in a source vector ++ specified by the halfword-element mask in VSR[VRB+32] into the ++ leftmost halfword elements of a result vector.
++ */ ++ for( index = 0; index < 8; index++) { ++ i = 7 - index; ++ ++ shift_by = i*16; ++ ++ if ( i >= 4) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 15; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 4) { ++ // half-word i, byte 0 ++ result[1] |= (2*index + 0x0) << ((7 - j)*16 + 8); ++ // half-word i, byte 1 ++ result[1] |= (2*index + 0x1) << ((7 - j)*16); ++ } else { ++ // half-word i, byte 0 ++ result[0] |= (2*index + 0x0) << ((3 - j)*16 + 8); ++ // half-word i, byte 1 ++ result[0] |= (2*index + 0x1) << ((3 - j)*16); ++ } ++ j++; ++ } ++ } ++ ++ } else if (imm == 0b10) { //little-endian expansion ++ /* If IMM=0b00010, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement an ++ expansion of the rightmost halfword elements of a source vector into ++ the halfword elements of a result vector specified by the halfword- ++ element mask in VSR[VRB+32]. 
++ */ ++ for( index = 0; index < 8; index++) { ++ i = index; ++ shift_by = i*16; ++ ++ if ( i >= 4) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 15; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ // half-word i, byte 0 ++ result[half_sel] |= (2*j + 0x00) << shift_by; ++ // half-word i, byte 1 ++ result[half_sel] |= (2*j + 0x01) << (shift_by+8); ++ j++; ++ ++ } else { ++ // half-word i, byte 0 ++ result[half_sel] |= (2*index + 0x10) << shift_by; ++ // half-word i, byte 1 ++ result[half_sel] |= (2*index + 0x11) << (shift_by+8); ++ } ++ } ++ ++ } else if (imm == 0b11) { //little-endian compression ++ /* If IMM=0b00011, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement a ++ compression of the sparse halfword elements in a source vector ++ specified by the halfword-element mask in VSR[VRB+32] into the ++ rightmost halfword elements of a result vector. 
*/ ++ for( index = 0; index < 8; index++) { ++ i = index; ++ shift_by = i*16; ++ ++ if ( i >= 4) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 15; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 4) { ++ // half-word j, byte 0 ++ result[0] |= (2*index + 0x0) << ((j-4)*16); ++ // half-word j, byte 1 ++ result[0] |= (2*index + 0x1) << ((j-4)*16+8); ++ } else { ++ // half-word j, byte 0 ++ result[1] |= (2*index + 0x0) << (j*16); ++ // half-word j, byte 1 ++ result[1] |= (2*index + 0x1) << ((j*16)+8); ++ } ++ j++; ++ } ++ } ++ ++ } else { ++ vex_printf("ERROR, vector_gen_pvc_hword_dirty_mask_helper, imm value %u not supported.\n", ++ imm); ++ vassert(0); ++ } ++ write_VSX_entry( gst, reg_offset, result); ++} ++ ++/* CALLED FROM GENERATED CODE */ ++void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ULong src_lo, ++ UInt reg_offset, UInt imm ) { ++ /* The function computes the 128-bit result then writes it directly ++ into the guest state VSX register. */ ++ UInt i, shift_by, sel_shift_by, half_sel; ++ ULong index, src, result[2]; ++ ULong j; ++ ++ result[0] = 0; ++ result[1] = 0; ++ j = 0; ++ ++ /* The algorithm in the ISA is written with IBM numbering zero on left and ++ N-1 on right. The loop index is converted to "i" to match the algorithm ++ for clarity of matching the C code to the algorithm in the ISA. */ ++ ++ if (imm == 0b00) { // big endian expansion ++ /* If IMM=0b00000, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement an ++ expansion of the leftmost word elements of a source vector into the ++ word elements of a result vector specified by the word-element mask ++ in VSR[VRB+32].
++ */ ++ for( index = 0; index < 4; index++) { ++ i = 3 - index; ++ ++ shift_by = i*32; ++ ++ if ( i >= 2) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 31; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= (4*j+0) << (shift_by+24); // word i, byte 0 ++ result[half_sel] |= (4*j+1) << (shift_by+16); // word i, byte 1 ++ result[half_sel] |= (4*j+2) << (shift_by+8); // word i, byte 2 ++ result[half_sel] |= (4*j+3) << shift_by; // word i, byte 3 ++ j++; ++ } else { ++ result[half_sel] |= (4*index + 0x10) << (shift_by+24); ++ result[half_sel] |= (4*index + 0x11) << (shift_by+16); ++ result[half_sel] |= (4*index + 0x12) << (shift_by+8); ++ result[half_sel] |= (4*index + 0x13) << shift_by; ++ } ++ } ++ ++ } else if (imm == 0b01) { // big endian compression ++ /* If IMM=0b00001, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement a ++ compression of the sparse word elements in a source vector specified ++ by the word-element mask in VSR[VRB+32] into the leftmost word ++ elements of a result vector. 
++ */ ++ for( index = 0; index < 4; index++) { ++ i = 3 - index; ++ ++ shift_by = i*32; ++ ++ if ( i >= 2) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 31; ++ ++ if (((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 2) { ++ // word j, byte 0 ++ result[1] |= (4*index+0) << ((3 - j)*32 + 24); ++ // word j, byte 1 ++ result[1] |= (4*index+1) << ((3 - j)*32 + 16); ++ // word j, byte 2 ++ result[1] |= (4*index+2) << ((3 - j)*32 + 8); ++ // word j, byte 3 ++ result[1] |= (4*index+3) << ((3 - j)*32 + 0); ++ } else { ++ result[0] |= (4*index+0) << ((1 - j)*32 + 24); ++ result[0] |= (4*index+1) << ((1 - j)*32 + 16); ++ result[0] |= (4*index+2) << ((1 - j)*32 + 8); ++ result[0] |= (4*index+3) << ((1 - j)*32 + 0); ++ } ++ j++; ++ } ++ } ++ ++ } else if (imm == 0b10) { //little-endian expansion ++ /* If IMM=0b00010, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement an ++ expansion of the rightmost word elements of a source vector into the ++ word elements of a result vector specified by the word-element mask ++ in VSR[VRB+32]. 
++ */ ++ for( index = 0; index < 4; index++) { ++ i = index; ++ ++ shift_by = i*32; ++ ++ if ( i >= 2) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 31; ++ ++ if (((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= (4*j+0) << (shift_by + 0); // word j, byte 0 ++ result[half_sel] |= (4*j+1) << (shift_by + 8); // word j, byte 1 ++ result[half_sel] |= (4*j+2) << (shift_by + 16); // word j, byte 2 ++ result[half_sel] |= (4*j+3) << (shift_by + 24); // word j, byte 3 ++ j++; ++ } else { ++ result[half_sel] |= (4*index + 0x10) << (shift_by + 0); ++ result[half_sel] |= (4*index + 0x11) << (shift_by + 8); ++ result[half_sel] |= (4*index + 0x12) << (shift_by + 16); ++ result[half_sel] |= (4*index + 0x13) << (shift_by + 24); ++ } ++ } ++ ++ } else if (imm == 0b11) { //little-endian compression ++ /* If IMM=0b00011, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement a ++ compression of the sparse word elements in a source vector specified ++ by the word-element mask in VSR[VRB+32] into the rightmost word ++ elements of a result vector. 
*/ ++ for( index = 0; index < 4; index++) { ++ i =index; ++ ++ shift_by = i*32; ++ ++ if ( i >= 2) { ++ src = src_hi; ++ shift_by = shift_by - 64; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = shift_by + 31; ++ ++ if (((src >> sel_shift_by) & 0x1) == 1) { ++ if (j >= 2){ ++ // word j, byte 0 ++ result[0] |= (4*index + 0x0) << ((j-2)*32+0); ++ // word j, byte 1 ++ result[0] |= (4*index + 0x1) << ((j-2)*32+8); ++ // word j, byte 2 ++ result[0] |= (4*index + 0x2) << ((j-2)*32+16); ++ // word j, byte 3 ++ result[0] |= (4*index + 0x3) << ((j-2)*32+24); ++ } else { ++ result[1] |= (4*index + 0x0) << (j*32+0); ++ result[1] |= (4*index + 0x1) << (j*32+8); ++ result[1] |= (4*index + 0x2) << (j*32+16); ++ result[1] |= (4*index + 0x3) << (j*32+24); ++ } ++ j++; ++ } ++ } ++ } else { ++ vex_printf("ERROR, vector_gen_pvc_word_mask_dirty_helper, imm value %u not supported.\n", ++ imm); ++ vassert(0); ++ } ++ ++ write_VSX_entry( gst, reg_offset, result); ++} ++ ++/* CALLED FROM GENERATED CODE */ ++void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, ++ ULong src_hi, ULong src_lo, ++ UInt reg_offset, UInt imm ) { ++ /* The function computes the 128-bit result then writes it directly ++ into the guest state VSX register. */ ++ UInt sel_shift_by, half_sel; ++ ULong index, src, result[2]; ++ ULong j, i; ++ ++ result[0] = 0; ++ result[1] = 0; ++ j = 0; ++ ++ /* The algorithm in the ISA is written with IBM numbering zero on left and ++ N-1 on right. The loop index is converted to "i" to match the algorithm ++ for claritiy of matching the C code to the algorithm in the ISA. 
*/ ++ ++ if (imm == 0b00) { // big endian expansion ++ /* If IMM=0b00000, let pcv be the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement an ++ expansion of the leftmost doubleword elements of a source vector into ++ the doubleword elements of a result vector specified by the ++ doubleword-element mask in VSR[VRB+32]. ++ */ ++ for( index = 0; index < 2; index++) { ++ i = 1 - index; ++ ++ if ( i == 1) { ++ src = src_hi; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = 63; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= (8*j + 0x0) << 56; // dword i, byte 0 ++ result[half_sel] |= (8*j + 0x1) << 48; // dword i, byte 1 ++ result[half_sel] |= (8*j + 0x2) << 40; // dword i, byte 2 ++ result[half_sel] |= (8*j + 0x3) << 32; // dword i, byte 3 ++ result[half_sel] |= (8*j + 0x4) << 24; // dword i, byte 4 ++ result[half_sel] |= (8*j + 0x5) << 16; // dword i, byte 5 ++ result[half_sel] |= (8*j + 0x6) << 8; // dword i, byte 6 ++ result[half_sel] |= (8*j + 0x7) << 0; // dword i, byte 7 ++ j++; ++ } else { ++ result[half_sel] |= (8*index + 0x10) << 56; ++ result[half_sel] |= (8*index + 0x11) << 48; ++ result[half_sel] |= (8*index + 0x12) << 40; ++ result[half_sel] |= (8*index + 0x13) << 32; ++ result[half_sel] |= (8*index + 0x14) << 24; ++ result[half_sel] |= (8*index + 0x15) << 16; ++ result[half_sel] |= (8*index + 0x16) << 8; ++ result[half_sel] |= (8*index + 0x17) << 0; ++ } ++ } ++ } else if (imm == 0b01) { // big endian compression ++ /* If IMM=0b00001, let pcv be the the permute control vector required to ++ enable a left-indexed permute (vperm or xxperm) to implement a ++ compression of the sparse doubleword elements in a source vector ++ specified by the doubleword-element mask in VSR[VRB+32] into the ++ leftmost doubleword elements of a result vector. 
++ */ ++ for( index = 0; index < 2; index++) { ++ i = 1 - index; ++ ++ if ( i == 1) { ++ src = src_hi; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = 63; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ if (j == 1) { ++ result[1] |= (8*index + 0x0) << 56; // double-word j, byte 0 ++ result[1] |= (8*index + 0x1) << 48; // double-word j, byte 1 ++ result[1] |= (8*index + 0x2) << 40; // double-word j, byte 2 ++ result[1] |= (8*index + 0x3) << 32; // double-word j, byte 3 ++ result[1] |= (8*index + 0x4) << 24; // double-word j, byte 4 ++ result[1] |= (8*index + 0x5) << 16; // double-word j, byte 5 ++ result[1] |= (8*index + 0x6) << 8; // double-word j, byte 6 ++ result[1] |= (8*index + 0x7) << 0; // double-word j, byte 7 ++ } else { ++ result[0] |= (8*index + 0x0) << 56; // double-word j, byte 0 ++ result[0] |= (8*index + 0x1) << 48; // double-word j, byte 1 ++ result[0] |= (8*index + 0x2) << 40; // double-word j, byte 2 ++ result[0] |= (8*index + 0x3) << 32; // double-word j, byte 3 ++ result[0] |= (8*index + 0x4) << 24; // double-word j, byte 4 ++ result[0] |= (8*index + 0x5) << 16; // double-word j, byte 5 ++ result[0] |= (8*index + 0x6) << 8; // double-word j, byte 6 ++ result[0] |= (8*index + 0x7) << 0; // double-word j, byte 7 ++ } ++ j++; ++ } ++ } ++ } else if (imm == 0b10) { //little-endian expansion ++ /* If IMM=0b00010, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement an ++ expansion of the rightmost doubleword elements of a source vector ++ into the doubleword elements of a result vector specified by the ++ doubleword-element mask in VSR[VRB+32]. 
++ */ ++ ++ for( index = 0; index < 2; index++) { ++ i = index; ++ ++ if ( i == 1) { ++ src = src_hi; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = 63; ++ ++ if ( ((src >> sel_shift_by) & 0x1) == 1) { ++ result[half_sel] |= (8*j+0) << 0; // double-word i, byte 0 ++ result[half_sel] |= (8*j+1) << 8; // double-word i, byte 1 ++ result[half_sel] |= (8*j+2) << 16; // double-word i, byte 2 ++ result[half_sel] |= (8*j+3) << 24; // double-word i, byte 3 ++ result[half_sel] |= (8*j+4) << 32; // double-word i, byte 4 ++ result[half_sel] |= (8*j+5) << 40; // double-word i, byte 5 ++ result[half_sel] |= (8*j+6) << 48; // double-word i, byte 6 ++ result[half_sel] |= (8*j+7) << 56; // double-word i, byte 7 ++ j++; ++ } else { ++ result[half_sel] |= (8*index + 0x10) << 0; ++ result[half_sel] |= (8*index + 0x11) << 8; ++ result[half_sel] |= (8*index + 0x12) << 16; ++ result[half_sel] |= (8*index + 0x13) << 24; ++ result[half_sel] |= (8*index + 0x14) << 32; ++ result[half_sel] |= (8*index + 0x15) << 40; ++ result[half_sel] |= (8*index + 0x16) << 48; ++ result[half_sel] |= (8*index + 0x17) << 56; ++ } ++ } ++ ++ } else if (imm == 0b11) { //little-endian compression ++ /* If IMM=0b00011, let pcv be the permute control vector required to ++ enable a right-indexed permute (vpermr or xxpermr) to implement a ++ compression of the sparse doubleword elements in a source vector ++ specified by the doubleword-element mask in VSR[VRB+32] into the ++ rightmost doubleword elements of a result vector. 
*/ ++ for( index = 0; index < 2; index++) { ++ i = index; ++ ++ if ( i == 1) { ++ src = src_hi; ++ half_sel = 0; ++ } else { ++ src = src_lo; ++ half_sel = 1; ++ } ++ ++ sel_shift_by = 63; ++ ++ if (((src >> sel_shift_by) & 0x1) == 1) { ++ if (j == 1) { ++ result[0] |= (8*index + 0x0) << 0; // double-word j, byte 0 ++ result[0] |= (8*index + 0x1) << 8; // double-word j, byte 1 ++ result[0] |= (8*index + 0x2) << 16; // double-word j, byte 2 ++ result[0] |= (8*index + 0x3) << 24; // double-word j, byte 3 ++ result[0] |= (8*index + 0x4) << 32; // double-word j, byte 4 ++ result[0] |= (8*index + 0x5) << 40; // double-word j, byte 5 ++ result[0] |= (8*index + 0x6) << 48; // double-word j, byte 6 ++ result[0] |= (8*index + 0x7) << 56; // double-word j, byte 7 ++ } else { ++ result[1] |= (8*index + 0x0) << 0; ++ result[1] |= (8*index + 0x1) << 8; ++ result[1] |= (8*index + 0x2) << 16; ++ result[1] |= (8*index + 0x3) << 24; ++ result[1] |= (8*index + 0x4) << 32; ++ result[1] |= (8*index + 0x5) << 40; ++ result[1] |= (8*index + 0x6) << 48; ++ result[1] |= (8*index + 0x7) << 56; ++ } ++ j++; ++ } ++ } ++ } else { ++ vex_printf("ERROR, vector_gen_pvc_dword_mask_helper, imm value %u not supported.\n", ++ imm); ++ vassert(0); ++ } ++ ++ write_VSX_entry( gst, reg_offset, result); ++} + + /*------------------------------------------------*/ + /*---- VSX Matrix signed integer GER functions ---*/ +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index bcabf69dd..354be6b53 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -3322,6 +3322,7 @@ static IRExpr * locate_vector_ele_eq ( IRTemp src, IRExpr *value, + #define DFORM_IMMASK 0xffffffff + #define DSFORM_IMMASK 0xfffffffc + #define DQFORM_IMMASK 0xfffffff0 ++#define DA8LSFORM_IMMASK 0x3fffffff // Algebraic 8LS Dform + + #define ISA_3_1_PREFIX_CHECK if (prefix) {if (!allow_isa_3_1) goto decode_noIsa3_1;} + +@@ -6109,6 +6110,87 @@ static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* 
vbi, + stmt( IRStmt_Dirty(d) ); + } + ++static void vector_gen_pvc_mask ( const VexAbiInfo* vbi, ++ IRExpr *src, UInt IMM, ++ UInt opc2, UInt VSX_addr ) { ++ /* The function takes a 64-bit source and an immediate value. The function ++ calls a helper to execute the xxgenpcvbm, xxgenpcvhm, xxgenpcvwm, ++ xxgenpcvdm instruction. The instructions are not practical to do with ++ Iops. The instruction is implemented with a dirty helper that ++ calculates the 128-bit result and writes it directly into the guest ++ state VSX register. ++ */ ++ IRTemp src_hi = newTemp( Ity_I64); ++ IRTemp src_lo = newTemp( Ity_I64); ++ ++ IRDirty* d; ++ ++ vassert( (VSX_addr >= 0) && (VSX_addr < 64) ); ++ UInt reg_offset = offsetofPPCGuestState( guest_VSR0 ) ++ + sizeof(U128) * VSX_addr; ++ ++ assign( src_hi, unop( Iop_V128HIto64, src ) ); ++ assign( src_lo, unop( Iop_V128to64, src ) ); ++ ++ IRExpr** args = mkIRExprVec_5( ++ IRExpr_GSPTR(), ++ mkexpr( src_hi ), ++ mkexpr( src_lo ), ++ mkU32( reg_offset ), ++ mkU64( IMM ) ); ++ ++ switch( opc2 ) { ++ case 0x394: // xxgenpcvbm ++ d = unsafeIRDirty_0_N ( ++ 0 /*regparms*/, ++ "vector_gen_pvc_byte_mask_dirty_helper", ++ fnptr_to_fnentry( vbi, ++ &vector_gen_pvc_byte_mask_dirty_helper ), ++ args); ++ break; ++ ++ case 0x395: // xxgenpcvhm ++ d = unsafeIRDirty_0_N ( ++ 0 /*regparms*/, ++ "vector_gen_pvc_hword_mask_dirty_helper", ++ fnptr_to_fnentry( vbi, ++ &vector_gen_pvc_hword_mask_dirty_helper ), ++ args); ++ break; ++ ++ case 0x3B4: // xxgenpcvwm ++ d = unsafeIRDirty_0_N ( ++ 0 /*regparms*/, ++ "vector_gen_pvc_word_mask_dirty_helper", ++ fnptr_to_fnentry( vbi, ++ &vector_gen_pvc_word_mask_dirty_helper ), ++ args); ++ break; ++ ++ case 0x3B5: // xxgenpcvdm ++ d = unsafeIRDirty_0_N ( ++ 0 /*regparms*/, ++ "vector_gen_pvc_dword_mask_dirty_helper", ++ fnptr_to_fnentry( vbi, ++ &vector_gen_pvc_dword_mask_dirty_helper ), ++ args); ++ break; ++ default: ++ vex_printf("ERROR: Unkown instruction = %u in vector_gen_pvc_mask()\n", ++ opc2); ++ return; 
++ } ++ ++ d->nFxState = 1; ++ vex_bzero(&d->fxState, sizeof(d->fxState)); ++ d->fxState[0].fx = Ifx_Modify; ++ d->fxState[0].size = sizeof(U128); ++ d->fxState[0].offset = reg_offset; ++ ++ /* execute the dirty call, side-effecting guest state */ ++ stmt( IRStmt_Dirty(d) ); ++} ++ + static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) { + /* This function does an unsigned compare of two V128 values. The + * function is for use in 32-bit mode only as it is expensive. The +@@ -35227,6 +35309,54 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + return True; + } + ++static Bool dis_vector_generate_pvc_from_mask ( UInt prefix, ++ UInt theInstr, ++ const VexAbiInfo* vbi ) ++{ ++ UChar XT_addr = ifieldRegXT(theInstr); ++ UChar vB_addr = ifieldRegB(theInstr); ++ IRTemp vB = newTemp( Ity_V128 ); ++ UInt opc2 = ifieldOPClo10(theInstr); ++ UInt IMM = IFIELD(theInstr, (31-15), 5); // bits[11:15] ++ ++ assign( vB, getVReg( vB_addr ) ); ++ ++ switch( opc2 ) { ++ case 0x394: ++ DIP("xxgenpcvbm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); ++ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and ++ write it to the VSX result register. */ ++ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); ++ break; ++ ++ case 0x395: ++ DIP("xxgenpcvhm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); ++ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and ++ write it to the VSX result register. */ ++ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); ++ break; ++ ++ case 0x3B4: ++ DIP("xxgenpcvwm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); ++ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and ++ write it to the VSX result register. */ ++ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); ++ break; ++ ++ case 0x3B5: ++ DIP("xxgenpcvdm v%u,v%u,%u\n", XT_addr, vB_addr, IMM); ++ /* vector_gen_pvc_mask uses a dirty helper to calculate the result and ++ write it to the VSX result register. 
*/ ++ vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr ); ++ break; ++ ++ default: ++ return False; ++ } ++ ++ return True; ++} ++ + static Int dis_nop_prefix ( UInt prefix, UInt theInstr ) + { + Bool is_prefix = prefix_instruction( prefix ); +@@ -35748,14 +35878,9 @@ DisResult disInstr_PPC_WRK ( + } + goto decode_failure; + +- case 0x31: // lfsu, stxv ++ case 0x31: // lfsu + if (!allow_F) goto decode_noF; +- if (prefix_instruction( prefix )) { // stxv +- if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; +- if (dis_fp_pair_prefix( prefix, theInstr )) goto decode_success; +- } else { // lfsu +- if (dis_fp_load( prefix, theInstr )) goto decode_success; +- } ++ if (dis_fp_load( prefix, theInstr )) goto decode_success; + goto decode_failure; + + case 0x32: +@@ -35842,7 +35967,6 @@ DisResult disInstr_PPC_WRK ( + case 0x39: // pld, lxsd, lxssp, lfdp + { + UInt opc2tmp = ifieldOPC0o2(theInstr); +- + if (!allow_F) goto decode_noF; + if (prefix_instruction( prefix )) { // pld + if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; +@@ -36125,12 +36249,6 @@ DisResult disInstr_PPC_WRK ( + goto decode_failure; + } + +- /* The vsxOpc2 returned is the "normalized" value, representing the +- * instructions secondary opcode as taken from the standard secondary +- * opcode field [21:30] (IBM notatition), even if the actual field +- * is non-standard. These normalized values are given in the opcode +- * appendices of the ISA 2.06 document. 
+- */ + if ( ( opc2 == 0x168 ) && ( IFIELD( theInstr, 19, 2 ) == 0 ) )// xxspltib + { + /* This is a special case of the XX1 form where the RA, RB +@@ -36153,6 +36271,23 @@ DisResult disInstr_PPC_WRK ( + goto decode_failure; + } + ++ if ( ( opc2 == 0x394 ) || // xxgenpcvbm ++ ( opc2 == 0x395 ) || // xxgenpcvwm ++ ( opc2 == 0x3B4 ) || // xxgenpcvhm ++ ( opc2 == 0x3B5 ) ) { // xxgenpcvdm ++ if ( !(allow_isa_3_1) ) goto decode_noIsa3_1; ++ if (dis_vector_generate_pvc_from_mask( prefix, theInstr, ++ abiinfo )) ++ goto decode_success; ++ goto decode_failure; ++ } ++ ++ /* The vsxOpc2 returned is the "normalized" value, representing the ++ * instructions secondary opcode as taken from the standard secondary ++ * opcode field [21:30] (IBM notatition), even if the actual field ++ * is non-standard. These normalized values are given in the opcode ++ * appendices of the ISA 2.06 document. ++ */ + vsxOpc2 = get_VSX60_opc2(opc2, theInstr); + + switch (vsxOpc2) { +commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a +Author: Carl Love +Date: Fri Feb 26 15:46:55 2021 -0600 + + PPC64: Reduced-Precision - bfloat16 Outer Product & Format Conversion Operations + + Add support for: + + pmxvbf16ger2 Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) + pmxvbf16ger2pp Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive + multiply, Positive accumulate + pmxvbf16ger2pn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive + multiply, Negative accumulate + pmxvbf16ger2np Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative + multiply, Positive accumulate + pmxvbf16ger2nn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative + multiply, Negative accumulate + xvbf16ger2VSX Vector bfloat16 GER (Rank-2 Update) + xvbf16ger2pp VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive + accumulate + xvbf16ger2pn VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative + accumulate + xvbf16ger2np VSX Vector bfloat16 GER (Rank-2 
Update) Negative multiply, Positive + accumulate + xvbf16ger2nn VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative + accumulate + xvcvbf16sp VSX Vector Convert bfloat16 to Single-Precision format + xvcvspbf16 VSX Vector Convert with round Single-Precision to bfloat16 format + +diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h +index 54ce923a9..d36d6c07d 100644 +--- a/VEX/priv/guest_ppc_defs.h ++++ b/VEX/priv/guest_ppc_defs.h +@@ -150,6 +150,8 @@ extern ULong convert_to_zoned_helper( ULong src_hi, ULong src_low, + ULong return_upper ); + extern ULong convert_to_national_helper( ULong src, ULong return_upper ); + extern ULong convert_from_zoned_helper( ULong src_hi, ULong src_low ); ++extern ULong convert_from_floattobf16_helper( ULong src ); ++extern ULong convert_from_bf16tofloat_helper( ULong src ); + extern ULong convert_from_national_helper( ULong src_hi, ULong src_low ); + extern ULong generate_C_FPCC_helper( ULong size, ULong src_hi, ULong src ); + extern ULong extract_bits_under_mask_helper( ULong src, ULong mask, +@@ -201,6 +203,11 @@ extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst, + #define XVF16GER2PN 0b10010010 + #define XVF16GER2NP 0b01010010 + #define XVF16GER2NN 0b11010010 ++#define XVBF16GER2 0b00110011 ++#define XVBF16GER2PP 0b00110010 ++#define XVBF16GER2PN 0b10110010 ++#define XVBF16GER2NP 0b01110010 ++#define XVBF16GER2NN 0b11110010 + #define XVF32GER 0b00011011 + #define XVF32GERPP 0b00011010 + #define XVF32GERPN 0b10011010 +diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c +index 75497abb9..6bcee966d 100644 +--- a/VEX/priv/guest_ppc_helpers.c ++++ b/VEX/priv/guest_ppc_helpers.c +@@ -1905,6 +1905,125 @@ static Double conv_f16_to_double( ULong input ) + # endif + } + ++#define BF16_SIGN_MASK 0x8000 ++#define BF16_EXP_MASK 0x7F80 ++#define BF16_FRAC_MASK 0x007F ++#define BF16_BIAS 127 ++#define BF16_MAX_UNBIASED_EXP 127 ++#define BF16_MIN_UNBIASED_EXP -126 
++#define FLOAT_SIGN_MASK 0x80000000 ++#define FLOAT_EXP_MASK 0x7F800000 ++#define FLOAT_FRAC_MASK 0x007FFFFF ++#define FLOAT_FRAC_BIT8 0x00008000 ++#define FLOAT_BIAS 127 ++ ++static Float conv_bf16_to_float( UInt input ) ++{ ++ /* input is 16-bit bfloat. ++ bias +127, exponent 8-bits, fraction 7-bits ++ ++ output is 32-bit float. ++ bias +127, exponent 8-bits, fraction 22-bits ++ */ ++ ++ UInt input_exp, input_fraction, unbiased_exp; ++ UInt output_exp, output_fraction; ++ UInt sign; ++ union convert_t conv; ++ ++ sign = (UInt)(input & BF16_SIGN_MASK); ++ input_exp = input & BF16_EXP_MASK; ++ unbiased_exp = (input_exp >> 7) - (UInt)BF16_BIAS; ++ input_fraction = input & BF16_FRAC_MASK; ++ ++ if (((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) && ++ (input_fraction != 0)) { ++ /* input is NaN or SNaN, exp all 1's, fraction != 0 */ ++ output_exp = FLOAT_EXP_MASK; ++ output_fraction = input_fraction; ++ ++ } else if(((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) && ++ ( input_fraction == 0)) { ++ /* input is infinity, exp all 1's, fraction = 0 */ ++ output_exp = FLOAT_EXP_MASK; ++ output_fraction = 0; ++ ++ } else if((input_exp == 0) && (input_fraction == 0)) { ++ /* input is zero */ ++ output_exp = 0; ++ output_fraction = 0; ++ ++ } else if((input_exp == 0) && (input_fraction != 0)) { ++ /* input is denormal */ ++ output_fraction = input_fraction; ++ output_exp = (-(Int)BF16_BIAS + (Int)FLOAT_BIAS ) << 23; ++ ++ } else { ++ /* result is normal */ ++ output_exp = (unbiased_exp + FLOAT_BIAS) << 23; ++ output_fraction = input_fraction; ++ } ++ ++ conv.u32 = sign << (31 - 15) | output_exp | (output_fraction << (23-7)); ++ return conv.f; ++} ++ ++static UInt conv_float_to_bf16( UInt input ) ++{ ++ /* input is 32-bit float stored as unsigned 32-bit. ++ bias +127, exponent 8-bits, fraction 23-bits ++ ++ output is 16-bit bfloat. 
++ bias +127, exponent 8-bits, fraction 7-bits ++ ++ If the unbiased exponent of the input is greater than the max floating ++ point unbiased exponent value, the result of the floating point 16-bit ++ value is infinity. ++ */ ++ ++ UInt input_exp, input_fraction; ++ UInt output_exp, output_fraction; ++ UInt result, sign; ++ ++ sign = input & FLOAT_SIGN_MASK; ++ input_exp = input & FLOAT_EXP_MASK; ++ input_fraction = input & FLOAT_FRAC_MASK; ++ ++ if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) && ++ (input_fraction != 0)) { ++ /* input is NaN or SNaN, exp all 1's, fraction != 0 */ ++ output_exp = BF16_EXP_MASK; ++ output_fraction = (ULong)input_fraction >> (23 - 7); ++ } else if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) && ++ ( input_fraction == 0)) { ++ /* input is infinity, exp all 1's, fraction = 0 */ ++ output_exp = BF16_EXP_MASK; ++ output_fraction = 0; ++ } else if ((input_exp == 0) && (input_fraction == 0)) { ++ /* input is zero */ ++ output_exp = 0; ++ output_fraction = 0; ++ } else if ((input_exp == 0) && (input_fraction != 0)) { ++ /* input is denormal */ ++ output_exp = 0; ++ output_fraction = (ULong)input_fraction >> (23 - 7); ++ } else { ++ /* result is normal */ ++ output_exp = (input_exp - BF16_BIAS + FLOAT_BIAS) >> (23 - 7); ++ output_fraction = (ULong)input_fraction >> (23 - 7); ++ ++ /* Round result. Look at the 8th bit position of the 32-bit floating ++ pointt fraction. The F16 fraction is only 7 bits wide so if the 8th ++ bit of the F32 is a 1 we need to round up by adding 1 to the output ++ fraction. 
*/ ++ if ((input_fraction & FLOAT_FRAC_BIT8) == FLOAT_FRAC_BIT8) ++ /* Round the F16 fraction up by 1 */ ++ output_fraction = output_fraction + 1; ++ } ++ ++ result = sign >> (31 - 15) | output_exp | output_fraction; ++ return result; ++} + + static Float conv_double_to_float( Double src ) + { +@@ -1942,6 +2061,36 @@ static Float negate_float( Float input ) + return -input; + } + ++/* This C-helper takes a vector of two 32-bit floating point values ++ * and returns a vector containing two 16-bit bfloats. ++ input: word0 word1 ++ output 0x0 hword1 0x0 hword3 ++ Called from generated code. ++ */ ++ULong convert_from_floattobf16_helper( ULong src ) { ++ ULong resultHi, resultLo; ++ ++ resultHi = (ULong)conv_float_to_bf16( (UInt)(src >> 32)); ++ resultLo = (ULong)conv_float_to_bf16( (UInt)(src & 0xFFFFFFFF)); ++ return (resultHi << 32) | resultLo; ++ ++} ++ ++/* This C-helper takes a vector of two 16-bit bfloating point values ++ * and returns a vector containing one 32-bit float. ++ input: 0x0 hword1 0x0 hword3 ++ output: word0 word1 ++ */ ++ULong convert_from_bf16tofloat_helper( ULong src ) { ++ ULong result; ++ union convert_t conv; ++ conv.f = conv_bf16_to_float( (UInt)(src >> 32) ); ++ result = (ULong) conv.u32; ++ conv.f = conv_bf16_to_float( (UInt)(src & 0xFFFFFFFF)); ++ result = (result << 32) | (ULong) conv.u32; ++ return result; ++ } ++ + void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + UInt offset_ACC, + ULong srcA_hi, ULong srcA_lo, +@@ -2002,24 +2151,44 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask); + } + ++ /* Note the isa is not consistent in the src naming. Will use the ++ naming src10, src11, src20, src21 used with xvf16ger2 instructions. 
++ */ + for( j = 0; j < 4; j++) { + if (((pmsk >> 1) & 0x1) == 0) { + src10 = 0; + src20 = 0; + } else { +- src10 = conv_f16_to_double((ULong)srcA_word[i][0]); +- src20 = conv_f16_to_double((ULong)srcB_word[j][0]); ++ if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP ) ++ || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP ) ++ || ( inst == XVF16GER2NN )) { ++ src10 = conv_f16_to_double((ULong)srcA_word[i][0]); ++ src20 = conv_f16_to_double((ULong)srcB_word[j][0]); ++ } else { ++ /* Input is in bfloat format, result is stored in the ++ "traditional" 64-bit float format. */ ++ src10 = (double)conv_bf16_to_float((ULong)srcA_word[i][0]); ++ src20 = (double)conv_bf16_to_float((ULong)srcB_word[j][0]); ++ } + } + + if ((pmsk & 0x1) == 0) { + src11 = 0; + src21 = 0; + } else { +- src11 = conv_f16_to_double((ULong)srcA_word[i][1]); +- src21 = conv_f16_to_double((ULong)srcB_word[j][1]); ++ if (( inst == XVF16GER2 ) || ( inst == XVF16GER2PP ) ++ || ( inst == XVF16GER2PN ) || ( inst == XVF16GER2NP ) ++ || ( inst == XVF16GER2NN )) { ++ src11 = conv_f16_to_double((ULong)srcA_word[i][1]); ++ src21 = conv_f16_to_double((ULong)srcB_word[j][1]); ++ } else { ++ /* Input is in bfloat format, result is stored in the ++ "traditional" 64-bit float format. */ ++ src11 = (double)conv_bf16_to_float((ULong)srcA_word[i][1]); ++ src21 = (double)conv_bf16_to_float((ULong)srcB_word[j][1]); ++ } + } + +- + prod = src10 * src20; + msum = prod + src11 * src21; + +@@ -2027,26 +2196,26 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst, + /* Note, we do not track the exception handling bits + ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR. 
*/ + +- if ( inst == XVF16GER2 ) ++ if (( inst == XVF16GER2 ) || ( inst == XVBF16GER2 ) ) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) ); + +- else if ( inst == XVF16GER2PP ) ++ else if (( inst == XVF16GER2PP ) || (inst == XVBF16GER2PP )) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + acc_word[j] ); + +- else if ( inst == XVF16GER2PN ) ++ else if (( inst == XVF16GER2PN ) || ( inst == XVBF16GER2PN )) + result[j] = reinterpret_float_as_int( + conv_double_to_float(msum) + + negate_float( acc_word[j] ) ); + +- else if ( inst == XVF16GER2NP ) ++ else if (( inst == XVF16GER2NP ) || ( inst == XVBF16GER2NP )) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + acc_word[j] ); + +- else if ( inst == XVF16GER2NN ) ++ else if (( inst == XVF16GER2NN ) || ( inst == XVBF16GER2NN )) + result[j] = reinterpret_float_as_int( + conv_double_to_float( negate_double( msum ) ) + + negate_float( acc_word[j] ) ); +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index 354be6b53..20553a539 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -5688,6 +5688,57 @@ static IRExpr * convert_from_national ( const VexAbiInfo* vbi, IRExpr *src ) { + return mkexpr( result ); + } + ++static IRExpr * vector_convert_floattobf16 ( const VexAbiInfo* vbi, ++ IRExpr *src ) { ++ /* The function takes 128-bit value containing four 32-bit floats and ++ returns a 128-bit value containint four 16-bit bfloats in the lower ++ halfwords. 
*/ ++ ++ IRTemp resultHi = newTemp( Ity_I64); ++ IRTemp resultLo = newTemp( Ity_I64); ++ ++ assign( resultHi, ++ mkIRExprCCall( Ity_I64, 0 /*regparms*/, ++ "vector_convert_floattobf16_helper", ++ fnptr_to_fnentry( vbi, ++ &convert_from_floattobf16_helper ), ++ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) ); ++ ++ assign( resultLo, ++ mkIRExprCCall( Ity_I64, 0 /*regparms*/, ++ "vector_convert_floattobf16_helper", ++ fnptr_to_fnentry( vbi, ++ &convert_from_floattobf16_helper ), ++ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) ); ++ ++ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) ); ++} ++ ++static IRExpr * vector_convert_bf16tofloat ( const VexAbiInfo* vbi, ++ IRExpr *src ) { ++ /* The function takes 128-bit value containing four 16-bit bfloats in ++ the lower halfwords and returns a 128-bit value containint four ++ 32-bit floats. */ ++ IRTemp resultHi = newTemp( Ity_I64); ++ IRTemp resultLo = newTemp( Ity_I64); ++ ++ assign( resultHi, ++ mkIRExprCCall( Ity_I64, 0 /*regparms*/, ++ "vector_convert_bf16tofloat_helper", ++ fnptr_to_fnentry( vbi, ++ &convert_from_bf16tofloat_helper ), ++ mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) ); ++ ++ assign( resultLo, ++ mkIRExprCCall( Ity_I64, 0 /*regparms*/, ++ "vector_convert_bf16tofloat_helper", ++ fnptr_to_fnentry( vbi, ++ &convert_from_bf16tofloat_helper ), ++ mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) ); ++ ++ return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) ); ++} ++ + static IRExpr * popcnt64 ( const VexAbiInfo* vbi, + IRExpr *src ){ + /* The function takes a 64-bit source and counts the number of bits in the +@@ -5936,6 +5987,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, + case XVI16GER2: + case XVI16GER2S: + case XVF16GER2: ++ case XVBF16GER2: + case XVF32GER: + AT_fx = Ifx_Write; + break; +@@ -5943,6 +5995,10 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, + case XVI8GER4PP: + case XVI16GER2PP: + case XVI16GER2SPP: ++ case XVBF16GER2PP: ++ case 
XVBF16GER2PN: ++ case XVBF16GER2NP: ++ case XVBF16GER2NN: + case XVF16GER2PP: + case XVF16GER2PN: + case XVF16GER2NP: +@@ -23899,6 +23955,24 @@ dis_vxs_misc( UInt prefix, UInt theInstr, const VexAbiInfo* vbi, UInt opc2, + mkexpr( sub_element1 ), + mkexpr( sub_element0 ) ) ) ); + ++ } else if ((inst_select == 16) && !prefix) { ++ IRTemp result = newTemp(Ity_V128); ++ UChar xT_addr = ifieldRegXT ( theInstr ); ++ UChar xB_addr = ifieldRegXB ( theInstr ); ++ /* Convert 16-bit bfloat to 32-bit float, not a prefix inst */ ++ DIP("xvcvbf16sp v%u,v%u\n", xT_addr, xB_addr); ++ assign( result, vector_convert_bf16tofloat( vbi, mkexpr( vB ) ) ); ++ putVSReg( XT, mkexpr( result) ); ++ ++ } else if ((inst_select == 17) && !prefix) { ++ IRTemp result = newTemp(Ity_V128); ++ UChar xT_addr = ifieldRegXT ( theInstr ); ++ UChar xB_addr = ifieldRegXB ( theInstr ); ++ /* Convert 32-bit float to 16-bit bfloat, not a prefix inst */ ++ DIP("xvcvspbf16 v%u,v%u\n", xT_addr, xB_addr); ++ assign( result, vector_convert_floattobf16( vbi, mkexpr( vB ) ) ); ++ putVSReg( XT, mkexpr( result) ); ++ + } else if (inst_select == 23) { + DIP("xxbrd v%u, v%u\n", (UInt)XT, (UInt)XB); + +@@ -34956,6 +35030,41 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + getVSReg( rB_addr ), AT, + ( ( inst_prefix << 8 ) | XO ) ); + break; ++ case XVBF16GER2: ++ DIP("xvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), AT, ++ ( ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2PP: ++ DIP("xvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), AT, ++ ( ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2PN: ++ DIP("xvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), AT, ++ ( ( inst_prefix << 8 ) | XO ) 
); ++ break; ++ case XVBF16GER2NP: ++ DIP("xvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), AT, ++ ( ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2NN: ++ DIP("xvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), AT, ++ ( ( inst_prefix << 8 ) | XO ) ); ++ break; + case XVF32GER: + DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER, +@@ -35106,6 +35215,61 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; ++ case XVBF16GER2: ++ PMSK = IFIELD( prefix, 14, 2); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), ++ AT, ( (MASKS << 9 ) ++ | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2PP: ++ PMSK = IFIELD( prefix, 14, 2); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), ++ AT, ( (MASKS << 9 ) ++ | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2PN: ++ PMSK = IFIELD( prefix, 14, 2); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), ++ AT, ( (MASKS << 9 ) ++ | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2NP: ++ PMSK = IFIELD( prefix, 14, 2); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, 
MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), ++ AT, ( (MASKS << 9 ) ++ | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVBF16GER2NN: ++ PMSK = IFIELD( prefix, 14, 2); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, ++ getVSReg( rA_addr ), ++ getVSReg( rB_addr ), ++ AT, ( (MASKS << 9 ) ++ | ( inst_prefix << 8 ) | XO ) ); ++ break; + case XVF16GER2: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); +@@ -36181,6 +36345,11 @@ DisResult disInstr_PPC_WRK ( + (opc2 == XVI4GER8PP) || // xvi4ger8pp + (opc2 == XVI8GER4) || // xvi8ger4 + (opc2 == XVI8GER4PP) || // xvi8ger4pp ++ (opc2 == XVBF16GER2) || // xvbf16ger2 ++ (opc2 == XVBF16GER2PP) || // xvbf16ger2pp ++ (opc2 == XVBF16GER2PN) || // xvbf16ger2pn ++ (opc2 == XVBF16GER2NP) || // xvbf16ger2np ++ (opc2 == XVBF16GER2NN) || // xvbf16ger2nn + (opc2 == XVF16GER2) || // xvf16ger2 + (opc2 == XVF16GER2PP) || // xvf16ger2pp + (opc2 == XVF16GER2PN) || // xvf16ger2pn +commit e09fdaf569b975717465ed8043820d0198d4d47d +Author: Carl Love +Date: Fri Feb 26 16:05:12 2021 -0600 + + PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations + + Add support for: + + pmxvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update), Prefixed + Masked + pmxvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive + multiply, Positive accumulate), Prefixed Masked + pmxvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with + Saturation (Positive multiply, Positive accumulate), Prefixed Masked + xvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update) + xvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive + multiply, Positive accumulate) + xvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with + Saturation (Positive multiply, Positive accumulate) + +diff --git 
a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c +index 6bcee966d..d8131eb60 100644 +--- a/VEX/priv/guest_ppc_helpers.c ++++ b/VEX/priv/guest_ppc_helpers.c +@@ -1446,16 +1446,16 @@ static UInt exts4( UInt src) + return src & 0xF; /* make sure high order bits are zero */ + } + +-static UInt exts8( UInt src) ++static ULong exts8( UInt src) + { +- /* Input is an 8-bit value. Extend bit 7 to bits [31:8] */ ++ /* Input is an 8-bit value. Extend bit 7 to bits [63:8] */ + if (( src >> 7 ) & 0x1) +- return src | 0xFFFFFF00; /* sign bit is a 1, extend */ ++ return src | 0xFFFFFFFFFFFFFF00ULL; /* sign bit is a 1, extend */ + else + return src & 0xFF; /* make sure high order bits are zero */ + } + +-static UInt extz8( UInt src) ++static ULong extz8( UInt src) + { + /* Input is an 8-bit value. Extend src on the left with zeros. */ + return src & 0xFF; /* make sure high order bits are zero */ +@@ -1662,12 +1662,12 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, + ULong srcB_hi, ULong srcB_lo, + UInt masks_inst ) + { +- UInt i, j, mask, sum, inst, acc_entry, prefix_inst; ++ UInt i, j, mask, inst, acc_entry, prefix_inst; + + UInt srcA_bytes[4][4]; /* word, byte */ + UInt srcB_bytes[4][4]; /* word, byte */ + UInt acc_word[4]; +- UInt prod0, prod1, prod2, prod3; ++ ULong prod0, prod1, prod2, prod3, sum; + UInt result[4]; + UInt pmsk = 0; + UInt xmsk = 0; +@@ -1742,10 +1742,13 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst, + sum = prod0 + prod1 + prod2 + prod3; + + if ( inst == XVI8GER4 ) +- result[j] = sum; ++ result[j] = chop64to32( sum ); + + else if ( inst == XVI8GER4PP ) +- result[j] = sum + acc_word[j]; ++ result[j] = chop64to32( sum + acc_word[j] ); ++ ++ else if ( inst == XVI8GER4SPP ) ++ result[j] = clampS64toS32(sum + acc_word[j]); + + } else { + result[j] = 0; +@@ -1821,7 +1824,7 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, + else + prod1 = exts16to64( srcA_word[i][1] ) + * exts16to64( 
srcB_word[j][1] ); +- /* sum is UInt so the result is choped to 32-bits */ ++ + sum = prod0 + prod1; + + if ( inst == XVI16GER2 ) +@@ -1830,13 +1833,11 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst, + else if ( inst == XVI16GER2S ) + result[j] = clampS64toS32( sum ); + +- else if ( inst == XVI16GER2PP ) { ++ else if ( inst == XVI16GER2PP ) + result[j] = chop64to32( sum + acc_word[j] ); +- } + +- else if ( inst == XVI16GER2SPP ) { ++ else if ( inst == XVI16GER2SPP ) + result[j] = clampS64toS32( sum + acc_word[j] ); +- } + + } else { + result[j] = 0; +diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c +index 20553a539..e54f0f389 100644 +--- a/VEX/priv/guest_ppc_toIR.c ++++ b/VEX/priv/guest_ppc_toIR.c +@@ -5993,6 +5993,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi, + break; + case XVI4GER8PP: + case XVI8GER4PP: ++ case XVI8GER4SPP: + case XVI16GER2PP: + case XVI16GER2SPP: + case XVBF16GER2PP: +@@ -34983,6 +34984,12 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; ++ case XVI8GER4SPP: ++ DIP("xvi8ger4spp %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ( ( inst_prefix << 8 ) | XO ) ); ++ break; + case XVI16GER2S: + DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, +@@ -34995,6 +35002,19 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + getVSReg( rA_addr ), getVSReg( rB_addr ), + AT, ( ( inst_prefix << 8 ) | XO ) ); + break; ++ case XVI16GER2: ++ DIP("xvi16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ( ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVI16GER2PP: ++ DIP("xvi16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr); ++ vsx_matrix_ger( vbi, 
MATRIX_16BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ( ( inst_prefix << 8 ) | XO ) ); ++ break; ++ + case XVF16GER2: + DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr); + vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER, +@@ -35193,6 +35213,39 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr, + AT, + ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); + break; ++ case XVI8GER4SPP: ++ PMSK = IFIELD( prefix, 12, 4); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvi8ger4spp %u,r%u, r%u,%u,%u,%u\n", ++ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); ++ vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ++ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVI16GER2: ++ PMSK = IFIELD( prefix, 12, 4); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvi16ger2 %u,r%u, r%u,%u,%u,%u\n", ++ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ++ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); ++ break; ++ case XVI16GER2PP: ++ PMSK = IFIELD( prefix, 12, 4); ++ XMSK = IFIELD( prefix, 4, 4); ++ YMSK = IFIELD( prefix, 0, 4); ++ DIP("pmxvi16ger2pp %u,r%u, r%u,%u,%u,%u\n", ++ AT, rA_addr, rB_addr, XMSK, YMSK, PMSK); ++ vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER, ++ getVSReg( rA_addr ), getVSReg( rB_addr ), ++ AT, ++ ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) ); ++ break; + case XVI16GER2S: + PMSK = IFIELD( prefix, 14, 2); + XMSK = IFIELD( prefix, 4, 4); +@@ -36345,6 +36398,9 @@ DisResult disInstr_PPC_WRK ( + (opc2 == XVI4GER8PP) || // xvi4ger8pp + (opc2 == XVI8GER4) || // xvi8ger4 + (opc2 == XVI8GER4PP) || // xvi8ger4pp ++ (opc2 == XVI8GER4SPP) || // xvi8ger4spp ++ (opc2 == XVI16GER2) || // xvi16ger2 ++ (opc2 == XVI16GER2PP) || // xvi16ger2pp + (opc2 == XVBF16GER2) || // xvbf16ger2 + (opc2 == XVBF16GER2PP) || // xvbf16ger2pp + (opc2 == 
XVBF16GER2PN) || // xvbf16ger2pn diff --git a/SOURCES/valgrind-3.17.0-s390-prep.patch b/SOURCES/valgrind-3.17.0-s390-prep.patch new file mode 100644 index 0000000..8f2dbb1 --- /dev/null +++ b/SOURCES/valgrind-3.17.0-s390-prep.patch @@ -0,0 +1,2283 @@ +commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 +Author: Andreas Arnez +Date: Wed Apr 28 18:52:30 2021 +0200 + + Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg + + The fix for bug 429864 - "s390x: C++ atomic test_and_set yields + false-positive memcheck diagnostics" changes the memcheck behavior at + various compare-and-swap instructions. The comparison between the old and + expected value now always yields a defined result, even if the input + values are (partially) undefined. However, some existing test cases + explicitly verify that memcheck complains about the use of uninitialised + values here. These test cases are no longer valid. Remove them. + +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index 67ae8c293..e4e69eb38 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cs csg cds cdsg cu21 cu42 ltgjhe ++INSN_TESTS = cdsg cu21 cu42 ltgjhe + + check_PROGRAMS = $(INSN_TESTS) + +@@ -14,7 +14,3 @@ EXTRA_DIST = \ + AM_CFLAGS += @FLAG_M64@ + AM_CXXFLAGS += @FLAG_M64@ + AM_CCASFLAGS += @FLAG_M64@ +- +-cs_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +-csg_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +-cds_CFLAGS = $(AM_CFLAGS) @FLAG_W_NO_UNINITIALIZED@ +diff --git a/memcheck/tests/s390x/cds.c b/memcheck/tests/s390x/cds.c +deleted file mode 100644 +index ec5c533e0..000000000 +--- a/memcheck/tests/s390x/cds.c ++++ /dev/null +@@ -1,82 +0,0 @@ +-#include +-#include +- +-typedef struct { +- uint64_t high; +- uint64_t low; +-} quad_word; +- +-void +-test(quad_word op1_init, uint64_t op2_init, quad_word op3_init) 
+-{ +- int cc; // unused +- quad_word op1 = op1_init; +- uint64_t op2 = op2_init; +- quad_word op3 = op3_init; +- +- __asm__ volatile ( +- "lmg %%r0,%%r1,%1\n\t" +- "lmg %%r2,%%r3,%3\n\t" +- "cds %%r0,%%r2,%2\n\t" // cds 1st,3rd,2nd +- "stmg %%r0,%%r1,%1\n" // store r0,r1 to op1 +- "stmg %%r2,%%r3,%3\n" // store r2,r3 to op3 +- : "=d" (cc), "+QS" (op1), "+QS" (op2), "+QS" (op3) +- : +- : "r0", "r1", "r2", "r3", "cc"); +- +-} +- +-// Return a quad-word that only bits low[32:63] are undefined +-quad_word +-make_undefined(void) +-{ +- quad_word val; +- +- val.high = 0; +- val.low |= 0xFFFFFFFF00000000ull; +- +- return val; +-} +- +-void op1_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; +- +- // op1 undefined +- op1 = make_undefined(); +- op2 = 42; +- op3.high = op3.low = 0xdeadbeefdeadbabeull; +- test(op1, op2, op3); // complaint +-} +- +-void op2_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; +- +- op1.high = op1.low = 42; +- // op2 undefined +- op3.high = op3.low = 0xdeadbeefdeadbabeull; +- test(op1, op2, op3); // complaint +-} +- +-void op3_undefined(void) +-{ +- quad_word op1, op3; +- uint64_t op2; +- +- op1.high = op1.low = 42; +- op2 = 100; +- op3 = make_undefined(); +- test(op1, op2, op3); // no complaint; op3 is just copied around +-} +- +-int main () +-{ +- op1_undefined(); +- op2_undefined(); +- op3_undefined(); +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/cds.stderr.exp b/memcheck/tests/s390x/cds.stderr.exp +deleted file mode 100644 +index e72de94c8..000000000 +--- a/memcheck/tests/s390x/cds.stderr.exp ++++ /dev/null +@@ -1,10 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cds.c:17) +- by 0x........: op1_undefined (cds.c:50) +- by 0x........: main (cds.c:77) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cds.c:17) +- by 0x........: op2_undefined (cds.c:61) +- by 0x........: main (cds.c:78) +- +diff --git 
a/memcheck/tests/s390x/cds.stdout.exp b/memcheck/tests/s390x/cds.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/cds.vgtest b/memcheck/tests/s390x/cds.vgtest +deleted file mode 100644 +index 5195887e2..000000000 +--- a/memcheck/tests/s390x/cds.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: cds +-vgopts: -q +diff --git a/memcheck/tests/s390x/cs.c b/memcheck/tests/s390x/cs.c +deleted file mode 100644 +index 9a298cef9..000000000 +--- a/memcheck/tests/s390x/cs.c ++++ /dev/null +@@ -1,32 +0,0 @@ +-#include +-#include +-#include +- +-void +-test(int32_t op1_init, int32_t op2_init, int32_t op3_init) +-{ +- register int32_t op1 asm("8") = op1_init; +- register int32_t op3 asm("9") = op3_init; +- +- int32_t op2 = op2_init; +- int cc = 1; +- +- __asm__ volatile ( +- "cs 8,9,%1\n\t" +- "ipm %0\n\t" +- "srl %0,28\n\t" +- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) +- : +- : "cc"); +-} +- +-int main () +-{ +- int op1, op2, op3; +- +- test(op1, 0x10000000, 0x12345678); // complaint +- test(0x10000000, op2, 0x12345678); // complaint +- test(0x10000000, 0x01000000, op3); // no complaint +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/cs.stderr.exp b/memcheck/tests/s390x/cs.stderr.exp +deleted file mode 100644 +index e45dc99cd..000000000 +--- a/memcheck/tests/s390x/cs.stderr.exp ++++ /dev/null +@@ -1,8 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cs.c:14) +- by 0x........: main (cs.c:27) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (cs.c:14) +- by 0x........: main (cs.c:28) +- +diff --git a/memcheck/tests/s390x/cs.stdout.exp b/memcheck/tests/s390x/cs.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/cs.vgtest b/memcheck/tests/s390x/cs.vgtest +deleted file mode 100644 +index 323cce80c..000000000 +--- a/memcheck/tests/s390x/cs.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: cs +-vgopts: 
-q +diff --git a/memcheck/tests/s390x/csg.c b/memcheck/tests/s390x/csg.c +deleted file mode 100644 +index 7f9d8c88e..000000000 +--- a/memcheck/tests/s390x/csg.c ++++ /dev/null +@@ -1,32 +0,0 @@ +-#include +-#include +-#include +- +-void +-test(int64_t op1_init, int64_t op2_init, int64_t op3_init) +-{ +- register int64_t op1 asm("8") = op1_init; +- register int64_t op3 asm("9") = op3_init; +- +- int64_t op2 = op2_init; +- int cc = 1; +- +- __asm__ volatile ( +- "csg 8,9,%1\n\t" +- "ipm %0\n\t" +- "srl %0,28\n\t" +- : "=d" (cc), "+Q" (op2), "+d"(op1), "+d"(op3) +- : +- : "cc"); +-} +- +-int main () +-{ +- int64_t op1, op2, op3; +- +- test(op1, 0x1000000000000000ull, 0x1234567887654321ull); // complaint +- test(0x1000000000000000ull, op2, 0x1234567887654321ull); // complaint +- test(0x1000000000000000ull, 0x1000000000000000ull, op3); // no complaint +- +- return 0; +-} +diff --git a/memcheck/tests/s390x/csg.stderr.exp b/memcheck/tests/s390x/csg.stderr.exp +deleted file mode 100644 +index fda2021ce..000000000 +--- a/memcheck/tests/s390x/csg.stderr.exp ++++ /dev/null +@@ -1,8 +0,0 @@ +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (csg.c:14) +- by 0x........: main (csg.c:27) +- +-Conditional jump or move depends on uninitialised value(s) +- at 0x........: test (csg.c:14) +- by 0x........: main (csg.c:28) +- +diff --git a/memcheck/tests/s390x/csg.stdout.exp b/memcheck/tests/s390x/csg.stdout.exp +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/memcheck/tests/s390x/csg.vgtest b/memcheck/tests/s390x/csg.vgtest +deleted file mode 100644 +index 6de75c1d6..000000000 +--- a/memcheck/tests/s390x/csg.vgtest ++++ /dev/null +@@ -1,2 +0,0 @@ +-prog: csg +-vgopts: -q + +commit 18ddcc47c951427efd3b790ba2481159b9bd1598 +Author: Andreas Arnez +Date: Wed Apr 7 16:48:29 2021 +0200 + + s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 + + Add support for Iop_ExpCmpNE32 and Iop_ExpCmpNE64 in the s390x instruction + selector. 
Handle them exactly like the "inexpensive" variants Iop_CmpNE32 + and Iop_CmpNE64. + +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 2000ec224..5f79280c0 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -3611,6 +3611,8 @@ s390_isel_cc(ISelEnv *env, IRExpr *cond) + + case Iop_CmpNE32: + case Iop_CmpNE64: ++ case Iop_ExpCmpNE32: ++ case Iop_ExpCmpNE64: + case Iop_CasCmpNE32: + case Iop_CasCmpNE64: + result = S390_CC_NE; + +commit 5db3f929c43bf46f4707178706cfe90f43acdd19 +Author: Andreas Arnez +Date: Wed Apr 7 12:30:20 2021 +0200 + + s390x: Add convenience function mkV128() + + Provide mkV128() as a short-hand notation for creating a vector constant from + a bit pattern, similar to other such functions like mkU64(). + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 339377007..7d54cb551 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -376,6 +376,13 @@ mkU64(ULong value) + return IRExpr_Const(IRConst_U64(value)); + } + ++/* Create an expression node for a 128-bit vector constant */ ++static __inline__ IRExpr * ++mkV128(UShort value) ++{ ++ return IRExpr_Const(IRConst_V128(value)); ++} ++ + /* Create an expression node for a 32-bit floating point constant + whose value is given by a bit pattern. 
*/ + static __inline__ IRExpr * +@@ -16249,7 +16256,7 @@ s390_irgen_VLGV(UChar r1, IRTemp op2addr, UChar v3, UChar m4) + static const HChar * + s390_irgen_VGBM(UChar v1, UShort i2, UChar m3 __attribute__((unused))) + { +- put_vr_qw(v1, IRExpr_Const(IRConst_V128(i2))); ++ put_vr_qw(v1, mkV128(i2)); + + return "vgbm"; + } +@@ -18160,11 +18167,11 @@ s390_irgen_VSUM(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + case Ity_I8: + sum = unop(Iop_PwAddL16Ux8, unop(Iop_PwAddL8Ux16, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0001000100010001)); ++ mask = mkV128(0b0001000100010001); + break; + case Ity_I16: + sum = unop(Iop_PwAddL16Ux8, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0011001100110011)); ++ mask = mkV128(0b0011001100110011); + break; + default: + vpanic("s390_irgen_VSUM: invalid type "); +@@ -18185,11 +18192,11 @@ s390_irgen_VSUMG(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + case Ity_I16: + sum = unop(Iop_PwAddL32Ux4, unop(Iop_PwAddL16Ux8, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0000001100000011)); ++ mask = mkV128(0b0000001100000011); + break; + case Ity_I32: + sum = unop(Iop_PwAddL32Ux4, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0000111100001111)); ++ mask = mkV128(0b0000111100001111); + break; + default: + vpanic("s390_irgen_VSUMG: invalid type "); +@@ -18210,11 +18217,11 @@ s390_irgen_VSUMQ(UChar v1, UChar v2, UChar v3, UChar m4) + switch(type) { + case Ity_I32: + sum = unop(Iop_PwAddL64Ux2, unop(Iop_PwAddL32Ux4, get_vr_qw(v2))); +- mask = IRExpr_Const(IRConst_V128(0b0000000000001111)); ++ mask = mkV128(0b0000000000001111); + break; + case Ity_I64: + sum = unop(Iop_PwAddL64Ux2, get_vr_qw(v2)); +- mask = IRExpr_Const(IRConst_V128(0b0000000011111111)); ++ mask = mkV128(0b0000000011111111); + break; + default: + vpanic("s390_irgen_VSUMQ: invalid type "); +@@ -18943,8 +18950,8 @@ s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, + assign(cond, binop(Iop_CmpEQ32, 
mkexpr(result), mkU32(cmp))); + } + put_vr_qw(v1, mkite(mkexpr(cond), +- IRExpr_Const(IRConst_V128(0xffff)), +- IRExpr_Const(IRConst_V128(0)))); ++ mkV128(0xffff), ++ mkV128(0))); + if (s390_vr_is_cs_set(m6)) { + IRTemp cc = newTemp(Ity_I64); + assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); + +commit e78bd78d3043729033b426218ab8c6dae9c51e96 +Author: Andreas Arnez +Date: Thu Mar 18 18:01:10 2021 +0100 + + Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE + + The z/Architecture instructions "vector string range compare" (VSTRC), + "vector find any element equal" (VFAE), and "vector find element + equal" (VFEE) are each implemented with a dirty helper that executes the + instruction. Unfortunately this approach leads to memcheck false + positives, because these instructions may yield a defined result even if + parts of the input vectors are undefined. There are multiple ways this + can happen: Wherever the flags in the fourth operand to VSTRC indicate + "match always" or "match never", the corresponding elements in the third + operand don't affect the result. The same is true for the elements + following the first zero-element in the second operand if the ZS flag is + set, or for the elements following the first matching element, if any. + + Re-implement the instructions without dirty helpers and transform into + lengthy IR instead. 
+ +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index 905429015..49b6cd5dd 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,11 +265,8 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VFAE, +- S390_VEC_OP_VFEE, + S390_VEC_OP_VFENE, + S390_VEC_OP_VISTR, +- S390_VEC_OP_VSTRC, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, + S390_VEC_OP_VGFM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index b71b621ae..63d2e8ce5 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,11 +2538,8 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VFAE] = {0xe7, 0x82}, +- [S390_VEC_OP_VFEE] = {0xe7, 0x80}, + [S390_VEC_OP_VFENE] = {0xe7, 0x81}, + [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, +- [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, + [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, +@@ -2630,8 +2627,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + + case S390_VEC_OP_VPKS: + case S390_VEC_OP_VPKLS: +- case S390_VEC_OP_VFAE: +- case S390_VEC_OP_VFEE: + case S390_VEC_OP_VFENE: + case S390_VEC_OP_VCEQ: + case S390_VEC_OP_VGFM: +@@ -2645,7 +2640,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + the_insn.VRR.m5 = d->m5; + break; + +- case S390_VEC_OP_VSTRC: + case S390_VEC_OP_VGFMA: + case S390_VEC_OP_VMAH: + case S390_VEC_OP_VMALH: +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 7d54cb551..26a947813 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17156,90 +17156,205 @@ s390_irgen_PPNO(UChar r1, UChar r2) + return "ppno"; + } + +-static const HChar * +-s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +-{ +- IRDirty* d; +- IRTemp cc = 
newTemp(Ity_I64); ++enum s390_VStrX { ++ s390_VStrX_VSTRC, ++ s390_VStrX_VFAE, ++ s390_VStrX_VFEE ++}; + +- /* Check for specification exception */ +- vassert(m4 < 3); ++#define S390_VEC_OP3(m, op0, op1, op2) \ ++ (m) == 0 ? op0 : (m) == 1 ? op1 : (m) == 2 ? op2 : Iop_INVALID; + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFAE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++/* Helper function for transforming VSTRC, VFAE, or VFEE. These instructions ++ share much of the same logic. */ ++static void ++s390_irgen_VStrX(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, ++ UChar m6, enum s390_VStrX which_insn) ++{ ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRExpr* tmp; ++ IRExpr* match = NULL; ++ UChar bitwidth = 8 << m5; ++ UChar n_elem = 16 >> m5; ++ IROp sub_op = S390_VEC_OP3(m5, Iop_Sub8x16, Iop_Sub16x8, Iop_Sub32x4); ++ IROp sar_op = S390_VEC_OP3(m5, Iop_SarN8x16, Iop_SarN16x8, Iop_SarN32x4); ++ IROp shl_op = S390_VEC_OP3(m5, Iop_ShlN8x16, Iop_ShlN16x8, Iop_ShlN32x4); ++ IROp dup_op = S390_VEC_OP3(m5, Iop_Dup8x16, Iop_Dup16x8, Iop_Dup32x4); ++ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, ++ Iop_CmpEQ16x8, Iop_CmpEQ32x4); ++ IROp cmpgt_op = S390_VEC_OP3(m5, Iop_CmpGT8Ux16, ++ Iop_CmpGT16Ux8, Iop_CmpGT32Ux4); ++ IROp getelem_op = S390_VEC_OP3(m5, Iop_GetElem8x16, ++ Iop_GetElem16x8, Iop_GetElem32x4); ++ ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); ++ ++ switch (which_insn) { ++ ++ case s390_VStrX_VSTRC: { ++ IRTemp op4 = newTemp(Ity_V128); ++ assign(op4, get_vr_qw(v4)); ++ ++ /* Mask off insignificant range boundaries from op3, i.e., all those for ++ which the corresponding field in op4 has all or no bits set ("match ++ always" / "match never"). 
*/ ++ IRTemp bounds = newTemp(Ity_V128); ++ tmp = unop(Iop_NotV128, ++ binop(cmpeq_op, mkV128(0), ++ binop(sar_op, ++ binop(sub_op, ++ binop(sar_op, mkexpr(op4), ++ mkU8(bitwidth - 3)), ++ mkV128(-1)), ++ mkU8(1)))); ++ assign(bounds, binop(Iop_AndV128, mkexpr(op3), tmp)); ++ ++ IRTemp flags_eq = newTemp(Ity_V128); ++ IRTemp flags_lt = newTemp(Ity_V128); ++ IRTemp flags_gt = newTemp(Ity_V128); ++ assign(flags_eq, binop(sar_op, mkexpr(op4), mkU8(bitwidth - 1))); ++ assign(flags_lt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(1)), ++ mkU8(bitwidth - 1))); ++ assign(flags_gt, binop(sar_op, binop(shl_op, mkexpr(op4), mkU8(2)), ++ mkU8(bitwidth - 1))); ++ ++ for (UChar idx = 0; idx < n_elem; idx += 2) { ++ /* Match according to the even/odd pairs in op3 and op4 at idx */ ++ IRTemp part[2]; ++ ++ for (UChar j = 0; j < 2; j++) { ++ IRTemp a = newTemp(Ity_V128); ++ assign(a, unop(dup_op, ++ binop(getelem_op, mkexpr(bounds), mkU8(idx + j)))); ++ ++ IRExpr* m[] = { ++ binop(cmpeq_op, mkexpr(op2), mkexpr(a)), ++ binop(cmpgt_op, mkexpr(a), mkexpr(op2)), ++ binop(cmpgt_op, mkexpr(op2), mkexpr(a)) ++ }; ++ IRExpr* f[] = { ++ unop(dup_op, binop(getelem_op, mkexpr(flags_eq), mkU8(idx + j))), ++ unop(dup_op, binop(getelem_op, mkexpr(flags_lt), mkU8(idx + j))), ++ unop(dup_op, binop(getelem_op, mkexpr(flags_gt), mkU8(idx + j))) ++ }; ++ part[j] = newTemp(Ity_V128); ++ assign(part[j], binop(Iop_OrV128, ++ binop(Iop_OrV128, ++ binop(Iop_AndV128, f[0], m[0]), ++ binop(Iop_AndV128, f[1], m[1])), ++ binop(Iop_AndV128, f[2], m[2]))); ++ } ++ tmp = binop(Iop_AndV128, mkexpr(part[0]), mkexpr(part[1])); ++ match = idx == 0 ? 
tmp : binop(Iop_OrV128, match, tmp); ++ } ++ break; ++ } + +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ case s390_VStrX_VFAE: ++ for (UChar idx = 0; idx < n_elem; idx++) { ++ IRTemp a = newTemp(Ity_V128); ++ assign(a, binop(cmpeq_op, mkexpr(op2), ++ unop(dup_op, ++ binop(getelem_op, mkexpr(op3), mkU8(idx))))); ++ match = idx == 0 ? mkexpr(a) : binop(Iop_OrV128, match, mkexpr(a)); ++ } ++ break; + +- stmt(IRStmt_Dirty(d)); ++ case s390_VStrX_VFEE: ++ match = binop(cmpeq_op, mkexpr(op2), mkexpr(op3)); ++ break; + +- if (s390_vr_is_cs_set(m5)) { +- s390_cc_set(cc); ++ default: ++ vpanic("s390_irgen_VStrX: unknown insn"); + } + +- return "vfae"; +-} ++ /* Invert first intermediate result if requested */ ++ if (m6 & 8) ++ match = unop(Iop_NotV128, match); + +-static const HChar * +-s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) +-{ +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); ++ IRTemp inter1 = newTemp(Ity_V128); ++ IRTemp inter2 = newTemp(Ity_V128); ++ IRTemp accu = newTemp(Ity_V128); ++ assign(inter1, match); + +- /* Check for specification exception */ +- vassert(m4 < 3); +- vassert((m5 & 0b1100) == 0); ++ /* Determine second intermediate and accumulated result */ ++ if (s390_vr_is_zs_set(m6)) { ++ assign(inter2, binop(cmpeq_op, mkexpr(op2), mkV128(0))); ++ assign(accu, binop(Iop_OrV128, mkexpr(inter1), mkexpr(inter2))); ++ } else { ++ assign(inter2, mkV128(0)); ++ assign(accu, mkexpr(inter1)); ++ } + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = 
S390_VEC_OP_VFEE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; ++ IRTemp accu0 = newTemp(Ity_I64); ++ IRTemp is_match0 = newTemp(Ity_I1); ++ IRTemp mismatch_bits = newTemp(Ity_I64); + +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ assign(accu0, unop(Iop_V128HIto64, mkexpr(accu))); ++ assign(is_match0, binop(Iop_ExpCmpNE64, mkexpr(accu0), mkU64(0))); ++ assign(mismatch_bits, unop(Iop_ClzNat64, ++ mkite(mkexpr(is_match0), mkexpr(accu0), ++ unop(Iop_V128to64, mkexpr(accu))))); + +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ if (m6 & 4) { ++ put_vr_qw(v1, mkexpr(inter1)); ++ } else { ++ /* Determine byte position of first match */ ++ tmp = binop(Iop_Add64, ++ binop(Iop_Shr64, mkexpr(mismatch_bits), mkU8(3)), ++ mkite(mkexpr(is_match0), mkU64(0), mkU64(8))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); ++ } + +- stmt(IRStmt_Dirty(d)); ++ if (s390_vr_is_cs_set(m6)) { ++ /* Set condition code depending on... 
++ zero found ++ n y ++ +------ ++ match n | 3 0 ++ found y | 1 2 */ + +- if (s390_vr_is_cs_set(m5)) { ++ IRTemp cc = newTemp(Ity_I64); ++ ++ tmp = binop(Iop_Shr64, ++ mkite(mkexpr(is_match0), ++ unop(Iop_V128HIto64, mkexpr(inter1)), ++ unop(Iop_V128to64, mkexpr(inter1))), ++ unop(Iop_64to8, ++ binop(Iop_Sub64, mkU64(63), mkexpr(mismatch_bits)))); ++ tmp = binop(Iop_Shl64, tmp, mkU8(1)); ++ if (s390_vr_is_zs_set(m6)) { ++ tmp = binop(Iop_Xor64, tmp, ++ mkite(binop(Iop_ExpCmpNE64, mkU64(0), ++ binop(Iop_Or64, ++ unop(Iop_V128HIto64, mkexpr(inter2)), ++ unop(Iop_V128to64, mkexpr(inter2)))), ++ mkU64(0), ++ mkU64(3))); ++ } else { ++ tmp = binop(Iop_Xor64, tmp, mkU64(3)); ++ } ++ assign(cc, tmp); + s390_cc_set(cc); + } ++ dis_res->hint = Dis_HintVerbose; ++} + ++static const HChar * ++s390_irgen_VFAE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) ++{ ++ s390_insn_assert("vfae", m4 <= 2); ++ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFAE); ++ return "vfae"; ++} ++ ++static const HChar * ++s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) ++{ ++ s390_insn_assert("vfee", m4 < 3 && m5 == (m5 & 3)); ++ s390_irgen_VStrX(v1, v2, v3, 255, m4, m5, s390_VStrX_VFEE); + return "vfee"; + } + +@@ -17406,47 +17521,8 @@ s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) + static const HChar * + s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- /* Check for specification exception */ +- vassert(m5 < 3); +- +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VSTRC; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.v4 = v4; +- details.m4 = m5; +- details.m5 = m6; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); +- +- d->nFxState = 4; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = 
Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); +- d->fxState[2].fx = Ifx_Read; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); +- d->fxState[3].fx = Ifx_Write; +- d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[3].size = sizeof(V128); +- +- stmt(IRStmt_Dirty(d)); +- +- if (s390_vr_is_cs_set(m6)) { +- s390_cc_set(cc); +- } +- ++ s390_insn_assert("vstrc", m5 <= 2); ++ s390_irgen_VStrX(v1, v2, v3, v4, m5, m6, s390_VStrX_VSTRC); + return "vstrc"; + } + + +commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 +Author: Andreas Arnez +Date: Tue Mar 2 14:12:29 2021 +0100 + + Bug 434296 - s390x: Rework IR conversion of VFENE + + So far the z/Architecture instruction "vector find element not + equal" (VFENE) is transformed to a loop. This can cause spurious + "conditional jump or move depends on uninitialised value(s)" messages by + memcheck. Re-implement without a loop. 
+ +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index 49b6cd5dd..caec3108e 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,7 +265,6 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VFENE, + S390_VEC_OP_VISTR, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 63d2e8ce5..2188ce5c1 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VFENE] = {0xe7, 0x81}, + [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, +@@ -2627,7 +2626,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + + case S390_VEC_OP_VPKS: + case S390_VEC_OP_VPKLS: +- case S390_VEC_OP_VFENE: + case S390_VEC_OP_VCEQ: + case S390_VEC_OP_VGFM: + case S390_VEC_OP_VCH: +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 26a947813..c8dc3ec18 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17361,120 +17361,86 @@ s390_irgen_VFEE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + static const HChar * + s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- const Bool negateComparison = True; +- const IRType type = s390_vr_get_type(m4); ++ s390_insn_assert("vfene", m4 < 3 && m5 == (m5 & 3)); + +- /* Check for specification exception */ +- vassert(m4 < 3); +- vassert((m5 & 0b1100) == 0); +- +- static const IROp elementGetters[] = { +- Iop_GetElem8x16, Iop_GetElem16x8, Iop_GetElem32x4 ++ static const IROp compare_op[3] = { ++ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 + }; +- IROp getter = elementGetters[m4]; +- +- static const IROp elementComparators[] = { +- 
Iop_CmpEQ8, Iop_CmpEQ16, Iop_CmpEQ32 ++ static const IROp abs_op[3] = { ++ Iop_Abs8x16, Iop_Abs16x8, Iop_Abs32x4 + }; +- IROp comparator = elementComparators[m4]; +- +- static const IROp resultConverter[] = {Iop_64to8, Iop_64to16, Iop_64to32}; +- IROp converter = resultConverter[m4]; +- +- IRTemp isZeroElem; +- +- IRTemp counter = newTemp(Ity_I64); +- assign(counter, get_counter_dw0()); +- +- IRTemp arg1 = newTemp(type); +- assign(arg1, binop(getter, get_vr_qw(v2), unop(Iop_64to8, mkexpr(counter)))); +- IRTemp arg2 = newTemp(type); +- assign(arg2, binop(getter, get_vr_qw(v3), unop(Iop_64to8, mkexpr(counter)))); ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRTemp op2zero = newTemp(Ity_V128); ++ IRTemp diff = newTemp(Ity_V128); ++ IRTemp diff0 = newTemp(Ity_I64); ++ IRTemp neq0 = newTemp(Ity_I1); ++ IRTemp samebits = newTemp(Ity_I64); ++ IRExpr* tmp; + +- IRTemp isGoodPair = newTemp(Ity_I1); +- if(negateComparison) { +- assign(isGoodPair, unop(Iop_Not1, binop(comparator, mkexpr(arg1), +- mkexpr(arg2)))); +- } else { +- assign(isGoodPair, binop(comparator, mkexpr(arg1), mkexpr(arg2))); +- } ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); + +- if(s390_vr_is_zs_set(m5)) { +- isZeroElem = newTemp(Ity_I1); +- assign(isZeroElem, binop(comparator, mkexpr(arg1), +- unop(converter, mkU64(0)))); ++ tmp = mkV128(0); ++ if (s390_vr_is_zs_set(m5)) { ++ tmp = binop(compare_op[m4], mkexpr(op2), tmp); ++ if (s390_vr_is_cs_set(m5) && v3 != v2) { ++ /* Count leading equal bits in the terminating element too */ ++ tmp = unop(abs_op[m4], tmp); ++ } ++ assign(op2zero, tmp); ++ tmp = mkexpr(op2zero); + } +- +- static const UChar invalidIndices[] = {16, 8, 4}; +- const UChar invalidIndex = invalidIndices[m4]; +- IRTemp endOfVectorIsReached = newTemp(Ity_I1); +- assign(endOfVectorIsReached, binop(Iop_CmpEQ64, mkexpr(counter), +- mkU64(invalidIndex))); +- +- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); +- IRExpr* shouldBreak = 
binop(Iop_Or32, +- unop(Iop_1Uto32, mkexpr(isGoodPair)), +- unop(Iop_1Uto32, mkexpr(endOfVectorIsReached)) +- ); +- if(s390_vr_is_zs_set(m5)) { +- shouldBreak = binop(Iop_Or32, +- shouldBreak, +- unop(Iop_1Uto32, mkexpr(isZeroElem))); +- } +- iterate_if(binop(Iop_CmpEQ32, shouldBreak, mkU32(0))); +- +- IRExpr* foundIndex = binop(Iop_Sub64, get_counter_dw0(), mkU64(1)); +- if(m4 > 0) { +- /* We should return index of byte but we found index of element in +- general case. +- if byte elem (m4 == 0) then indexOfByte = indexOfElement +- if halfword elem (m4 == 1) then indexOfByte = 2 * indexOfElement +- = indexOfElement << 1 +- if word elem (m4 == 2) then indexOfByte = 4 * indexOfElement +- = indexOfElement << 2 +- */ +- foundIndex = binop(Iop_Shl64, foundIndex, mkU8(m4)); ++ if (v3 != v2) { ++ tmp = binop(Iop_XorV128, mkexpr(op2), mkexpr(op3)); ++ if (s390_vr_is_zs_set(m5)) ++ tmp = binop(Iop_OrV128, tmp, mkexpr(op2zero)); + } + +- IRTemp result = newTemp(Ity_I64); +- assign(result, mkite(mkexpr(endOfVectorIsReached), +- mkU64(16), +- foundIndex)); +- put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); ++ assign(diff, tmp); ++ assign(diff0, unop(Iop_V128HIto64, mkexpr(diff))); ++ assign(neq0, binop(Iop_ExpCmpNE64, mkexpr(diff0), mkU64(0))); ++ assign(samebits, unop(Iop_ClzNat64, ++ mkite(mkexpr(neq0), mkexpr(diff0), ++ unop(Iop_V128to64, mkexpr(diff))))); + ++ /* Determine the byte size of the initial equal-elements sequence */ ++ tmp = binop(Iop_Shr64, mkexpr(samebits), mkU8(m4 + 3)); ++ if (m4 != 0) ++ tmp = binop(Iop_Shl64, tmp, mkU8(m4)); ++ tmp = binop(Iop_Add64, tmp, mkite(mkexpr(neq0), mkU64(0), mkU64(8))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, tmp, mkU64(0))); + + if (s390_vr_is_cs_set(m5)) { +- static const IROp to64Converters[] = {Iop_8Uto64, Iop_16Uto64, Iop_32Uto64}; +- IROp to64Converter = to64Converters[m4]; +- +- IRExpr* arg1IsLessThanArg2 = binop(Iop_CmpLT64U, +- unop(to64Converter, mkexpr(arg1)), +- unop(to64Converter, mkexpr(arg2))); +- 
+- IRExpr* ccexp = mkite(binop(Iop_CmpEQ32, +- unop(Iop_1Uto32, mkexpr(isGoodPair)), +- mkU32(1)), +- mkite(arg1IsLessThanArg2, mkU64(1), mkU64(2)), +- mkU64(3)); +- +- if(s390_vr_is_zs_set(m5)) { +- IRExpr* arg2IsZero = binop(comparator, mkexpr(arg2), +- unop(converter, mkU64(0))); +- IRExpr* bothArgsAreZero = binop(Iop_And32, +- unop(Iop_1Uto32, mkexpr(isZeroElem)), +- unop(Iop_1Uto32, arg2IsZero)); +- ccexp = mkite(binop(Iop_CmpEQ32, bothArgsAreZero, mkU32(1)), +- mkU64(0), +- ccexp); +- } ++ /* Set condition code like follows -- ++ 0: operands equal up to and including zero element ++ 1: op2 < op3 2: op2 > op3 3: op2 = op3 */ + IRTemp cc = newTemp(Ity_I64); +- assign(cc, ccexp); +- ++ if (v3 == v2) { ++ tmp = mkU64(0); ++ } else { ++ IRTemp shift = newTemp(Ity_I8); ++ IRExpr* op2half = mkite(mkexpr(neq0), ++ unop(Iop_V128HIto64, mkexpr(op2)), ++ unop(Iop_V128to64, mkexpr(op2))); ++ IRExpr* op3half = mkite(mkexpr(neq0), ++ unop(Iop_V128HIto64, mkexpr(op3)), ++ unop(Iop_V128to64, mkexpr(op3))); ++ assign(shift, unop(Iop_64to8, ++ binop(Iop_Sub64, mkU64(63), mkexpr(samebits)))); ++ tmp = binop(Iop_Or64, ++ binop(Iop_Shl64, ++ binop(Iop_And64, mkU64(1), ++ binop(Iop_Shr64, op2half, mkexpr(shift))), ++ mkU8(1)), ++ binop(Iop_And64, mkU64(1), ++ binop(Iop_Shr64, op3half, mkexpr(shift)))); ++ } ++ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(samebits), mkU64(64)), ++ mkU64(3), tmp)); + s390_cc_set(cc); + } +- +- +- put_counter_dw0(mkU64(0)); ++ dis_res->hint = Dis_HintVerbose; + return "vfene"; + } + + +commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 +Author: Andreas Arnez +Date: Tue Apr 27 20:13:26 2021 +0200 + + Bug 434296 - s390x: Rework IR conversion of VISTR + + The z/Architecture instruction VISTR is currently transformed to a dirty + helper that executes the instruction. This can cause false positives with + memcheck if the input string contains undefined characters after the + string terminator. 
Implement without a dirty helper and emulate the + instruction instead. + +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index caec3108e..24f3798c1 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -265,7 +265,6 @@ typedef enum { + S390_VEC_OP_INVALID = 0, + S390_VEC_OP_VPKS, + S390_VEC_OP_VPKLS, +- S390_VEC_OP_VISTR, + S390_VEC_OP_VCEQ, + S390_VEC_OP_VTM, + S390_VEC_OP_VGFM, +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 2188ce5c1..1e04f601a 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -2538,7 +2538,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + {0x00, 0x00}, /* invalid */ + [S390_VEC_OP_VPKS] = {0xe7, 0x97}, + [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, +- [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, + [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, + [S390_VEC_OP_VTM] = {0xe7, 0xd8}, + [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, +@@ -2610,14 +2609,6 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + the_insn.VRR.op2 = opcodes[d->op][1]; + + switch(d->op) { +- case S390_VEC_OP_VISTR: +- the_insn.VRR.v1 = 1; +- the_insn.VRR.v2 = 2; +- the_insn.VRR.rxb = 0b1100; +- the_insn.VRR.m4 = d->m4; +- the_insn.VRR.m5 = d->m5; +- break; +- + case S390_VEC_OP_VTM: + the_insn.VRR.v1 = 2; + the_insn.VRR.v2 = 3; +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index c8dc3ec18..dfea54259 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17447,40 +17447,34 @@ s390_irgen_VFENE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + static const HChar * + s390_irgen_VISTR(UChar v1, UChar v2, UChar m3, UChar m5) + { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- /* Check for specification exception */ +- vassert(m3 < 3); +- vassert((m5 & 0b1110) == 0); ++ s390_insn_assert("vistr", m3 < 3 && m5 == (m5 & 1)); + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VISTR; +- 
details.v1 = v1; +- details.v2 = v2; +- details.m4 = m3; +- details.m5 = m5; +- +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ static const IROp compare_op[3] = { ++ Iop_CmpEQ8x16, Iop_CmpEQ16x8, Iop_CmpEQ32x4 ++ }; ++ IRExpr* t; ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op2term = newTemp(Ity_V128); ++ IRTemp mask = newTemp(Ity_V128); + +- d->nFxState = 2; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = sizeof(V128); +- d->fxState[1].fx = Ifx_Write; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); ++ assign(op2, get_vr_qw(v2)); ++ assign(op2term, binop(compare_op[m3], mkexpr(op2), mkV128(0))); ++ t = mkexpr(op2term); + +- stmt(IRStmt_Dirty(d)); ++ for (UChar i = m3; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, binop(Iop_OrV128, t, binop(Iop_ShrV128, t, mkU8(8 << i)))); ++ t = mkexpr(s); ++ } ++ assign(mask, unop(Iop_NotV128, t)); ++ put_vr_qw(v1, binop(Iop_AndV128, mkexpr(op2), mkexpr(mask))); + + if (s390_vr_is_cs_set(m5)) { ++ IRTemp cc = newTemp(Ity_I64); ++ assign(cc, binop(Iop_And64, mkU64(3), unop(Iop_V128to64, mkexpr(mask)))); + s390_cc_set(cc); + } +- ++ dis_res->hint = Dis_HintVerbose; + return "vistr"; + } + + +commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 +Author: Andreas Arnez +Date: Fri Apr 16 12:44:44 2021 +0200 + + Bug 434296 - s390x: Add memcheck test cases for vector string insns + + Bug 434296 addresses memcheck false positives with the vector string + instructions VISTR, VSTRC, VFAE, VFEE, and VFENE. Add test cases that + verify the fix for that bug. Without the fix, memcheck yields many + complains with these tests, most of which are false positives. 
+ +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index e4e69eb38..d183841ef 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cdsg cu21 cu42 ltgjhe ++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr + + check_PROGRAMS = $(INSN_TESTS) + +@@ -14,3 +14,7 @@ EXTRA_DIST = \ + AM_CFLAGS += @FLAG_M64@ + AM_CXXFLAGS += @FLAG_M64@ + AM_CCASFLAGS += @FLAG_M64@ ++ ++vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 ++vfae_CFLAGS = $(AM_CFLAGS) -march=z13 ++vistr_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/memcheck/tests/s390x/vfae.c b/memcheck/tests/s390x/vfae.c +new file mode 100644 +index 000000000..68781e7fb +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.c +@@ -0,0 +1,72 @@ ++#include ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static char_v to_char_vec(const char *str) ++{ ++ char_v v; ++ char buf[17]; ++ int len = strlen(str); ++ ++ memcpy(buf, str, (len && str[len - 1] == '~') ? 
len - 1 : len + 1); ++ v = *(char_v *) buf; ++ return v; ++} ++ ++#define GENERATE_TEST(mnem) \ ++static void test_ ## mnem ## _char(const char *str, const char *match, \ ++ int expect_res, int expect_cc) \ ++{ \ ++ int cc; \ ++ char_v v1; \ ++ char_v v2 = to_char_vec(str); \ ++ char_v v3 = to_char_vec(match); \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ #mnem " %[v1],%[v2],%[v3],0,3\n\t" \ ++ "ipm %[cc]\n\t" \ ++ "srl %[cc],28" \ ++ : [v1] "=v" (v1), \ ++ [cc] "=d" (cc) \ ++ : [v2] "v" (v2), \ ++ [v3] "v" (v3) \ ++ : "cc"); \ ++ \ ++ tmp = hex_digit[v1[7] & 0x1f]; \ ++ if (expect_res >= 0 && v1[7] != expect_res) \ ++ printf("result %u != %d\n", v1[7], expect_res); \ ++ \ ++ tmp = hex_digit[cc & 0xf]; \ ++ if (expect_cc >= 0 && cc != expect_cc) \ ++ printf("CC %d != %d\n", cc, expect_cc); \ ++} ++ ++GENERATE_TEST(vfae) ++ ++GENERATE_TEST(vfee) ++ ++GENERATE_TEST(vfene) ++ ++int main() ++{ ++ test_vfae_char("not found", "................", 9, 0); ++ test_vfae_char("xy", "zzzzzzzzyyyyyyyy", 1, 2); ++ test_vfae_char("incomplete~", "xxxxxxxxxxxxxxxx", -1, -1); ++ ++ test_vfee_char("same char here", "..........here", 10, 2); ++ test_vfee_char("and here too ...", "_________t~", 9, 1); ++ test_vfee_char("equality!~", "========!!~", 8, -1); ++ ++ test_vfene_char("strings equal", "strings equal", 13, 0); ++ test_vfene_char(hex_digit, hex_digit, 16, 3); ++ test_vfene_char("undef~", "undefined", -1, -1); ++ test_vfene_char("active~", "actually ok", 3, 1); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vfae.stderr.exp b/memcheck/tests/s390x/vfae.stderr.exp +new file mode 100644 +index 000000000..8aad3c87f +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.stderr.exp +@@ -0,0 +1,20 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfae_char (vfae.c:51) ++ by 0x........: main (vfae.c:61) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfae_char (vfae.c:51) ++ by 0x........: main (vfae.c:61) ++ ++Use of uninitialised value of size 8 
++ at 0x........: test_vfee_char (vfae.c:53) ++ by 0x........: main (vfae.c:65) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfene_char (vfae.c:55) ++ by 0x........: main (vfae.c:69) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vfene_char (vfae.c:55) ++ by 0x........: main (vfae.c:69) ++ +diff --git a/memcheck/tests/s390x/vfae.stdout.exp b/memcheck/tests/s390x/vfae.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vfae.vgtest b/memcheck/tests/s390x/vfae.vgtest +new file mode 100644 +index 000000000..ae36c22fe +--- /dev/null ++++ b/memcheck/tests/s390x/vfae.vgtest +@@ -0,0 +1,2 @@ ++prog: vfae ++vgopts: -q +diff --git a/memcheck/tests/s390x/vistr.c b/memcheck/tests/s390x/vistr.c +new file mode 100644 +index 000000000..7ed59b94b +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.c +@@ -0,0 +1,76 @@ ++#include ++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdef"; ++ ++static char_v to_char_vec(const char *str, char_v *maskp) ++{ ++ char buf[17]; ++ char_v v; ++ char_v mask = {0}; ++ ++ for (int i = 0; i < sizeof(buf); i++) { ++ char ch = str[i]; ++ if (ch == '\0') ++ break; ++ else if (ch == '$') { ++ buf[i] = '\0'; ++ mask[i] = -1; ++ } else if (ch != '~') { ++ buf[i] = ch; ++ mask[i] = -1; ++ } ++ } ++ v = *(char_v *) buf; ++ *maskp = mask; ++ return v; ++} ++ ++static void test_vistr_char(const char *str, const char *expect_res, ++ int expect_cc) ++{ ++ int cc, count; ++ char_v v1, mask; ++ char_v v2 = to_char_vec(str, &mask); ++ char_v exp_v1 = to_char_vec(expect_res, &mask); ++ char equal[16]; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ "vistr %[v1],%[v2],0,1\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : [v1] "=v" (v1), ++ [cc] "=d" (cc) ++ : [v2] "v" (v2) ++ : "cc"); ++ ++ *(char_v *) equal = (v1 & mask) == (exp_v1 & mask); ++ if (memchr(equal, 0, 
sizeof(equal))) ++ printf("Result doesn't match `%s'\n", expect_res); ++ ++ count = 0; ++ for (int i = 0; i < 16; i++) { ++ if (v1[i] == 0) count++; ++ } ++ tmp = hex_digit[count]; ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ test_vistr_char("terminated$====~", "terminated$$$$$$", 0); ++ test_vistr_char("undef~~~~~~~~~~~", "undef", -1); ++ test_vistr_char("undef, 2nd half~", "undef, 2nd half", -1); ++ test_vistr_char("Not. Terminated.", "Not. Terminated.", 3); ++ test_vistr_char("partiallyOK~~$~~", "partiallyOK~~$$$", 0); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vistr.stderr.exp b/memcheck/tests/s390x/vistr.stderr.exp +new file mode 100644 +index 000000000..e4f35fd74 +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.stderr.exp +@@ -0,0 +1,20 @@ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:71) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vistr_char (vistr.c:63) ++ by 0x........: main (vistr.c:71) ++ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:72) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vistr_char (vistr.c:63) ++ by 0x........: main (vistr.c:72) ++ ++Conditional jump or move depends on uninitialised value(s) ++ at 0x........: test_vistr_char (vistr.c:59) ++ by 0x........: main (vistr.c:74) ++ +diff --git a/memcheck/tests/s390x/vistr.vgtest b/memcheck/tests/s390x/vistr.vgtest +new file mode 100644 +index 000000000..f99749d85 +--- /dev/null ++++ b/memcheck/tests/s390x/vistr.vgtest +@@ -0,0 +1,2 @@ ++prog: vistr ++vgopts: -q +diff --git a/memcheck/tests/s390x/vstrc.c b/memcheck/tests/s390x/vstrc.c +new file mode 100644 +index 000000000..268e2f858 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.c +@@ -0,0 +1,92 @@ ++#include 
++#include ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++struct vstrc_char_rng { ++ unsigned char range[16]; ++ unsigned char flags[16]; ++}; ++ ++#define RNG_FLAG_EQ 0x80 ++#define RNG_FLAG_LT 0x40 ++#define RNG_FLAG_GT 0x20 ++#define RNG_FLAG_ANY 0xe0 ++#define RNG_FLAG_NONE 0x00 ++ ++volatile char tmp; ++static const char *hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static void test_vstrc_char(const char *str, const struct vstrc_char_rng *rng, ++ int expect_res, int expect_cc) ++{ ++ int cc; ++ char_v v1; ++ char_v v2 = *(const char_v *) str; ++ char_v v3 = *(const char_v *) rng->range; ++ char_v v4 = *(const char_v *) rng->flags; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ "vstrc %[v1],%[v2],%[v3],%[v4],0,3\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : [v1] "=v" (v1), ++ [cc] "=d" (cc) ++ : [v2] "v" (v2), ++ [v3] "v" (v3), ++ [v4] "v" (v4) ++ : "cc"); ++ ++ tmp = hex_digit[v1[7] & 0x1f]; ++ if (expect_res >= 0 && v1[7] != expect_res) ++ printf("result %u != %d\n", v1[7], expect_res); ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ struct vstrc_char_rng rng; ++ char buf[16]; ++ ++ memset(rng.flags, RNG_FLAG_NONE, 16); ++ ++ rng.range[4] = 'z'; ++ rng.flags[4] = RNG_FLAG_GT | RNG_FLAG_EQ; ++ rng.flags[5] = RNG_FLAG_ANY; ++ /* OK: match at the 'z' */ ++ test_vstrc_char("find the z", &rng, 9, 2); ++ ++ rng.flags[12] = RNG_FLAG_GT | RNG_FLAG_EQ; ++ rng.flags[13] = RNG_FLAG_LT | RNG_FLAG_EQ; ++ /* Bad: undefined range */ ++ test_vstrc_char("undefined", &rng, -1, -1); ++ ++ rng.range[12] = 'a'; ++ rng.range[13] = 'c'; ++ /* OK: match at the 'a' */ ++ test_vstrc_char("get the abc", &rng, 8, 2); ++ ++ rng.flags[12] = RNG_FLAG_LT; ++ rng.flags[13] = RNG_FLAG_GT; ++ /* OK: no match up to null terminator */ ++ test_vstrc_char("no match", &rng, 8, 0); ++ ++ /* OK: no match, no null terminator */ ++ 
test_vstrc_char("0123456789abcdef", &rng, 16, 3); ++ ++ buf[0] = 'x'; ++ /* Bad: undefined string */ ++ test_vstrc_char(buf, &rng, -1, -1); ++ ++ buf[1] = 'z'; ++ /* Bad: valid match, but CC undefined */ ++ test_vstrc_char(buf, &rng, 1, -1); ++ ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vstrc.stderr.exp b/memcheck/tests/s390x/vstrc.stderr.exp +new file mode 100644 +index 000000000..c1125bea1 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.stderr.exp +@@ -0,0 +1,20 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:43) ++ by 0x........: main (vstrc.c:68) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:68) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:43) ++ by 0x........: main (vstrc.c:85) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:85) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrc_char (vstrc.c:47) ++ by 0x........: main (vstrc.c:89) ++ +diff --git a/memcheck/tests/s390x/vstrc.stdout.exp b/memcheck/tests/s390x/vstrc.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vstrc.vgtest b/memcheck/tests/s390x/vstrc.vgtest +new file mode 100644 +index 000000000..26f5db99b +--- /dev/null ++++ b/memcheck/tests/s390x/vstrc.vgtest +@@ -0,0 +1,2 @@ ++prog: vstrc ++vgopts: -q + +commit a0bb049ace14ab52d386bb1d49a399f39eec4986 +Author: Andreas Arnez +Date: Tue Mar 23 14:55:09 2021 +0100 + + s390x: Improve handling of amodes without base register + + Addressing modes without a base or index register represent constants. + They can occur in some special cases such as shift operations and when + accessing individual vector elements. Perform some minor improvements to + the handling of such amodes. 
+ +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 6e0734ae0..2587f81a1 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -360,7 +360,8 @@ s390_amode_is_sane(const s390_amode *am) + { + switch (am->tag) { + case S390_AMODE_B12: +- return is_virtual_gpr(am->b) && fits_unsigned_12bit(am->d); ++ return (is_virtual_gpr(am->b) || sameHReg(am->b, s390_hreg_gpr(0))) && ++ fits_unsigned_12bit(am->d); + + case S390_AMODE_B20: + return is_virtual_gpr(am->b) && fits_signed_20bit(am->d); +@@ -378,47 +379,31 @@ s390_amode_is_sane(const s390_amode *am) + } + } + ++static Bool ++s390_amode_is_constant(const s390_amode *am) ++{ ++ return am->tag == S390_AMODE_B12 && sameHReg(am->b, s390_hreg_gpr(0)); ++} ++ + + /* Record the register use of an amode */ + static void + s390_amode_get_reg_usage(HRegUsage *u, const s390_amode *am) + { +- switch (am->tag) { +- case S390_AMODE_B12: +- case S390_AMODE_B20: +- addHRegUse(u, HRmRead, am->b); +- return; +- +- case S390_AMODE_BX12: +- case S390_AMODE_BX20: ++ if (!sameHReg(am->b, s390_hreg_gpr(0))) + addHRegUse(u, HRmRead, am->b); ++ if (!sameHReg(am->x, s390_hreg_gpr(0))) + addHRegUse(u, HRmRead, am->x); +- return; +- +- default: +- vpanic("s390_amode_get_reg_usage"); +- } + } + + + static void + s390_amode_map_regs(HRegRemap *m, s390_amode *am) + { +- switch (am->tag) { +- case S390_AMODE_B12: +- case S390_AMODE_B20: +- am->b = lookupHRegRemap(m, am->b); +- return; +- +- case S390_AMODE_BX12: +- case S390_AMODE_BX20: ++ if (!sameHReg(am->b, s390_hreg_gpr(0))) + am->b = lookupHRegRemap(m, am->b); ++ if (!sameHReg(am->x, s390_hreg_gpr(0))) + am->x = lookupHRegRemap(m, am->x); +- return; +- +- default: +- vpanic("s390_amode_map_regs"); +- } + } + + +@@ -653,6 +638,16 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) + insn->variant.alu.dst, vreg_opnd); + } + ++ /* v-vgetelem , */ ++ if (insn->tag == S390_INSN_VEC_AMODEOP ++ && insn->variant.vec_amodeop.tag == S390_VEC_GET_ELEM ++ 
&& insn->size == 8 ++ && sameHReg(insn->variant.vec_amodeop.op1, vreg) ++ && s390_amode_is_constant(insn->variant.vec_amodeop.op2)) { ++ vreg_am->d += 8 * insn->variant.vec_amodeop.op2->d; ++ return s390_insn_load(insn->size, insn->variant.vec_amodeop.dst, vreg_am); ++ } ++ + /* v- , */ + if (insn->tag == S390_INSN_UNOP + && insn->variant.unop.src.tag == S390_OPND_REG +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 5f79280c0..ceca6836e 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -312,7 +312,18 @@ s390_isel_amode_wrk(ISelEnv *env, IRExpr *expr, + Bool no_index __attribute__((unused)), + Bool short_displacement) + { +- if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { ++ if (expr->tag == Iex_Unop && expr->Iex.Unop.op == Iop_8Uto64 && ++ expr->Iex.Unop.arg->tag == Iex_Const) { ++ UChar value = expr->Iex.Unop.arg->Iex.Const.con->Ico.U8; ++ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); ++ ++ } else if (expr->tag == Iex_Const) { ++ ULong value = expr->Iex.Const.con->Ico.U64; ++ if (ulong_fits_unsigned_12bit(value)) { ++ return s390_amode_b12((Int)value, s390_hreg_gpr(0)); ++ } ++ ++ } else if (expr->tag == Iex_Binop && expr->Iex.Binop.op == Iop_Add64) { + IRExpr *arg1 = expr->Iex.Binop.arg1; + IRExpr *arg2 = expr->Iex.Binop.arg2; + + +commit fd935e238d907d9c523a311ba795077d95ad6912 +Author: Andreas Arnez +Date: Fri Mar 26 19:27:47 2021 +0100 + + s390x: Rework insn "v-vdup" and add "v-vrep" + + So far the only s390x insn for filling a vector with copies of the same + element is "v-vdup" (S390_VEC_DUPLICATE), which replicates the first + element of its vector argument. This is fairly restrictive and can lead + to unnecessarily long code sequences. + + Redefine "v-vdup" to replicate any scalar value instead. And add + "v-vrep" (S390_INSN_VEC_REPLICATE) for replicating any given element of a + vector. 
Select the latter for suitable expressions like + + Iop_Dup8x16(Iop_GetElem8x16(vector_expr, i)) + + This improves the generated code for some vector string instructions, + where a lot of element replications are performed. + +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 2587f81a1..c764d6ef9 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -670,6 +670,14 @@ directReload_S390(HInstr* i, HReg vreg, Short spill_off) + insn->variant.unop.dst, vreg_opnd); + } + ++ /* v-vrep ,, */ ++ if (insn->tag == S390_INSN_VEC_REPLICATE ++ && sameHReg(insn->variant.vec_replicate.op1, vreg)) { ++ vreg_am->d += insn->size * insn->variant.vec_replicate.idx; ++ return s390_insn_unop(insn->size, S390_VEC_DUPLICATE, ++ insn->variant.vec_replicate.dst, vreg_opnd); ++ } ++ + no_match: + return NULL; + } +@@ -1050,6 +1058,11 @@ s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *insn) + addHRegUse(u, HRmRead, insn->variant.vec_triop.op3); + break; + ++ case S390_INSN_VEC_REPLICATE: ++ addHRegUse(u, HRmWrite, insn->variant.vec_replicate.dst); ++ addHRegUse(u, HRmRead, insn->variant.vec_replicate.op1); ++ break; ++ + default: + vpanic("s390_insn_get_reg_usage"); + } +@@ -1433,6 +1446,14 @@ s390_insn_map_regs(HRegRemap *m, s390_insn *insn) + insn->variant.vec_triop.op3 = + lookupHRegRemap(m, insn->variant.vec_triop.op3); + break; ++ ++ case S390_INSN_VEC_REPLICATE: ++ insn->variant.vec_replicate.dst = ++ lookupHRegRemap(m, insn->variant.vec_replicate.dst); ++ insn->variant.vec_replicate.op1 = ++ lookupHRegRemap(m, insn->variant.vec_replicate.op1); ++ break; ++ + default: + vpanic("s390_insn_map_regs"); + } +@@ -1767,7 +1788,39 @@ emit_VRI_VI(UChar *p, ULong op, UChar v1, UShort i2) + + + static UChar * +-emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) ++emit_VRI_VIM(UChar *p, ULong op, UChar v1, UShort i2, UChar m3) ++{ ++ ULong the_insn = op; ++ ULong rxb = s390_update_rxb(0, 1, &v1); ++ ++ the_insn |= ((ULong)v1) 
<< 36; ++ the_insn |= ((ULong)i2) << 16; ++ the_insn |= ((ULong)m3) << 12; ++ the_insn |= ((ULong)rxb)<< 8; ++ ++ return emit_6bytes(p, the_insn); ++} ++ ++ ++static UChar * ++emit_VRI_VVMM(UChar *p, ULong op, UChar v1, UChar v3, UShort i2, UChar m4) ++{ ++ ULong the_insn = op; ++ ULong rxb = s390_update_rxb(0, 1, &v1); ++ rxb = s390_update_rxb(rxb, 2, &v3); ++ ++ the_insn |= ((ULong)v1) << 36; ++ the_insn |= ((ULong)v3) << 32; ++ the_insn |= ((ULong)i2) << 16; ++ the_insn |= ((ULong)m4) << 12; ++ the_insn |= ((ULong)rxb) << 8; ++ ++ return emit_6bytes(p, the_insn); ++} ++ ++ ++static UChar * ++emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2, UChar m3) + { + ULong the_insn = op; + ULong rxb = s390_update_rxb(0, 1, &v1); +@@ -1776,6 +1829,7 @@ emit_VRX(UChar *p, ULong op, UChar v1, UChar x2, UChar b2, UShort d2) + the_insn |= ((ULong)x2) << 32; + the_insn |= ((ULong)b2) << 28; + the_insn |= ((ULong)d2) << 16; ++ the_insn |= ((ULong)m3) << 12; + the_insn |= ((ULong)rxb)<< 8; + + return emit_6bytes(p, the_insn); +@@ -5782,7 +5836,7 @@ s390_emit_VL(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, VR, UDXB), "vl", v1, d2, x2, b2); + +- return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2); ++ return emit_VRX(p, 0xE70000000006ULL, v1, x2, b2, d2, 0); + } + + static UChar * +@@ -5795,13 +5849,23 @@ s390_emit_VLR(UChar *p, UChar v1, UChar v2) + } + + ++static UChar * ++s390_emit_VLREP(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2, UShort m3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, UDXB, UINT), "vlrep", v1, d2, x2, b2, m3); ++ ++ return emit_VRX(p, 0xE70000000005ULL, v1, x2, b2, d2, m3); ++} ++ ++ + static UChar * + s390_emit_VST(UChar *p, UChar v1, UChar x2, UChar b2, UShort d2) + { + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) + s390_disasm(ENC3(MNM, VR, UDXB), "vst", v1, d2, x2, b2); + +- return emit_VRX(p, 0xE7000000000eULL, v1, 
x2, b2, d2); ++ return emit_VRX(p, 0xE7000000000eULL, v1, x2, b2, d2, 0); + } + + +@@ -5912,15 +5976,24 @@ s390_emit_VPKLS(UChar *p, UChar v1, UChar v2, UChar v3, UChar m4) + + + static UChar * +-s390_emit_VREP(UChar *p, UChar v1, UChar v3, UChar m3) ++s390_emit_VREP(UChar *p, UChar v1, UChar v3, UShort i2, UChar m4) + { + if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) +- s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, 0, m3); ++ s390_disasm(ENC5(MNM, VR, VR, UINT, UINT), "vrep", v1, v3, i2, m4); + +- return emit_VRR_VVM(p, 0xE7000000004DULL, v1, v3, m3); ++ return emit_VRI_VVMM(p, 0xE7000000004DULL, v1, v3, i2, m4); + } + + ++static UChar * ++s390_emit_VREPI(UChar *p, UChar v1, UShort i2, UChar m3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, UINT, UINT), "vrepi", v1, i2, m3); ++ ++ return emit_VRI_VIM(p, 0xE70000000045ULL, v1, i2, m3); ++} ++ + + static UChar * + s390_emit_VUPH(UChar *p, UChar v1, UChar v3, UChar m3) +@@ -7560,6 +7633,20 @@ s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t tag, HReg dst, + return insn; + } + ++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, ++ UChar idx) ++{ ++ s390_insn *insn = LibVEX_Alloc_inline(sizeof(s390_insn)); ++ ++ insn->tag = S390_INSN_VEC_REPLICATE; ++ insn->size = size; ++ insn->variant.vec_replicate.dst = dst; ++ insn->variant.vec_replicate.op1 = op1; ++ insn->variant.vec_replicate.idx = idx; ++ ++ return insn; ++} ++ + /*---------------------------------------------------------------*/ + /*--- Debug print ---*/ + /*---------------------------------------------------------------*/ +@@ -8284,6 +8371,13 @@ s390_insn_as_string(const s390_insn *insn) + insn->variant.vec_triop.op3); + break; + ++ case S390_INSN_VEC_REPLICATE: ++ s390_sprintf(buf, "%M %R, %R, %I", "v-vrep", ++ insn->variant.vec_replicate.dst, ++ insn->variant.vec_replicate.op1, ++ insn->variant.vec_replicate.idx); ++ break; ++ + default: goto fail; + } + +@@ -9386,6 +9480,56 @@ 
s390_negate_emit(UChar *buf, const s390_insn *insn) + } + + ++static UChar * ++s390_vec_duplicate_emit(UChar *buf, const s390_insn *insn) ++{ ++ UChar v1 = hregNumber(insn->variant.unop.dst); ++ s390_opnd_RMI opnd = insn->variant.unop.src; ++ UChar r2; ++ ++ switch (opnd.tag) { ++ case S390_OPND_AMODE: { ++ s390_amode* am = opnd.variant.am; ++ UInt b = hregNumber(am->b); ++ UInt x = hregNumber(am->x); ++ UInt d = am->d; ++ ++ if (fits_unsigned_12bit(d)) { ++ return s390_emit_VLREP(buf, v1, x, b, d, ++ s390_getM_from_size(insn->size)); ++ } ++ buf = s390_emit_load_mem(buf, insn->size, R0, am); ++ r2 = R0; ++ goto duplicate_from_gpr; ++ } ++ ++ case S390_OPND_IMMEDIATE: { ++ ULong val = opnd.variant.imm; ++ ++ if (ulong_fits_signed_16bit(val)) { ++ return s390_emit_VREPI(buf, v1, val, s390_getM_from_size(insn->size)); ++ } ++ buf = s390_emit_load_64imm(buf, R0, val); ++ r2 = R0; ++ goto duplicate_from_gpr; ++ } ++ ++ case S390_OPND_REG: ++ r2 = hregNumber(opnd.variant.reg); ++ ++ duplicate_from_gpr: ++ buf = s390_emit_VLVGP(buf, v1, r2, r2); ++ if (insn->size != 8) { ++ buf = s390_emit_VREP(buf, v1, v1, 8 / insn->size - 1, ++ s390_getM_from_size(insn->size)); ++ } ++ return buf; ++ } ++ ++ vpanic("s390_vec_duplicate_emit"); ++} ++ ++ + static UChar * + s390_insn_unop_emit(UChar *buf, const s390_insn *insn) + { +@@ -9405,12 +9549,7 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) + UShort i2 = insn->variant.unop.src.variant.imm; + return s390_emit_VGBM(buf, v1, i2); + } +- case S390_VEC_DUPLICATE: { +- vassert(insn->variant.unop.src.tag == S390_OPND_REG); +- UChar v1 = hregNumber(insn->variant.unop.dst); +- UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); +- return s390_emit_VREP(buf, v1, v2, s390_getM_from_size(insn->size)); +- } ++ case S390_VEC_DUPLICATE: return s390_vec_duplicate_emit(buf, insn); + case S390_VEC_UNPACKLOWS: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); + vassert(insn->size < 8); +@@ -11595,6 +11734,16 @@ 
s390_insn_vec_triop_emit(UChar *buf, const s390_insn *insn) + } + + ++static UChar * ++s390_insn_vec_replicate_emit(UChar *buf, const s390_insn *insn) ++{ ++ UChar v1 = hregNumber(insn->variant.vec_replicate.dst); ++ UChar v2 = hregNumber(insn->variant.vec_replicate.op1); ++ UShort idx = (UShort) insn->variant.vec_replicate.idx; ++ return s390_emit_VREP(buf, v1, v2, idx, s390_getM_from_size(insn->size)); ++} ++ ++ + Int + emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, + Bool mode64, VexEndness endness_host, +@@ -11791,6 +11940,11 @@ emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, const s390_insn *insn, + case S390_INSN_VEC_TRIOP: + end = s390_insn_vec_triop_emit(buf, insn); + break; ++ ++ case S390_INSN_VEC_REPLICATE: ++ end = s390_insn_vec_replicate_emit(buf, insn); ++ break; ++ + fail: + default: + vpanic("emit_S390Instr"); +diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h +index 9b69f4d38..063fd3800 100644 +--- a/VEX/priv/host_s390_defs.h ++++ b/VEX/priv/host_s390_defs.h +@@ -166,7 +166,8 @@ typedef enum { + S390_INSN_VEC_AMODEINTOP, + S390_INSN_VEC_UNOP, + S390_INSN_VEC_BINOP, +- S390_INSN_VEC_TRIOP ++ S390_INSN_VEC_TRIOP, ++ S390_INSN_VEC_REPLICATE + } s390_insn_tag; + + +@@ -738,6 +739,11 @@ typedef struct { + HReg op2; /* 128-bit second operand */ + HReg op3; /* 128-bit third operand */ + } vec_triop; ++ struct { ++ HReg dst; /* 128-bit result */ ++ HReg op1; /* 128-bit first operand */ ++ UChar idx; /* index of element to replicate */ ++ } vec_replicate; + } variant; + } s390_insn; + +@@ -853,6 +859,7 @@ s390_insn *s390_insn_vec_binop(UChar size, s390_vec_binop_t, HReg dst, HReg op1, + HReg op2); + s390_insn *s390_insn_vec_triop(UChar size, s390_vec_triop_t, HReg dst, HReg op1, + HReg op2, HReg op3); ++s390_insn *s390_insn_vec_replicate(UChar size, HReg dst, HReg op1, UChar idx); + + const HChar *s390_insn_as_string(const s390_insn *); + +diff --git a/VEX/priv/host_s390_isel.c 
b/VEX/priv/host_s390_isel.c +index ceca6836e..968122596 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -3778,12 +3778,12 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + /* --------- UNARY OP --------- */ + case Iex_Unop: { +- UChar size_for_int_arg = 0; + HReg dst = INVALID_HREG; + HReg reg1 = INVALID_HREG; + s390_unop_t vec_unop = S390_UNOP_T_INVALID; + s390_vec_binop_t vec_binop = S390_VEC_BINOP_T_INVALID; + IROp op = expr->Iex.Unop.op; ++ IROp arg_op = Iop_INVALID; + IRExpr* arg = expr->Iex.Unop.arg; + switch(op) { + case Iop_NotV128: +@@ -3839,59 +3839,63 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + + case Iop_Dup8x16: +- size = size_for_int_arg = 1; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 1; ++ arg_op = Iop_GetElem8x16; ++ goto Iop_V_dup_wrk; + case Iop_Dup16x8: +- size = size_for_int_arg = 2; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 2; ++ arg_op = Iop_GetElem16x8; ++ goto Iop_V_dup_wrk; + case Iop_Dup32x4: +- size = size_for_int_arg = 4; +- vec_unop = S390_VEC_DUPLICATE; +- goto Iop_V_int_wrk; ++ size = 4; ++ arg_op = Iop_GetElem32x4; ++ goto Iop_V_dup_wrk; ++ ++ Iop_V_dup_wrk: { ++ dst = newVRegV(env); ++ if (arg->tag == Iex_Binop && arg->Iex.Binop.op == arg_op && ++ arg->Iex.Binop.arg2->tag == Iex_Const) { ++ ULong idx; ++ idx = get_const_value_as_ulong(arg->Iex.Binop.arg2-> Iex.Const.con); ++ reg1 = s390_isel_vec_expr(env, arg->Iex.Binop.arg1); ++ addInstr(env, s390_insn_vec_replicate(size, dst, reg1, (UChar)idx)); ++ } else { ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); ++ addInstr(env, s390_insn_unop(size, S390_VEC_DUPLICATE, dst, src)); ++ } ++ return dst; ++ } + + case Iop_Widen8Sto16x8: + size = 1; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen16Sto32x4: + size = 2; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto 
Iop_V_widen_wrk; + case Iop_Widen32Sto64x2: + size = 4; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWS; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen8Uto16x8: + size = 1; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen16Uto32x4: + size = 2; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; ++ goto Iop_V_widen_wrk; + case Iop_Widen32Uto64x2: + size = 4; +- size_for_int_arg = 8; + vec_unop = S390_VEC_UNPACKLOWU; +- goto Iop_V_int_wrk; +- +- Iop_V_int_wrk: { +- HReg vr1 = vec_generate_zeroes(env); +- s390_amode* amode2 = s390_isel_amode(env, IRExpr_Const(IRConst_U64(0))); +- reg1 = s390_isel_int_expr(env, arg); ++ goto Iop_V_widen_wrk; + ++ Iop_V_widen_wrk: { + vassert(vec_unop != S390_UNOP_T_INVALID); +- addInstr(env, +- s390_insn_vec_amodeintop(size_for_int_arg, S390_VEC_SET_ELEM, +- vr1, amode2, reg1)); +- ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg); ++ HReg vr1 = newVRegV(env); ++ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, vr1, src)); + dst = newVRegV(env); + addInstr(env, s390_insn_unop(size, vec_unop, dst, s390_opnd_reg(vr1))); + return dst; + +commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 +Author: Andreas Arnez +Date: Thu Mar 25 18:48:07 2021 +0100 + + s390x: Add support for emitting "vector or with complement" + + In the instruction selector, look out for IR expressions that fit "vector + or with complement (VOC)". Emit when applicable. + + This slighly reduces the generated code sometimes, such as for certain + vector string instructions, where such expressions occur quite frequently. 
+ +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index c764d6ef9..239d9d299 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -5907,6 +5907,15 @@ s390_emit_VO(UChar *p, UChar v1, UChar v2, UChar v3) + return emit_VRR_VVV(p, 0xE7000000006aULL, v1, v2, v3); + } + ++static UChar * ++s390_emit_VOC(UChar *p, UChar v1, UChar v2, UChar v3) ++{ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) ++ s390_disasm(ENC4(MNM, VR, VR, VR), "voc", v1, v2, v3); ++ ++ return emit_VRR_VVV(p, 0xE7000000006fULL, v1, v2, v3); ++} ++ + static UChar * + s390_emit_VX(UChar *p, UChar v1, UChar v2, UChar v3) + { +@@ -8312,6 +8321,7 @@ s390_insn_as_string(const s390_insn *insn) + case S390_VEC_PACK_SATURU: op = "v-vpacksaturu"; break; + case S390_VEC_COMPARE_EQUAL: op = "v-vcmpeq"; break; + case S390_VEC_OR: op = "v-vor"; break; ++ case S390_VEC_ORC: op = "v-vorc"; break; + case S390_VEC_XOR: op = "v-vxor"; break; + case S390_VEC_AND: op = "v-vand"; break; + case S390_VEC_MERGEL: op = "v-vmergel"; break; +@@ -11609,6 +11619,8 @@ s390_insn_vec_binop_emit(UChar *buf, const s390_insn *insn) + return s390_emit_VCEQ(buf, v1, v2, v3, s390_getM_from_size(size)); + case S390_VEC_OR: + return s390_emit_VO(buf, v1, v2, v3); ++ case S390_VEC_ORC: ++ return s390_emit_VOC(buf, v1, v2, v3); + case S390_VEC_XOR: + return s390_emit_VX(buf, v1, v2, v3); + case S390_VEC_AND: +diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h +index 063fd3800..dc116106e 100644 +--- a/VEX/priv/host_s390_defs.h ++++ b/VEX/priv/host_s390_defs.h +@@ -366,6 +366,7 @@ typedef enum { + S390_VEC_PACK_SATURU, + S390_VEC_COMPARE_EQUAL, + S390_VEC_OR, ++ S390_VEC_ORC, + S390_VEC_XOR, + S390_VEC_AND, + S390_VEC_MERGEL, +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 968122596..53d76fe8a 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4102,6 +4102,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + case Iop_OrV128: 
+ size = 16; + vec_binop = S390_VEC_OR; ++ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { ++ IRExpr* orig_arg1 = arg1; ++ arg1 = arg2; ++ arg2 = orig_arg1->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { ++ arg2 = arg2->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } + goto Iop_VV_wrk; + + case Iop_XorV128: + +commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 +Author: Andreas Arnez +Date: Tue Mar 30 17:45:20 2021 +0200 + + s390x: Fix/optimize Iop_64HLtoV128 + + In s390_vr_fill() in guest_s390_toIR.c, filling a vector with two copies + of a 64-bit value is realized with Iop_64HLtoV128, since there is no such + operator as Iop_Dup64x2. But the two args to Iop_64HLtoV128 use the same + expression, referenced twice. Although this hasn't been seen to cause + real trouble yet, it's problematic and potentially inefficient, so change + it: Assign to a temp and pass that twice instead. + + In the instruction selector, if Iop_64HLtoV128 is found to be used for a + duplication as above, select "v-vdup" instead of "v-vinitfromgprs". This + mimicks the behavior we'd get if there actually was an operator + Iop_Dup64x2. 
+ +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index dfea54259..a73dcfb14 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -2299,9 +2299,12 @@ s390_vr_fill(UChar v1, IRExpr *o2) + case Ity_I32: + put_vr_qw(v1, unop(Iop_Dup32x4, o2)); + break; +- case Ity_I64: +- put_vr_qw(v1, binop(Iop_64HLtoV128, o2, o2)); ++ case Ity_I64: { ++ IRTemp val = newTemp(Ity_I64); ++ assign(val, o2); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(val), mkexpr(val))); + break; ++ } + default: + ppIRType(o2type); + vpanic("s390_vr_fill: invalid IRType"); +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 53d76fe8a..ee20c6711 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4662,12 +4662,16 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + } + + case Iop_64HLtoV128: +- reg1 = s390_isel_int_expr(env, arg1); +- reg2 = s390_isel_int_expr(env, arg2); +- +- addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, +- dst, reg1, reg2)); +- ++ if (arg1->tag == Iex_RdTmp && arg2->tag == Iex_RdTmp && ++ arg1->Iex.RdTmp.tmp == arg2->Iex.RdTmp.tmp) { ++ s390_opnd_RMI src = s390_isel_int_expr_RMI(env, arg1); ++ addInstr(env, s390_insn_unop(8, S390_VEC_DUPLICATE, dst, src)); ++ } else { ++ reg1 = s390_isel_int_expr(env, arg1); ++ reg2 = s390_isel_int_expr(env, arg2); ++ addInstr(env, s390_insn_vec_binop(size, S390_VEC_INIT_FROM_GPRS, ++ dst, reg1, reg2)); ++ } + return dst; + + default: + +commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a +Author: Andreas Arnez +Date: Fri May 7 18:13:03 2021 +0200 + + s390x: Add missing stdout.exp for vector string memcheck test + + The file vistr.stdout.exp was missing from commit 32312d588. Add it. 
+ +diff --git a/memcheck/tests/s390x/vistr.stdout.exp b/memcheck/tests/s390x/vistr.stdout.exp +new file mode 100644 +index 000000000..e69de29bb diff --git a/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch b/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch new file mode 100644 index 0000000..959e5f8 --- /dev/null +++ b/SOURCES/valgrind-3.17.0-s390-z13-vec-fix.patch @@ -0,0 +1,46 @@ +commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb +Author: Andreas Arnez +Date: Mon Jun 7 14:01:53 2021 +0200 + + s390x: Don't emit "vector or with complement" on z13 + + The z/Architecture instruction "vector or with complement" (VOC) can be + used as an optimization to combine "vector or" with "vector nor". This is + exploited in Valgrind since commit 6c1cb1a0128b00858b973e. However, VOC + requires the vector-enhancements facility 1, which is not installed on a + z13 CPU. Thus Valgrind can now run into SIGILL on z13 when trying to + execute vector string instructions. + + Fix this by suppressing the VOC optimization unless the + vector-enhancements facility 1 is recognized on the host. 
+ +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index ee20c6711..15ca92a6b 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4102,14 +4102,17 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + case Iop_OrV128: + size = 16; + vec_binop = S390_VEC_OR; +- if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { +- IRExpr* orig_arg1 = arg1; +- arg1 = arg2; +- arg2 = orig_arg1->Iex.Unop.arg; +- vec_binop = S390_VEC_ORC; +- } else if (arg2->tag == Iex_Unop && arg2->Iex.Unop.op == Iop_NotV128) { +- arg2 = arg2->Iex.Unop.arg; +- vec_binop = S390_VEC_ORC; ++ if (s390_host_has_vxe) { ++ if (arg1->tag == Iex_Unop && arg1->Iex.Unop.op == Iop_NotV128) { ++ IRExpr* orig_arg1 = arg1; ++ arg1 = arg2; ++ arg2 = orig_arg1->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } else if (arg2->tag == Iex_Unop && ++ arg2->Iex.Unop.op == Iop_NotV128) { ++ arg2 = arg2->Iex.Unop.arg; ++ vec_binop = S390_VEC_ORC; ++ } + } + goto Iop_VV_wrk; + diff --git a/SOURCES/valgrind-3.17.0-s390-z15.patch b/SOURCES/valgrind-3.17.0-s390-z15.patch new file mode 100644 index 0000000..2ec3c2f --- /dev/null +++ b/SOURCES/valgrind-3.17.0-s390-z15.patch @@ -0,0 +1,2413 @@ +From 3fbde55a5696c9273084ee2c44daca752e407597 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 26 Jan 2021 15:06:47 +0100 +Subject: [PATCH 01/13] s390x: Misc-insn-3, bitwise logical 3-way instructions + +Add support for the instructions NCRK, NCGRK, NNRK, NNGRK, NORK, NOGRK, +NXRK, NXGRK, OCRK, and OCGRK. Introduce a common helper and use it for +the existing instructions NRK, NGRK, XRK, XGRK, ORK, and OGRK as well. 
+--- + VEX/priv/guest_s390_toIR.c | 154 ++++++++++++++++++++++++++----------- + 1 file changed, 109 insertions(+), 45 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index a73dcfb14..f8afd5b96 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -5022,8 +5022,12 @@ s390_irgen_NGR(UChar r1, UChar r2) + return "ngr"; + } + ++/* Helper for bitwise logical instructions with two 32-bit input operands and a ++ 32-bit output operand. `inv3' and `inv' indicate whether to invert (build ++ bitwise complement of) operand 3 or the result, respectively. */ + static const HChar * +-s390_irgen_NRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_logicalK32(UChar r3, UChar r1, UChar r2, ++ const HChar *mnem, IROp op, Bool inv3, Bool inv) + { + IRTemp op2 = newTemp(Ity_I32); + IRTemp op3 = newTemp(Ity_I32); +@@ -5031,15 +5035,19 @@ s390_irgen_NRK(UChar r3, UChar r1, UChar r2) + + assign(op2, get_gpr_w1(r2)); + assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_And32, mkexpr(op2), mkexpr(op3))); ++ IRExpr* tmp = binop(op, mkexpr(op2), ++ inv3 ? unop(Iop_Not32, mkexpr(op3)) : mkexpr(op3)); ++ assign(result, inv ? unop(Iop_Not32, tmp) : tmp); + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); + put_gpr_w1(r1, mkexpr(result)); + +- return "nrk"; ++ return mnem; + } + ++/* Same as s390_irgen_logicalK32, but for 64-bit operands. */ + static const HChar * +-s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_logicalK64(UChar r3, UChar r1, UChar r2, ++ const HChar *mnem, IROp op, Bool inv3, Bool inv) + { + IRTemp op2 = newTemp(Ity_I64); + IRTemp op3 = newTemp(Ity_I64); +@@ -5047,11 +5055,49 @@ s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) + + assign(op2, get_gpr_dw0(r2)); + assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_And64, mkexpr(op2), mkexpr(op3))); ++ IRExpr* tmp = binop(op, mkexpr(op2), ++ inv3 ? unop(Iop_Not64, mkexpr(op3)) : mkexpr(op3)); ++ assign(result, inv ? 
unop(Iop_Not64, tmp) : tmp); + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); + put_gpr_dw0(r1, mkexpr(result)); + +- return "ngrk"; ++ return mnem; ++} ++ ++static const HChar * ++s390_irgen_NRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nrk", Iop_And32, False, False); ++} ++ ++static const HChar * ++s390_irgen_NGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ngrk", Iop_And64, False, False); ++} ++ ++static const HChar * ++s390_irgen_NCRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "ncrk", Iop_And32, True, False); ++} ++ ++static const HChar * ++s390_irgen_NCGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ncgrk", Iop_And64, True, False); ++} ++ ++static const HChar * ++s390_irgen_NNRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nnrk", Iop_And32, False, True); ++} ++ ++static const HChar * ++s390_irgen_NNGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nngrk", Iop_And64, False, True); + } + + static const HChar * +@@ -7071,33 +7117,25 @@ s390_irgen_XGR(UChar r1, UChar r2) + static const HChar * + s390_irgen_XRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I32); +- IRTemp op3 = newTemp(Ity_I32); +- IRTemp result = newTemp(Ity_I32); +- +- assign(op2, get_gpr_w1(r2)); +- assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_Xor32, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_w1(r1, mkexpr(result)); +- +- return "xrk"; ++ return s390_irgen_logicalK32(r3, r1, r2, "xrk", Iop_Xor32, False, False); + } + + static const HChar * + s390_irgen_XGRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I64); +- IRTemp op3 = newTemp(Ity_I64); +- IRTemp result = newTemp(Ity_I64); ++ return s390_irgen_logicalK64(r3, r1, r2, "xgrk", Iop_Xor64, False, False); ++} + +- assign(op2, get_gpr_dw0(r2)); +- 
assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_Xor64, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_dw0(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_NXRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nxrk", Iop_Xor32, False, True); ++} + +- return "xgrk"; ++static const HChar * ++s390_irgen_NXGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nxgrk", Iop_Xor64, False, True); + } + + static const HChar * +@@ -8920,33 +8958,37 @@ s390_irgen_OGR(UChar r1, UChar r2) + static const HChar * + s390_irgen_ORK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I32); +- IRTemp op3 = newTemp(Ity_I32); +- IRTemp result = newTemp(Ity_I32); ++ return s390_irgen_logicalK32(r3, r1, r2, "ork", Iop_Or32, False, False); ++} + +- assign(op2, get_gpr_w1(r2)); +- assign(op3, get_gpr_w1(r3)); +- assign(result, binop(Iop_Or32, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_w1(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "ogrk", Iop_Or64, False, False); ++} + +- return "ork"; ++static const HChar * ++s390_irgen_OCRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "ocrk", Iop_Or32, True, False); + } + + static const HChar * +-s390_irgen_OGRK(UChar r3, UChar r1, UChar r2) ++s390_irgen_OCGRK(UChar r3, UChar r1, UChar r2) + { +- IRTemp op2 = newTemp(Ity_I64); +- IRTemp op3 = newTemp(Ity_I64); +- IRTemp result = newTemp(Ity_I64); ++ return s390_irgen_logicalK64(r3, r1, r2, "ocgrk", Iop_Or64, True, False); ++} + +- assign(op2, get_gpr_dw0(r2)); +- assign(op3, get_gpr_dw0(r3)); +- assign(result, binop(Iop_Or64, mkexpr(op2), mkexpr(op3))); +- s390_cc_thunk_putZ(S390_CC_OP_BITWISE, result); +- put_gpr_dw0(r1, mkexpr(result)); ++static const HChar * ++s390_irgen_NORK(UChar r3, UChar r1, UChar 
r2) ++{ ++ return s390_irgen_logicalK32(r3, r1, r2, "nork", Iop_Or32, False, True); ++} + +- return "ogrk"; ++static const HChar * ++s390_irgen_NOGRK(UChar r3, UChar r1, UChar r2) ++{ ++ return s390_irgen_logicalK64(r3, r1, r2, "nogrk", Iop_Or64, False, True); + } + + static const HChar * +@@ -20031,12 +20073,28 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb961: s390_format_RRF_U0RR(s390_irgen_CLGRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; ++ case 0xb964: s390_format_RRF_R0RR2(s390_irgen_NNGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb965: s390_format_RRF_R0RR2(s390_irgen_OCGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb966: s390_format_RRF_R0RR2(s390_irgen_NOGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb967: s390_format_RRF_R0RR2(s390_irgen_NXGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; + case 0xb972: s390_format_RRF_U0RR(s390_irgen_CRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; + case 0xb973: s390_format_RRF_U0RR(s390_irgen_CLRT, RRF2_m3(ovl), + RRF2_r1(ovl), RRF2_r2(ovl), + S390_XMNM_CAB); goto ok; ++ case 0xb974: s390_format_RRF_R0RR2(s390_irgen_NNRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb975: s390_format_RRF_R0RR2(s390_irgen_OCRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb976: s390_format_RRF_R0RR2(s390_irgen_NORK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; ++ case 0xb977: s390_format_RRF_R0RR2(s390_irgen_NXRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); goto ok; + case 0xb980: s390_format_RRE_RR(s390_irgen_NGR, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; + case 0xb981: s390_format_RRE_RR(s390_irgen_OGR, RRE_r1(ovl), +@@ -20148,6 +20206,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9e5: 
s390_format_RRF_R0RR2(s390_irgen_NCGRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); ++ goto ok; + case 0xb9e6: s390_format_RRF_R0RR2(s390_irgen_OGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20178,6 +20239,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9f4: s390_format_RRF_R0RR2(s390_irgen_NRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9f5: s390_format_RRF_R0RR2(s390_irgen_NCRK, RRF4_r3(ovl), ++ RRF4_r1(ovl), RRF4_r2(ovl)); ++ goto ok; + case 0xb9f6: s390_format_RRF_R0RR2(s390_irgen_ORK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +-- +2.23.0 + +From 748421b31ab6b15cc849bd6b9588ad759b807324 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 27 Jan 2021 18:11:06 +0100 +Subject: [PATCH 02/13] s390x: Misc-insn-3, "select" instructions + +Add support for the instructions SELR, SELGR, and SELFHR. +--- + VEX/priv/guest_s390_toIR.c | 43 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 43 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index f8afd5b96..41265631b 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3113,6 +3113,16 @@ s390_format_RRF_FUFF2(const HChar *(*irgen)(UChar, UChar, UChar, UChar), + s390_disasm(ENC5(MNM, FPR, FPR, FPR, UINT), mnm, r1, r2, r3, m4); + } + ++static void ++s390_format_RRF_RURR(const HChar *(*irgen)(UChar, UChar, UChar, UChar), ++ UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ const HChar *mnm = irgen(r3, m4, r1, r2); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) ++ s390_disasm(ENC5(MNM, GPR, GPR, GPR, UINT), mnm, r1, r3, r2, m4); ++} ++ + static void + s390_format_RRF_R0RR2(const HChar *(*irgen)(UChar r3, UChar r1, UChar r2), + UChar r3, UChar r1, UChar r2) +@@ -19254,6 +19264,30 @@ s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3) + return "vbperm"; + } + ++static const HChar * ++s390_irgen_SELR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, 
s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_w1(r1, mkite(cond, get_gpr_w1(r2), get_gpr_w1(r3))); ++ return "selr"; ++} ++ ++static const HChar * ++s390_irgen_SELGR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_dw0(r1, mkite(cond, get_gpr_dw0(r2), get_gpr_dw0(r3))); ++ return "selgr"; ++} ++ ++static const HChar * ++s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) ++{ ++ IRExpr* cond = binop(Iop_CmpNE32, s390_call_calculate_cond(m4), mkU32(0)); ++ put_gpr_w0(r1, mkite(cond, get_gpr_w0(r2), get_gpr_w0(r3))); ++ return "selfhr"; ++} ++ + /* New insns are added here. + If an insn is contingent on a facility being installed also + check whether the list of supported facilities in function +@@ -20163,6 +20197,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9bd: /* TRTRE */ goto unimplemented; + case 0xb9be: /* SRSTU */ goto unimplemented; + case 0xb9bf: /* TRTE */ goto unimplemented; ++ case 0xb9c0: s390_format_RRF_RURR(s390_irgen_SELFHR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + case 0xb9c8: s390_format_RRF_R0RR2(s390_irgen_AHHHR, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20203,6 +20240,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCGR); goto ok; ++ case 0xb9e3: s390_format_RRF_RURR(s390_irgen_SELGR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + case 0xb9e4: s390_format_RRF_R0RR2(s390_irgen_NGRK, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; +@@ -20233,6 +20273,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9ed: s390_format_RRF_R0RR2(s390_irgen_MSGRKC, RRF4_r3(ovl), + RRF4_r1(ovl), RRF4_r2(ovl)); + goto ok; ++ case 0xb9f0: s390_format_RRF_RURR(s390_irgen_SELR, RRF4_r3(ovl), ++ RRF4_m4(ovl), RRF4_r1(ovl), ++ RRF4_r2(ovl)); goto ok; + 
case 0xb9f2: s390_format_RRF_U0RR(s390_irgen_LOCR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCR); goto ok; +-- +2.23.0 + +From 31cbd583e858f47a86ada087d21a6abc13ba04f2 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 28 Jan 2021 19:47:00 +0100 +Subject: [PATCH 03/13] s390x: Misc-insn-3, new POPCNT variant + +Add support for the new POPCNT variant that has bit 0 of the M3 field set +and yields the total number of one bits in its 64-bit operand. +--- + VEX/priv/guest_s390_toIR.c | 44 ++++++++++++++++++++++++++------------ + 1 file changed, 30 insertions(+), 14 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 41265631b..ca9e6dc03 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3073,6 +3073,20 @@ s390_format_RRF_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), + s390_disasm(ENC3(XMNM, GPR, GPR), xmnm_kind, m3, r1, r2); + } + ++static void ++s390_format_RRFa_U0RR(const HChar *(*irgen)(UChar m3, UChar r1, UChar r2), ++ UChar m3, UChar r1, UChar r2) ++{ ++ const HChar *mnm = irgen(m3, r1, r2); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) { ++ if (m3 != 0) ++ s390_disasm(ENC4(MNM, GPR, GPR, UINT), mnm, r1, r2, m3); ++ else ++ s390_disasm(ENC3(MNM, GPR, GPR), mnm, r1, r2); ++ } ++} ++ + static void + s390_format_RRF_F0FF2(const HChar *(*irgen)(UChar, UChar, UChar), + UChar r3, UChar r1, UChar r2) +@@ -15112,30 +15126,32 @@ s390_irgen_FLOGR(UChar r1, UChar r2) + } + + static const HChar * +-s390_irgen_POPCNT(UChar r1, UChar r2) ++s390_irgen_POPCNT(UChar m3, UChar r1, UChar r2) + { +- Int i; ++ s390_insn_assert("popcnt", (m3 & 7) == 0); ++ ++ static const ULong masks[] = { ++ 0x5555555555555555, 0x3333333333333333, 0x0F0F0F0F0F0F0F0F, ++ 0x00FF00FF00FF00FF, 0x0000FFFF0000FFFF, 0x00000000FFFFFFFF, ++ }; ++ Int i, n; + IRTemp val = newTemp(Ity_I64); +- IRTemp mask[3]; + + assign(val, get_gpr_dw0(r2)); +- for (i = 0; i < 3; i++) { +- mask[i] = newTemp(Ity_I64); +- } 
+- assign(mask[0], mkU64(0x5555555555555555ULL)); +- assign(mask[1], mkU64(0x3333333333333333ULL)); +- assign(mask[2], mkU64(0x0F0F0F0F0F0F0F0FULL)); +- for (i = 0; i < 3; i++) { ++ n = (m3 & 8) ? 6 : 3; ++ for (i = 0; i < n; i++) { ++ IRTemp mask = newTemp(Ity_I64); + IRTemp tmp = newTemp(Ity_I64); + ++ assign (mask, mkU64(masks[i])); + assign(tmp, + binop(Iop_Add64, + binop(Iop_And64, + mkexpr(val), +- mkexpr(mask[i])), ++ mkexpr(mask)), + binop(Iop_And64, + binop(Iop_Shr64, mkexpr(val), mkU8(1 << i)), +- mkexpr(mask[i])))); ++ mkexpr(mask)))); + val = tmp; + } + s390_cc_thunk_putZ(S390_CC_OP_BITWISE, val); +@@ -20235,8 +20251,8 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + case 0xb9e0: s390_format_RRF_U0RR(s390_irgen_LOCFHR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCFHR); goto ok; +- case 0xb9e1: s390_format_RRE_RR(s390_irgen_POPCNT, RRE_r1(ovl), +- RRE_r2(ovl)); goto ok; ++ case 0xb9e1: s390_format_RRFa_U0RR(s390_irgen_POPCNT, RRF3_r3(ovl), ++ RRF3_r1(ovl), RRF3_r2(ovl)); goto ok; + case 0xb9e2: s390_format_RRF_U0RR(s390_irgen_LOCGR, RRF3_r3(ovl), + RRF3_r1(ovl), RRF3_r2(ovl), + S390_XMNM_LOCGR); goto ok; +-- +2.23.0 + +From 64352d57f93711ce76fd481558dcf6d65e26b19f Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Fri, 29 Jan 2021 20:13:05 +0100 +Subject: [PATCH 04/13] s390x: Misc-insn-3, MVCRL + +Add support for the "move right to left" instruction MVCRL. 
+--- + VEX/priv/guest_s390_toIR.c | 47 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 47 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index ca9e6dc03..9f7d98f8c 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -3562,6 +3562,25 @@ s390_format_SS_L0RDRD(const HChar *(*irgen)(UChar, IRTemp, IRTemp), + s390_disasm(ENC3(MNM, UDLB, UDXB), mnm, d1, l, b1, d2, 0, b2); + } + ++static void ++s390_format_SSE_RDRD(const HChar *(*irgen)(IRTemp, IRTemp), ++ UChar b1, UShort d1, UChar b2, UShort d2) ++{ ++ const HChar *mnm; ++ IRTemp op1addr = newTemp(Ity_I64); ++ IRTemp op2addr = newTemp(Ity_I64); ++ ++ assign(op1addr, binop(Iop_Add64, mkU64(d1), b1 != 0 ? get_gpr_dw0(b1) : ++ mkU64(0))); ++ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : ++ mkU64(0))); ++ ++ mnm = irgen(op1addr, op2addr); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) ++ s390_disasm(ENC2(UDXB, UDXB), mnm, d1, 0, b1, d2, 0, b2); ++} ++ + static void + s390_format_SIL_RDI(const HChar *(*irgen)(UShort i2, IRTemp op1addr), + UChar b1, UShort d1, UShort i2) +@@ -13667,6 +13686,31 @@ s390_irgen_MVCIN(UChar length, IRTemp start1, IRTemp start2) + return "mvcin"; + } + ++static const HChar * ++s390_irgen_MVCRL(IRTemp op1addr, IRTemp op2addr) ++{ ++ IRTemp counter = newTemp(Ity_I64); ++ IRTemp offset = newTemp(Ity_I64); ++ ++ assign(counter, get_counter_dw0()); ++ /* offset = length - 1 - counter, where length-1 is specified in r0 */ ++ assign(offset, ++ binop(Iop_Sub64, ++ unop(Iop_16Uto64, ++ binop(Iop_And16, get_gpr_hw3(0), mkU16(0xfff))), ++ mkexpr(counter))); ++ ++ store(binop(Iop_Add64, mkexpr(op1addr), mkexpr(offset)), ++ load(Ity_I8, binop(Iop_Add64, mkexpr(op2addr), mkexpr(offset)))); ++ ++ /* Check for end of field */ ++ put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); ++ iterate_if(binop(Iop_CmpNE64, mkexpr(offset), mkU64(0))); ++ put_counter_dw0(mkU64(0)); ++ ++ return "mvcrl"; ++} ++ + 
static const HChar * + s390_irgen_MVCL(UChar r1, UChar r2) + { +@@ -22217,6 +22261,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe500ULL: /* LASP */ goto unimplemented; + case 0xe501ULL: /* TPROT */ goto unimplemented; + case 0xe502ULL: /* STRAG */ goto unimplemented; ++ case 0xe50aULL: s390_format_SSE_RDRD(s390_irgen_MVCRL, ++ SS_b1(ovl), SS_d1(ovl), ++ SS_b2(ovl), SS_d2(ovl)); goto ok; + case 0xe50eULL: /* MVCSK */ goto unimplemented; + case 0xe50fULL: /* MVCDK */ goto unimplemented; + case 0xe544ULL: s390_format_SIL_RDI(s390_irgen_MVHHI, SIL_b1(ovl), +-- +2.23.0 + +From 6cc4d66cc3a999253d9a57e2b5c75aeb67f77918 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 2 Feb 2021 20:15:02 +0100 +Subject: [PATCH 05/13] s390x: Misc-insn-3, test case + +Add a test case for the new instructions in the miscellaneous instruction +extensions facility 3. +--- + .gitignore | 1 + + none/tests/s390x/Makefile.am | 3 +- + none/tests/s390x/misc3.c | 182 ++++++++++++++++++++++++++++++ + none/tests/s390x/misc3.stderr.exp | 2 + + none/tests/s390x/misc3.stdout.exp | 103 +++++++++++++++++ + none/tests/s390x/misc3.vgtest | 1 + + 6 files changed, 291 insertions(+), 1 deletion(-) + create mode 100644 none/tests/s390x/misc3.c + create mode 100644 none/tests/s390x/misc3.stderr.exp + create mode 100644 none/tests/s390x/misc3.stdout.exp + create mode 100644 none/tests/s390x/misc3.vgtest + +diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am +index a0fb92ef5..2fd45ec1e 100644 +--- a/none/tests/s390x/Makefile.am ++++ b/none/tests/s390x/Makefile.am +@@ -19,7 +19,8 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ + spechelper-ltr spechelper-or \ + spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ + spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ +- vector_float add-z14 sub-z14 mul-z14 bic ++ vector_float add-z14 sub-z14 mul-z14 bic \ ++ misc3 + + if BUILD_DFP_TESTS + INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 
dfptest dfpext dfpconv srnmt pfpo +diff --git a/none/tests/s390x/misc3.c b/none/tests/s390x/misc3.c +new file mode 100644 +index 000000000..ae6e8d4c2 +--- /dev/null ++++ b/none/tests/s390x/misc3.c +@@ -0,0 +1,182 @@ ++#include <stdio.h> ++ ++/* -- Logical instructions -- */ ++ ++#define TEST_GENERATE(opcode,insn) \ ++ static void test_##insn(unsigned long a, unsigned long b) \ ++ { \ ++ unsigned long out = 0xdecaffee42424242; \ ++ int cc; \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ ".insn rrf,0x" #opcode "0000,%[out],%[a],%[b],0\n\t" \ ++ "ipm %[cc]\n\t" \ ++ "srl %[cc],28\n" \ ++ : [out] "+d" (out), \ ++ [cc] "=d" (cc) \ ++ : [a] "d" (a), \ ++ [b] "d" (b) \ ++ : "cc"); \ ++ \ ++ printf("\t%016lx %016lx -> %016lx cc=%d\n", \ ++ a, b, out, cc); \ ++ } ++ ++#define TEST_EXEC(opcode,insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(0, 0); \ ++ test_##insn(0, -1); \ ++ test_##insn(-1, 0); \ ++ test_##insn(-1, -1); \ ++ test_##insn(0x012345678abcdef, 0); \ ++ test_##insn(0x012345678abcdef, -1); \ ++ test_##insn(0x55555555aaaaaaaa, 0xaaaaaaaa55555555); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(b9f5,ncrk); \ ++ XTEST(b9e5,ncgrk); \ ++ XTEST(b974,nnrk); \ ++ XTEST(b964,nngrk); \ ++ XTEST(b976,nork); \ ++ XTEST(b966,nogrk); \ ++ XTEST(b977,nxrk); \ ++ XTEST(b967,nxgrk); \ ++ XTEST(b975,ocrk); \ ++ XTEST(b965,ocgrk); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_logical_insns() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_GENERATE ++#undef TEST_EXEC ++ ++ ++/* -- Full population count -- */ ++ ++static void test_popcnt(unsigned long op2) ++{ ++ unsigned long result; ++ int cc; ++ ++ __asm__(".insn rrf,0xb9e10000,%[result],%[op2],8,0\n\t" ++ "ipm %[cc]\n\t" ++ "srl %[cc],28\n" ++ : [result]"=d" (result), ++ [cc]"=d" (cc) ++ : [op2]"d" (op2) ++ : "cc"); ++ printf("\t%016lx -> %2lu cc=%d\n", op2, result, cc); ++} ++ ++static int test_all_popcnt() ++{ ++ puts("popcnt"); ++ test_popcnt(0); ++ 
test_popcnt(1); ++ test_popcnt(0x8000000000000000); ++ test_popcnt(-1UL); ++ test_popcnt(0xff427e3800556bcd); ++ return 0; ++} ++ ++/* -- Select -- */ ++ ++#define TEST_GENERATE(opcode,insn) \ ++ static void test_##insn(unsigned long a, unsigned long b) \ ++ { \ ++ unsigned long out0 = 0x0cafebad0badcafe; \ ++ unsigned long out1 = 0x0badcafe0cafebad; \ ++ \ ++ __asm__( \ ++ "cr 0,0\n\t" /* Clear CC */ \ ++ ".insn rrf,0x" #opcode "0000,%[out0],%[a],%[b],8\n\t" \ ++ ".insn rrf,0x" #opcode "0000,%[out1],%[a],%[b],7\n\t" \ ++ : [out0] "+d" (out0), \ ++ [out1] "+d" (out1) \ ++ : [a] "d" (a), \ ++ [b] "d" (b) \ ++ : ); \ ++ \ ++ printf("\t%016lx %016lx -> %016lx %016lx\n", \ ++ a, b, out0, out1); \ ++ } ++ ++#define TEST_EXEC(opcode,insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(-1, 0); \ ++ test_##insn(0, -1); \ ++ test_##insn(0x1234567890abcdef, 0xfedcba9876543210); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(b9f0,selr); \ ++ XTEST(b9e3,selgr); \ ++ XTEST(b9c0,selfhr); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_select() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_GENERATE ++#undef TEST_EXEC ++ ++ ++/* -- Move right to left -- */ ++ ++static void test_mvcrl(void *to, void *from, size_t len) ++{ ++ len -= 1; ++ __asm__("lgr 0,%[len]\n\t" ++ ".insn sse,0xe50a00000000,%[to],%[from]\n\t" ++ : [to] "+Q" (*(struct { char c[len]; } *) to) ++ : [from] "Q" (*(struct { char c[len]; } *) from), ++ [len] "d" (len) ++ : ); ++} ++ ++static void test_all_mvcrl() ++{ ++ static const char pattern[] = ++ "abcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXYZ"; ++ char buf[4 * sizeof(pattern) - 2]; ++ ++ test_mvcrl(buf, (char *) pattern, sizeof(pattern)); ++ test_mvcrl(buf + sizeof(pattern) - 1, buf, sizeof(pattern)); ++ test_mvcrl(buf + 2 * sizeof(pattern) - 2, buf, 2 * sizeof(pattern) - 1); ++ test_mvcrl(buf + 32, buf + 10, 63); ++ test_mvcrl(buf + 2, buf + 1, 256); ++ test_mvcrl(buf + 
254, buf + 256, 2); ++ puts("mvcrl"); ++ for (int i = 0; i < 256; i += 64) { ++ printf("\t%.64s\n", buf + i); ++ } ++} ++ ++ ++int main() ++{ ++ test_all_logical_insns(); ++ test_all_popcnt(); ++ test_all_select(); ++ test_all_mvcrl(); ++ return 0; ++} +diff --git a/none/tests/s390x/misc3.stderr.exp b/none/tests/s390x/misc3.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/s390x/misc3.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/s390x/misc3.stdout.exp b/none/tests/s390x/misc3.stdout.exp +new file mode 100644 +index 000000000..caaba4960 +--- /dev/null ++++ b/none/tests/s390x/misc3.stdout.exp +@@ -0,0 +1,103 @@ ++ncrk ++ 0000000000000000 0000000000000000 -> decaffee00000000 cc=0 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffee78abcdef cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 ++ncgrk ++ 0000000000000000 0000000000000000 -> 0000000000000000 cc=0 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> 0012345678abcdef cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 ++nnrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffeeffffffff cc=1 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee87543210 cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeffffffff cc=1 
++nngrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> ffffffffffffffff cc=1 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> ffedcba987543210 cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> ffffffffffffffff cc=1 ++nork ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> decaffee00000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee00000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 ++nogrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> 0000000000000000 cc=0 ++ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0000000000000000 cc=0 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 ++nxrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffee00000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> decaffee87543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffee00000000 cc=0 ++nxgrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> 0000000000000000 cc=0 ++ ffffffffffffffff ffffffffffffffff -> 
ffffffffffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> ffedcba987543210 cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 0000000000000000 cc=0 ++ocrk ++ 0000000000000000 0000000000000000 -> decaffeeffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> decaffee00000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> decaffeeffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> decaffeeffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> decaffeeffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> decaffee78abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> decaffeeaaaaaaaa cc=1 ++ocgrk ++ 0000000000000000 0000000000000000 -> ffffffffffffffff cc=1 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 cc=0 ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff cc=1 ++ ffffffffffffffff ffffffffffffffff -> ffffffffffffffff cc=1 ++ 0012345678abcdef 0000000000000000 -> ffffffffffffffff cc=1 ++ 0012345678abcdef ffffffffffffffff -> 0012345678abcdef cc=1 ++ 55555555aaaaaaaa aaaaaaaa55555555 -> 55555555aaaaaaaa cc=1 ++popcnt ++ 0000000000000000 -> 0 cc=0 ++ 0000000000000001 -> 1 cc=1 ++ 8000000000000000 -> 1 cc=1 ++ ffffffffffffffff -> 64 cc=1 ++ ff427e3800556bcd -> 33 cc=1 ++selr ++ ffffffffffffffff 0000000000000000 -> 0cafebadffffffff 0badcafe00000000 ++ 0000000000000000 ffffffffffffffff -> 0cafebad00000000 0badcafeffffffff ++ 1234567890abcdef fedcba9876543210 -> 0cafebad90abcdef 0badcafe76543210 ++selgr ++ ffffffffffffffff 0000000000000000 -> ffffffffffffffff 0000000000000000 ++ 0000000000000000 ffffffffffffffff -> 0000000000000000 ffffffffffffffff ++ 1234567890abcdef fedcba9876543210 -> 1234567890abcdef fedcba9876543210 ++selfhr ++ ffffffffffffffff 0000000000000000 -> ffffffff0badcafe 000000000cafebad ++ 0000000000000000 ffffffffffffffff -> 000000000badcafe ffffffff0cafebad ++ 1234567890abcdef fedcba9876543210 -> 123456780badcafe fedcba980cafebad ++mvcrl ++ 
abbcdefghijklmnopqrstuvwxyz-01234klmnopqrstuvwxyz-0123456789.ABC ++ DEFGHIJKLMNOPQRSTUVWXYZabcdefghi456789.ABCDEFGHIJKLMNOPQRSTUVWXY ++ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWXY ++ Zabcdefghijklmnopqrstuvwxyz-0123456789.ABCDEFGHIJKLMNOPQRSTUVWZ +diff --git a/none/tests/s390x/misc3.vgtest b/none/tests/s390x/misc3.vgtest +new file mode 100644 +index 000000000..d051a06bd +--- /dev/null ++++ b/none/tests/s390x/misc3.vgtest +@@ -0,0 +1 @@ ++prog: misc3 +-- +2.23.0 + +From 401b51d79886362d1962dc487db45ac91462eaa0 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 7 Apr 2021 12:29:32 +0200 +Subject: [PATCH 06/13] s390x: Vec-enh-2, extend VSL, VSRA, and VSRL + +The vector-enhancements facility 2 extends the existing bitwise vector +shift instructions VSL, VSRA, and VSRL. Now they allow the shift +vector (the third operand) to contain different shift amounts for each +byte. Add support for these new forms. +--- + VEX/priv/guest_s390_toIR.c | 58 ++++++++++++++++++++++++++++++-------- + 1 file changed, 47 insertions(+), 11 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 9f7d98f8c..622d5a02e 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17983,30 +17983,66 @@ s390_irgen_VERLL(UChar v1, IRTemp op2addr, UChar v3, UChar m4) + static const HChar * + s390_irgen_VSL(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); +- +- put_vr_qw(v1, binop(Iop_ShlV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); ++ ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_Shl8x16, mkexpr(a), mkexpr(b)), ++ binop(Iop_Shr8x16, ++ binop(Iop_Shr8x16, ++ binop(Iop_ShlV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsl"; + } + + 
static const HChar * + s390_irgen_VSRL(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); + +- put_vr_qw(v1, binop(Iop_ShrV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_Shr8x16, mkexpr(a), mkexpr(b)), ++ binop(Iop_Shl8x16, ++ binop(Iop_Shl8x16, ++ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsrl"; + } + + static const HChar * + s390_irgen_VSRA(UChar v1, UChar v2, UChar v3) + { +- IRTemp shift_amount = newTemp(Ity_I8); +- assign(shift_amount, binop(Iop_And8, get_vr_b7(v3), mkU8(0b00000111))); +- +- put_vr_qw(v1, binop(Iop_SarV128, get_vr_qw(v2), mkexpr(shift_amount))); ++ IRTemp a = newTemp(Ity_V128); ++ IRTemp b = newTemp(Ity_V128); ++ ++ assign(a, get_vr_qw(v2)); ++ assign(b, get_vr_qw(v3)); ++ ++ /* Shift-right: first byte arithmetically, all others logically */ ++ IRExpr* elems_shifted = ++ binop(Iop_Sar8x16, ++ binop(Iop_Shr8x16, mkexpr(a), ++ binop(Iop_AndV128, mkexpr(b), mkV128(0x7fff))), ++ binop(Iop_AndV128, mkexpr(b), mkV128(0x8000))); ++ /* Then OR the appropriate bits from the byte to the left */ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, elems_shifted, ++ binop(Iop_Shl8x16, ++ binop(Iop_Shl8x16, ++ binop(Iop_ShrV128, mkexpr(a), mkU8(8)), ++ unop(Iop_NotV128, mkexpr(b))), ++ unop(Iop_Dup8x16, mkU8(1))))); + return "vsra"; + } + +-- +2.23.0 + +From 3fdf065d0bf26a02d6d93a812a6571a287379c36 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Thu, 11 Feb 2021 20:02:03 +0100 +Subject: [PATCH 07/13] s390x: Vec-enh-2, extend VCDG, VCDLG, VCGD, and VCLGD + +The vector-enhancements facility 2 extends the vector floating-point +conversion instructions VCDG, VCDLG, VCGD, and VCLGD. 
In addition to +64-bit elements, they now also handle 32-bit elements. Add support for +these new forms. +--- + VEX/priv/guest_s390_toIR.c | 36 ++++++++++++++++++++---------------- + 1 file changed, 20 insertions(+), 16 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 622d5a02e..11271a1c9 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -18794,44 +18794,48 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, + static const HChar * + s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcdg", m3 == 3); +- +- s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcdg", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_I32StoF32 : Iop_I64StoF64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ True, v1, v2, m3, m4, m5); + return "vcdg"; + } + + static const HChar * + s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcdlg", m3 == 3); +- +- s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcdlg", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_I32UtoF32 : Iop_I64UtoF64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ True, v1, v2, m3, m4, m5); + return "vcdlg"; + } + + static const HChar * + s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vcgd", m3 == 3); +- +- s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vcgd", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32S : Iop_F64toI64S, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ m3 == 2 ? 
Ity_I32 : Ity_I64, ++ True, v1, v2, m3, m4, m5); + return "vcgd"; + } + + static const HChar * + s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vclgd", m3 == 3); +- +- s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, +- v1, v2, m3, m4, m5); ++ s390_insn_assert("vclgd", m3 == 2 || m3 == 3); + ++ s390_vector_fp_convert(m3 == 2 ? Iop_F32toI32U : Iop_F64toI64U, ++ m3 == 2 ? Ity_F32 : Ity_F64, ++ m3 == 2 ? Ity_I32 : Ity_I64, ++ True, v1, v2, m3, m4, m5); + return "vclgd"; + } + +-- +2.23.0 + +From d195bf17388572e85474c7ded4b5bd0e4774637d Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 16 Feb 2021 16:19:31 +0100 +Subject: [PATCH 08/13] s390x: Vec-enh-2, VLBR and friends + +Add support for the new byte- and element-swapping vector load/store +instructions VLEBRH, VLEBRG, VLEBRF, VLLEBRZ, VLBRREP, VLBR, VLER, +VSTEBRH, VSTEBRG, VSTEBRF, VSTBR, and VSTER. +--- + VEX/priv/guest_s390_toIR.c | 256 +++++++++++++++++++++++++++++++++++++ + VEX/priv/host_s390_isel.c | 9 ++ + 2 files changed, 265 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 11271a1c9..f65b42705 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -19388,6 +19388,209 @@ s390_irgen_SELFHR(UChar r3, UChar m4, UChar r1, UChar r2) + return "selfhr"; + } + ++/* Helper function that byte-swaps each element of its V128 input operand */ ++static IRExpr * ++s390_byteswap_elements(IRExpr* v, UChar m) ++{ ++ static const ULong perm[4][2] = { ++ { 0x0100030205040706, 0x09080b0a0d0c0f0e }, /* 2-byte elements */ ++ { 0x0302010007060504, 0x0b0a09080f0e0d0c }, /* 4-byte elements */ ++ { 0x0706050403020100, 0x0f0e0d0c0b0a0908 }, /* 8-byte elements */ ++ { 0x0f0e0d0c0b0a0908, 0x0706050403020100 }, /* whole vector */ ++ }; ++ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, ++ mkU64(perm[m - 1][0]), ++ mkU64(perm[m - 1][1]))); ++} ++ ++/* Helper function that reverses the elements of its V128 
input operand */ ++static IRExpr * ++s390_reverse_elements(IRExpr* v, UChar m) ++{ ++ static const ULong perm[3][2] = { ++ { 0x0e0f0c0d0a0b0809, 0x0607040502030001 }, /* 2-byte elements */ ++ { 0x0c0d0e0f08090a0b, 0x0405060700010203 }, /* 4-byte elements */ ++ { 0x08090a0b0c0d0e0f, 0x0001020304050607 }, /* 8-byte elements */ ++ }; ++ return binop(Iop_Perm8x16, v, binop(Iop_64HLtoV128, ++ mkU64(perm[m - 1][0]), ++ mkU64(perm[m - 1][1]))); ++} ++ ++static const HChar * ++s390_irgen_VLBR(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlbr", m3 >= 1 && m3 <= 4); ++ put_vr_qw(v1, s390_byteswap_elements(load(Ity_V128, mkexpr(op2addr)), m3)); ++ return "vlbr"; ++} ++ ++static const HChar * ++s390_irgen_VSTBR(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstbr", m3 >= 1 && m3 <= 4); ++ store(mkexpr(op2addr), s390_byteswap_elements(get_vr_qw(v1), m3)); ++ return "vstbr"; ++} ++ ++static const HChar * ++s390_irgen_VLER(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vler", m3 >= 1 && m3 <= 3); ++ put_vr_qw(v1, s390_reverse_elements(load(Ity_V128, mkexpr(op2addr)), m3)); ++ return "vler"; ++} ++ ++static const HChar * ++s390_irgen_VSTER(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vster", m3 >= 1 && m3 <= 3); ++ store(mkexpr(op2addr), s390_reverse_elements(get_vr_qw(v1), m3)); ++ return "vster"; ++} ++ ++/* Helper function that combines its two V128 operands by replacing element 'to' ++ in 'a' by byte-swapped element 'from' in 'b' */ ++static IRExpr * ++s390_insert_byteswapped(IRExpr* a, IRExpr* b, UChar m, UChar to, UChar from) ++{ ++ UInt elem_size = 1U << m; ++ UInt start = elem_size * to; ++ UInt end = start + elem_size - 1; ++ UInt offs = end + elem_size * from + 16; ++ UInt i; ++ ++ ULong permH = 0; ++ for (i = 0; i < 8; i++) { ++ permH = (permH << 8) | (i >= start && i <= end ? offs - i : i); ++ } ++ ULong permL = 0; ++ for (i = 8; i < 16; i++) { ++ permL = (permL << 8) | (i >= start && i <= end ? 
offs - i : i); ++ } ++ return triop(Iop_Perm8x16x2, a, b, binop(Iop_64HLtoV128, ++ mkU64(permH), mkU64(permL))); ++} ++ ++static const HChar * ++s390_irgen_VLEBRH(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrh", m3 <= 7); ++ IRTemp op2 = newTemp(Ity_I16); ++ assign(op2, load(Ity_I16, mkexpr(op2addr))); ++ put_vr(v1, Ity_I16, m3, binop(Iop_Or16, ++ binop(Iop_Shl16, mkexpr(op2), mkU8(8)), ++ binop(Iop_Shr16, mkexpr(op2), mkU8(8)))); ++ return "vlebrh"; ++} ++ ++static const HChar * ++s390_irgen_VLEBRF(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrf", m3 <= 3); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRTemp op2 = newTemp(Ity_I64); ++ assign(op2, unop(Iop_32Uto64, load(Ity_I32, mkexpr(op2addr)))); ++ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); ++ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 2, m3, 3)); ++ return "vlebrf"; ++} ++ ++static const HChar * ++s390_irgen_VLEBRG(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlebrg", m3 <= 1); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRTemp op2 = newTemp(Ity_I64); ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ IRExpr* b = binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)); ++ put_vr_qw(v1, s390_insert_byteswapped(mkexpr(op1), b, 3, m3, 1)); ++ return "vlebrg"; ++} ++ ++static const HChar * ++s390_irgen_VLBRREP(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vlbrrep", m3 >= 1 && m3 <= 3); ++ static const ULong perm[3] = { ++ 0x0f0e0f0e0f0e0f0e, /* 2-byte element */ ++ 0x0f0e0d0c0f0e0d0c, /* 4-byte element */ ++ 0x0f0e0d0c0b0a0908 /* 8-byte element */ ++ }; ++ IRExpr* permHL = mkU64(perm[m3 - 1]); ++ IRTemp op2 = newTemp(Ity_I64); ++ if (m3 == 3) ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ else ++ assign(op2, unop(m3 == 2 ? 
Iop_32Uto64 : Iop_16Uto64, ++ load(s390_vr_get_type(m3), mkexpr(op2addr)))); ++ put_vr_qw(v1, binop(Iop_Perm8x16, ++ binop(Iop_64HLtoV128, mkexpr(op2), mkexpr(op2)), ++ binop(Iop_64HLtoV128, permHL, permHL))); ++ return "vlbrrep"; ++} ++ ++static const HChar * ++s390_irgen_VLLEBRZ(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vllebrz", (m3 >= 1 && m3 <= 3) || m3 == 6); ++ static const ULong perm[6] = { ++ 0x0000000000000f0e, /* 2-byte element */ ++ 0x000000000f0e0d0c, /* 4-byte element */ ++ 0x0f0e0d0c0b0a0908, /* 8-byte element */ ++ 0, /* invalid (4) */ ++ 0, /* invalid (5) */ ++ 0x0f0e0d0c00000000, /* 4-byte element, left-aligned */ ++ }; ++ IRExpr* permH = mkU64(perm[m3 - 1]); ++ IRTemp op2 = newTemp(Ity_I64); ++ if (m3 == 3) ++ assign(op2, load(Ity_I64, mkexpr(op2addr))); ++ else ++ assign(op2, unop((m3 & 3) == 2 ? Iop_32Uto64 : Iop_16Uto64, ++ load(s390_vr_get_type(m3 & 3), mkexpr(op2addr)))); ++ put_vr_qw(v1, binop(Iop_Perm8x16, ++ binop(Iop_64HLtoV128, mkU64(0), mkexpr(op2)), ++ binop(Iop_64HLtoV128, permH, mkU64(0)))); ++ return "vllebrz"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRH(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrh", m3 <= 7); ++ IRTemp op1 = newTemp(Ity_I16); ++ assign(op1, get_vr(v1, Ity_I16, m3)); ++ store(mkexpr(op2addr), binop(Iop_Or16, ++ binop(Iop_Shl16, mkexpr(op1), mkU8(8)), ++ binop(Iop_Shr16, mkexpr(op1), mkU8(8)))); ++ return "vstebrh"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRF(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrf", m3 <= 3); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRExpr* b = s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 2, 3, m3); ++ store(mkexpr(op2addr), unop(Iop_V128to32, b)); ++ return "vstebrf"; ++} ++ ++static const HChar * ++s390_irgen_VSTEBRG(UChar v1, IRTemp op2addr, UChar m3) ++{ ++ s390_insn_assert("vstebrg", m3 <= 1); ++ IRTemp op1 = newTemp(Ity_V128); ++ assign(op1, get_vr_qw(v1)); ++ IRExpr* b = 
s390_insert_byteswapped(mkexpr(op1), mkexpr(op1), 3, 1, m3); ++ store(mkexpr(op2addr), unop(Iop_V128to64, b)); ++ return "vstebrg"; ++} ++ + /* New insns are added here. + If an insn is contingent on a facility being installed also + check whether the list of supported facilities in function +@@ -21003,6 +21206,59 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + RXY_x2(ovl), RXY_b2(ovl), + RXY_dl2(ovl), + RXY_dh2(ovl)); goto ok; ++ case 0xe60000000001ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRH, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000002ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRG, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000003ULL: s390_format_VRX_VRRDM(s390_irgen_VLEBRF, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000004ULL: s390_format_VRX_VRRDM(s390_irgen_VLLEBRZ, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000005ULL: s390_format_VRX_VRRDM(s390_irgen_VLBRREP, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000006ULL: s390_format_VRX_VRRDM(s390_irgen_VLBR, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000007ULL: s390_format_VRX_VRRDM(s390_irgen_VLER, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe60000000009ULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRH, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000aULL: s390_format_VRX_VRRDM(s390_irgen_VSTEBRG, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000bULL: 
s390_format_VRX_VRRDM(s390_irgen_VSTEBRF, ++ VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000eULL: s390_format_VRX_VRRDM(s390_irgen_VSTBR, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; ++ case 0xe6000000000fULL: s390_format_VRX_VRRDM(s390_irgen_VSTER, VRX_v1(ovl), ++ VRX_x2(ovl), VRX_b2(ovl), ++ VRX_d2(ovl), VRX_m3(ovl), ++ VRX_rxb(ovl)); goto ok; + case 0xe60000000034ULL: /* VPKZ */ goto unimplemented; + case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl), + VSI_b2(ovl), VSI_d2(ovl), +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index ee20c6711..06e195957 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -4189,6 +4189,15 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + return dst; + } + ++ case Iop_Perm8x16: ++ size = 16; ++ reg1 = s390_isel_vec_expr(env, arg1); ++ reg2 = s390_isel_vec_expr(env, arg2); ++ ++ addInstr(env, s390_insn_vec_triop(size, S390_VEC_PERM, ++ dst, reg1, reg1, reg2)); ++ return dst; ++ + case Iop_CmpEQ8x16: + size = 1; + vec_binop = S390_VEC_COMPARE_EQUAL; +-- +2.23.0 + +From f7447f4c73b2d0fb4eb3827c3709f378f6c9c656 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 23 Feb 2021 19:10:37 +0100 +Subject: [PATCH 09/13] s390x: Vec-enh-2, VSLD and VSRD + +Support the new "vector shift left/right double by bit" instructions VSLD +and VSRD. 
+--- + VEX/priv/guest_s390_toIR.c | 50 ++++++++++++++++++++++++++++++++++++++ + 1 file changed, 50 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index f65b42705..aa429d085 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -18228,6 +18228,48 @@ s390_irgen_VSLDB(UChar v1, UChar v2, UChar v3, UChar i4) + return "vsldb"; + } + ++static const HChar * ++s390_irgen_VSLD(UChar v1, UChar v2, UChar v3, UChar i4) ++{ ++ s390_insn_assert("vsld", i4 <= 7); ++ ++ if (i4 == 0) { ++ /* Just copy v2. */ ++ put_vr_qw(v1, get_vr_qw(v2)); ++ } else { ++ /* Concatenate v2's tail with v3's head. */ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(i4)), ++ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(128 - i4)) ++ ) ++ ); ++ } ++ ++ return "vsld"; ++} ++ ++static const HChar * ++s390_irgen_VSRD(UChar v1, UChar v2, UChar v3, UChar i4) ++{ ++ s390_insn_assert("vsrd", i4 <= 7); ++ ++ if (i4 == 0) { ++ /* Just copy v3. */ ++ put_vr_qw(v1, get_vr_qw(v3)); ++ } else { ++ /* Concatenate v2's tail with v3's head. 
*/ ++ put_vr_qw(v1, ++ binop(Iop_OrV128, ++ binop(Iop_ShlV128, get_vr_qw(v2), mkU8(128 - i4)), ++ binop(Iop_ShrV128, get_vr_qw(v3), mkU8(i4)) ++ ) ++ ); ++ } ++ ++ return "vsrd"; ++} ++ + static const HChar * + s390_irgen_VMO(UChar v1, UChar v2, UChar v3, UChar m4) + { +@@ -21541,6 +21583,14 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_rxb(ovl)); goto ok; ++ case 0xe70000000086ULL: s390_format_VRId_VVVI(s390_irgen_VSLD, VRId_v1(ovl), ++ VRId_v2(ovl), VRId_v3(ovl), ++ VRId_i4(ovl), ++ VRId_rxb(ovl)); goto ok; ++ case 0xe70000000087ULL: s390_format_VRId_VVVI(s390_irgen_VSRD, VRId_v1(ovl), ++ VRId_v2(ovl), VRId_v3(ovl), ++ VRId_i4(ovl), ++ VRId_rxb(ovl)); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl), + VRRd_v2(ovl), VRRd_v3(ovl), + VRRd_v4(ovl), VRRd_m5(ovl), +-- +2.23.0 + +From 388082bca7146f8a15814798dbfe570af2aab2a9 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Wed, 10 Mar 2021 19:22:51 +0100 +Subject: [PATCH 10/13] s390x: Vec-enh-2, VSTRS + +Support the new "vector string search" instruction VSTRS. The +implementation is a full emulation and follows a similar approach as for +the other vector string instructions. 
+--- + VEX/priv/guest_s390_toIR.c | 104 +++++++++++++++++++++++++++++++++++++ + 1 file changed, 104 insertions(+) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index aa429d085..46a867475 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -17601,6 +17601,105 @@ s390_irgen_VSTRC(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + return "vstrc"; + } + ++static const HChar * ++s390_irgen_VSTRS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) ++{ ++ s390_insn_assert("vstrs", m5 <= 2 && m6 == (m6 & 2)); ++ ++ IRTemp op2 = newTemp(Ity_V128); ++ IRTemp op3 = newTemp(Ity_V128); ++ IRTemp op4 = newTemp(Ity_I8); ++ IRTemp op2clean = newTemp(Ity_V128); ++ IRTemp op3mask = newTemp(Ity_V128); ++ IRTemp result = newTemp(Ity_V128); ++ IRTemp ccnomatch = newTemp(Ity_I64); ++ IRExpr* tmp; ++ IRExpr* match = NULL; ++ UChar elem_bits = 8 << m5; ++ IROp cmpeq_op = S390_VEC_OP3(m5, Iop_CmpEQ8x16, ++ Iop_CmpEQ16x8, Iop_CmpEQ32x4); ++ ++ assign(op2, get_vr_qw(v2)); ++ assign(op3, get_vr_qw(v3)); ++ assign(op4, get_vr_b7(v4)); ++ ++ tmp = unop(Iop_Dup32x4, ++ unop(Iop_1Sto32, binop(Iop_CmpNE8, mkexpr(op4), mkU8(16)))); ++ tmp = binop(Iop_ShrV128, tmp, binop(Iop_Shl8, mkexpr(op4), mkU8(3))); ++ ++ if (s390_vr_is_zs_set(m6)) { ++ IRTemp op2eos = newTemp(Ity_V128); ++ IRExpr* t; ++ t = binop(cmpeq_op, mkexpr(op2), mkV128(0)); ++ for (UChar i = m5; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, t); ++ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), ++ mkU8(8 << i))); ++ } ++ assign(op2eos, t); ++ assign(op2clean, binop(Iop_AndV128, mkexpr(op2), ++ unop(Iop_NotV128, mkexpr(op2eos)))); ++ assign(ccnomatch, binop(Iop_And64, mkU64(1), ++ unop(Iop_V128to64, mkexpr(op2eos)))); ++ ++ t = binop(cmpeq_op, mkexpr(op3), mkV128(0)); ++ for (UChar i = m5; i < 4; i++) { ++ IRTemp s = newTemp(Ity_V128); ++ assign(s, t); ++ t = binop(Iop_OrV128, mkexpr(s), binop(Iop_ShrV128, mkexpr(s), ++ mkU8(8 << i))); 
++ } ++ tmp = binop(Iop_OrV128, tmp, t); ++ } else { ++ assign(op2clean, mkexpr(op2)); ++ } ++ assign(op3mask, unop(Iop_NotV128, tmp)); ++ ++ for (UChar shift = 0; shift < 128; shift += elem_bits) { ++ IRTemp s = newTemp(Ity_V128); ++ tmp = unop(Iop_NotV128, ++ binop(cmpeq_op, mkexpr(op2clean), ++ binop(Iop_ShrV128, mkexpr(op3), mkU8(shift)))); ++ assign(s, binop(Iop_CmpEQ64x2, mkV128(0), ++ binop(Iop_AndV128, mkexpr(op3mask), ++ binop(Iop_ShlV128, tmp, mkU8(shift))))); ++ tmp = mkexpr(s); ++ if (shift < 64) { ++ tmp = binop(Iop_AndV128, tmp, ++ unop(Iop_Dup16x8, binop(Iop_GetElem16x8, tmp, mkU8(4)))); ++ } ++ tmp = binop(Iop_AndV128, tmp, ++ unop(Iop_Dup16x8, mkU16(1 << (15 - shift / 8)))); ++ if (shift) ++ match = binop(Iop_OrV128, mkexpr(mktemp(Ity_V128, match)), tmp); ++ else ++ match = tmp; ++ } ++ assign(result, unop(Iop_ClzNat64, ++ binop(Iop_Or64, ++ unop(Iop_V128HIto64, match), ++ mkU64((1UL << 48) - 1)))); ++ put_vr_qw(v1, binop(Iop_64HLtoV128, mkexpr(result), mkU64(0))); ++ ++ /* Set condition code. ++ 0: no match, no string terminator in op2 ++ 1: no match, string terminator found ++ 2: full match ++ 3: partial match */ ++ IRTemp cc = newTemp(Ity_I64); ++ tmp = binop(Iop_CmpLE64U, ++ binop(Iop_Add64, mkexpr(result), unop(Iop_8Uto64, mkexpr(op4))), ++ mkU64(16)); ++ assign(cc, mkite(binop(Iop_CmpEQ64, mkexpr(result), mkU64(16)), ++ s390_vr_is_zs_set(m6) ? 
mkexpr(ccnomatch) : mkU64(0), ++ mkite(tmp, mkU64(2), mkU64(3)))); ++ s390_cc_set(cc); ++ ++ dis_res->hint = Dis_HintVerbose; ++ return "vstrs"; ++} ++ + static const HChar * + s390_irgen_VNC(UChar v1, UChar v2, UChar v3) + { +@@ -21596,6 +21695,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + VRRd_v4(ovl), VRRd_m5(ovl), + VRRd_m6(ovl), + VRRd_rxb(ovl)); goto ok; ++ case 0xe7000000008bULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRS, VRRd_v1(ovl), ++ VRRd_v2(ovl), VRRd_v3(ovl), ++ VRRd_v4(ovl), VRRd_m5(ovl), ++ VRRd_m6(ovl), ++ VRRd_rxb(ovl)); goto ok; + case 0xe7000000008cULL: s390_format_VRR_VVVV(s390_irgen_VPERM, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +-- +2.23.0 + +From 8a079b405467fa127c6c311d7ae3c649e76106c6 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 16 Feb 2021 17:52:09 +0100 +Subject: [PATCH 11/13] s390x: Mark arch13 features as supported + +Make the STFLE instruction report the miscellaneous-instruction-extensions +facility 3 and the vector-enhancements facility 2 as supported. Indicate +support for the latter in the HWCAP vector as well. +--- + VEX/priv/guest_s390_helpers.c | 9 +++------ + coregrind/m_initimg/initimg-linux.c | 3 ++- + include/vki/vki-s390x-linux.h | 1 + + 3 files changed, 6 insertions(+), 7 deletions(-) + +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index 1e04f601a..804b92a29 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -356,9 +356,7 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + | s390_stfle_range(51, 55) + /* 56: unassigned */ + /* 57: MSA5, not supported */ +- | s390_stfle_range(58, 60) +- /* 61: miscellaneous-instruction 3, not supported */ +- | s390_stfle_range(62, 63)), ++ | s390_stfle_range(58, 63)), + + /* === 64 .. 
127 === */ + (s390_stfle_range(64, 72) +@@ -384,11 +382,10 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + /* 143: unassigned */ + | s390_stfle_range(144, 145) + /* 146: MSA8, not supported */ +- | s390_stfle_range(147, 147) +- /* 148: vector-enhancements 2, not supported */ +- | s390_stfle_range(149, 149) ++ | s390_stfle_range(147, 149) + /* 150: unassigned */ + /* 151: DEFLATE-conversion, not supported */ ++ /* 152: vector packed decimal enhancement, not supported */ + /* 153: unassigned */ + /* 154: unassigned */ + /* 155: MSA9, not supported */ +diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c +index fc1a32ecf..37d005168 100644 +--- a/coregrind/m_initimg/initimg-linux.c ++++ b/coregrind/m_initimg/initimg-linux.c +@@ -703,7 +703,8 @@ Addr setup_client_stack( void* init_sp, + itself, is not supported by Valgrind. */ + auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1) + | VKI_HWCAP_S390_VXRS +- | VKI_HWCAP_S390_VXRS_EXT); ++ | VKI_HWCAP_S390_VXRS_EXT ++ | VKI_HWCAP_S390_VXRS_EXT2); + } + # elif defined(VGP_arm64_linux) + { +diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h +index 4ab2d3334..71b363029 100644 +--- a/include/vki/vki-s390x-linux.h ++++ b/include/vki/vki-s390x-linux.h +@@ -807,6 +807,7 @@ typedef vki_s390_regs vki_elf_gregset_t; + #define VKI_HWCAP_S390_TE 1024 + #define VKI_HWCAP_S390_VXRS 2048 + #define VKI_HWCAP_S390_VXRS_EXT 8192 ++#define VKI_HWCAP_S390_VXRS_EXT2 32768 + + + //---------------------------------------------------------------------- +-- +2.23.0 + +From 1461d9b8d0b12e55b648fbf50c5dcee30785afa2 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Mon, 17 May 2021 15:34:15 +0200 +Subject: [PATCH 12/13] s390x: Vec-enh-2, test cases + +Add test cases for verifying the new/enhanced instructions in the +vector-enhancements facility 2. For "vector string search" VSTRS add a +memcheck test case. 
+--- + .gitignore | 2 + + memcheck/tests/s390x/Makefile.am | 3 +- + memcheck/tests/s390x/vstrs.c | 68 ++++++ + memcheck/tests/s390x/vstrs.stderr.exp | 16 ++ + memcheck/tests/s390x/vstrs.stdout.exp | 0 + memcheck/tests/s390x/vstrs.vgtest | 2 + + none/tests/s390x/Makefile.am | 3 +- + none/tests/s390x/vec2.c | 314 ++++++++++++++++++++++++++ + none/tests/s390x/vec2.stderr.exp | 2 + + none/tests/s390x/vec2.stdout.exp | 168 ++++++++++++++ + none/tests/s390x/vec2.vgtest | 2 + + tests/s390x_features.c | 4 + + 12 files changed, 582 insertions(+), 2 deletions(-) + create mode 100644 memcheck/tests/s390x/vstrs.c + create mode 100644 memcheck/tests/s390x/vstrs.stderr.exp + create mode 100644 memcheck/tests/s390x/vstrs.stdout.exp + create mode 100644 memcheck/tests/s390x/vstrs.vgtest + create mode 100644 none/tests/s390x/vec2.c + create mode 100644 none/tests/s390x/vec2.stderr.exp + create mode 100644 none/tests/s390x/vec2.stdout.exp + create mode 100644 none/tests/s390x/vec2.vgtest + +diff --git a/memcheck/tests/s390x/Makefile.am b/memcheck/tests/s390x/Makefile.am +index d183841ef..668fd9933 100644 +--- a/memcheck/tests/s390x/Makefile.am ++++ b/memcheck/tests/s390x/Makefile.am +@@ -2,7 +2,7 @@ include $(top_srcdir)/Makefile.tool-tests.am + + dist_noinst_SCRIPTS = filter_stderr + +-INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr ++INSN_TESTS = cdsg cu21 cu42 ltgjhe vstrc vfae vistr vstrs + + check_PROGRAMS = $(INSN_TESTS) + +@@ -18,3 +18,4 @@ AM_CCASFLAGS += @FLAG_M64@ + vstrc_CFLAGS = $(AM_CFLAGS) -march=z13 + vfae_CFLAGS = $(AM_CFLAGS) -march=z13 + vistr_CFLAGS = $(AM_CFLAGS) -march=z13 ++vstrs_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/memcheck/tests/s390x/vstrs.c b/memcheck/tests/s390x/vstrs.c +new file mode 100644 +index 000000000..3354c2e53 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.c +@@ -0,0 +1,68 @@ ++#include <stdio.h> ++#include <string.h> ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef char VECTOR char_v; ++ ++volatile char tmp; ++static const char
*hex_digit = "0123456789abcdefGHIJKLMNOPQRSTUV"; ++ ++static char_v to_char_vec(const char *str) ++{ ++ char buf[17]; ++ char_v v; ++ ++ for (int i = 0; i < sizeof(buf); i++) { ++ char ch = str[i]; ++ if (ch == '\0') ++ break; ++ else if (ch == '$') ++ buf[i] = '\0'; ++ else if (ch != '~') ++ buf[i] = ch; ++ } ++ v = *(char_v *) buf; ++ return v; ++} ++ ++static void test_vstrs_char(const char *haystack, const char *needle, ++ int expect_res, int expect_cc) ++{ ++ int cc; ++ char_v v2val = to_char_vec(haystack); ++ char_v v3val = to_char_vec(needle); ++ ++ register unsigned long VECTOR v4 __asm__("v4") = { strlen(needle), 0 }; ++ register char_v v1 __asm__("v1"); ++ register char_v v2 __asm__("v2") = v2val; ++ register char_v v3 __asm__("v3") = v3val; ++ ++ __asm__( ++ "cr 0,0\n\t" /* Clear CC */ ++ ".short 0xe712,0x3020,0x408b\n\t" /* vstrs %v1,%v2,%v3,%v4,0,2 */ ++ "ipm %[cc]\n\t" ++ "srl %[cc],28" ++ : "=v" (v1), [cc] "=d" (cc) ++ : "v" (v2), "v" (v3), "v" (v4) ++ : "cc"); ++ ++ tmp = hex_digit[v1[7] & 0x1f]; ++ if (expect_res >= 0 && v1[7] != expect_res) ++ printf("result %u != %d\n", v1[7], expect_res); ++ ++ tmp = hex_digit[cc & 0xf]; ++ if (expect_cc >= 0 && cc != expect_cc) ++ printf("CC %d != %d\n", cc, expect_cc); ++} ++ ++int main() ++{ ++ test_vstrs_char("haystack$needle", "needle$haystack", 16, 1); ++ test_vstrs_char("haystack, needle", "needle, haystack", 10, 3); ++ test_vstrs_char("ABCDEFGH", "DEFGHI", -1, -1); ++ test_vstrs_char("match in UNDEF", "UN", 9, 2); ++ test_vstrs_char("after ~ UNDEF", "DEF", -1, -1); ++ test_vstrs_char("", "", 0, 2); ++ return 0; ++} +diff --git a/memcheck/tests/s390x/vstrs.stderr.exp b/memcheck/tests/s390x/vstrs.stderr.exp +new file mode 100644 +index 000000000..c5c3ef705 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.stderr.exp +@@ -0,0 +1,16 @@ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:50) ++ by 0x........: main (vstrs.c:63) ++ ++Use of uninitialised value of size 8 ++ at 
0x........: test_vstrs_char (vstrs.c:54) ++ by 0x........: main (vstrs.c:63) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:50) ++ by 0x........: main (vstrs.c:65) ++ ++Use of uninitialised value of size 8 ++ at 0x........: test_vstrs_char (vstrs.c:54) ++ by 0x........: main (vstrs.c:65) ++ +diff --git a/memcheck/tests/s390x/vstrs.stdout.exp b/memcheck/tests/s390x/vstrs.stdout.exp +new file mode 100644 +index 000000000..e69de29bb +diff --git a/memcheck/tests/s390x/vstrs.vgtest b/memcheck/tests/s390x/vstrs.vgtest +new file mode 100644 +index 000000000..fd2a29873 +--- /dev/null ++++ b/memcheck/tests/s390x/vstrs.vgtest +@@ -0,0 +1,2 @@ ++prog: vstrs ++vgopts: -q +diff --git a/none/tests/s390x/Makefile.am b/none/tests/s390x/Makefile.am +index 2fd45ec1e..ca38db935 100644 +--- a/none/tests/s390x/Makefile.am ++++ b/none/tests/s390x/Makefile.am +@@ -20,7 +20,7 @@ INSN_TESTS = clc clcle cvb cvd icm lpr tcxb lam_stam xc mvst add sub mul \ + spechelper-icm-1 spechelper-icm-2 spechelper-tmll \ + spechelper-tm laa vector lsc2 ppno vector_string vector_integer \ + vector_float add-z14 sub-z14 mul-z14 bic \ +- misc3 ++ misc3 vec2 + + if BUILD_DFP_TESTS + INSN_TESTS += dfp-1 dfp-2 dfp-3 dfp-4 dfptest dfpext dfpconv srnmt pfpo +@@ -74,3 +74,4 @@ lsc2_CFLAGS = -march=z13 -DS390_TESTS_NOCOLOR + vector_string_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=5 + vector_integer_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 + vector_float_CFLAGS = $(AM_CFLAGS) -march=z13 -DS390_TEST_COUNT=4 ++vec2_CFLAGS = $(AM_CFLAGS) -march=z13 +diff --git a/none/tests/s390x/vec2.c b/none/tests/s390x/vec2.c +new file mode 100644 +index 000000000..73b04dee4 +--- /dev/null ++++ b/none/tests/s390x/vec2.c +@@ -0,0 +1,314 @@ ++#include <stdio.h> ++ ++#define VECTOR __attribute__ ((vector_size (16))) ++ ++typedef unsigned long VECTOR ulong_v; ++typedef float VECTOR float_v; ++ ++static const ulong_v vec_a = { 0x0123456789abcdef, 0xfedcba9876543210 }; ++static const
ulong_v vec_b = { 0xfedcba9876543210, 0x0123456789abcdef }; ++static const ulong_v vec_c = { 0x8040201008040201, 0x7fbfdfeff7fbfdfe }; ++static const ulong_v vec_one = { -1, -1 }; ++static const ulong_v vec_ini = { 0x0112233445566778, 0x899aabbccddeeff0 }; ++ ++static const float_v vec_fa = { 16777215., -16777215., 42.5, 10000. }; ++static const float_v vec_fb = { 4., 3., 2., 1. }; ++ ++/* -- Vector shift -- */ ++ ++#define TEST_GENERATE(insn) \ ++ static void test_##insn(ulong_v a, ulong_v b) \ ++ { \ ++ ulong_v out; \ ++ __asm__( \ ++ #insn " %[out],%[a],%[b]" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a), \ ++ [b] "v" (b) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define TEST_EXEC(insn) \ ++ do { \ ++ puts(#insn); \ ++ test_##insn(vec_a, vec_b); \ ++ test_##insn(vec_b, vec_a); \ ++ test_##insn(vec_c, vec_a); \ ++ test_##insn(vec_one, vec_b); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(vsl); \ ++ XTEST(vsrl); \ ++ XTEST(vsra); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_single_bitshifts() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector load element-/byte-swapped -- */ ++ ++#define TEST_EXEC(opc1,opc2,insn,m3) \ ++ do { \ ++ puts(#insn " " #m3); \ ++ test_##insn##_##m3(vec_a); \ ++ test_##insn##_##m3(vec_b); \ ++ } while (0) ++ ++#define TEST_GENERATE(opc1,opc2,insn,m3) \ ++ static void test_##insn##_##m3(ulong_v a) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[out],%[a]," #m3 \ ++ : [out] "+v" (out) \ ++ : [a] "R" (a) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define INSNS \ ++ XTEST(e6,01, vlebrh, 0); \ ++ XTEST(e6,01, vlebrh, 7); \ ++ XTEST(e6,01, vlebrh, 2); \ ++ XTEST(e6,03, vlebrf, 0); \ ++ XTEST(e6,03, vlebrf, 3); \ ++ XTEST(e6,03, vlebrf, 1); \ ++ XTEST(e6,02, vlebrg, 0); \ ++ XTEST(e6,02, vlebrg, 1); \ ++ XTEST(e6,04, vllebrz, 
1); \ ++ XTEST(e6,04, vllebrz, 2); \ ++ XTEST(e6,04, vllebrz, 3); \ ++ XTEST(e6,04, vllebrz, 6); \ ++ XTEST(e6,05, vlbrrep, 1); \ ++ XTEST(e6,05, vlbrrep, 2); \ ++ XTEST(e6,05, vlbrrep, 3); \ ++ XTEST(e6,06, vlbr, 1); \ ++ XTEST(e6,06, vlbr, 2); \ ++ XTEST(e6,06, vlbr, 3); \ ++ XTEST(e6,06, vlbr, 4); \ ++ XTEST(e6,07, vler, 1); \ ++ XTEST(e6,07, vler, 2); \ ++ XTEST(e6,07, vler, 3); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_swapped_loads() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_GENERATE ++ ++/* -- Vector store element-/byte-swapped -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m3) \ ++ static void test_##insn##_##m3(ulong_v a) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrx,0x" #opc1 "00000000" #opc2 ",%[a],%[out]," #m3 \ ++ : [out] "+R" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define INSNS \ ++ XTEST(e6,09, vstebrh, 0); \ ++ XTEST(e6,09, vstebrh, 7); \ ++ XTEST(e6,09, vstebrh, 2); \ ++ XTEST(e6,0b, vstebrf, 0); \ ++ XTEST(e6,0b, vstebrf, 3); \ ++ XTEST(e6,0b, vstebrf, 1); \ ++ XTEST(e6,0a, vstebrg, 0); \ ++ XTEST(e6,0a, vstebrg, 1); \ ++ XTEST(e6,0e, vstbr, 1); \ ++ XTEST(e6,0e, vstbr, 2); \ ++ XTEST(e6,0e, vstbr, 3); \ ++ XTEST(e6,0e, vstbr, 4); \ ++ XTEST(e6,0f, vster, 1); \ ++ XTEST(e6,0f, vster, 2); \ ++ XTEST(e6,0f, vster, 3); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_swapped_stores() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector shift double by bit -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,i4) \ ++ static void test_##insn##_##i4(ulong_v a, ulong_v b) \ ++ { \ ++ ulong_v out = vec_ini; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],%[b],0," #i4 ",0" \ ++ : [out] "+v" (out) \ ++ : [a] "v" (a), \ ++ [b] "v" (b) \ ++ : ); \ ++ 
printf("\t%016lx %016lx\n", out[0], out[1]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,i4) \ ++ do { \ ++ puts(#insn " " #i4); \ ++ test_##insn##_##i4(vec_a, vec_one); \ ++ test_##insn##_##i4(vec_b, vec_a); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,86,vsld,0); \ ++ XTEST(e7,86,vsld,7); \ ++ XTEST(e7,86,vsld,4); \ ++ XTEST(e7,87,vsrd,0); \ ++ XTEST(e7,87,vsrd,7); \ ++ XTEST(e7,87,vsrd,4); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_double_bitshifts() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector integer -> FP conversions -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m4) \ ++ static void test_##insn##_##m4(ulong_v a) \ ++ { \ ++ float_v out; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],0,2," #m4 ",0" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ if (m4 & 8) \ ++ printf("\t%a - - -\n", out[0]); \ ++ else \ ++ printf("\t%a %a %a %a\n", out[0], out[1], out[2], out[3]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,m4) \ ++ do { \ ++ puts(#insn " " #m4); \ ++ test_##insn##_##m4(vec_a); \ ++ test_##insn##_##m4(vec_c); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,c1,vcfpl,0); \ ++ XTEST(e7,c1,vcfpl,8); \ ++ XTEST(e7,c3,vcfps,0); \ ++ XTEST(e7,c3,vcfps,8); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_int_fp_conversions() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++/* -- Vector FP -> integer conversions -- */ ++ ++#define TEST_GENERATE(opc1,opc2,insn,m4) \ ++ static void test_##insn##_##m4(float_v a) \ ++ { \ ++ unsigned int VECTOR out; \ ++ __asm__( \ ++ ".insn vrr,0x" #opc1 "00000000" #opc2 \ ++ ",%[out],%[a],0,2," #m4 ",0" \ ++ : [out] "=v" (out) \ ++ : [a] "v" (a) \ ++ : ); \ ++ if (m4 & 8) \ ++ printf("\t%08x - - -\n", out[0]); \ ++ else \ ++ printf("\t%08x %08x %08x 
%08x\n", \ ++ out[0], out[1], out[2], out[3]); \ ++ } ++ ++#define TEST_EXEC(opc1,opc2,insn,m4) \ ++ do { \ ++ puts(#insn " " #m4); \ ++ test_##insn##_##m4(vec_fa); \ ++ test_##insn##_##m4(vec_fb); \ ++ } while (0) ++ ++#define INSNS \ ++ XTEST(e7,c0,vclfp,0); \ ++ XTEST(e7,c0,vclfp,8); \ ++ XTEST(e7,c2,vcsfp,0); \ ++ XTEST(e7,c2,vcsfp,8); ++ ++#define XTEST TEST_GENERATE ++INSNS ++#undef XTEST ++ ++static void test_all_fp_int_conversions() ++{ ++#define XTEST TEST_EXEC ++ INSNS ++#undef XTEST ++} ++ ++#undef INSNS ++#undef TEST_EXEC ++#undef TEST_GENERATE ++ ++ ++int main() ++{ ++ test_all_single_bitshifts(); ++ test_all_swapped_loads(); ++ test_all_swapped_stores(); ++ test_all_double_bitshifts(); ++ test_all_int_fp_conversions(); ++ test_all_fp_int_conversions(); ++ return 0; ++} +diff --git a/none/tests/s390x/vec2.stderr.exp b/none/tests/s390x/vec2.stderr.exp +new file mode 100644 +index 000000000..139597f9c +--- /dev/null ++++ b/none/tests/s390x/vec2.stderr.exp +@@ -0,0 +1,2 @@ ++ ++ +diff --git a/none/tests/s390x/vec2.stdout.exp b/none/tests/s390x/vec2.stdout.exp +new file mode 100644 +index 000000000..b32cbe1bc +--- /dev/null ++++ b/none/tests/s390x/vec2.stdout.exp +@@ -0,0 +1,168 @@ ++vsl ++ 483415676abc37ef fde5533beca14200 ++ fde5533beca14200 483415676abc37ef ++ 00010204102040bf effd7feffebff7fe ++ ffffffffffffffff ffffffffffffff80 ++vsrl ++ 0012d1679e9af3ef ffdbe5753bcaa164 ++ 7fdbe5753bcaa164 4012d1679e9af3ef ++ 4008014004002004 05fbf7efbf7ffffe ++ 03ffffffffffffff ffffffffffffffff ++vsra ++ 0012d1679e9af3ef ffdbe5753bcaa164 ++ ffdbe5753bcaa164 4012d1679e9af3ef ++ c008014004002004 05fbf7efbf7ffffe ++ ffffffffffffffff ffffffffffffffff ++vlebrh 0 ++ 2301233445566778 899aabbccddeeff0 ++ dcfe233445566778 899aabbccddeeff0 ++vlebrh 7 ++ 0112233445566778 899aabbccdde2301 ++ 0112233445566778 899aabbccddedcfe ++vlebrh 2 ++ 0112233423016778 899aabbccddeeff0 ++ 01122334dcfe6778 899aabbccddeeff0 ++vlebrf 0 ++ 6745230145566778 899aabbccddeeff0 ++ 98badcfe45566778 
899aabbccddeeff0 ++vlebrf 3 ++ 0112233445566778 899aabbc67452301 ++ 0112233445566778 899aabbc98badcfe ++vlebrf 1 ++ 0112233467452301 899aabbccddeeff0 ++ 0112233498badcfe 899aabbccddeeff0 ++vlebrg 0 ++ efcdab8967452301 899aabbccddeeff0 ++ 1032547698badcfe 899aabbccddeeff0 ++vlebrg 1 ++ 0112233445566778 efcdab8967452301 ++ 0112233445566778 1032547698badcfe ++vllebrz 1 ++ 0000000000002301 0000000000000000 ++ 000000000000dcfe 0000000000000000 ++vllebrz 2 ++ 0000000067452301 0000000000000000 ++ 0000000098badcfe 0000000000000000 ++vllebrz 3 ++ efcdab8967452301 0000000000000000 ++ 1032547698badcfe 0000000000000000 ++vllebrz 6 ++ 6745230100000000 0000000000000000 ++ 98badcfe00000000 0000000000000000 ++vlbrrep 1 ++ 2301230123012301 2301230123012301 ++ dcfedcfedcfedcfe dcfedcfedcfedcfe ++vlbrrep 2 ++ 6745230167452301 6745230167452301 ++ 98badcfe98badcfe 98badcfe98badcfe ++vlbrrep 3 ++ efcdab8967452301 efcdab8967452301 ++ 1032547698badcfe 1032547698badcfe ++vlbr 1 ++ 23016745ab89efcd dcfe98ba54761032 ++ dcfe98ba54761032 23016745ab89efcd ++vlbr 2 ++ 67452301efcdab89 98badcfe10325476 ++ 98badcfe10325476 67452301efcdab89 ++vlbr 3 ++ efcdab8967452301 1032547698badcfe ++ 1032547698badcfe efcdab8967452301 ++vlbr 4 ++ 1032547698badcfe efcdab8967452301 ++ efcdab8967452301 1032547698badcfe ++vler 1 ++ 32107654ba98fedc cdef89ab45670123 ++ cdef89ab45670123 32107654ba98fedc ++vler 2 ++ 76543210fedcba98 89abcdef01234567 ++ 89abcdef01234567 76543210fedcba98 ++vler 3 ++ fedcba9876543210 0123456789abcdef ++ 0123456789abcdef fedcba9876543210 ++vstebrh 0 ++ 2301233445566778 899aabbccddeeff0 ++ dcfe233445566778 899aabbccddeeff0 ++vstebrh 7 ++ 1032233445566778 899aabbccddeeff0 ++ efcd233445566778 899aabbccddeeff0 ++vstebrh 2 ++ ab89233445566778 899aabbccddeeff0 ++ 5476233445566778 899aabbccddeeff0 ++vstebrf 0 ++ 6745230145566778 899aabbccddeeff0 ++ 98badcfe45566778 899aabbccddeeff0 ++vstebrf 3 ++ 1032547645566778 899aabbccddeeff0 ++ efcdab8945566778 899aabbccddeeff0 ++vstebrf 1 ++ 
efcdab8945566778 899aabbccddeeff0 ++ 1032547645566778 899aabbccddeeff0 ++vstebrg 0 ++ efcdab8967452301 899aabbccddeeff0 ++ 1032547698badcfe 899aabbccddeeff0 ++vstebrg 1 ++ 1032547698badcfe 899aabbccddeeff0 ++ efcdab8967452301 899aabbccddeeff0 ++vstbr 1 ++ 23016745ab89efcd dcfe98ba54761032 ++ dcfe98ba54761032 23016745ab89efcd ++vstbr 2 ++ 67452301efcdab89 98badcfe10325476 ++ 98badcfe10325476 67452301efcdab89 ++vstbr 3 ++ efcdab8967452301 1032547698badcfe ++ 1032547698badcfe efcdab8967452301 ++vstbr 4 ++ 1032547698badcfe efcdab8967452301 ++ efcdab8967452301 1032547698badcfe ++vster 1 ++ 32107654ba98fedc cdef89ab45670123 ++ cdef89ab45670123 32107654ba98fedc ++vster 2 ++ 76543210fedcba98 89abcdef01234567 ++ 89abcdef01234567 76543210fedcba98 ++vster 3 ++ fedcba9876543210 0123456789abcdef ++ 0123456789abcdef fedcba9876543210 ++vsld 0 ++ 0123456789abcdef fedcba9876543210 ++ fedcba9876543210 0123456789abcdef ++vsld 7 ++ 91a2b3c4d5e6f7ff 6e5d4c3b2a19087f ++ 6e5d4c3b2a190800 91a2b3c4d5e6f780 ++vsld 4 ++ 123456789abcdeff edcba9876543210f ++ edcba98765432100 123456789abcdef0 ++vsrd 0 ++ ffffffffffffffff ffffffffffffffff ++ 0123456789abcdef fedcba9876543210 ++vsrd 7 ++ 21ffffffffffffff ffffffffffffffff ++ de02468acf13579b dffdb97530eca864 ++vsrd 4 ++ 0fffffffffffffff ffffffffffffffff ++ f0123456789abcde ffedcba987654321 ++vcfpl 0 ++ 0x1.234568p+24 0x1.13579cp+31 0x1.fdb976p+31 0x1.d950c8p+30 ++ 0x1.00804p+31 0x1.00804p+27 0x1.feff8p+30 0x1.eff7fcp+31 ++vcfpl 8 ++ 0x1.234568p+24 - - - ++ 0x1.00804p+31 - - - ++vcfps 0 ++ 0x1.234568p+24 -0x1.d950c8p+30 -0x1.234568p+24 0x1.d950c8p+30 ++ -0x1.feff8p+30 0x1.00804p+27 0x1.feff8p+30 -0x1.00804p+27 ++vcfps 8 ++ 0x1.234568p+24 - - - ++ -0x1.feff8p+30 - - - ++vclfp 0 ++ 00ffffff 00000000 0000002a 00002710 ++ 00000004 00000003 00000002 00000001 ++vclfp 8 ++ 00ffffff - - - ++ 00000004 - - - ++vcsfp 0 ++ 00ffffff ff000001 0000002a 00002710 ++ 00000004 00000003 00000002 00000001 ++vcsfp 8 ++ 00ffffff - - - ++ 00000004 - - - +diff --git 
a/none/tests/s390x/vec2.vgtest b/none/tests/s390x/vec2.vgtest +new file mode 100644 +index 000000000..45e942e64 +--- /dev/null ++++ b/none/tests/s390x/vec2.vgtest +@@ -0,0 +1,2 @@ ++prog: vec2 ++prereq: test -e vec2 && ../../../tests/s390x_features s390x-vx +diff --git a/tests/s390x_features.c b/tests/s390x_features.c +index 25b98f3a3..e7939c463 100644 +--- a/tests/s390x_features.c ++++ b/tests/s390x_features.c +@@ -270,6 +270,10 @@ static int go(char *feature, char *cpu) + match = facilities[0] & FAC_BIT(57); /* message security assist 5 facility */ + } else if (strcmp(feature, "s390x-mi2") == 0 ) { + match = facilities[0] & FAC_BIT(58); ++ } else if (strcmp(feature, "s390x-mi3") == 0 ) { ++ match = facilities[0] & FAC_BIT(61); ++ } else if (strcmp(feature, "s390x-vx2") == 0 ) { ++ match = facilities[2] & FAC_BIT(20); + } else { + return 2; // Unrecognised feature. + } +-- +2.23.0 + +From d9364bc90ee894c43ee742840f806571edc08ab3 Mon Sep 17 00:00:00 2001 +From: Andreas Arnez +Date: Tue, 18 May 2021 19:59:32 +0200 +Subject: [PATCH 13/13] s390x: Wrap up misc-insn-3 and vec-enh-2 support + +Wrap up support for the miscellaneous-instruction-extensions facility 3 +and the vector-enhancements facility 2: Add 'case' statements for the +remaining unhandled arch13 instructions to 'guest_s390_toIR.c', document +the new support in 's390-opcodes.csv', adjust 's390-check-opcodes.pl', and +announce the new feature in 'NEWS'. +--- + NEWS | 5 ++ + VEX/priv/guest_s390_toIR.c | 5 +- + auxprogs/s390-check-opcodes.pl | 22 ++++++++- + docs/internals/s390-opcodes.csv | 81 +++++++++++++++++++++++++++++++-- + 4 files changed, 108 insertions(+), 5 deletions(-) + +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index 46a867475..1bd18f760 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2020 ++ Copyright IBM Corp. 
2010-2021 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -20503,6 +20503,9 @@ s390_decode_4byte_and_irgen(const UChar *bytes) + RRE_r2(ovl)); goto ok; + case 0xb931: s390_format_RRE_RR(s390_irgen_CLGFR, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; ++ case 0xb938: /* SORTL */ goto unimplemented; ++ case 0xb939: /* DFLTCC */ goto unimplemented; ++ case 0xb93a: /* KDSA */ goto unimplemented; + case 0xb93c: s390_format_RRE_RR(s390_irgen_PPNO, RRE_r1(ovl), + RRE_r2(ovl)); goto ok; + case 0xb93e: /* KIMD */ goto unimplemented; +-- +2.23.0 + diff --git a/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch b/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch new file mode 100644 index 0000000..39c956c --- /dev/null +++ b/SOURCES/valgrind-3.17.0-s390_insn_as_string.patch @@ -0,0 +1,54 @@ +commit 45873298ff2d17accc65654d64758360616aade5 +Author: Andreas Arnez +Date: Tue Mar 30 18:10:43 2021 +0200 + + s390x: Add missing UNOP insns to s390_insn_as_string + + Some unary operator insns are not handled by s390_insn_as_string(). If + they are encountered while the appropriate trace flag is set, a vpanic + occurs. Fix this: add handling for the missing insns. 
+ +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 8762975b2..6e0734ae0 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -7860,12 +7860,24 @@ s390_insn_as_string(const s390_insn *insn) + op = "v-vunpacku"; + break; + +- case S390_VEC_FLOAT_NEG: +- op = "v-vfloatneg"; ++ case S390_VEC_ABS: ++ op = "v-vabs"; + break; + +- case S390_VEC_FLOAT_SQRT: +- op = "v-vfloatsqrt"; ++ case S390_VEC_COUNT_LEADING_ZEROES: ++ op = "v-vclz"; ++ break; ++ ++ case S390_VEC_COUNT_TRAILING_ZEROES: ++ op = "v-vctz"; ++ break; ++ ++ case S390_VEC_COUNT_ONES: ++ op = "v-vpopct"; ++ break; ++ ++ case S390_VEC_FLOAT_NEG: ++ op = "v-vfloatneg"; + break; + + case S390_VEC_FLOAT_ABS: +@@ -7876,6 +7888,10 @@ s390_insn_as_string(const s390_insn *insn) + op = "v-vfloatnabs"; + break; + ++ case S390_VEC_FLOAT_SQRT: ++ op = "v-vfloatsqrt"; ++ break; ++ + default: + goto fail; + } diff --git a/SPECS/valgrind.spec b/SPECS/valgrind.spec index cf09408..f312a21 100644 --- a/SPECS/valgrind.spec +++ b/SPECS/valgrind.spec @@ -2,8 +2,8 @@ Summary: Tool for finding memory management bugs in programs Name: %{?scl_prefix}valgrind -Version: 3.16.0 -Release: 4%{?dist} +Version: 3.17.0 +Release: 5%{?dist} Epoch: 1 License: GPLv2+ URL: http://www.valgrind.org/ @@ -83,42 +83,74 @@ Patch2: valgrind-3.9.0-helgrind-race-supp.patch # Make ld.so supressions slightly less specific. Patch3: valgrind-3.9.0-ldso-supp.patch -# We want all executables and libraries in libexec instead of lib -# so they are only available for valgrind usage itself and so the -# same directory is used independent of arch. -Patch4: valgrind-3.16.0-pkglibexecdir.patch - # Add some stack-protector -Patch5: valgrind-3.16.0-some-stack-protector.patch +Patch4: valgrind-3.16.0-some-stack-protector.patch # Add some -Wl,z,now. 
-Patch6: valgrind-3.16.0-some-Wl-z-now.patch - -# KDE#422677 PPC sync instruction L field should only be 2 bits in ISA 3.0 -Patch7: valgrind-3.16.0-ppc-L-field.patch - -# KDE#422715 x86: vex: the `impossible' happened: expr_is_guardable -Patch8: valgrind-3.16.0-387-float.patch - -# KDE#422174 unhandled instruction bytes: 0x48 0xE9 (REX prefix JMP instr) -Patch9: valgrind-3.16.1-REX-prefix-JMP.patch - -# KDE#422623 epoll_ctl warns for uninit padding on non-amd64 64bit arches -Patch10: valgrind-3.16.1-epoll.patch - -# KDE#369029 handle linux syscalls sched_getattr and sched_setattr -Patch11: valgrind-3.16.1-sched_getsetattr.patch - -# KDE#415293 Incorrect call-graph tracking due to new _dl_runtime_resolve* -Patch12: valgrind-3.16.1-dl_runtime_resolve.patch - - -# KDE#428648 s390_emit_load_mem panics due to 20-bit offset for vector load -Patch15: valgrind-3.16.1-s390_emit_load_mem.patch - -# KDE#133812 s390x: z14 vector instructions not implemented -Patch16: valgrind-3.16.1-s390x-z14-vector.patch - +Patch5: valgrind-3.16.0-some-Wl-z-now.patch + +# Upstream commits that provide additional ppc64le ISA 3.1 support +# commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5 +# PPC64: ISA 3.1 VSX PCV Generate Operations +# commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a +# PPC64: Reduced-Precision bfloat16 Outer Product & Format Conversion Operations +# commit e09fdaf569b975717465ed8043820d0198d4d47d +# PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations +Patch6: valgrind-3.17.0-ppc64-isa-3.1.patch + +# Upstream commits that provide extra tests for ppc64le ISA 3.1 support +# commit c8fa838be405d7ac43035dcf675bf490800c26ec +# Reduced Precision bfloat16 outer product tests +# commit 4bcc6c8a97c10c4dd41b35bd3b3035ec4037d524 +# VSX Permute Control Vector Generate Operation tests. 
+# commit c589b652939655090c005a982a71f50c489fb5ce +# Reduced precision Missing Integer based outer tests +Patch7: valgrind-3.17.0-ppc64-isa-3.1-tests.patch + +# commit 45873298ff2d17accc65654d64758360616aade5 +# s390x: Add missing UNOP insns to s390_insn_as_string +Patch8: valgrind-3.17.0-s390_insn_as_string.patch + +# KDE#435908 Don't look for separate debuginfo if image already has .debug_info +Patch9: valgrind-3.17.0-debuginfod.patch + +# KDE#423963 Only process clone results in the parent thread +Patch10: valgrind-3.17.0-clone-parent-res.patch + +# commit d74a637206ef5532ccd2ccb2e31ee2762f184e60 +# Bug 433863 - s390x: Remove memcheck test cases for cs, cds, and csg +# commit 18ddcc47c951427efd3b790ba2481159b9bd1598 +# s390x: Support "expensive" comparisons Iop_ExpCmpNE32/64 +# commit 5db3f929c43bf46f4707178706cfe90f43acdd19 +# s390x: Add convenience function mkV128() +# commit e78bd78d3043729033b426218ab8c6dae9c51e96 +# Bug 434296 - s390x: Rework IR conversion of VSTRC, VFAE, and VFEE +# commit 4f17a067c4f8245c05611d6e8aa36e8841bab376 +# Bug 434296 - s390x: Rework IR conversion of VFENE +# commit 9bd78ebd8bb5cd4ebb3f081ceba46836cc485551 +# Bug 434296 - s390x: Rework IR conversion of VISTR +# commit 32312d588b77c5b5b5a0145bb0cc6f795b447790 +# Bug 434296 - s390x: Add memcheck test cases for vector string insns +# commit a0bb049ace14ab52d386bb1d49a399f39eec4986 +# s390x: Improve handling of amodes without base register +# commit fd935e238d907d9c523a311ba795077d95ad6912 +# s390x: Rework insn "v-vdup" and add "v-vrep" +# commit 6c1cb1a0128b00858b973ef9344e12d6ddbaaf57 +# s390x: Add support for emitting "vector or with complement" +# commit 0bd4263326b2d48f782339a9bbe1a069c7de45c7 +# s390x: Fix/optimize Iop_64HLtoV128 +# commit cae5062b05b95e0303b1122a0ea9aadc197e4f0a +# s390x: Add missing stdout.exp for vector string memcheck test +Patch11: valgrind-3.17.0-s390-prep.patch + +# KDE#432387 - s390x: z15 instructions support +Patch12: valgrind-3.17.0-s390-z15.patch + 
+# commit 124ae6cfa303f0cc71ffd685620cb57c4f8f02bb +# s390x: Don't emit "vector or with complement" on z13 +Patch13: valgrind-3.17.0-s390-z13-vec-fix.patch + +BuildRequires: make BuildRequires: glibc-devel %if %{build_openmpi} @@ -148,6 +180,16 @@ BuildRequires: autoconf # For make check validating the documentation BuildRequires: docbook-dtds +# configure might use which +BuildRequires: which + +# For testing debuginfod-find +%if 0%{?fedora} > 29 || 0%{?rhel} > 7 +BuildRequires: elfutils-debuginfod-client +# For using debuginfod at runtime +Recommends: elfutils-debuginfod-client +%endif + %{?scl:Requires:%scl_runtime} # We need to fixup selinux file context when doing a scl build. @@ -245,23 +287,24 @@ Valgrind User Manual for details. %patch1 -p1 %patch2 -p1 %patch3 -p1 -%patch4 -p1 # Old rhel gcc doesn't have -fstack-protector-strong. %if 0%{?fedora} || 0%{?rhel} >= 7 +%patch4 -p1 %patch5 -p1 -%patch6 -p1 %endif +%patch6 -p1 %patch7 -p1 + %patch8 -p1 %patch9 -p1 %patch10 -p1 + %patch11 -p1 +touch memcheck/tests/s390x/vistr.stdout.exp %patch12 -p1 - -%patch15 -p1 -%patch16 -p1 +%patch13 -p1 %build @@ -450,6 +493,7 @@ echo ===============END TESTING=============== %{_includedir}/valgrind/drd.h %{_includedir}/valgrind/helgrind.h %{_includedir}/valgrind/memcheck.h +%{_includedir}/valgrind/dhat.h %{_libdir}/pkgconfig/valgrind.pc %if %{build_tools_devel} @@ -482,6 +526,18 @@ fi %endif %changelog +* Thu Jun 24 2021 Mark Wielaard - 3.17.0-5 +- Add valgrind-3.17.0-s390-prep.patch +- Add valgrind-3.17.0-s390-z15.patch +- Add valgrind-3.17.0-s390-z13-vec-fix.patch + +* Thu Jun 3 2021 Mark Wielaard - 3.17.0-4 +- Update to upstream 3.17.0 final. +- Add valgrind-3.17.0-ppc64-isa-3.1{,-tests}.patch +- Add valgrind-3.17.0-s390_insn_as_string.patch +- Add valgrind-3.17.0-debuginfod.patch +- Add valgrind-3.17.0-clone-parent-res.patch + * Fri Dec 4 2020 Mark Wielaard - 3.16.0-4 - Add valgrind-3.16.1-s390_emit_load_mem.patch - Add valgrind-3.16.1-s390x-z14-vector.patch