From 37f9425f67cd9a8ac66f7f275e1915e6b40d01f9 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: May 18 2021 06:41:47 +0000 Subject: import valgrind-3.16.0-4.el8 --- diff --git a/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch b/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch new file mode 100644 index 0000000..f780fb1 --- /dev/null +++ b/SOURCES/valgrind-3.16.1-REX-prefix-JMP.patch @@ -0,0 +1,38 @@ +commit e2dec0ff9b1e071779bee2c4e6fc82f8194b1c1d +Author: Mark Wielaard +Date: Sun Jul 26 21:17:23 2020 +0200 + + Handle REX prefixed JMP instruction. + + The NET Core runtime might generate a JMP with a REX prefix. + For Jv (32bit offset) and Jb (8bit offset) this is valid. + Prefixes that change operand size are ignored for such JMPs. + So remove the check for sz == 4 and force sz = 4 for Jv. + + https://bugs.kde.org/show_bug.cgi?id=422174 + +diff --git a/VEX/priv/guest_amd64_toIR.c b/VEX/priv/guest_amd64_toIR.c +index fadf47d41..7888132eb 100644 +--- a/VEX/priv/guest_amd64_toIR.c ++++ b/VEX/priv/guest_amd64_toIR.c +@@ -21392,8 +21392,8 @@ Long dis_ESC_NONE ( + + case 0xE9: /* Jv (jump, 16/32 offset) */ + if (haveF3(pfx)) goto decode_failure; +- if (sz != 4) +- goto decode_failure; /* JRS added 2004 July 11 */ ++ sz = 4; /* Prefixes that change operand size are ignored for this ++ instruction. Operand size is forced to 32bit. */ + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. */ + d64 = (guest_RIP_bbstart+delta+sz) + getSDisp(sz,delta); + delta += sz; +@@ -21404,8 +21404,7 @@ Long dis_ESC_NONE ( + + case 0xEB: /* Jb (jump, byte offset) */ + if (haveF3(pfx)) goto decode_failure; +- if (sz != 4) +- goto decode_failure; /* JRS added 2004 July 11 */ ++ /* Prefixes that change operand size are ignored for this instruction. */ + if (haveF2(pfx)) DIP("bnd ; "); /* MPX bnd prefix. 
*/ + d64 = (guest_RIP_bbstart+delta+1) + getSDisp8(delta); + delta++; diff --git a/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch b/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch new file mode 100644 index 0000000..0a34759 --- /dev/null +++ b/SOURCES/valgrind-3.16.1-dl_runtime_resolve.patch @@ -0,0 +1,206 @@ +commit f4abcc05fdba3f25890a9b30b71d511ccc906d46 +Author: Mark Wielaard +Date: Mon Jul 27 22:43:28 2020 +0200 + + Incorrect call-graph tracking due to new _dl_runtime_resolve_xsave* + + Newer glibc have alternate ld.so _dl_runtime_resolve functions. + Namely _dl_runtime_resolve_xsave and _dl_runtime_resolve_xsave'2 + + This patch recognizes the xsave, xsavec and fxsave variants and + changes callgrind so that any variant counts as _dl_runtime_resolve. + + Original patch by paulo.cesar.pereira.de.andrade@gmail.com + https://bugs.kde.org/show_bug.cgi?id=415293 + +diff --git a/callgrind/fn.c b/callgrind/fn.c +index e9d8dd214..7cce1a0c7 100644 +--- a/callgrind/fn.c ++++ b/callgrind/fn.c +@@ -30,8 +30,11 @@ + + static fn_array current_fn_active; + +-static Addr runtime_resolve_addr = 0; +-static int runtime_resolve_length = 0; ++/* x86_64 defines 4 variants. 
*/ ++#define MAX_RESOLVE_ADDRS 4 ++static int runtime_resolve_addrs = 0; ++static Addr runtime_resolve_addr[MAX_RESOLVE_ADDRS]; ++static int runtime_resolve_length[MAX_RESOLVE_ADDRS]; + + // a code pattern is a list of tuples (start offset, length) + struct chunk_t { int start, len; }; +@@ -56,6 +59,9 @@ static Bool check_code(obj_node* obj, + /* first chunk of pattern should always start at offset 0 and + * have at least 3 bytes */ + CLG_ASSERT((pat->chunk[0].start == 0) && (pat->chunk[0].len >2)); ++ ++ /* and we cannot be called more than MAX_RESOLVE_ADDRS times */ ++ CLG_ASSERT(runtime_resolve_addrs < MAX_RESOLVE_ADDRS); + + CLG_DEBUG(1, "check_code: %s, pattern %s, check %d bytes of [%x %x %x...]\n", + obj->name, pat->name, pat->chunk[0].len, code[0], code[1], code[2]); +@@ -93,8 +99,9 @@ static Bool check_code(obj_node* obj, + pat->name, obj->name + obj->last_slash_pos, + addr - obj->start, addr, pat->len); + +- runtime_resolve_addr = addr; +- runtime_resolve_length = pat->len; ++ runtime_resolve_addr[runtime_resolve_addrs] = addr; ++ runtime_resolve_length[runtime_resolve_addrs] = pat->len; ++ runtime_resolve_addrs++; + return True; + } + } +@@ -138,8 +145,9 @@ static Bool search_runtime_resolve(obj_node* obj) + "x86-glibc2.8", 30, {{ 0,12 }, { 16,14 }, { 30,0}} }; + + if (VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) return False; +- if (check_code(obj, code, &pat)) return True; +- if (check_code(obj, code_28, &pat_28)) return True; ++ Bool pat_p = check_code(obj, code, &pat); ++ Bool pat_28_p = check_code(obj, code_28, &pat_28); ++ if (pat_p || pat_28_p) return True; + return False; + #endif + +@@ -186,9 +194,98 @@ static Bool search_runtime_resolve(obj_node* obj) + static struct pattern pat = { + "amd64-def", 110, {{ 0,62 }, { 66,44 }, { 110,0 }} }; + ++ static UChar code_xsavec[] = { ++ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xc0, ++ /* 8*/ 0x48, 0x2b, 0x25, 0x00, 0x00, 0x00, 0x00, /* sub (%rip),%rsp */ ++ /*15*/ 0x48, ++ /*16*/ 0x89, 0x04, 0x24, 
0x48, 0x89, 0x4c, 0x24, 0x08, ++ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, ++ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, ++ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, ++ /*48*/ 0x30, 0xb8, 0xee, 0x00, 0x00, 0x00, 0x31, 0xd2, ++ /*56*/ 0x48, 0x89, 0x94, 0x24, 0x50, 0x02, 0x00, 0x00, ++ /*64*/ 0x48, 0x89, 0x94, 0x24, 0x58, 0x02, 0x00, 0x00, ++ /*72*/ 0x48, 0x89, 0x94, 0x24, 0x60, 0x02, 0x00, 0x00, ++ /*80*/ 0x48, 0x89, 0x94, 0x24, 0x68, 0x02, 0x00, 0x00, ++ /*88*/ 0x48, 0x89, 0x94, 0x24, 0x70, 0x02, 0x00, 0x00, ++ /*96*/ 0x48, 0x89, 0x94, 0x24, 0x78, 0x02, 0x00, 0x00, ++ /*04*/ 0x0f, 0xc7, 0x64, 0x24, 0x40, 0x48, 0x8b, 0x73, ++ /*112*/0x10, 0x48, 0x8b, 0x7b, 0x08, ++ /*117*/0xe8, 0x00, 0x00, 0x00, 0x00, /* callq <_dl_fixup> */ ++ /*122*/0x49, 0x89, 0xc3, 0xb8, 0xee, 0x00, ++ /*128*/0x00, 0x00, 0x31, 0xd2, 0x0f, 0xae, 0x6c, 0x24, ++ /*136*/0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, 0x4c, 0x8b, ++ /*144*/0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20, ++ /*152*/0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, ++ /*160*/0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, ++ /*168*/0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, 0x48, 0x8b, ++ /*176*/0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, 0xf2, 0x41, ++ /*184*/0xff, 0xe3 }; ++ static struct pattern pat_xsavec = { ++ "amd64-xsavec", 186, {{ 0,11 }, { 15,103 }, {122,64}, { 186,0 }} }; ++ ++ static UChar code_xsave[] = { ++ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xc0, ++ /* 8*/ 0x48, 0x2b, 0x25, 0x00, 0x00, 0x00, 0x00, /* sub (%rip),%rsp */ ++ /*15*/ 0x48, ++ /*16*/ 0x89, 0x04, 0x24, 0x48, 0x89, 0x4c, 0x24, 0x08, ++ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, ++ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, ++ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, ++ /*48*/ 0x30, 0xb8, 0xee, 0x00, 0x00, 0x00, 0x31, 0xd2, ++ /*56*/ 0x48, 0x89, 0x94, 0x24, 0x40, 0x02, 0x00, 0x00, ++ /*64*/ 0x48, 0x89, 0x94, 0x24, 0x48, 0x02, 0x00, 0x00, ++ /*72*/ 0x48, 0x89, 0x94, 0x24, 0x50, 0x02, 
0x00, 0x00, ++ /*80*/ 0x48, 0x89, 0x94, 0x24, 0x58, 0x02, 0x00, 0x00, ++ /*88*/ 0x48, 0x89, 0x94, 0x24, 0x60, 0x02, 0x00, 0x00, ++ /*96*/ 0x48, 0x89, 0x94, 0x24, 0x68, 0x02, 0x00, 0x00, ++ /*104*/0x48, 0x89, 0x94, 0x24, 0x70, 0x02, 0x00, 0x00, ++ /*112*/0x48, 0x89, 0x94, 0x24, 0x78, 0x02, 0x00, 0x00, ++ /*120*/0x0f, 0xae, 0x64, 0x24, 0x40, 0x48, 0x8b, 0x73, ++ /*128*/0x10, 0x48, 0x8b, 0x7b, 0x08, ++ /*133*/0xe8, 0x00, 0x00, 0x00, 0x00, /* callq <_dl_fixup> */ ++ /*138*/0x49, 0x89, 0xc3, 0xb8, 0xee, 0x00, ++ /*144*/0x00, 0x00, 0x31, 0xd2, 0x0f, 0xae, 0x6c, 0x24, ++ /*152*/0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, 0x4c, 0x8b, ++ /*160*/0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, 0x24, 0x20, ++ /*168*/0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, 0x8b, 0x54, ++ /*176*/0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, 0x08, 0x48, ++ /*184*/0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, 0x48, 0x8b, ++ /*192*/0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, 0xf2, 0x41, ++ /*200*/0xff, 0xe3 }; ++ static struct pattern pat_xsave = { ++ "amd64-xsave", 202, {{ 0,11 }, { 15,119 }, {138,64}, { 202,0 }} }; ++ ++ static UChar code_fxsave[] = { ++ /* 0*/ 0x53, 0x48, 0x89, 0xe3, 0x48, 0x83, 0xe4, 0xf0, ++ /* 8*/ 0x48, 0x81, 0xec, 0x40, 0x02, 0x00, 0x00, 0x48, ++ /*16*/ 0x89, 0x04, 0x24, 0x48, 0x89, 0x4c, 0x24, 0x08, ++ /*24*/ 0x48, 0x89, 0x54, 0x24, 0x10, 0x48, 0x89, 0x74, ++ /*32*/ 0x24, 0x18, 0x48, 0x89, 0x7c, 0x24, 0x20, 0x4c, ++ /*40*/ 0x89, 0x44, 0x24, 0x28, 0x4c, 0x89, 0x4c, 0x24, ++ /*48*/ 0x30, 0x0f, 0xae, 0x44, 0x24, 0x40, 0x48, 0x8b, ++ /*56*/ 0x73, 0x10, 0x48, 0x8b, 0x7b, 0x08, ++ /*62*/ 0xe8, 0x00, 0x00, 0x00, 0x00, /* callq <_dl_fixup> */ ++ /*67*/ 0x49, 0x89, 0xc3, 0x0f, 0xae, ++ /*72*/ 0x4c, 0x24, 0x40, 0x4c, 0x8b, 0x4c, 0x24, 0x30, ++ /*80*/ 0x4c, 0x8b, 0x44, 0x24, 0x28, 0x48, 0x8b, 0x7c, ++ /*88*/ 0x24, 0x20, 0x48, 0x8b, 0x74, 0x24, 0x18, 0x48, ++ /*96*/ 0x8b, 0x54, 0x24, 0x10, 0x48, 0x8b, 0x4c, 0x24, ++ /*104*/0x08, 0x48, 0x8b, 0x04, 0x24, 0x48, 0x89, 0xdc, ++ /*112*/0x48, 0x8b, 0x1c, 0x24, 0x48, 0x83, 0xc4, 0x18, ++ 
/*120*/0xf2, 0x41, 0xff, 0xe3 }; ++ static struct pattern pat_fxsave = { ++ "amd64-fxsave", 124, {{ 0,63 }, { 67,57 }, { 124,0 }} }; ++ + if ((VG_(strncmp)(obj->name, "/lib/ld", 7) != 0) && +- (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0)) return False; +- return check_code(obj, code, &pat); ++ (VG_(strncmp)(obj->name, "/lib64/ld", 9) != 0) && ++ (VG_(strncmp)(obj->name, "/usr/lib/ld", 11) != 0) && ++ (VG_(strncmp)(obj->name, "/usr/lib64/ld", 13) != 0)) return False; ++ Bool pat_p = check_code(obj, code, &pat); ++ Bool pat_xsavec_p = check_code(obj, code_xsavec, &pat_xsavec); ++ Bool pat_xsave_p = check_code(obj, code_xsave, &pat_xsave); ++ Bool pat_fxsave_p = check_code(obj, code_fxsave, &pat_fxsave); ++ if (pat_p || pat_xsavec_p || pat_xsave_p || pat_fxsave_p) return True; + #endif + + /* For other platforms, no patterns known */ +@@ -254,7 +351,7 @@ obj_node* new_obj_node(DebugInfo* di, obj_node* next) + i++; + } + +- if (runtime_resolve_addr == 0) search_runtime_resolve(obj); ++ if (runtime_resolve_addrs == 0) search_runtime_resolve(obj); + + return obj; + } +@@ -490,6 +587,7 @@ fn_node* CLG_(get_fn_node)(BB* bb) + DebugInfo* di; + UInt line_num; + fn_node* fn; ++ Int i; + + /* fn from debug info is idempotent for a BB */ + if (bb->fn) return bb->fn; +@@ -538,12 +636,14 @@ fn_node* CLG_(get_fn_node)(BB* bb) + } + if (0 == VG_(strcmp)(fnname, "_exit") && !exit_bb) + exit_bb = bb; +- +- if (runtime_resolve_addr && +- (bb_addr(bb) >= runtime_resolve_addr) && +- (bb_addr(bb) < runtime_resolve_addr + runtime_resolve_length)) { +- /* BB in runtime_resolve found by code check; use this name */ +- fnname = "_dl_runtime_resolve"; ++ ++ for (i = 0; i < runtime_resolve_addrs; i++) { ++ if ((bb_addr(bb) >= runtime_resolve_addr[i]) && ++ (bb_addr(bb) < runtime_resolve_addr[i] + runtime_resolve_length[i])) { ++ /* BB in runtime_resolve found by code check; use this name */ ++ fnname = "_dl_runtime_resolve"; ++ break; ++ } + } + + /* get fn_node struct for this function */ 
diff --git a/SOURCES/valgrind-3.16.1-epoll.patch b/SOURCES/valgrind-3.16.1-epoll.patch new file mode 100644 index 0000000..c6a0411 --- /dev/null +++ b/SOURCES/valgrind-3.16.1-epoll.patch @@ -0,0 +1,117 @@ +commit f326d68d762edf4b0e9604daa446b6f8ca25725a +Author: Mark Wielaard +Date: Sun Jul 26 22:40:22 2020 +0200 + + epoll_ctl warns for uninitialized padding on non-amd64 64bit arches + + struct vki_epoll_event is packed on x86_64, but not on other 64bit + arches. This means that on 64bit arches there can be padding in the + epoll_event struct. Separately the data field is only used by user + space (which might not set the data field if it doesn't need to). + + Only check the events field on epoll_ctl. But assume both events + and data are both written to by epoll_[p]wait (exclude padding). + + https://bugs.kde.org/show_bug.cgi?id=422623 + +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 5b5b7eee6..929a4d9af 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -2099,8 +2099,29 @@ PRE(sys_epoll_ctl) + SARG1, ( ARG2<3 ? epoll_ctl_s[ARG2] : "?" ), SARG3, ARG4); + PRE_REG_READ4(long, "epoll_ctl", + int, epfd, int, op, int, fd, struct vki_epoll_event *, event); +- if (ARG2 != VKI_EPOLL_CTL_DEL) +- PRE_MEM_READ( "epoll_ctl(event)", ARG4, sizeof(struct vki_epoll_event) ); ++ if (ARG2 != VKI_EPOLL_CTL_DEL) { ++ /* Just check the events field, the data field is for user space and ++ unused by the kernel. */ ++ struct vki_epoll_event *event = (struct vki_epoll_event *) ARG4; ++ PRE_MEM_READ( "epoll_ctl(event)", (Addr) &event->events, ++ sizeof(__vki_u32) ); ++ } ++} ++ ++/* RES event records have been written (exclude padding). 
*/ ++static void epoll_post_helper ( ThreadId tid, SyscallArgs* arrghs, ++ SyscallStatus* status ) ++{ ++ vg_assert(SUCCESS); ++ if (RES > 0) { ++ Int i; ++ struct vki_epoll_event **events = (struct vki_epoll_event**)(Addr)ARG2; ++ for (i = 0; i < RES; i++) { ++ /* Assume both events and data are set (data is user space only). */ ++ POST_FIELD_WRITE(events[i]->events); ++ POST_FIELD_WRITE(events[i]->data); ++ } ++ } + } + + PRE(sys_epoll_wait) +@@ -2111,13 +2132,12 @@ PRE(sys_epoll_wait) + PRE_REG_READ4(long, "epoll_wait", + int, epfd, struct vki_epoll_event *, events, + int, maxevents, int, timeout); ++ /* Assume all (maxevents) events records should be (fully) writable. */ + PRE_MEM_WRITE( "epoll_wait(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); + } + POST(sys_epoll_wait) + { +- vg_assert(SUCCESS); +- if (RES > 0) +- POST_MEM_WRITE( ARG2, sizeof(struct vki_epoll_event)*RES ) ; ++ epoll_post_helper (tid, arrghs, status); + } + + PRE(sys_epoll_pwait) +@@ -2130,15 +2150,14 @@ PRE(sys_epoll_pwait) + int, epfd, struct vki_epoll_event *, events, + int, maxevents, int, timeout, vki_sigset_t *, sigmask, + vki_size_t, sigsetsize); ++ /* Assume all (maxevents) events records should be (fully) writable. */ + PRE_MEM_WRITE( "epoll_pwait(events)", ARG2, sizeof(struct vki_epoll_event)*ARG3); + if (ARG5) + PRE_MEM_READ( "epoll_pwait(sigmask)", ARG5, sizeof(vki_sigset_t) ); + } + POST(sys_epoll_pwait) + { +- vg_assert(SUCCESS); +- if (RES > 0) +- POST_MEM_WRITE( ARG2, sizeof(struct vki_epoll_event)*RES ) ; ++ epoll_post_helper (tid, arrghs, status); + } + + PRE(sys_eventfd) +commit b74f9f23c8758c77367f18368ea95baa858544cb +Author: Mark Wielaard +Date: Tue Aug 18 23:58:55 2020 +0200 + + Fix epoll_ctl setting of array event and data fields. + + Fix for https://bugs.kde.org/show_bug.cgi?id=422623 in commit ecf5ba119 + epoll_ctl warns for uninitialized padding on non-amd64 64bit arches + contained a bug. A pointer to an array is not a pointer to a pointer to + an array. 
Found by a Fedora user: + https://bugzilla.redhat.com/show_bug.cgi?id=1844778#c10 + +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 0850487e9..3f488795a 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -2115,11 +2115,11 @@ static void epoll_post_helper ( ThreadId tid, SyscallArgs* arrghs, + vg_assert(SUCCESS); + if (RES > 0) { + Int i; +- struct vki_epoll_event **events = (struct vki_epoll_event**)(Addr)ARG2; ++ struct vki_epoll_event *events = (struct vki_epoll_event*)(Addr)ARG2; + for (i = 0; i < RES; i++) { + /* Assume both events and data are set (data is user space only). */ +- POST_FIELD_WRITE(events[i]->events); +- POST_FIELD_WRITE(events[i]->data); ++ POST_FIELD_WRITE(events[i].events); ++ POST_FIELD_WRITE(events[i].data); + } + } + } diff --git a/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch b/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch new file mode 100644 index 0000000..95da59f --- /dev/null +++ b/SOURCES/valgrind-3.16.1-s390_emit_load_mem.patch @@ -0,0 +1,27 @@ +commit ba73f8d2ebe4b5fe8163ee5ab806f0e50961ebdf +Author: Andreas Arnez +Date: Tue Nov 3 18:17:30 2020 +0100 + + Bug 428648 - s390x: Force 12-bit amode for vector loads in isel + + Similar to Bug 417452, where the instruction selector sometimes attempted + to generate vector stores with a 20-bit displacement, the same problem has + now been reported with vector loads. + + The problem is caused in s390_isel_vec_expr_wrk(), where the addressing + mode is generated with s390_isel_amode() instead of + s390_isel_amode_short(). This is fixed. 
+ +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 2f80dd850..134f3eb6f 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -3741,7 +3741,7 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + /* --------- LOAD --------- */ + case Iex_Load: { + HReg dst = newVRegV(env); +- s390_amode *am = s390_isel_amode(env, expr->Iex.Load.addr); ++ s390_amode *am = s390_isel_amode_short(env, expr->Iex.Load.addr); + + if (expr->Iex.Load.end != Iend_BE) + goto irreducible; diff --git a/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch b/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch new file mode 100644 index 0000000..747c8a8 --- /dev/null +++ b/SOURCES/valgrind-3.16.1-s390x-z14-vector.patch @@ -0,0 +1,2977 @@ +diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h +index 9f93cff19..905429015 100644 +--- a/VEX/priv/guest_s390_defs.h ++++ b/VEX/priv/guest_s390_defs.h +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 2010-2020 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -263,26 +263,27 @@ extern ULong last_execute_target; + before S390_VEC_OP_LAST. 
*/ + typedef enum { + S390_VEC_OP_INVALID = 0, +- S390_VEC_OP_VPKS = 1, +- S390_VEC_OP_VPKLS = 2, +- S390_VEC_OP_VFAE = 3, +- S390_VEC_OP_VFEE = 4, +- S390_VEC_OP_VFENE = 5, +- S390_VEC_OP_VISTR = 6, +- S390_VEC_OP_VSTRC = 7, +- S390_VEC_OP_VCEQ = 8, +- S390_VEC_OP_VTM = 9, +- S390_VEC_OP_VGFM = 10, +- S390_VEC_OP_VGFMA = 11, +- S390_VEC_OP_VMAH = 12, +- S390_VEC_OP_VMALH = 13, +- S390_VEC_OP_VCH = 14, +- S390_VEC_OP_VCHL = 15, +- S390_VEC_OP_VFCE = 16, +- S390_VEC_OP_VFCH = 17, +- S390_VEC_OP_VFCHE = 18, +- S390_VEC_OP_VFTCI = 19, +- S390_VEC_OP_LAST = 20 // supposed to be the last element in enum ++ S390_VEC_OP_VPKS, ++ S390_VEC_OP_VPKLS, ++ S390_VEC_OP_VFAE, ++ S390_VEC_OP_VFEE, ++ S390_VEC_OP_VFENE, ++ S390_VEC_OP_VISTR, ++ S390_VEC_OP_VSTRC, ++ S390_VEC_OP_VCEQ, ++ S390_VEC_OP_VTM, ++ S390_VEC_OP_VGFM, ++ S390_VEC_OP_VGFMA, ++ S390_VEC_OP_VMAH, ++ S390_VEC_OP_VMALH, ++ S390_VEC_OP_VCH, ++ S390_VEC_OP_VCHL, ++ S390_VEC_OP_VFTCI, ++ S390_VEC_OP_VFMIN, ++ S390_VEC_OP_VFMAX, ++ S390_VEC_OP_VBPERM, ++ S390_VEC_OP_VMSL, ++ S390_VEC_OP_LAST // supposed to be the last element in enum + } s390x_vec_op_t; + + /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one +diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c +index a470d9f8d..b71b621ae 100644 +--- a/VEX/priv/guest_s390_helpers.c ++++ b/VEX/priv/guest_s390_helpers.c +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 
2010-2020 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -314,20 +314,11 @@ ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;} + /*--- Dirty helper for Store Facility instruction ---*/ + /*------------------------------------------------------------*/ + #if defined(VGA_s390x) +-static void +-s390_set_facility_bit(ULong *addr, UInt bitno, UInt value) +-{ +- addr += bitno / 64; +- bitno = bitno % 64; +- +- ULong mask = 1; +- mask <<= (63 - bitno); + +- if (value == 1) { +- *addr |= mask; // set +- } else { +- *addr &= ~mask; // clear +- } ++static ULong ++s390_stfle_range(UInt lo, UInt hi) ++{ ++ return ((1UL << (hi + 1 - lo)) - 1) << (63 - (hi % 64)); + } + + ULong +@@ -336,6 +327,77 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i; + register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF; /* r0[56:63] */ + ++ /* Restrict to facilities that we know about and that we assume to be ++ compatible with Valgrind. Of course, in this way we may reject features ++ that Valgrind is not really involved in (and thus would be compatible ++ with), but querying for such features doesn't seem like a typical use ++ case. */ ++ ULong accepted_facility[S390_NUM_FACILITY_DW] = { ++ /* === 0 .. 
63 === */ ++ (s390_stfle_range(0, 16) ++ /* 17: message-security-assist, not supported */ ++ | s390_stfle_range(18, 19) ++ /* 20: HFP-multiply-and-add/subtract, not supported */ ++ | s390_stfle_range(21, 22) ++ /* 23: HFP-unnormalized-extension, not supported */ ++ | s390_stfle_range(24, 25) ++ /* 26: parsing-enhancement, not supported */ ++ | s390_stfle_range(27, 28) ++ /* 29: unassigned */ ++ | s390_stfle_range(30, 30) ++ /* 31: extract-CPU-time, not supported */ ++ | s390_stfle_range(32, 41) ++ /* 42-43: DFP, not fully supported */ ++ /* 44: PFPO, not fully supported */ ++ | s390_stfle_range(45, 47) ++ /* 48: DFP zoned-conversion, not supported */ ++ /* 49: includes PPA, not supported */ ++ /* 50: constrained transactional-execution, not supported */ ++ | s390_stfle_range(51, 55) ++ /* 56: unassigned */ ++ /* 57: MSA5, not supported */ ++ | s390_stfle_range(58, 60) ++ /* 61: miscellaneous-instruction 3, not supported */ ++ | s390_stfle_range(62, 63)), ++ ++ /* === 64 .. 127 === */ ++ (s390_stfle_range(64, 72) ++ /* 73: transactional-execution, not supported */ ++ | s390_stfle_range(74, 75) ++ /* 76: MSA3, not supported */ ++ /* 77: MSA4, not supported */ ++ | s390_stfle_range(78, 78) ++ /* 80: DFP packed-conversion, not supported */ ++ /* 81: PPA-in-order, not supported */ ++ | s390_stfle_range(82, 82) ++ /* 83-127: unassigned */ ), ++ ++ /* === 128 .. 
191 === */ ++ (s390_stfle_range(128, 131) ++ /* 132: unassigned */ ++ /* 133: guarded-storage, not supported */ ++ /* 134: vector packed decimal, not supported */ ++ | s390_stfle_range(135, 135) ++ /* 136: unassigned */ ++ /* 137: unassigned */ ++ | s390_stfle_range(138, 142) ++ /* 143: unassigned */ ++ | s390_stfle_range(144, 145) ++ /* 146: MSA8, not supported */ ++ | s390_stfle_range(147, 147) ++ /* 148: vector-enhancements 2, not supported */ ++ | s390_stfle_range(149, 149) ++ /* 150: unassigned */ ++ /* 151: DEFLATE-conversion, not supported */ ++ /* 153: unassigned */ ++ /* 154: unassigned */ ++ /* 155: MSA9, not supported */ ++ | s390_stfle_range(156, 156) ++ /* 157-167: unassigned */ ++ | s390_stfle_range(168, 168) ++ /* 169-191: unassigned */ ), ++ }; ++ + /* We cannot store more than S390_NUM_FACILITY_DW + (and it makes not much sense to do so anyhow) */ + if (reg0 > S390_NUM_FACILITY_DW - 1) +@@ -351,35 +413,9 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr) + /* Update guest register 0 with what STFLE set r0 to */ + guest_state->guest_r0 = reg0; + +- /* Set default: VM facilities = host facilities */ ++ /* VM facilities = host facilities, filtered by acceptance */ + for (i = 0; i < num_dw; ++i) +- addr[i] = hoststfle[i]; +- +- /* Now adjust the VM facilities according to what the VM supports */ +- s390_set_facility_bit(addr, S390_FAC_LDISP, 1); +- s390_set_facility_bit(addr, S390_FAC_EIMM, 1); +- s390_set_facility_bit(addr, S390_FAC_ETF2, 1); +- s390_set_facility_bit(addr, S390_FAC_ETF3, 1); +- s390_set_facility_bit(addr, S390_FAC_GIE, 1); +- s390_set_facility_bit(addr, S390_FAC_EXEXT, 1); +- s390_set_facility_bit(addr, S390_FAC_HIGHW, 1); +- s390_set_facility_bit(addr, S390_FAC_LSC2, 1); +- +- s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0); +- s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0); +- s390_set_facility_bit(addr, S390_FAC_XCPUT, 0); +- s390_set_facility_bit(addr, S390_FAC_MSA, 0); +- s390_set_facility_bit(addr, 
S390_FAC_PENH, 0); +- s390_set_facility_bit(addr, S390_FAC_DFP, 0); +- s390_set_facility_bit(addr, S390_FAC_PFPO, 0); +- s390_set_facility_bit(addr, S390_FAC_DFPZC, 0); +- s390_set_facility_bit(addr, S390_FAC_MISC, 0); +- s390_set_facility_bit(addr, S390_FAC_CTREXE, 0); +- s390_set_facility_bit(addr, S390_FAC_TREXE, 0); +- s390_set_facility_bit(addr, S390_FAC_MSA4, 0); +- s390_set_facility_bit(addr, S390_FAC_VXE, 0); +- s390_set_facility_bit(addr, S390_FAC_VXE2, 0); +- s390_set_facility_bit(addr, S390_FAC_DFLT, 0); ++ addr[i] = hoststfle[i] & accepted_facility[i]; + + return cc; + } +@@ -2500,25 +2536,26 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST); + static const UChar opcodes[][2] = { + {0x00, 0x00}, /* invalid */ +- {0xe7, 0x97}, /* VPKS */ +- {0xe7, 0x95}, /* VPKLS */ +- {0xe7, 0x82}, /* VFAE */ +- {0xe7, 0x80}, /* VFEE */ +- {0xe7, 0x81}, /* VFENE */ +- {0xe7, 0x5c}, /* VISTR */ +- {0xe7, 0x8a}, /* VSTRC */ +- {0xe7, 0xf8}, /* VCEQ */ +- {0xe7, 0xd8}, /* VTM */ +- {0xe7, 0xb4}, /* VGFM */ +- {0xe7, 0xbc}, /* VGFMA */ +- {0xe7, 0xab}, /* VMAH */ +- {0xe7, 0xa9}, /* VMALH */ +- {0xe7, 0xfb}, /* VCH */ +- {0xe7, 0xf9}, /* VCHL */ +- {0xe7, 0xe8}, /* VFCE */ +- {0xe7, 0xeb}, /* VFCH */ +- {0xe7, 0xea}, /* VFCHE */ +- {0xe7, 0x4a} /* VFTCI */ ++ [S390_VEC_OP_VPKS] = {0xe7, 0x97}, ++ [S390_VEC_OP_VPKLS] = {0xe7, 0x95}, ++ [S390_VEC_OP_VFAE] = {0xe7, 0x82}, ++ [S390_VEC_OP_VFEE] = {0xe7, 0x80}, ++ [S390_VEC_OP_VFENE] = {0xe7, 0x81}, ++ [S390_VEC_OP_VISTR] = {0xe7, 0x5c}, ++ [S390_VEC_OP_VSTRC] = {0xe7, 0x8a}, ++ [S390_VEC_OP_VCEQ] = {0xe7, 0xf8}, ++ [S390_VEC_OP_VTM] = {0xe7, 0xd8}, ++ [S390_VEC_OP_VGFM] = {0xe7, 0xb4}, ++ [S390_VEC_OP_VGFMA] = {0xe7, 0xbc}, ++ [S390_VEC_OP_VMAH] = {0xe7, 0xab}, ++ [S390_VEC_OP_VMALH] = {0xe7, 0xa9}, ++ [S390_VEC_OP_VCH] = {0xe7, 0xfb}, ++ [S390_VEC_OP_VCHL] = {0xe7, 0xf9}, ++ [S390_VEC_OP_VFTCI] = {0xe7, 0x4a}, ++ [S390_VEC_OP_VFMIN] = {0xe7, 0xee}, 
++ [S390_VEC_OP_VFMAX] = {0xe7, 0xef}, ++ [S390_VEC_OP_VBPERM]= {0xe7, 0x85}, ++ [S390_VEC_OP_VMSL] = {0xe7, 0xb8}, + }; + + union { +@@ -2612,6 +2649,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + case S390_VEC_OP_VGFMA: + case S390_VEC_OP_VMAH: + case S390_VEC_OP_VMALH: ++ case S390_VEC_OP_VMSL: + the_insn.VRRd.v1 = 1; + the_insn.VRRd.v2 = 2; + the_insn.VRRd.v3 = 3; +@@ -2621,9 +2659,9 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state, + the_insn.VRRd.m6 = d->m5; + break; + +- case S390_VEC_OP_VFCE: +- case S390_VEC_OP_VFCH: +- case S390_VEC_OP_VFCHE: ++ case S390_VEC_OP_VFMIN: ++ case S390_VEC_OP_VFMAX: ++ case S390_VEC_OP_VBPERM: + the_insn.VRRc.v1 = 1; + the_insn.VRRc.v2 = 2; + the_insn.VRRc.v3 = 3; +diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c +index c27a8d3fe..5f2c5ce98 100644 +--- a/VEX/priv/guest_s390_toIR.c ++++ b/VEX/priv/guest_s390_toIR.c +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 
2010-2020 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -248,6 +248,13 @@ typedef enum { + #define VRS_d2(insn) (((insn) >> 32) & 0xfff) + #define VRS_m4(insn) (((insn) >> 28) & 0xf) + #define VRS_rxb(insn) (((insn) >> 24) & 0xf) ++#define VRSd_v1(insn) (((insn) >> 28) & 0xf) ++#define VRSd_r3(insn) (((insn) >> 48) & 0xf) ++#define VSI_i3(insn) (((insn) >> 48) & 0xff) ++#define VSI_b2(insn) (((insn) >> 44) & 0xf) ++#define VSI_d2(insn) (((insn) >> 32) & 0xfff) ++#define VSI_v1(insn) (((insn) >> 28) & 0xf) ++#define VSI_rxb(insn) (((insn) >> 24) & 0xf) + + + /*------------------------------------------------------------*/ +@@ -1934,6 +1941,26 @@ s390_vr_get_type(const UChar m) + return results[m]; + } + ++/* Determine IRType from instruction's floating-point format field */ ++static IRType ++s390_vr_get_ftype(const UChar m) ++{ ++ static const IRType results[] = {Ity_F32, Ity_F64, Ity_F128}; ++ if (m >= 2 && m <= 4) ++ return results[m - 2]; ++ return Ity_INVALID; ++} ++ ++/* Determine number of elements from instruction's floating-point format ++ field */ ++static UChar ++s390_vr_get_n_elem(const UChar m) ++{ ++ if (m >= 2 && m <= 4) ++ return 1 << (4 - m); ++ return 0; ++} ++ + /* Determine if Condition Code Set (CS) flag is set in m field */ + #define s390_vr_is_cs_set(m) (((m) & 0x1) != 0) + +@@ -2188,12 +2215,15 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index) + goto invalidIndex; + } + return vr_offset(archreg) + sizeof(ULong) * index; ++ + case Ity_V128: ++ case Ity_F128: + if(index == 0) { + return vr_qw_offset(archreg); + } else { + goto invalidIndex; + } ++ + default: + vpanic("s390_vr_offset_by_index: unknown type"); + } +@@ -2211,7 +2241,14 @@ put_vr(UInt archreg, IRType type, UChar index, IRExpr *expr) + UInt offset = s390_vr_offset_by_index(archreg, type, index); + vassert(typeOfIRExpr(irsb->tyenv, expr) == type); + +- stmt(IRStmt_Put(offset, 
expr)); ++ if (type == Ity_F128) { ++ IRTemp val = newTemp(Ity_F128); ++ assign(val, expr); ++ stmt(IRStmt_Put(offset, unop(Iop_F128HItoF64, mkexpr(val)))); ++ stmt(IRStmt_Put(offset + 8, unop(Iop_F128LOtoF64, mkexpr(val)))); ++ } else { ++ stmt(IRStmt_Put(offset, expr)); ++ } + } + + /* Read type sized part specified by index of a vr register. */ +@@ -2219,6 +2256,11 @@ static IRExpr * + get_vr(UInt archreg, IRType type, UChar index) + { + UInt offset = s390_vr_offset_by_index(archreg, type, index); ++ if (type == Ity_F128) { ++ return binop(Iop_F64HLtoF128, ++ IRExpr_Get(offset, Ity_F64), ++ IRExpr_Get(offset + 8, Ity_F64)); ++ } + return IRExpr_Get(offset, type); + } + +@@ -2294,11 +2336,11 @@ s390_getCountToBlockBoundary(IRTemp op2addr, UChar m) + return mkexpr(output); + } + +-/* Load bytes into v1. +- maxIndex specifies max index to load and must be Ity_I32. +- If maxIndex >= 15, all 16 bytes are loaded. +- All bytes after maxIndex are zeroed. */ +-static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) ++/* Starting from addr, load at most maxIndex + 1 bytes into v1. Fill the ++ leftmost or rightmost bytes of v1, depending on whether `rightmost' is set. ++ If maxIndex >= 15, load all 16 bytes; otherwise clear the remaining bytes. */ ++static void ++s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) + { + IRTemp maxIdx = newTemp(Ity_I32); + IRTemp cappedMax = newTemp(Ity_I64); +@@ -2311,8 +2353,8 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) + crossed if and only if the real insn would have crossed it as well. + Thus, if the bytes to load are fully contained in an aligned 16-byte + chunk, load the whole 16-byte aligned chunk, and otherwise load 16 bytes +- from the unaligned address. Then shift the loaded data left-aligned +- into the target vector register. */ ++ from the unaligned address. Then shift the loaded data left- or ++ right-aligned into the target vector register. 
*/ + + assign(maxIdx, maxIndex); + assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), +@@ -2325,20 +2367,60 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex) + assign(back, mkite(binop(Iop_CmpLE64U, mkexpr(offset), mkexpr(zeroed)), + mkexpr(offset), mkU64(0))); + +- /* How much to shift the loaded 16-byte vector to the right, and then to +- the left. Since both 'zeroed' and 'back' range from 0 to 15, the shift +- amounts range from 0 to 120. */ +- IRExpr *shrAmount = binop(Iop_Shl64, +- binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), +- mkU8(3)); +- IRExpr *shlAmount = binop(Iop_Shl64, mkexpr(zeroed), mkU8(3)); ++ IRExpr* chunk = load(Ity_V128, binop(Iop_Sub64, mkexpr(addr), mkexpr(back))); ++ ++ /* Shift the loaded 16-byte vector to the right, then to the left, or vice ++ versa, where each shift amount ranges from 0 to 120. */ ++ IRExpr* shift1; ++ IRExpr* shift2 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(zeroed), mkU8(3))); ++ ++ if (rightmost) { ++ shift1 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(back), mkU8(3))); ++ put_vr_qw(v1, binop(Iop_ShrV128, ++ binop(Iop_ShlV128, chunk, shift1), ++ shift2)); ++ } else { ++ shift1 = unop(Iop_64to8, ++ binop(Iop_Shl64, ++ binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)), ++ mkU8(3))); ++ put_vr_qw(v1, binop(Iop_ShlV128, ++ binop(Iop_ShrV128, chunk, shift1), ++ shift2)); ++ } ++} ++ ++/* Store at most maxIndex + 1 bytes from v1 to addr. Store the leftmost or ++ rightmost bytes of v1, depending on whether `rightmost' is set. If maxIndex ++ >= 15, store all 16 bytes. 
*/ ++static void ++s390_vr_storeWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost) ++{ ++ IRTemp maxIdx = newTemp(Ity_I32); ++ IRTemp cappedMax = newTemp(Ity_I64); ++ IRTemp counter = newTemp(Ity_I64); ++ IRExpr* offset; ++ ++ assign(maxIdx, maxIndex); ++ assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)), ++ unop(Iop_32Uto64, mkexpr(maxIdx)), mkU64(15))); ++ ++ assign(counter, get_counter_dw0()); ++ ++ if (rightmost) ++ offset = binop(Iop_Add64, ++ binop(Iop_Sub64, mkU64(15), mkexpr(cappedMax)), ++ mkexpr(counter)); ++ else ++ offset = mkexpr(counter); ++ ++ store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), ++ binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, offset))); + +- put_vr_qw(v1, binop(Iop_ShlV128, +- binop(Iop_ShrV128, +- load(Ity_V128, +- binop(Iop_Sub64, mkexpr(addr), mkexpr(back))), +- unop(Iop_64to8, shrAmount)), +- unop(Iop_64to8, shlAmount))); ++ /* Check for end of field */ ++ put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); ++ iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(cappedMax))); ++ put_counter_dw0(mkU64(0)); + } + + /* Bitwise vCond ? v1 : v2 +@@ -3749,6 +3831,28 @@ s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3, + s390_disasm(ENC5(MNM, GPR, UDXB, VR, UINT), mnm, r1, d2, 0, b2, v3, m4); + } + ++static void ++s390_format_VRS_RRDV(const HChar *(*irgen)(UChar v1, UChar r3, IRTemp op2addr), ++ UChar v1, UChar r3, UChar b2, UShort d2, UChar rxb) ++{ ++ const HChar *mnm; ++ IRTemp op2addr = newTemp(Ity_I64); ++ ++ if (! s390_host_has_vx) { ++ emulation_failure(EmFail_S390X_vx); ++ return; ++ } ++ ++ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? 
get_gpr_dw0(b2) : ++ mkU64(0))); ++ ++ v1 = s390_vr_getVRindex(v1, 4, rxb); ++ mnm = irgen(v1, r3, op2addr); ++ ++ if (UNLIKELY(vex_traceflags & VEX_TRACE_FE)) ++ s390_disasm(ENC4(MNM, VR, GPR, UDXB), mnm, v1, r3, d2, 0, b2); ++} ++ + + static void + s390_format_VRS_VRDVM(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3, +@@ -4081,6 +4185,29 @@ s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3, + mnm, v1, v2, v3, m4, m5, m6); + } + ++static void ++s390_format_VSI_URDV(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar i3), ++ UChar v1, UChar b2, UChar d2, UChar i3, UChar rxb) ++{ ++ const HChar *mnm; ++ IRTemp op2addr = newTemp(Ity_I64); ++ ++ if (!s390_host_has_vx) { ++ emulation_failure(EmFail_S390X_vx); ++ return; ++ } ++ ++ v1 = s390_vr_getVRindex(v1, 4, rxb); ++ ++ assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) : ++ mkU64(0))); ++ ++ mnm = irgen(v1, op2addr, i3); ++ ++ if (vex_traceflags & VEX_TRACE_FE) ++ s390_disasm(ENC4(MNM, VR, UDXB, UINT), mnm, v1, d2, 0, b2, i3); ++} ++ + /*------------------------------------------------------------*/ + /*--- Build IR for opcodes ---*/ + /*------------------------------------------------------------*/ +@@ -16186,7 +16313,9 @@ s390_irgen_VGM(UChar v1, UShort i2, UChar m3) + static const HChar * + s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) + { +- IRType type = s390_vr_get_type(m3); ++ s390_insn_assert("vllez", m3 <= 3 || m3 == 6); ++ ++ IRType type = s390_vr_get_type(m3 & 3); + IRExpr* op2 = load(type, mkexpr(op2addr)); + IRExpr* op2as64bit; + switch (type) { +@@ -16206,7 +16335,13 @@ s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3) + vpanic("s390_irgen_VLLEZ: unknown type"); + } + +- put_vr_dw0(v1, op2as64bit); ++ if (m3 == 6) { ++ /* left-aligned */ ++ put_vr_dw0(v1, binop(Iop_Shl64, op2as64bit, mkU8(32))); ++ } else { ++ /* right-aligned */ ++ put_vr_dw0(v1, op2as64bit); ++ } + put_vr_dw1(v1, mkU64(0)); + return "vllez"; + } +@@ -16615,7 +16750,7 
@@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) + s390_getCountToBlockBoundary(addr, m3), + mkU32(1)); + +- s390_vr_loadWithLength(v1, addr, maxIndex); ++ s390_vr_loadWithLength(v1, addr, maxIndex, False); + + return "vlbb"; + } +@@ -16623,41 +16758,50 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3) + static const HChar * + s390_irgen_VLL(UChar v1, IRTemp addr, UChar r3) + { +- s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3)); ++ s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), False); + + return "vll"; + } + + static const HChar * +-s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) ++s390_irgen_VLRL(UChar v1, IRTemp addr, UChar i3) + { +- IRTemp counter = newTemp(Ity_I64); +- IRTemp maxIndexToStore = newTemp(Ity_I64); +- IRTemp gpr3 = newTemp(Ity_I64); +- +- assign(gpr3, unop(Iop_32Uto64, get_gpr_w1(r3))); +- assign(maxIndexToStore, mkite(binop(Iop_CmpLE64U, +- mkexpr(gpr3), +- mkU64(16) +- ), +- mkexpr(gpr3), +- mkU64(16) +- ) +- ); ++ s390_insn_assert("vlrl", (i3 & 0xf0) == 0); ++ s390_vr_loadWithLength(v1, addr, mkU32((UInt) i3), True); + +- assign(counter, get_counter_dw0()); ++ return "vlrl"; ++} + +- store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)), +- binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, mkexpr(counter)))); ++static const HChar * ++s390_irgen_VLRLR(UChar v1, UChar r3, IRTemp addr) ++{ ++ s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), True); + +- /* Check for end of field */ +- put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1))); +- iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(maxIndexToStore))); +- put_counter_dw0(mkU64(0)); ++ return "vlrlr"; ++} + ++static const HChar * ++s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3) ++{ ++ s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), False); + return "vstl"; + } + ++static const HChar * ++s390_irgen_VSTRL(UChar v1, IRTemp addr, UChar i3) ++{ ++ s390_insn_assert("vstrl", (i3 & 0xf0) == 0); ++ s390_vr_storeWithLength(v1, addr, mkU32((UInt) i3), True); ++ return 
"vstrl"; ++} ++ ++static const HChar * ++s390_irgen_VSTRLR(UChar v1, UChar r3, IRTemp addr) ++{ ++ s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), True); ++ return "vstrlr"; ++} ++ + static const HChar * + s390_irgen_VX(UChar v1, UChar v2, UChar v3) + { +@@ -16682,6 +16826,24 @@ s390_irgen_VO(UChar v1, UChar v2, UChar v3) + return "vo"; + } + ++static const HChar * ++s390_irgen_VOC(UChar v1, UChar v2, UChar v3) ++{ ++ put_vr_qw(v1, binop(Iop_OrV128, get_vr_qw(v2), ++ unop(Iop_NotV128, get_vr_qw(v3)))); ++ ++ return "voc"; ++} ++ ++static const HChar * ++s390_irgen_VNN(UChar v1, UChar v2, UChar v3) ++{ ++ put_vr_qw(v1, unop(Iop_NotV128, ++ binop(Iop_AndV128, get_vr_qw(v2), get_vr_qw(v3)))); ++ ++ return "vnn"; ++} ++ + static const HChar * + s390_irgen_VNO(UChar v1, UChar v2, UChar v3) + { +@@ -16691,6 +16853,15 @@ s390_irgen_VNO(UChar v1, UChar v2, UChar v3) + return "vno"; + } + ++static const HChar * ++s390_irgen_VNX(UChar v1, UChar v2, UChar v3) ++{ ++ put_vr_qw(v1, unop(Iop_NotV128, ++ binop(Iop_XorV128, get_vr_qw(v2), get_vr_qw(v3)))); ++ ++ return "vnx"; ++} ++ + static const HChar * + s390_irgen_LZRF(UChar r1, IRTemp op2addr) + { +@@ -17499,9 +17670,19 @@ s390_irgen_VCTZ(UChar v1, UChar v2, UChar m3) + static const HChar * + s390_irgen_VPOPCT(UChar v1, UChar v2, UChar m3) + { +- vassert(m3 == 0); ++ s390_insn_assert("vpopct", m3 <= 3); ++ ++ IRExpr* cnt = unop(Iop_Cnt8x16, get_vr_qw(v2)); + +- put_vr_qw(v1, unop(Iop_Cnt8x16, get_vr_qw(v2))); ++ if (m3 >= 1) { ++ cnt = unop(Iop_PwAddL8Ux16, cnt); ++ if (m3 >= 2) { ++ cnt = unop(Iop_PwAddL16Ux8, cnt); ++ if (m3 == 3) ++ cnt = unop(Iop_PwAddL32Ux4, cnt); ++ } ++ } ++ put_vr_qw(v1, cnt); + + return "vpopct"; + } +@@ -18335,12 +18516,53 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5) + return "vmalh"; + } + ++static const HChar * ++s390_irgen_VMSL(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) ++{ ++ s390_insn_assert("vmsl", m5 == 3 && (m6 & 3) == 0); ++ ++ IRDirty* d; ++ 
IRTemp cc = newTemp(Ity_I64); ++ ++ s390x_vec_op_details_t details = { .serialized = 0ULL }; ++ details.op = S390_VEC_OP_VMSL; ++ details.v1 = v1; ++ details.v2 = v2; ++ details.v3 = v3; ++ details.v4 = v4; ++ details.m4 = m5; ++ details.m5 = m6; ++ ++ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", ++ &s390x_dirtyhelper_vec_op, ++ mkIRExprVec_2(IRExpr_GSPTR(), ++ mkU64(details.serialized))); ++ ++ d->nFxState = 4; ++ vex_bzero(&d->fxState, sizeof(d->fxState)); ++ d->fxState[0].fx = Ifx_Read; ++ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); ++ d->fxState[0].size = sizeof(V128); ++ d->fxState[1].fx = Ifx_Read; ++ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); ++ d->fxState[1].size = sizeof(V128); ++ d->fxState[2].fx = Ifx_Read; ++ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128); ++ d->fxState[2].size = sizeof(V128); ++ d->fxState[3].fx = Ifx_Write; ++ d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); ++ d->fxState[3].size = sizeof(V128); ++ ++ stmt(IRStmt_Dirty(d)); ++ ++ return "vmsl"; ++} ++ + static void +-s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, ++s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding, + UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4); +- UChar maxIndex = isSingleElementOp ? 0 : 1; + + /* For Iop_F32toF64 we do this: + f32[0] -> f64[0] +@@ -18353,14 +18575,21 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, + The magic below with scaling factors is used to achieve the logic + described above. + */ +- const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1; +- const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 
2 : 1; +- +- const Bool isUnary = (op == Iop_F32toF64); +- for (UChar i = 0; i <= maxIndex; i++) { ++ Int size_diff = sizeofIRType(toType) - sizeofIRType(fromType); ++ const UChar sourceIndexScaleFactor = size_diff > 0 ? 2 : 1; ++ const UChar destinationIndexScaleFactor = size_diff < 0 ? 2 : 1; ++ UChar n_elem = (isSingleElementOp ? 1 : ++ 16 / (size_diff > 0 ? ++ sizeofIRType(toType) : sizeofIRType(fromType))); ++ ++ for (UChar i = 0; i < n_elem; i++) { + IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor); + IRExpr* result; +- if (!isUnary) { ++ if (rounding) { ++ if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { ++ emulation_warning(EmWarn_S390X_fpext_rounding); ++ m5 = S390_BFP_ROUND_PER_FPC; ++ } + result = binop(op, + mkexpr(encode_bfp_rounding_mode(m5)), + argument); +@@ -18369,10 +18598,6 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, + } + put_vr(v1, toType, i * destinationIndexScaleFactor, result); + } +- +- if (isSingleElementOp) { +- put_vr_dw1(v1, mkU64(0)); +- } + } + + static const HChar * +@@ -18380,12 +18605,8 @@ s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { + s390_insn_assert("vcdg", m3 == 3); + +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- emulation_warning(EmWarn_S390X_fpext_rounding); +- m5 = S390_BFP_ROUND_PER_FPC; +- } +- +- s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); ++ s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True, ++ v1, v2, m3, m4, m5); + + return "vcdg"; + } +@@ -18395,12 +18616,8 @@ s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { + s390_insn_assert("vcdlg", m3 == 3); + +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- emulation_warning(EmWarn_S390X_fpext_rounding); +- m5 = S390_BFP_ROUND_PER_FPC; +- } +- +- s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5); ++ s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True, ++ v1, v2, m3, 
m4, m5); + + return "vcdlg"; + } +@@ -18410,12 +18627,8 @@ s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { + s390_insn_assert("vcgd", m3 == 3); + +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- emulation_warning(EmWarn_S390X_fpext_rounding); +- m5 = S390_BFP_ROUND_PER_FPC; +- } +- +- s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); ++ s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True, ++ v1, v2, m3, m4, m5); + + return "vcgd"; + } +@@ -18425,12 +18638,8 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { + s390_insn_assert("vclgd", m3 == 3); + +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- emulation_warning(EmWarn_S390X_fpext_rounding); +- m5 = S390_BFP_ROUND_PER_FPC; +- } +- +- s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5); ++ s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True, ++ v1, v2, m3, m4, m5); + + return "vclgd"; + } +@@ -18438,246 +18647,262 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + static const HChar * + s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vfi", m3 == 3); ++ s390_insn_assert("vfi", ++ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); + +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- emulation_warning(EmWarn_S390X_fpext_rounding); +- m5 = S390_BFP_ROUND_PER_FPC; ++ switch (m3) { ++ case 2: s390_vector_fp_convert(Iop_RoundF32toInt, Ity_F32, Ity_F32, True, ++ v1, v2, m3, m4, m5); break; ++ case 3: s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, True, ++ v1, v2, m3, m4, m5); break; ++ case 4: s390_vector_fp_convert(Iop_RoundF128toInt, Ity_F128, Ity_F128, True, ++ v1, v2, m3, m4, m5); break; + } + +- s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, +- v1, v2, m3, m4, m5); +- +- return "vcgld"; ++ return "vfi"; + } + + static const HChar * +-s390_irgen_VLDE(UChar v1, UChar 
v2, UChar m3, UChar m4, UChar m5) ++s390_irgen_VFLL(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vlde", m3 == 2); ++ s390_insn_assert("vfll", m3 == 2 || (s390_host_has_vxe && m3 == 3)); + +- s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5); ++ if (m3 == 2) ++ s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, False, ++ v1, v2, m3, m4, m5); ++ else ++ s390_vector_fp_convert(Iop_F64toF128, Ity_F64, Ity_F128, False, ++ v1, v2, m3, m4, m5); + +- return "vlde"; ++ return "vfll"; + } + + static const HChar * +-s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) ++s390_irgen_VFLR(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vled", m3 == 3); +- +- if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) { +- m5 = S390_BFP_ROUND_PER_FPC; +- } ++ s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 2)); + +- s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5); ++ if (m3 == 3) ++ s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, True, ++ v1, v2, m3, m4, m5); ++ else ++ s390_vector_fp_convert(Iop_F128toF64, Ity_F128, Ity_F64, True, ++ v1, v2, m3, m4, m5); + +- return "vled"; ++ return "vflr"; + } + + static const HChar * + s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5) + { +- s390_insn_assert("vfpso", m3 == 3); +- +- IRExpr* result; +- switch (m5) { +- case 0: { +- /* Invert sign */ +- if (!s390_vr_is_single_element_control_set(m4)) { +- result = unop(Iop_Neg64Fx2, get_vr_qw(v2)); +- } +- else { +- result = binop(Iop_64HLtoV128, +- unop(Iop_ReinterpF64asI64, +- unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))), +- mkU64(0)); +- } +- break; +- } ++ s390_insn_assert("vfpso", m5 <= 2 && ++ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); + +- case 1: { +- /* Set sign to negative */ +- IRExpr* highHalf = mkU64(0x8000000000000000ULL); +- if (!s390_vr_is_single_element_control_set(m4)) { +- IRExpr* lowHalf = highHalf; +- 
IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf); +- result = binop(Iop_OrV128, get_vr_qw(v2), mask); +- } +- else { +- result = binop(Iop_64HLtoV128, +- binop(Iop_Or64, get_vr_dw0(v2), highHalf), +- mkU64(0ULL)); +- } ++ Bool single = s390_vr_is_single_element_control_set(m4) || m3 == 4; ++ IRType type = single ? s390_vr_get_ftype(m3) : Ity_V128; ++ int idx = 2 * (m3 - 2) + (single ? 0 : 1); + +- break; +- } +- +- case 2: { +- /* Set sign to positive */ +- if (!s390_vr_is_single_element_control_set(m4)) { +- result = unop(Iop_Abs64Fx2, get_vr_qw(v2)); +- } +- else { +- result = binop(Iop_64HLtoV128, +- unop(Iop_ReinterpF64asI64, +- unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))), +- mkU64(0)); +- } +- +- break; +- } +- +- default: +- vpanic("s390_irgen_VFPSO: Invalid m5 value"); +- } ++ static const IROp negate_ops[] = { ++ Iop_NegF32, Iop_Neg32Fx4, ++ Iop_NegF64, Iop_Neg64Fx2, ++ Iop_NegF128 ++ }; ++ static const IROp abs_ops[] = { ++ Iop_AbsF32, Iop_Abs32Fx4, ++ Iop_AbsF64, Iop_Abs64Fx2, ++ Iop_AbsF128 ++ }; + +- put_vr_qw(v1, result); +- if (s390_vr_is_single_element_control_set(m4)) { +- put_vr_dw1(v1, mkU64(0ULL)); ++ if (m5 == 1) { ++ /* Set sign to negative */ ++ put_vr(v1, type, 0, ++ unop(negate_ops[idx], ++ unop(abs_ops[idx], get_vr(v2, type, 0)))); ++ } else { ++ /* m5 == 0: invert sign; m5 == 2: set sign to positive */ ++ const IROp *ops = m5 == 2 ? 
abs_ops : negate_ops; ++ put_vr(v1, type, 0, unop(ops[idx], get_vr(v2, type, 0))); + } + + return "vfpso"; + } + +-static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp, +- UChar v1, UChar v2, UChar v3, UChar m4, +- UChar m5) ++static const HChar * ++s390x_vec_fp_binary_op(const HChar* mnm, const IROp ops[], ++ UChar v1, UChar v2, UChar v3, ++ UChar m4, UChar m5) + { +- IRExpr* result; +- if (!s390_vr_is_single_element_control_set(m5)) { +- result = triop(generalOp, get_bfp_rounding_mode_from_fpc(), +- get_vr_qw(v2), get_vr_qw(v3)); ++ s390_insn_assert(mnm, (m5 & 7) == 0 && ++ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); ++ ++ int idx = 2 * (m4 - 2); ++ ++ if (m4 == 4 || s390_vr_is_single_element_control_set(m5)) { ++ IRType type = s390_vr_get_ftype(m4); ++ put_vr(v1, type, 0, ++ triop(ops[idx], get_bfp_rounding_mode_from_fpc(), ++ get_vr(v2, type, 0), get_vr(v3, type, 0))); + } else { +- IRExpr* highHalf = triop(singleElementOp, +- get_bfp_rounding_mode_from_fpc(), +- get_vr(v2, Ity_F64, 0), +- get_vr(v3, Ity_F64, 0)); +- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), +- mkU64(0ULL)); ++ put_vr_qw(v1, triop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), ++ get_vr_qw(v2), get_vr_qw(v3))); + } + +- put_vr_qw(v1, result); ++ return mnm; + } + +-static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp, +- UChar v1, UChar v2, UChar m3, UChar m4) ++static const HChar * ++s390x_vec_fp_unary_op(const HChar* mnm, const IROp ops[], ++ UChar v1, UChar v2, UChar m3, UChar m4) + { +- IRExpr* result; +- if (!s390_vr_is_single_element_control_set(m4)) { +- result = binop(generalOp, get_bfp_rounding_mode_from_fpc(), +- get_vr_qw(v2)); ++ s390_insn_assert(mnm, (m4 & 7) == 0 && ++ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); ++ ++ int idx = 2 * (m3 - 2); ++ ++ if (m3 == 4 || s390_vr_is_single_element_control_set(m4)) { ++ IRType type = s390_vr_get_ftype(m3); ++ put_vr(v1, type, 0, ++ binop(ops[idx], 
get_bfp_rounding_mode_from_fpc(), ++ get_vr(v2, type, 0))); + } + else { +- IRExpr* highHalf = binop(singleElementOp, +- get_bfp_rounding_mode_from_fpc(), +- get_vr(v2, Ity_F64, 0)); +- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), +- mkU64(0ULL)); ++ put_vr_qw(v1, binop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(), ++ get_vr_qw(v2))); + } + +- put_vr_qw(v1, result); ++ return mnm; + } + + +-static void +-s390_vector_fp_mulAddOrSub(IROp singleElementOp, +- UChar v1, UChar v2, UChar v3, UChar v4, +- UChar m5, UChar m6) ++static const HChar * ++s390_vector_fp_mulAddOrSub(UChar v1, UChar v2, UChar v3, UChar v4, ++ UChar m5, UChar m6, ++ const HChar* mnm, const IROp single_ops[], ++ Bool negate) + { +- Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); ++ s390_insn_assert(mnm, m6 == 3 || (s390_host_has_vxe && m6 >= 2 && m6 <= 4)); ++ ++ static const IROp negate_ops[] = { Iop_NegF32, Iop_NegF64, Iop_NegF128 }; ++ IRType type = s390_vr_get_ftype(m6); ++ Bool single = s390_vr_is_single_element_control_set(m5) || m6 == 4; ++ UChar n_elem = single ? 
1 : s390_vr_get_n_elem(m6); + IRTemp irrm_temp = newTemp(Ity_I32); + assign(irrm_temp, get_bfp_rounding_mode_from_fpc()); + IRExpr* irrm = mkexpr(irrm_temp); +- IRExpr* result; +- IRExpr* highHalf = qop(singleElementOp, +- irrm, +- get_vr(v2, Ity_F64, 0), +- get_vr(v3, Ity_F64, 0), +- get_vr(v4, Ity_F64, 0)); +- +- if (isSingleElementOp) { +- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), +- mkU64(0ULL)); +- } else { +- IRExpr* lowHalf = qop(singleElementOp, +- irrm, +- get_vr(v2, Ity_F64, 1), +- get_vr(v3, Ity_F64, 1), +- get_vr(v4, Ity_F64, 1)); +- result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf), +- unop(Iop_ReinterpF64asI64, lowHalf)); +- } + +- put_vr_qw(v1, result); ++ for (UChar idx = 0; idx < n_elem; idx++) { ++ IRExpr* result = qop(single_ops[m6 - 2], ++ irrm, ++ get_vr(v2, type, idx), ++ get_vr(v3, type, idx), ++ get_vr(v4, type, idx)); ++ put_vr(v1, type, idx, negate ? unop(negate_ops[m6 - 2], result) : result); ++ } ++ return mnm; + } + + static const HChar * + s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- s390_insn_assert("vfa", m4 == 3); +- s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5); +- return "vfa"; ++ static const IROp vfa_ops[] = { ++ Iop_AddF32, Iop_Add32Fx4, ++ Iop_AddF64, Iop_Add64Fx2, ++ Iop_AddF128, ++ }; ++ return s390x_vec_fp_binary_op("vfa", vfa_ops, v1, v2, v3, m4, m5); + } + + static const HChar * + s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- s390_insn_assert("vfs", m4 == 3); +- s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5); +- return "vfs"; ++ static const IROp vfs_ops[] = { ++ Iop_SubF32, Iop_Sub32Fx4, ++ Iop_SubF64, Iop_Sub64Fx2, ++ Iop_SubF128, ++ }; ++ return s390x_vec_fp_binary_op("vfs", vfs_ops, v1, v2, v3, m4, m5); + } + + static const HChar * + s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- s390_insn_assert("vfm", m4 == 3); +- s390x_vec_fp_binary_op(Iop_Mul64Fx2, 
Iop_MulF64, v1, v2, v3, m4, m5); +- return "vfm"; ++ static const IROp vfm_ops[] = { ++ Iop_MulF32, Iop_Mul32Fx4, ++ Iop_MulF64, Iop_Mul64Fx2, ++ Iop_MulF128, ++ }; ++ return s390x_vec_fp_binary_op("vfm", vfm_ops, v1, v2, v3, m4, m5); + } + + static const HChar * + s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5) + { +- s390_insn_assert("vfd", m4 == 3); +- s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5); +- return "vfd"; ++ static const IROp vfd_ops[] = { ++ Iop_DivF32, Iop_Div32Fx4, ++ Iop_DivF64, Iop_Div64Fx2, ++ Iop_DivF128, ++ }; ++ return s390x_vec_fp_binary_op("vfd", vfd_ops, v1, v2, v3, m4, m5); + } + + static const HChar * + s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4) + { +- s390_insn_assert("vfsq", m3 == 3); +- s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4); +- +- return "vfsq"; ++ static const IROp vfsq_ops[] = { ++ Iop_SqrtF32, Iop_Sqrt32Fx4, ++ Iop_SqrtF64, Iop_Sqrt64Fx2, ++ Iop_SqrtF128 ++ }; ++ return s390x_vec_fp_unary_op("vfsq", vfsq_ops, v1, v2, m3, m4); + } + ++static const IROp FMA_single_ops[] = { ++ Iop_MAddF32, Iop_MAddF64, Iop_MAddF128 ++}; ++ + static const HChar * + s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + { +- s390_insn_assert("vfma", m6 == 3); +- s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6); +- return "vfma"; ++ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, ++ "vfma", FMA_single_ops, False); + } + ++static const HChar * ++s390_irgen_VFNMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) ++{ ++ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, ++ "vfnma", FMA_single_ops, True); ++} ++ ++static const IROp FMS_single_ops[] = { ++ Iop_MSubF32, Iop_MSubF64, Iop_MSubF128 ++}; ++ + static const HChar * + s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) + { +- s390_insn_assert("vfms", m6 == 3); +- s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6); +- 
return "vfms"; ++ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, ++ "vfms", FMS_single_ops, False); ++} ++ ++static const HChar * ++s390_irgen_VFNMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6) ++{ ++ return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6, ++ "vfnms", FMS_single_ops, True); + } + + static const HChar * + s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4) + { +- s390_insn_assert("wfc", m3 == 3); +- s390_insn_assert("wfc", m4 == 0); ++ s390_insn_assert("wfc", m4 == 0 && ++ (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4))); ++ ++ static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; ++ IRType type = s390_vr_get_ftype(m3); + + IRTemp cc_vex = newTemp(Ity_I32); +- assign(cc_vex, binop(Iop_CmpF64, +- get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0))); ++ assign(cc_vex, binop(ops[m3 - 2], get_vr(v1, type, 0), get_vr(v2, type, 0))); + + IRTemp cc_s390 = newTemp(Ity_I32); + assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex)); +@@ -18695,213 +18920,253 @@ s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4) + } + + static const HChar * +-s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6, ++ const HChar *mnem, IRCmpFResult cmp, Bool equal_ok, ++ IROp cmp32, IROp cmp64) + { +- s390_insn_assert("vfce", m4 == 3); ++ s390_insn_assert(mnem, (m5 & 3) == 0 && (m6 & 14) == 0 && ++ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); + +- Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); +- if (!s390_vr_is_cs_set(m6)) { +- if (!isSingleElementOp) { +- put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3))); ++ Bool single = s390_vr_is_single_element_control_set(m5) || m4 == 4; ++ ++ if (single) { ++ static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 }; ++ IRType type = s390_vr_get_ftype(m4); ++ IRTemp result = newTemp(Ity_I32); ++ IRTemp cond = newTemp(Ity_I1); ++ ++ 
assign(result, binop(ops[m4 - 2], ++ get_vr(v2, type, 0), get_vr(v3, type, 0))); ++ if (equal_ok) { ++ assign(cond, ++ binop(Iop_Or1, ++ binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)), ++ binop(Iop_CmpEQ32, mkexpr(result), mkU32(Ircr_EQ)))); + } else { +- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), +- get_vr(v3, Ity_F64, 0)); +- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, +- mkU32(Ircr_EQ)), +- mkU64(0xffffffffffffffffULL), +- mkU64(0ULL)); +- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); ++ assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp))); ++ } ++ put_vr_qw(v1, mkite(mkexpr(cond), ++ IRExpr_Const(IRConst_V128(0xffff)), ++ IRExpr_Const(IRConst_V128(0)))); ++ if (s390_vr_is_cs_set(m6)) { ++ IRTemp cc = newTemp(Ity_I64); ++ assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3))); ++ s390_cc_set(cc); + } + } else { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFCE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; +- details.m6 = m6; ++ IRTemp result = newTemp(Ity_V128); ++ ++ assign(result, binop(m4 == 2 ? 
cmp32 : cmp64, ++ get_vr_qw(v2), get_vr_qw(v3))); ++ put_vr_qw(v1, mkexpr(result)); ++ if (s390_vr_is_cs_set(m6)) { ++ IRTemp cc = newTemp(Ity_I64); ++ assign(cc, ++ mkite(binop(Iop_CmpEQ64, ++ binop(Iop_And64, ++ unop(Iop_V128to64, mkexpr(result)), ++ unop(Iop_V128HIto64, mkexpr(result))), ++ mkU64(-1ULL)), ++ mkU64(0), /* all comparison results are true */ ++ mkite(binop(Iop_CmpEQ64, ++ binop(Iop_Or64, ++ unop(Iop_V128to64, mkexpr(result)), ++ unop(Iop_V128HIto64, mkexpr(result))), ++ mkU64(0)), ++ mkU64(3), /* all false */ ++ mkU64(1)))); /* mixed true/false */ ++ s390_cc_set(cc); ++ } ++ } + +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ return mnem; ++} + +- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = elementSize; +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = elementSize; +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++static const HChar * ++s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++{ ++ return s390_irgen_VFCx(v1, v2, v3, m4, m5, m6, "vfce", Ircr_EQ, ++ False, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2); ++} + +- stmt(IRStmt_Dirty(d)); +- s390_cc_set(cc); +- } ++static const HChar * ++s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++{ ++ /* Swap arguments and compare "low" instead. 
*/ ++ return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfch", Ircr_LT, ++ False, Iop_CmpLT32Fx4, Iop_CmpLT64Fx2); ++} + +- return "vfce"; ++static const HChar * ++s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++{ ++ /* Swap arguments and compare "low or equal" instead. */ ++ return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfche", Ircr_LT, ++ True, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2); + } + + static const HChar * +-s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) + { +- vassert(m4 == 3); ++ s390_insn_assert("vftci", ++ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); +- if (!s390_vr_is_cs_set(m6)) { +- if (!isSingleElementOp) { +- put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2))); +- } else { +- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0), +- get_vr(v3, Ity_F64, 0)); +- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, +- mkU32(Ircr_GT)), +- mkU64(0xffffffffffffffffULL), +- mkU64(0ULL)); +- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); +- } +- } +- else { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); + +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFCH; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; +- details.m6 = m6; ++ IRDirty* d; ++ IRTemp cc = newTemp(Ity_I64); + +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ s390x_vec_op_details_t details = { .serialized = 0ULL }; ++ details.op = S390_VEC_OP_VFTCI; ++ details.v1 = v1; ++ details.v2 = v2; ++ details.i3 = i3; ++ details.m4 = m4; ++ details.m5 = m5; + +- const UChar elementSize = isSingleElementOp ? 
sizeof(ULong) : sizeof(V128); +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = elementSize; +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = elementSize; +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", ++ &s390x_dirtyhelper_vec_op, ++ mkIRExprVec_2(IRExpr_GSPTR(), ++ mkU64(details.serialized))); + +- stmt(IRStmt_Dirty(d)); +- s390_cc_set(cc); +- } ++ const UChar elementSize = isSingleElementOp ? ++ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); ++ d->nFxState = 2; ++ vex_bzero(&d->fxState, sizeof(d->fxState)); ++ d->fxState[0].fx = Ifx_Read; ++ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); ++ d->fxState[0].size = elementSize; ++ d->fxState[1].fx = Ifx_Write; ++ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); ++ d->fxState[1].size = sizeof(V128); ++ ++ stmt(IRStmt_Dirty(d)); ++ s390_cc_set(cc); + +- return "vfch"; ++ return "vftci"; + } + + static const HChar * +-s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) ++s390_irgen_VFMIN(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) + { +- s390_insn_assert("vfche", m4 == 3); ++ s390_insn_assert("vfmin", ++ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); +- if (!s390_vr_is_cs_set(m6)) { +- if (!isSingleElementOp) { +- put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2))); +- } +- else { +- IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0), +- get_vr(v2, Ity_F64, 0)); +- IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult, +- 
mkU32(Ircr_LT)), +- mkU64(0xffffffffffffffffULL), +- mkU64(0ULL)); +- put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL))); +- } +- } +- else { +- IRDirty* d; +- IRTemp cc = newTemp(Ity_I64); +- +- s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFCHE; +- details.v1 = v1; +- details.v2 = v2; +- details.v3 = v3; +- details.m4 = m4; +- details.m5 = m5; +- details.m6 = m6; ++ IRDirty* d; ++ IRTemp cc = newTemp(Ity_I64); + +- d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", +- &s390x_dirtyhelper_vec_op, +- mkIRExprVec_2(IRExpr_GSPTR(), +- mkU64(details.serialized))); ++ s390x_vec_op_details_t details = { .serialized = 0ULL }; ++ details.op = S390_VEC_OP_VFMIN; ++ details.v1 = v1; ++ details.v2 = v2; ++ details.v3 = v3; ++ details.m4 = m4; ++ details.m5 = m5; ++ details.m6 = m6; + +- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); +- d->nFxState = 3; +- vex_bzero(&d->fxState, sizeof(d->fxState)); +- d->fxState[0].fx = Ifx_Read; +- d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); +- d->fxState[0].size = elementSize; +- d->fxState[1].fx = Ifx_Read; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); +- d->fxState[1].size = elementSize; +- d->fxState[2].fx = Ifx_Write; +- d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[2].size = sizeof(V128); ++ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", ++ &s390x_dirtyhelper_vec_op, ++ mkIRExprVec_2(IRExpr_GSPTR(), ++ mkU64(details.serialized))); + +- stmt(IRStmt_Dirty(d)); +- s390_cc_set(cc); +- } ++ const UChar elementSize = isSingleElementOp ? 
++ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); ++ d->nFxState = 3; ++ vex_bzero(&d->fxState, sizeof(d->fxState)); ++ d->fxState[0].fx = Ifx_Read; ++ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); ++ d->fxState[0].size = elementSize; ++ d->fxState[1].fx = Ifx_Read; ++ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); ++ d->fxState[1].size = elementSize; ++ d->fxState[2].fx = Ifx_Write; ++ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); ++ d->fxState[2].size = sizeof(V128); + +- return "vfche"; ++ stmt(IRStmt_Dirty(d)); ++ s390_cc_set(cc); ++ return "vfmin"; + } + + static const HChar * +-s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5) ++s390_irgen_VFMAX(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6) + { +- s390_insn_assert("vftci", m4 == 3); ++ s390_insn_assert("vfmax", ++ (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4))); + + Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5); +- + IRDirty* d; + IRTemp cc = newTemp(Ity_I64); + + s390x_vec_op_details_t details = { .serialized = 0ULL }; +- details.op = S390_VEC_OP_VFTCI; ++ details.op = S390_VEC_OP_VFMAX; + details.v1 = v1; + details.v2 = v2; +- details.i3 = i3; ++ details.v3 = v3; + details.m4 = m4; + details.m5 = m5; ++ details.m6 = m6; + + d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", + &s390x_dirtyhelper_vec_op, + mkIRExprVec_2(IRExpr_GSPTR(), + mkU64(details.serialized))); + +- const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128); +- d->nFxState = 2; ++ const UChar elementSize = isSingleElementOp ? 
++ sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128); ++ d->nFxState = 3; + vex_bzero(&d->fxState, sizeof(d->fxState)); + d->fxState[0].fx = Ifx_Read; + d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); + d->fxState[0].size = elementSize; +- d->fxState[1].fx = Ifx_Write; +- d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); +- d->fxState[1].size = sizeof(V128); ++ d->fxState[1].fx = Ifx_Read; ++ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); ++ d->fxState[1].size = elementSize; ++ d->fxState[2].fx = Ifx_Write; ++ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); ++ d->fxState[2].size = sizeof(V128); + + stmt(IRStmt_Dirty(d)); + s390_cc_set(cc); ++ return "vfmax"; ++} + +- return "vftci"; ++static const HChar * ++s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3) ++{ ++ IRDirty* d; ++ IRTemp cc = newTemp(Ity_I64); ++ ++ s390x_vec_op_details_t details = { .serialized = 0ULL }; ++ details.op = S390_VEC_OP_VBPERM; ++ details.v1 = v1; ++ details.v2 = v2; ++ details.v3 = v3; ++ details.m4 = 0; ++ details.m5 = 0; ++ details.m6 = 0; ++ ++ d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op", ++ &s390x_dirtyhelper_vec_op, ++ mkIRExprVec_2(IRExpr_GSPTR(), ++ mkU64(details.serialized))); ++ ++ d->nFxState = 3; ++ vex_bzero(&d->fxState, sizeof(d->fxState)); ++ d->fxState[0].fx = Ifx_Read; ++ d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128); ++ d->fxState[0].size = sizeof(V128); ++ d->fxState[1].fx = Ifx_Read; ++ d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128); ++ d->fxState[1].size = sizeof(V128); ++ d->fxState[2].fx = Ifx_Write; ++ d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128); ++ d->fxState[2].size = sizeof(V128); ++ ++ stmt(IRStmt_Dirty(d)); ++ s390_cc_set(cc); ++ return "vbperm"; + } + + /* New insns are added here. 
+@@ -20489,11 +20754,23 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + RXY_dl2(ovl), + RXY_dh2(ovl)); goto ok; + case 0xe60000000034ULL: /* VPKZ */ goto unimplemented; +- case 0xe60000000035ULL: /* VLRL */ goto unimplemented; +- case 0xe60000000037ULL: /* VLRLR */ goto unimplemented; ++ case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl), ++ VSI_b2(ovl), VSI_d2(ovl), ++ VSI_i3(ovl), ++ VSI_rxb(ovl)); goto ok; ++ case 0xe60000000037ULL: s390_format_VRS_RRDV(s390_irgen_VLRLR, VRSd_v1(ovl), ++ VRSd_r3(ovl), VRS_b2(ovl), ++ VRS_d2(ovl), ++ VRS_rxb(ovl)); goto ok; + case 0xe6000000003cULL: /* VUPKZ */ goto unimplemented; +- case 0xe6000000003dULL: /* VSTRL */ goto unimplemented; +- case 0xe6000000003fULL: /* VSTRLR */ goto unimplemented; ++ case 0xe6000000003dULL: s390_format_VSI_URDV(s390_irgen_VSTRL, VSI_v1(ovl), ++ VSI_b2(ovl), VSI_d2(ovl), ++ VSI_i3(ovl), ++ VSI_rxb(ovl)); goto ok; ++ case 0xe6000000003fULL: s390_format_VRS_RRDV(s390_irgen_VSTRLR, VRSd_v1(ovl), ++ VRSd_r3(ovl), VRS_b2(ovl), ++ VRS_d2(ovl), ++ VRS_rxb(ovl)); goto ok; + case 0xe60000000049ULL: /* VLIP */ goto unimplemented; + case 0xe60000000050ULL: /* VCVB */ goto unimplemented; + case 0xe60000000052ULL: /* VCVBG */ goto unimplemented; +@@ -20691,12 +20968,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe7000000006bULL: s390_format_VRR_VVV(s390_irgen_VNO, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_rxb(ovl)); goto ok; +- case 0xe7000000006cULL: /* VNX */ goto unimplemented; ++ case 0xe7000000006cULL: s390_format_VRR_VVV(s390_irgen_VNX, VRR_v1(ovl), ++ VRR_v2(ovl), VRR_r3(ovl), ++ VRR_rxb(ovl)); goto ok; + case 0xe7000000006dULL: s390_format_VRR_VVV(s390_irgen_VX, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_rxb(ovl)); goto ok; +- case 0xe7000000006eULL: /* VNN */ goto unimplemented; +- case 0xe7000000006fULL: /* VOC */ goto unimplemented; ++ case 0xe7000000006eULL: s390_format_VRR_VVV(s390_irgen_VNN, VRR_v1(ovl), ++ VRR_v2(ovl), VRR_r3(ovl), ++ 
VRR_rxb(ovl)); goto ok; ++ case 0xe7000000006fULL: s390_format_VRR_VVV(s390_irgen_VOC, VRR_v1(ovl), ++ VRR_v2(ovl), VRR_r3(ovl), ++ VRR_rxb(ovl)); goto ok; + case 0xe70000000070ULL: s390_format_VRR_VVVM(s390_irgen_VESLV, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +@@ -20749,7 +21032,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe70000000084ULL: s390_format_VRR_VVVM(s390_irgen_VPDI, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +- case 0xe70000000085ULL: /* VBPERM */ goto unimplemented; ++ case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl), ++ VRR_v2(ovl), VRR_r3(ovl), ++ VRR_rxb(ovl)); goto ok; + case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl), + VRRd_v2(ovl), VRRd_v3(ovl), + VRRd_v4(ovl), VRRd_m5(ovl), +@@ -20780,8 +21065,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe70000000097ULL: s390_format_VRR_VVVMM(s390_irgen_VPKS, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_m5(ovl), VRR_rxb(ovl)); goto ok; +- case 0xe7000000009eULL: /* VFNMS */ goto unimplemented; +- case 0xe7000000009fULL: /* VFNMA */ goto unimplemented; ++ case 0xe7000000009eULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMS, VRRe_v1(ovl), ++ VRRe_v2(ovl), VRRe_v3(ovl), ++ VRRe_v4(ovl), VRRe_m5(ovl), ++ VRRe_m6(ovl), ++ VRRe_rxb(ovl)); goto ok; ++ case 0xe7000000009fULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMA, VRRe_v1(ovl), ++ VRRe_v2(ovl), VRRe_v3(ovl), ++ VRRe_v4(ovl), VRRe_m5(ovl), ++ VRRe_m6(ovl), ++ VRRe_rxb(ovl)); goto ok; + case 0xe700000000a1ULL: s390_format_VRR_VVVM(s390_irgen_VMLH, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +@@ -20834,7 +21127,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + case 0xe700000000b4ULL: s390_format_VRR_VVVM(s390_irgen_VGFM, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +- case 0xe700000000b8ULL: /* VMSL */ goto 
unimplemented; ++ case 0xe700000000b8ULL: s390_format_VRR_VVVVMM(s390_irgen_VMSL, VRRd_v1(ovl), ++ VRRd_v2(ovl), VRRd_v3(ovl), ++ VRRd_v4(ovl), VRRd_m5(ovl), ++ VRRd_m6(ovl), ++ VRRd_rxb(ovl)); goto ok; + case 0xe700000000b9ULL: s390_format_VRRd_VVVVM(s390_irgen_VACCC, VRRd_v1(ovl), + VRRd_v2(ovl), VRRd_v3(ovl), + VRRd_v4(ovl), VRRd_m5(ovl), +@@ -20871,11 +21168,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + VRRa_v2(ovl), VRRa_m3(ovl), + VRRa_m4(ovl), VRRa_m5(ovl), + VRRa_rxb(ovl)); goto ok; +- case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, VRRa_v1(ovl), ++ case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLL, VRRa_v1(ovl), + VRRa_v2(ovl), VRRa_m3(ovl), + VRRa_m4(ovl), VRRa_m5(ovl), + VRRa_rxb(ovl)); goto ok; +- case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, VRRa_v1(ovl), ++ case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLR, VRRa_v1(ovl), + VRRa_v2(ovl), VRRa_m3(ovl), + VRRa_m4(ovl), VRRa_m5(ovl), + VRRa_rxb(ovl)); goto ok; +@@ -20956,8 +21253,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes) + VRRa_m3(ovl), VRRa_m4(ovl), + VRRa_m5(ovl), + VRRa_rxb(ovl)); goto ok; +- case 0xe700000000eeULL: /* VFMIN */ goto unimplemented; +- case 0xe700000000efULL: /* VFMAX */ goto unimplemented; ++ case 0xe700000000eeULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMIN, VRRa_v1(ovl), ++ VRRa_v2(ovl), VRRa_v3(ovl), ++ VRRa_m3(ovl), VRRa_m4(ovl), ++ VRRa_m5(ovl), ++ VRRa_rxb(ovl)); goto ok; ++ case 0xe700000000efULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMAX, VRRa_v1(ovl), ++ VRRa_v2(ovl), VRRa_v3(ovl), ++ VRRa_m3(ovl), VRRa_m4(ovl), ++ VRRa_m5(ovl), ++ VRRa_rxb(ovl)); goto ok; + case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, VRR_v1(ovl), + VRR_v2(ovl), VRR_r3(ovl), + VRR_m4(ovl), VRR_rxb(ovl)); goto ok; +diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c +index 3b6121fec..8762975b2 100644 +--- a/VEX/priv/host_s390_defs.c ++++ b/VEX/priv/host_s390_defs.c +@@ -8,7 +8,7 @@ + This file is part 
of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 2010-2020 + Copyright (C) 2012-2017 Florian Krohm (britzel@acm.org) + + This program is free software; you can redistribute it and/or +@@ -684,6 +684,8 @@ s390_insn* genMove_S390(HReg from, HReg to, Bool mode64) + switch (hregClass(from)) { + case HRcInt64: + return s390_insn_move(sizeofIRType(Ity_I64), to, from); ++ case HRcFlt64: ++ return s390_insn_move(sizeofIRType(Ity_F64), to, from); + case HRcVec128: + return s390_insn_move(sizeofIRType(Ity_V128), to, from); + default: +@@ -7870,6 +7872,10 @@ s390_insn_as_string(const s390_insn *insn) + op = "v-vfloatabs"; + break; + ++ case S390_VEC_FLOAT_NABS: ++ op = "v-vfloatnabs"; ++ break; ++ + default: + goto fail; + } +@@ -9439,21 +9445,28 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn) + + case S390_VEC_FLOAT_NEG: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); +- vassert(insn->size == 8); ++ vassert(insn->size >= 4); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0); + } + case S390_VEC_FLOAT_ABS: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); +- vassert(insn->size == 8); ++ vassert(insn->size >= 4); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2); + } ++ case S390_VEC_FLOAT_NABS: { ++ vassert(insn->variant.unop.src.tag == S390_OPND_REG); ++ vassert(insn->size >= 4); ++ UChar v1 = hregNumber(insn->variant.unop.dst); ++ UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); ++ return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 1); ++ } + case S390_VEC_FLOAT_SQRT: { + vassert(insn->variant.unop.src.tag == S390_OPND_REG); +- vassert(insn->size == 8); ++ vassert(insn->size >= 
4); + UChar v1 = hregNumber(insn->variant.unop.dst); + UChar v2 = hregNumber(insn->variant.unop.src.variant.reg); + return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0); +diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h +index 3f6473e10..9b69f4d38 100644 +--- a/VEX/priv/host_s390_defs.h ++++ b/VEX/priv/host_s390_defs.h +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 2010-2020 + + This program is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as +@@ -205,6 +205,7 @@ typedef enum { + S390_VEC_COUNT_ONES, + S390_VEC_FLOAT_NEG, + S390_VEC_FLOAT_ABS, ++ S390_VEC_FLOAT_NABS, + S390_VEC_FLOAT_SQRT, + S390_UNOP_T_INVALID + } s390_unop_t; +@@ -931,6 +932,8 @@ extern UInt s390_host_hwcaps; + (s390_host_hwcaps & (VEX_HWCAPS_S390X_MSA5)) + #define s390_host_has_lsc2 \ + (s390_host_hwcaps & (VEX_HWCAPS_S390X_LSC2)) ++#define s390_host_has_vxe \ ++ (s390_host_hwcaps & (VEX_HWCAPS_S390X_VXE)) + #endif /* ndef __VEX_HOST_S390_DEFS_H */ + + /*---------------------------------------------------------------*/ +diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c +index 134f3eb6f..2f9854038 100644 +--- a/VEX/priv/host_s390_isel.c ++++ b/VEX/priv/host_s390_isel.c +@@ -8,7 +8,7 @@ + This file is part of Valgrind, a dynamic binary instrumentation + framework. + +- Copyright IBM Corp. 2010-2017 ++ Copyright IBM Corp. 
2010-2020 + Copyright (C) 2012-2017 Florian Krohm (britzel@acm.org) + + This program is free software; you can redistribute it and/or +@@ -2362,9 +2362,10 @@ s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env, + case Iop_NegF128: + if (left->tag == Iex_Unop && + (left->Iex.Unop.op == Iop_AbsF32 || +- left->Iex.Unop.op == Iop_AbsF64)) ++ left->Iex.Unop.op == Iop_AbsF64)) { + bfpop = S390_BFP_NABS; +- else ++ left = left->Iex.Unop.arg; ++ } else + bfpop = S390_BFP_NEG; + goto float128_opnd; + case Iop_AbsF128: bfpop = S390_BFP_ABS; goto float128_opnd; +@@ -2726,9 +2727,10 @@ s390_isel_float_expr_wrk(ISelEnv *env, IRExpr *expr) + case Iop_NegF64: + if (left->tag == Iex_Unop && + (left->Iex.Unop.op == Iop_AbsF32 || +- left->Iex.Unop.op == Iop_AbsF64)) ++ left->Iex.Unop.op == Iop_AbsF64)) { + bfpop = S390_BFP_NABS; +- else ++ left = left->Iex.Unop.arg; ++ } else + bfpop = S390_BFP_NEG; + break; + +@@ -3944,11 +3946,27 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + vec_unop = S390_VEC_COUNT_ONES; + goto Iop_V_wrk; + ++ case Iop_Neg32Fx4: ++ size = 4; ++ vec_unop = S390_VEC_FLOAT_NEG; ++ if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs32Fx4) { ++ vec_unop = S390_VEC_FLOAT_NABS; ++ arg = arg->Iex.Unop.arg; ++ } ++ goto Iop_V_wrk; + case Iop_Neg64Fx2: + size = 8; + vec_unop = S390_VEC_FLOAT_NEG; ++ if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs64Fx2) { ++ vec_unop = S390_VEC_FLOAT_NABS; ++ arg = arg->Iex.Unop.arg; ++ } + goto Iop_V_wrk; + ++ case Iop_Abs32Fx4: ++ size = 4; ++ vec_unop = S390_VEC_FLOAT_ABS; ++ goto Iop_V_wrk; + case Iop_Abs64Fx2: + size = 8; + vec_unop = S390_VEC_FLOAT_ABS; +@@ -4474,17 +4492,29 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + vec_binop = S390_VEC_ELEM_ROLL_V; + goto Iop_VV_wrk; + ++ case Iop_CmpEQ32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL; ++ goto Iop_VV_wrk; + case Iop_CmpEQ64Fx2: + size = 8; + vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL; + goto Iop_VV_wrk; + ++ case 
Iop_CmpLE32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; ++ goto Iop_VV_wrk; + case Iop_CmpLE64Fx2: { + size = 8; + vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL; + goto Iop_VV_wrk; + } + ++ case Iop_CmpLT32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_COMPARE_LESS; ++ goto Iop_VV_wrk; + case Iop_CmpLT64Fx2: { + size = 8; + vec_binop = S390_VEC_FLOAT_COMPARE_LESS; +@@ -4671,20 +4701,41 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr) + dst, reg1, reg2, reg3)); + return dst; + ++ case Iop_Add32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_ADD; ++ goto Iop_irrm_VV_wrk; ++ + case Iop_Add64Fx2: + size = 8; + vec_binop = S390_VEC_FLOAT_ADD; + goto Iop_irrm_VV_wrk; + ++ case Iop_Sub32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_SUB; ++ goto Iop_irrm_VV_wrk; ++ + case Iop_Sub64Fx2: + size = 8; + vec_binop = S390_VEC_FLOAT_SUB; + goto Iop_irrm_VV_wrk; + ++ case Iop_Mul32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_MUL; ++ goto Iop_irrm_VV_wrk; ++ + case Iop_Mul64Fx2: + size = 8; + vec_binop = S390_VEC_FLOAT_MUL; + goto Iop_irrm_VV_wrk; ++ ++ case Iop_Div32Fx4: ++ size = 4; ++ vec_binop = S390_VEC_FLOAT_DIV; ++ goto Iop_irrm_VV_wrk; ++ + case Iop_Div64Fx2: + size = 8; + vec_binop = S390_VEC_FLOAT_DIV; +diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c +index 72f419988..12f521d8c 100644 +--- a/VEX/priv/main_main.c ++++ b/VEX/priv/main_main.c +@@ -1795,6 +1795,7 @@ static const HChar* show_hwcaps_s390x ( UInt hwcaps ) + { VEX_HWCAPS_S390X_MSA5, "msa5" }, + { VEX_HWCAPS_S390X_MI2, "mi2" }, + { VEX_HWCAPS_S390X_LSC2, "lsc2" }, ++ { VEX_HWCAPS_S390X_VXE, "vxe" }, + }; + /* Allocate a large enough buffer */ + static HChar buf[sizeof prefix + +diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h +index 53e3705da..2ffed0ad0 100644 +--- a/VEX/pub/libvex.h ++++ b/VEX/pub/libvex.h +@@ -171,7 +171,7 @@ typedef + #define VEX_HWCAPS_S390X_MSA5 (1<<19) /* message security assistance facility */ + #define VEX_HWCAPS_S390X_MI2 (1<<20) /* 
miscellaneous-instruction-extensions facility 2 */ + #define VEX_HWCAPS_S390X_LSC2 (1<<21) /* Conditional load/store facility2 */ +- ++#define VEX_HWCAPS_S390X_VXE (1<<22) /* Vector-enhancements facility */ + + /* Special value representing all available s390x hwcaps */ + #define VEX_HWCAPS_S390X_ALL (VEX_HWCAPS_S390X_LDISP | \ +@@ -189,7 +189,8 @@ typedef + VEX_HWCAPS_S390X_VX | \ + VEX_HWCAPS_S390X_MSA5 | \ + VEX_HWCAPS_S390X_MI2 | \ +- VEX_HWCAPS_S390X_LSC2) ++ VEX_HWCAPS_S390X_LSC2 | \ ++ VEX_HWCAPS_S390X_VXE) + + #define VEX_HWCAPS_S390X(x) ((x) & ~VEX_S390X_MODEL_MASK) + #define VEX_S390X_MODEL(x) ((x) & VEX_S390X_MODEL_MASK) +diff --git a/VEX/pub/libvex_emnote.h b/VEX/pub/libvex_emnote.h +index be033b4da..77880a270 100644 +--- a/VEX/pub/libvex_emnote.h ++++ b/VEX/pub/libvex_emnote.h +@@ -124,6 +124,10 @@ typedef + /* ppno insn is not supported on this host */ + EmFail_S390X_ppno, + ++ /* insn needs vector-enhancements facility which is not available on this ++ host */ ++ EmFail_S390X_vxe, ++ + EmNote_NUMBER + } + VexEmNote; +diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c +index 365942c4f..ba84fa6e9 100644 +--- a/coregrind/m_initimg/initimg-linux.c ++++ b/coregrind/m_initimg/initimg-linux.c +@@ -697,9 +697,13 @@ Addr setup_client_stack( void* init_sp, + } + # elif defined(VGP_s390x_linux) + { +- /* Advertise hardware features "below" TE and VXRS. TE itself +- and anything above VXRS is not supported by Valgrind. */ +- auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS; ++ /* Out of the hardware features available on the platform, ++ advertise those "below" TE, as well as the ones explicitly ++ ORed in the expression below. Anything else, such as TE ++ itself, is not supported by Valgrind. 
*/ ++ auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1) ++ | VKI_HWCAP_S390_VXRS ++ | VKI_HWCAP_S390_VXRS_EXT); + } + # elif defined(VGP_arm64_linux) + { +diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c +index e7877e636..228ae2554 100644 +--- a/coregrind/m_machine.c ++++ b/coregrind/m_machine.c +@@ -1555,6 +1555,7 @@ Bool VG_(machine_get_hwcaps)( void ) + { False, S390_FAC_MSA5, VEX_HWCAPS_S390X_MSA5, "MSA5" }, + { False, S390_FAC_MI2, VEX_HWCAPS_S390X_MI2, "MI2" }, + { False, S390_FAC_LSC2, VEX_HWCAPS_S390X_LSC2, "LSC2" }, ++ { False, S390_FAC_VXE, VEX_HWCAPS_S390X_VXE, "VXE" }, + }; + + /* Set hwcaps according to the detected facilities */ +diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h +index 7b863a324..4ab2d3334 100644 +--- a/include/vki/vki-s390x-linux.h ++++ b/include/vki/vki-s390x-linux.h +@@ -806,6 +806,7 @@ typedef vki_s390_regs vki_elf_gregset_t; + + #define VKI_HWCAP_S390_TE 1024 + #define VKI_HWCAP_S390_VXRS 2048 ++#define VKI_HWCAP_S390_VXRS_EXT 8192 + + + //---------------------------------------------------------------------- +diff --git a/none/tests/s390x/vector.h b/none/tests/s390x/vector.h +index de2391480..632c2cb9c 100644 +--- a/none/tests/s390x/vector.h ++++ b/none/tests/s390x/vector.h +@@ -86,6 +86,13 @@ void print_hex(const V128 value) { + printf("%016lx | %016lx\n", value.u64[0], value.u64[1]); + } + ++void print_hex64(const V128 value, int zero_only) { ++ if (zero_only) ++ printf("%016lx | --\n", value.u64[0]); ++ else ++ printf("%016lx | %016lx\n", value.u64[0], value.u64[1]); ++} ++ + void print_f32(const V128 value, int even_only, int zero_only) { + if (zero_only) + printf("%a | -- | -- | --\n", value.f32[0]); +@@ -222,8 +229,10 @@ static void test_##insn##_selective(const s390x_test_usageInfo info) \ + {printf(" v_arg2 = "); print_hex(v_arg2);} \ + if (info & V128_V_ARG3_AS_INT) \ + {printf(" v_arg3 = "); print_hex(v_arg3);} \ +- if (info & V128_V_RES_AS_INT) \ +- {printf(" v_result = "); 
print_hex(v_result);} \ ++ if (info & V128_V_RES_AS_INT) { \ ++ printf(" v_result = "); \ ++ print_hex64(v_result, info & V128_V_RES_ZERO_ONLY); \ ++ } \ + \ + if (info & V128_V_ARG1_AS_FLOAT64) \ + {printf(" v_arg1 = "); print_f64(v_arg1, 0);} \ +diff --git a/none/tests/s390x/vector_float.c b/none/tests/s390x/vector_float.c +index 52f3a296f..20853f381 100644 +--- a/none/tests/s390x/vector_float.c ++++ b/none/tests/s390x/vector_float.c +@@ -114,50 +114,59 @@ int main() + test_with_selective_printing(vldeb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wldeb, (V128_V_RES_AS_FLOAT64 | +- V128_V_ARG1_AS_FLOAT64)); ++ V128_V_ARG1_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vflcdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflcdb, (V128_V_RES_AS_FLOAT64 | +- V128_V_ARG1_AS_FLOAT64)); ++ V128_V_ARG1_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vflndb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflndb, (V128_V_RES_AS_FLOAT64 | +- V128_V_ARG1_AS_FLOAT64)); ++ V128_V_ARG1_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vflpdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wflpdb, (V128_V_RES_AS_FLOAT64 | +- V128_V_ARG1_AS_FLOAT64)); ++ V128_V_ARG1_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ 
V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfmdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfmdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfddb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfddb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfsqdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64)); + test_with_selective_printing(wfsqdb, (V128_V_RES_AS_FLOAT64 | +- V128_V_ARG1_AS_FLOAT64)); ++ V128_V_ARG1_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfmadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | +@@ -166,7 +175,8 @@ int main() + test_with_selective_printing(wfmadb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_V_ARG3_AS_FLOAT64)); ++ V128_V_ARG3_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfmsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +@@ -174,21 +184,25 @@ int main() + test_with_selective_printing(wfmsdb, (V128_V_RES_AS_FLOAT64 | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_V_ARG3_AS_FLOAT64)); ++ V128_V_ARG3_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(wfcdb, (V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_R_RES)); ++ V128_R_RES | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(wfkdb, (V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_R_RES)); ++ V128_R_RES | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfcedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfcedb, 
(V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfcedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +@@ -196,14 +210,16 @@ int main() + test_with_selective_printing(wfcedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_R_RES)); ++ V128_R_RES | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfchdb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfchdb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfchdbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +@@ -211,14 +227,16 @@ int main() + test_with_selective_printing(wfchdbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_R_RES)); ++ V128_R_RES | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vfchedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64)); + test_with_selective_printing(wfchedb, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | +- V128_V_ARG2_AS_FLOAT64)); ++ V128_V_ARG2_AS_FLOAT64 | ++ V128_V_RES_ZERO_ONLY)); + test_with_selective_printing(vfchedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +@@ -226,7 +244,8 @@ int main() + test_with_selective_printing(wfchedbs, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | + V128_V_ARG2_AS_FLOAT64 | +- V128_R_RES)); ++ V128_R_RES | ++ V128_V_RES_ZERO_ONLY)); + + test_with_selective_printing(vftcidb0, (V128_V_RES_AS_INT | + V128_V_ARG1_AS_FLOAT64 | +diff --git a/none/tests/s390x/vector_float.stdout.exp b/none/tests/s390x/vector_float.stdout.exp +index eac525041..a330ac832 100644 +--- a/none/tests/s390x/vector_float.stdout.exp ++++ 
b/none/tests/s390x/vector_float.stdout.exp +@@ -419,88 +419,88 @@ insn vcgdb07: + v_result = 7fffffffffffffff | 7fffffffffffffff + v_arg1 = 0x1.fed2f087c21p+341 | 0x1.180e4c1d87fc4p+682 + insn wcgdb00: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.d7fd9222e8b86p+670 | 0x1.c272612672a3p+798 + insn wcgdb00: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.745cd360987e5p-496 | -0x1.f3b404919f358p-321 + insn wcgdb00: +- v_result = 8000000000000000 | 0000000000000000 ++ v_result = 8000000000000000 | -- + v_arg1 = -0x1.9523565cd92d5p+643 | 0x1.253677d6d3be2p-556 + insn wcgdb00: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.b6eb576ec3e6ap+845 | -0x1.c7e102c503d91p+266 + insn wcgdb01: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.3d4319841f4d6p-1011 | -0x1.2feabf7dfc506p-680 + insn wcgdb01: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.6fb8d1cd8b32cp-843 | -0x1.50f6a6922f97ep+33 + insn wcgdb01: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.64a673daccf1ap-566 | -0x1.69ef9b1d01499p+824 + insn wcgdb01: +- v_result = 8000000000000000 | 0000000000000000 ++ v_result = 8000000000000000 | -- + v_arg1 = -0x1.3e2ddd862b4adp+1005 | -0x1.312466410271p+184 + insn wcgdb03: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.d594c3412a11p-953 | -0x1.a07393d34d77cp-224 + insn wcgdb03: +- v_result = 8000000000000000 | 0000000000000000 ++ v_result = 8000000000000000 | -- + v_arg1 = -0x1.f7a0dbcfd6e4cp+104 | -0x1.40f7cde7f2214p-702 + insn wcgdb03: +- v_result = 8000000000000000 | 0000000000000000 ++ v_result = 8000000000000000 | -- + v_arg1 = -0x1.40739c1574808p+560 | -0x1.970328ddf1b6ep-374 + insn wcgdb03: +- v_result 
= 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.477653afd7048p-38 | 0x1.1eac2f8b2a93cp-384 + insn wcgdb04: +- v_result = ffffffffe9479a7d | 0000000000000000 ++ v_result = ffffffffe9479a7d | -- + v_arg1 = -0x1.6b865833eff3p+28 | 0x1.06e8cf1834d0ep-722 + insn wcgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.eef0b2294a5cp-544 | -0x1.8e8b133ccda15p+752 + insn wcgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.f34e77e6b6698p-894 | -0x1.9f7ce1cb53bddp-896 + insn wcgdb04: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.95707a6d75db5p+1018 | -0x1.3b0c072d23011p-224 + insn wcgdb05: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.a9fb71160793p-968 | 0x1.05f601fe8123ap-986 + insn wcgdb05: +- v_result = 8000000000000000 | 0000000000000000 ++ v_result = 8000000000000000 | -- + v_arg1 = -0x1.0864159b94305p+451 | -0x1.d4647f5a78b7ep-599 + insn wcgdb05: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.37eadff8397c8p+432 | -0x1.15d896b6f6063p+464 + insn wcgdb05: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.eb0812b0d677p-781 | 0x1.3117c5e0e288cp-202 + insn wcgdb06: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.6b88069167c0fp-662 | -0x1.70571d27e1279p+254 + insn wcgdb06: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.f6a6d6e883596p+260 | 0x1.0d578afaaa34ap+604 + insn wcgdb06: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.d91c7d13c4694p-475 | -0x1.ecf1f8529767bp+830 + insn wcgdb06: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 
0000000000000001 | -- + v_arg1 = 0x1.fac8dd3bb7af6p-101 | 0x1.fb8324a00fba8p+959 + insn wcgdb07: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.4b0fa18fa73c7p+111 | -0x1.08e7b17633a49p+61 + insn wcgdb07: +- v_result = e636b693e39a1100 | 0000000000000000 ++ v_result = e636b693e39a1100 | -- + v_arg1 = -0x1.9c9496c1c65efp+60 | 0x1.c4182ee728d76p-572 + insn wcgdb07: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = -0x1.819718032dff7p-303 | 0x1.a784c77ff6aa2p-622 + insn wcgdb07: +- v_result = 7fffffffffffffff | 0000000000000000 ++ v_result = 7fffffffffffffff | -- + v_arg1 = 0x1.978e8abfd83c2p+152 | 0x1.2531ebf451762p+315 + insn vclgdb00: + v_result = 0000000000000000 | 0000000000000000 +@@ -587,88 +587,88 @@ insn vclgdb07: + v_result = 0000000000000000 | 0000000000000000 + v_arg1 = -0x1.137bbb51f08bdp+306 | 0x1.18d2a1063356p-795 + insn wclgdb00: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.e66f55dcc2639p-1013 | -0x1.733ee56929f3bp-304 + insn wclgdb00: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.8802fd9ab740cp-986 | -0x1.64d4d2c7c145fp-1015 + insn wclgdb00: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.a67209b8c407bp-645 | -0x1.6410ff9b1c801p+487 + insn wclgdb00: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.cb2febaefeb2dp+49 | 0x1.dee368b2ec375p-502 + insn wclgdb01: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.5703db3c1b0e2p-728 | 0x1.068c4d51ea4ebp+617 + insn wclgdb01: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.ae350291e5b3ep+291 | 0x1.1b87bb09b6032p+376 + insn wclgdb01: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = 
ffffffffffffffff | -- + v_arg1 = 0x1.c4666a710127ep+424 | -0x1.19e969b6c0076p+491 + insn wclgdb01: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.c892c5a4d103fp+105 | -0x1.d4f937cc76704p+749 + insn wclgdb03: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.81090d8fc663dp-111 | 0x1.337ec5e0f0904p+1 + insn wclgdb03: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.e787adc70b91p-593 | 0x1.db8d83196b53cp-762 + insn wclgdb03: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.6529307e907efp+389 | -0x1.3ea0d8d5b4dd2p+589 + insn wclgdb03: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.be701a158637p-385 | 0x1.c5a7f70cb8a09p+107 + insn wclgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.2f328571ab445p+21 | -0x1.dcc21fc82ba01p-930 + insn wclgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.06b69fcbb7bffp-415 | 0x1.6f9a13a0a827ap+915 + insn wclgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.738e549b38bcdp+479 | 0x1.a522edb999c9p-45 + insn wclgdb04: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.7f9399d2bcf3bp-215 | -0x1.7bc35f2d69a7fp+818 + insn wclgdb05: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.fc542bdb707f6p+880 | -0x1.8521ebc93a25fp-969 + insn wclgdb05: +- v_result = 1ce8d9951b8c8600 | 0000000000000000 ++ v_result = 1ce8d9951b8c8600 | -- + v_arg1 = 0x1.ce8d9951b8c86p+60 | 0x1.92712589230e7p+475 + insn wclgdb05: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 
-0x1.8a297f60a0811p-156 | 0x1.102b79043d82cp-204 + insn wclgdb05: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.beb9057e1401dp-196 | -0x1.820f18f830262p+15 + insn wclgdb06: +- v_result = 0000000000000001 | 0000000000000000 ++ v_result = 0000000000000001 | -- + v_arg1 = 0x1.c321a966ecb4dp-430 | -0x1.2f6a1a95ead99p-943 + insn wclgdb06: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.f1a86b4aed821p-56 | -0x1.1ee6717cc2d7fp-899 + insn wclgdb06: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.73ce49d89ecb9p-302 | 0x1.52663b975ed23p-716 + insn wclgdb06: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.3e9c2de97a292p+879 | 0x1.d34eed36f2eafp+960 + insn wclgdb07: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.4e6ec6ddc6a45p-632 | -0x1.6e564d0fec72bp+369 + insn wclgdb07: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.42e2c658e4c4dp+459 | -0x1.9f9dc0252e44p+85 + insn wclgdb07: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.fb40ac8cda3c1p-762 | 0x1.0e9ed614bc8f1p-342 + insn wclgdb07: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.c1f8b3c68e214p+118 | -0x1.1a26a49368b61p+756 + insn vfidb00: + v_arg1 = -0x1.38df4cf9d52dbp-545 | -0x1.049253d90dd92p+94 +@@ -1020,16 +1020,16 @@ insn vldeb: + v_result = -0x1.6f5fb2p+70 | -0x1.0d2df6p-107 + insn wldeb: + v_arg1 = -0x1.d26169729db2ap-435 | 0x1.d6fd080793e8cp+767 +- v_result = -0x1.9a4c2cp-54 | 0x0p+0 ++ v_result = -0x1.9a4c2cp-54 | -- + insn wldeb: + v_arg1 = -0x1.f4b59107fce61p-930 | 0x1.cdf2816e253f4p-168 +- v_result = -0x1.be96b2p-116 | 0x0p+0 ++ v_result = -0x1.be96b2p-116 | -- + insn wldeb: + v_arg1 = 
-0x1.9603a2997928cp-441 | -0x1.aada85e355a11p-767 +- v_result = -0x1.d2c074p-55 | 0x0p+0 ++ v_result = -0x1.d2c074p-55 | -- + insn wldeb: + v_arg1 = 0x1.25ccf5bd0e83p+620 | 0x1.e1635864ebb17p-88 +- v_result = 0x1.64b99ep+78 | 0x0p+0 ++ v_result = 0x1.64b99ep+78 | -- + insn vflcdb: + v_arg1 = 0x1.0ae6d82f76afp-166 | -0x1.e8fb1e03a7415p-191 + v_result = -0x1.0ae6d82f76afp-166 | 0x1.e8fb1e03a7415p-191 +@@ -1044,16 +1044,16 @@ insn vflcdb: + v_result = -0x1.19520153d35b4p-301 | -0x1.ac5325cd23253p+396 + insn wflcdb: + v_arg1 = 0x1.ffd3eecfd54d7p-831 | -0x1.97854fa523a77p+146 +- v_result = -0x1.ffd3eecfd54d7p-831 | 0x0p+0 ++ v_result = -0x1.ffd3eecfd54d7p-831 | -- + insn wflcdb: + v_arg1 = -0x1.508ea45606447p-442 | 0x1.ae7f0e6cf9d2bp+583 +- v_result = 0x1.508ea45606447p-442 | 0x0p+0 ++ v_result = 0x1.508ea45606447p-442 | -- + insn wflcdb: + v_arg1 = 0x1.da8ab2188c21ap+94 | 0x1.78a9c152aa074p-808 +- v_result = -0x1.da8ab2188c21ap+94 | 0x0p+0 ++ v_result = -0x1.da8ab2188c21ap+94 | -- + insn wflcdb: + v_arg1 = -0x1.086882645e0c5p-1001 | -0x1.54e2de5af5a74p-262 +- v_result = 0x1.086882645e0c5p-1001 | 0x0p+0 ++ v_result = 0x1.086882645e0c5p-1001 | -- + insn vflndb: + v_arg1 = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935 + v_result = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935 +@@ -1068,16 +1068,16 @@ insn vflndb: + v_result = -0x1.c5bc39a06d4e2p-259 | -0x1.c5e61ad849e77p-833 + insn wflndb: + v_arg1 = -0x1.e9f3e6d1beffap-117 | -0x1.d58cc8bf123b3p-714 +- v_result = -0x1.e9f3e6d1beffap-117 | 0x0p+0 ++ v_result = -0x1.e9f3e6d1beffap-117 | -- + insn wflndb: + v_arg1 = -0x1.3fc4ef2e7485ep-691 | 0x1.eb328986081efp-775 +- v_result = -0x1.3fc4ef2e7485ep-691 | 0x0p+0 ++ v_result = -0x1.3fc4ef2e7485ep-691 | -- + insn wflndb: + v_arg1 = -0x1.7146c5afdec16p+23 | -0x1.597fcfa1fab2p-708 +- v_result = -0x1.7146c5afdec16p+23 | 0x0p+0 ++ v_result = -0x1.7146c5afdec16p+23 | -- + insn wflndb: + v_arg1 = 0x1.03f8d7e9afe84p-947 | 0x1.9a10c3feb6b57p-118 +- v_result = 
-0x1.03f8d7e9afe84p-947 | 0x0p+0 ++ v_result = -0x1.03f8d7e9afe84p-947 | -- + insn vflpdb: + v_arg1 = 0x1.64ae59b6c762ep-407 | -0x1.fa7191ab21e86p+533 + v_result = 0x1.64ae59b6c762ep-407 | 0x1.fa7191ab21e86p+533 +@@ -1092,16 +1092,16 @@ insn vflpdb: + v_result = 0x1.85fa2de1d492ap+170 | 0x1.ac36828822c11p-968 + insn wflpdb: + v_arg1 = 0x1.a6cf677640a73p-871 | 0x1.b6f1792385922p-278 +- v_result = 0x1.a6cf677640a73p-871 | 0x0p+0 ++ v_result = 0x1.a6cf677640a73p-871 | -- + insn wflpdb: + v_arg1 = -0x1.b886774f6d888p-191 | -0x1.6a2b08d735d22p-643 +- v_result = 0x1.b886774f6d888p-191 | 0x0p+0 ++ v_result = 0x1.b886774f6d888p-191 | -- + insn wflpdb: + v_arg1 = 0x1.5045d37d46f5fp+943 | -0x1.333a86ef2dcf6p-1013 +- v_result = 0x1.5045d37d46f5fp+943 | 0x0p+0 ++ v_result = 0x1.5045d37d46f5fp+943 | -- + insn wflpdb: + v_arg1 = 0x1.1e7bec6ada14dp+252 | 0x1.a70b3f3e24dap-153 +- v_result = 0x1.1e7bec6ada14dp+252 | 0x0p+0 ++ v_result = 0x1.1e7bec6ada14dp+252 | -- + insn vfadb: + v_arg1 = 0x1.5b1ad8e9f17c6p-294 | -0x1.ddd8300a0bf02p+122 + v_arg2 = -0x1.9b49c31ca8ac6p+926 | 0x1.fdbc992926268p+677 +@@ -1121,19 +1121,19 @@ insn vfadb: + insn wfadb: + v_arg1 = 0x1.3c5466cb80722p+489 | -0x1.11e1770053ca2p+924 + v_arg2 = 0x1.d876cd721a726p-946 | 0x1.5c04ceb79c9bcp+1001 +- v_result = 0x1.3c5466cb80722p+489 | 0x0p+0 ++ v_result = 0x1.3c5466cb80722p+489 | -- + insn wfadb: + v_arg1 = 0x1.b0b142d6b76a3p+577 | 0x1.3146824e993a2p+432 + v_arg2 = -0x1.f7f3b7582925fp-684 | -0x1.9700143c2b935p-837 +- v_result = 0x1.b0b142d6b76a2p+577 | 0x0p+0 ++ v_result = 0x1.b0b142d6b76a2p+577 | -- + insn wfadb: + v_arg1 = -0x1.8d65e15edabd6p+244 | 0x1.3be7fd08492d6p-141 + v_arg2 = -0x1.5eef86490fb0ap+481 | 0x1.7b26c897cb6dfp+810 +- v_result = -0x1.5eef86490fb0ap+481 | 0x0p+0 ++ v_result = -0x1.5eef86490fb0ap+481 | -- + insn wfadb: + v_arg1 = -0x1.2dffa5b5f29p+34 | 0x1.71a026274602fp-881 + v_arg2 = 0x1.4dad707287289p+756 | -0x1.1500d55807247p-616 +- v_result = 0x1.4dad707287288p+756 | 0x0p+0 ++ v_result = 
0x1.4dad707287288p+756 | -- + insn vfsdb: + v_arg1 = 0x1.054fd9c4d4883p+644 | 0x1.45c90ed85bd7fp-780 + v_arg2 = 0x1.f3bc7a611dadap+494 | -0x1.7c9e1e858ba5bp-301 +@@ -1153,19 +1153,19 @@ insn vfsdb: + insn wfsdb: + v_arg1 = 0x1.9090dabf846e7p-648 | 0x1.1c4ab843a2d15p+329 + v_arg2 = -0x1.a7ceb293690dep+316 | 0x1.22245954a20cp+42 +- v_result = 0x1.a7ceb293690dep+316 | 0x0p+0 ++ v_result = 0x1.a7ceb293690dep+316 | -- + insn wfsdb: + v_arg1 = 0x1.4e5347c27819p-933 | -0x1.56a30bda28351p-64 + v_arg2 = -0x1.dedb9f3935b56p-155 | 0x1.8c5b6ed76816cp-522 +- v_result = 0x1.dedb9f3935b56p-155 | 0x0p+0 ++ v_result = 0x1.dedb9f3935b56p-155 | -- + insn wfsdb: + v_arg1 = 0x1.0ec4e562a015bp-491 | 0x1.3996381b52d9fp-686 + v_arg2 = 0x1.1dcce4e81819p+960 | -0x1.32fa425e8fc08p-263 +- v_result = -0x1.1dcce4e81818fp+960 | 0x0p+0 ++ v_result = -0x1.1dcce4e81818fp+960 | -- + insn wfsdb: + v_arg1 = -0x1.587229f90f77dp-19 | 0x1.100d8eb8105e4p-784 + v_arg2 = -0x1.afb4cce4c43ddp+530 | -0x1.6da7f05e7f512p-869 +- v_result = 0x1.afb4cce4c43dcp+530 | 0x0p+0 ++ v_result = 0x1.afb4cce4c43dcp+530 | -- + insn vfmdb: + v_arg1 = 0x1.892b425556c47p-124 | 0x1.38222404079dfp-656 + v_arg2 = 0x1.af612ed2c342dp-267 | -0x1.1f735fd6ce768p-877 +@@ -1185,19 +1185,19 @@ insn vfmdb: + insn wfmdb: + v_arg1 = -0x1.b992d950126a1p-683 | -0x1.9c1b22eb58c59p-497 + v_arg2 = 0x1.b557a7d8e32c3p-25 | -0x1.f746b2ddafccep+227 +- v_result = -0x1.792f6fb13894ap-707 | 0x0p+0 ++ v_result = -0x1.792f6fb13894ap-707 | -- + insn wfmdb: + v_arg1 = -0x1.677a8c20a5a2fp+876 | 0x1.c03e7b97e8c0dp-645 + v_arg2 = 0x1.dab44be430937p-1011 | -0x1.3f51352c67be9p-916 +- v_result = -0x1.4d4b0a1827064p-134 | 0x0p+0 ++ v_result = -0x1.4d4b0a1827064p-134 | -- + insn wfmdb: + v_arg1 = -0x1.da60f596ad0cep+254 | 0x1.52332e0650e33p+966 + v_arg2 = 0x1.a042c52ed993cp+215 | 0x1.8f380c84aa133p+204 +- v_result = -0x1.81aca4bbcbd24p+470 | 0x0p+0 ++ v_result = -0x1.81aca4bbcbd24p+470 | -- + insn wfmdb: + v_arg1 = -0x1.83d17f11f6aa3p-469 | -0x1.98117efe89b9ep-361 + 
v_arg2 = 0x1.8c445fd46d214p-701 | -0x1.f98118821821cp+596 +- v_result = -0x0p+0 | 0x0p+0 ++ v_result = -0x0p+0 | -- + insn vfddb: + v_arg1 = -0x1.ecbb48899e0f1p+969 | 0x1.caf175ab352p-20 + v_arg2 = -0x1.9455d67f9f79dp+208 | 0x1.bc4a431b04a6fp+482 +@@ -1217,19 +1217,19 @@ insn vfddb: + insn wfddb: + v_arg1 = 0x1.bd48489b60731p-114 | 0x1.a760dcf57b74fp-51 + v_arg2 = -0x1.171f83409eeb6p-402 | -0x1.e159d1409bdc6p-972 +- v_result = -0x1.9864f1511f8cp+288 | 0x0p+0 ++ v_result = -0x1.9864f1511f8cp+288 | -- + insn wfddb: + v_arg1 = -0x1.120505ef4606p-637 | -0x1.83f6f775c0eb7p+272 + v_arg2 = -0x1.d18ba3872fde1p+298 | 0x1.c60f8d191068cp-454 +- v_result = 0x1.2d5cdb15a686cp-936 | 0x0p+0 ++ v_result = 0x1.2d5cdb15a686cp-936 | -- + insn wfddb: + v_arg1 = 0x1.f637f7f8c790fp-97 | -0x1.7bdce4d74947p+189 + v_arg2 = -0x1.1c8f2d1b3a2edp-218 | -0x1.55fdfd1840241p-350 +- v_result = -0x1.c3d0799c1420fp+121 | 0x0p+0 ++ v_result = -0x1.c3d0799c1420fp+121 | -- + insn wfddb: + v_arg1 = -0x1.c63b7b2eee253p+250 | 0x1.dfd9dcd8b823fp-125 + v_arg2 = 0x1.094a1f1f87e0cp+629 | 0x1.eeaa23c0d7843p-814 +- v_result = -0x1.b653a10ebdeccp-379 | 0x0p+0 ++ v_result = -0x1.b653a10ebdeccp-379 | -- + insn vfsqdb: + v_arg1 = 0x1.f60db25f7066p-703 | -0x1.d43509abca8c3p+631 + v_result = 0x1.fb009ab25ec11p-352 | nan +@@ -1244,16 +1244,16 @@ insn vfsqdb: + v_result = 0x1.833dba0954bccp+249 | nan + insn wfsqdb: + v_arg1 = 0x1.71af4e7f64978p+481 | -0x1.3429dc60011d7p-879 +- v_result = 0x1.b30fc65551133p+240 | 0x0p+0 ++ v_result = 0x1.b30fc65551133p+240 | -- + insn wfsqdb: + v_arg1 = 0x1.5410db1c5f403p+173 | 0x1.97fa6581e692fp+108 +- v_result = 0x1.a144f43a592c1p+86 | 0x0p+0 ++ v_result = 0x1.a144f43a592c1p+86 | -- + insn wfsqdb: + v_arg1 = -0x1.5838027725afep+6 | 0x1.ac61529c11f38p+565 +- v_result = nan | 0x0p+0 ++ v_result = nan | -- + insn wfsqdb: + v_arg1 = -0x1.159e341dcc06ep-439 | 0x1.ed54ce5481ba5p-574 +- v_result = nan | 0x0p+0 ++ v_result = nan | -- + insn vfmadb: + v_arg1 = -0x1.eb00a5c503d75p+538 | 
0x1.89fae603ddc07p+767 + v_arg2 = -0x1.71c72712c3957p+715 | 0x1.1bd5773442feap+762 +@@ -1278,22 +1278,22 @@ insn wfmadb: + v_arg1 = 0x1.1cc5b10a14d54p+668 | -0x1.686407390f7d1p+616 + v_arg2 = -0x1.bf34549e73246p+676 | -0x1.dc5a34cc470f3p+595 + v_arg3 = -0x1.95e0fdcf13974p-811 | -0x1.79c7cc1a8ec83p-558 +- v_result = -0x1.fffffffffffffp+1023 | 0x0p+0 ++ v_result = -0x1.fffffffffffffp+1023 | -- + insn wfmadb: + v_arg1 = 0x1.138bc1a5d75f8p+713 | -0x1.e226ebba2fe54p+381 + v_arg2 = -0x1.081ebb7cc3414p-772 | 0x1.369d99e174fc3p+922 + v_arg3 = -0x1.0671c682a5d0cp-1016 | 0x1.03c9530dd0377p+378 +- v_result = -0x1.1c4933e117d95p-59 | 0x0p+0 ++ v_result = -0x1.1c4933e117d95p-59 | -- + insn wfmadb: + v_arg1 = -0x1.166f0b1fad67bp+64 | -0x1.e9ee8d32e1069p-452 + v_arg2 = -0x1.4a235bdd109e2p-65 | 0x1.bacaa96fc7e81p-403 + v_arg3 = -0x1.d2e19acf7c4bdp+99 | 0x1.f901130f685adp-963 +- v_result = -0x1.d2e19acf7c4bcp+99 | 0x0p+0 ++ v_result = -0x1.d2e19acf7c4bcp+99 | -- + insn wfmadb: + v_arg1 = -0x1.77d7bfec863d2p-988 | -0x1.b68029700c6b1p-206 + v_arg2 = -0x1.aca05ad00aec1p+737 | 0x1.ac746bd7e216bp+51 + v_arg3 = 0x1.17342292078b4p+188 | -0x1.49efaf9392301p+555 +- v_result = 0x1.17342292078b4p+188 | 0x0p+0 ++ v_result = 0x1.17342292078b4p+188 | -- + insn vfmsdb: + v_arg1 = -0x1.a1b218e84e61p+34 | 0x1.b220f0d144daep-111 + v_arg2 = 0x1.564fcc2527961p-265 | 0x1.ea85a4154721ep+733 +@@ -1318,22 +1318,22 @@ insn wfmsdb: + v_arg1 = -0x1.7499a639673a6p-100 | -0x1.2a0d737e6cb1cp-207 + v_arg2 = -0x1.01ad4670a7aa3p-911 | 0x1.f94385e1021e8p+317 + v_arg3 = 0x1.aa42b2bb17af9p+982 | 0x1.c550e471711p+786 +- v_result = -0x1.aa42b2bb17af8p+982 | 0x0p+0 ++ v_result = -0x1.aa42b2bb17af8p+982 | -- + insn wfmsdb: + v_arg1 = 0x1.76840f99b431ep+500 | -0x1.989a500c92c08p+594 + v_arg2 = 0x1.33c657cb8385cp-84 | -0x1.2c795ad92ce17p+807 + v_arg3 = -0x1.ee58a39f02d54p-351 | -0x1.18695ed9a280ap+48 +- v_result = 0x1.c242894a0068p+416 | 0x0p+0 ++ v_result = 0x1.c242894a0068p+416 | -- + insn wfmsdb: + v_arg1 = 
-0x1.16db07e054a65p-469 | -0x1.3a627ab99c6e4p+689 + v_arg2 = 0x1.17872eae826e5p-538 | 0x1.44ed513fb5873p-929 + v_arg3 = 0x1.5ca912008e077p-217 | -0x1.982a6f7359876p-23 +- v_result = -0x1.5ca912008e077p-217 | 0x0p+0 ++ v_result = -0x1.5ca912008e077p-217 | -- + insn wfmsdb: + v_arg1 = -0x1.d315f4a932c6p+122 | 0x1.616a04493e143p+513 + v_arg2 = -0x1.cf1cd3516f23fp+552 | 0x1.7121749c3932cp-750 + v_arg3 = 0x1.dc26d92304d7fp-192 | -0x1.1fc3cca9ec20ep+371 +- v_result = 0x1.a67ca6ba395bcp+675 | 0x0p+0 ++ v_result = 0x1.a67ca6ba395bcp+675 | -- + insn wfcdb: + v_arg1 = 0x1.302001b736011p-633 | -0x1.72d5300225c97p-468 + v_arg2 = -0x1.8c007c5aba108p-17 | -0x1.bb3f9ae136acdp+569 +@@ -1383,19 +1383,19 @@ insn vfcedb: + v_arg1 = 0x1.d8e5c9930c19dp+623 | -0x1.cf1facff4e194p-605 + v_arg2 = -0x1.ed6ba02646d0dp+441 | -0x1.2d677e710620bp+810 + insn wfcedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.a252009e1a12cp-442 | 0x1.4dc608268bb29p-513 + v_arg2 = -0x1.81020aa1a36e6p-687 | -0x1.300e64ce414f1p-899 + insn wfcedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.cec439a8d4781p-175 | -0x1.d20e3b281d599p+893 + v_arg2 = 0x1.ca17cf16cf0aap-879 | 0x1.61506f8596092p+545 + insn wfcedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.0659f5f24a004p+877 | 0x1.fc46867ed0338p-680 + v_arg2 = -0x1.1d6849587155ep-1010 | -0x1.f68171edc235fp+575 + insn wfcedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.dc88a0d46ad79p-816 | 0x1.245140dcaed79p+851 + v_arg2 = 0x1.b33e977c7b3ep-818 | -0x1.04319d7c69367p+787 + insn vfcedbs: +@@ -1419,22 +1419,22 @@ insn vfcedbs: + v_arg2 = 0x1.ae2c06ea88ff4p+332 | -0x1.f668ce4f8ef9ap+821 + r_result = 0000000000000003 + insn wfcedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.645261bf86b1fp-996 | 
0x1.abd13c95397aap+992 + v_arg2 = -0x1.ba09e8fc66a8cp+113 | 0x1.75dbfe92c16c4p-786 + r_result = 0000000000000003 + insn wfcedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.d02831d003e7dp+415 | -0x1.611a9dfd10f36p-80 + v_arg2 = -0x1.10bda62f4647p+723 | 0x1.cc47af6653378p-614 + r_result = 0000000000000003 + insn wfcedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 0x1.f168f32f84178p-321 | -0x1.79a2a0b9549d1p-136 + v_arg2 = 0x1.41e19d1cfa692p+11 | -0x1.2a0ed6e7fd517p-453 + r_result = 0000000000000003 + insn wfcedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.76a9144ee26c5p+188 | -0x1.386aaea2d9cddp-542 + v_arg2 = 0x1.810fcf222efc4p-999 | -0x1.ce90a9a43e2a1p+80 + r_result = 0000000000000003 +@@ -1455,19 +1455,19 @@ insn vfchdb: + v_arg1 = 0x1.82be31fb88a2dp+946 | -0x1.7ca9e9ff31953p-931 + v_arg2 = 0x1.fe75a1052beccp+490 | 0x1.179d18543d678p-255 + insn wfchdb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.0af85d8d8d609p-464 | -0x1.9f639a686e0fep+203 + v_arg2 = -0x1.3142b77b55761p-673 | 0x1.ca9c474339da1p+472 + insn wfchdb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = -0x1.6cf16959a022bp+213 | 0x1.445606e4363e1p+942 + v_arg2 = -0x1.8c343201bbd2p+939 | -0x1.e5095ad0c37a4p-434 + insn wfchdb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.36b4fc9cf5bdap-52 | -0x1.f1fd95cbcd533p+540 + v_arg2 = 0x1.5a2362891c9edp-175 | -0x1.e1f68c319e5d2p+58 + insn wfchdb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.11c6489f544bbp+811 | 0x1.262a740ec3d47p+456 + v_arg2 = -0x1.d9394d354e989p-154 | 0x1.cc21b3094391ap-972 + insn vfchdbs: +@@ -1491,22 +1491,22 @@ insn vfchdbs: + v_arg2 = 0x1.e426748435a76p+370 | 
0x1.8702527d17783p-871 + r_result = 0000000000000003 + insn wfchdbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.6c51b9f6442c8p+639 | 0x1.1e6b37adff703p+702 + v_arg2 = 0x1.0cba9c1c75e43p+520 | -0x1.145d44ed90967p+346 + r_result = 0000000000000000 + insn wfchdbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.7b3dd643bf36bp+816 | -0x1.61ce7bfb9307ap-683 + v_arg2 = -0x1.f2c998dc15c9ap-776 | 0x1.e16397f2dcdf5p+571 + r_result = 0000000000000000 + insn wfchdbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.cc3be81884e0ap-865 | -0x1.8b353bd41064p+820 + v_arg2 = -0x1.2c1bafaafdd4ep-34 | -0x1.24666808ab16ep-435 + r_result = 0000000000000000 + insn wfchdbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.c3de33d3b673ap+554 | 0x1.d39ed71e53096p-798 + v_arg2 = -0x1.c1e8f7b3c001p-828 | 0x1.22e2cf797fabp-787 + r_result = 0000000000000000 +@@ -1527,19 +1527,19 @@ insn vfchedb: + v_arg1 = -0x1.6c5599e7ba923p+829 | -0x1.5d1a1191ed6eap-994 + v_arg2 = -0x1.555c8775bc4d2p-478 | -0x1.4aa6a2c82319cp+493 + insn wfchedb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.ae6cad07b0f3ep-232 | -0x1.2ed61a43f3b99p-74 + v_arg2 = -0x1.226f7cddbde13p-902 | -0x1.790d1d6febbf8p+336 + insn wfchedb: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.20eb8eac3711dp-385 | 0x1.ef71d3312d7e1p+739 + v_arg2 = 0x1.7a3ba08c5a0bdp-823 | -0x1.a7845ccaa544dp-129 + insn wfchedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.97ebdbc057be8p+824 | 0x1.2b7798b063cd6p+237 + v_arg2 = 0x1.cdb87a6074294p-81 | -0x1.074c902b19bccp-416 + insn wfchedb: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = 
-0x1.82deebf9ff023p+937 | 0x1.56c5adcf9d4abp-672 + v_arg2 = -0x1.311ce49bc9439p+561 | 0x1.c8e1c512d8544p+103 + insn vfchedbs: +@@ -1563,22 +1563,22 @@ insn vfchedbs: + v_arg2 = -0x1.47f5dfc7a5bcp-569 | 0x1.5877ef33664a3p-758 + r_result = 0000000000000003 + insn wfchedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.a7370ccfd9e49p+505 | 0x1.c6b2385850ca2p-591 + v_arg2 = 0x1.984f4fcd338b1p+675 | -0x1.feb996c821232p-39 + r_result = 0000000000000003 + insn wfchedbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.641878612dd2p+207 | 0x1.b35e3292db7f6p+567 + v_arg2 = -0x1.18a87f209e96bp+299 | -0x1.3d598f3612d8ap+1016 + r_result = 0000000000000000 + insn wfchedbs: +- v_result = ffffffffffffffff | 0000000000000000 ++ v_result = ffffffffffffffff | -- + v_arg1 = 0x1.cfc2cda244153p+404 | 0x1.d8b2b28e9d8d7p+276 + v_arg2 = 0x1.3517b8c7a59a1p-828 | 0x1.6096fab7003ccp-415 + r_result = 0000000000000000 + insn wfchedbs: +- v_result = 0000000000000000 | 0000000000000000 ++ v_result = 0000000000000000 | -- + v_arg1 = -0x1.54d656f033e56p-603 | -0x1.95ad0e2088967p+254 + v_arg2 = 0x1.4cb319db206e4p-614 | 0x1.b41cd9e3739b6p-862 + r_result = 0000000000000003 diff --git a/SOURCES/valgrind-3.16.1-sched_getsetattr.patch b/SOURCES/valgrind-3.16.1-sched_getsetattr.patch new file mode 100644 index 0000000..b95267d --- /dev/null +++ b/SOURCES/valgrind-3.16.1-sched_getsetattr.patch @@ -0,0 +1,201 @@ +commit a53adb79711ccfc76a4ee32b20253045cdab55c7 +Author: Mark Wielaard +Date: Mon Jul 27 16:36:17 2020 +0200 + + Handle linux syscalls sched_getattr and sched_setattr + + The only "special" thing about these syscalls is that the given + struct sched_attr determines its own size for future expansion. 
+ + Original fix by "ISHIKAWA,chiaki" + + https://bugs.kde.org/show_bug.cgi?id=369029 + +diff --git a/coregrind/m_syswrap/priv_syswrap-linux.h b/coregrind/m_syswrap/priv_syswrap-linux.h +index cdc73c1e6..eb0b320ca 100644 +--- a/coregrind/m_syswrap/priv_syswrap-linux.h ++++ b/coregrind/m_syswrap/priv_syswrap-linux.h +@@ -227,6 +227,8 @@ DECL_TEMPLATE(linux, sys_fremovexattr); + // syscalls. + DECL_TEMPLATE(linux, sys_sched_setparam); + DECL_TEMPLATE(linux, sys_sched_getparam); ++DECL_TEMPLATE(linux, sys_sched_setattr); ++DECL_TEMPLATE(linux, sys_sched_getattr); + DECL_TEMPLATE(linux, sys_sched_setscheduler); + DECL_TEMPLATE(linux, sys_sched_getscheduler); + DECL_TEMPLATE(linux, sys_sched_yield); +diff --git a/coregrind/m_syswrap/syswrap-amd64-linux.c b/coregrind/m_syswrap/syswrap-amd64-linux.c +index 28d90135a..d6f3eb910 100644 +--- a/coregrind/m_syswrap/syswrap-amd64-linux.c ++++ b/coregrind/m_syswrap/syswrap-amd64-linux.c +@@ -846,9 +846,8 @@ static SyscallTableEntry syscall_table[] = { + LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 311 + LINX_(__NR_kcmp, sys_kcmp), // 312 + LINX_(__NR_finit_module, sys_finit_module), // 313 +-// LIN__(__NR_sched_setattr, sys_ni_syscall), // 314 +- +-// LIN__(__NR_sched_getattr, sys_ni_syscall), // 315 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 314 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 315 + LINX_(__NR_renameat2, sys_renameat2), // 316 + // LIN__(__NR_seccomp, sys_ni_syscall), // 317 + LINXY(__NR_getrandom, sys_getrandom), // 318 +diff --git a/coregrind/m_syswrap/syswrap-arm-linux.c b/coregrind/m_syswrap/syswrap-arm-linux.c +index 579542785..70700e53f 100644 +--- a/coregrind/m_syswrap/syswrap-arm-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm-linux.c +@@ -1009,6 +1009,8 @@ static SyscallTableEntry syscall_main_table[] = { + LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 376 + LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 377 + ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 
380 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 381 + LINX_(__NR_renameat2, sys_renameat2), // 382 + + LINXY(__NR_getrandom, sys_getrandom), // 384 +diff --git a/coregrind/m_syswrap/syswrap-arm64-linux.c b/coregrind/m_syswrap/syswrap-arm64-linux.c +index 81e01456f..acca02442 100644 +--- a/coregrind/m_syswrap/syswrap-arm64-linux.c ++++ b/coregrind/m_syswrap/syswrap-arm64-linux.c +@@ -806,8 +806,8 @@ static SyscallTableEntry syscall_main_table[] = { + LINX_(__NR_process_vm_writev, sys_process_vm_writev), // 271 + LINX_(__NR_kcmp, sys_kcmp), // 272 + LINX_(__NR_finit_module, sys_finit_module), // 273 +- // (__NR_sched_setattr, sys_ni_syscall), // 274 +- // (__NR_sched_getattr, sys_ni_syscall), // 275 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 274 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 275 + LINX_(__NR_renameat2, sys_renameat2), // 276 + // (__NR_seccomp, sys_ni_syscall), // 277 + LINXY(__NR_getrandom, sys_getrandom), // 278 +diff --git a/coregrind/m_syswrap/syswrap-linux.c b/coregrind/m_syswrap/syswrap-linux.c +index 5b5b7eee6..56be3032d 100644 +--- a/coregrind/m_syswrap/syswrap-linux.c ++++ b/coregrind/m_syswrap/syswrap-linux.c +@@ -3677,6 +3677,41 @@ POST(sys_sched_getparam) + POST_MEM_WRITE( ARG2, sizeof(struct vki_sched_param) ); + } + ++PRE(sys_sched_setattr) ++{ ++ struct vki_sched_attr *attr; ++ PRINT("sched_setattr ( %ld, %#" FMT_REGWORD "x, %#" ++ FMT_REGWORD "x )", SARG1, ARG2, ARG3 ); ++ PRE_REG_READ3(long, "sched_setattr", ++ vki_pid_t, pid, struct sched_attr *, p, unsigned int, flags); ++ /* We need to be able to read at least the size field. 
*/ ++ PRE_MEM_READ( "sched_setattr(attr->size)", ARG2, sizeof(vki_uint32_t) ); ++ attr = (struct vki_sched_attr *)(Addr)ARG2; ++ if (ML_(safe_to_deref)(attr,sizeof(vki_uint32_t))) ++ PRE_MEM_READ( "sched_setattr(attr)", (Addr)attr, attr->size); ++} ++ ++PRE(sys_sched_getattr) ++{ ++ struct vki_sched_attr *attr; ++ PRINT("sched_getattr ( %ld, %#" FMT_REGWORD "x, %ld, %#" ++ FMT_REGWORD "x )", SARG1, ARG2, ARG3, ARG4 ); ++ PRE_REG_READ4(long, "sched_getattr", ++ vki_pid_t, pid, struct sched_attr *, p, ++ unsigned int, size, unsigned int, flags); ++ /* We need to be able to read at least the size field. */ ++ PRE_MEM_READ( "sched_setattr(attr->size)", ARG2, sizeof(vki_uint32_t) ); ++ /* And the kernel needs to be able to write to the whole struct size. */ ++ attr = (struct vki_sched_attr *)(Addr)ARG2; ++ if (ML_(safe_to_deref)(attr,sizeof(vki_uint32_t))) ++ PRE_MEM_WRITE( "sched_setattr(attr)", (Addr)attr, attr->size); ++} ++POST(sys_sched_getattr) ++{ ++ struct vki_sched_attr *attr = (struct vki_sched_attr *)(Addr)ARG2; ++ POST_MEM_WRITE( (Addr)attr, attr->size ); ++} ++ + PRE(sys_sched_getscheduler) + { + PRINT("sys_sched_getscheduler ( %ld )", SARG1); +diff --git a/coregrind/m_syswrap/syswrap-ppc32-linux.c b/coregrind/m_syswrap/syswrap-ppc32-linux.c +index eed12a1bc..c19cb9e0e 100644 +--- a/coregrind/m_syswrap/syswrap-ppc32-linux.c ++++ b/coregrind/m_syswrap/syswrap-ppc32-linux.c +@@ -1016,6 +1016,9 @@ static SyscallTableEntry syscall_table[] = { + LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 351 + LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 352 + ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 355 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 356 ++ + LINXY(__NR_getrandom, sys_getrandom), // 359 + LINXY(__NR_memfd_create, sys_memfd_create), // 360 + +diff --git a/coregrind/m_syswrap/syswrap-ppc64-linux.c b/coregrind/m_syswrap/syswrap-ppc64-linux.c +index d58200b49..b6422a765 100644 +--- a/coregrind/m_syswrap/syswrap-ppc64-linux.c 
++++ b/coregrind/m_syswrap/syswrap-ppc64-linux.c +@@ -998,6 +998,8 @@ static SyscallTableEntry syscall_table[] = { + LINXY(__NR_process_vm_readv, sys_process_vm_readv), // 351 + LINX_(__NR_process_vm_writev, sys_process_vm_writev),// 352 + ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 355 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 356 + LINX_(__NR_renameat2, sys_renameat2), // 357 + + LINXY(__NR_getrandom, sys_getrandom), // 359 +diff --git a/coregrind/m_syswrap/syswrap-s390x-linux.c b/coregrind/m_syswrap/syswrap-s390x-linux.c +index a0a330aa2..3427fee16 100644 +--- a/coregrind/m_syswrap/syswrap-s390x-linux.c ++++ b/coregrind/m_syswrap/syswrap-s390x-linux.c +@@ -825,8 +825,8 @@ static SyscallTableEntry syscall_table[] = { + LINX_(__NR_kcmp, sys_kcmp), // 343 + // ?????(__NR_finit_module, ), // 344 + +-// ?????(__NR_sched_setattr, ), // 345 +-// ?????(__NR_sched_getattr, ), // 346 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 345 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 346 + LINX_(__NR_renameat2, sys_renameat2), // 347 + // ?????(__NR_seccomp, ), // 348 + LINXY(__NR_getrandom, sys_getrandom), // 349 +diff --git a/coregrind/m_syswrap/syswrap-x86-linux.c b/coregrind/m_syswrap/syswrap-x86-linux.c +index 332ed0bf2..b59d96f37 100644 +--- a/coregrind/m_syswrap/syswrap-x86-linux.c ++++ b/coregrind/m_syswrap/syswrap-x86-linux.c +@@ -1580,8 +1580,8 @@ static SyscallTableEntry syscall_table[] = { + LINX_(__NR_kcmp, sys_kcmp), // 349 + + // LIN__(__NR_finit_module, sys_ni_syscall), // 350 +-// LIN__(__NR_sched_setattr, sys_ni_syscall), // 351 +-// LIN__(__NR_sched_getattr, sys_ni_syscall), // 352 ++ LINX_(__NR_sched_setattr, sys_sched_setattr), // 351 ++ LINXY(__NR_sched_getattr, sys_sched_getattr), // 352 + LINX_(__NR_renameat2, sys_renameat2), // 353 + // LIN__(__NR_seccomp, sys_ni_syscall), // 354 + +diff --git a/include/vki/vki-linux.h b/include/vki/vki-linux.h +index 75b583165..ef93b9258 100644 +--- a/include/vki/vki-linux.h ++++ 
b/include/vki/vki-linux.h +@@ -410,6 +410,23 @@ struct vki_sched_param { + int sched_priority; + }; + ++struct vki_sched_attr { ++ vki_uint32_t size; ++ vki_uint32_t sched_policy; ++ vki_uint64_t sched_flags; ++ ++ /* SCHED_NORMAL, SCHED_BATCH */ ++ vki_int32_t sched_nice; ++ ++ /* SCHED_FIFO, SCHED_RR */ ++ vki_uint32_t sched_priority; ++ ++ /* SCHED_DEADLINE */ ++ vki_uint64_t sched_runtime; ++ vki_uint64_t sched_deadline; ++ vki_uint64_t sched_period; ++}; ++ + #define VKI_TASK_COMM_LEN 16 + + //---------------------------------------------------------------------- diff --git a/SPECS/valgrind.spec b/SPECS/valgrind.spec index 9b6a78c..cf09408 100644 --- a/SPECS/valgrind.spec +++ b/SPECS/valgrind.spec @@ -3,7 +3,7 @@ Summary: Tool for finding memory management bugs in programs Name: %{?scl_prefix}valgrind Version: 3.16.0 -Release: 2%{?dist} +Release: 4%{?dist} Epoch: 1 License: GPLv2+ URL: http://www.valgrind.org/ @@ -100,6 +100,25 @@ Patch7: valgrind-3.16.0-ppc-L-field.patch # KDE#422715 x86: vex: the `impossible' happened: expr_is_guardable Patch8: valgrind-3.16.0-387-float.patch +# KDE#422174 unhandled instruction bytes: 0x48 0xE9 (REX prefix JMP instr) +Patch9: valgrind-3.16.1-REX-prefix-JMP.patch + +# KDE#422623 epoll_ctl warns for uninit padding on non-amd64 64bit arches +Patch10: valgrind-3.16.1-epoll.patch + +# KDE#369029 handle linux syscalls sched_getattr and sched_setattr +Patch11: valgrind-3.16.1-sched_getsetattr.patch + +# KDE#415293 Incorrect call-graph tracking due to new _dl_runtime_resolve* +Patch12: valgrind-3.16.1-dl_runtime_resolve.patch + + +# KDE#428648 s390_emit_load_mem panics due to 20-bit offset for vector load +Patch15: valgrind-3.16.1-s390_emit_load_mem.patch + +# KDE#133812 s390x: z14 vector instructions not implemented +Patch16: valgrind-3.16.1-s390x-z14-vector.patch + BuildRequires: glibc-devel %if %{build_openmpi} @@ -236,6 +255,13 @@ Valgrind User Manual for details. 
%patch7 -p1 %patch8 -p1 +%patch9 -p1 +%patch10 -p1 +%patch11 -p1 +%patch12 -p1 + +%patch15 -p1 +%patch16 -p1 %build @@ -456,6 +482,16 @@ fi %endif %changelog +* Fri Dec 4 2020 Mark Wielaard - 3.16.0-4 +- Add valgrind-3.16.1-s390_emit_load_mem.patch +- Add valgrind-3.16.1-s390x-z14-vector.patch + +* Tue Oct 20 2020 Mark Wielaard - 3.16.0-3 +- Add valgrind-3.16.1-REX-prefix-JMP.patch +- Add valgrind-3.16.1-epoll.patch +- Add valgrind-3.16.1-sched_getsetattr.patch +- Add valgrind-3.16.1-dl_runtime_resolve.patch + * Wed Jun 24 2020 Mark Wielaard - 3.16.0-2 - Add valgrind-3.16.0-ppc-L-field.patch - Add valgrind-3.16.0-387-float.patch