Blob Blame History Raw
diff --git a/VEX/priv/guest_s390_defs.h b/VEX/priv/guest_s390_defs.h
index 9f93cff19..905429015 100644
--- a/VEX/priv/guest_s390_defs.h
+++ b/VEX/priv/guest_s390_defs.h
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -263,26 +263,27 @@ extern ULong last_execute_target;
    before  S390_VEC_OP_LAST. */
 typedef enum {
    S390_VEC_OP_INVALID = 0,
-   S390_VEC_OP_VPKS = 1,
-   S390_VEC_OP_VPKLS = 2,
-   S390_VEC_OP_VFAE = 3,
-   S390_VEC_OP_VFEE = 4,
-   S390_VEC_OP_VFENE = 5,
-   S390_VEC_OP_VISTR = 6,
-   S390_VEC_OP_VSTRC = 7,
-   S390_VEC_OP_VCEQ = 8,
-   S390_VEC_OP_VTM = 9,
-   S390_VEC_OP_VGFM = 10,
-   S390_VEC_OP_VGFMA = 11,
-   S390_VEC_OP_VMAH = 12,
-   S390_VEC_OP_VMALH = 13,
-   S390_VEC_OP_VCH = 14,
-   S390_VEC_OP_VCHL = 15,
-   S390_VEC_OP_VFCE = 16,
-   S390_VEC_OP_VFCH = 17,
-   S390_VEC_OP_VFCHE = 18,
-   S390_VEC_OP_VFTCI = 19,
-   S390_VEC_OP_LAST = 20 // supposed to be the last element in enum
+   S390_VEC_OP_VPKS,
+   S390_VEC_OP_VPKLS,
+   S390_VEC_OP_VFAE,
+   S390_VEC_OP_VFEE,
+   S390_VEC_OP_VFENE,
+   S390_VEC_OP_VISTR,
+   S390_VEC_OP_VSTRC,
+   S390_VEC_OP_VCEQ,
+   S390_VEC_OP_VTM,
+   S390_VEC_OP_VGFM,
+   S390_VEC_OP_VGFMA,
+   S390_VEC_OP_VMAH,
+   S390_VEC_OP_VMALH,
+   S390_VEC_OP_VCH,
+   S390_VEC_OP_VCHL,
+   S390_VEC_OP_VFTCI,
+   S390_VEC_OP_VFMIN,
+   S390_VEC_OP_VFMAX,
+   S390_VEC_OP_VBPERM,
+   S390_VEC_OP_VMSL,
+   S390_VEC_OP_LAST             // supposed to be the last element in enum
 } s390x_vec_op_t;
 
 /* Arguments of s390x_dirtyhelper_vec_op(...) which are packed into one
diff --git a/VEX/priv/guest_s390_helpers.c b/VEX/priv/guest_s390_helpers.c
index a470d9f8d..b71b621ae 100644
--- a/VEX/priv/guest_s390_helpers.c
+++ b/VEX/priv/guest_s390_helpers.c
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -314,20 +314,11 @@ ULong s390x_dirtyhelper_STCKE(ULong *addr) {return 3;}
 /*--- Dirty helper for Store Facility instruction          ---*/
 /*------------------------------------------------------------*/
 #if defined(VGA_s390x)
-static void
-s390_set_facility_bit(ULong *addr, UInt bitno, UInt value)
-{
-   addr  += bitno / 64;
-   bitno  = bitno % 64;
-
-   ULong mask = 1;
-   mask <<= (63 - bitno);
 
-   if (value == 1) {
-      *addr |= mask;   // set
-   } else {
-      *addr &= ~mask;  // clear
-   }
+static ULong
+s390_stfle_range(UInt lo, UInt hi)
+{
+   return ((1UL << (hi + 1 - lo)) - 1) << (63 - (hi % 64));
 }
 
 ULong
@@ -336,6 +327,77 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
    ULong hoststfle[S390_NUM_FACILITY_DW], cc, num_dw, i;
    register ULong reg0 asm("0") = guest_state->guest_r0 & 0xF;  /* r0[56:63] */
 
+   /* Restrict to facilities that we know about and that we assume to be
+      compatible with Valgrind.  Of course, in this way we may reject features
+      that Valgrind is not really involved in (and thus would be compatible
+      with), but querying for such features doesn't seem like a typical use
+      case. */
+   ULong accepted_facility[S390_NUM_FACILITY_DW] = {
+      /* ===  0 .. 63  === */
+      (s390_stfle_range(0, 16)
+       /* 17: message-security-assist, not supported */
+       | s390_stfle_range(18, 19)
+       /* 20: HFP-multiply-and-add/subtract, not supported */
+       | s390_stfle_range(21, 22)
+       /* 23: HFP-unnormalized-extension, not supported */
+       | s390_stfle_range(24, 25)
+       /* 26: parsing-enhancement, not supported */
+       | s390_stfle_range(27, 28)
+       /* 29: unassigned */
+       | s390_stfle_range(30, 30)
+       /* 31: extract-CPU-time, not supported */
+       | s390_stfle_range(32, 41)
+       /* 42-43: DFP, not fully supported */
+       /* 44: PFPO, not fully supported */
+       | s390_stfle_range(45, 47)
+       /* 48: DFP zoned-conversion, not supported */
+       /* 49: includes PPA, not supported */
+       /* 50: constrained transactional-execution, not supported */
+       | s390_stfle_range(51, 55)
+       /* 56: unassigned */
+       /* 57: MSA5, not supported */
+       | s390_stfle_range(58, 60)
+       /* 61: miscellaneous-instruction 3, not supported */
+       | s390_stfle_range(62, 63)),
+
+      /* ===  64 .. 127  === */
+      (s390_stfle_range(64, 72)
+       /* 73: transactional-execution, not supported */
+       | s390_stfle_range(74, 75)
+       /* 76: MSA3, not supported */
+       /* 77: MSA4, not supported */
+       | s390_stfle_range(78, 78)
+       /* 80: DFP packed-conversion, not supported */
+       /* 81: PPA-in-order, not supported */
+       | s390_stfle_range(82, 82)
+       /* 83-127: unassigned */ ),
+
+      /* ===  128 .. 191  === */
+      (s390_stfle_range(128, 131)
+       /* 132: unassigned */
+       /* 133: guarded-storage, not supported */
+       /* 134: vector packed decimal, not supported */
+       | s390_stfle_range(135, 135)
+       /* 136: unassigned */
+       /* 137: unassigned */
+       | s390_stfle_range(138, 142)
+       /* 143: unassigned */
+       | s390_stfle_range(144, 145)
+       /* 146: MSA8, not supported */
+       | s390_stfle_range(147, 147)
+       /* 148: vector-enhancements 2, not supported */
+       | s390_stfle_range(149, 149)
+       /* 150: unassigned */
+       /* 151: DEFLATE-conversion, not supported */
+       /* 153: unassigned */
+       /* 154: unassigned */
+       /* 155: MSA9, not supported */
+       | s390_stfle_range(156, 156)
+       /* 157-167: unassigned */
+       | s390_stfle_range(168, 168)
+       /* 169-191: unassigned */ ),
+   };
+
    /* We cannot store more than S390_NUM_FACILITY_DW
       (and it makes not much sense to do so anyhow) */
    if (reg0 > S390_NUM_FACILITY_DW - 1)
@@ -351,35 +413,9 @@ s390x_dirtyhelper_STFLE(VexGuestS390XState *guest_state, ULong *addr)
    /* Update guest register 0  with what STFLE set r0 to */
    guest_state->guest_r0 = reg0;
 
-   /* Set default: VM facilities = host facilities */
+   /* VM facilities = host facilities, filtered by acceptance */
    for (i = 0; i < num_dw; ++i)
-      addr[i] = hoststfle[i];
-
-   /* Now adjust the VM facilities according to what the VM supports */
-   s390_set_facility_bit(addr, S390_FAC_LDISP,  1);
-   s390_set_facility_bit(addr, S390_FAC_EIMM,   1);
-   s390_set_facility_bit(addr, S390_FAC_ETF2,   1);
-   s390_set_facility_bit(addr, S390_FAC_ETF3,   1);
-   s390_set_facility_bit(addr, S390_FAC_GIE,    1);
-   s390_set_facility_bit(addr, S390_FAC_EXEXT,  1);
-   s390_set_facility_bit(addr, S390_FAC_HIGHW,  1);
-   s390_set_facility_bit(addr, S390_FAC_LSC2,   1);
-
-   s390_set_facility_bit(addr, S390_FAC_HFPMAS, 0);
-   s390_set_facility_bit(addr, S390_FAC_HFPUNX, 0);
-   s390_set_facility_bit(addr, S390_FAC_XCPUT,  0);
-   s390_set_facility_bit(addr, S390_FAC_MSA,    0);
-   s390_set_facility_bit(addr, S390_FAC_PENH,   0);
-   s390_set_facility_bit(addr, S390_FAC_DFP,    0);
-   s390_set_facility_bit(addr, S390_FAC_PFPO,   0);
-   s390_set_facility_bit(addr, S390_FAC_DFPZC,  0);
-   s390_set_facility_bit(addr, S390_FAC_MISC,   0);
-   s390_set_facility_bit(addr, S390_FAC_CTREXE, 0);
-   s390_set_facility_bit(addr, S390_FAC_TREXE,  0);
-   s390_set_facility_bit(addr, S390_FAC_MSA4,   0);
-   s390_set_facility_bit(addr, S390_FAC_VXE,    0);
-   s390_set_facility_bit(addr, S390_FAC_VXE2,   0);
-   s390_set_facility_bit(addr, S390_FAC_DFLT,   0);
+      addr[i] = hoststfle[i] & accepted_facility[i];
 
    return cc;
 }
@@ -2500,25 +2536,26 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
    vassert(d->op > S390_VEC_OP_INVALID && d->op < S390_VEC_OP_LAST);
    static const UChar opcodes[][2] = {
       {0x00, 0x00}, /* invalid */
-      {0xe7, 0x97}, /* VPKS */
-      {0xe7, 0x95}, /* VPKLS */
-      {0xe7, 0x82}, /* VFAE */
-      {0xe7, 0x80}, /* VFEE */
-      {0xe7, 0x81}, /* VFENE */
-      {0xe7, 0x5c}, /* VISTR */
-      {0xe7, 0x8a}, /* VSTRC */
-      {0xe7, 0xf8}, /* VCEQ */
-      {0xe7, 0xd8}, /* VTM */
-      {0xe7, 0xb4}, /* VGFM */
-      {0xe7, 0xbc}, /* VGFMA */
-      {0xe7, 0xab}, /* VMAH */
-      {0xe7, 0xa9}, /* VMALH */
-      {0xe7, 0xfb}, /* VCH */
-      {0xe7, 0xf9}, /* VCHL */
-      {0xe7, 0xe8}, /* VFCE */
-      {0xe7, 0xeb}, /* VFCH */
-      {0xe7, 0xea}, /* VFCHE */
-      {0xe7, 0x4a}  /* VFTCI */
+      [S390_VEC_OP_VPKS]  = {0xe7, 0x97},
+      [S390_VEC_OP_VPKLS] = {0xe7, 0x95},
+      [S390_VEC_OP_VFAE]  = {0xe7, 0x82},
+      [S390_VEC_OP_VFEE]  = {0xe7, 0x80},
+      [S390_VEC_OP_VFENE] = {0xe7, 0x81},
+      [S390_VEC_OP_VISTR] = {0xe7, 0x5c},
+      [S390_VEC_OP_VSTRC] = {0xe7, 0x8a},
+      [S390_VEC_OP_VCEQ]  = {0xe7, 0xf8},
+      [S390_VEC_OP_VTM]   = {0xe7, 0xd8},
+      [S390_VEC_OP_VGFM]  = {0xe7, 0xb4},
+      [S390_VEC_OP_VGFMA] = {0xe7, 0xbc},
+      [S390_VEC_OP_VMAH]  = {0xe7, 0xab},
+      [S390_VEC_OP_VMALH] = {0xe7, 0xa9},
+      [S390_VEC_OP_VCH]   = {0xe7, 0xfb},
+      [S390_VEC_OP_VCHL]  = {0xe7, 0xf9},
+      [S390_VEC_OP_VFTCI] = {0xe7, 0x4a},
+      [S390_VEC_OP_VFMIN] = {0xe7, 0xee},
+      [S390_VEC_OP_VFMAX] = {0xe7, 0xef},
+      [S390_VEC_OP_VBPERM]= {0xe7, 0x85},
+      [S390_VEC_OP_VMSL]  = {0xe7, 0xb8},
    };
 
    union {
@@ -2612,6 +2649,7 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
    case S390_VEC_OP_VGFMA:
    case S390_VEC_OP_VMAH:
    case S390_VEC_OP_VMALH:
+   case S390_VEC_OP_VMSL:
       the_insn.VRRd.v1 = 1;
       the_insn.VRRd.v2 = 2;
       the_insn.VRRd.v3 = 3;
@@ -2621,9 +2659,9 @@ s390x_dirtyhelper_vec_op(VexGuestS390XState *guest_state,
       the_insn.VRRd.m6 = d->m5;
       break;
 
-   case S390_VEC_OP_VFCE:
-   case S390_VEC_OP_VFCH:
-   case S390_VEC_OP_VFCHE:
+   case S390_VEC_OP_VFMIN:
+   case S390_VEC_OP_VFMAX:
+   case S390_VEC_OP_VBPERM:
       the_insn.VRRc.v1 = 1;
       the_insn.VRRc.v2 = 2;
       the_insn.VRRc.v3 = 3;
diff --git a/VEX/priv/guest_s390_toIR.c b/VEX/priv/guest_s390_toIR.c
index c27a8d3fe..5f2c5ce98 100644
--- a/VEX/priv/guest_s390_toIR.c
+++ b/VEX/priv/guest_s390_toIR.c
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -248,6 +248,13 @@ typedef enum {
 #define VRS_d2(insn) (((insn) >> 32) & 0xfff)
 #define VRS_m4(insn) (((insn) >> 28) & 0xf)
 #define VRS_rxb(insn) (((insn) >> 24) & 0xf)
+#define VRSd_v1(insn) (((insn) >> 28) & 0xf)
+#define VRSd_r3(insn) (((insn) >> 48) & 0xf)
+#define VSI_i3(insn) (((insn) >> 48) & 0xff)
+#define VSI_b2(insn) (((insn) >> 44) & 0xf)
+#define VSI_d2(insn) (((insn) >> 32) & 0xfff)
+#define VSI_v1(insn) (((insn) >> 28) & 0xf)
+#define VSI_rxb(insn) (((insn) >> 24) & 0xf)
 
 
 /*------------------------------------------------------------*/
@@ -1934,6 +1941,26 @@ s390_vr_get_type(const UChar m)
    return results[m];
 }
 
+/* Determine IRType from instruction's floating-point format field */
+static IRType
+s390_vr_get_ftype(const UChar m)
+{
+   static const IRType results[] = {Ity_F32, Ity_F64, Ity_F128};
+   if (m >= 2 && m <= 4)
+      return results[m - 2];
+   return Ity_INVALID;
+}
+
+/* Determine number of elements from instruction's floating-point format
+   field */
+static UChar
+s390_vr_get_n_elem(const UChar m)
+{
+   if (m >= 2 && m <= 4)
+      return 1 << (4 - m);
+   return 0;
+}
+
 /* Determine if Condition Code Set (CS) flag is set in m field */
 #define s390_vr_is_cs_set(m) (((m) & 0x1) != 0)
 
@@ -2188,12 +2215,15 @@ s390_vr_offset_by_index(UInt archreg,IRType type, UChar index)
          goto invalidIndex;
       }
       return vr_offset(archreg) + sizeof(ULong) * index;
+
    case Ity_V128:
+   case Ity_F128:
       if(index == 0) {
          return vr_qw_offset(archreg);
       } else {
          goto invalidIndex;
       }
+
    default:
       vpanic("s390_vr_offset_by_index: unknown type");
    }
@@ -2211,7 +2241,14 @@ put_vr(UInt archreg, IRType type, UChar index, IRExpr *expr)
    UInt offset = s390_vr_offset_by_index(archreg, type, index);
    vassert(typeOfIRExpr(irsb->tyenv, expr) == type);
 
-   stmt(IRStmt_Put(offset, expr));
+   if (type == Ity_F128) {
+      IRTemp val = newTemp(Ity_F128);
+      assign(val, expr);
+      stmt(IRStmt_Put(offset, unop(Iop_F128HItoF64, mkexpr(val))));
+      stmt(IRStmt_Put(offset + 8, unop(Iop_F128LOtoF64, mkexpr(val))));
+   } else {
+      stmt(IRStmt_Put(offset, expr));
+   }
 }
 
 /* Read type sized part specified by index of a vr register. */
@@ -2219,6 +2256,11 @@ static IRExpr *
 get_vr(UInt archreg, IRType type, UChar index)
 {
    UInt offset = s390_vr_offset_by_index(archreg, type, index);
+   if (type == Ity_F128) {
+      return binop(Iop_F64HLtoF128,
+                   IRExpr_Get(offset, Ity_F64),
+                   IRExpr_Get(offset + 8, Ity_F64));
+   }
    return IRExpr_Get(offset, type);
 }
 
@@ -2294,11 +2336,11 @@ s390_getCountToBlockBoundary(IRTemp op2addr, UChar m)
    return mkexpr(output);
 }
 
-/* Load bytes into v1.
-   maxIndex specifies max index to load and must be Ity_I32.
-   If maxIndex >= 15, all 16 bytes are loaded.
-   All bytes after maxIndex are zeroed. */
-static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex)
+/* Starting from addr, load at most maxIndex + 1 bytes into v1.  Fill the
+   leftmost or rightmost bytes of v1, depending on whether `rightmost' is set.
+   If maxIndex >= 15, load all 16 bytes; otherwise clear the remaining bytes. */
+static void
+s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost)
 {
    IRTemp maxIdx = newTemp(Ity_I32);
    IRTemp cappedMax = newTemp(Ity_I64);
@@ -2311,8 +2353,8 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex)
       crossed if and only if the real insn would have crossed it as well.
       Thus, if the bytes to load are fully contained in an aligned 16-byte
       chunk, load the whole 16-byte aligned chunk, and otherwise load 16 bytes
-      from the unaligned address.  Then shift the loaded data left-aligned
-      into the target vector register. */
+      from the unaligned address.  Then shift the loaded data left- or
+      right-aligned into the target vector register. */
 
    assign(maxIdx, maxIndex);
    assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)),
@@ -2325,20 +2367,60 @@ static void s390_vr_loadWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex)
    assign(back, mkite(binop(Iop_CmpLE64U, mkexpr(offset), mkexpr(zeroed)),
                       mkexpr(offset), mkU64(0)));
 
-   /* How much to shift the loaded 16-byte vector to the right, and then to
-      the left.  Since both 'zeroed' and 'back' range from 0 to 15, the shift
-      amounts range from 0 to 120. */
-   IRExpr *shrAmount = binop(Iop_Shl64,
-                             binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)),
-                             mkU8(3));
-   IRExpr *shlAmount = binop(Iop_Shl64, mkexpr(zeroed), mkU8(3));
+   IRExpr* chunk = load(Ity_V128, binop(Iop_Sub64, mkexpr(addr), mkexpr(back)));
+
+   /* Shift the loaded 16-byte vector to the right, then to the left, or vice
+      versa, where each shift amount ranges from 0 to 120. */
+   IRExpr* shift1;
+   IRExpr* shift2 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(zeroed), mkU8(3)));
+
+   if (rightmost) {
+      shift1 = unop(Iop_64to8, binop(Iop_Shl64, mkexpr(back), mkU8(3)));
+      put_vr_qw(v1, binop(Iop_ShrV128,
+                          binop(Iop_ShlV128, chunk, shift1),
+                          shift2));
+   } else {
+      shift1 = unop(Iop_64to8,
+                    binop(Iop_Shl64,
+                          binop(Iop_Sub64, mkexpr(zeroed), mkexpr(back)),
+                          mkU8(3)));
+      put_vr_qw(v1, binop(Iop_ShlV128,
+                          binop(Iop_ShrV128, chunk, shift1),
+                          shift2));
+   }
+}
+
+/* Store at most maxIndex + 1 bytes from v1 to addr.  Store the leftmost or
+   rightmost bytes of v1, depending on whether `rightmost' is set.  If maxIndex
+   >= 15, store all 16 bytes. */
+static void
+s390_vr_storeWithLength(UChar v1, IRTemp addr, IRExpr *maxIndex, Bool rightmost)
+{
+   IRTemp maxIdx = newTemp(Ity_I32);
+   IRTemp cappedMax = newTemp(Ity_I64);
+   IRTemp counter = newTemp(Ity_I64);
+   IRExpr* offset;
+
+   assign(maxIdx, maxIndex);
+   assign(cappedMax, mkite(binop(Iop_CmpLT32U, mkexpr(maxIdx), mkU32(15)),
+                           unop(Iop_32Uto64, mkexpr(maxIdx)), mkU64(15)));
+
+   assign(counter, get_counter_dw0());
+
+   if (rightmost)
+      offset = binop(Iop_Add64,
+                     binop(Iop_Sub64, mkU64(15), mkexpr(cappedMax)),
+                     mkexpr(counter));
+   else
+      offset = mkexpr(counter);
+
+   store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)),
+         binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, offset)));
 
-   put_vr_qw(v1, binop(Iop_ShlV128,
-                       binop(Iop_ShrV128,
-                             load(Ity_V128,
-                                  binop(Iop_Sub64, mkexpr(addr), mkexpr(back))),
-                             unop(Iop_64to8, shrAmount)),
-                       unop(Iop_64to8, shlAmount)));
+   /* Check for end of field */
+   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
+   iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(cappedMax)));
+   put_counter_dw0(mkU64(0));
 }
 
 /* Bitwise vCond ? v1 : v2
@@ -3749,6 +3831,28 @@ s390_format_VRS_RRDVM(const HChar *(*irgen)(UChar r1, IRTemp op2addr, UChar v3,
       s390_disasm(ENC5(MNM, GPR, UDXB, VR, UINT), mnm, r1, d2, 0, b2, v3, m4);
 }
 
+static void
+s390_format_VRS_RRDV(const HChar *(*irgen)(UChar v1, UChar r3, IRTemp op2addr),
+                     UChar v1, UChar r3, UChar b2, UShort d2, UChar rxb)
+{
+   const HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   /* If s390_host_has_vx is unset, flag an emulation failure and return. */
+   if (! s390_host_has_vx) {
+      emulation_failure(EmFail_S390X_vx);
+      return;
+   }
+   /* op2addr = d2 + contents of GPR b2; b2 == 0 contributes zero. */
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+   /* Map v1 through s390_vr_getVRindex (uses rxb), then call irgen. */
+   v1  = s390_vr_getVRindex(v1, 4, rxb);
+   mnm = irgen(v1, r3, op2addr);
+   /* Disassemble only when VEX_TRACE_FE is set in vex_traceflags. */
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, VR, GPR, UDXB), mnm, v1, r3, d2, 0, b2);
+}
+
 
 static void
 s390_format_VRS_VRDVM(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar v3,
@@ -4081,6 +4185,29 @@ s390_format_VRRa_VVVMMM(const HChar *(*irgen)(UChar v1, UChar v2, UChar v3,
                   mnm, v1, v2, v3, m4, m5, m6);
 }
 
+static void
+s390_format_VSI_URDV(const HChar *(*irgen)(UChar v1, IRTemp op2addr, UChar i3),
+                     UChar v1, UChar b2, UShort d2, UChar i3, UChar rxb)
+{
+   const HChar *mnm;
+   IRTemp op2addr = newTemp(Ity_I64);
+   /* d2 must be UShort: VSI_d2() yields 12 bits, UChar would truncate it. */
+   if (!s390_host_has_vx) {
+      emulation_failure(EmFail_S390X_vx);
+      return;
+   }
+   /* Map v1 through s390_vr_getVRindex (uses rxb). */
+   v1 = s390_vr_getVRindex(v1, 4, rxb);
+   /* op2addr = d2 + contents of GPR b2; b2 == 0 contributes zero. */
+   assign(op2addr, binop(Iop_Add64, mkU64(d2), b2 != 0 ? get_gpr_dw0(b2) :
+          mkU64(0)));
+
+   mnm = irgen(v1, op2addr, i3);
+   /* Disassemble only when VEX_TRACE_FE is set in vex_traceflags. */
+   if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
+      s390_disasm(ENC4(MNM, VR, UDXB, UINT), mnm, v1, d2, 0, b2, i3);
+}
+
 /*------------------------------------------------------------*/
 /*--- Build IR for opcodes                                 ---*/
 /*------------------------------------------------------------*/
@@ -16186,7 +16313,9 @@ s390_irgen_VGM(UChar v1, UShort i2, UChar m3)
 static const HChar *
 s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3)
 {
-   IRType type = s390_vr_get_type(m3);
+   s390_insn_assert("vllez", m3 <= 3 || m3 == 6);
+
+   IRType type = s390_vr_get_type(m3 & 3);
    IRExpr* op2 = load(type, mkexpr(op2addr));
    IRExpr* op2as64bit;
    switch (type) {
@@ -16206,7 +16335,13 @@ s390_irgen_VLLEZ(UChar v1, IRTemp op2addr, UChar m3)
       vpanic("s390_irgen_VLLEZ: unknown type");
    }
 
-   put_vr_dw0(v1, op2as64bit);
+   if (m3 == 6) {
+      /* left-aligned */
+      put_vr_dw0(v1, binop(Iop_Shl64, op2as64bit, mkU8(32)));
+   } else {
+      /* right-aligned */
+      put_vr_dw0(v1, op2as64bit);
+   }
    put_vr_dw1(v1, mkU64(0));
    return "vllez";
 }
@@ -16615,7 +16750,7 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3)
                             s390_getCountToBlockBoundary(addr, m3),
                             mkU32(1));
 
-   s390_vr_loadWithLength(v1, addr, maxIndex);
+   s390_vr_loadWithLength(v1, addr, maxIndex, False);
 
    return "vlbb";
 }
@@ -16623,41 +16758,50 @@ s390_irgen_VLBB(UChar v1, IRTemp addr, UChar m3)
 static const HChar *
 s390_irgen_VLL(UChar v1, IRTemp addr, UChar r3)
 {
-   s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3));
+   s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), False);
 
    return "vll";
 }
 
 static const HChar *
-s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3)
+s390_irgen_VLRL(UChar v1, IRTemp addr, UChar i3)
 {
-   IRTemp counter = newTemp(Ity_I64);
-   IRTemp maxIndexToStore = newTemp(Ity_I64);
-   IRTemp gpr3 = newTemp(Ity_I64);
-
-   assign(gpr3, unop(Iop_32Uto64, get_gpr_w1(r3)));
-   assign(maxIndexToStore, mkite(binop(Iop_CmpLE64U,
-                                       mkexpr(gpr3),
-                                       mkU64(16)
-                                       ),
-                                 mkexpr(gpr3),
-                                 mkU64(16)
-                                 )
-         );
+   s390_insn_assert("vlrl", (i3 & 0xf0) == 0);
+   s390_vr_loadWithLength(v1, addr, mkU32((UInt) i3), True);
 
-   assign(counter, get_counter_dw0());
+   return "vlrl";
+}
 
-   store(binop(Iop_Add64, mkexpr(addr), mkexpr(counter)),
-         binop(Iop_GetElem8x16, get_vr_qw(v1), unop(Iop_64to8, mkexpr(counter))));
+static const HChar *
+s390_irgen_VLRLR(UChar v1, UChar r3, IRTemp addr)
+{
+   s390_vr_loadWithLength(v1, addr, get_gpr_w1(r3), True);
 
-   /* Check for end of field */
-   put_counter_dw0(binop(Iop_Add64, mkexpr(counter), mkU64(1)));
-   iterate_if(binop(Iop_CmpNE64, mkexpr(counter), mkexpr(maxIndexToStore)));
-   put_counter_dw0(mkU64(0));
+   return "vlrlr";
+}
 
+static const HChar *
+s390_irgen_VSTL(UChar v1, IRTemp addr, UChar r3)
+{
+   s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), False);
    return "vstl";
 }
 
+static const HChar *
+s390_irgen_VSTRL(UChar v1, IRTemp addr, UChar i3)
+{
+   s390_insn_assert("vstrl", (i3 & 0xf0) == 0);
+   s390_vr_storeWithLength(v1, addr, mkU32((UInt) i3), True);
+   return "vstrl";
+}
+
+static const HChar *
+s390_irgen_VSTRLR(UChar v1, UChar r3, IRTemp addr)
+{
+   s390_vr_storeWithLength(v1, addr, get_gpr_w1(r3), True);
+   return "vstrlr";
+}
+
 static const HChar *
 s390_irgen_VX(UChar v1, UChar v2, UChar v3)
 {
@@ -16682,6 +16826,24 @@ s390_irgen_VO(UChar v1, UChar v2, UChar v3)
    return "vo";
 }
 
+static const HChar *
+s390_irgen_VOC(UChar v1, UChar v2, UChar v3)
+{
+   put_vr_qw(v1, binop(Iop_OrV128, get_vr_qw(v2),
+                       unop(Iop_NotV128, get_vr_qw(v3))));
+
+   return "voc";
+}
+
+static const HChar *
+s390_irgen_VNN(UChar v1, UChar v2, UChar v3)
+{
+   put_vr_qw(v1, unop(Iop_NotV128,
+                      binop(Iop_AndV128, get_vr_qw(v2), get_vr_qw(v3))));
+
+   return "vnn";
+}
+
 static const HChar *
 s390_irgen_VNO(UChar v1, UChar v2, UChar v3)
 {
@@ -16691,6 +16853,15 @@ s390_irgen_VNO(UChar v1, UChar v2, UChar v3)
    return "vno";
 }
 
+static const HChar *
+s390_irgen_VNX(UChar v1, UChar v2, UChar v3)
+{
+   put_vr_qw(v1, unop(Iop_NotV128,
+                      binop(Iop_XorV128, get_vr_qw(v2), get_vr_qw(v3))));
+
+   return "vnx";
+}
+
 static const HChar *
 s390_irgen_LZRF(UChar r1, IRTemp op2addr)
 {
@@ -17499,9 +17670,19 @@ s390_irgen_VCTZ(UChar v1, UChar v2, UChar m3)
 static const HChar *
 s390_irgen_VPOPCT(UChar v1, UChar v2, UChar m3)
 {
-   vassert(m3 == 0);
+   s390_insn_assert("vpopct", m3 <= 3);
+
+   IRExpr* cnt = unop(Iop_Cnt8x16, get_vr_qw(v2));
 
-   put_vr_qw(v1, unop(Iop_Cnt8x16, get_vr_qw(v2)));
+   if (m3 >= 1) {
+      cnt = unop(Iop_PwAddL8Ux16, cnt);
+      if (m3 >= 2) {
+         cnt = unop(Iop_PwAddL16Ux8, cnt);
+         if (m3 == 3)
+            cnt = unop(Iop_PwAddL32Ux4, cnt);
+      }
+   }
+   put_vr_qw(v1, cnt);
 
    return "vpopct";
 }
@@ -18335,12 +18516,53 @@ s390_irgen_VMALH(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5)
    return "vmalh";
 }
 
+static const HChar *
+s390_irgen_VMSL(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
+{
+   s390_insn_assert("vmsl", m5 == 3 && (m6 & 3) == 0);
+
+   IRDirty* d;
+   IRTemp cc = newTemp(Ity_I64);
+   /* Pack operation, registers and modifiers (m5/m6 go into m4/m5). */
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
+   details.op = S390_VEC_OP_VMSL;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.v3 = v3;
+   details.v4 = v4;
+   details.m4 = m5;
+   details.m5 = m6;
+   /* Build the dirty call to s390x_dirtyhelper_vec_op; result goes to cc. */
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+                         &s390x_dirtyhelper_vec_op,
+                         mkIRExprVec_2(IRExpr_GSPTR(),
+                                       mkU64(details.serialized)));
+   /* Declare guest state accessed: v2, v3, v4 are read; v1 is written. */
+   d->nFxState = 4;
+   vex_bzero(&d->fxState, sizeof(d->fxState));
+   d->fxState[0].fx     = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size   = sizeof(V128);
+   d->fxState[1].fx     = Ifx_Read;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size   = sizeof(V128);
+   d->fxState[2].fx     = Ifx_Read;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v4 * sizeof(V128);
+   d->fxState[2].size   = sizeof(V128);
+   d->fxState[3].fx     = Ifx_Write;
+   d->fxState[3].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[3].size   = sizeof(V128);
+
+   stmt(IRStmt_Dirty(d));
+
+   return "vmsl";
+}
+
 static void
-s390_vector_fp_convert(IROp op, IRType fromType, IRType toType,
+s390_vector_fp_convert(IROp op, IRType fromType, IRType toType, Bool rounding,
                        UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
    Bool isSingleElementOp = s390_vr_is_single_element_control_set(m4);
-   UChar maxIndex = isSingleElementOp ? 0 : 1;
 
    /* For Iop_F32toF64 we do this:
       f32[0] -> f64[0]
@@ -18353,14 +18575,21 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType,
       The magic below with scaling factors is used to achieve the logic
       described above.
    */
-   const UChar sourceIndexScaleFactor = (op == Iop_F32toF64) ? 2 : 1;
-   const UChar destinationIndexScaleFactor = (op == Iop_F64toF32) ? 2 : 1;
-
-   const Bool isUnary = (op == Iop_F32toF64);
-   for (UChar i = 0; i <= maxIndex; i++) {
+   Int size_diff = sizeofIRType(toType) - sizeofIRType(fromType);
+   const UChar sourceIndexScaleFactor = size_diff > 0 ? 2 : 1;
+   const UChar destinationIndexScaleFactor = size_diff < 0 ? 2 : 1;
+   UChar n_elem = (isSingleElementOp ? 1 :
+                   16 / (size_diff > 0 ?
+                         sizeofIRType(toType) : sizeofIRType(fromType)));
+
+   for (UChar i = 0; i < n_elem; i++) {
       IRExpr* argument = get_vr(v2, fromType, i * sourceIndexScaleFactor);
       IRExpr* result;
-      if (!isUnary) {
+      if (rounding) {
+         if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
+            emulation_warning(EmWarn_S390X_fpext_rounding);
+            m5 = S390_BFP_ROUND_PER_FPC;
+         }
          result = binop(op,
                         mkexpr(encode_bfp_rounding_mode(m5)),
                         argument);
@@ -18369,10 +18598,6 @@ s390_vector_fp_convert(IROp op, IRType fromType, IRType toType,
       }
       put_vr(v1, toType, i * destinationIndexScaleFactor, result);
    }
-
-   if (isSingleElementOp) {
-      put_vr_dw1(v1, mkU64(0));
-   }
 }
 
 static const HChar *
@@ -18380,12 +18605,8 @@ s390_irgen_VCDG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
    s390_insn_assert("vcdg", m3 == 3);
 
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      emulation_warning(EmWarn_S390X_fpext_rounding);
-      m5 = S390_BFP_ROUND_PER_FPC;
-   }
-
-   s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5);
+   s390_vector_fp_convert(Iop_I64StoF64, Ity_I64, Ity_F64, True,
+                          v1, v2, m3, m4, m5);
 
    return "vcdg";
 }
@@ -18395,12 +18616,8 @@ s390_irgen_VCDLG(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
    s390_insn_assert("vcdlg", m3 == 3);
 
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      emulation_warning(EmWarn_S390X_fpext_rounding);
-      m5 = S390_BFP_ROUND_PER_FPC;
-   }
-
-   s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, v1, v2, m3, m4, m5);
+   s390_vector_fp_convert(Iop_I64UtoF64, Ity_I64, Ity_F64, True,
+                          v1, v2, m3, m4, m5);
 
    return "vcdlg";
 }
@@ -18410,12 +18627,8 @@ s390_irgen_VCGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
    s390_insn_assert("vcgd", m3 == 3);
 
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      emulation_warning(EmWarn_S390X_fpext_rounding);
-      m5 = S390_BFP_ROUND_PER_FPC;
-   }
-
-   s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, v1, v2, m3, m4, m5);
+   s390_vector_fp_convert(Iop_F64toI64S, Ity_F64, Ity_I64, True,
+                          v1, v2, m3, m4, m5);
 
    return "vcgd";
 }
@@ -18425,12 +18638,8 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
    s390_insn_assert("vclgd", m3 == 3);
 
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      emulation_warning(EmWarn_S390X_fpext_rounding);
-      m5 = S390_BFP_ROUND_PER_FPC;
-   }
-
-   s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, v1, v2, m3, m4, m5);
+   s390_vector_fp_convert(Iop_F64toI64U, Ity_F64, Ity_I64, True,
+                          v1, v2, m3, m4, m5);
 
    return "vclgd";
 }
@@ -18438,246 +18647,262 @@ s390_irgen_VCLGD(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 static const HChar *
 s390_irgen_VFI(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfi", m3 == 3);
+   s390_insn_assert("vfi",
+                    (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4)));
 
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      emulation_warning(EmWarn_S390X_fpext_rounding);
-      m5 = S390_BFP_ROUND_PER_FPC;
+   switch (m3) {
+   case 2: s390_vector_fp_convert(Iop_RoundF32toInt, Ity_F32, Ity_F32, True,
+                                  v1, v2, m3, m4, m5); break;
+   case 3: s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64, True,
+                                  v1, v2, m3, m4, m5); break;
+   case 4: s390_vector_fp_convert(Iop_RoundF128toInt, Ity_F128, Ity_F128, True,
+                                  v1, v2, m3, m4, m5); break;
    }
 
-   s390_vector_fp_convert(Iop_RoundF64toInt, Ity_F64, Ity_F64,
-                          v1, v2, m3, m4, m5);
-
-   return "vcgld";
+   return "vfi";
 }
 
 static const HChar *
-s390_irgen_VLDE(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+s390_irgen_VFLL(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vlde", m3 == 2);
+   s390_insn_assert("vfll", m3 == 2 || (s390_host_has_vxe && m3 == 3));
 
-   s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, v1, v2, m3, m4, m5);
+   if (m3 == 2)
+      s390_vector_fp_convert(Iop_F32toF64, Ity_F32, Ity_F64, False,
+                             v1, v2, m3, m4, m5);
+   else
+      s390_vector_fp_convert(Iop_F64toF128, Ity_F64, Ity_F128, False,
+                             v1, v2, m3, m4, m5);
 
-   return "vlde";
+   return "vfll";
 }
 
 static const HChar *
-s390_irgen_VLED(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
+s390_irgen_VFLR(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vled", m3 == 3);
-
-   if (!s390_host_has_fpext && m5 != S390_BFP_ROUND_PER_FPC) {
-      m5 = S390_BFP_ROUND_PER_FPC;
-   }
+   s390_insn_assert("vflr", m3 == 3 || (s390_host_has_vxe && m3 == 4));
 
-   s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, v1, v2, m3, m4, m5);
+   if (m3 == 3) /* long -> short */
+      s390_vector_fp_convert(Iop_F64toF32, Ity_F64, Ity_F32, True,
+                             v1, v2, m3, m4, m5);
+   else /* m3 == 4: extended -> long */
+      s390_vector_fp_convert(Iop_F128toF64, Ity_F128, Ity_F64, True,
+                             v1, v2, m3, m4, m5);
 
-   return "vled";
+   return "vflr";
 }
 
 static const HChar *
 s390_irgen_VFPSO(UChar v1, UChar v2, UChar m3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfpso", m3 == 3);
-
-   IRExpr* result;
-   switch (m5) {
-   case 0: {
-      /* Invert sign */
-      if (!s390_vr_is_single_element_control_set(m4)) {
-         result = unop(Iop_Neg64Fx2, get_vr_qw(v2));
-      }
-      else {
-         result = binop(Iop_64HLtoV128,
-                        unop(Iop_ReinterpF64asI64,
-                             unop(Iop_NegF64, get_vr(v2, Ity_F64, 0))),
-                        mkU64(0));
-      }
-      break;
-   }
+   s390_insn_assert("vfpso", m5 <= 2 &&
+                    (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4)));
 
-   case 1: {
-      /* Set sign to negative */
-      IRExpr* highHalf = mkU64(0x8000000000000000ULL);
-      if (!s390_vr_is_single_element_control_set(m4)) {
-         IRExpr* lowHalf = highHalf;
-         IRExpr* mask = binop(Iop_64HLtoV128, highHalf, lowHalf);
-         result = binop(Iop_OrV128, get_vr_qw(v2), mask);
-      }
-      else {
-         result = binop(Iop_64HLtoV128,
-                        binop(Iop_Or64, get_vr_dw0(v2), highHalf),
-                        mkU64(0ULL));
-      }
+   Bool single = s390_vr_is_single_element_control_set(m4) || m3 == 4;
+   IRType type = single ? s390_vr_get_ftype(m3) : Ity_V128;
+   int idx = 2 * (m3 - 2) + (single ? 0 : 1);
 
-      break;
-   }
-
-   case 2: {
-      /* Set sign to positive */
-      if (!s390_vr_is_single_element_control_set(m4)) {
-         result = unop(Iop_Abs64Fx2, get_vr_qw(v2));
-      }
-      else {
-         result = binop(Iop_64HLtoV128,
-                        unop(Iop_ReinterpF64asI64,
-                             unop(Iop_AbsF64, get_vr(v2, Ity_F64, 0))),
-                        mkU64(0));
-      }
-
-      break;
-   }
-
-   default:
-      vpanic("s390_irgen_VFPSO: Invalid m5 value");
-   }
+   static const IROp negate_ops[] = {
+      Iop_NegF32, Iop_Neg32Fx4,
+      Iop_NegF64, Iop_Neg64Fx2,
+      Iop_NegF128
+   };
+   static const IROp abs_ops[] = {
+      Iop_AbsF32, Iop_Abs32Fx4,
+      Iop_AbsF64, Iop_Abs64Fx2,
+      Iop_AbsF128
+   };
 
-   put_vr_qw(v1, result);
-   if (s390_vr_is_single_element_control_set(m4)) {
-      put_vr_dw1(v1, mkU64(0ULL));
+   if (m5 == 1) {
+      /* Set sign to negative */
+      put_vr(v1, type, 0,
+             unop(negate_ops[idx],
+                  unop(abs_ops[idx], get_vr(v2, type, 0))));
+   } else {
+      /* m5 == 0: invert sign; m5 == 2: set sign to positive */
+      const IROp *ops = m5 == 2 ? abs_ops : negate_ops;
+      put_vr(v1, type, 0, unop(ops[idx], get_vr(v2, type, 0)));
    }
 
    return "vfpso";
 }
 
-static void s390x_vec_fp_binary_op(IROp generalOp, IROp singleElementOp,
-                                   UChar v1, UChar v2, UChar v3, UChar m4,
-                                   UChar m5)
+static const HChar *
+s390x_vec_fp_binary_op(const HChar* mnm, const IROp ops[],
+                       UChar v1, UChar v2, UChar v3,
+                       UChar m4, UChar m5)
 {
-   IRExpr* result;
-   if (!s390_vr_is_single_element_control_set(m5)) {
-      result = triop(generalOp, get_bfp_rounding_mode_from_fpc(),
-                     get_vr_qw(v2), get_vr_qw(v3));
+   s390_insn_assert(mnm, (m5 & 7) == 0 &&
+                    (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4)));
+
+   int idx = 2 * (m4 - 2);
+
+   if (m4 == 4 || s390_vr_is_single_element_control_set(m5)) {
+      IRType type = s390_vr_get_ftype(m4);
+      put_vr(v1, type, 0,
+             triop(ops[idx], get_bfp_rounding_mode_from_fpc(),
+                   get_vr(v2, type, 0), get_vr(v3, type, 0)));
    } else {
-      IRExpr* highHalf = triop(singleElementOp,
-                               get_bfp_rounding_mode_from_fpc(),
-                               get_vr(v2, Ity_F64, 0),
-                               get_vr(v3, Ity_F64, 0));
-      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
-                     mkU64(0ULL));
+      put_vr_qw(v1, triop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(),
+                          get_vr_qw(v2), get_vr_qw(v3)));
    }
 
-   put_vr_qw(v1, result);
+   return mnm;
 }
 
-static void s390x_vec_fp_unary_op(IROp generalOp, IROp singleElementOp,
-                                  UChar v1, UChar v2, UChar m3, UChar m4)
+static const HChar *
+s390x_vec_fp_unary_op(const HChar* mnm, const IROp ops[],
+                      UChar v1, UChar v2, UChar m3, UChar m4)
 {
-   IRExpr* result;
-   if (!s390_vr_is_single_element_control_set(m4)) {
-      result = binop(generalOp, get_bfp_rounding_mode_from_fpc(),
-                     get_vr_qw(v2));
+   s390_insn_assert(mnm, (m4 & 7) == 0 &&
+                    (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4)));
+
+   int idx = 2 * (m3 - 2);
+
+   if (m3 == 4 || s390_vr_is_single_element_control_set(m4)) {
+      IRType type = s390_vr_get_ftype(m3);
+      put_vr(v1, type, 0,
+             binop(ops[idx], get_bfp_rounding_mode_from_fpc(),
+                   get_vr(v2, type, 0)));
    }
    else {
-      IRExpr* highHalf = binop(singleElementOp,
-                               get_bfp_rounding_mode_from_fpc(),
-                               get_vr(v2, Ity_F64, 0));
-      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
-                     mkU64(0ULL));
+      put_vr_qw(v1, binop(ops[idx + 1], get_bfp_rounding_mode_from_fpc(),
+                          get_vr_qw(v2)));
    }
 
-   put_vr_qw(v1, result);
+   return mnm;
 }
 
 
-static void
-s390_vector_fp_mulAddOrSub(IROp singleElementOp,
-                           UChar v1, UChar v2, UChar v3, UChar v4,
-                           UChar m5, UChar m6)
+static const HChar *
+s390_vector_fp_mulAddOrSub(UChar v1, UChar v2, UChar v3, UChar v4,
+                           UChar m5, UChar m6,
+                           const HChar* mnm, const IROp single_ops[],
+                           Bool negate)
 {
-   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
+   s390_insn_assert(mnm, m6 == 3 || (s390_host_has_vxe && m6 >= 2 && m6 <= 4));
+
+   static const IROp negate_ops[] = { Iop_NegF32, Iop_NegF64, Iop_NegF128 };
+   IRType type = s390_vr_get_ftype(m6);
+   Bool single = s390_vr_is_single_element_control_set(m5) || m6 == 4;
+   UChar n_elem = single ? 1 : s390_vr_get_n_elem(m6);
    IRTemp irrm_temp = newTemp(Ity_I32);
    assign(irrm_temp, get_bfp_rounding_mode_from_fpc());
    IRExpr* irrm = mkexpr(irrm_temp);
-   IRExpr* result;
-   IRExpr* highHalf = qop(singleElementOp,
-                          irrm,
-                          get_vr(v2, Ity_F64, 0),
-                          get_vr(v3, Ity_F64, 0),
-                          get_vr(v4, Ity_F64, 0));
-
-   if (isSingleElementOp) {
-      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
-                     mkU64(0ULL));
-   } else {
-      IRExpr* lowHalf = qop(singleElementOp,
-                            irrm,
-                            get_vr(v2, Ity_F64, 1),
-                            get_vr(v3, Ity_F64, 1),
-                            get_vr(v4, Ity_F64, 1));
-      result = binop(Iop_64HLtoV128, unop(Iop_ReinterpF64asI64, highHalf),
-                     unop(Iop_ReinterpF64asI64, lowHalf));
-   }
 
-   put_vr_qw(v1, result);
+   for (UChar idx = 0; idx < n_elem; idx++) {
+      IRExpr* result = qop(single_ops[m6 - 2],
+                           irrm,
+                           get_vr(v2, type, idx),
+                           get_vr(v3, type, idx),
+                           get_vr(v4, type, idx));
+      put_vr(v1, type, idx, negate ? unop(negate_ops[m6 - 2], result) : result);
+   }
+   return mnm;
 }
 
 static const HChar *
 s390_irgen_VFA(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfa", m4 == 3);
-   s390x_vec_fp_binary_op(Iop_Add64Fx2, Iop_AddF64, v1, v2, v3, m4, m5);
-   return "vfa";
+   static const IROp vfa_ops[] = {
+      Iop_AddF32, Iop_Add32Fx4,
+      Iop_AddF64, Iop_Add64Fx2,
+      Iop_AddF128,
+   };
+   return s390x_vec_fp_binary_op("vfa", vfa_ops, v1, v2, v3, m4, m5);
 }
 
 static const HChar *
 s390_irgen_VFS(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfs", m4 == 3);
-   s390x_vec_fp_binary_op(Iop_Sub64Fx2, Iop_SubF64, v1, v2, v3, m4, m5);
-   return "vfs";
+   static const IROp vfs_ops[] = {
+      Iop_SubF32, Iop_Sub32Fx4,
+      Iop_SubF64, Iop_Sub64Fx2,
+      Iop_SubF128,
+   };
+   return s390x_vec_fp_binary_op("vfs", vfs_ops, v1, v2, v3, m4, m5);
 }
 
 static const HChar *
 s390_irgen_VFM(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfm", m4 == 3);
-   s390x_vec_fp_binary_op(Iop_Mul64Fx2, Iop_MulF64, v1, v2, v3, m4, m5);
-   return "vfm";
+   static const IROp vfm_ops[] = {
+      Iop_MulF32, Iop_Mul32Fx4,
+      Iop_MulF64, Iop_Mul64Fx2,
+      Iop_MulF128,
+   };
+   return s390x_vec_fp_binary_op("vfm", vfm_ops, v1, v2, v3, m4, m5);
 }
 
 static const HChar *
 s390_irgen_VFD(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5)
 {
-   s390_insn_assert("vfd", m4 == 3);
-   s390x_vec_fp_binary_op(Iop_Div64Fx2, Iop_DivF64, v1, v2, v3, m4, m5);
-   return "vfd";
+   static const IROp vfd_ops[] = {
+      Iop_DivF32, Iop_Div32Fx4,
+      Iop_DivF64, Iop_Div64Fx2,
+      Iop_DivF128,
+   };
+   return s390x_vec_fp_binary_op("vfd", vfd_ops, v1, v2, v3, m4, m5);
 }
 
 static const HChar *
 s390_irgen_VFSQ(UChar v1, UChar v2, UChar m3, UChar m4)
 {
-   s390_insn_assert("vfsq", m3 == 3);
-   s390x_vec_fp_unary_op(Iop_Sqrt64Fx2, Iop_SqrtF64, v1, v2, m3, m4);
-
-   return "vfsq";
+   static const IROp vfsq_ops[] = {
+      Iop_SqrtF32, Iop_Sqrt32Fx4,
+      Iop_SqrtF64, Iop_Sqrt64Fx2,
+      Iop_SqrtF128
+   };
+   return s390x_vec_fp_unary_op("vfsq", vfsq_ops, v1, v2, m3, m4);
 }
 
+static const IROp FMA_single_ops[] = {
+   Iop_MAddF32, Iop_MAddF64, Iop_MAddF128
+};
+
 static const HChar *
 s390_irgen_VFMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
 {
-   s390_insn_assert("vfma", m6 == 3);
-   s390_vector_fp_mulAddOrSub(Iop_MAddF64, v1, v2, v3, v4, m5, m6);
-   return "vfma";
+   return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6,
+                                     "vfma", FMA_single_ops, False);
 }
 
+static const HChar *
+s390_irgen_VFNMA(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
+{
+   return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6,
+                                     "vfnma", FMA_single_ops, True);
+}
+
+static const IROp FMS_single_ops[] = {
+   Iop_MSubF32, Iop_MSubF64, Iop_MSubF128
+};
+
 static const HChar *
 s390_irgen_VFMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
 {
-   s390_insn_assert("vfms", m6 == 3);
-   s390_vector_fp_mulAddOrSub(Iop_MSubF64, v1, v2, v3, v4, m5, m6);
-   return "vfms";
+   return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6,
+                                     "vfms", FMS_single_ops, False);
+}
+
+static const HChar *
+s390_irgen_VFNMS(UChar v1, UChar v2, UChar v3, UChar v4, UChar m5, UChar m6)
+{
+   return s390_vector_fp_mulAddOrSub(v1, v2, v3, v4, m5, m6,
+                                     "vfnms", FMS_single_ops, True);
 }
 
 static const HChar *
 s390_irgen_WFC(UChar v1, UChar v2, UChar m3, UChar m4)
 {
-   s390_insn_assert("wfc", m3 == 3);
-   s390_insn_assert("wfc", m4 == 0);
+   s390_insn_assert("wfc", m4 == 0 &&
+                    (m3 == 3 || (s390_host_has_vxe && m3 >= 2 && m3 <= 4)));
+
+   static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 };
+   IRType type = s390_vr_get_ftype(m3);
 
    IRTemp cc_vex = newTemp(Ity_I32);
-   assign(cc_vex, binop(Iop_CmpF64,
-                        get_vr(v1, Ity_F64, 0), get_vr(v2, Ity_F64, 0)));
+   assign(cc_vex, binop(ops[m3 - 2], get_vr(v1, type, 0), get_vr(v2, type, 0)));
 
    IRTemp cc_s390 = newTemp(Ity_I32);
    assign(cc_s390, convert_vex_bfpcc_to_s390(cc_vex));
@@ -18695,213 +18920,253 @@ s390_irgen_WFK(UChar v1, UChar v2, UChar m3, UChar m4)
 }
 
 static const HChar *
-s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+s390_irgen_VFCx(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6,
+                const HChar *mnem, IRCmpFResult cmp, Bool equal_ok,
+                IROp cmp32, IROp cmp64)
 {
-   s390_insn_assert("vfce", m4 == 3);
+   s390_insn_assert(mnem, (m5 & 3) == 0 && (m6 & 14) == 0 &&
+                    (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4)));
 
-   Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
-   if (!s390_vr_is_cs_set(m6)) {
-      if (!isSingleElementOp) {
-         put_vr_qw(v1, binop(Iop_CmpEQ64Fx2, get_vr_qw(v2), get_vr_qw(v3)));
+   Bool single = s390_vr_is_single_element_control_set(m5) || m4 == 4;
+
+   if (single) {
+      static const IROp ops[] = { Iop_CmpF32, Iop_CmpF64, Iop_CmpF128 };
+      IRType type = s390_vr_get_ftype(m4);
+      IRTemp result = newTemp(Ity_I32);
+      IRTemp cond = newTemp(Ity_I1);
+
+      assign(result, binop(ops[m4 - 2],
+                           get_vr(v2, type, 0), get_vr(v3, type, 0)));
+      if (equal_ok) {
+         assign(cond,
+                binop(Iop_Or1,
+                      binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)),
+                      binop(Iop_CmpEQ32, mkexpr(result), mkU32(Ircr_EQ))));
       } else {
-         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0),
-                                          get_vr(v3, Ity_F64, 0));
-         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
-                                      mkU32(Ircr_EQ)),
-                                mkU64(0xffffffffffffffffULL),
-                                mkU64(0ULL));
-         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
+         assign(cond, binop(Iop_CmpEQ32, mkexpr(result), mkU32(cmp)));
+      }
+      put_vr_qw(v1, mkite(mkexpr(cond),
+                          IRExpr_Const(IRConst_V128(0xffff)),
+                          IRExpr_Const(IRConst_V128(0))));
+      if (s390_vr_is_cs_set(m6)) {
+         IRTemp cc = newTemp(Ity_I64);
+         assign(cc, mkite(mkexpr(cond), mkU64(0), mkU64(3)));
+         s390_cc_set(cc);
       }
    } else {
-      IRDirty* d;
-      IRTemp cc = newTemp(Ity_I64);
-
-      s390x_vec_op_details_t details = { .serialized = 0ULL };
-      details.op = S390_VEC_OP_VFCE;
-      details.v1 = v1;
-      details.v2 = v2;
-      details.v3 = v3;
-      details.m4 = m4;
-      details.m5 = m5;
-      details.m6 = m6;
+      IRTemp result = newTemp(Ity_V128);
+
+      assign(result, binop(m4 == 2 ? cmp32 : cmp64,
+                           get_vr_qw(v2), get_vr_qw(v3)));
+      put_vr_qw(v1, mkexpr(result));
+      if (s390_vr_is_cs_set(m6)) {
+         IRTemp cc = newTemp(Ity_I64);
+         assign(cc,
+                mkite(binop(Iop_CmpEQ64,
+                            binop(Iop_And64,
+                                  unop(Iop_V128to64, mkexpr(result)),
+                                  unop(Iop_V128HIto64, mkexpr(result))),
+                            mkU64(-1ULL)),
+                      mkU64(0), /* all comparison results are true */
+                      mkite(binop(Iop_CmpEQ64,
+                                  binop(Iop_Or64,
+                                        unop(Iop_V128to64, mkexpr(result)),
+                                        unop(Iop_V128HIto64, mkexpr(result))),
+                                  mkU64(0)),
+                            mkU64(3), /* all false */
+                            mkU64(1)))); /* mixed true/false */
+         s390_cc_set(cc);
+      }
+   }
 
-      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
-                            &s390x_dirtyhelper_vec_op,
-                            mkIRExprVec_2(IRExpr_GSPTR(),
-                                          mkU64(details.serialized)));
+   return mnem;
+}
 
-      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
-      d->nFxState = 3;
-      vex_bzero(&d->fxState, sizeof(d->fxState));
-      d->fxState[0].fx = Ifx_Read;
-      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
-      d->fxState[0].size = elementSize;
-      d->fxState[1].fx = Ifx_Read;
-      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
-      d->fxState[1].size = elementSize;
-      d->fxState[2].fx = Ifx_Write;
-      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
-      d->fxState[2].size = sizeof(V128);
+static const HChar *
+s390_irgen_VFCE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+{
+   return s390_irgen_VFCx(v1, v2, v3, m4, m5, m6, "vfce", Ircr_EQ,
+                          False, Iop_CmpEQ32Fx4, Iop_CmpEQ64Fx2);
+}
 
-      stmt(IRStmt_Dirty(d));
-      s390_cc_set(cc);
-   }
+static const HChar *
+s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+{
+   /* Swap arguments and compare "low" instead. */
+   return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfch", Ircr_LT,
+                          False, Iop_CmpLT32Fx4, Iop_CmpLT64Fx2);
+}
 
-   return "vfce";
+static const HChar *
+s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+{
+   /* Swap arguments and compare "low or equal" instead. */
+   return s390_irgen_VFCx(v1, v3, v2, m4, m5, m6, "vfche", Ircr_LT,
+                          True, Iop_CmpLE32Fx4, Iop_CmpLE64Fx2);
 }
 
 static const HChar *
-s390_irgen_VFCH(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5)
 {
-   vassert(m4 == 3);
+   s390_insn_assert("vftci",
+                    (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4)));
 
    Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
-   if (!s390_vr_is_cs_set(m6)) {
-      if (!isSingleElementOp) {
-         put_vr_qw(v1, binop(Iop_CmpLE64Fx2, get_vr_qw(v3), get_vr_qw(v2)));
-      } else {
-         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v2, Ity_F64, 0),
-                                          get_vr(v3, Ity_F64, 0));
-         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
-                                      mkU32(Ircr_GT)),
-                                mkU64(0xffffffffffffffffULL),
-                                mkU64(0ULL));
-         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
-      }
-   }
-   else {
-      IRDirty* d;
-      IRTemp cc = newTemp(Ity_I64);
 
-      s390x_vec_op_details_t details = { .serialized = 0ULL };
-      details.op = S390_VEC_OP_VFCH;
-      details.v1 = v1;
-      details.v2 = v2;
-      details.v3 = v3;
-      details.m4 = m4;
-      details.m5 = m5;
-      details.m6 = m6;
+   IRDirty* d;
+   IRTemp cc = newTemp(Ity_I64);
 
-      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
-                            &s390x_dirtyhelper_vec_op,
-                            mkIRExprVec_2(IRExpr_GSPTR(),
-                                          mkU64(details.serialized)));
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
+   details.op = S390_VEC_OP_VFTCI;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.i3 = i3;
+   details.m4 = m4;
+   details.m5 = m5;
 
-      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
-      d->nFxState = 3;
-      vex_bzero(&d->fxState, sizeof(d->fxState));
-      d->fxState[0].fx = Ifx_Read;
-      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
-      d->fxState[0].size = elementSize;
-      d->fxState[1].fx = Ifx_Read;
-      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
-      d->fxState[1].size = elementSize;
-      d->fxState[2].fx = Ifx_Write;
-      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
-      d->fxState[2].size = sizeof(V128);
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+                         &s390x_dirtyhelper_vec_op,
+                         mkIRExprVec_2(IRExpr_GSPTR(),
+                                       mkU64(details.serialized)));
 
-      stmt(IRStmt_Dirty(d));
-      s390_cc_set(cc);
-   }
+   const UChar elementSize = isSingleElementOp ?
+      sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128);
+   d->nFxState = 2;
+   vex_bzero(&d->fxState, sizeof(d->fxState));
+   d->fxState[0].fx = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size = elementSize;
+   d->fxState[1].fx = Ifx_Write;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[1].size = sizeof(V128);
+
+   stmt(IRStmt_Dirty(d));
+   s390_cc_set(cc);
 
-   return "vfch";
+   return "vftci";
 }
 
 static const HChar *
-s390_irgen_VFCHE(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
+s390_irgen_VFMIN(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
 {
-   s390_insn_assert("vfche", m4 == 3);
+   s390_insn_assert("vfmin",
+                    (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4)));
 
    Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
-   if (!s390_vr_is_cs_set(m6)) {
-      if (!isSingleElementOp) {
-         put_vr_qw(v1, binop(Iop_CmpLT64Fx2, get_vr_qw(v3), get_vr_qw(v2)));
-      }
-      else {
-         IRExpr* comparisonResult = binop(Iop_CmpF64, get_vr(v3, Ity_F64, 0),
-                                          get_vr(v2, Ity_F64, 0));
-         IRExpr* result = mkite(binop(Iop_CmpEQ32, comparisonResult,
-                                      mkU32(Ircr_LT)),
-                                mkU64(0xffffffffffffffffULL),
-                                mkU64(0ULL));
-         put_vr_qw(v1, binop(Iop_64HLtoV128, result, mkU64(0ULL)));
-      }
-   }
-   else {
-      IRDirty* d;
-      IRTemp cc = newTemp(Ity_I64);
-
-      s390x_vec_op_details_t details = { .serialized = 0ULL };
-      details.op = S390_VEC_OP_VFCHE;
-      details.v1 = v1;
-      details.v2 = v2;
-      details.v3 = v3;
-      details.m4 = m4;
-      details.m5 = m5;
-      details.m6 = m6;
+   IRDirty* d;
+   IRTemp cc = newTemp(Ity_I64);
 
-      d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
-                            &s390x_dirtyhelper_vec_op,
-                            mkIRExprVec_2(IRExpr_GSPTR(),
-                                          mkU64(details.serialized)));
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
+   details.op = S390_VEC_OP_VFMIN;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.v3 = v3;
+   details.m4 = m4;
+   details.m5 = m5;
+   details.m6 = m6;
 
-      const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
-      d->nFxState = 3;
-      vex_bzero(&d->fxState, sizeof(d->fxState));
-      d->fxState[0].fx = Ifx_Read;
-      d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
-      d->fxState[0].size = elementSize;
-      d->fxState[1].fx = Ifx_Read;
-      d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
-      d->fxState[1].size = elementSize;
-      d->fxState[2].fx = Ifx_Write;
-      d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
-      d->fxState[2].size = sizeof(V128);
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+                         &s390x_dirtyhelper_vec_op,
+                         mkIRExprVec_2(IRExpr_GSPTR(),
+                                       mkU64(details.serialized)));
 
-      stmt(IRStmt_Dirty(d));
-      s390_cc_set(cc);
-   }
+   const UChar elementSize = isSingleElementOp ?
+      sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128);
+   d->nFxState = 3;
+   vex_bzero(&d->fxState, sizeof(d->fxState));
+   d->fxState[0].fx = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size = elementSize;
+   d->fxState[1].fx = Ifx_Read;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size = elementSize;
+   d->fxState[2].fx = Ifx_Write;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size = sizeof(V128);
 
-   return "vfche";
+   /* VFMIN does not set the CC; ignore the helper's return value. */
+   stmt(IRStmt_Dirty(d));
+   return "vfmin";
 }
 
 static const HChar *
-s390_irgen_VFTCI(UChar v1, UChar v2, UShort i3, UChar m4, UChar m5)
+s390_irgen_VFMAX(UChar v1, UChar v2, UChar v3, UChar m4, UChar m5, UChar m6)
 {
-   s390_insn_assert("vftci", m4 == 3);
+   s390_insn_assert("vfmax",
+                    (m4 == 3 || (s390_host_has_vxe && m4 >= 2 && m4 <= 4)));
 
    Bool isSingleElementOp = s390_vr_is_single_element_control_set(m5);
-
    IRDirty* d;
    IRTemp cc = newTemp(Ity_I64);
 
    s390x_vec_op_details_t details = { .serialized = 0ULL };
-   details.op = S390_VEC_OP_VFTCI;
+   details.op = S390_VEC_OP_VFMAX;
    details.v1 = v1;
    details.v2 = v2;
-   details.i3 = i3;
+   details.v3 = v3;
    details.m4 = m4;
    details.m5 = m5;
+   details.m6 = m6;
 
    d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
                          &s390x_dirtyhelper_vec_op,
                          mkIRExprVec_2(IRExpr_GSPTR(),
                                        mkU64(details.serialized)));
 
-   const UChar elementSize = isSingleElementOp ? sizeof(ULong) : sizeof(V128);
-   d->nFxState = 2;
+   const UChar elementSize = isSingleElementOp ?
+      sizeofIRType(s390_vr_get_ftype(m4)) : sizeof(V128);
+   d->nFxState = 3;
    vex_bzero(&d->fxState, sizeof(d->fxState));
    d->fxState[0].fx = Ifx_Read;
    d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
    d->fxState[0].size = elementSize;
-   d->fxState[1].fx = Ifx_Write;
-   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
-   d->fxState[1].size = sizeof(V128);
+   d->fxState[1].fx = Ifx_Read;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size = elementSize;
+   d->fxState[2].fx = Ifx_Write;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size = sizeof(V128);
 
    stmt(IRStmt_Dirty(d));
-   s390_cc_set(cc);
+   /* VFMAX does not set the CC; ignore the helper's return value. */
+   return "vfmax";
+}
 
-   return "vftci";
+static const HChar *
+s390_irgen_VBPERM(UChar v1, UChar v2, UChar v3)
+{
+   IRDirty* d;
+   IRTemp cc = newTemp(Ity_I64);
+
+   s390x_vec_op_details_t details = { .serialized = 0ULL };
+   details.op = S390_VEC_OP_VBPERM;
+   details.v1 = v1;
+   details.v2 = v2;
+   details.v3 = v3;
+   details.m4 = 0;
+   details.m5 = 0;
+   details.m6 = 0;
+
+   d = unsafeIRDirty_1_N(cc, 0, "s390x_dirtyhelper_vec_op",
+                         &s390x_dirtyhelper_vec_op,
+                         mkIRExprVec_2(IRExpr_GSPTR(),
+                                       mkU64(details.serialized)));
+
+   d->nFxState = 3;
+   vex_bzero(&d->fxState, sizeof(d->fxState));
+   d->fxState[0].fx = Ifx_Read;
+   d->fxState[0].offset = S390X_GUEST_OFFSET(guest_v0) + v2 * sizeof(V128);
+   d->fxState[0].size = sizeof(V128);
+   d->fxState[1].fx = Ifx_Read;
+   d->fxState[1].offset = S390X_GUEST_OFFSET(guest_v0) + v3 * sizeof(V128);
+   d->fxState[1].size = sizeof(V128);
+   d->fxState[2].fx = Ifx_Write;
+   d->fxState[2].offset = S390X_GUEST_OFFSET(guest_v0) + v1 * sizeof(V128);
+   d->fxState[2].size = sizeof(V128);
+
+   /* VBPERM does not set the CC; ignore the helper's return value. */
+   stmt(IRStmt_Dirty(d));
+   return "vbperm";
 }
 
 /* New insns are added here.
@@ -20489,11 +20754,23 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
                                                 RXY_dl2(ovl),
                                                 RXY_dh2(ovl));  goto ok;
    case 0xe60000000034ULL: /* VPKZ */ goto unimplemented;
-   case 0xe60000000035ULL: /* VLRL */ goto unimplemented;
-   case 0xe60000000037ULL: /* VLRLR */ goto unimplemented;
+   case 0xe60000000035ULL: s390_format_VSI_URDV(s390_irgen_VLRL, VSI_v1(ovl),
+                                                VSI_b2(ovl), VSI_d2(ovl),
+                                                VSI_i3(ovl),
+                                                VSI_rxb(ovl)); goto ok;
+   case 0xe60000000037ULL: s390_format_VRS_RRDV(s390_irgen_VLRLR, VRSd_v1(ovl),
+                                                VRSd_r3(ovl), VRS_b2(ovl),
+                                                VRS_d2(ovl),
+                                                VRS_rxb(ovl));  goto ok;
    case 0xe6000000003cULL: /* VUPKZ */ goto unimplemented;
-   case 0xe6000000003dULL: /* VSTRL */ goto unimplemented;
-   case 0xe6000000003fULL: /* VSTRLR */ goto unimplemented;
+   case 0xe6000000003dULL:  s390_format_VSI_URDV(s390_irgen_VSTRL, VSI_v1(ovl),
+                                                 VSI_b2(ovl), VSI_d2(ovl),
+                                                 VSI_i3(ovl),
+                                                 VSI_rxb(ovl)); goto ok;
+   case 0xe6000000003fULL: s390_format_VRS_RRDV(s390_irgen_VSTRLR, VRSd_v1(ovl),
+                                                VRSd_r3(ovl), VRS_b2(ovl),
+                                                VRS_d2(ovl),
+                                                VRS_rxb(ovl));  goto ok;
    case 0xe60000000049ULL: /* VLIP */ goto unimplemented;
    case 0xe60000000050ULL: /* VCVB */ goto unimplemented;
    case 0xe60000000052ULL: /* VCVBG */ goto unimplemented;
@@ -20691,12 +20968,18 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe7000000006bULL: s390_format_VRR_VVV(s390_irgen_VNO, VRR_v1(ovl),
                                                VRR_v2(ovl), VRR_r3(ovl),
                                                VRR_rxb(ovl));  goto ok;
-   case 0xe7000000006cULL: /* VNX */ goto unimplemented;
+   case 0xe7000000006cULL: s390_format_VRR_VVV(s390_irgen_VNX, VRR_v1(ovl),
+                                               VRR_v2(ovl), VRR_r3(ovl),
+                                               VRR_rxb(ovl));  goto ok;
    case 0xe7000000006dULL: s390_format_VRR_VVV(s390_irgen_VX, VRR_v1(ovl),
                                                VRR_v2(ovl), VRR_r3(ovl),
                                                VRR_rxb(ovl));  goto ok;
-   case 0xe7000000006eULL: /* VNN */ goto unimplemented;
-   case 0xe7000000006fULL: /* VOC */ goto unimplemented;
+   case 0xe7000000006eULL: s390_format_VRR_VVV(s390_irgen_VNN, VRR_v1(ovl),
+                                               VRR_v2(ovl), VRR_r3(ovl),
+                                               VRR_rxb(ovl));  goto ok;
+   case 0xe7000000006fULL: s390_format_VRR_VVV(s390_irgen_VOC, VRR_v1(ovl),
+                                               VRR_v2(ovl), VRR_r3(ovl),
+                                               VRR_rxb(ovl));  goto ok;
    case 0xe70000000070ULL: s390_format_VRR_VVVM(s390_irgen_VESLV, VRR_v1(ovl),
                                                 VRR_v2(ovl), VRR_r3(ovl),
                                                 VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
@@ -20749,7 +21032,9 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe70000000084ULL: s390_format_VRR_VVVM(s390_irgen_VPDI, VRR_v1(ovl),
                                                VRR_v2(ovl), VRR_r3(ovl),
                                                VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
-   case 0xe70000000085ULL: /* VBPERM */ goto unimplemented;
+   case 0xe70000000085ULL: s390_format_VRR_VVV(s390_irgen_VBPERM, VRR_v1(ovl),
+                                               VRR_v2(ovl), VRR_r3(ovl),
+                                               VRR_rxb(ovl));  goto ok;
    case 0xe7000000008aULL: s390_format_VRR_VVVVMM(s390_irgen_VSTRC, VRRd_v1(ovl),
                                                   VRRd_v2(ovl), VRRd_v3(ovl),
                                                   VRRd_v4(ovl), VRRd_m5(ovl),
@@ -20780,8 +21065,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe70000000097ULL: s390_format_VRR_VVVMM(s390_irgen_VPKS, VRR_v1(ovl),
                                                VRR_v2(ovl), VRR_r3(ovl),
                                                VRR_m4(ovl), VRR_m5(ovl), VRR_rxb(ovl));  goto ok;
-   case 0xe7000000009eULL: /* VFNMS */ goto unimplemented;
-   case 0xe7000000009fULL: /* VFNMA */ goto unimplemented;
+   case 0xe7000000009eULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMS, VRRe_v1(ovl),
+                                                  VRRe_v2(ovl), VRRe_v3(ovl),
+                                                  VRRe_v4(ovl), VRRe_m5(ovl),
+                                                  VRRe_m6(ovl),
+                                                  VRRe_rxb(ovl));  goto ok;
+   case 0xe7000000009fULL: s390_format_VRR_VVVVMM(s390_irgen_VFNMA, VRRe_v1(ovl),
+                                                  VRRe_v2(ovl), VRRe_v3(ovl),
+                                                  VRRe_v4(ovl), VRRe_m5(ovl),
+                                                  VRRe_m6(ovl),
+                                                  VRRe_rxb(ovl));  goto ok;
    case 0xe700000000a1ULL: s390_format_VRR_VVVM(s390_irgen_VMLH, VRR_v1(ovl),
                                                 VRR_v2(ovl), VRR_r3(ovl),
                                                 VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
@@ -20834,7 +21127,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
    case 0xe700000000b4ULL: s390_format_VRR_VVVM(s390_irgen_VGFM, VRR_v1(ovl),
                                                 VRR_v2(ovl), VRR_r3(ovl),
                                                 VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
-   case 0xe700000000b8ULL: /* VMSL */ goto unimplemented;
+   case 0xe700000000b8ULL: s390_format_VRR_VVVVMM(s390_irgen_VMSL, VRRd_v1(ovl),
+                                                  VRRd_v2(ovl), VRRd_v3(ovl),
+                                                  VRRd_v4(ovl), VRRd_m5(ovl),
+                                                  VRRd_m6(ovl),
+                                                  VRRd_rxb(ovl));  goto ok;
    case 0xe700000000b9ULL: s390_format_VRRd_VVVVM(s390_irgen_VACCC, VRRd_v1(ovl),
                                                   VRRd_v2(ovl), VRRd_v3(ovl),
                                                   VRRd_v4(ovl), VRRd_m5(ovl),
@@ -20871,11 +21168,11 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
                                                   VRRa_v2(ovl), VRRa_m3(ovl),
                                                   VRRa_m4(ovl), VRRa_m5(ovl),
                                                   VRRa_rxb(ovl)); goto ok;
-   case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VLDE, VRRa_v1(ovl),
+   case 0xe700000000c4ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLL, VRRa_v1(ovl),
                                                   VRRa_v2(ovl), VRRa_m3(ovl),
                                                   VRRa_m4(ovl), VRRa_m5(ovl),
                                                   VRRa_rxb(ovl)); goto ok;
-   case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VLED, VRRa_v1(ovl),
+   case 0xe700000000c5ULL: s390_format_VRRa_VVMMM(s390_irgen_VFLR, VRRa_v1(ovl),
                                                   VRRa_v2(ovl), VRRa_m3(ovl),
                                                   VRRa_m4(ovl), VRRa_m5(ovl),
                                                   VRRa_rxb(ovl)); goto ok;
@@ -20956,8 +21253,16 @@ s390_decode_6byte_and_irgen(const UChar *bytes)
                                                    VRRa_m3(ovl), VRRa_m4(ovl),
                                                    VRRa_m5(ovl),
                                                    VRRa_rxb(ovl)); goto ok;
-   case 0xe700000000eeULL: /* VFMIN */ goto unimplemented;
-   case 0xe700000000efULL: /* VFMAX */ goto unimplemented;
+   case 0xe700000000eeULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMIN, VRRa_v1(ovl),
+                                                   VRRa_v2(ovl), VRRa_v3(ovl),
+                                                   VRRa_m3(ovl), VRRa_m4(ovl),
+                                                   VRRa_m5(ovl),
+                                                   VRRa_rxb(ovl)); goto ok;
+   case 0xe700000000efULL: s390_format_VRRa_VVVMMM(s390_irgen_VFMAX, VRRa_v1(ovl),
+                                                   VRRa_v2(ovl), VRRa_v3(ovl),
+                                                   VRRa_m3(ovl), VRRa_m4(ovl),
+                                                   VRRa_m5(ovl),
+                                                   VRRa_rxb(ovl)); goto ok;
    case 0xe700000000f0ULL: s390_format_VRR_VVVM(s390_irgen_VAVGL, VRR_v1(ovl),
                                                 VRR_v2(ovl), VRR_r3(ovl),
                                                 VRR_m4(ovl), VRR_rxb(ovl));  goto ok;
diff --git a/VEX/priv/host_s390_defs.c b/VEX/priv/host_s390_defs.c
index 3b6121fec..8762975b2 100644
--- a/VEX/priv/host_s390_defs.c
+++ b/VEX/priv/host_s390_defs.c
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
    Copyright (C) 2012-2017  Florian Krohm   (britzel@acm.org)
 
    This program is free software; you can redistribute it and/or
@@ -684,6 +684,8 @@ s390_insn* genMove_S390(HReg from, HReg to, Bool mode64)
    switch (hregClass(from)) {
    case HRcInt64:
       return s390_insn_move(sizeofIRType(Ity_I64), to, from);
+   case HRcFlt64:
+      return s390_insn_move(sizeofIRType(Ity_F64), to, from);
    case HRcVec128:
       return s390_insn_move(sizeofIRType(Ity_V128), to, from);
    default:
@@ -7870,6 +7872,10 @@ s390_insn_as_string(const s390_insn *insn)
          op = "v-vfloatabs";
          break;
 
+      case S390_VEC_FLOAT_NABS:
+         op = "v-vfloatnabs";
+         break;
+
       default:
          goto fail;
       }
@@ -9439,21 +9445,28 @@ s390_insn_unop_emit(UChar *buf, const s390_insn *insn)
 
    case S390_VEC_FLOAT_NEG: {
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
-      vassert(insn->size == 8);
+      vassert(insn->size >= 4);
       UChar v1 = hregNumber(insn->variant.unop.dst);
       UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
       return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 0);
    }
    case S390_VEC_FLOAT_ABS: {
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
-      vassert(insn->size == 8);
+      vassert(insn->size >= 4);
       UChar v1 = hregNumber(insn->variant.unop.dst);
       UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
       return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 2);
    }
+   case S390_VEC_FLOAT_NABS: {
+      vassert(insn->variant.unop.src.tag == S390_OPND_REG);
+      vassert(insn->size >= 4);
+      UChar v1 = hregNumber(insn->variant.unop.dst);
+      UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
+      return s390_emit_VFPSO(buf, v1, v2, s390_getM_from_size(insn->size), 0, 1);
+   }
    case S390_VEC_FLOAT_SQRT: {
       vassert(insn->variant.unop.src.tag == S390_OPND_REG);
-      vassert(insn->size == 8);
+      vassert(insn->size >= 4);
       UChar v1 = hregNumber(insn->variant.unop.dst);
       UChar v2 = hregNumber(insn->variant.unop.src.variant.reg);
       return s390_emit_VFSQ(buf, v1, v2, s390_getM_from_size(insn->size), 0);
diff --git a/VEX/priv/host_s390_defs.h b/VEX/priv/host_s390_defs.h
index 3f6473e10..9b69f4d38 100644
--- a/VEX/priv/host_s390_defs.h
+++ b/VEX/priv/host_s390_defs.h
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
 
    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License as
@@ -205,6 +205,7 @@ typedef enum {
    S390_VEC_COUNT_ONES,
    S390_VEC_FLOAT_NEG,
    S390_VEC_FLOAT_ABS,
+   S390_VEC_FLOAT_NABS,
    S390_VEC_FLOAT_SQRT,
    S390_UNOP_T_INVALID
 } s390_unop_t;
@@ -931,6 +932,8 @@ extern UInt s390_host_hwcaps;
                       (s390_host_hwcaps & (VEX_HWCAPS_S390X_MSA5))
 #define s390_host_has_lsc2 \
                       (s390_host_hwcaps & (VEX_HWCAPS_S390X_LSC2))
+#define s390_host_has_vxe \
+                      (s390_host_hwcaps & (VEX_HWCAPS_S390X_VXE))
 #endif /* ndef __VEX_HOST_S390_DEFS_H */
 
 /*---------------------------------------------------------------*/
diff --git a/VEX/priv/host_s390_isel.c b/VEX/priv/host_s390_isel.c
index 134f3eb6f..2f9854038 100644
--- a/VEX/priv/host_s390_isel.c
+++ b/VEX/priv/host_s390_isel.c
@@ -8,7 +8,7 @@
    This file is part of Valgrind, a dynamic binary instrumentation
    framework.
 
-   Copyright IBM Corp. 2010-2017
+   Copyright IBM Corp. 2010-2020
    Copyright (C) 2012-2017  Florian Krohm   (britzel@acm.org)
 
    This program is free software; you can redistribute it and/or
@@ -2362,9 +2362,10 @@ s390_isel_float128_expr_wrk(HReg *dst_hi, HReg *dst_lo, ISelEnv *env,
       case Iop_NegF128:
          if (left->tag == Iex_Unop &&
              (left->Iex.Unop.op == Iop_AbsF32 ||
-              left->Iex.Unop.op == Iop_AbsF64))
+              left->Iex.Unop.op == Iop_AbsF64)) {
             bfpop = S390_BFP_NABS;
-         else
+            left = left->Iex.Unop.arg;
+         } else
             bfpop = S390_BFP_NEG;
          goto float128_opnd;
       case Iop_AbsF128:     bfpop = S390_BFP_ABS;         goto float128_opnd;
@@ -2726,9 +2727,10 @@ s390_isel_float_expr_wrk(ISelEnv *env, IRExpr *expr)
       case Iop_NegF64:
          if (left->tag == Iex_Unop &&
              (left->Iex.Unop.op == Iop_AbsF32 ||
-              left->Iex.Unop.op == Iop_AbsF64))
+              left->Iex.Unop.op == Iop_AbsF64)) {
             bfpop = S390_BFP_NABS;
-         else
+            left = left->Iex.Unop.arg;
+         } else
             bfpop = S390_BFP_NEG;
          break;
 
@@ -3944,11 +3946,27 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
          vec_unop = S390_VEC_COUNT_ONES;
          goto Iop_V_wrk;
 
+      case Iop_Neg32Fx4:
+         size = 4;
+         vec_unop = S390_VEC_FLOAT_NEG;
+         if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs32Fx4) {
+            vec_unop = S390_VEC_FLOAT_NABS;
+            arg = arg->Iex.Unop.arg;
+         }
+         goto Iop_V_wrk;
       case Iop_Neg64Fx2:
          size = 8;
          vec_unop = S390_VEC_FLOAT_NEG;
+         if (arg->tag == Iex_Unop && arg->Iex.Unop.op == Iop_Abs64Fx2) {
+            vec_unop = S390_VEC_FLOAT_NABS;
+            arg = arg->Iex.Unop.arg;
+         }
          goto Iop_V_wrk;
 
+      case Iop_Abs32Fx4:
+         size = 4;
+         vec_unop = S390_VEC_FLOAT_ABS;
+         goto Iop_V_wrk;
       case Iop_Abs64Fx2:
          size = 8;
          vec_unop = S390_VEC_FLOAT_ABS;
@@ -4474,17 +4492,29 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
          vec_binop = S390_VEC_ELEM_ROLL_V;
          goto Iop_VV_wrk;
 
+      case Iop_CmpEQ32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL;
+         goto Iop_VV_wrk;
       case Iop_CmpEQ64Fx2:
          size = 8;
          vec_binop = S390_VEC_FLOAT_COMPARE_EQUAL;
          goto Iop_VV_wrk;
 
+      case Iop_CmpLE32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL;
+         goto Iop_VV_wrk;
       case Iop_CmpLE64Fx2: {
          size = 8;
          vec_binop = S390_VEC_FLOAT_COMPARE_LESS_OR_EQUAL;
          goto Iop_VV_wrk;
       }
 
+      case Iop_CmpLT32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_COMPARE_LESS;
+         goto Iop_VV_wrk;
       case Iop_CmpLT64Fx2: {
          size = 8;
          vec_binop = S390_VEC_FLOAT_COMPARE_LESS;
@@ -4671,20 +4701,41 @@ s390_isel_vec_expr_wrk(ISelEnv *env, IRExpr *expr)
                                            dst, reg1, reg2, reg3));
          return dst;
 
+      case Iop_Add32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_ADD;
+         goto Iop_irrm_VV_wrk;
+
       case Iop_Add64Fx2:
          size = 8;
          vec_binop = S390_VEC_FLOAT_ADD;
          goto Iop_irrm_VV_wrk;
 
+      case Iop_Sub32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_SUB;
+         goto Iop_irrm_VV_wrk;
+
       case Iop_Sub64Fx2:
          size = 8;
          vec_binop = S390_VEC_FLOAT_SUB;
          goto Iop_irrm_VV_wrk;
 
+      case Iop_Mul32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_MUL;
+         goto Iop_irrm_VV_wrk;
+
       case Iop_Mul64Fx2:
          size = 8;
          vec_binop = S390_VEC_FLOAT_MUL;
          goto Iop_irrm_VV_wrk;
+
+      case Iop_Div32Fx4:
+         size = 4;
+         vec_binop = S390_VEC_FLOAT_DIV;
+         goto Iop_irrm_VV_wrk;
+
       case Iop_Div64Fx2:
          size = 8;
          vec_binop = S390_VEC_FLOAT_DIV;
diff --git a/VEX/priv/main_main.c b/VEX/priv/main_main.c
index 72f419988..12f521d8c 100644
--- a/VEX/priv/main_main.c
+++ b/VEX/priv/main_main.c
@@ -1795,6 +1795,7 @@ static const HChar* show_hwcaps_s390x ( UInt hwcaps )
       { VEX_HWCAPS_S390X_MSA5,  "msa5" },
       { VEX_HWCAPS_S390X_MI2,   "mi2" },
       { VEX_HWCAPS_S390X_LSC2,  "lsc2" },
+      { VEX_HWCAPS_S390X_VXE,   "vxe" },
    };
    /* Allocate a large enough buffer */
    static HChar buf[sizeof prefix + 
diff --git a/VEX/pub/libvex.h b/VEX/pub/libvex.h
index 53e3705da..2ffed0ad0 100644
--- a/VEX/pub/libvex.h
+++ b/VEX/pub/libvex.h
@@ -171,7 +171,7 @@ typedef
 #define VEX_HWCAPS_S390X_MSA5  (1<<19)  /* message security assistance facility */
 #define VEX_HWCAPS_S390X_MI2   (1<<20)  /* miscellaneous-instruction-extensions facility 2 */
 #define VEX_HWCAPS_S390X_LSC2  (1<<21)  /* Conditional load/store facility2 */
-
+#define VEX_HWCAPS_S390X_VXE   (1<<22)  /* Vector-enhancements facility */
 
 /* Special value representing all available s390x hwcaps */
 #define VEX_HWCAPS_S390X_ALL   (VEX_HWCAPS_S390X_LDISP | \
@@ -189,7 +189,8 @@ typedef
                                 VEX_HWCAPS_S390X_VX    | \
                                 VEX_HWCAPS_S390X_MSA5  | \
                                 VEX_HWCAPS_S390X_MI2   | \
-                                VEX_HWCAPS_S390X_LSC2)
+                                VEX_HWCAPS_S390X_LSC2  | \
+                                VEX_HWCAPS_S390X_VXE)
 
 #define VEX_HWCAPS_S390X(x)  ((x) & ~VEX_S390X_MODEL_MASK)
 #define VEX_S390X_MODEL(x)   ((x) &  VEX_S390X_MODEL_MASK)
diff --git a/VEX/pub/libvex_emnote.h b/VEX/pub/libvex_emnote.h
index be033b4da..77880a270 100644
--- a/VEX/pub/libvex_emnote.h
+++ b/VEX/pub/libvex_emnote.h
@@ -124,6 +124,10 @@ typedef
       /* ppno insn is not supported on this host */
       EmFail_S390X_ppno,
 
+      /* insn needs vector-enhancements facility which is not available on this
+         host */
+      EmFail_S390X_vxe,
+
       EmNote_NUMBER
    }
    VexEmNote;
diff --git a/coregrind/m_initimg/initimg-linux.c b/coregrind/m_initimg/initimg-linux.c
index 365942c4f..ba84fa6e9 100644
--- a/coregrind/m_initimg/initimg-linux.c
+++ b/coregrind/m_initimg/initimg-linux.c
@@ -697,9 +697,13 @@ Addr setup_client_stack( void*  init_sp,
             }
 #           elif defined(VGP_s390x_linux)
             {
-               /* Advertise hardware features "below" TE and VXRS.  TE itself
-                  and anything above VXRS is not supported by Valgrind. */
-               auxv->u.a_val &= (VKI_HWCAP_S390_TE - 1) | VKI_HWCAP_S390_VXRS;
+               /* Out of the hardware features available on the platform,
+                  advertise those "below" TE, as well as the ones explicitly
+                  ORed in the expression below.  Anything else, such as TE
+                  itself, is not supported by Valgrind. */
+               auxv->u.a_val &= ((VKI_HWCAP_S390_TE - 1)
+                                 | VKI_HWCAP_S390_VXRS
+                                 | VKI_HWCAP_S390_VXRS_EXT);
             }
 #           elif defined(VGP_arm64_linux)
             {
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index e7877e636..228ae2554 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -1555,6 +1555,7 @@ Bool VG_(machine_get_hwcaps)( void )
         { False, S390_FAC_MSA5,  VEX_HWCAPS_S390X_MSA5,  "MSA5"  },
         { False, S390_FAC_MI2,   VEX_HWCAPS_S390X_MI2,   "MI2"   },
         { False, S390_FAC_LSC2,  VEX_HWCAPS_S390X_LSC2,  "LSC2"  },
+        { False, S390_FAC_VXE,   VEX_HWCAPS_S390X_VXE,   "VXE"   },
      };
 
      /* Set hwcaps according to the detected facilities */
diff --git a/include/vki/vki-s390x-linux.h b/include/vki/vki-s390x-linux.h
index 7b863a324..4ab2d3334 100644
--- a/include/vki/vki-s390x-linux.h
+++ b/include/vki/vki-s390x-linux.h
@@ -806,6 +806,7 @@ typedef vki_s390_regs vki_elf_gregset_t;
 
 #define VKI_HWCAP_S390_TE           1024
 #define VKI_HWCAP_S390_VXRS         2048
+#define VKI_HWCAP_S390_VXRS_EXT     8192
 
 
 //----------------------------------------------------------------------
diff --git a/none/tests/s390x/vector.h b/none/tests/s390x/vector.h
index de2391480..632c2cb9c 100644
--- a/none/tests/s390x/vector.h
+++ b/none/tests/s390x/vector.h
@@ -86,6 +86,13 @@ void print_hex(const V128 value) {
    printf("%016lx | %016lx\n", value.u64[0], value.u64[1]);
 }
 
+void print_hex64(const V128 value, int zero_only) {
+   if (zero_only)
+      printf("%016lx | --\n", value.u64[0]);
+   else
+      printf("%016lx | %016lx\n", value.u64[0], value.u64[1]);
+}
+
 void print_f32(const V128 value, int even_only, int zero_only) {
    if (zero_only)
       printf("%a | -- | -- | --\n", value.f32[0]);
@@ -222,8 +229,10 @@ static void test_##insn##_selective(const s390x_test_usageInfo info) \
       {printf("  v_arg2   = "); print_hex(v_arg2);} \
    if (info & V128_V_ARG3_AS_INT) \
       {printf("  v_arg3   = "); print_hex(v_arg3);} \
-   if (info & V128_V_RES_AS_INT) \
-      {printf("  v_result = "); print_hex(v_result);} \
+   if (info & V128_V_RES_AS_INT) { \
+      printf("  v_result = "); \
+      print_hex64(v_result, info & V128_V_RES_ZERO_ONLY); \
+   } \
    \
    if (info & V128_V_ARG1_AS_FLOAT64) \
       {printf("  v_arg1   = "); print_f64(v_arg1, 0);} \
diff --git a/none/tests/s390x/vector_float.c b/none/tests/s390x/vector_float.c
index 52f3a296f..20853f381 100644
--- a/none/tests/s390x/vector_float.c
+++ b/none/tests/s390x/vector_float.c
@@ -114,50 +114,59 @@ int main()
    test_with_selective_printing(vldeb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64));
    test_with_selective_printing(wldeb, (V128_V_RES_AS_FLOAT64 |
-                                        V128_V_ARG1_AS_FLOAT64));
+                                        V128_V_ARG1_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vflcdb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64));
    test_with_selective_printing(wflcdb, (V128_V_RES_AS_FLOAT64 |
-                                         V128_V_ARG1_AS_FLOAT64));
+                                         V128_V_ARG1_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vflndb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64));
    test_with_selective_printing(wflndb, (V128_V_RES_AS_FLOAT64 |
-                                         V128_V_ARG1_AS_FLOAT64));
+                                         V128_V_ARG1_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vflpdb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64));
    test_with_selective_printing(wflpdb, (V128_V_RES_AS_FLOAT64 |
-                                         V128_V_ARG1_AS_FLOAT64));
+                                         V128_V_ARG1_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vfadb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfadb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
-                                        V128_V_ARG2_AS_FLOAT64));
+                                        V128_V_ARG2_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfsdb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfsdb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
-                                        V128_V_ARG2_AS_FLOAT64));
+                                        V128_V_ARG2_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfmdb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfmdb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
-                                        V128_V_ARG2_AS_FLOAT64));
+                                        V128_V_ARG2_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfddb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfddb, (V128_V_RES_AS_FLOAT64 |
                                         V128_V_ARG1_AS_FLOAT64 |
-                                        V128_V_ARG2_AS_FLOAT64));
+                                        V128_V_ARG2_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
  
    test_with_selective_printing(vfsqdb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64));
    test_with_selective_printing(wfsqdb, (V128_V_RES_AS_FLOAT64 |
-                                         V128_V_ARG1_AS_FLOAT64));
+                                         V128_V_ARG1_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vfmadb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64 |
@@ -166,7 +175,8 @@ int main()
    test_with_selective_printing(wfmadb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64 |
                                          V128_V_ARG2_AS_FLOAT64 |
-                                         V128_V_ARG3_AS_FLOAT64));
+                                         V128_V_ARG3_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfmsdb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64 |
                                          V128_V_ARG2_AS_FLOAT64 |
@@ -174,21 +184,25 @@ int main()
    test_with_selective_printing(wfmsdb, (V128_V_RES_AS_FLOAT64 |
                                          V128_V_ARG1_AS_FLOAT64 |
                                          V128_V_ARG2_AS_FLOAT64 |
-                                         V128_V_ARG3_AS_FLOAT64));
+                                         V128_V_ARG3_AS_FLOAT64 |
+                                        V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(wfcdb, (V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64 |
-                                        V128_R_RES));
+                                        V128_R_RES |
+                                        V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(wfkdb, (V128_V_ARG1_AS_FLOAT64 |
                                         V128_V_ARG2_AS_FLOAT64 |
-                                        V128_R_RES));
+                                        V128_R_RES |
+                                        V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vfcedb,  (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfcedb,  (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
-                                          V128_V_ARG2_AS_FLOAT64));
+                                          V128_V_ARG2_AS_FLOAT64 |
+                                          V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfcedbs, (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64 |
@@ -196,14 +210,16 @@ int main()
    test_with_selective_printing(wfcedbs, (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64 |
-                                          V128_R_RES));
+                                          V128_R_RES |
+                                          V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vfchdb,  (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfchdb,  (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
-                                          V128_V_ARG2_AS_FLOAT64));
+                                          V128_V_ARG2_AS_FLOAT64 |
+                                          V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfchdbs, (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64 |
@@ -211,14 +227,16 @@ int main()
    test_with_selective_printing(wfchdbs, (V128_V_RES_AS_INT |
                                           V128_V_ARG1_AS_FLOAT64 |
                                           V128_V_ARG2_AS_FLOAT64 |
-                                          V128_R_RES));
+                                          V128_R_RES |
+                                          V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vfchedb,  (V128_V_RES_AS_INT |
                                            V128_V_ARG1_AS_FLOAT64 |
                                            V128_V_ARG2_AS_FLOAT64));
    test_with_selective_printing(wfchedb,  (V128_V_RES_AS_INT |
                                            V128_V_ARG1_AS_FLOAT64 |
-                                           V128_V_ARG2_AS_FLOAT64));
+                                           V128_V_ARG2_AS_FLOAT64 |
+                                           V128_V_RES_ZERO_ONLY));
    test_with_selective_printing(vfchedbs, (V128_V_RES_AS_INT |
                                            V128_V_ARG1_AS_FLOAT64 |
                                            V128_V_ARG2_AS_FLOAT64 |
@@ -226,7 +244,8 @@ int main()
    test_with_selective_printing(wfchedbs, (V128_V_RES_AS_INT |
                                            V128_V_ARG1_AS_FLOAT64 |
                                            V128_V_ARG2_AS_FLOAT64 |
-                                           V128_R_RES));
+                                           V128_R_RES |
+                                           V128_V_RES_ZERO_ONLY));
 
    test_with_selective_printing(vftcidb0,    (V128_V_RES_AS_INT |
                                               V128_V_ARG1_AS_FLOAT64 |
diff --git a/none/tests/s390x/vector_float.stdout.exp b/none/tests/s390x/vector_float.stdout.exp
index eac525041..a330ac832 100644
--- a/none/tests/s390x/vector_float.stdout.exp
+++ b/none/tests/s390x/vector_float.stdout.exp
@@ -419,88 +419,88 @@ insn vcgdb07:
   v_result = 7fffffffffffffff | 7fffffffffffffff
   v_arg1   = 0x1.fed2f087c21p+341 | 0x1.180e4c1d87fc4p+682
 insn wcgdb00:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.d7fd9222e8b86p+670 | 0x1.c272612672a3p+798
 insn wcgdb00:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.745cd360987e5p-496 | -0x1.f3b404919f358p-321
 insn wcgdb00:
-  v_result = 8000000000000000 | 0000000000000000
+  v_result = 8000000000000000 | --
   v_arg1   = -0x1.9523565cd92d5p+643 | 0x1.253677d6d3be2p-556
 insn wcgdb00:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.b6eb576ec3e6ap+845 | -0x1.c7e102c503d91p+266
 insn wcgdb01:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.3d4319841f4d6p-1011 | -0x1.2feabf7dfc506p-680
 insn wcgdb01:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.6fb8d1cd8b32cp-843 | -0x1.50f6a6922f97ep+33
 insn wcgdb01:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.64a673daccf1ap-566 | -0x1.69ef9b1d01499p+824
 insn wcgdb01:
-  v_result = 8000000000000000 | 0000000000000000
+  v_result = 8000000000000000 | --
   v_arg1   = -0x1.3e2ddd862b4adp+1005 | -0x1.312466410271p+184
 insn wcgdb03:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.d594c3412a11p-953 | -0x1.a07393d34d77cp-224
 insn wcgdb03:
-  v_result = 8000000000000000 | 0000000000000000
+  v_result = 8000000000000000 | --
   v_arg1   = -0x1.f7a0dbcfd6e4cp+104 | -0x1.40f7cde7f2214p-702
 insn wcgdb03:
-  v_result = 8000000000000000 | 0000000000000000
+  v_result = 8000000000000000 | --
   v_arg1   = -0x1.40739c1574808p+560 | -0x1.970328ddf1b6ep-374
 insn wcgdb03:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.477653afd7048p-38 | 0x1.1eac2f8b2a93cp-384
 insn wcgdb04:
-  v_result = ffffffffe9479a7d | 0000000000000000
+  v_result = ffffffffe9479a7d | --
   v_arg1   = -0x1.6b865833eff3p+28 | 0x1.06e8cf1834d0ep-722
 insn wcgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.eef0b2294a5cp-544 | -0x1.8e8b133ccda15p+752
 insn wcgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.f34e77e6b6698p-894 | -0x1.9f7ce1cb53bddp-896
 insn wcgdb04:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.95707a6d75db5p+1018 | -0x1.3b0c072d23011p-224
 insn wcgdb05:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.a9fb71160793p-968 | 0x1.05f601fe8123ap-986
 insn wcgdb05:
-  v_result = 8000000000000000 | 0000000000000000
+  v_result = 8000000000000000 | --
   v_arg1   = -0x1.0864159b94305p+451 | -0x1.d4647f5a78b7ep-599
 insn wcgdb05:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.37eadff8397c8p+432 | -0x1.15d896b6f6063p+464
 insn wcgdb05:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.eb0812b0d677p-781 | 0x1.3117c5e0e288cp-202
 insn wcgdb06:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.6b88069167c0fp-662 | -0x1.70571d27e1279p+254
 insn wcgdb06:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.f6a6d6e883596p+260 | 0x1.0d578afaaa34ap+604
 insn wcgdb06:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.d91c7d13c4694p-475 | -0x1.ecf1f8529767bp+830
 insn wcgdb06:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.fac8dd3bb7af6p-101 | 0x1.fb8324a00fba8p+959
 insn wcgdb07:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.4b0fa18fa73c7p+111 | -0x1.08e7b17633a49p+61
 insn wcgdb07:
-  v_result = e636b693e39a1100 | 0000000000000000
+  v_result = e636b693e39a1100 | --
   v_arg1   = -0x1.9c9496c1c65efp+60 | 0x1.c4182ee728d76p-572
 insn wcgdb07:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = -0x1.819718032dff7p-303 | 0x1.a784c77ff6aa2p-622
 insn wcgdb07:
-  v_result = 7fffffffffffffff | 0000000000000000
+  v_result = 7fffffffffffffff | --
   v_arg1   = 0x1.978e8abfd83c2p+152 | 0x1.2531ebf451762p+315
 insn vclgdb00:
   v_result = 0000000000000000 | 0000000000000000
@@ -587,88 +587,88 @@ insn vclgdb07:
   v_result = 0000000000000000 | 0000000000000000
   v_arg1   = -0x1.137bbb51f08bdp+306 | 0x1.18d2a1063356p-795
 insn wclgdb00:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.e66f55dcc2639p-1013 | -0x1.733ee56929f3bp-304
 insn wclgdb00:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.8802fd9ab740cp-986 | -0x1.64d4d2c7c145fp-1015
 insn wclgdb00:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.a67209b8c407bp-645 | -0x1.6410ff9b1c801p+487
 insn wclgdb00:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.cb2febaefeb2dp+49 | 0x1.dee368b2ec375p-502
 insn wclgdb01:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.5703db3c1b0e2p-728 | 0x1.068c4d51ea4ebp+617
 insn wclgdb01:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.ae350291e5b3ep+291 | 0x1.1b87bb09b6032p+376
 insn wclgdb01:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.c4666a710127ep+424 | -0x1.19e969b6c0076p+491
 insn wclgdb01:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.c892c5a4d103fp+105 | -0x1.d4f937cc76704p+749
 insn wclgdb03:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.81090d8fc663dp-111 | 0x1.337ec5e0f0904p+1
 insn wclgdb03:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.e787adc70b91p-593 | 0x1.db8d83196b53cp-762
 insn wclgdb03:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.6529307e907efp+389 | -0x1.3ea0d8d5b4dd2p+589
 insn wclgdb03:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.be701a158637p-385 | 0x1.c5a7f70cb8a09p+107
 insn wclgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.2f328571ab445p+21 | -0x1.dcc21fc82ba01p-930
 insn wclgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.06b69fcbb7bffp-415 | 0x1.6f9a13a0a827ap+915
 insn wclgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.738e549b38bcdp+479 | 0x1.a522edb999c9p-45
 insn wclgdb04:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.7f9399d2bcf3bp-215 | -0x1.7bc35f2d69a7fp+818
 insn wclgdb05:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.fc542bdb707f6p+880 | -0x1.8521ebc93a25fp-969
 insn wclgdb05:
-  v_result = 1ce8d9951b8c8600 | 0000000000000000
+  v_result = 1ce8d9951b8c8600 | --
   v_arg1   = 0x1.ce8d9951b8c86p+60 | 0x1.92712589230e7p+475
 insn wclgdb05:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.8a297f60a0811p-156 | 0x1.102b79043d82cp-204
 insn wclgdb05:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.beb9057e1401dp-196 | -0x1.820f18f830262p+15
 insn wclgdb06:
-  v_result = 0000000000000001 | 0000000000000000
+  v_result = 0000000000000001 | --
   v_arg1   = 0x1.c321a966ecb4dp-430 | -0x1.2f6a1a95ead99p-943
 insn wclgdb06:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.f1a86b4aed821p-56 | -0x1.1ee6717cc2d7fp-899
 insn wclgdb06:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.73ce49d89ecb9p-302 | 0x1.52663b975ed23p-716
 insn wclgdb06:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.3e9c2de97a292p+879 | 0x1.d34eed36f2eafp+960
 insn wclgdb07:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.4e6ec6ddc6a45p-632 | -0x1.6e564d0fec72bp+369
 insn wclgdb07:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.42e2c658e4c4dp+459 | -0x1.9f9dc0252e44p+85
 insn wclgdb07:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.fb40ac8cda3c1p-762 | 0x1.0e9ed614bc8f1p-342
 insn wclgdb07:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.c1f8b3c68e214p+118 | -0x1.1a26a49368b61p+756
 insn vfidb00:
   v_arg1   = -0x1.38df4cf9d52dbp-545 | -0x1.049253d90dd92p+94
@@ -1020,16 +1020,16 @@ insn vldeb:
   v_result = -0x1.6f5fb2p+70 | -0x1.0d2df6p-107
 insn wldeb:
   v_arg1   = -0x1.d26169729db2ap-435 | 0x1.d6fd080793e8cp+767
-  v_result = -0x1.9a4c2cp-54 | 0x0p+0
+  v_result = -0x1.9a4c2cp-54 | --
 insn wldeb:
   v_arg1   = -0x1.f4b59107fce61p-930 | 0x1.cdf2816e253f4p-168
-  v_result = -0x1.be96b2p-116 | 0x0p+0
+  v_result = -0x1.be96b2p-116 | --
 insn wldeb:
   v_arg1   = -0x1.9603a2997928cp-441 | -0x1.aada85e355a11p-767
-  v_result = -0x1.d2c074p-55 | 0x0p+0
+  v_result = -0x1.d2c074p-55 | --
 insn wldeb:
   v_arg1   = 0x1.25ccf5bd0e83p+620 | 0x1.e1635864ebb17p-88
-  v_result = 0x1.64b99ep+78 | 0x0p+0
+  v_result = 0x1.64b99ep+78 | --
 insn vflcdb:
   v_arg1   = 0x1.0ae6d82f76afp-166 | -0x1.e8fb1e03a7415p-191
   v_result = -0x1.0ae6d82f76afp-166 | 0x1.e8fb1e03a7415p-191
@@ -1044,16 +1044,16 @@ insn vflcdb:
   v_result = -0x1.19520153d35b4p-301 | -0x1.ac5325cd23253p+396
 insn wflcdb:
   v_arg1   = 0x1.ffd3eecfd54d7p-831 | -0x1.97854fa523a77p+146
-  v_result = -0x1.ffd3eecfd54d7p-831 | 0x0p+0
+  v_result = -0x1.ffd3eecfd54d7p-831 | --
 insn wflcdb:
   v_arg1   = -0x1.508ea45606447p-442 | 0x1.ae7f0e6cf9d2bp+583
-  v_result = 0x1.508ea45606447p-442 | 0x0p+0
+  v_result = 0x1.508ea45606447p-442 | --
 insn wflcdb:
   v_arg1   = 0x1.da8ab2188c21ap+94 | 0x1.78a9c152aa074p-808
-  v_result = -0x1.da8ab2188c21ap+94 | 0x0p+0
+  v_result = -0x1.da8ab2188c21ap+94 | --
 insn wflcdb:
   v_arg1   = -0x1.086882645e0c5p-1001 | -0x1.54e2de5af5a74p-262
-  v_result = 0x1.086882645e0c5p-1001 | 0x0p+0
+  v_result = 0x1.086882645e0c5p-1001 | --
 insn vflndb:
   v_arg1   = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935
   v_result = -0x1.5bec561d407dcp+819 | -0x1.a5773dadb7a2dp+935
@@ -1068,16 +1068,16 @@ insn vflndb:
   v_result = -0x1.c5bc39a06d4e2p-259 | -0x1.c5e61ad849e77p-833
 insn wflndb:
   v_arg1   = -0x1.e9f3e6d1beffap-117 | -0x1.d58cc8bf123b3p-714
-  v_result = -0x1.e9f3e6d1beffap-117 | 0x0p+0
+  v_result = -0x1.e9f3e6d1beffap-117 | --
 insn wflndb:
   v_arg1   = -0x1.3fc4ef2e7485ep-691 | 0x1.eb328986081efp-775
-  v_result = -0x1.3fc4ef2e7485ep-691 | 0x0p+0
+  v_result = -0x1.3fc4ef2e7485ep-691 | --
 insn wflndb:
   v_arg1   = -0x1.7146c5afdec16p+23 | -0x1.597fcfa1fab2p-708
-  v_result = -0x1.7146c5afdec16p+23 | 0x0p+0
+  v_result = -0x1.7146c5afdec16p+23 | --
 insn wflndb:
   v_arg1   = 0x1.03f8d7e9afe84p-947 | 0x1.9a10c3feb6b57p-118
-  v_result = -0x1.03f8d7e9afe84p-947 | 0x0p+0
+  v_result = -0x1.03f8d7e9afe84p-947 | --
 insn vflpdb:
   v_arg1   = 0x1.64ae59b6c762ep-407 | -0x1.fa7191ab21e86p+533
   v_result = 0x1.64ae59b6c762ep-407 | 0x1.fa7191ab21e86p+533
@@ -1092,16 +1092,16 @@ insn vflpdb:
   v_result = 0x1.85fa2de1d492ap+170 | 0x1.ac36828822c11p-968
 insn wflpdb:
   v_arg1   = 0x1.a6cf677640a73p-871 | 0x1.b6f1792385922p-278
-  v_result = 0x1.a6cf677640a73p-871 | 0x0p+0
+  v_result = 0x1.a6cf677640a73p-871 | --
 insn wflpdb:
   v_arg1   = -0x1.b886774f6d888p-191 | -0x1.6a2b08d735d22p-643
-  v_result = 0x1.b886774f6d888p-191 | 0x0p+0
+  v_result = 0x1.b886774f6d888p-191 | --
 insn wflpdb:
   v_arg1   = 0x1.5045d37d46f5fp+943 | -0x1.333a86ef2dcf6p-1013
-  v_result = 0x1.5045d37d46f5fp+943 | 0x0p+0
+  v_result = 0x1.5045d37d46f5fp+943 | --
 insn wflpdb:
   v_arg1   = 0x1.1e7bec6ada14dp+252 | 0x1.a70b3f3e24dap-153
-  v_result = 0x1.1e7bec6ada14dp+252 | 0x0p+0
+  v_result = 0x1.1e7bec6ada14dp+252 | --
 insn vfadb:
   v_arg1   = 0x1.5b1ad8e9f17c6p-294 | -0x1.ddd8300a0bf02p+122
   v_arg2   = -0x1.9b49c31ca8ac6p+926 | 0x1.fdbc992926268p+677
@@ -1121,19 +1121,19 @@ insn vfadb:
 insn wfadb:
   v_arg1   = 0x1.3c5466cb80722p+489 | -0x1.11e1770053ca2p+924
   v_arg2   = 0x1.d876cd721a726p-946 | 0x1.5c04ceb79c9bcp+1001
-  v_result = 0x1.3c5466cb80722p+489 | 0x0p+0
+  v_result = 0x1.3c5466cb80722p+489 | --
 insn wfadb:
   v_arg1   = 0x1.b0b142d6b76a3p+577 | 0x1.3146824e993a2p+432
   v_arg2   = -0x1.f7f3b7582925fp-684 | -0x1.9700143c2b935p-837
-  v_result = 0x1.b0b142d6b76a2p+577 | 0x0p+0
+  v_result = 0x1.b0b142d6b76a2p+577 | --
 insn wfadb:
   v_arg1   = -0x1.8d65e15edabd6p+244 | 0x1.3be7fd08492d6p-141
   v_arg2   = -0x1.5eef86490fb0ap+481 | 0x1.7b26c897cb6dfp+810
-  v_result = -0x1.5eef86490fb0ap+481 | 0x0p+0
+  v_result = -0x1.5eef86490fb0ap+481 | --
 insn wfadb:
   v_arg1   = -0x1.2dffa5b5f29p+34 | 0x1.71a026274602fp-881
   v_arg2   = 0x1.4dad707287289p+756 | -0x1.1500d55807247p-616
-  v_result = 0x1.4dad707287288p+756 | 0x0p+0
+  v_result = 0x1.4dad707287288p+756 | --
 insn vfsdb:
   v_arg1   = 0x1.054fd9c4d4883p+644 | 0x1.45c90ed85bd7fp-780
   v_arg2   = 0x1.f3bc7a611dadap+494 | -0x1.7c9e1e858ba5bp-301
@@ -1153,19 +1153,19 @@ insn vfsdb:
 insn wfsdb:
   v_arg1   = 0x1.9090dabf846e7p-648 | 0x1.1c4ab843a2d15p+329
   v_arg2   = -0x1.a7ceb293690dep+316 | 0x1.22245954a20cp+42
-  v_result = 0x1.a7ceb293690dep+316 | 0x0p+0
+  v_result = 0x1.a7ceb293690dep+316 | --
 insn wfsdb:
   v_arg1   = 0x1.4e5347c27819p-933 | -0x1.56a30bda28351p-64
   v_arg2   = -0x1.dedb9f3935b56p-155 | 0x1.8c5b6ed76816cp-522
-  v_result = 0x1.dedb9f3935b56p-155 | 0x0p+0
+  v_result = 0x1.dedb9f3935b56p-155 | --
 insn wfsdb:
   v_arg1   = 0x1.0ec4e562a015bp-491 | 0x1.3996381b52d9fp-686
   v_arg2   = 0x1.1dcce4e81819p+960 | -0x1.32fa425e8fc08p-263
-  v_result = -0x1.1dcce4e81818fp+960 | 0x0p+0
+  v_result = -0x1.1dcce4e81818fp+960 | --
 insn wfsdb:
   v_arg1   = -0x1.587229f90f77dp-19 | 0x1.100d8eb8105e4p-784
   v_arg2   = -0x1.afb4cce4c43ddp+530 | -0x1.6da7f05e7f512p-869
-  v_result = 0x1.afb4cce4c43dcp+530 | 0x0p+0
+  v_result = 0x1.afb4cce4c43dcp+530 | --
 insn vfmdb:
   v_arg1   = 0x1.892b425556c47p-124 | 0x1.38222404079dfp-656
   v_arg2   = 0x1.af612ed2c342dp-267 | -0x1.1f735fd6ce768p-877
@@ -1185,19 +1185,19 @@ insn vfmdb:
 insn wfmdb:
   v_arg1   = -0x1.b992d950126a1p-683 | -0x1.9c1b22eb58c59p-497
   v_arg2   = 0x1.b557a7d8e32c3p-25 | -0x1.f746b2ddafccep+227
-  v_result = -0x1.792f6fb13894ap-707 | 0x0p+0
+  v_result = -0x1.792f6fb13894ap-707 | --
 insn wfmdb:
   v_arg1   = -0x1.677a8c20a5a2fp+876 | 0x1.c03e7b97e8c0dp-645
   v_arg2   = 0x1.dab44be430937p-1011 | -0x1.3f51352c67be9p-916
-  v_result = -0x1.4d4b0a1827064p-134 | 0x0p+0
+  v_result = -0x1.4d4b0a1827064p-134 | --
 insn wfmdb:
   v_arg1   = -0x1.da60f596ad0cep+254 | 0x1.52332e0650e33p+966
   v_arg2   = 0x1.a042c52ed993cp+215 | 0x1.8f380c84aa133p+204
-  v_result = -0x1.81aca4bbcbd24p+470 | 0x0p+0
+  v_result = -0x1.81aca4bbcbd24p+470 | --
 insn wfmdb:
   v_arg1   = -0x1.83d17f11f6aa3p-469 | -0x1.98117efe89b9ep-361
   v_arg2   = 0x1.8c445fd46d214p-701 | -0x1.f98118821821cp+596
-  v_result = -0x0p+0 | 0x0p+0
+  v_result = -0x0p+0 | --
 insn vfddb:
   v_arg1   = -0x1.ecbb48899e0f1p+969 | 0x1.caf175ab352p-20
   v_arg2   = -0x1.9455d67f9f79dp+208 | 0x1.bc4a431b04a6fp+482
@@ -1217,19 +1217,19 @@ insn vfddb:
 insn wfddb:
   v_arg1   = 0x1.bd48489b60731p-114 | 0x1.a760dcf57b74fp-51
   v_arg2   = -0x1.171f83409eeb6p-402 | -0x1.e159d1409bdc6p-972
-  v_result = -0x1.9864f1511f8cp+288 | 0x0p+0
+  v_result = -0x1.9864f1511f8cp+288 | --
 insn wfddb:
   v_arg1   = -0x1.120505ef4606p-637 | -0x1.83f6f775c0eb7p+272
   v_arg2   = -0x1.d18ba3872fde1p+298 | 0x1.c60f8d191068cp-454
-  v_result = 0x1.2d5cdb15a686cp-936 | 0x0p+0
+  v_result = 0x1.2d5cdb15a686cp-936 | --
 insn wfddb:
   v_arg1   = 0x1.f637f7f8c790fp-97 | -0x1.7bdce4d74947p+189
   v_arg2   = -0x1.1c8f2d1b3a2edp-218 | -0x1.55fdfd1840241p-350
-  v_result = -0x1.c3d0799c1420fp+121 | 0x0p+0
+  v_result = -0x1.c3d0799c1420fp+121 | --
 insn wfddb:
   v_arg1   = -0x1.c63b7b2eee253p+250 | 0x1.dfd9dcd8b823fp-125
   v_arg2   = 0x1.094a1f1f87e0cp+629 | 0x1.eeaa23c0d7843p-814
-  v_result = -0x1.b653a10ebdeccp-379 | 0x0p+0
+  v_result = -0x1.b653a10ebdeccp-379 | --
 insn vfsqdb:
   v_arg1   = 0x1.f60db25f7066p-703 | -0x1.d43509abca8c3p+631
   v_result = 0x1.fb009ab25ec11p-352 | nan
@@ -1244,16 +1244,16 @@ insn vfsqdb:
   v_result = 0x1.833dba0954bccp+249 | nan
 insn wfsqdb:
   v_arg1   = 0x1.71af4e7f64978p+481 | -0x1.3429dc60011d7p-879
-  v_result = 0x1.b30fc65551133p+240 | 0x0p+0
+  v_result = 0x1.b30fc65551133p+240 | --
 insn wfsqdb:
   v_arg1   = 0x1.5410db1c5f403p+173 | 0x1.97fa6581e692fp+108
-  v_result = 0x1.a144f43a592c1p+86 | 0x0p+0
+  v_result = 0x1.a144f43a592c1p+86 | --
 insn wfsqdb:
   v_arg1   = -0x1.5838027725afep+6 | 0x1.ac61529c11f38p+565
-  v_result = nan | 0x0p+0
+  v_result = nan | --
 insn wfsqdb:
   v_arg1   = -0x1.159e341dcc06ep-439 | 0x1.ed54ce5481ba5p-574
-  v_result = nan | 0x0p+0
+  v_result = nan | --
 insn vfmadb:
   v_arg1   = -0x1.eb00a5c503d75p+538 | 0x1.89fae603ddc07p+767
   v_arg2   = -0x1.71c72712c3957p+715 | 0x1.1bd5773442feap+762
@@ -1278,22 +1278,22 @@ insn wfmadb:
   v_arg1   = 0x1.1cc5b10a14d54p+668 | -0x1.686407390f7d1p+616
   v_arg2   = -0x1.bf34549e73246p+676 | -0x1.dc5a34cc470f3p+595
   v_arg3   = -0x1.95e0fdcf13974p-811 | -0x1.79c7cc1a8ec83p-558
-  v_result = -0x1.fffffffffffffp+1023 | 0x0p+0
+  v_result = -0x1.fffffffffffffp+1023 | --
 insn wfmadb:
   v_arg1   = 0x1.138bc1a5d75f8p+713 | -0x1.e226ebba2fe54p+381
   v_arg2   = -0x1.081ebb7cc3414p-772 | 0x1.369d99e174fc3p+922
   v_arg3   = -0x1.0671c682a5d0cp-1016 | 0x1.03c9530dd0377p+378
-  v_result = -0x1.1c4933e117d95p-59 | 0x0p+0
+  v_result = -0x1.1c4933e117d95p-59 | --
 insn wfmadb:
   v_arg1   = -0x1.166f0b1fad67bp+64 | -0x1.e9ee8d32e1069p-452
   v_arg2   = -0x1.4a235bdd109e2p-65 | 0x1.bacaa96fc7e81p-403
   v_arg3   = -0x1.d2e19acf7c4bdp+99 | 0x1.f901130f685adp-963
-  v_result = -0x1.d2e19acf7c4bcp+99 | 0x0p+0
+  v_result = -0x1.d2e19acf7c4bcp+99 | --
 insn wfmadb:
   v_arg1   = -0x1.77d7bfec863d2p-988 | -0x1.b68029700c6b1p-206
   v_arg2   = -0x1.aca05ad00aec1p+737 | 0x1.ac746bd7e216bp+51
   v_arg3   = 0x1.17342292078b4p+188 | -0x1.49efaf9392301p+555
-  v_result = 0x1.17342292078b4p+188 | 0x0p+0
+  v_result = 0x1.17342292078b4p+188 | --
 insn vfmsdb:
   v_arg1   = -0x1.a1b218e84e61p+34 | 0x1.b220f0d144daep-111
   v_arg2   = 0x1.564fcc2527961p-265 | 0x1.ea85a4154721ep+733
@@ -1318,22 +1318,22 @@ insn wfmsdb:
   v_arg1   = -0x1.7499a639673a6p-100 | -0x1.2a0d737e6cb1cp-207
   v_arg2   = -0x1.01ad4670a7aa3p-911 | 0x1.f94385e1021e8p+317
   v_arg3   = 0x1.aa42b2bb17af9p+982 | 0x1.c550e471711p+786
-  v_result = -0x1.aa42b2bb17af8p+982 | 0x0p+0
+  v_result = -0x1.aa42b2bb17af8p+982 | --
 insn wfmsdb:
   v_arg1   = 0x1.76840f99b431ep+500 | -0x1.989a500c92c08p+594
   v_arg2   = 0x1.33c657cb8385cp-84 | -0x1.2c795ad92ce17p+807
   v_arg3   = -0x1.ee58a39f02d54p-351 | -0x1.18695ed9a280ap+48
-  v_result = 0x1.c242894a0068p+416 | 0x0p+0
+  v_result = 0x1.c242894a0068p+416 | --
 insn wfmsdb:
   v_arg1   = -0x1.16db07e054a65p-469 | -0x1.3a627ab99c6e4p+689
   v_arg2   = 0x1.17872eae826e5p-538 | 0x1.44ed513fb5873p-929
   v_arg3   = 0x1.5ca912008e077p-217 | -0x1.982a6f7359876p-23
-  v_result = -0x1.5ca912008e077p-217 | 0x0p+0
+  v_result = -0x1.5ca912008e077p-217 | --
 insn wfmsdb:
   v_arg1   = -0x1.d315f4a932c6p+122 | 0x1.616a04493e143p+513
   v_arg2   = -0x1.cf1cd3516f23fp+552 | 0x1.7121749c3932cp-750
   v_arg3   = 0x1.dc26d92304d7fp-192 | -0x1.1fc3cca9ec20ep+371
-  v_result = 0x1.a67ca6ba395bcp+675 | 0x0p+0
+  v_result = 0x1.a67ca6ba395bcp+675 | --
 insn wfcdb:
   v_arg1   = 0x1.302001b736011p-633 | -0x1.72d5300225c97p-468
   v_arg2   = -0x1.8c007c5aba108p-17 | -0x1.bb3f9ae136acdp+569
@@ -1383,19 +1383,19 @@ insn vfcedb:
   v_arg1   = 0x1.d8e5c9930c19dp+623 | -0x1.cf1facff4e194p-605
   v_arg2   = -0x1.ed6ba02646d0dp+441 | -0x1.2d677e710620bp+810
 insn wfcedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.a252009e1a12cp-442 | 0x1.4dc608268bb29p-513
   v_arg2   = -0x1.81020aa1a36e6p-687 | -0x1.300e64ce414f1p-899
 insn wfcedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.cec439a8d4781p-175 | -0x1.d20e3b281d599p+893
   v_arg2   = 0x1.ca17cf16cf0aap-879 | 0x1.61506f8596092p+545
 insn wfcedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.0659f5f24a004p+877 | 0x1.fc46867ed0338p-680
   v_arg2   = -0x1.1d6849587155ep-1010 | -0x1.f68171edc235fp+575
 insn wfcedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.dc88a0d46ad79p-816 | 0x1.245140dcaed79p+851
   v_arg2   = 0x1.b33e977c7b3ep-818 | -0x1.04319d7c69367p+787
 insn vfcedbs:
@@ -1419,22 +1419,22 @@ insn vfcedbs:
   v_arg2   = 0x1.ae2c06ea88ff4p+332 | -0x1.f668ce4f8ef9ap+821
   r_result = 0000000000000003
 insn wfcedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.645261bf86b1fp-996 | 0x1.abd13c95397aap+992
   v_arg2   = -0x1.ba09e8fc66a8cp+113 | 0x1.75dbfe92c16c4p-786
   r_result = 0000000000000003
 insn wfcedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.d02831d003e7dp+415 | -0x1.611a9dfd10f36p-80
   v_arg2   = -0x1.10bda62f4647p+723 | 0x1.cc47af6653378p-614
   r_result = 0000000000000003
 insn wfcedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = 0x1.f168f32f84178p-321 | -0x1.79a2a0b9549d1p-136
   v_arg2   = 0x1.41e19d1cfa692p+11 | -0x1.2a0ed6e7fd517p-453
   r_result = 0000000000000003
 insn wfcedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.76a9144ee26c5p+188 | -0x1.386aaea2d9cddp-542
   v_arg2   = 0x1.810fcf222efc4p-999 | -0x1.ce90a9a43e2a1p+80
   r_result = 0000000000000003
@@ -1455,19 +1455,19 @@ insn vfchdb:
   v_arg1   = 0x1.82be31fb88a2dp+946 | -0x1.7ca9e9ff31953p-931
   v_arg2   = 0x1.fe75a1052beccp+490 | 0x1.179d18543d678p-255
 insn wfchdb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.0af85d8d8d609p-464 | -0x1.9f639a686e0fep+203
   v_arg2   = -0x1.3142b77b55761p-673 | 0x1.ca9c474339da1p+472
 insn wfchdb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = -0x1.6cf16959a022bp+213 | 0x1.445606e4363e1p+942
   v_arg2   = -0x1.8c343201bbd2p+939 | -0x1.e5095ad0c37a4p-434
 insn wfchdb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.36b4fc9cf5bdap-52 | -0x1.f1fd95cbcd533p+540
   v_arg2   = 0x1.5a2362891c9edp-175 | -0x1.e1f68c319e5d2p+58
 insn wfchdb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.11c6489f544bbp+811 | 0x1.262a740ec3d47p+456
   v_arg2   = -0x1.d9394d354e989p-154 | 0x1.cc21b3094391ap-972
 insn vfchdbs:
@@ -1491,22 +1491,22 @@ insn vfchdbs:
   v_arg2   = 0x1.e426748435a76p+370 | 0x1.8702527d17783p-871
   r_result = 0000000000000003
 insn wfchdbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.6c51b9f6442c8p+639 | 0x1.1e6b37adff703p+702
   v_arg2   = 0x1.0cba9c1c75e43p+520 | -0x1.145d44ed90967p+346
   r_result = 0000000000000000
 insn wfchdbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.7b3dd643bf36bp+816 | -0x1.61ce7bfb9307ap-683
   v_arg2   = -0x1.f2c998dc15c9ap-776 | 0x1.e16397f2dcdf5p+571
   r_result = 0000000000000000
 insn wfchdbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.cc3be81884e0ap-865 | -0x1.8b353bd41064p+820
   v_arg2   = -0x1.2c1bafaafdd4ep-34 | -0x1.24666808ab16ep-435
   r_result = 0000000000000000
 insn wfchdbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.c3de33d3b673ap+554 | 0x1.d39ed71e53096p-798
   v_arg2   = -0x1.c1e8f7b3c001p-828 | 0x1.22e2cf797fabp-787
   r_result = 0000000000000000
@@ -1527,19 +1527,19 @@ insn vfchedb:
   v_arg1   = -0x1.6c5599e7ba923p+829 | -0x1.5d1a1191ed6eap-994
   v_arg2   = -0x1.555c8775bc4d2p-478 | -0x1.4aa6a2c82319cp+493
 insn wfchedb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.ae6cad07b0f3ep-232 | -0x1.2ed61a43f3b99p-74
   v_arg2   = -0x1.226f7cddbde13p-902 | -0x1.790d1d6febbf8p+336
 insn wfchedb:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.20eb8eac3711dp-385 | 0x1.ef71d3312d7e1p+739
   v_arg2   = 0x1.7a3ba08c5a0bdp-823 | -0x1.a7845ccaa544dp-129
 insn wfchedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.97ebdbc057be8p+824 | 0x1.2b7798b063cd6p+237
   v_arg2   = 0x1.cdb87a6074294p-81 | -0x1.074c902b19bccp-416
 insn wfchedb:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.82deebf9ff023p+937 | 0x1.56c5adcf9d4abp-672
   v_arg2   = -0x1.311ce49bc9439p+561 | 0x1.c8e1c512d8544p+103
 insn vfchedbs:
@@ -1563,22 +1563,22 @@ insn vfchedbs:
   v_arg2   = -0x1.47f5dfc7a5bcp-569 | 0x1.5877ef33664a3p-758
   r_result = 0000000000000003
 insn wfchedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.a7370ccfd9e49p+505 | 0x1.c6b2385850ca2p-591
   v_arg2   = 0x1.984f4fcd338b1p+675 | -0x1.feb996c821232p-39
   r_result = 0000000000000003
 insn wfchedbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.641878612dd2p+207 | 0x1.b35e3292db7f6p+567
   v_arg2   = -0x1.18a87f209e96bp+299 | -0x1.3d598f3612d8ap+1016
   r_result = 0000000000000000
 insn wfchedbs:
-  v_result = ffffffffffffffff | 0000000000000000
+  v_result = ffffffffffffffff | --
   v_arg1   = 0x1.cfc2cda244153p+404 | 0x1.d8b2b28e9d8d7p+276
   v_arg2   = 0x1.3517b8c7a59a1p-828 | 0x1.6096fab7003ccp-415
   r_result = 0000000000000000
 insn wfchedbs:
-  v_result = 0000000000000000 | 0000000000000000
+  v_result = 0000000000000000 | --
   v_arg1   = -0x1.54d656f033e56p-603 | -0x1.95ad0e2088967p+254
   v_arg2   = 0x1.4cb319db206e4p-614 | 0x1.b41cd9e3739b6p-862
   r_result = 0000000000000003