Mark Wielaard 52876a
commit 3cc0232c46a5905b4a6c2fbd302b58bf5f90b3d5
Mark Wielaard 52876a
Author: Carl Love <cel@us.ibm.com>
Mark Wielaard 52876a
Date:   Mon Jan 11 16:00:57 2021 -0600
Mark Wielaard 52876a
Mark Wielaard 52876a
    PPC64: ISA 3.1 VSX PCV Generate Operations
Mark Wielaard 52876a
    
Mark Wielaard 52876a
    xxgenpcvbm VSX Vector Generate PCV from Byte Mask
Mark Wielaard 52876a
    xxgenpcvdm VSX Vector Generate PCV from Doubleword Mask
Mark Wielaard 52876a
    xxgenpcvhm VSX Vector Generate PCV from Halfword Mask
Mark Wielaard 52876a
    xxgenpcvwm VSX Vector Generate PCV from Word Mask
Mark Wielaard 52876a
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
index deda4dfce..54ce923a9 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
@@ -169,6 +169,23 @@ void write_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc,
Mark Wielaard 52876a
 void get_ACC_entry (VexGuestPPC64State* gst, UInt offset, UInt acc,
Mark Wielaard 52876a
                     UInt reg, UInt *result);
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+/* Dirty helpers for the VSX Vector Generate PCV from Mask instructions.
Mark Wielaard 52876a
+   src_hi:src_lo carry the 128-bit element mask and imm selects the
Mark Wielaard 52876a
+   operation; the 128-bit result is written into the guest state at byte
Mark Wielaard 52876a
+   offset reg_offset.  */
Mark Wielaard 52876a
+extern void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                                   ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                                   UInt reg_offset, UInt imm );
Mark Wielaard 52876a
+extern void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                                    ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                                    UInt reg_offset, UInt imm );
Mark Wielaard 52876a
+extern void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                                   ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                                   UInt reg_offset, UInt imm );
Mark Wielaard 52876a
+extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                                    ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                                    UInt reg_offset, UInt imm );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
 /* 8-bit XO value from instruction description */
Mark Wielaard 52876a
 #define XVI4GER8       0b00100011
Mark Wielaard 52876a
 #define XVI4GER8PP     0b00100010
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
index c24191ef3..75497abb9 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
@@ -701,6 +701,738 @@ ULong vector_evaluate64_helper( ULong srcA, ULong srcB, ULong srcC,
Mark Wielaard 52876a
 #undef MAX_IMM_BITS
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+/*--------------------------------------------------*/
Mark Wielaard 52876a
+/*---- VSX Vector Generate PCV from Mask helpers ---*/
Mark Wielaard 52876a
+/*--------------------------------------------------*/
Mark Wielaard 52876a
+static void write_VSX_entry (VexGuestPPC64State* gst, UInt reg_offset,
Mark Wielaard 52876a
+                             ULong *vsx_entry)   /* two 64-bit halves */
Mark Wielaard 52876a
+{
Mark Wielaard 52876a
+   U128* pU128_dst;   /* 128-bit destination at gst + reg_offset bytes */
Mark Wielaard 52876a
+   pU128_dst = (U128*) (((UChar*) gst) + reg_offset);
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* The U128 type is defined as an array of unsigned integers.  */
Mark Wielaard 52876a
+   /* Writing in LE order: vsx_entry[1] to words 0-1, vsx_entry[0] to 2-3 */
Mark Wielaard 52876a
+   (*pU128_dst)[0] = (UInt)(vsx_entry[1] & 0xFFFFFFFF);
Mark Wielaard 52876a
+   (*pU128_dst)[1] = (UInt)(vsx_entry[1] >> 32);
Mark Wielaard 52876a
+   (*pU128_dst)[2] = (UInt)(vsx_entry[0] & 0xFFFFFFFF);
Mark Wielaard 52876a
+   (*pU128_dst)[3] = (UInt)(vsx_entry[0] >> 32);
Mark Wielaard 52876a
+   return;
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+/* CALLED FROM GENERATED CODE */
Mark Wielaard 52876a
+void vector_gen_pvc_byte_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                            ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                            UInt reg_offset, UInt imm ) {
Mark Wielaard 52876a
+   /* Builds the 128-bit permute control vector from the per-byte mask in
Mark Wielaard 52876a
+      src_hi:src_lo and stores it in the guest state at offset reg_offset.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   UInt  i, shift_by, sel_shift_by, half_sel;
Mark Wielaard 52876a
+   ULong index, src, result[2];
Mark Wielaard 52876a
+   ULong j;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   result[0] = 0;   /* result half on the src_hi side (half_sel 0) */
Mark Wielaard 52876a
+   result[1] = 0;   /* result half on the src_lo side (half_sel 1) */
Mark Wielaard 52876a
+   j = 0;           /* number of mask-selected bytes seen so far */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* The algorithm in the ISA is written with IBM numbering zero on left and
Mark Wielaard 52876a
+      N-1 on right. The loop index is converted to "i" to match the algorithm
Mark Wielaard 52876a
+      for clarity of matching the C code to the algorithm in the ISA.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   if (imm == 0b00) {    // big endian expansion
Mark Wielaard 52876a
+      for( index = 0; index < 16; index++) {
Mark Wielaard 52876a
+         i = 15 - index;   /* index counts from the left, i from the right */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*8;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 8) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 7;   /* top bit of byte i */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+               result[half_sel] |= j << shift_by;
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (index + (unsigned long long)0x10) << shift_by;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b01) {    // big endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00001, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement a
Mark Wielaard 52876a
+         compression of the sparse byte elements in a source vector specified
Mark Wielaard 52876a
+         by the byte-element mask in VSR[VRB+32] into the leftmost byte
Mark Wielaard 52876a
+         elements of a result vector.
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 16; index++) {
Mark Wielaard 52876a
+         i = 15 - index;
Mark Wielaard 52876a
+         shift_by = i*8;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 8) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 7;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 8)   /* index goes into the j-th byte from the left */
Mark Wielaard 52876a
+               result[1] |= (index) << (15 - j)*8;
Mark Wielaard 52876a
+            else
Mark Wielaard 52876a
+               result[0] |= (index) << (7 - j)*8;
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+      /* The algorithm says set to undefined, leave as 0
Mark Wielaard 52876a
+      for( index = 3 - j; index < 4; index++) {
Mark Wielaard 52876a
+         result |= (0 << (index*8));
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b10) {   //little-endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00010, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement an
Mark Wielaard 52876a
+         expansion of the rightmost byte elements of a source vector into the
Mark Wielaard 52876a
+         byte elements of a result vector specified by the byte-element mask
Mark Wielaard 52876a
+         in VSR[VRB+32].  */
Mark Wielaard 52876a
+      for( index = 0; index < 16; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*8;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 8) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 7;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         /* shift_by was reduced by 64 above for the upper half, so it is
Mark Wielaard 52876a
+            always a bit offset within the selected 64-bit src.  */
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+               result[half_sel] |= j << shift_by;
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (index + (unsigned long long)0x10) << shift_by;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b11) {   //little-endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00011, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement a
Mark Wielaard 52876a
+         compression of the sparse byte elements in a source vector specified
Mark Wielaard 52876a
+         by the byte-element mask in VSR[VRB+32] into the rightmost byte
Mark Wielaard 52876a
+         elements of a result vector.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      for( index = 0; index < 16; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*8;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 8) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 7;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 8)   /* index goes into the j-th byte from the right */
Mark Wielaard 52876a
+               result[0] |= (index) << (j-8)*8;
Mark Wielaard 52876a
+            else
Mark Wielaard 52876a
+               result[1] |= (index) << j*8;
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      /* The algorithm says set to undefined, leave as 0
Mark Wielaard 52876a
+      for( index = 3 - j; index < 4; index++) {
Mark Wielaard 52876a
+         result |= (0 << (index*8));
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else {
Mark Wielaard 52876a
+      vex_printf("ERROR, vector_gen_pvc_byte_mask_dirty_helper, imm value %u not supported.\n",
Mark Wielaard 52876a
+                 imm);
Mark Wielaard 52876a
+      vassert(0);
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+   write_VSX_entry( gst, reg_offset, result);
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+/* CALLED FROM GENERATED CODE */
Mark Wielaard 52876a
+void vector_gen_pvc_hword_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                             ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                             UInt reg_offset,
Mark Wielaard 52876a
+                                             UInt imm ) {
Mark Wielaard 52876a
+   /* Builds the 128-bit permute control vector from the per-halfword mask in
Mark Wielaard 52876a
+      src_hi:src_lo and stores it in the guest state at offset reg_offset.  */
Mark Wielaard 52876a
+   UInt  i, shift_by, sel_shift_by, half_sel;
Mark Wielaard 52876a
+   ULong index, src, result[2];
Mark Wielaard 52876a
+   ULong j;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   result[0] = 0;   /* result half on the src_hi side (half_sel 0) */
Mark Wielaard 52876a
+   result[1] = 0;   /* result half on the src_lo side (half_sel 1) */
Mark Wielaard 52876a
+   j = 0;           /* number of mask-selected halfwords seen so far */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* The algorithm in the ISA is written with IBM numbering zero on left and
Mark Wielaard 52876a
+      N-1 on right. The loop index is converted to "i" to match the algorithm
Mark Wielaard 52876a
+      for clarity of matching the C code to the algorithm in the ISA.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   if (imm == 0b00) {    // big endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00000, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement an
Mark Wielaard 52876a
+         expansion of the leftmost halfword elements of a source vector into
Mark Wielaard 52876a
+         the halfword elements of a result vector specified by the halfword-
Mark Wielaard 52876a
+         element mask in VSR[VRB+32].
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 8; index++) {
Mark Wielaard 52876a
+         i = 7 - index;    /* index counts from the left, i from the right */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*16;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 4) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 15;   /* top bit of halfword i */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            // half-word i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (2*j + 0x0) << (shift_by+8);
Mark Wielaard 52876a
+            // half-word i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (2*j + 0x1) << shift_by;
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (2*index + 0x10) << (shift_by+8);
Mark Wielaard 52876a
+            result[half_sel] |= (2*index + 0x11) << shift_by;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b01) {    // big endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00001, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement a
Mark Wielaard 52876a
+         compression of the sparse halfword elements in a source vector
Mark Wielaard 52876a
+         specified by the halfword-element mask in VSR[VRB+32] into the
Mark Wielaard 52876a
+         leftmost halfword elements of a result vector.
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 8; index++) {
Mark Wielaard 52876a
+         i = 7 - index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*16;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 4) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 15;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 4) {
Mark Wielaard 52876a
+               // half-word i, byte 0
Mark Wielaard 52876a
+               result[1] |= (2*index + 0x0) << ((7 - j)*16 + 8);
Mark Wielaard 52876a
+               // half-word i, byte 1
Mark Wielaard 52876a
+               result[1] |= (2*index + 0x1) << ((7 - j)*16);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               // half-word i, byte 0
Mark Wielaard 52876a
+               result[0] |= (2*index + 0x0) << ((3 - j)*16 + 8);
Mark Wielaard 52876a
+               // half-word i, byte 1
Mark Wielaard 52876a
+               result[0] |= (2*index + 0x1) << ((3 - j)*16);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b10) {   //little-endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00010, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement an
Mark Wielaard 52876a
+         expansion of the rightmost halfword elements of a source vector into
Mark Wielaard 52876a
+         the halfword elements of a result vector specified by the halfword-
Mark Wielaard 52876a
+         element mask in VSR[VRB+32].
Mark Wielaard 52876a
+       */
Mark Wielaard 52876a
+      for( index = 0; index < 8; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+         shift_by = i*16;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 4) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 15;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            // half-word i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (2*j + 0x00) << shift_by;
Mark Wielaard 52876a
+            // half-word i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (2*j + 0x01) << (shift_by+8);
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            // half-word i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (2*index + 0x10) << shift_by;
Mark Wielaard 52876a
+            // half-word i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (2*index + 0x11) << (shift_by+8);
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b11) {   //little-endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00011, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement a
Mark Wielaard 52876a
+         compression of the sparse halfword elements in a source vector
Mark Wielaard 52876a
+         specified by the halfword-element mask in VSR[VRB+32] into the
Mark Wielaard 52876a
+         rightmost halfword elements of a result vector.  */
Mark Wielaard 52876a
+      for( index = 0; index < 8; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+         shift_by = i*16;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 4) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 15;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 4) {
Mark Wielaard 52876a
+               // half-word j, byte 0
Mark Wielaard 52876a
+               result[0] |= (2*index + 0x0) << ((j-4)*16);
Mark Wielaard 52876a
+               // half-word j, byte 1
Mark Wielaard 52876a
+               result[0] |= (2*index + 0x1) << ((j-4)*16+8);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               // half-word j, byte 0
Mark Wielaard 52876a
+               result[1] |= (2*index + 0x0) << (j*16);
Mark Wielaard 52876a
+               // half-word j, byte 1
Mark Wielaard 52876a
+               result[1] |= (2*index + 0x1) << ((j*16)+8);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else {
Mark Wielaard 52876a
+      vex_printf("ERROR, vector_gen_pvc_hword_mask_dirty_helper, imm value %u not supported.\n",
Mark Wielaard 52876a
+                 imm);
Mark Wielaard 52876a
+      vassert(0);
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+   write_VSX_entry( gst, reg_offset, result);
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+/* CALLED FROM GENERATED CODE */
Mark Wielaard 52876a
+void vector_gen_pvc_word_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                            ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                            UInt reg_offset, UInt imm ) {
Mark Wielaard 52876a
+   /* Builds the 128-bit permute control vector from the per-word mask in
Mark Wielaard 52876a
+      src_hi:src_lo and stores it in the guest state at offset reg_offset.  */
Mark Wielaard 52876a
+   UInt  i, shift_by, sel_shift_by, half_sel;
Mark Wielaard 52876a
+   ULong index, src, result[2];
Mark Wielaard 52876a
+   ULong j;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   result[0] = 0;   /* result half on the src_hi side (half_sel 0) */
Mark Wielaard 52876a
+   result[1] = 0;   /* result half on the src_lo side (half_sel 1) */
Mark Wielaard 52876a
+   j = 0;           /* number of mask-selected words seen so far */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* The algorithm in the ISA is written with IBM numbering zero on left and
Mark Wielaard 52876a
+      N-1 on right. The loop index is converted to "i" to match the algorithm
Mark Wielaard 52876a
+      for clarity of matching the C code to the algorithm in the ISA.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   if (imm == 0b00) {    // big endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00000, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement an
Mark Wielaard 52876a
+         expansion of the leftmost word elements of a source vector into the
Mark Wielaard 52876a
+         word elements of a result vector specified by the word-element mask
Mark Wielaard 52876a
+         in VSR[VRB+32].
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 4; index++) {
Mark Wielaard 52876a
+         i = 3 - index;    /* index counts from the left, i from the right */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*32;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 2) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 31;   /* top bit of word i */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+0) << (shift_by+24);  // word i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+1) << (shift_by+16);  // word i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+2) << (shift_by+8);   // word i, byte 2
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+3) << shift_by;       // word i, byte 3
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x10) << (shift_by+24);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x11) << (shift_by+16);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x12) << (shift_by+8);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x13) << shift_by;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b01) {    // big endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00001, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement a
Mark Wielaard 52876a
+         compression of the sparse word elements in a source vector specified
Mark Wielaard 52876a
+         by the word-element mask in VSR[VRB+32] into the leftmost word
Mark Wielaard 52876a
+         elements of a result vector.
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 4; index++) {
Mark Wielaard 52876a
+         i = 3 - index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*32;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 2) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 31;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if (((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 2) {
Mark Wielaard 52876a
+               // word j, byte 0
Mark Wielaard 52876a
+               result[1] |= (4*index+0) << ((3 - j)*32 + 24);
Mark Wielaard 52876a
+               // word j, byte 1
Mark Wielaard 52876a
+               result[1] |= (4*index+1) << ((3 - j)*32 + 16);
Mark Wielaard 52876a
+               // word j, byte 2
Mark Wielaard 52876a
+               result[1] |= (4*index+2) << ((3 - j)*32 + 8);
Mark Wielaard 52876a
+               // word j, byte 3
Mark Wielaard 52876a
+               result[1] |= (4*index+3) << ((3 - j)*32 + 0);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               result[0] |= (4*index+0) << ((1 - j)*32 + 24);
Mark Wielaard 52876a
+               result[0] |= (4*index+1) << ((1 - j)*32 + 16);
Mark Wielaard 52876a
+               result[0] |= (4*index+2) << ((1 - j)*32 + 8);
Mark Wielaard 52876a
+               result[0] |= (4*index+3) << ((1 - j)*32 + 0);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b10) {   //little-endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00010, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement an
Mark Wielaard 52876a
+         expansion of the rightmost word elements of a source vector into the
Mark Wielaard 52876a
+         word elements of a result vector specified by the word-element mask
Mark Wielaard 52876a
+         in VSR[VRB+32].
Mark Wielaard 52876a
+       */
Mark Wielaard 52876a
+      for( index = 0; index < 4; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*32;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 2) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 31;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if (((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+0) << (shift_by + 0);  // word j, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+1) << (shift_by + 8);  // word j, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+2) << (shift_by + 16); // word j, byte 2
Mark Wielaard 52876a
+            result[half_sel] |= (4*j+3) << (shift_by + 24); // word j, byte 3
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x10) << (shift_by + 0);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x11) << (shift_by + 8);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x12) << (shift_by + 16);
Mark Wielaard 52876a
+            result[half_sel] |= (4*index + 0x13) << (shift_by + 24);
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b11) {   //little-endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00011, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement a
Mark Wielaard 52876a
+         compression of the sparse word elements in a source vector specified
Mark Wielaard 52876a
+         by the word-element mask in VSR[VRB+32] into the rightmost word
Mark Wielaard 52876a
+         elements of a result vector.  */
Mark Wielaard 52876a
+      for( index = 0; index < 4; index++) {
Mark Wielaard 52876a
+         i =index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         shift_by = i*32;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i >= 2) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            shift_by = shift_by - 64;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = shift_by + 31;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if (((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j >= 2){
Mark Wielaard 52876a
+               // word j, byte 0
Mark Wielaard 52876a
+               result[0] |= (4*index + 0x0) << ((j-2)*32+0);
Mark Wielaard 52876a
+               // word j, byte 1
Mark Wielaard 52876a
+               result[0] |= (4*index + 0x1) << ((j-2)*32+8);
Mark Wielaard 52876a
+               // word j, byte 2
Mark Wielaard 52876a
+               result[0] |= (4*index + 0x2) << ((j-2)*32+16);
Mark Wielaard 52876a
+               // word j, byte 3
Mark Wielaard 52876a
+               result[0] |= (4*index + 0x3) << ((j-2)*32+24);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               result[1] |= (4*index + 0x0) << (j*32+0);
Mark Wielaard 52876a
+               result[1] |= (4*index + 0x1) << (j*32+8);
Mark Wielaard 52876a
+               result[1] |= (4*index + 0x2) << (j*32+16);
Mark Wielaard 52876a
+               result[1] |= (4*index + 0x3) << (j*32+24);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+   } else {
Mark Wielaard 52876a
+      vex_printf("ERROR, vector_gen_pvc_word_mask_dirty_helper, imm value %u not supported.\n",
Mark Wielaard 52876a
+                 imm);
Mark Wielaard 52876a
+      vassert(0);
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   write_VSX_entry( gst, reg_offset, result);
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+/* CALLED FROM GENERATED CODE */
Mark Wielaard 52876a
+void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
+                                             ULong src_hi, ULong src_lo,
Mark Wielaard 52876a
+                                             UInt reg_offset, UInt imm ) {
Mark Wielaard 52876a
+   /* The function computes the 128-bit result then writes it directly
Mark Wielaard 52876a
+      into the guest state VSX register.  */
Mark Wielaard 52876a
+   UInt  sel_shift_by, half_sel;
Mark Wielaard 52876a
+   ULong index, src, result[2];
Mark Wielaard 52876a
+   ULong j, i;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   result[0] = 0;
Mark Wielaard 52876a
+   result[1] = 0;
Mark Wielaard 52876a
+   j = 0;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* The algorithm in the ISA is written with IBM numbering zero on left and
Mark Wielaard 52876a
+      N-1 on right. The loop index is converted to "i" to match the algorithm
Mark Wielaard 52876a
+      for clarity of matching the C code to the algorithm in the ISA.  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   if (imm == 0b00) {    // big endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00000, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement an
Mark Wielaard 52876a
+         expansion of the leftmost doubleword elements of a source vector into
Mark Wielaard 52876a
+         the doubleword elements of a result vector specified by the
Mark Wielaard 52876a
+         doubleword-element mask in VSR[VRB+32].
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 2; index++) {
Mark Wielaard 52876a
+         i = 1 - index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i == 1) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = 63;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x0) << 56; // dword i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x1) << 48; // dword i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x2) << 40; // dword i, byte 2
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x3) << 32; // dword i, byte 3
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x4) << 24; // dword i, byte 4
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x5) << 16; // dword i, byte 5
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x6) << 8;  // dword i, byte 6
Mark Wielaard 52876a
+            result[half_sel] |= (8*j + 0x7) << 0;  // dword i, byte 7
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x10) << 56;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x11) << 48;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x12) << 40;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x13) << 32;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x14) << 24;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x15) << 16;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x16) << 8;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x17) << 0;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+   } else if (imm == 0b01) {    // big endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00001, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a left-indexed permute (vperm or xxperm) to implement a
Mark Wielaard 52876a
+         compression of the sparse doubleword elements in a source vector
Mark Wielaard 52876a
+         specified by the doubleword-element mask in VSR[VRB+32] into the
Mark Wielaard 52876a
+         leftmost doubleword elements of a result vector.
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
+      for( index = 0; index < 2; index++) {
Mark Wielaard 52876a
+         i = 1 - index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i == 1) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = 63;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j == 1) {
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x0) << 56;   // double-word j, byte 0
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x1) << 48;   // double-word j, byte 1
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x2) << 40;   // double-word j, byte 2
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x3) << 32;   // double-word j, byte 3
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x4) << 24;   // double-word j, byte 4
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x5) << 16;   // double-word j, byte 5
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x6) << 8;    // double-word j, byte 6
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x7) << 0;    // double-word j, byte 7
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x0) << 56;   // double-word j, byte 0
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x1) << 48;   // double-word j, byte 1
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x2) << 40;   // double-word j, byte 2
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x3) << 32;   // double-word j, byte 3
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x4) << 24;   // double-word j, byte 4
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x5) << 16;   // double-word j, byte 5
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x6) << 8;    // double-word j, byte 6
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x7) << 0;    // double-word j, byte 7
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+   } else if (imm == 0b10) {   //little-endian expansion
Mark Wielaard 52876a
+      /* If IMM=0b00010, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement an
Mark Wielaard 52876a
+         expansion of the rightmost doubleword elements of a source vector
Mark Wielaard 52876a
+         into the doubleword elements of a result vector specified by the
Mark Wielaard 52876a
+         doubleword-element mask in VSR[VRB+32].
Mark Wielaard 52876a
+       */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      for( index = 0; index < 2; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i == 1) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = 63;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( ((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+0) << 0;  // double-word i, byte 0
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+1) << 8;  // double-word i, byte 1
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+2) << 16; // double-word i, byte 2
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+3) << 24; // double-word i, byte 3
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+4) << 32; // double-word i, byte 4
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+5) << 40; // double-word i, byte 5
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+6) << 48; // double-word i, byte 6
Mark Wielaard 52876a
+            result[half_sel] |= (8*j+7) << 56; // double-word i, byte 7
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x10) << 0;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x11) << 8;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x12) << 16;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x13) << 24;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x14) << 32;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x15) << 40;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x16) << 48;
Mark Wielaard 52876a
+            result[half_sel] |= (8*index + 0x17) << 56;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   } else if (imm == 0b11) {   //little-endian compression
Mark Wielaard 52876a
+      /* If IMM=0b00011, let pcv be the permute control vector required to
Mark Wielaard 52876a
+         enable a right-indexed permute (vpermr or xxpermr) to implement a
Mark Wielaard 52876a
+         compression of the sparse doubleword elements in a source vector
Mark Wielaard 52876a
+         specified by the doubleword-element mask in VSR[VRB+32] into the
Mark Wielaard 52876a
+         rightmost doubleword elements of a result vector.  */
Mark Wielaard 52876a
+      for( index = 0; index < 2; index++) {
Mark Wielaard 52876a
+         i = index;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if ( i == 1) {
Mark Wielaard 52876a
+            src = src_hi;
Mark Wielaard 52876a
+            half_sel = 0;
Mark Wielaard 52876a
+         } else {
Mark Wielaard 52876a
+            src = src_lo;
Mark Wielaard 52876a
+            half_sel = 1;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         sel_shift_by = 63;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         if (((src >> sel_shift_by) & 0x1) == 1) {
Mark Wielaard 52876a
+            if (j == 1) {
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x0) << 0;    // double-word j, byte 0
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x1) << 8;    // double-word j, byte 1
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x2) << 16;   // double-word j, byte 2
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x3) << 24;   // double-word j, byte 3
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x4) << 32;   // double-word j, byte 4
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x5) << 40;   // double-word j, byte 5
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x6) << 48;   // double-word j, byte 6
Mark Wielaard 52876a
+               result[0] |= (8*index + 0x7) << 56;   // double-word j, byte 7
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x0) << 0;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x1) << 8;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x2) << 16;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x3) << 24;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x4) << 32;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x5) << 40;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x6) << 48;
Mark Wielaard 52876a
+               result[1] |= (8*index + 0x7) << 56;
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
+            j++;
Mark Wielaard 52876a
+         }
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+   } else {
Mark Wielaard 52876a
+      vex_printf("ERROR, vector_gen_pvc_dword_mask_helper, imm value %u not supported.\n",
Mark Wielaard 52876a
+                 imm);
Mark Wielaard 52876a
+      vassert(0);
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   write_VSX_entry( gst, reg_offset, result);
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
 
Mark Wielaard 52876a
 /*------------------------------------------------*/
Mark Wielaard 52876a
 /*---- VSX Matrix signed integer GER functions ---*/
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
index bcabf69dd..354be6b53 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
@@ -3322,6 +3322,7 @@ static IRExpr * locate_vector_ele_eq ( IRTemp src, IRExpr *value,
Mark Wielaard 52876a
 #define DFORM_IMMASK  0xffffffff
Mark Wielaard 52876a
 #define DSFORM_IMMASK 0xfffffffc
Mark Wielaard 52876a
 #define DQFORM_IMMASK 0xfffffff0
Mark Wielaard 52876a
+#define DA8LSFORM_IMMASK 0x3fffffff   // Algebraic 8LS Dform
Mark Wielaard 52876a
 
Mark Wielaard 52876a
 #define ISA_3_1_PREFIX_CHECK if (prefix) {if (!allow_isa_3_1) goto decode_noIsa3_1;}
Mark Wielaard 52876a
 
Mark Wielaard 52876a
@@ -6109,6 +6110,87 @@ static void vsx_matrix_64bit_float_ger ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
    stmt( IRStmt_Dirty(d) );
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+static void vector_gen_pvc_mask ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
+                                   IRExpr *src, UInt IMM,
Mark Wielaard 52876a
+                                   UInt opc2, UInt VSX_addr ) {
Mark Wielaard 52876a
+   /* The function takes a 128-bit source and an immediate value.  The function
Mark Wielaard 52876a
+      calls a helper to execute the xxgenpcvbm, xxgenpcvhm, xxgenpcvwm,
Mark Wielaard 52876a
+      xxgenpcvdm instruction.  The instructions are not practical to do with
Mark Wielaard 52876a
+      Iops.  The instruction is implemented with a dirty helper that
Mark Wielaard 52876a
+      calculates the 128-bit result and writes it directly into the guest
Mark Wielaard 52876a
+      state VSX register.
Mark Wielaard 52876a
+  */
Mark Wielaard 52876a
+   IRTemp src_hi = newTemp( Ity_I64);
Mark Wielaard 52876a
+   IRTemp src_lo = newTemp( Ity_I64);
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   IRDirty* d;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   vassert( (VSX_addr >= 0) && (VSX_addr < 64) );
Mark Wielaard 52876a
+   UInt reg_offset = offsetofPPCGuestState( guest_VSR0 )
Mark Wielaard 52876a
+      + sizeof(U128) * VSX_addr;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( src_hi, unop( Iop_V128HIto64, src ) );
Mark Wielaard 52876a
+   assign( src_lo, unop( Iop_V128to64, src ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   IRExpr** args = mkIRExprVec_5(
Mark Wielaard 52876a
+      IRExpr_GSPTR(),
Mark Wielaard 52876a
+      mkexpr( src_hi ),
Mark Wielaard 52876a
+      mkexpr( src_lo ),
Mark Wielaard 52876a
+      mkU32( reg_offset ),
Mark Wielaard 52876a
+      mkU64( IMM ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   switch( opc2 ) {
Mark Wielaard 52876a
+   case 0x394: // xxgenpcvbm
Mark Wielaard 52876a
+      d = unsafeIRDirty_0_N (
Mark Wielaard 52876a
+         0 /*regparms*/,
Mark Wielaard 52876a
+         "vector_gen_pvc_byte_mask_dirty_helper",
Mark Wielaard 52876a
+         fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                           &vector_gen_pvc_byte_mask_dirty_helper ),
Mark Wielaard 52876a
+         args);
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x395: // xxgenpcvhm
Mark Wielaard 52876a
+      d = unsafeIRDirty_0_N (
Mark Wielaard 52876a
+         0 /*regparms*/,
Mark Wielaard 52876a
+         "vector_gen_pvc_hword_mask_dirty_helper",
Mark Wielaard 52876a
+         fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                           &vector_gen_pvc_hword_mask_dirty_helper ),
Mark Wielaard 52876a
+         args);
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x3B4: // xxgenpcvwm
Mark Wielaard 52876a
+      d = unsafeIRDirty_0_N (
Mark Wielaard 52876a
+         0 /*regparms*/,
Mark Wielaard 52876a
+         "vector_gen_pvc_word_mask_dirty_helper",
Mark Wielaard 52876a
+         fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                           &vector_gen_pvc_word_mask_dirty_helper ),
Mark Wielaard 52876a
+         args);
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x3B5: // xxgenpcvdm
Mark Wielaard 52876a
+      d = unsafeIRDirty_0_N (
Mark Wielaard 52876a
+         0 /*regparms*/,
Mark Wielaard 52876a
+         "vector_gen_pvc_dword_mask_dirty_helper",
Mark Wielaard 52876a
+         fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                           &vector_gen_pvc_dword_mask_dirty_helper ),
Mark Wielaard 52876a
+         args);
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+   default:
Mark Wielaard 52876a
+      vex_printf("ERROR: Unkown instruction = %u in vector_gen_pvc_mask()\n",
Mark Wielaard 52876a
+                 opc2);
Mark Wielaard 52876a
+      return;
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   d->nFxState = 1;
Mark Wielaard 52876a
+   vex_bzero(&d->fxState, sizeof(d->fxState));
Mark Wielaard 52876a
+   d->fxState[0].fx     = Ifx_Modify;
Mark Wielaard 52876a
+   d->fxState[0].size   = sizeof(U128);
Mark Wielaard 52876a
+   d->fxState[0].offset = reg_offset;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   /* execute the dirty call, side-effecting guest state */
Mark Wielaard 52876a
+   stmt( IRStmt_Dirty(d) );
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
 static IRExpr * UNSIGNED_CMP_GT_V128 ( IRExpr *vA, IRExpr *vB ) {
Mark Wielaard 52876a
    /* This function does an unsigned compare of two V128 values. The
Mark Wielaard 52876a
     * function is for use in 32-bit mode only as it is expensive.  The
Mark Wielaard 52876a
@@ -35227,6 +35309,54 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
    return True;
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+static Bool dis_vector_generate_pvc_from_mask ( UInt prefix,
Mark Wielaard 52876a
+                                                UInt theInstr,
Mark Wielaard 52876a
+                                                const VexAbiInfo* vbi )
Mark Wielaard 52876a
+{
Mark Wielaard 52876a
+   UChar XT_addr = ifieldRegXT(theInstr);
Mark Wielaard 52876a
+   UChar vB_addr = ifieldRegB(theInstr);
Mark Wielaard 52876a
+   IRTemp vB = newTemp( Ity_V128 );
Mark Wielaard 52876a
+   UInt opc2 = ifieldOPClo10(theInstr);
Mark Wielaard 52876a
+   UInt IMM = IFIELD(theInstr, (31-15), 5);    // bits[11:15]
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( vB, getVReg( vB_addr ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   switch( opc2 ) {
Mark Wielaard 52876a
+   case 0x394:
Mark Wielaard 52876a
+      DIP("xxgenpcvbm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);
Mark Wielaard 52876a
+      /* vector_gen_pvc_mask uses a dirty helper to calculate the result and
Mark Wielaard 52876a
+         write it to the VSX result register.  */
Mark Wielaard 52876a
+      vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x395:
Mark Wielaard 52876a
+      DIP("xxgenpcvhm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);
Mark Wielaard 52876a
+      /* vector_gen_pvc_mask uses a dirty helper to calculate the result and
Mark Wielaard 52876a
+         write it to the VSX result register.  */
Mark Wielaard 52876a
+      vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x3B4:
Mark Wielaard 52876a
+      DIP("xxgenpcvwm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);
Mark Wielaard 52876a
+      /* vector_gen_pvc_mask uses a dirty helper to calculate the result and
Mark Wielaard 52876a
+         write it to the VSX result register.  */
Mark Wielaard 52876a
+      vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   case 0x3B5:
Mark Wielaard 52876a
+      DIP("xxgenpcvdm v%u,v%u,%u\n", XT_addr, vB_addr, IMM);
Mark Wielaard 52876a
+      /* vector_gen_pvc_mask uses a dirty helper to calculate the result and
Mark Wielaard 52876a
+         write it to the VSX result register.  */
Mark Wielaard 52876a
+      vector_gen_pvc_mask( vbi, mkexpr( vB ), IMM, opc2, XT_addr );
Mark Wielaard 52876a
+      break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   default:
Mark Wielaard 52876a
+      return False;
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   return True;
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
 static Int dis_nop_prefix ( UInt prefix, UInt theInstr )
Mark Wielaard 52876a
 {
Mark Wielaard 52876a
    Bool is_prefix   = prefix_instruction( prefix );
Mark Wielaard 52876a
@@ -35748,14 +35878,9 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
       }
Mark Wielaard 52876a
       goto decode_failure;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-   case 0x31:   // lfsu, stxv
Mark Wielaard 52876a
+   case 0x31:   // lfsu
Mark Wielaard 52876a
       if (!allow_F) goto decode_noF;
Mark Wielaard 52876a
-      if (prefix_instruction( prefix )) {  // stxv
Mark Wielaard 52876a
-         if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
Mark Wielaard 52876a
-         if (dis_fp_pair_prefix( prefix, theInstr )) goto decode_success;
Mark Wielaard 52876a
-      } else {  // lfsu
Mark Wielaard 52876a
-         if (dis_fp_load( prefix, theInstr )) goto decode_success;
Mark Wielaard 52876a
-      }
Mark Wielaard 52876a
+      if (dis_fp_load( prefix, theInstr )) goto decode_success;
Mark Wielaard 52876a
       goto decode_failure;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
    case 0x32:
Mark Wielaard 52876a
@@ -35842,7 +35967,6 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
    case 0x39:  // pld, lxsd, lxssp, lfdp
Mark Wielaard 52876a
       {
Mark Wielaard 52876a
          UInt opc2tmp = ifieldOPC0o2(theInstr);
Mark Wielaard 52876a
-
Mark Wielaard 52876a
          if (!allow_F) goto decode_noF;
Mark Wielaard 52876a
          if (prefix_instruction( prefix )) {   // pld
Mark Wielaard 52876a
             if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
Mark Wielaard 52876a
@@ -36125,12 +36249,6 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
             goto decode_failure;
Mark Wielaard 52876a
       }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-      /* The vsxOpc2 returned is the "normalized" value, representing the
Mark Wielaard 52876a
-       * instructions secondary opcode as taken from the standard secondary
Mark Wielaard 52876a
-       * opcode field [21:30] (IBM notatition), even if the actual field
Mark Wielaard 52876a
-       * is non-standard.  These normalized values are given in the opcode
Mark Wielaard 52876a
-       * appendices of the ISA 2.06 document.
Mark Wielaard 52876a
-       */
Mark Wielaard 52876a
       if ( ( opc2 == 0x168 ) && ( IFIELD( theInstr, 19, 2 ) == 0 ) )// xxspltib
Mark Wielaard 52876a
       {
Mark Wielaard 52876a
          /* This is a special case of the XX1 form where the  RA, RB
Mark Wielaard 52876a
@@ -36153,6 +36271,23 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
          goto decode_failure;
Mark Wielaard 52876a
       }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+      if ( ( opc2 == 0x394 ) ||         // xxgenpcvbm
Mark Wielaard 52876a
+           ( opc2 == 0x395 ) ||         // xxgenpcvhm
Mark Wielaard 52876a
+           ( opc2 == 0x3B4 ) ||         // xxgenpcvwm
Mark Wielaard 52876a
+           ( opc2 == 0x3B5 ) ) {        // xxgenpcvdm
Mark Wielaard 52876a
+         if ( !(allow_isa_3_1) ) goto decode_noIsa3_1;
Mark Wielaard 52876a
+         if (dis_vector_generate_pvc_from_mask( prefix, theInstr,
Mark Wielaard 52876a
+                                                abiinfo ))
Mark Wielaard 52876a
+            goto decode_success;
Mark Wielaard 52876a
+         goto decode_failure;
Mark Wielaard 52876a
+      }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      /* The vsxOpc2 returned is the "normalized" value, representing the
Mark Wielaard 52876a
+       * instructions secondary opcode as taken from the standard secondary
Mark Wielaard 52876a
+       * opcode field [21:30] (IBM notation), even if the actual field
Mark Wielaard 52876a
+       * is non-standard.  These normalized values are given in the opcode
Mark Wielaard 52876a
+       * appendices of the ISA 2.06 document.
Mark Wielaard 52876a
+       */
Mark Wielaard 52876a
       vsxOpc2 = get_VSX60_opc2(opc2, theInstr);
Mark Wielaard 52876a
 
Mark Wielaard 52876a
       switch (vsxOpc2) {
Mark Wielaard 52876a
commit 078f89e99b6f62e043f6138c6a7ae238befc1f2a
Mark Wielaard 52876a
Author: Carl Love <cel@us.ibm.com>
Mark Wielaard 52876a
Date:   Fri Feb 26 15:46:55 2021 -0600
Mark Wielaard 52876a
Mark Wielaard 52876a
    PPC64: Reduced-Precision - bfloat16 Outer Product & Format Conversion Operations
Mark Wielaard 52876a
    
Mark Wielaard 52876a
    Add support for:
Mark Wielaard 52876a
    
Mark Wielaard 52876a
    pmxvbf16ger2 Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update)
Mark Wielaard 52876a
    pmxvbf16ger2pp Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive
Mark Wielaard 52876a
      multiply, Positive accumulate
Mark Wielaard 52876a
    pmxvbf16ger2pn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Positive
Mark Wielaard 52876a
      multiply, Negative accumulate
Mark Wielaard 52876a
    pmxvbf16ger2np Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative
Mark Wielaard 52876a
      multiply, Positive accumulate
Mark Wielaard 52876a
    pmxvbf16ger2nn Prefixed Masked VSX Vector bfloat16 GER (Rank-2 Update) Negative
Mark Wielaard 52876a
      multiply, Negative accumulate
Mark Wielaard 52876a
    xvbf16ger2 VSX Vector bfloat16 GER (Rank-2 Update)
Mark Wielaard 52876a
    xvbf16ger2pp VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Positive
Mark Wielaard 52876a
      accumulate
Mark Wielaard 52876a
    xvbf16ger2pn VSX Vector bfloat16 GER (Rank-2 Update) Positive multiply, Negative
Mark Wielaard 52876a
      accumulate
Mark Wielaard 52876a
    xvbf16ger2np VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Positive
Mark Wielaard 52876a
      accumulate
Mark Wielaard 52876a
    xvbf16ger2nn VSX Vector bfloat16 GER (Rank-2 Update) Negative multiply, Negative
Mark Wielaard 52876a
      accumulate
Mark Wielaard 52876a
    xvcvbf16sp VSX Vector Convert bfloat16 to Single-Precision format
Mark Wielaard 52876a
    xvcvspbf16 VSX Vector Convert with round Single-Precision to bfloat16 format
Mark Wielaard 52876a
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_defs.h b/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
index 54ce923a9..d36d6c07d 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_defs.h
Mark Wielaard 52876a
@@ -150,6 +150,8 @@ extern ULong convert_to_zoned_helper( ULong src_hi, ULong src_low,
Mark Wielaard 52876a
                                       ULong return_upper );
Mark Wielaard 52876a
 extern ULong convert_to_national_helper( ULong src, ULong return_upper );
Mark Wielaard 52876a
 extern ULong convert_from_zoned_helper( ULong src_hi, ULong src_low );
Mark Wielaard 52876a
+extern ULong convert_from_floattobf16_helper( ULong src );
Mark Wielaard 52876a
+extern ULong convert_from_bf16tofloat_helper( ULong src );
Mark Wielaard 52876a
 extern ULong convert_from_national_helper( ULong src_hi, ULong src_low );
Mark Wielaard 52876a
 extern ULong generate_C_FPCC_helper( ULong size, ULong src_hi, ULong src );
Mark Wielaard 52876a
 extern ULong extract_bits_under_mask_helper( ULong src, ULong mask,
Mark Wielaard 52876a
@@ -201,6 +203,11 @@ extern void vector_gen_pvc_dword_mask_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
 #define XVF16GER2PN    0b10010010
Mark Wielaard 52876a
 #define XVF16GER2NP    0b01010010
Mark Wielaard 52876a
 #define XVF16GER2NN    0b11010010
Mark Wielaard 52876a
+#define XVBF16GER2     0b00110011
Mark Wielaard 52876a
+#define XVBF16GER2PP   0b00110010
Mark Wielaard 52876a
+#define XVBF16GER2PN   0b10110010
Mark Wielaard 52876a
+#define XVBF16GER2NP   0b01110010
Mark Wielaard 52876a
+#define XVBF16GER2NN   0b11110010
Mark Wielaard 52876a
 #define XVF32GER       0b00011011
Mark Wielaard 52876a
 #define XVF32GERPP     0b00011010
Mark Wielaard 52876a
 #define XVF32GERPN     0b10011010
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
index 75497abb9..6bcee966d 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
@@ -1905,6 +1905,125 @@ static Double conv_f16_to_double( ULong input )
Mark Wielaard 52876a
 #  endif
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+#define BF16_SIGN_MASK   0x8000
Mark Wielaard 52876a
+#define BF16_EXP_MASK    0x7F80
Mark Wielaard 52876a
+#define BF16_FRAC_MASK   0x007F
Mark Wielaard 52876a
+#define BF16_BIAS        127
Mark Wielaard 52876a
+#define BF16_MAX_UNBIASED_EXP 127
Mark Wielaard 52876a
+#define BF16_MIN_UNBIASED_EXP -126
Mark Wielaard 52876a
+#define FLOAT_SIGN_MASK  0x80000000
Mark Wielaard 52876a
+#define FLOAT_EXP_MASK   0x7F800000
Mark Wielaard 52876a
+#define FLOAT_FRAC_MASK  0x007FFFFF
Mark Wielaard 52876a
+#define FLOAT_FRAC_BIT8  0x00008000
Mark Wielaard 52876a
+#define FLOAT_BIAS       127
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+static Float conv_bf16_to_float( UInt input )
Mark Wielaard 52876a
+{
Mark Wielaard 52876a
+  /* input is 16-bit bfloat.
Mark Wielaard 52876a
+     bias +127, exponent 8-bits, fraction 7-bits
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+     output is 32-bit float.
Mark Wielaard 52876a
+     bias +127, exponent 8-bits, fraction 23-bits
Mark Wielaard 52876a
+  */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  UInt input_exp, input_fraction, unbiased_exp;
Mark Wielaard 52876a
+  UInt output_exp, output_fraction;
Mark Wielaard 52876a
+  UInt sign;
Mark Wielaard 52876a
+  union convert_t conv;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  sign = (UInt)(input & BF16_SIGN_MASK);
Mark Wielaard 52876a
+  input_exp = input & BF16_EXP_MASK;
Mark Wielaard 52876a
+  unbiased_exp = (input_exp >> 7) - (UInt)BF16_BIAS;
Mark Wielaard 52876a
+  input_fraction = input & BF16_FRAC_MASK;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  if (((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&
Mark Wielaard 52876a
+      (input_fraction != 0)) {
Mark Wielaard 52876a
+     /* input is NaN or SNaN, exp all 1's, fraction != 0 */
Mark Wielaard 52876a
+     output_exp = FLOAT_EXP_MASK;
Mark Wielaard 52876a
+     output_fraction = input_fraction;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  } else if(((input_exp & BF16_EXP_MASK) == BF16_EXP_MASK) &&
Mark Wielaard 52876a
+      ( input_fraction == 0)) {
Mark Wielaard 52876a
+     /* input is infinity,  exp all 1's, fraction = 0  */
Mark Wielaard 52876a
+     output_exp = FLOAT_EXP_MASK;
Mark Wielaard 52876a
+     output_fraction = 0;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  } else if((input_exp == 0) && (input_fraction == 0)) {
Mark Wielaard 52876a
+     /* input is zero */
Mark Wielaard 52876a
+     output_exp = 0;
Mark Wielaard 52876a
+     output_fraction = 0;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  } else if((input_exp == 0) && (input_fraction != 0)) {
Mark Wielaard 52876a
+     /* input is denormal */
Mark Wielaard 52876a
+     output_fraction = input_fraction;
Mark Wielaard 52876a
+     output_exp = (-(Int)BF16_BIAS + (Int)FLOAT_BIAS ) << 23;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  } else {
Mark Wielaard 52876a
+     /* result is normal */
Mark Wielaard 52876a
+     output_exp = (unbiased_exp + FLOAT_BIAS) << 23;
Mark Wielaard 52876a
+     output_fraction = input_fraction;
Mark Wielaard 52876a
+  }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+  conv.u32 = sign << (31 - 15) | output_exp | (output_fraction << (23-7));
Mark Wielaard 52876a
+  return conv.f;
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+static UInt conv_float_to_bf16( UInt input )
Mark Wielaard 52876a
+{
Mark Wielaard 52876a
+   /* input is 32-bit float stored as unsigned 32-bit.
Mark Wielaard 52876a
+      bias +127, exponent 8-bits, fraction 23-bits
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      output is 16-bit bfloat.
Mark Wielaard 52876a
+      bias +127, exponent 8-bits, fraction 7-bits
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      If the unbiased exponent of the input is greater than the max floating
Mark Wielaard 52876a
+      point unbiased exponent value, the result of the floating point 16-bit
Mark Wielaard 52876a
+      value is infinity.
Mark Wielaard 52876a
+   */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   UInt input_exp, input_fraction;
Mark Wielaard 52876a
+   UInt output_exp, output_fraction;
Mark Wielaard 52876a
+   UInt result, sign;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   sign = input & FLOAT_SIGN_MASK;
Mark Wielaard 52876a
+   input_exp = input & FLOAT_EXP_MASK;
Mark Wielaard 52876a
+   input_fraction = input & FLOAT_FRAC_MASK;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&
Mark Wielaard 52876a
+       (input_fraction != 0)) {
Mark Wielaard 52876a
+      /* input is NaN or SNaN, exp all 1's, fraction != 0 */
Mark Wielaard 52876a
+      output_exp = BF16_EXP_MASK;
Mark Wielaard 52876a
+      output_fraction = (ULong)input_fraction >> (23 - 7);
Mark Wielaard 52876a
+   } else if (((input_exp & FLOAT_EXP_MASK) == FLOAT_EXP_MASK) &&
Mark Wielaard 52876a
+              ( input_fraction == 0)) {
Mark Wielaard 52876a
+      /* input is infinity,  exp all 1's, fraction = 0  */
Mark Wielaard 52876a
+      output_exp = BF16_EXP_MASK;
Mark Wielaard 52876a
+      output_fraction = 0;
Mark Wielaard 52876a
+   } else if ((input_exp == 0) && (input_fraction == 0)) {
Mark Wielaard 52876a
+      /* input is zero */
Mark Wielaard 52876a
+      output_exp = 0;
Mark Wielaard 52876a
+      output_fraction = 0;
Mark Wielaard 52876a
+   } else if ((input_exp == 0) && (input_fraction != 0)) {
Mark Wielaard 52876a
+      /* input is denormal */
Mark Wielaard 52876a
+      output_exp = 0;
Mark Wielaard 52876a
+      output_fraction = (ULong)input_fraction >> (23 - 7);
Mark Wielaard 52876a
+   } else {
Mark Wielaard 52876a
+      /* result is normal */
Mark Wielaard 52876a
+      output_exp = (input_exp - BF16_BIAS + FLOAT_BIAS) >> (23 - 7);
Mark Wielaard 52876a
+      output_fraction = (ULong)input_fraction >> (23 - 7);
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+      /* Round result. Look at the 8th bit position of the 32-bit floating
Mark Wielaard 52876a
+         point fraction.  The BF16 fraction is only 7 bits wide so if the 8th
Mark Wielaard 52876a
+         bit of the F32 is a 1 we need to round up by adding 1 to the output
Mark Wielaard 52876a
+         fraction.  */
Mark Wielaard 52876a
+      if ((input_fraction & FLOAT_FRAC_BIT8) == FLOAT_FRAC_BIT8)
Mark Wielaard 52876a
+         /* Round the BF16 fraction up by 1 */
Mark Wielaard 52876a
+         output_fraction = output_fraction + 1;
Mark Wielaard 52876a
+   }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   result = sign >> (31 - 15) | output_exp | output_fraction;
Mark Wielaard 52876a
+   return result;
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
 
Mark Wielaard 52876a
 static Float conv_double_to_float( Double src )
Mark Wielaard 52876a
 {
Mark Wielaard 52876a
@@ -1942,6 +2061,36 @@ static Float negate_float( Float input )
Mark Wielaard 52876a
       return -input;
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+/* This C-helper takes a vector of two 32-bit floating point values
Mark Wielaard 52876a
+ * and returns a vector containing two 16-bit bfloats.
Mark Wielaard 52876a
+   input:    word0           word1
Mark Wielaard 52876a
+   output  0x0   hword1   0x0    hword3
Mark Wielaard 52876a
+   Called from generated code.
Mark Wielaard 52876a
+ */
Mark Wielaard 52876a
+ULong convert_from_floattobf16_helper( ULong src ) {
Mark Wielaard 52876a
+   ULong resultHi, resultLo;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   resultHi = (ULong)conv_float_to_bf16( (UInt)(src >> 32));
Mark Wielaard 52876a
+   resultLo = (ULong)conv_float_to_bf16( (UInt)(src & 0xFFFFFFFF));
Mark Wielaard 52876a
+   return (resultHi << 32) | resultLo;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+/* This C-helper takes a vector of two 16-bit bfloating point values
Mark Wielaard 52876a
+ * and returns a vector containing two 32-bit floats.
Mark Wielaard 52876a
+   input:   0x0   hword1   0x0    hword3
Mark Wielaard 52876a
+   output:    word0           word1
Mark Wielaard 52876a
+ */
Mark Wielaard 52876a
+ULong convert_from_bf16tofloat_helper( ULong src ) {
Mark Wielaard 52876a
+   ULong result;
Mark Wielaard 52876a
+   union convert_t conv;
Mark Wielaard 52876a
+   conv.f = conv_bf16_to_float( (UInt)(src >> 32) );
Mark Wielaard 52876a
+   result = (ULong) conv.u32;
Mark Wielaard 52876a
+   conv.f = conv_bf16_to_float( (UInt)(src & 0xFFFFFFFF));
Mark Wielaard 52876a
+   result = (result << 32) | (ULong) conv.u32;
Mark Wielaard 52876a
+   return result;
Mark Wielaard 52876a
+ }
Mark Wielaard 52876a
+
Mark Wielaard 52876a
 void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
                                               UInt offset_ACC,
Mark Wielaard 52876a
                                               ULong srcA_hi, ULong srcA_lo,
Mark Wielaard 52876a
@@ -2002,24 +2151,44 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
          srcB_word[0][j] = (UInt)((srcB_lo >> (16-16*j)) & mask);
Mark Wielaard 52876a
       }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+      /* Note the isa is not consistent in the src naming.  Will use the
Mark Wielaard 52876a
+         naming src10, src11, src20, src21 used with xvf16ger2 instructions.
Mark Wielaard 52876a
+      */
Mark Wielaard 52876a
       for( j = 0; j < 4; j++) {
Mark Wielaard 52876a
          if (((pmsk >> 1) & 0x1) == 0) {
Mark Wielaard 52876a
             src10 = 0;
Mark Wielaard 52876a
             src20 = 0;
Mark Wielaard 52876a
          } else {
Mark Wielaard 52876a
-            src10 = conv_f16_to_double((ULong)srcA_word[i][0]);
Mark Wielaard 52876a
-            src20 = conv_f16_to_double((ULong)srcB_word[j][0]);
Mark Wielaard 52876a
+            if (( inst  == XVF16GER2 ) || ( inst  == XVF16GER2PP )
Mark Wielaard 52876a
+                || ( inst == XVF16GER2PN ) || ( inst  == XVF16GER2NP )
Mark Wielaard 52876a
+                || ( inst == XVF16GER2NN )) {
Mark Wielaard 52876a
+               src10 = conv_f16_to_double((ULong)srcA_word[i][0]);
Mark Wielaard 52876a
+               src20 = conv_f16_to_double((ULong)srcB_word[j][0]);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               /* Input is in bfloat format, result is stored in the
Mark Wielaard 52876a
+                  "traditional" 64-bit float format. */
Mark Wielaard 52876a
+               src10 = (double)conv_bf16_to_float((ULong)srcA_word[i][0]);
Mark Wielaard 52876a
+               src20 = (double)conv_bf16_to_float((ULong)srcB_word[j][0]);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
          }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
          if ((pmsk & 0x1) == 0) {
Mark Wielaard 52876a
             src11 = 0;
Mark Wielaard 52876a
             src21 = 0;
Mark Wielaard 52876a
          } else {
Mark Wielaard 52876a
-            src11 = conv_f16_to_double((ULong)srcA_word[i][1]);
Mark Wielaard 52876a
-            src21 = conv_f16_to_double((ULong)srcB_word[j][1]);
Mark Wielaard 52876a
+            if (( inst  == XVF16GER2 ) || ( inst  == XVF16GER2PP )
Mark Wielaard 52876a
+                || ( inst == XVF16GER2PN ) || ( inst  == XVF16GER2NP )
Mark Wielaard 52876a
+                || ( inst == XVF16GER2NN )) {
Mark Wielaard 52876a
+               src11 = conv_f16_to_double((ULong)srcA_word[i][1]);
Mark Wielaard 52876a
+               src21 = conv_f16_to_double((ULong)srcB_word[j][1]);
Mark Wielaard 52876a
+            } else {
Mark Wielaard 52876a
+               /* Input is in bfloat format, result is stored in the
Mark Wielaard 52876a
+                  "traditional" 64-bit float format. */
Mark Wielaard 52876a
+               src11 = (double)conv_bf16_to_float((ULong)srcA_word[i][1]);
Mark Wielaard 52876a
+               src21 = (double)conv_bf16_to_float((ULong)srcB_word[j][1]);
Mark Wielaard 52876a
+            }
Mark Wielaard 52876a
          }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-
Mark Wielaard 52876a
          prod = src10 * src20;
Mark Wielaard 52876a
          msum = prod + src11 * src21;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
@@ -2027,26 +2196,26 @@ void vsx_matrix_16bit_float_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
             /* Note, we do not track the exception handling bits
Mark Wielaard 52876a
                ox, ux, xx, si, mz, vxsnan and vximz in the FPSCR.  */
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            if ( inst == XVF16GER2 )
Mark Wielaard 52876a
+            if (( inst == XVF16GER2 ) || ( inst == XVBF16GER2 ) )
Mark Wielaard 52876a
                result[j] = reinterpret_float_as_int(
Mark Wielaard 52876a
                   conv_double_to_float(msum) );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVF16GER2PP )
Mark Wielaard 52876a
+            else if (( inst == XVF16GER2PP ) ||  (inst == XVBF16GER2PP ))
Mark Wielaard 52876a
                result[j] = reinterpret_float_as_int(
Mark Wielaard 52876a
                   conv_double_to_float(msum)
Mark Wielaard 52876a
                   + acc_word[j] );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVF16GER2PN )
Mark Wielaard 52876a
+            else if (( inst == XVF16GER2PN ) || ( inst == XVBF16GER2PN ))
Mark Wielaard 52876a
                result[j] = reinterpret_float_as_int(
Mark Wielaard 52876a
                   conv_double_to_float(msum)
Mark Wielaard 52876a
                   + negate_float( acc_word[j] ) );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVF16GER2NP )
Mark Wielaard 52876a
+            else if (( inst == XVF16GER2NP ) || ( inst == XVBF16GER2NP ))
Mark Wielaard 52876a
                result[j] = reinterpret_float_as_int(
Mark Wielaard 52876a
                   conv_double_to_float( negate_double( msum ) )
Mark Wielaard 52876a
                   + acc_word[j] );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVF16GER2NN )
Mark Wielaard 52876a
+            else if (( inst == XVF16GER2NN ) || ( inst == XVBF16GER2NN ))
Mark Wielaard 52876a
                result[j] = reinterpret_float_as_int(
Mark Wielaard 52876a
                   conv_double_to_float( negate_double( msum ) )
Mark Wielaard 52876a
                   + negate_float( acc_word[j] ) );
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
index 354be6b53..20553a539 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
@@ -5688,6 +5688,57 @@ static IRExpr * convert_from_national ( const VexAbiInfo* vbi, IRExpr *src ) {
Mark Wielaard 52876a
    return mkexpr( result );
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+static IRExpr * vector_convert_floattobf16 ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
+                                             IRExpr *src ) {
Mark Wielaard 52876a
+   /* The function takes 128-bit value containing four 32-bit floats and
Mark Wielaard 52876a
+      returns a 128-bit value containing four 16-bit bfloats in the lower
Mark Wielaard 52876a
+      halfwords. */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   IRTemp resultHi = newTemp( Ity_I64);
Mark Wielaard 52876a
+   IRTemp resultLo = newTemp( Ity_I64);
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( resultHi,
Mark Wielaard 52876a
+           mkIRExprCCall( Ity_I64, 0 /*regparms*/,
Mark Wielaard 52876a
+                          "vector_convert_floattobf16_helper",
Mark Wielaard 52876a
+                          fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                                            &convert_from_floattobf16_helper ),
Mark Wielaard 52876a
+                          mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( resultLo,
Mark Wielaard 52876a
+           mkIRExprCCall( Ity_I64, 0 /*regparms*/,
Mark Wielaard 52876a
+                          "vector_convert_floattobf16_helper",
Mark Wielaard 52876a
+                          fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                                            &convert_from_floattobf16_helper ),
Mark Wielaard 52876a
+                          mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) );
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+static IRExpr * vector_convert_bf16tofloat ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
+                                             IRExpr *src ) {
Mark Wielaard 52876a
+   /* The function takes 128-bit value containing four 16-bit bfloats in
Mark Wielaard 52876a
+      the lower halfwords and returns a 128-bit value containing four
Mark Wielaard 52876a
+      32-bit floats. */
Mark Wielaard 52876a
+   IRTemp resultHi = newTemp( Ity_I64);
Mark Wielaard 52876a
+   IRTemp resultLo = newTemp( Ity_I64);
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( resultHi,
Mark Wielaard 52876a
+           mkIRExprCCall( Ity_I64, 0 /*regparms*/,
Mark Wielaard 52876a
+                          "vector_convert_bf16tofloat_helper",
Mark Wielaard 52876a
+                          fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                                            &convert_from_bf16tofloat_helper ),
Mark Wielaard 52876a
+                          mkIRExprVec_1( unop( Iop_V128HIto64, src ) ) ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   assign( resultLo,
Mark Wielaard 52876a
+           mkIRExprCCall( Ity_I64, 0 /*regparms*/,
Mark Wielaard 52876a
+                          "vector_convert_bf16tofloat_helper",
Mark Wielaard 52876a
+                          fnptr_to_fnentry( vbi,
Mark Wielaard 52876a
+                                            &convert_from_bf16tofloat_helper ),
Mark Wielaard 52876a
+                          mkIRExprVec_1( unop( Iop_V128to64, src ) ) ) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+   return binop( Iop_64HLtoV128, mkexpr( resultHi ), mkexpr( resultLo ) );
Mark Wielaard 52876a
+}
Mark Wielaard 52876a
+
Mark Wielaard 52876a
 static IRExpr * popcnt64 ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
                            IRExpr *src ){
Mark Wielaard 52876a
    /* The function takes a 64-bit source and counts the number of bits in the
Mark Wielaard 52876a
@@ -5936,6 +5987,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
    case XVI16GER2:
Mark Wielaard 52876a
    case XVI16GER2S:
Mark Wielaard 52876a
    case XVF16GER2:
Mark Wielaard 52876a
+   case XVBF16GER2:
Mark Wielaard 52876a
    case XVF32GER:
Mark Wielaard 52876a
          AT_fx = Ifx_Write;
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
@@ -5943,6 +5995,10 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
    case XVI8GER4PP:
Mark Wielaard 52876a
    case XVI16GER2PP:
Mark Wielaard 52876a
    case XVI16GER2SPP:
Mark Wielaard 52876a
+   case XVBF16GER2PP:
Mark Wielaard 52876a
+   case XVBF16GER2PN:
Mark Wielaard 52876a
+   case XVBF16GER2NP:
Mark Wielaard 52876a
+   case XVBF16GER2NN:
Mark Wielaard 52876a
    case XVF16GER2PP:
Mark Wielaard 52876a
    case XVF16GER2PN:
Mark Wielaard 52876a
    case XVF16GER2NP:
Mark Wielaard 52876a
@@ -23899,6 +23955,24 @@ dis_vxs_misc( UInt prefix, UInt theInstr, const VexAbiInfo* vbi, UInt opc2,
Mark Wielaard 52876a
                                     mkexpr( sub_element1 ),
Mark Wielaard 52876a
                                     mkexpr( sub_element0 ) ) ) );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
+         } else if ((inst_select == 16) && !prefix) {
Mark Wielaard 52876a
+            IRTemp result = newTemp(Ity_V128);
Mark Wielaard 52876a
+            UChar xT_addr = ifieldRegXT ( theInstr );
Mark Wielaard 52876a
+            UChar xB_addr = ifieldRegXB ( theInstr );
Mark Wielaard 52876a
+            /* Convert 16-bit bfloat to 32-bit float, not a prefix inst */
Mark Wielaard 52876a
+            DIP("xvcvbf16sp v%u,v%u\n", xT_addr, xB_addr);
Mark Wielaard 52876a
+            assign( result, vector_convert_bf16tofloat( vbi, mkexpr( vB ) ) );
Mark Wielaard 52876a
+            putVSReg( XT, mkexpr( result) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+         } else if ((inst_select == 17) && !prefix) {
Mark Wielaard 52876a
+            IRTemp result = newTemp(Ity_V128);
Mark Wielaard 52876a
+            UChar xT_addr = ifieldRegXT ( theInstr );
Mark Wielaard 52876a
+            UChar xB_addr = ifieldRegXB ( theInstr );
Mark Wielaard 52876a
+            /* Convert 32-bit float to 16-bit bfloat, not a prefix inst */
Mark Wielaard 52876a
+            DIP("xvcvspbf16 v%u,v%u\n", xT_addr, xB_addr);
Mark Wielaard 52876a
+            assign( result, vector_convert_floattobf16( vbi, mkexpr( vB ) ) );
Mark Wielaard 52876a
+            putVSReg( XT, mkexpr( result) );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
          } else if (inst_select == 23) {
Mark Wielaard 52876a
             DIP("xxbrd v%u, v%u\n", (UInt)XT, (UInt)XB);
Mark Wielaard 52876a
 
Mark Wielaard 52876a
@@ -34956,6 +35030,41 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
                          getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
                          ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
+      case XVBF16GER2:
Mark Wielaard 52876a
+         DIP("xvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
+                         ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2PP:
Mark Wielaard 52876a
+         DIP("xvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
+                         ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2PN:
Mark Wielaard 52876a
+         DIP("xvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
+                         ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2NP:
Mark Wielaard 52876a
+         DIP("xvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
+                         ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2NN:
Mark Wielaard 52876a
+         DIP("xvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ), AT,
Mark Wielaard 52876a
+                         ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
       case XVF32GER:
Mark Wielaard 52876a
          DIP("xvf32ger %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
          vsx_matrix_ger( vbi, MATRIX_32BIT_FLOAT_GER,
Mark Wielaard 52876a
@@ -35106,6 +35215,61 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
                          AT,
Mark Wielaard 52876a
                          ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
+      case XVBF16GER2:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvbf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( (MASKS << 9 )
Mark Wielaard 52876a
+                               | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2PP:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvbf16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( (MASKS << 9 )
Mark Wielaard 52876a
+                               | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2PN:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvbf16ger2pn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( (MASKS << 9 )
Mark Wielaard 52876a
+                               | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2NP:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvbf16ger2np %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( (MASKS << 9 )
Mark Wielaard 52876a
+                               | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVBF16GER2NN:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvbf16ger2nn %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ),
Mark Wielaard 52876a
+                         getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( (MASKS << 9 )
Mark Wielaard 52876a
+                               | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
       case XVF16GER2:
Mark Wielaard 52876a
          PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
          XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
@@ -36181,6 +36345,11 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
              (opc2 == XVI4GER8PP)     ||       // xvi4ger8pp
Mark Wielaard 52876a
              (opc2 == XVI8GER4)       ||       // xvi8ger4
Mark Wielaard 52876a
              (opc2 == XVI8GER4PP)     ||       // xvi8ger4pp
Mark Wielaard 52876a
+             (opc2 == XVBF16GER2)     ||       // xvbf16ger2
Mark Wielaard 52876a
+             (opc2 == XVBF16GER2PP)   ||       // xvbf16ger2pp
Mark Wielaard 52876a
+             (opc2 == XVBF16GER2PN)   ||       // xvbf16ger2pn
Mark Wielaard 52876a
+             (opc2 == XVBF16GER2NP)   ||       // xvbf16ger2np
Mark Wielaard 52876a
+             (opc2 == XVBF16GER2NN)   ||       // xvbf16ger2nn
Mark Wielaard 52876a
              (opc2 == XVF16GER2)      ||       // xvf16ger2
Mark Wielaard 52876a
              (opc2 == XVF16GER2PP)    ||       // xvf16ger2pp
Mark Wielaard 52876a
              (opc2 == XVF16GER2PN)    ||       // xvf16ger2pn
Mark Wielaard 52876a
commit e09fdaf569b975717465ed8043820d0198d4d47d
Mark Wielaard 52876a
Author: Carl Love <cel@us.ibm.com>
Mark Wielaard 52876a
Date:   Fri Feb 26 16:05:12 2021 -0600
Mark Wielaard 52876a
Mark Wielaard 52876a
    PPC64: Reduced-Precision: Missing Integer-based Outer Product Operations
Mark Wielaard 52876a
    
Mark Wielaard 52876a
    Add support for:
Mark Wielaard 52876a
    
Mark Wielaard 52876a
    pmxvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update), Prefixed
Mark Wielaard 52876a
       Masked
Mark Wielaard 52876a
    pmxvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive
Mark Wielaard 52876a
       multiply, Positive accumulate), Prefixed Masked
Mark Wielaard 52876a
    pmxvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with
Mark Wielaard 52876a
       Saturation (Positive multiply, Positive accumulate), Prefixed Masked
Mark Wielaard 52876a
    xvi16ger2 VSX Vector 16-bit Signed Integer GER (rank-2 update)
Mark Wielaard 52876a
    xvi16ger2pp VSX Vector 16-bit Signed Integer GER (rank-2 update) (Positive
Mark Wielaard 52876a
       multiply, Positive accumulate)
Mark Wielaard 52876a
    xvi8ger4spp VSX Vector 8-bit Signed/Unsigned Integer GER (rank-4 update) with
Mark Wielaard 52876a
       Saturation (Positive multiply, Positive accumulate)
Mark Wielaard 52876a
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_helpers.c b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
index 6bcee966d..d8131eb60 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_helpers.c
Mark Wielaard 52876a
@@ -1446,16 +1446,16 @@ static UInt exts4( UInt src)
Mark Wielaard 52876a
       return src & 0xF;        /* make sure high order bits are zero */
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-static UInt exts8( UInt src)
Mark Wielaard 52876a
+static ULong exts8( UInt src)
Mark Wielaard 52876a
 {
Mark Wielaard 52876a
-   /* Input is an 8-bit value.  Extend bit 7 to bits [31:8] */
Mark Wielaard 52876a
+   /* Input is an 8-bit value.  Extend bit 7 to bits [63:8] */
Mark Wielaard 52876a
    if (( src >> 7 ) & 0x1)
Mark Wielaard 52876a
-      return src | 0xFFFFFF00; /* sign bit is a 1, extend */
Mark Wielaard 52876a
+      return src | 0xFFFFFFFFFFFFFF00ULL; /* sign bit is a 1, extend */
Mark Wielaard 52876a
    else
Mark Wielaard 52876a
       return src & 0xFF;        /* make sure high order bits are zero */
Mark Wielaard 52876a
 }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-static UInt extz8( UInt src)
Mark Wielaard 52876a
+static ULong extz8( UInt src)
Mark Wielaard 52876a
 {
Mark Wielaard 52876a
    /* Input is an 8-bit value.  Extend src on the left with zeros.  */
Mark Wielaard 52876a
    return src & 0xFF;        /* make sure high order bits are zero */
Mark Wielaard 52876a
@@ -1662,12 +1662,12 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
                                        ULong srcB_hi, ULong srcB_lo,
Mark Wielaard 52876a
                                        UInt masks_inst )
Mark Wielaard 52876a
 {
Mark Wielaard 52876a
-   UInt i, j, mask, sum, inst, acc_entry, prefix_inst;
Mark Wielaard 52876a
+   UInt i, j, mask, inst, acc_entry, prefix_inst;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
    UInt srcA_bytes[4][4];   /* word, byte */
Mark Wielaard 52876a
    UInt srcB_bytes[4][4];   /* word, byte */
Mark Wielaard 52876a
    UInt acc_word[4];
Mark Wielaard 52876a
-   UInt prod0, prod1, prod2, prod3;
Mark Wielaard 52876a
+   ULong prod0, prod1, prod2, prod3, sum;
Mark Wielaard 52876a
    UInt result[4];
Mark Wielaard 52876a
    UInt pmsk = 0;
Mark Wielaard 52876a
    UInt xmsk = 0;
Mark Wielaard 52876a
@@ -1742,10 +1742,13 @@ void vsx_matrix_8bit_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
             sum = prod0 + prod1 + prod2 + prod3;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
             if ( inst == XVI8GER4 )
Mark Wielaard 52876a
-               result[j] = sum;
Mark Wielaard 52876a
+               result[j] = chop64to32( sum );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
             else if ( inst == XVI8GER4PP )
Mark Wielaard 52876a
-               result[j] = sum + acc_word[j];
Mark Wielaard 52876a
+               result[j] = chop64to32( sum + acc_word[j] );
Mark Wielaard 52876a
+
Mark Wielaard 52876a
+            else if ( inst == XVI8GER4SPP )
Mark Wielaard 52876a
+               result[j] = clampS64toS32(sum + acc_word[j]);
Mark Wielaard 52876a
 
Mark Wielaard 52876a
          } else {
Mark Wielaard 52876a
             result[j] = 0;
Mark Wielaard 52876a
@@ -1821,7 +1824,7 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
             else
Mark Wielaard 52876a
                prod1 = exts16to64( srcA_word[i][1] )
Mark Wielaard 52876a
                   * exts16to64( srcB_word[j][1] );
Mark Wielaard 52876a
-            /* sum is UInt so the result is choped to 32-bits */
Mark Wielaard 52876a
+
Mark Wielaard 52876a
             sum = prod0 + prod1;
Mark Wielaard 52876a
 
Mark Wielaard 52876a
             if ( inst == XVI16GER2 )
Mark Wielaard 52876a
@@ -1830,13 +1833,11 @@ void vsx_matrix_16bit_ger_dirty_helper( VexGuestPPC64State* gst,
Mark Wielaard 52876a
             else if ( inst == XVI16GER2S )
Mark Wielaard 52876a
                result[j] = clampS64toS32( sum );
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVI16GER2PP ) {
Mark Wielaard 52876a
+            else if ( inst == XVI16GER2PP )
Mark Wielaard 52876a
                result[j] = chop64to32( sum + acc_word[j] );
Mark Wielaard 52876a
-            }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
-            else if ( inst == XVI16GER2SPP ) {
Mark Wielaard 52876a
+            else if ( inst == XVI16GER2SPP )
Mark Wielaard 52876a
                result[j] = clampS64toS32( sum + acc_word[j] );
Mark Wielaard 52876a
-            }
Mark Wielaard 52876a
 
Mark Wielaard 52876a
          } else {
Mark Wielaard 52876a
             result[j] = 0;
Mark Wielaard 52876a
diff --git a/VEX/priv/guest_ppc_toIR.c b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
index 20553a539..e54f0f389 100644
Mark Wielaard 52876a
--- a/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
+++ b/VEX/priv/guest_ppc_toIR.c
Mark Wielaard 52876a
@@ -5993,6 +5993,7 @@ static void vsx_matrix_ger ( const VexAbiInfo* vbi,
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
    case XVI4GER8PP:
Mark Wielaard 52876a
    case XVI8GER4PP:
Mark Wielaard 52876a
+   case XVI8GER4SPP:
Mark Wielaard 52876a
    case XVI16GER2PP:
Mark Wielaard 52876a
    case XVI16GER2SPP:
Mark Wielaard 52876a
    case XVBF16GER2PP:
Mark Wielaard 52876a
@@ -34983,6 +34984,12 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
                          getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
                          AT, ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
+      case XVI8GER4SPP:
Mark Wielaard 52876a
+         DIP("xvi8ger4spp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
       case XVI16GER2S:
Mark Wielaard 52876a
          DIP("xvi16ger2s %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
          vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
Mark Wielaard 52876a
@@ -34995,6 +35002,19 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
                          getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
                          AT, ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
+      case XVI16GER2:
Mark Wielaard 52876a
+         DIP("xvi16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVI16GER2PP:
Mark Wielaard 52876a
+         DIP("xvi16ger2pp %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT, ( ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+
Mark Wielaard 52876a
       case XVF16GER2:
Mark Wielaard 52876a
          DIP("xvf16ger2 %u,r%u, r%u\n", AT, rA_addr, rB_addr);
Mark Wielaard 52876a
          vsx_matrix_ger( vbi, MATRIX_16BIT_FLOAT_GER,
Mark Wielaard 52876a
@@ -35193,6 +35213,39 @@ static Bool dis_vsx_accumulator_prefix ( UInt prefix, UInt theInstr,
Mark Wielaard 52876a
                          AT,
Mark Wielaard 52876a
                          ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
          break;
Mark Wielaard 52876a
+      case XVI8GER4SPP:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 12, 4);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvi8ger4spp %u,r%u, r%u,%u,%u,%u\n",
Mark Wielaard 52876a
+             AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_8BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT,
Mark Wielaard 52876a
+                         ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVI16GER2:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 12, 4);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvi16ger2 %u,r%u, r%u,%u,%u,%u\n",
Mark Wielaard 52876a
+             AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT,
Mark Wielaard 52876a
+                         ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
+      case XVI16GER2PP:
Mark Wielaard 52876a
+         PMSK = IFIELD( prefix, 12, 4);
Mark Wielaard 52876a
+         XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
+         YMSK = IFIELD( prefix, 0, 4);
Mark Wielaard 52876a
+         DIP("pmxvi16ger2pp %u,r%u, r%u,%u,%u,%u\n",
Mark Wielaard 52876a
+             AT, rA_addr, rB_addr, XMSK, YMSK, PMSK);
Mark Wielaard 52876a
+         vsx_matrix_ger( vbi, MATRIX_16BIT_INT_GER,
Mark Wielaard 52876a
+                         getVSReg( rA_addr ), getVSReg( rB_addr ),
Mark Wielaard 52876a
+                         AT,
Mark Wielaard 52876a
+                         ( (MASKS << 9 ) | ( inst_prefix << 8 ) | XO ) );
Mark Wielaard 52876a
+         break;
Mark Wielaard 52876a
       case XVI16GER2S:
Mark Wielaard 52876a
          PMSK = IFIELD( prefix, 14, 2);
Mark Wielaard 52876a
          XMSK = IFIELD( prefix, 4, 4);
Mark Wielaard 52876a
@@ -36345,6 +36398,9 @@ DisResult disInstr_PPC_WRK (
Mark Wielaard 52876a
              (opc2 == XVI4GER8PP)     ||       // xvi4ger8pp
Mark Wielaard 52876a
              (opc2 == XVI8GER4)       ||       // xvi8ger4
Mark Wielaard 52876a
              (opc2 == XVI8GER4PP)     ||       // xvi8ger4pp
Mark Wielaard 52876a
+             (opc2 == XVI8GER4SPP)    ||       // xvi8ger4spp
Mark Wielaard 52876a
+             (opc2 == XVI16GER2)      ||       // xvi16ger2
Mark Wielaard 52876a
+             (opc2 == XVI16GER2PP)    ||       // xvi16ger2pp
Mark Wielaard 52876a
              (opc2 == XVBF16GER2)     ||       // xvbf16ger2
Mark Wielaard 52876a
              (opc2 == XVBF16GER2PP)   ||       // xvbf16ger2pp
Mark Wielaard 52876a
              (opc2 == XVBF16GER2PN)   ||       // xvbf16ger2pn