From 94c1037926c45da300d0b9ed62e0cc30d7a475a6 Mon Sep 17 00:00:00 2001
From: CentOS Sources <bugs@centos.org>
Date: Nov 03 2016 06:09:43 +0000
Subject: import mesa-private-llvm-3.8.1-1.el7


---

diff --git a/.gitignore b/.gitignore
index 7c6c75c..5105db5 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-SOURCES/llvm-3.6.2.src.tar.xz
+SOURCES/llvm-3.8.1.src.tar.xz
diff --git a/.mesa-private-llvm.metadata b/.mesa-private-llvm.metadata
index fb549fa..049decf 100644
--- a/.mesa-private-llvm.metadata
+++ b/.mesa-private-llvm.metadata
@@ -1 +1 @@
-7a00257eb2bc9431e4c77c3a36b033072c54bc7e SOURCES/llvm-3.6.2.src.tar.xz
+e0c48c4c182424b99999367d688cd8ce7876827b SOURCES/llvm-3.8.1.src.tar.xz
diff --git a/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch b/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch
deleted file mode 100644
index e4ea42c..0000000
--- a/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch
+++ /dev/null
@@ -1,48 +0,0 @@
-From 5717e28019e7348a04f63dcf965121171da15c62 Mon Sep 17 00:00:00 2001
-From: James Molloy <james.molloy@arm.com>
-Date: Thu, 16 Apr 2015 11:37:40 +0000
-Subject: [PATCH] [AArch64] Fix invalid use of references to BuildMI.
-
-This was found in GCC PR65773 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65773).
-
-We shouldn't be taking a reference to the temporary that BuildMI returns, we must copy it.
-
-git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235088 91177308-0d34-0410-b5e6-96231b3b80d8
----
- lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +++---
- 1 file changed, 3 insertions(+), 3 deletions(-)
-
-diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp
-index 8e0af2d..db231c4 100644
---- a/lib/Target/AArch64/AArch64InstrInfo.cpp
-+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp
-@@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple(
-   }
- 
-   for (; SubReg != End; SubReg += Incr) {
--    const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode));
-+    const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
-     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
-     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
-     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
-@@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot(
-   }
-   assert(Opc && "Unknown register class");
- 
--  const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
-+  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
-                                       .addReg(SrcReg, getKillRegState(isKill))
-                                       .addFrameIndex(FI);
- 
-@@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot(
-   }
-   assert(Opc && "Unknown register class");
- 
--  const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc))
-+  const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc))
-                                       .addReg(DestReg, getDefRegState(true))
-                                       .addFrameIndex(FI);
-   if (Offset)
--- 
-2.4.3
-
diff --git a/SOURCES/fix-cmake-include.patch b/SOURCES/fix-cmake-include.patch
new file mode 100644
index 0000000..842b5c1
--- /dev/null
+++ b/SOURCES/fix-cmake-include.patch
@@ -0,0 +1,41 @@
+diff -up llvm-3.8.0rc2.src/CMakeLists.txt.fixinc llvm-3.8.0rc2.src/CMakeLists.txt
+--- llvm-3.8.0rc2.src/CMakeLists.txt.fixinc	2016-01-14 05:03:44.000000000 +1000
++++ llvm-3.8.0rc2.src/CMakeLists.txt	2016-02-26 10:21:44.477295728 +1000
+@@ -192,6 +192,7 @@ else()
+ endif()
+ 
+ # Each of them corresponds to llvm-config's.
++#
+ set(LLVM_TOOLS_BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) # --bindir
+ set(LLVM_LIBRARY_DIR      ${LLVM_LIBRARY_OUTPUT_INTDIR}) # --libdir
+ set(LLVM_MAIN_SRC_DIR     ${CMAKE_CURRENT_SOURCE_DIR}  ) # --src-root
+@@ -558,6 +559,11 @@ set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LL
+ set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
+ set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} )
+ 
++if(INCLUDE_INSTALL_DIR)
++else()
++set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include)
++endif()
++
+ set(CMAKE_BUILD_WITH_INSTALL_RPATH ON)
+ if (APPLE)
+   set(CMAKE_INSTALL_NAME_DIR "@rpath")
+@@ -728,7 +734,7 @@ add_subdirectory(cmake/modules)
+ 
+ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
+   install(DIRECTORY include/llvm include/llvm-c
+-    DESTINATION include
++    DESTINATION "${INCLUDE_INSTALL_DIR}"
+     COMPONENT llvm-headers
+     FILES_MATCHING
+     PATTERN "*.def"
+@@ -740,7 +746,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY)
+     )
+ 
+   install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm
+-    DESTINATION include
++    DESTINATION "${INCLUDE_INSTALL_DIR}"
+     COMPONENT llvm-headers
+     FILES_MATCHING
+     PATTERN "*.def"
diff --git a/SOURCES/llvm-2.6-timestamp.patch b/SOURCES/llvm-2.6-timestamp.patch
deleted file mode 100644
index ab0979e..0000000
--- a/SOURCES/llvm-2.6-timestamp.patch
+++ /dev/null
@@ -1,11 +0,0 @@
---- llvm-2.6/Makefile.rules.timestamp	2009-08-19 18:04:44.000000000 -0400
-+++ llvm-2.6/Makefile.rules	2009-09-09 02:10:38.287389725 -0400
-@@ -672,7 +672,7 @@
- 
- ProgInstall   = $(INSTALL) $(Install.StripFlag) -m 0755
- ScriptInstall = $(INSTALL) -m 0755
--DataInstall   = $(INSTALL) -m 0644
-+DataInstall   = $(INSTALL) -p -m 0644
- 
- # When compiling under Mingw/Cygwin, the tblgen tool expects Windows
- # paths. In this case, the SYSPATH function (defined in
diff --git a/SOURCES/llvm-3.6-large-struct-return.patch b/SOURCES/llvm-3.6-large-struct-return.patch
deleted file mode 100644
index d387539..0000000
--- a/SOURCES/llvm-3.6-large-struct-return.patch
+++ /dev/null
@@ -1,368 +0,0 @@
-------------------------------------------------------------------------
-r244889 | uweigand | 2015-08-13 15:37:06 +0200 (Thu, 13 Aug 2015) | 22 lines
-
-[SystemZ] Support large LLVM IR struct return values
-
-Recent mesa/llvmpipe crashes on SystemZ due to a failed assertion when
-attempting to compile a routine with a return type of
-  { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
-on a system without vector instruction support.
-
-This is because after legalizing the vector type, we get a return value
-consisting of 16 floats, which cannot all be returned in registers.
-
-Usually, what should happen in this case is that the target's CanLowerReturn
-routine rejects the return type, in which case SelectionDAG falls back to
-implementing a structure return in memory via implicit reference.
-
-However, the SystemZ target never actually implemented any CanLowerReturn
-routine, and thus would accept any struct return type.
-
-This patch fixes the crash by implementing CanLowerReturn.  As a side effect,
-this also handles fp128 return values, fixing a todo that was noted in
-SystemZCallingConv.td.
-
-Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td
-+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
-@@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[
-   CCIfSubtarget<"hasVector()",
-     CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-              CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
--
--  // ABI-compliant code returns long double by reference, but that conversion
--  // is left to higher-level code.  Perhaps we could add an f128 definition
--  // here for code that doesn't care about the ABI?
- ]>;
- 
- //===----------------------------------------------------------------------===//
-Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
-@@ -1169,6 +1169,20 @@ SystemZTargetLowering::LowerCall(CallLow
-   return Chain;
- }
- 
-+bool SystemZTargetLowering::
-+CanLowerReturn(CallingConv::ID CallConv,
-+               MachineFunction &MF, bool isVarArg,
-+               const SmallVectorImpl<ISD::OutputArg> &Outs,
-+               LLVMContext &Context) const {
-+  // Detect unsupported vector return types.
-+  if (Subtarget.hasVector())
-+    VerifyVectorTypes(Outs);
-+
-+  SmallVector<CCValAssign, 16> RetLocs;
-+  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
-+  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
-+}
-+
- SDValue
- SystemZTargetLowering::LowerReturn(SDValue Chain,
-                                    CallingConv::ID CallConv, bool IsVarArg,
-Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h
-+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
-@@ -401,6 +401,10 @@ public:
-   SDValue LowerCall(CallLoweringInfo &CLI,
-                     SmallVectorImpl<SDValue> &InVals) const override;
- 
-+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
-+                      bool isVarArg,
-+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
-+                      LLVMContext &Context) const override;
-   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
-                       const SmallVectorImpl<ISD::OutputArg> &Outs,
-                       const SmallVectorImpl<SDValue> &OutVals,
-Index: llvm-36/test/CodeGen/SystemZ/args-04.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/args-04.ll
-+++ llvm-36/test/CodeGen/SystemZ/args-04.ll
-@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i3
-   store fp128 %y, fp128 *%r2
-   ret void
- }
-+
-+; Explicit fp128 return values are likewise passed indirectly.
-+define fp128 @f14(fp128 %r3) {
-+; CHECK-LABEL: f14:
-+; CHECK: ld %f0, 0(%r3)
-+; CHECK: ld %f2, 8(%r3)
-+; CHECK: axbr %f0, %f0
-+; CHECK: std %f0, 0(%r2)
-+; CHECK: std %f2, 8(%r2)
-+; CHECK: br %r14
-+  %y = fadd fp128 %r3, %r3
-+  ret fp128 %y
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/args-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/args-07.ll
-@@ -0,0 +1,60 @@
-+; Test multiple return values (LLVM ABI extension)
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+
-+; Up to four integer return values fit into GPRs.
-+define { i64, i64, i64, i64 } @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: lghi %r2, 0
-+; CHECK: lghi %r3, 1
-+; CHECK: lghi %r4, 2
-+; CHECK: lghi %r5, 3
-+; CHECK: br %r14
-+  ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 }
-+}
-+
-+; More than four integer return values use sret.
-+define { i64, i64, i64, i64, i64 } @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: mvghi 32(%r2), 4
-+; CHECK: mvghi 24(%r2), 3
-+; CHECK: mvghi 16(%r2), 2
-+; CHECK: mvghi 8(%r2), 1
-+; CHECK: mvghi 0(%r2), 0
-+; CHECK: br %r14
-+  ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 }
-+}
-+
-+; Up to four floating-point return values fit into FPRs.
-+define { double, double, double, double } @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: ldeb %f0, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: ldeb %f2, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: ldeb %f4, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: ldeb %f6, 0([[TMP]])
-+; CHECK: br %r14
-+  ret { double, double, double, double }
-+      { double 1.0, double 2.0, double 3.0, double 4.0 }
-+}
-+
-+; More than four floating-point return values use sret.
-+define { double, double, double, double, double } @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: llihh [[TMP:%r[0-5]]], 16404
-+; CHECK: stg [[TMP]], 32(%r2)
-+; CHECK: llihh [[TMP:%r[0-5]]], 16400
-+; CHECK: stg [[TMP]], 24(%r2)
-+; CHECK: llihh [[TMP:%r[0-5]]], 16392
-+; CHECK: stg [[TMP]], 16(%r2)
-+; CHECK: llihh [[TMP:%r[0-5]]], 16384
-+; CHECK: stg [[TMP]], 8(%r2)
-+; CHECK: llihh [[TMP:%r[0-5]]], 16368
-+; CHECK: stg [[TMP]], 0(%r2)
-+; CHECK: br %r14
-+  ret { double, double, double, double, double }
-+      { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 }
-+}
-Index: llvm-36/test/CodeGen/SystemZ/args-08.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/args-08.ll
-@@ -0,0 +1,57 @@
-+; Test calling functions with multiple return values (LLVM ABI extension)
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+
-+; Up to four integer return values fit into GPRs.
-+declare { i64, i64, i64, i64 } @bar1()
-+
-+define i64 @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: brasl %r14, bar1
-+; CHECK: lgr %r2, %r5
-+; CHECK: br %r14
-+  %mret = call { i64, i64, i64, i64 } @bar1()
-+  %ret = extractvalue { i64, i64, i64, i64 } %mret, 3
-+  ret i64 %ret
-+}
-+
-+; More than four integer return values use sret.
-+declare { i64, i64, i64, i64, i64 } @bar2()
-+
-+define i64 @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: la %r2, 160(%r15)
-+; CHECK: brasl %r14, bar2
-+; CHECK: lg  %r2, 192(%r15)
-+; CHECK: br %r14
-+  %mret = call { i64, i64, i64, i64, i64 } @bar2()
-+  %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4
-+  ret i64 %ret
-+}
-+
-+; Up to four floating-point return values fit into GPRs.
-+declare { double, double, double, double } @bar3()
-+
-+define double @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: brasl %r14, bar3
-+; CHECK: ldr %f0, %f6
-+; CHECK: br %r14
-+  %mret = call { double, double, double, double } @bar3()
-+  %ret = extractvalue { double, double, double, double } %mret, 3
-+  ret double %ret
-+}
-+
-+; More than four integer return values use sret.
-+declare { double, double, double, double, double } @bar4()
-+
-+define double @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: la %r2, 160(%r15)
-+; CHECK: brasl %r14, bar4
-+; CHECK: ld  %f0, 192(%r15)
-+; CHECK: br %r14
-+  %mret = call { double, double, double, double, double } @bar4()
-+  %ret = extractvalue { double, double, double, double, double } %mret, 4
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-06.ll
-@@ -0,0 +1,83 @@
-+; Test multiple return values (LLVM ABI extension)
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Up to eight vector return values fit into VRs.
-+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+         <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v24, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v26, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v28, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v30, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v25, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v27, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v29, 0([[TMP]])
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl %v31, 0([[TMP]])
-+; CHECK: br %r14
-+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+        <2 x double>, <2 x double>, <2 x double>, <2 x double> }
-+      { <2 x double> <double 1.0, double 1.1>,
-+        <2 x double> <double 2.0, double 2.1>,
-+        <2 x double> <double 3.0, double 3.1>,
-+        <2 x double> <double 4.0, double 4.1>,
-+        <2 x double> <double 5.0, double 5.1>,
-+        <2 x double> <double 6.0, double 6.1>,
-+        <2 x double> <double 7.0, double 7.1>,
-+        <2 x double> <double 8.0, double 8.1> }
-+}
-+
-+; More than eight vector return values use sret.
-+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+         <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+         <2 x double> } @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 128(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 112(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 96(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 80(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 64(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 48(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 32(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 16(%r2)
-+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
-+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
-+; CHECK: vst [[VTMP]], 0(%r2)
-+; CHECK: br %r14
-+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+        <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+        <2 x double> }
-+      { <2 x double> <double 1.0, double 1.1>,
-+        <2 x double> <double 2.0, double 2.1>,
-+        <2 x double> <double 3.0, double 3.1>,
-+        <2 x double> <double 4.0, double 4.1>,
-+        <2 x double> <double 5.0, double 5.1>,
-+        <2 x double> <double 6.0, double 6.1>,
-+        <2 x double> <double 7.0, double 7.1>,
-+        <2 x double> <double 8.0, double 8.1>,
-+        <2 x double> <double 9.0, double 9.1> }
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-07.ll
-@@ -0,0 +1,47 @@
-+; Test calling functions with multiple return values (LLVM ABI extension)
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Up to eight vector return values fit into VRs.
-+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+          <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1()
-+
-+define <2 x double> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: brasl %r14, bar1
-+; CHECK: vlr %v24, %v31
-+; CHECK: br %r14
-+  %mret = call { <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double> } @bar1()
-+  %ret = extractvalue { <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double> } %mret, 7
-+  ret <2 x double> %ret
-+}
-+
-+; More than eight vector return values use sret.
-+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+          <2 x double>, <2 x double>, <2 x double>, <2 x double>,
-+          <2 x double> } @bar2()
-+
-+define <2 x double> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: la %r2, 160(%r15)
-+; CHECK: brasl %r14, bar2
-+; CHECK: vl  %v24, 288(%r15)
-+; CHECK: br %r14
-+  %mret = call { <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double>,
-+                 <2 x double>, <2 x double>,
-+                 <2 x double> } @bar2()
-+  %ret = extractvalue { <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double>,
-+                        <2 x double>, <2 x double>,
-+                        <2 x double> } %mret, 8
-+  ret <2 x double> %ret
-+}
diff --git a/SOURCES/llvm-3.6.2-nerf-skylake.patch b/SOURCES/llvm-3.6.2-nerf-skylake.patch
deleted file mode 100644
index 5f8c3c4..0000000
--- a/SOURCES/llvm-3.6.2-nerf-skylake.patch
+++ /dev/null
@@ -1,28 +0,0 @@
-Skylake Pentium has the charming property of not supporting AVX, and
-getHostCPUName will return 'x86-64' since it doesn't know about skl at
-all in 3.6.x.  This confuses llvmpipe quite badly, as we'll emit SSE4.1
-intrinsics but llvm will think they're not valid, and we'll cough and
-die with a "Cannot select" message.
-
-Fix this by treating Skylake (and Broadwell, which also isn't present
-in 3.6) as if they were Haswell.  This isn't quite what upstream does,
-but upstream has changed this API a bit and introduced a getHostCPUFeatures
-to complement it, and while it looks like a much better approach it's
-quite a bit more invasive.
-
-diff -up llvm-3.6.2.src/lib/Support/Host.cpp.jx llvm-3.6.2.src/lib/Support/Host.cpp
---- llvm-3.6.2.src/lib/Support/Host.cpp.jx	2015-10-01 12:08:39.000000000 -0400
-+++ llvm-3.6.2.src/lib/Support/Host.cpp	2015-10-13 10:51:03.736425351 -0400
-@@ -362,6 +362,12 @@ StringRef sys::getHostCPUName() {
-       case 63:
-       case 69:
-       case 70:
-+      // Broadwell:
-+      case 61:
-+      case 71:
-+      // Skylake:
-+      case 78:
-+      case 94:
-         // Not all Haswell processors support AVX too (such as the Pentium
-         // versions instead of the i7 versions).
-         return HasAVX2 ? "core-avx2" : "corei7";
diff --git a/SOURCES/llvm-3.8.1-rhel-7.3.patch b/SOURCES/llvm-3.8.1-rhel-7.3.patch
new file mode 100644
index 0000000..4caea83
--- /dev/null
+++ b/SOURCES/llvm-3.8.1-rhel-7.3.patch
@@ -0,0 +1,344 @@
+diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
+index c0f9e07..94bf580 100644
+--- a/lib/Support/Host.cpp
++++ b/lib/Support/Host.cpp
+@@ -290,107 +290,112 @@ StringRef sys::getHostCPUName() {
+       }
+     case 6:
+       switch (Model) {
+-      case  1: // Pentium Pro processor
++      case 0x01: // Pentium Pro processor
+         return "pentiumpro";
+ 
+-      case  3: // Intel Pentium II OverDrive processor, Pentium II processor,
+-               // model 03
+-      case  5: // Pentium II processor, model 05, Pentium II Xeon processor,
+-               // model 05, and Intel Celeron processor, model 05
+-      case  6: // Celeron processor, model 06
++      case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
++                 // model 03
++      case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
++                 // model 05, and Intel Celeron processor, model 05
++      case 0x06: // Celeron processor, model 06
+         return "pentium2";
+ 
+-      case  7: // Pentium III processor, model 07, and Pentium III Xeon
+-               // processor, model 07
+-      case  8: // Pentium III processor, model 08, Pentium III Xeon processor,
+-               // model 08, and Celeron processor, model 08
+-      case 10: // Pentium III Xeon processor, model 0Ah
+-      case 11: // Pentium III processor, model 0Bh
++      case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
++                 // processor, model 07
++      case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
++                 // model 08, and Celeron processor, model 08
++      case 0x0a: // Pentium III Xeon processor, model 0Ah
++      case 0x0b: // Pentium III processor, model 0Bh
+         return "pentium3";
+ 
+-      case  9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+-      case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+-               // 0Dh. All processors are manufactured using the 90 nm process.
+-      case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+-               // Integrated Processor with Intel QuickAssist Technology
++      case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
++      case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
++                 // 0Dh. All processors are manufactured using the 90 nm process.
++      case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
++                 // Integrated Processor with Intel QuickAssist Technology
+         return "pentium-m";
+ 
+-      case 14: // Intel Core Duo processor, Intel Core Solo processor, model
+-               // 0Eh. All processors are manufactured using the 65 nm process.
++      case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
++                 // 0Eh. All processors are manufactured using the 65 nm process.
+         return "yonah";
+ 
+-      case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
+-               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
+-               // mobile processor, Intel Core 2 Extreme processor, Intel
+-               // Pentium Dual-Core processor, Intel Xeon processor, model
+-               // 0Fh. All processors are manufactured using the 65 nm process.
+-      case 22: // Intel Celeron processor model 16h. All processors are
+-               // manufactured using the 65 nm process
++      case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
++                 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
++                 // mobile processor, Intel Core 2 Extreme processor, Intel
++                 // Pentium Dual-Core processor, Intel Xeon processor, model
++                 // 0Fh. All processors are manufactured using the 65 nm process.
++      case 0x16: // Intel Celeron processor model 16h. All processors are
++                 // manufactured using the 65 nm process
+         return "core2";
+ 
+-      case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
+-               // 17h. All processors are manufactured using the 45 nm process.
+-               //
+-               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+-      case 29: // Intel Xeon processor MP. All processors are manufactured using
+-               // the 45 nm process.
++      case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
++                 // 17h. All processors are manufactured using the 45 nm process.
++                 //
++                 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
++      case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
++                 // the 45 nm process.
+         return "penryn";
+ 
+-      case 26: // Intel Core i7 processor and Intel Xeon processor. All
+-               // processors are manufactured using the 45 nm process.
+-      case 30: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
+-               // As found in a Summer 2010 model iMac.
+-      case 46: // Nehalem EX
++      case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
++                 // processors are manufactured using the 45 nm process.
++      case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
++                 // As found in a Summer 2010 model iMac.
++      case 0x2e: // Nehalem EX
+         return "nehalem";
+-      case 37: // Intel Core i7, laptop version.
+-      case 44: // Intel Core i7 processor and Intel Xeon processor. All
+-               // processors are manufactured using the 32 nm process.
+-      case 47: // Westmere EX
++      case 0x25: // Intel Core i7, laptop version.
++      case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
++                 // processors are manufactured using the 32 nm process.
++      case 0x2f: // Westmere EX
+         return "westmere";
+ 
+-      // SandyBridge:
+-      case 42: // Intel Core i7 processor. All processors are manufactured
+-               // using the 32 nm process.
+-      case 45:
++      case 0x2a: // Intel Core i7 processor. All processors are manufactured
++                 // using the 32 nm process.
++      case 0x2d:
+         return "sandybridge";
+ 
+-      // Ivy Bridge:
+-      case 58:
+-      case 62: // Ivy Bridge EP
++      case 0x3a:
++      case 0x3e: // Ivy Bridge EP
+         return "ivybridge";
+ 
+       // Haswell:
+-      case 60:
+-      case 63:
+-      case 69:
+-      case 70:
++      case 0x3c:
++      case 0x3f:
++      case 0x45:
++      case 0x46:
+         return "haswell";
+ 
+       // Broadwell:
+-      case 61:
+-      case 71:
++      case 0x3d:
++      case 0x47:
++      case 0x4f:
++      case 0x56:
+         return "broadwell";
+ 
+       // Skylake:
+-      case 78:
+-      case 94:
++      case 0x4e:
++        // return "skylake-avx512";
++      case 0x5e:
+         return "skylake";
+ 
+-      case 28: // Most 45 nm Intel Atom processors
+-      case 38: // 45 nm Atom Lincroft
+-      case 39: // 32 nm Atom Medfield
+-      case 53: // 32 nm Atom Midview
+-      case 54: // 32 nm Atom Midview
++      case 0x1c: // Most 45 nm Intel Atom processors
++      case 0x26: // 45 nm Atom Lincroft
++      case 0x27: // 32 nm Atom Medfield
++      case 0x35: // 32 nm Atom Midview
++      case 0x36: // 32 nm Atom Midview
+         return "bonnell";
+ 
+       // Atom Silvermont codes from the Intel software optimization guide.
+-      case 55:
+-      case 74:
+-      case 77:
+-      case 90:
+-      case 93:
++      case 0x37:
++      case 0x4a:
++      case 0x4d:
++      case 0x5a:
++      case 0x5d:
++      case 0x4c: // really airmont
+         return "silvermont";
+ 
++      case 0x57:
++        return "knl";
++
+       default: // Unknown family 6 CPU, try to guess.
+         if (HasAVX512)
+           return "knl";
+@@ -823,6 +828,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
+   Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
+   Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
+   Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
++  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
+ 
+   bool HasLeafD = MaxLevel >= 0xd &&
+     !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
+diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
+index 8902a85..9b1bf43 100644
+--- a/lib/Target/X86/X86.td
++++ b/lib/Target/X86/X86.td
+@@ -134,6 +134,9 @@ def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
+ def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
+                       "Enable AVX-512 Vector Length eXtensions",
+                                       [FeatureAVX512]>;
++def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
++                      "Enable AVX-512 Vector Bit Manipulation Instructions",
++                                      [FeatureAVX512]>;
+ def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
+                       "Enable protection keys">;
+ def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
+@@ -454,6 +457,9 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ ]>;
+ def : BroadwellProc<"broadwell">;
+ 
++def : HaswellProc<"skylake">; // RHEL mustard
++def : HaswellProc<"skx">; // RHEL mustard
++
+ // FIXME: define KNL model
+ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+   FeatureMMX,
+@@ -484,45 +490,6 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
+ ]>;
+ def : KnightsLandingProc<"knl">;
+ 
+-// FIXME: define SKX model
+-class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
+-  FeatureMMX,
+-  FeatureAVX512,
+-  FeatureFXSR,
+-  FeatureCDI,
+-  FeatureDQI,
+-  FeatureBWI,
+-  FeatureVLX,
+-  FeaturePKU,
+-  FeatureCMPXCHG16B,
+-  FeatureSlowBTMem,
+-  FeaturePOPCNT,
+-  FeatureAES,
+-  FeaturePCLMUL,
+-  FeatureXSAVE,
+-  FeatureXSAVEOPT,
+-  FeatureRDRAND,
+-  FeatureF16C,
+-  FeatureFSGSBase,
+-  FeatureMOVBE,
+-  FeatureLZCNT,
+-  FeatureBMI,
+-  FeatureBMI2,
+-  FeatureFMA,
+-  FeatureRTM,
+-  FeatureHLE,
+-  FeatureADX,
+-  FeatureRDSEED,
+-  FeatureSlowIncDec,
+-  FeatureMPX,
+-  FeatureXSAVEC,
+-  FeatureXSAVES,
+-  FeatureLAHFSAHF
+-]>;
+-def : SkylakeProc<"skylake">;
+-def : SkylakeProc<"skx">; // Legacy alias.
+-
+-
+ // AMD CPUs.
+ 
+ def : Proc<"k6",              [FeatureSlowUAMem16, FeatureMMX]>;
+diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
+index 9c8339a..eed4319 100644
+--- a/lib/Target/X86/X86InstrInfo.td
++++ b/lib/Target/X86/X86InstrInfo.td
+@@ -773,7 +773,7 @@ def HasVLX       : Predicate<"Subtarget->hasVLX()">,
+ def NoVLX        : Predicate<"!Subtarget->hasVLX()">;
+ def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
+ def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
+-def PKU        : Predicate<"!Subtarget->hasPKU()">;
++def PKU        : Predicate<"Subtarget->hasPKU()">;
+ 
+ def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
+ def HasAES       : Predicate<"Subtarget->hasAES()">;
+@@ -795,6 +795,7 @@ def HasFSGSBase  : Predicate<"Subtarget->hasFSGSBase()">;
+ def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
+ def HasBMI       : Predicate<"Subtarget->hasBMI()">;
+ def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
++def HasVBMI      : Predicate<"Subtarget->hasVBMI()">;
+ def HasRTM       : Predicate<"Subtarget->hasRTM()">;
+ def HasHLE       : Predicate<"Subtarget->hasHLE()">;
+ def HasTSX       : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
+diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
+index 8ef08c9..739de59 100644
+--- a/lib/Target/X86/X86Subtarget.cpp
++++ b/lib/Target/X86/X86Subtarget.cpp
+@@ -261,6 +261,7 @@ void X86Subtarget::initializeEnvironment() {
+   HasLZCNT = false;
+   HasBMI = false;
+   HasBMI2 = false;
++  HasVBMI = false;
+   HasRTM = false;
+   HasHLE = false;
+   HasERI = false;
+diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
+index 13d1026..c1adb44 100644
+--- a/lib/Target/X86/X86Subtarget.h
++++ b/lib/Target/X86/X86Subtarget.h
+@@ -134,6 +134,9 @@ protected:
+   /// Processor has BMI2 instructions.
+   bool HasBMI2;
+ 
++  /// Processor has VBMI instructions.
++  bool HasVBMI;
++
+   /// Processor has RTM instructions.
+   bool HasRTM;
+ 
+@@ -374,6 +377,7 @@ public:
+   bool hasLZCNT() const { return HasLZCNT; }
+   bool hasBMI() const { return HasBMI; }
+   bool hasBMI2() const { return HasBMI2; }
++  bool hasVBMI() const { return HasVBMI; }
+   bool hasRTM() const { return HasRTM; }
+   bool hasHLE() const { return HasHLE; }
+   bool hasADX() const { return HasADX; }
+diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll
+index 27cbef6..c25435b 100644
+--- a/test/CodeGen/X86/slow-unaligned-mem.ll
++++ b/test/CodeGen/X86/slow-unaligned-mem.ll
+@@ -14,15 +14,14 @@
+ 
+ ; Intel chips with fast unaligned memory accesses
+ 
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont    2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem       2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere      2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge   2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge     2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell       2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell     2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl           2>&1 | FileCheck %s --check-prefix=FAST
+-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake       2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont     2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem        2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere       2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge    2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge      2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell        2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell      2>&1 | FileCheck %s --check-prefix=FAST
++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl            2>&1 | FileCheck %s --check-prefix=FAST
+ 
+ ; AMD chips with slow unaligned memory accesses
+ 
diff --git a/SOURCES/llvm-Config-config.h b/SOURCES/llvm-Config-config.h
deleted file mode 100644
index c369b45..0000000
--- a/SOURCES/llvm-Config-config.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <bits/wordsize.h>
-
-#if __WORDSIZE == 32
-#include "config-32.h"
-#elif __WORDSIZE == 64
-#include "config-64.h"
-#else
-#error "Unknown word size"
-#endif
diff --git a/SOURCES/llvm-Config-llvm-config.h b/SOURCES/llvm-Config-llvm-config.h
deleted file mode 100644
index 2fa08c9..0000000
--- a/SOURCES/llvm-Config-llvm-config.h
+++ /dev/null
@@ -1,9 +0,0 @@
-#include <bits/wordsize.h>
-
-#if __WORDSIZE == 32
-#include "llvm-config-32.h"
-#elif __WORDSIZE == 64
-#include "llvm-config-64.h"
-#else
-#error "Unknown word size"
-#endif
diff --git a/SOURCES/llvm-config.h b/SOURCES/llvm-config.h
new file mode 100644
index 0000000..2fa08c9
--- /dev/null
+++ b/SOURCES/llvm-config.h
@@ -0,0 +1,9 @@
+#include <bits/wordsize.h>
+
+#if __WORDSIZE == 32
+#include "llvm-config-32.h"
+#elif __WORDSIZE == 64
+#include "llvm-config-64.h"
+#else
+#error "Unknown word size"
+#endif
diff --git a/SOURCES/llvm-z13-backports.patch b/SOURCES/llvm-z13-backports.patch
deleted file mode 100644
index c6aebb4..0000000
--- a/SOURCES/llvm-z13-backports.patch
+++ /dev/null
@@ -1,39981 +0,0 @@
-This patch backports z13 support and a number of other SystemZ
-enhancements to the LLVM 3.6 release branch.
-
-The patch consists of backports of the following mainline revisions:
-229652, 229654, 229658, 233540, 233541, 233688, 233689, 233690, 233700,
-233736, 233803, 236430, 236432, 236433, 236520, 236521, 236522, 236523,
-236524, 236525, 236526, 236527, 236528, 236529, 236530
-
-Index: llvm-36/include/llvm/IR/Intrinsics.td
-===================================================================
---- llvm-36.orig/include/llvm/IR/Intrinsics.td
-+++ llvm-36/include/llvm/IR/Intrinsics.td
-@@ -594,3 +594,4 @@ include "llvm/IR/IntrinsicsHexagon.td"
- include "llvm/IR/IntrinsicsNVVM.td"
- include "llvm/IR/IntrinsicsMips.td"
- include "llvm/IR/IntrinsicsR600.td"
-+include "llvm/IR/IntrinsicsSystemZ.td"
-Index: llvm-36/include/llvm/IR/IntrinsicsSystemZ.td
-===================================================================
---- /dev/null
-+++ llvm-36/include/llvm/IR/IntrinsicsSystemZ.td
-@@ -0,0 +1,378 @@
-+//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file defines all of the SystemZ-specific intrinsics.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+class SystemZUnaryConv<string name, LLVMType result, LLVMType arg>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[result], [arg], [IntrNoMem]>;
-+
-+class SystemZUnary<string name, LLVMType type>
-+  : SystemZUnaryConv<name, type, type>;
-+
-+class SystemZUnaryConvCC<LLVMType result, LLVMType arg>
-+  : Intrinsic<[result, llvm_i32_ty], [arg], [IntrNoMem]>;
-+
-+class SystemZUnaryCC<LLVMType type>
-+  : SystemZUnaryConvCC<type, type>;
-+
-+class SystemZBinaryConv<string name, LLVMType result, LLVMType arg>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[result], [arg, arg], [IntrNoMem]>;
-+
-+class SystemZBinary<string name, LLVMType type>
-+  : SystemZBinaryConv<name, type, type>;
-+
-+class SystemZBinaryInt<string name, LLVMType type>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>;
-+
-+class SystemZBinaryConvCC<LLVMType result, LLVMType arg>
-+  : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>;
-+
-+class SystemZBinaryConvIntCC<LLVMType result, LLVMType arg>
-+  : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>;
-+
-+class SystemZBinaryCC<LLVMType type>
-+  : SystemZBinaryConvCC<type, type>;
-+
-+class SystemZTernaryConv<string name, LLVMType result, LLVMType arg>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[result], [arg, arg, result], [IntrNoMem]>;
-+
-+class SystemZTernary<string name, LLVMType type>
-+  : SystemZTernaryConv<name, type, type>;
-+
-+class SystemZTernaryInt<string name, LLVMType type>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>;
-+
-+class SystemZTernaryIntCC<LLVMType type>
-+  : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>;
-+
-+class SystemZQuaternaryInt<string name, LLVMType type>
-+  : GCCBuiltin<"__builtin_s390_" ## name>,
-+    Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>;
-+
-+class SystemZQuaternaryIntCC<LLVMType type>
-+  : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty],
-+              [IntrNoMem]>;
-+
-+multiclass SystemZUnaryExtBHF<string name> {
-+  def b : SystemZUnaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
-+  def h : SystemZUnaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
-+  def f : SystemZUnaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZUnaryExtBHWF<string name> {
-+  def b  : SystemZUnaryConv<name##"b",  llvm_v8i16_ty, llvm_v16i8_ty>;
-+  def hw : SystemZUnaryConv<name##"hw", llvm_v4i32_ty, llvm_v8i16_ty>;
-+  def f  : SystemZUnaryConv<name##"f",  llvm_v2i64_ty, llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZUnaryBHF<string name> {
-+  def b : SystemZUnary<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZUnary<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZUnary<name##"f", llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZUnaryBHFG<string name> : SystemZUnaryBHF<name> {
-+  def g : SystemZUnary<name##"g", llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZUnaryCCBHF {
-+  def bs : SystemZUnaryCC<llvm_v16i8_ty>;
-+  def hs : SystemZUnaryCC<llvm_v8i16_ty>;
-+  def fs : SystemZUnaryCC<llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZBinaryTruncHFG<string name> {
-+  def h : SystemZBinaryConv<name##"h", llvm_v16i8_ty, llvm_v8i16_ty>;
-+  def f : SystemZBinaryConv<name##"f", llvm_v8i16_ty, llvm_v4i32_ty>;
-+  def g : SystemZBinaryConv<name##"g", llvm_v4i32_ty, llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZBinaryTruncCCHFG {
-+  def hs : SystemZBinaryConvCC<llvm_v16i8_ty, llvm_v8i16_ty>;
-+  def fs : SystemZBinaryConvCC<llvm_v8i16_ty, llvm_v4i32_ty>;
-+  def gs : SystemZBinaryConvCC<llvm_v4i32_ty, llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZBinaryExtBHF<string name> {
-+  def b : SystemZBinaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
-+  def h : SystemZBinaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
-+  def f : SystemZBinaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZBinaryExtBHFG<string name> : SystemZBinaryExtBHF<name> {
-+  def g : SystemZBinaryConv<name##"g", llvm_v16i8_ty, llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZBinaryBHF<string name> {
-+  def b : SystemZBinary<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZBinary<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZBinary<name##"f", llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZBinaryBHFG<string name> : SystemZBinaryBHF<name> {
-+  def g : SystemZBinary<name##"g", llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZBinaryIntBHFG<string name> {
-+  def b : SystemZBinaryInt<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZBinaryInt<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZBinaryInt<name##"f", llvm_v4i32_ty>;
-+  def g : SystemZBinaryInt<name##"g", llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZBinaryCCBHF {
-+  def bs : SystemZBinaryCC<llvm_v16i8_ty>;
-+  def hs : SystemZBinaryCC<llvm_v8i16_ty>;
-+  def fs : SystemZBinaryCC<llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZCompareBHFG<string name> {
-+  def bs : SystemZBinaryCC<llvm_v16i8_ty>;
-+  def hs : SystemZBinaryCC<llvm_v8i16_ty>;
-+  def fs : SystemZBinaryCC<llvm_v4i32_ty>;
-+  def gs : SystemZBinaryCC<llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZTernaryExtBHF<string name> {
-+  def b : SystemZTernaryConv<name##"b", llvm_v8i16_ty, llvm_v16i8_ty>;
-+  def h : SystemZTernaryConv<name##"h", llvm_v4i32_ty, llvm_v8i16_ty>;
-+  def f : SystemZTernaryConv<name##"f", llvm_v2i64_ty, llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZTernaryExtBHFG<string name> : SystemZTernaryExtBHF<name> {
-+  def g : SystemZTernaryConv<name##"g", llvm_v16i8_ty, llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZTernaryBHF<string name> {
-+  def b : SystemZTernary<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZTernary<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZTernary<name##"f", llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZTernaryIntBHF<string name> {
-+  def b : SystemZTernaryInt<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZTernaryInt<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZTernaryInt<name##"f", llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZTernaryIntCCBHF {
-+  def bs : SystemZTernaryIntCC<llvm_v16i8_ty>;
-+  def hs : SystemZTernaryIntCC<llvm_v8i16_ty>;
-+  def fs : SystemZTernaryIntCC<llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZQuaternaryIntBHF<string name> {
-+  def b : SystemZQuaternaryInt<name##"b", llvm_v16i8_ty>;
-+  def h : SystemZQuaternaryInt<name##"h", llvm_v8i16_ty>;
-+  def f : SystemZQuaternaryInt<name##"f", llvm_v4i32_ty>;
-+}
-+
-+multiclass SystemZQuaternaryIntBHFG<string name> : SystemZQuaternaryIntBHF<name> {
-+  def g : SystemZQuaternaryInt<name##"g", llvm_v2i64_ty>;
-+}
-+
-+multiclass SystemZQuaternaryIntCCBHF {
-+  def bs : SystemZQuaternaryIntCC<llvm_v16i8_ty>;
-+  def hs : SystemZQuaternaryIntCC<llvm_v8i16_ty>;
-+  def fs : SystemZQuaternaryIntCC<llvm_v4i32_ty>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Transactional-execution intrinsics
-+//
-+//===----------------------------------------------------------------------===//
-+
-+def llvm_ptr64_ty : LLVMPointerType<llvm_i64_ty>;
-+
-+let TargetPrefix = "s390" in {
-+  def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-+                                  [IntrNoDuplicate]>;
-+
-+  def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty],
-+                                          [llvm_ptr_ty, llvm_i32_ty],
-+                                          [IntrNoDuplicate]>;
-+
-+  def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty],
-+                                   [IntrNoDuplicate]>;
-+
-+  def int_s390_tabort : Intrinsic<[], [llvm_i64_ty],
-+                                  [IntrNoReturn, Throws]>;
-+
-+  def int_s390_tend : GCCBuiltin<"__builtin_tend">,
-+                      Intrinsic<[llvm_i32_ty], []>;
-+
-+  def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">,
-+                      Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>;
-+
-+  def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty],
-+                                 [IntrReadWriteArgMem]>;
-+
-+  def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">,
-+                              Intrinsic<[], [llvm_i32_ty]>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// Vector intrinsics
-+//
-+//===----------------------------------------------------------------------===//
-+
-+let TargetPrefix = "s390" in {
-+  def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">,
-+                      Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
-+                                [IntrNoMem]>;
-+
-+  def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">,
-+                      Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty],
-+                                [IntrReadArgMem]>;
-+
-+  def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">,
-+                     Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty],
-+                               [IntrReadArgMem]>;
-+
-+  def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">,
-+                      Intrinsic<[llvm_v2i64_ty],
-+                                [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty],
-+                                [IntrNoMem]>;
-+
-+  def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">,
-+                       Intrinsic<[llvm_v16i8_ty],
-+                                 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty],
-+                                 [IntrNoMem]>;
-+
-+  defm int_s390_vpks : SystemZBinaryTruncHFG<"vpks">;
-+  defm int_s390_vpks : SystemZBinaryTruncCCHFG;
-+
-+  defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">;
-+  defm int_s390_vpkls : SystemZBinaryTruncCCHFG;
-+
-+  def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">,
-+                      Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty],
-+                                // In fact write-only but there's no property
-+                                // for that.
-+                                [IntrReadWriteArgMem]>;
-+
-+  defm int_s390_vupl  : SystemZUnaryExtBHWF<"vupl">;
-+  defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">;
-+
-+  defm int_s390_vuph  : SystemZUnaryExtBHF<"vuph">;
-+  defm int_s390_vuplh : SystemZUnaryExtBHF<"vuplh">;
-+
-+  defm int_s390_vacc : SystemZBinaryBHFG<"vacc">;
-+
-+  def int_s390_vaq    : SystemZBinary<"vaq",     llvm_v16i8_ty>;
-+  def int_s390_vacq   : SystemZTernary<"vacq",   llvm_v16i8_ty>;
-+  def int_s390_vaccq  : SystemZBinary<"vaccq",   llvm_v16i8_ty>;
-+  def int_s390_vacccq : SystemZTernary<"vacccq", llvm_v16i8_ty>;
-+
-+  defm int_s390_vavg  : SystemZBinaryBHFG<"vavg">;
-+  defm int_s390_vavgl : SystemZBinaryBHFG<"vavgl">;
-+
-+  def int_s390_vcksm : SystemZBinary<"vcksm", llvm_v4i32_ty>;
-+
-+  defm int_s390_vgfm  : SystemZBinaryExtBHFG<"vgfm">;
-+  defm int_s390_vgfma : SystemZTernaryExtBHFG<"vgfma">;
-+
-+  defm int_s390_vmah  : SystemZTernaryBHF<"vmah">;
-+  defm int_s390_vmalh : SystemZTernaryBHF<"vmalh">;
-+  defm int_s390_vmae  : SystemZTernaryExtBHF<"vmae">;
-+  defm int_s390_vmale : SystemZTernaryExtBHF<"vmale">;
-+  defm int_s390_vmao  : SystemZTernaryExtBHF<"vmao">;
-+  defm int_s390_vmalo : SystemZTernaryExtBHF<"vmalo">;
-+
-+  defm int_s390_vmh  : SystemZBinaryBHF<"vmh">;
-+  defm int_s390_vmlh : SystemZBinaryBHF<"vmlh">;
-+  defm int_s390_vme  : SystemZBinaryExtBHF<"vme">;
-+  defm int_s390_vmle : SystemZBinaryExtBHF<"vmle">;
-+  defm int_s390_vmo  : SystemZBinaryExtBHF<"vmo">;
-+  defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">;
-+
-+  defm int_s390_verllv : SystemZBinaryBHFG<"verllv">;
-+  defm int_s390_verll  : SystemZBinaryIntBHFG<"verll">;
-+  defm int_s390_verim  : SystemZQuaternaryIntBHFG<"verim">;
-+
-+  def int_s390_vsl   : SystemZBinary<"vsl",   llvm_v16i8_ty>;
-+  def int_s390_vslb  : SystemZBinary<"vslb",  llvm_v16i8_ty>;
-+  def int_s390_vsra  : SystemZBinary<"vsra",  llvm_v16i8_ty>;
-+  def int_s390_vsrab : SystemZBinary<"vsrab", llvm_v16i8_ty>;
-+  def int_s390_vsrl  : SystemZBinary<"vsrl",  llvm_v16i8_ty>;
-+  def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>;
-+
-+  def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">,
-+                       Intrinsic<[llvm_v16i8_ty],
-+                                 [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty],
-+                                 [IntrNoMem]>;
-+
-+  defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">;
-+
-+  def int_s390_vsq     : SystemZBinary<"vsq",      llvm_v16i8_ty>;
-+  def int_s390_vsbiq   : SystemZTernary<"vsbiq",   llvm_v16i8_ty>;
-+  def int_s390_vscbiq  : SystemZBinary<"vscbiq",   llvm_v16i8_ty>;
-+  def int_s390_vsbcbiq : SystemZTernary<"vsbcbiq", llvm_v16i8_ty>;
-+
-+  def int_s390_vsumb : SystemZBinaryConv<"vsumb", llvm_v4i32_ty, llvm_v16i8_ty>;
-+  def int_s390_vsumh : SystemZBinaryConv<"vsumh", llvm_v4i32_ty, llvm_v8i16_ty>;
-+
-+  def int_s390_vsumgh : SystemZBinaryConv<"vsumgh", llvm_v2i64_ty,
-+                                          llvm_v8i16_ty>;
-+  def int_s390_vsumgf : SystemZBinaryConv<"vsumgf", llvm_v2i64_ty,
-+                                          llvm_v4i32_ty>;
-+
-+  def int_s390_vsumqf : SystemZBinaryConv<"vsumqf", llvm_v16i8_ty,
-+                                          llvm_v4i32_ty>;
-+  def int_s390_vsumqg : SystemZBinaryConv<"vsumqg", llvm_v16i8_ty,
-+                                          llvm_v2i64_ty>;
-+
-+  def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>;
-+
-+  defm int_s390_vceq : SystemZCompareBHFG<"vceq">;
-+  defm int_s390_vch  : SystemZCompareBHFG<"vch">;
-+  defm int_s390_vchl : SystemZCompareBHFG<"vchl">;
-+
-+  defm int_s390_vfae  : SystemZTernaryIntBHF<"vfae">;
-+  defm int_s390_vfae  : SystemZTernaryIntCCBHF;
-+  defm int_s390_vfaez : SystemZTernaryIntBHF<"vfaez">;
-+  defm int_s390_vfaez : SystemZTernaryIntCCBHF;
-+
-+  defm int_s390_vfee  : SystemZBinaryBHF<"vfee">;
-+  defm int_s390_vfee  : SystemZBinaryCCBHF;
-+  defm int_s390_vfeez : SystemZBinaryBHF<"vfeez">;
-+  defm int_s390_vfeez : SystemZBinaryCCBHF;
-+
-+  defm int_s390_vfene  : SystemZBinaryBHF<"vfene">;
-+  defm int_s390_vfene  : SystemZBinaryCCBHF;
-+  defm int_s390_vfenez : SystemZBinaryBHF<"vfenez">;
-+  defm int_s390_vfenez : SystemZBinaryCCBHF;
-+
-+  defm int_s390_vistr : SystemZUnaryBHF<"vistr">;
-+  defm int_s390_vistr : SystemZUnaryCCBHF;
-+
-+  defm int_s390_vstrc  : SystemZQuaternaryIntBHF<"vstrc">;
-+  defm int_s390_vstrc  : SystemZQuaternaryIntCCBHF;
-+  defm int_s390_vstrcz : SystemZQuaternaryIntBHF<"vstrcz">;
-+  defm int_s390_vstrcz : SystemZQuaternaryIntCCBHF;
-+
-+  def int_s390_vfcedbs  : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
-+  def int_s390_vfchdbs  : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
-+  def int_s390_vfchedbs : SystemZBinaryConvCC<llvm_v2i64_ty, llvm_v2f64_ty>;
-+
-+  def int_s390_vftcidb : SystemZBinaryConvIntCC<llvm_v2i64_ty, llvm_v2f64_ty>;
-+
-+  def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty],
-+                                 [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty],
-+                                 [IntrNoMem]>;
-+}
-Index: llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-===================================================================
---- llvm-36.orig/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-+++ llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
-@@ -10496,18 +10496,13 @@ SDValue DAGCombiner::ReplaceExtractVecto
-   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
-     int Elt = ConstEltNo->getZExtValue();
-     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
--    if (TLI.isBigEndian())
--      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
-     Offset = DAG.getConstant(PtrOff, PtrType);
-     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
-   } else {
-+    Offset = DAG.getZExtOrTrunc(EltNo, SDLoc(EVE), PtrType);
-     Offset = DAG.getNode(
--        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
--        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
--    if (TLI.isBigEndian())
--      Offset = DAG.getNode(
--          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
--          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
-+        ISD::MUL, SDLoc(EVE), PtrType, Offset,
-+        DAG.getConstant(VecEltVT.getStoreSize(), PtrType));
-     MPI = OriginalLoad->getPointerInfo();
-   }
-   NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);
-Index: llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
-===================================================================
---- llvm-36.orig/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
-+++ llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
-@@ -2888,7 +2888,10 @@ static EVT FindMemType(SelectionDAG& DAG
-     unsigned MemVTWidth = MemVT.getSizeInBits();
-     if (MemVT.getSizeInBits() <= WidenEltWidth)
-       break;
--    if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 &&
-+    auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT);
-+    if ((Action == TargetLowering::TypeLegal ||
-+         Action == TargetLowering::TypePromoteInteger) &&
-+        (WidenWidth % MemVTWidth) == 0 &&
-         isPowerOf2_32(WidenWidth / MemVTWidth) &&
-         (MemVTWidth <= Width ||
-          (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) {
-Index: llvm-36/lib/Support/Host.cpp
-===================================================================
---- llvm-36.orig/lib/Support/Host.cpp
-+++ llvm-36/lib/Support/Host.cpp
-@@ -655,6 +655,28 @@ StringRef sys::getHostCPUName() {
-   StringRef Str(buffer, CPUInfoSize);
-   SmallVector<StringRef, 32> Lines;
-   Str.split(Lines, "\n");
-+
-+  // Look for the CPU features.
-+  SmallVector<StringRef, 32> CPUFeatures;
-+  for (unsigned I = 0, E = Lines.size(); I != E; ++I)
-+    if (Lines[I].startswith("features")) {
-+      size_t Pos = Lines[I].find(":");
-+      if (Pos != StringRef::npos) {
-+        Lines[I].drop_front(Pos + 1).split(CPUFeatures, " ");
-+        break;
-+      }
-+    }
-+
-+  // We need to check for the presence of vector support independently of
-+  // the machine type, since we may only use the vector register set when
-+  // supported by the kernel (and hypervisor).
-+  bool HaveVectorSupport = false;
-+  for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) {
-+    if (CPUFeatures[I] == "vx")
-+      HaveVectorSupport = true;
-+  }
-+
-+  // Now check the processor machine type.
-   for (unsigned I = 0, E = Lines.size(); I != E; ++I) {
-     if (Lines[I].startswith("processor ")) {
-       size_t Pos = Lines[I].find("machine = ");
-@@ -662,6 +684,8 @@ StringRef sys::getHostCPUName() {
-         Pos += sizeof("machine = ") - 1;
-         unsigned int Id;
-         if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) {
-+          if (Id >= 2964 && HaveVectorSupport)
-+            return "z13";
-           if (Id >= 2827)
-             return "zEC12";
-           if (Id >= 2817)
-Index: llvm-36/lib/Support/Triple.cpp
-===================================================================
---- llvm-36.orig/lib/Support/Triple.cpp
-+++ llvm-36/lib/Support/Triple.cpp
-@@ -89,7 +89,7 @@ const char *Triple::getArchTypePrefix(Ar
-   case sparcv9:
-   case sparc:       return "sparc";
- 
--  case systemz:     return "systemz";
-+  case systemz:     return "s390";
- 
-   case x86:
-   case x86_64:      return "x86";
-Index: llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
-+++ llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp
-@@ -39,13 +39,17 @@ enum RegisterKind {
-   ADDR64Reg,
-   FP32Reg,
-   FP64Reg,
--  FP128Reg
-+  FP128Reg,
-+  VR32Reg,
-+  VR64Reg,
-+  VR128Reg
- };
- 
- enum MemoryKind {
-   BDMem,
-   BDXMem,
--  BDLMem
-+  BDLMem,
-+  BDVMem
- };
- 
- class SystemZOperand : public MCParsedAsmOperand {
-@@ -57,6 +61,7 @@ private:
-     KindReg,
-     KindAccessReg,
-     KindImm,
-+    KindImmTLS,
-     KindMem
-   };
- 
-@@ -84,23 +89,31 @@ private:
-   };
- 
-   // Base + Disp + Index, where Base and Index are LLVM registers or 0.
--  // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg).
--  // Length is the operand length for D(L,B)-style operands, otherwise
--  // it is null.
-+  // MemKind says what type of memory this is and RegKind says what type
-+  // the base register has (ADDR32Reg or ADDR64Reg).  Length is the operand
-+  // length for D(L,B)-style operands, otherwise it is null.
-   struct MemOp {
--    unsigned Base : 8;
--    unsigned Index : 8;
--    unsigned RegKind : 8;
--    unsigned Unused : 8;
-+    unsigned Base : 12;
-+    unsigned Index : 12;
-+    unsigned MemKind : 4;
-+    unsigned RegKind : 4;
-     const MCExpr *Disp;
-     const MCExpr *Length;
-   };
- 
-+  // Imm is an immediate operand, and Sym is an optional TLS symbol
-+  // for use with a __tls_get_offset marker relocation.
-+  struct ImmTLSOp {
-+    const MCExpr *Imm;
-+    const MCExpr *Sym;
-+  };
-+
-   union {
-     TokenOp Token;
-     RegOp Reg;
-     unsigned AccessReg;
-     const MCExpr *Imm;
-+    ImmTLSOp ImmTLS;
-     MemOp Mem;
-   };
- 
-@@ -149,10 +162,11 @@ public:
-     return Op;
-   }
-   static std::unique_ptr<SystemZOperand>
--  createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp,
--            unsigned Index, const MCExpr *Length, SMLoc StartLoc,
--            SMLoc EndLoc) {
-+  createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base,
-+            const MCExpr *Disp, unsigned Index, const MCExpr *Length,
-+            SMLoc StartLoc, SMLoc EndLoc) {
-     auto Op = make_unique<SystemZOperand>(KindMem, StartLoc, EndLoc);
-+    Op->Mem.MemKind = MemKind;
-     Op->Mem.RegKind = RegKind;
-     Op->Mem.Base = Base;
-     Op->Mem.Index = Index;
-@@ -160,6 +174,14 @@ public:
-     Op->Mem.Length = Length;
-     return Op;
-   }
-+  static std::unique_ptr<SystemZOperand>
-+  createImmTLS(const MCExpr *Imm, const MCExpr *Sym,
-+               SMLoc StartLoc, SMLoc EndLoc) {
-+    auto Op = make_unique<SystemZOperand>(KindImmTLS, StartLoc, EndLoc);
-+    Op->ImmTLS.Imm = Imm;
-+    Op->ImmTLS.Sym = Sym;
-+    return Op;
-+  }
- 
-   // Token operands
-   bool isToken() const override {
-@@ -200,24 +222,40 @@ public:
-     return Imm;
-   }
- 
-+  // Immediate operands with optional TLS symbol.
-+  bool isImmTLS() const {
-+    return Kind == KindImmTLS;
-+  }
-+
-   // Memory operands.
-   bool isMem() const override {
-     return Kind == KindMem;
-   }
--  bool isMem(RegisterKind RegKind, MemoryKind MemKind) const {
-+  bool isMem(MemoryKind MemKind) const {
-     return (Kind == KindMem &&
--            Mem.RegKind == RegKind &&
--            (MemKind == BDXMem || !Mem.Index) &&
--            (MemKind == BDLMem) == (Mem.Length != nullptr));
-+            (Mem.MemKind == MemKind ||
-+             // A BDMem can be treated as a BDXMem in which the index
-+             // register field is 0.
-+             (Mem.MemKind == BDMem && MemKind == BDXMem)));
-+  }
-+  bool isMem(MemoryKind MemKind, RegisterKind RegKind) const {
-+    return isMem(MemKind) && Mem.RegKind == RegKind;
-   }
--  bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const {
--    return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff);
-+  bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const {
-+    return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff);
-   }
--  bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const {
--    return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287);
-+  bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const {
-+    return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287);
-   }
-   bool isMemDisp12Len8(RegisterKind RegKind) const {
--    return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100);
-+    return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100);
-+  }
-+  void addBDVAddrOperands(MCInst &Inst, unsigned N) const {
-+    assert(N == 3 && "Invalid number of operands");
-+    assert(isMem(BDVMem) && "Invalid operand type");
-+    Inst.addOperand(MCOperand::CreateReg(Mem.Base));
-+    addExpr(Inst, Mem.Disp);
-+    Inst.addOperand(MCOperand::CreateReg(Mem.Index));
-   }
- 
-   // Override MCParsedAsmOperand.
-@@ -242,24 +280,31 @@ public:
-   }
-   void addBDAddrOperands(MCInst &Inst, unsigned N) const {
-     assert(N == 2 && "Invalid number of operands");
--    assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type");
-+    assert(isMem(BDMem) && "Invalid operand type");
-     Inst.addOperand(MCOperand::CreateReg(Mem.Base));
-     addExpr(Inst, Mem.Disp);
-   }
-   void addBDXAddrOperands(MCInst &Inst, unsigned N) const {
-     assert(N == 3 && "Invalid number of operands");
--    assert(Kind == KindMem && "Invalid operand type");
-+    assert(isMem(BDXMem) && "Invalid operand type");
-     Inst.addOperand(MCOperand::CreateReg(Mem.Base));
-     addExpr(Inst, Mem.Disp);
-     Inst.addOperand(MCOperand::CreateReg(Mem.Index));
-   }
-   void addBDLAddrOperands(MCInst &Inst, unsigned N) const {
-     assert(N == 3 && "Invalid number of operands");
--    assert(Kind == KindMem && "Invalid operand type");
-+    assert(isMem(BDLMem) && "Invalid operand type");
-     Inst.addOperand(MCOperand::CreateReg(Mem.Base));
-     addExpr(Inst, Mem.Disp);
-     addExpr(Inst, Mem.Length);
-   }
-+  void addImmTLSOperands(MCInst &Inst, unsigned N) const {
-+    assert(N == 2 && "Invalid number of operands");
-+    assert(Kind == KindImmTLS && "Invalid operand type");
-+    addExpr(Inst, ImmTLS.Imm);
-+    if (ImmTLS.Sym)
-+      addExpr(Inst, ImmTLS.Sym);
-+  }
- 
-   // Used by the TableGen code to check for particular operand types.
-   bool isGR32() const { return isReg(GR32Reg); }
-@@ -273,17 +318,26 @@ public:
-   bool isFP32() const { return isReg(FP32Reg); }
-   bool isFP64() const { return isReg(FP64Reg); }
-   bool isFP128() const { return isReg(FP128Reg); }
--  bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); }
--  bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); }
--  bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); }
--  bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); }
--  bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); }
--  bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); }
-+  bool isVR32() const { return isReg(VR32Reg); }
-+  bool isVR64() const { return isReg(VR64Reg); }
-+  bool isVF128() const { return false; }
-+  bool isVR128() const { return isReg(VR128Reg); }
-+  bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); }
-+  bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); }
-+  bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); }
-+  bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); }
-+  bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); }
-+  bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); }
-   bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); }
-+  bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); }
-+  bool isU1Imm() const { return isImm(0, 1); }
-+  bool isU2Imm() const { return isImm(0, 3); }
-+  bool isU3Imm() const { return isImm(0, 7); }
-   bool isU4Imm() const { return isImm(0, 15); }
-   bool isU6Imm() const { return isImm(0, 63); }
-   bool isU8Imm() const { return isImm(0, 255); }
-   bool isS8Imm() const { return isImm(-128, 127); }
-+  bool isU12Imm() const { return isImm(0, 4095); }
-   bool isU16Imm() const { return isImm(0, 65535); }
-   bool isS16Imm() const { return isImm(-32768, 32767); }
-   bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); }
-@@ -300,6 +354,7 @@ private:
-   enum RegisterGroup {
-     RegGR,
-     RegFP,
-+    RegV,
-     RegAccess
-   };
-   struct Register {
-@@ -318,12 +373,15 @@ private:
-                                      RegisterKind Kind);
- 
-   bool parseAddress(unsigned &Base, const MCExpr *&Disp,
--                    unsigned &Index, const MCExpr *&Length,
-+                    unsigned &Index, bool &IsVector, const MCExpr *&Length,
-                     const unsigned *Regs, RegisterKind RegKind);
- 
-   OperandMatchResultTy parseAddress(OperandVector &Operands,
--                                    const unsigned *Regs, RegisterKind RegKind,
--                                    MemoryKind MemKind);
-+                                    MemoryKind MemKind, const unsigned *Regs,
-+                                    RegisterKind RegKind);
-+
-+  OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
-+                                  int64_t MaxVal, bool AllowTLS);
- 
-   bool parseOperand(OperandVector &Operands, StringRef Mnemonic);
- 
-@@ -382,26 +440,45 @@ public:
-   OperandMatchResultTy parseFP128(OperandVector &Operands) {
-     return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg);
-   }
-+  OperandMatchResultTy parseVR32(OperandVector &Operands) {
-+    return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg);
-+  }
-+  OperandMatchResultTy parseVR64(OperandVector &Operands) {
-+    return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg);
-+  }
-+  OperandMatchResultTy parseVF128(OperandVector &Operands) {
-+    llvm_unreachable("Shouldn't be used as an operand");
-+  }
-+  OperandMatchResultTy parseVR128(OperandVector &Operands) {
-+    return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg);
-+  }
-   OperandMatchResultTy parseBDAddr32(OperandVector &Operands) {
--    return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem);
-+    return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg);
-   }
-   OperandMatchResultTy parseBDAddr64(OperandVector &Operands) {
--    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem);
-+    return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg);
-   }
-   OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) {
--    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem);
-+    return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg);
-   }
-   OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) {
--    return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem);
-+    return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg);
-+  }
-+  OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) {
-+    return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg);
-   }
-   OperandMatchResultTy parseAccessReg(OperandVector &Operands);
--  OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal,
--                                  int64_t MaxVal);
-   OperandMatchResultTy parsePCRel16(OperandVector &Operands) {
--    return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1);
-+    return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false);
-   }
-   OperandMatchResultTy parsePCRel32(OperandVector &Operands) {
--    return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1);
-+    return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false);
-+  }
-+  OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) {
-+    return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true);
-+  }
-+  OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) {
-+    return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true);
-   }
- };
- } // end anonymous namespace
-@@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Reg
-     Reg.Group = RegGR;
-   else if (Prefix == 'f' && Reg.Num < 16)
-     Reg.Group = RegFP;
-+  else if (Prefix == 'v' && Reg.Num < 32)
-+    Reg.Group = RegV;
-   else if (Prefix == 'a' && Reg.Num < 16)
-     Reg.Group = RegAccess;
-   else
-@@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandV
- // Regs maps asm register numbers to LLVM register numbers and RegKind
- // says what kind of address register we're using (ADDR32Reg or ADDR64Reg).
- bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp,
--                                    unsigned &Index, const MCExpr *&Length,
--                                    const unsigned *Regs,
-+                                    unsigned &Index, bool &IsVector,
-+                                    const MCExpr *&Length, const unsigned *Regs,
-                                     RegisterKind RegKind) {
-   // Parse the displacement, which must always be present.
-   if (getParser().parseExpression(Disp))
-@@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsi
-   // Parse the optional base and index.
-   Index = 0;
-   Base = 0;
-+  IsVector = false;
-   Length = nullptr;
-   if (getLexer().is(AsmToken::LParen)) {
-     Parser.Lex();
-@@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsi
-     if (getLexer().is(AsmToken::Percent)) {
-       // Parse the first register and decide whether it's a base or an index.
-       Register Reg;
--      if (parseRegister(Reg, RegGR, Regs, RegKind))
-+      if (parseRegister(Reg))
-         return true;
--      if (getLexer().is(AsmToken::Comma))
--        Index = Reg.Num;
--      else
--        Base = Reg.Num;
-+      if (Reg.Group == RegV) {
-+        // A vector index register.  The base register is optional.
-+        IsVector = true;
-+        Index = SystemZMC::VR128Regs[Reg.Num];
-+      } else if (Reg.Group == RegGR) {
-+        if (Reg.Num == 0)
-+          return Error(Reg.StartLoc, "%r0 used in an address");
-+        // If the are two registers, the first one is the index and the
-+        // second is the base.
-+        if (getLexer().is(AsmToken::Comma))
-+          Index = Regs[Reg.Num];
-+        else
-+          Base = Regs[Reg.Num];
-+      } else
-+        return Error(Reg.StartLoc, "invalid address register");
-     } else {
-       // Parse the length.
-       if (getParser().parseExpression(Length))
-@@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsi
- // Parse a memory operand and add it to Operands.  The other arguments
- // are as above.
- SystemZAsmParser::OperandMatchResultTy
--SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs,
--                               RegisterKind RegKind, MemoryKind MemKind) {
-+SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind,
-+                               const unsigned *Regs, RegisterKind RegKind) {
-   SMLoc StartLoc = Parser.getTok().getLoc();
-   unsigned Base, Index;
-+  bool IsVector;
-   const MCExpr *Disp;
-   const MCExpr *Length;
--  if (parseAddress(Base, Disp, Index, Length, Regs, RegKind))
-+  if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind))
-     return MatchOperand_ParseFail;
- 
--  if (Index && MemKind != BDXMem)
--    {
--      Error(StartLoc, "invalid use of indexed addressing");
--      return MatchOperand_ParseFail;
--    }
-+  if (IsVector && MemKind != BDVMem) {
-+    Error(StartLoc, "invalid use of vector addressing");
-+    return MatchOperand_ParseFail;
-+  }
- 
--  if (Length && MemKind != BDLMem)
--    {
--      Error(StartLoc, "invalid use of length addressing");
--      return MatchOperand_ParseFail;
--    }
-+  if (!IsVector && MemKind == BDVMem) {
-+    Error(StartLoc, "vector index required in address");
-+    return MatchOperand_ParseFail;
-+  }
- 
--  if (!Length && MemKind == BDLMem)
--    {
--      Error(StartLoc, "missing length in address");
--      return MatchOperand_ParseFail;
--    }
-+  if (Index && MemKind != BDXMem && MemKind != BDVMem) {
-+    Error(StartLoc, "invalid use of indexed addressing");
-+    return MatchOperand_ParseFail;
-+  }
-+
-+  if (Length && MemKind != BDLMem) {
-+    Error(StartLoc, "invalid use of length addressing");
-+    return MatchOperand_ParseFail;
-+  }
-+
-+  if (!Length && MemKind == BDLMem) {
-+    Error(StartLoc, "missing length in address");
-+    return MatchOperand_ParseFail;
-+  }
- 
-   SMLoc EndLoc =
-     SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
--  Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index,
--                                               Length, StartLoc, EndLoc));
-+  Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp,
-+                                               Index, Length, StartLoc,
-+                                               EndLoc));
-   return MatchOperand_Success;
- }
- 
-@@ -589,6 +689,8 @@ bool SystemZAsmParser::ParseRegister(uns
-     RegNo = SystemZMC::GR64Regs[Reg.Num];
-   else if (Reg.Group == RegFP)
-     RegNo = SystemZMC::FP64Regs[Reg.Num];
-+  else if (Reg.Group == RegV)
-+    RegNo = SystemZMC::VR128Regs[Reg.Num];
-   else
-     // FIXME: Access registers aren't modelled as LLVM registers yet.
-     return Error(Reg.StartLoc, "invalid operand for instruction");
-@@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(Oper
-   // so we treat any plain expression as an immediate.
-   SMLoc StartLoc = Parser.getTok().getLoc();
-   unsigned Base, Index;
-+  bool IsVector;
-   const MCExpr *Expr, *Length;
--  if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg))
-+  if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs,
-+                   ADDR64Reg))
-     return true;
- 
-   SMLoc EndLoc =
-@@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(Operand
- 
- SystemZAsmParser::OperandMatchResultTy
- SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal,
--                             int64_t MaxVal) {
-+                             int64_t MaxVal, bool AllowTLS) {
-   MCContext &Ctx = getContext();
-   MCStreamer &Out = getStreamer();
-   const MCExpr *Expr;
-@@ -766,9 +870,54 @@ SystemZAsmParser::parsePCRel(OperandVect
-     Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx);
-   }
- 
-+  // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol.
-+  const MCExpr *Sym = nullptr;
-+  if (AllowTLS && getLexer().is(AsmToken::Colon)) {
-+    Parser.Lex();
-+
-+    if (Parser.getTok().isNot(AsmToken::Identifier)) {
-+      Error(Parser.getTok().getLoc(), "unexpected token");
-+      return MatchOperand_ParseFail;
-+    }
-+
-+    MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
-+    StringRef Name = Parser.getTok().getString();
-+    if (Name == "tls_gdcall")
-+      Kind = MCSymbolRefExpr::VK_TLSGD;
-+    else if (Name == "tls_ldcall")
-+      Kind = MCSymbolRefExpr::VK_TLSLDM;
-+    else {
-+      Error(Parser.getTok().getLoc(), "unknown TLS tag");
-+      return MatchOperand_ParseFail;
-+    }
-+    Parser.Lex();
-+
-+    if (Parser.getTok().isNot(AsmToken::Colon)) {
-+      Error(Parser.getTok().getLoc(), "unexpected token");
-+      return MatchOperand_ParseFail;
-+    }
-+    Parser.Lex();
-+
-+    if (Parser.getTok().isNot(AsmToken::Identifier)) {
-+      Error(Parser.getTok().getLoc(), "unexpected token");
-+      return MatchOperand_ParseFail;
-+    }
-+
-+    StringRef Identifier = Parser.getTok().getString();
-+    Sym = MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(Identifier),
-+                                  Kind, Ctx);
-+    Parser.Lex();
-+  }
-+
-   SMLoc EndLoc =
-     SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1);
--  Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
-+
-+  if (AllowTLS)
-+    Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym,
-+                                                    StartLoc, EndLoc));
-+  else
-+    Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc));
-+
-   return MatchOperand_Success;
- }
- 
-Index: llvm-36/lib/Target/SystemZ/CMakeLists.txt
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/CMakeLists.txt
-+++ llvm-36/lib/Target/SystemZ/CMakeLists.txt
-@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen
-   SystemZISelDAGToDAG.cpp
-   SystemZISelLowering.cpp
-   SystemZInstrInfo.cpp
-+  SystemZLDCleanup.cpp
-   SystemZLongBranch.cpp
-   SystemZMachineFunctionInfo.cpp
-   SystemZMCInstLower.cpp
-@@ -28,6 +29,7 @@ add_llvm_target(SystemZCodeGen
-   SystemZShortenInst.cpp
-   SystemZSubtarget.cpp
-   SystemZTargetMachine.cpp
-+  SystemZTargetTransformInfo.cpp
-   )
- 
- add_subdirectory(AsmParser)
-Index: llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
-+++ llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
-@@ -47,8 +47,8 @@ extern "C" void LLVMInitializeSystemZDis
- }
- 
- static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
--                                        const unsigned *Regs) {
--  assert(RegNo < 16 && "Invalid register");
-+                                        const unsigned *Regs, unsigned Size) {
-+  assert(RegNo < Size && "Invalid register");
-   RegNo = Regs[RegNo];
-   if (RegNo == 0)
-     return MCDisassembler::Fail;
-@@ -59,61 +59,81 @@ static DecodeStatus decodeRegisterClass(
- static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                uint64_t Address,
-                                                const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16);
- }
- 
- static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                 uint64_t Address,
-                                                 const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16);
- }
- 
- static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                uint64_t Address,
-                                                const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
- }
- 
- static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                 uint64_t Address,
-                                                 const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16);
- }
- 
- static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                  uint64_t Address,
-                                                  const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16);
- }
- 
- static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                uint64_t Address,
-                                                const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16);
- }
- 
- static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                uint64_t Address,
-                                                const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16);
- }
- 
- static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-                                                 uint64_t Address,
-                                                 const void *Decoder) {
--  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs);
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16);
-+}
-+
-+static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-+                                               uint64_t Address,
-+                                               const void *Decoder) {
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32);
-+}
-+
-+static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-+                                               uint64_t Address,
-+                                               const void *Decoder) {
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32);
-+}
-+
-+static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo,
-+                                                uint64_t Address,
-+                                                const void *Decoder) {
-+  return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32);
- }
- 
- template<unsigned N>
- static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) {
--  assert(isUInt<N>(Imm) && "Invalid immediate");
-+  if (!isUInt<N>(Imm))
-+    return MCDisassembler::Fail;
-   Inst.addOperand(MCOperand::CreateImm(Imm));
-   return MCDisassembler::Success;
- }
- 
- template<unsigned N>
- static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) {
--  assert(isUInt<N>(Imm) && "Invalid immediate");
-+  if (!isUInt<N>(Imm))
-+    return MCDisassembler::Fail;
-   Inst.addOperand(MCOperand::CreateImm(SignExtend64<N>(Imm)));
-   return MCDisassembler::Success;
- }
-@@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOpera
-   return decodeUImmOperand<4>(Inst, Imm);
- }
- 
-+static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm,
-+                                       uint64_t Address, const void *Decoder) {
-+  return decodeUImmOperand<1>(Inst, Imm);
-+}
-+
-+static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm,
-+                                       uint64_t Address, const void *Decoder) {
-+  return decodeUImmOperand<2>(Inst, Imm);
-+}
-+
-+static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm,
-+                                       uint64_t Address, const void *Decoder) {
-+  return decodeUImmOperand<3>(Inst, Imm);
-+}
-+
- static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm,
-                                        uint64_t Address, const void *Decoder) {
-   return decodeUImmOperand<4>(Inst, Imm);
-@@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(M
-   return decodeUImmOperand<8>(Inst, Imm);
- }
- 
-+static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm,
-+                                        uint64_t Address, const void *Decoder) {
-+  return decodeUImmOperand<12>(Inst, Imm);
-+}
-+
- static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm,
-                                         uint64_t Address, const void *Decoder) {
-   return decodeUImmOperand<16>(Inst, Imm);
-@@ -240,6 +280,18 @@ static DecodeStatus decodeBDLAddr12Len8O
-   return MCDisassembler::Success;
- }
- 
-+static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field,
-+                                           const unsigned *Regs) {
-+  uint64_t Index = Field >> 16;
-+  uint64_t Base = (Field >> 12) & 0xf;
-+  uint64_t Disp = Field & 0xfff;
-+  assert(Index < 32 && "Invalid BDVAddr12");
-+  Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base]));
-+  Inst.addOperand(MCOperand::CreateImm(Disp));
-+  Inst.addOperand(MCOperand::CreateReg(SystemZMC::VR128Regs[Index]));
-+  return MCDisassembler::Success;
-+}
-+
- static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field,
-                                                 uint64_t Address,
-                                                 const void *Decoder) {
-@@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp1
-   return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs);
- }
- 
-+static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field,
-+                                                 uint64_t Address,
-+                                                 const void *Decoder) {
-+  return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs);
-+}
-+
- #include "SystemZGenDisassemblerTables.inc"
- 
- DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
-Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
-+++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp
-@@ -10,6 +10,7 @@
- #include "SystemZInstPrinter.h"
- #include "llvm/MC/MCExpr.h"
- #include "llvm/MC/MCInstrInfo.h"
-+#include "llvm/MC/MCSymbol.h"
- #include "llvm/Support/raw_ostream.h"
- 
- using namespace llvm;
-@@ -21,13 +22,17 @@ using namespace llvm;
- void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp,
-                                       unsigned Index, raw_ostream &O) {
-   O << Disp;
--  if (Base) {
-+  if (Base || Index) {
-     O << '(';
--    if (Index)
--      O << '%' << getRegisterName(Index) << ',';
--    O << '%' << getRegisterName(Base) << ')';
--  } else
--    assert(!Index && "Shouldn't have an index without a base");
-+    if (Index) {
-+      O << '%' << getRegisterName(Index);
-+      if (Base)
-+        O << ',';
-+    }
-+    if (Base)
-+      O << '%' << getRegisterName(Base);
-+    O << ')';
-+  }
- }
- 
- void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) {
-@@ -51,60 +56,78 @@ void SystemZInstPrinter::printRegName(ra
-   O << '%' << getRegisterName(RegNo);
- }
- 
--void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
--                                           raw_ostream &O) {
-+template<unsigned N>
-+void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
-   int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isUInt<4>(Value) && "Invalid u4imm argument");
-+  assert(isUInt<N>(Value) && "Invalid uimm argument");
-   O << Value;
- }
- 
--void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
--                                           raw_ostream &O) {
-+template<unsigned N>
-+void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) {
-   int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isUInt<6>(Value) && "Invalid u6imm argument");
-+  assert(isInt<N>(Value) && "Invalid simm argument");
-   O << Value;
- }
- 
-+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum,
-+                                           raw_ostream &O) {
-+  printUImmOperand<1>(MI, OpNum, O);
-+}
-+
-+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum,
-+                                           raw_ostream &O) {
-+  printUImmOperand<2>(MI, OpNum, O);
-+}
-+
-+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum,
-+                                           raw_ostream &O) {
-+  printUImmOperand<3>(MI, OpNum, O);
-+}
-+
-+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum,
-+                                           raw_ostream &O) {
-+  printUImmOperand<4>(MI, OpNum, O);
-+}
-+
-+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum,
-+                                           raw_ostream &O) {
-+  printUImmOperand<6>(MI, OpNum, O);
-+}
-+
- void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isInt<8>(Value) && "Invalid s8imm argument");
--  O << Value;
-+  printSImmOperand<8>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isUInt<8>(Value) && "Invalid u8imm argument");
--  O << Value;
-+  printUImmOperand<8>(MI, OpNum, O);
-+}
-+
-+void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum,
-+                                            raw_ostream &O) {
-+  printUImmOperand<12>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isInt<16>(Value) && "Invalid s16imm argument");
--  O << Value;
-+  printSImmOperand<16>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isUInt<16>(Value) && "Invalid u16imm argument");
--  O << Value;
-+  printUImmOperand<16>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isInt<32>(Value) && "Invalid s32imm argument");
--  O << Value;
-+  printSImmOperand<32>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum,
-                                             raw_ostream &O) {
--  int64_t Value = MI->getOperand(OpNum).getImm();
--  assert(isUInt<32>(Value) && "Invalid u32imm argument");
--  O << Value;
-+  printUImmOperand<32>(MI, OpNum, O);
- }
- 
- void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum,
-@@ -124,6 +147,29 @@ void SystemZInstPrinter::printPCRelOpera
-     O << *MO.getExpr();
- }
- 
-+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum,
-+                                              raw_ostream &O) {
-+  // Output the PC-relative operand.
-+  printPCRelOperand(MI, OpNum, O);
-+
-+  // Output the TLS marker if present.
-+  if ((unsigned)OpNum + 1 < MI->getNumOperands()) {
-+    const MCOperand &MO = MI->getOperand(OpNum + 1);
-+    const MCSymbolRefExpr &refExp = cast<MCSymbolRefExpr>(*MO.getExpr());
-+    switch (refExp.getKind()) {
-+      case MCSymbolRefExpr::VK_TLSGD:
-+        O << ":tls_gdcall:";
-+        break;
-+      case MCSymbolRefExpr::VK_TLSLDM:
-+        O << ":tls_ldcall:";
-+        break;
-+      default:
-+        llvm_unreachable("Unexpected symbol kind");
-+    }
-+    O << refExp.getSymbol().getName();
-+  }
-+}
-+
- void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum,
-                                       raw_ostream &O) {
-   printOperand(MI->getOperand(OpNum), O);
-@@ -153,6 +199,13 @@ void SystemZInstPrinter::printBDLAddrOpe
-   O << ')';
- }
- 
-+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum,
-+                                             raw_ostream &O) {
-+  printAddress(MI->getOperand(OpNum).getReg(),
-+               MI->getOperand(OpNum + 1).getImm(),
-+               MI->getOperand(OpNum + 2).getReg(), O);
-+}
-+
- void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum,
-                                            raw_ostream &O) {
-   static const char *const CondNames[] = {
-Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
-+++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h
-@@ -47,15 +47,21 @@ private:
-   void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-+  void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O);
-   void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O);
- 
-   // Print the mnemonic for a condition-code mask ("ne", "lh", etc.)
-Index: llvm-36/lib/Target/SystemZ/LLVMBuild.txt
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/LLVMBuild.txt
-+++ llvm-36/lib/Target/SystemZ/LLVMBuild.txt
-@@ -31,5 +31,5 @@ has_jit = 1
- type = Library
- name = SystemZCodeGen
- parent = SystemZ
--required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
-+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target
- add_to_library_groups = SystemZ
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp
-@@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFi
-   switch (unsigned(Kind)) {
-   case SystemZ::FK_390_PC16DBL:
-   case SystemZ::FK_390_PC32DBL:
--  case SystemZ::FK_390_PLT16DBL:
--  case SystemZ::FK_390_PLT32DBL:
-     return (int64_t)Value / 2;
-+
-+  case SystemZ::FK_390_TLS_CALL:
-+    return 0;
-   }
- 
-   llvm_unreachable("Unknown fixup kind!");
-@@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MC
-   const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = {
-     { "FK_390_PC16DBL",  0, 16, MCFixupKindInfo::FKF_IsPCRel },
-     { "FK_390_PC32DBL",  0, 32, MCFixupKindInfo::FKF_IsPCRel },
--    { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel },
--    { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }
-+    { "FK_390_TLS_CALL", 0, 0, 0 }
-   };
- 
-   if (Kind < FirstTargetFixupKind)
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp
-@@ -70,24 +70,43 @@ private:
-   uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum,
-                                     SmallVectorImpl<MCFixup> &Fixups,
-                                     const MCSubtargetInfo &STI) const;
-+  uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
-+                                SmallVectorImpl<MCFixup> &Fixups,
-+                                const MCSubtargetInfo &STI) const;
- 
-   // Operand OpNum of MI needs a PC-relative fixup of kind Kind at
-   // Offset bytes from the start of MI.  Add the fixup to Fixups
-   // and return the in-place addend, which since we're a RELA target
--  // is always 0.
-+  // is always 0.  If AllowTLS is true and optional operand OpNum + 1
-+  // is present, also emit a TLS call fixup for it.
-   uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum,
-                             SmallVectorImpl<MCFixup> &Fixups,
--                            unsigned Kind, int64_t Offset) const;
-+                            unsigned Kind, int64_t Offset,
-+                            bool AllowTLS) const;
- 
-   uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum,
-                               SmallVectorImpl<MCFixup> &Fixups,
-                               const MCSubtargetInfo &STI) const {
--    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2);
-+    return getPCRelEncoding(MI, OpNum, Fixups,
-+                            SystemZ::FK_390_PC16DBL, 2, false);
-   }
-   uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum,
-                               SmallVectorImpl<MCFixup> &Fixups,
-                               const MCSubtargetInfo &STI) const {
--    return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2);
-+    return getPCRelEncoding(MI, OpNum, Fixups,
-+                            SystemZ::FK_390_PC32DBL, 2, false);
-+  }
-+  uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
-+                                 SmallVectorImpl<MCFixup> &Fixups,
-+                                 const MCSubtargetInfo &STI) const {
-+    return getPCRelEncoding(MI, OpNum, Fixups,
-+                            SystemZ::FK_390_PC16DBL, 2, true);
-+  }
-+  uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum,
-+                                 SmallVectorImpl<MCFixup> &Fixups,
-+                                 const MCSubtargetInfo &STI) const {
-+    return getPCRelEncoding(MI, OpNum, Fixups,
-+                            SystemZ::FK_390_PC32DBL, 2, true);
-   }
- };
- } // end anonymous namespace
-@@ -178,10 +197,22 @@ getBDLAddr12Len8Encoding(const MCInst &M
-   return (Len << 16) | (Base << 12) | Disp;
- }
- 
-+uint64_t SystemZMCCodeEmitter::
-+getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum,
-+                     SmallVectorImpl<MCFixup> &Fixups,
-+                     const MCSubtargetInfo &STI) const {
-+  uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI);
-+  uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI);
-+  uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI);
-+  assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index));
-+  return (Index << 16) | (Base << 12) | Disp;
-+}
-+
- uint64_t
- SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum,
-                                        SmallVectorImpl<MCFixup> &Fixups,
--                                       unsigned Kind, int64_t Offset) const {
-+                                       unsigned Kind, int64_t Offset,
-+                                       bool AllowTLS) const {
-   const MCOperand &MO = MI.getOperand(OpNum);
-   const MCExpr *Expr;
-   if (MO.isImm())
-@@ -198,6 +229,13 @@ SystemZMCCodeEmitter::getPCRelEncoding(c
-     }
-   }
-   Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind));
-+
-+  // Output the fixup for the TLS marker if present.
-+  if (AllowTLS && OpNum + 1 < MI.getNumOperands()) {
-+    const MCOperand &MOTLS = MI.getOperand(OpNum + 1);
-+    Fixups.push_back(MCFixup::Create(0, MOTLS.getExpr(),
-+                                     (MCFixupKind)SystemZ::FK_390_TLS_CALL));
-+  }
-   return 0;
- }
- 
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h
-@@ -18,8 +18,7 @@ enum FixupKind {
-   // These correspond directly to R_390_* relocations.
-   FK_390_PC16DBL = FirstTargetFixupKind,
-   FK_390_PC32DBL,
--  FK_390_PLT16DBL,
--  FK_390_PLT32DBL,
-+  FK_390_TLS_CALL,
- 
-   // Marker
-   LastTargetFixupKind,
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp
-@@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned K
-   case FK_Data_8:                return ELF::R_390_PC64;
-   case SystemZ::FK_390_PC16DBL:  return ELF::R_390_PC16DBL;
-   case SystemZ::FK_390_PC32DBL:  return ELF::R_390_PC32DBL;
--  case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL;
--  case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL;
-   }
-   llvm_unreachable("Unsupported PC-relative address");
- }
-@@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned K
-   llvm_unreachable("Unsupported absolute address");
- }
- 
-+// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind.
-+static unsigned getTLSLDOReloc(unsigned Kind) {
-+  switch (Kind) {
-+  case FK_Data_4: return ELF::R_390_TLS_LDO32;
-+  case FK_Data_8: return ELF::R_390_TLS_LDO64;
-+  }
-+  llvm_unreachable("Unsupported absolute address");
-+}
-+
-+// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind.
-+static unsigned getTLSLDMReloc(unsigned Kind) {
-+  switch (Kind) {
-+  case FK_Data_4: return ELF::R_390_TLS_LDM32;
-+  case FK_Data_8: return ELF::R_390_TLS_LDM64;
-+  case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL;
-+  }
-+  llvm_unreachable("Unsupported absolute address");
-+}
-+
-+// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind.
-+static unsigned getTLSGDReloc(unsigned Kind) {
-+  switch (Kind) {
-+  case FK_Data_4: return ELF::R_390_TLS_GD32;
-+  case FK_Data_8: return ELF::R_390_TLS_GD64;
-+  case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL;
-+  }
-+  llvm_unreachable("Unsupported absolute address");
-+}
-+
- // Return the PLT relocation counterpart of MCFixupKind Kind.
- static unsigned getPLTReloc(unsigned Kind) {
-   switch (Kind) {
-@@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocTy
-     assert(!IsPCRel && "NTPOFF shouldn't be PC-relative");
-     return getTLSLEReloc(Kind);
- 
-+  case MCSymbolRefExpr::VK_INDNTPOFF:
-+    if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
-+      return ELF::R_390_TLS_IEENT;
-+    llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now");
-+
-+  case MCSymbolRefExpr::VK_DTPOFF:
-+    assert(!IsPCRel && "DTPOFF shouldn't be PC-relative");
-+    return getTLSLDOReloc(Kind);
-+
-+  case MCSymbolRefExpr::VK_TLSLDM:
-+    assert(!IsPCRel && "TLSLDM shouldn't be PC-relative");
-+    return getTLSLDMReloc(Kind);
-+
-+  case MCSymbolRefExpr::VK_TLSGD:
-+    assert(!IsPCRel && "TLSGD shouldn't be PC-relative");
-+    return getTLSGDReloc(Kind);
-+
-   case MCSymbolRefExpr::VK_GOT:
-     if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL)
-       return ELF::R_390_GOTENT;
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp
-@@ -76,6 +76,39 @@ const unsigned SystemZMC::FP128Regs[16]
-   SystemZ::F12Q, SystemZ::F13Q, 0, 0
- };
- 
-+const unsigned SystemZMC::VR32Regs[32] = {
-+  SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S,
-+  SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S,
-+  SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S,
-+  SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S,
-+  SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S,
-+  SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S,
-+  SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S,
-+  SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S
-+};
-+
-+const unsigned SystemZMC::VR64Regs[32] = {
-+  SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D,
-+  SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D,
-+  SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D,
-+  SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D,
-+  SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D,
-+  SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D,
-+  SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D,
-+  SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D
-+};
-+
-+const unsigned SystemZMC::VR128Regs[32] = {
-+  SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3,
-+  SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7,
-+  SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11,
-+  SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15,
-+  SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19,
-+  SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23,
-+  SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
-+  SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31
-+};
-+
- unsigned SystemZMC::getFirstReg(unsigned Reg) {
-   static unsigned Map[SystemZ::NUM_TARGET_REGS];
-   static bool Initialized = false;
-@@ -85,10 +118,13 @@ unsigned SystemZMC::getFirstReg(unsigned
-       Map[GRH32Regs[I]] = I;
-       Map[GR64Regs[I]] = I;
-       Map[GR128Regs[I]] = I;
--      Map[FP32Regs[I]] = I;
--      Map[FP64Regs[I]] = I;
-       Map[FP128Regs[I]] = I;
-     }
-+    for (unsigned I = 0; I < 32; ++I) {
-+      Map[VR32Regs[I]] = I;
-+      Map[VR64Regs[I]] = I;
-+      Map[VR128Regs[I]] = I;
-+    }
-   }
-   assert(Reg < SystemZ::NUM_TARGET_REGS);
-   return Map[Reg];
-Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
-+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h
-@@ -48,6 +48,9 @@ extern const unsigned GR128Regs[16];
- extern const unsigned FP32Regs[16];
- extern const unsigned FP64Regs[16];
- extern const unsigned FP128Regs[16];
-+extern const unsigned VR32Regs[32];
-+extern const unsigned VR64Regs[32];
-+extern const unsigned VR128Regs[32];
- 
- // Return the 0-based number of the first architectural register that
- // contains the given LLVM register.   E.g. R1D -> 1.
-@@ -67,6 +70,11 @@ inline unsigned getRegAsGR32(unsigned Re
- inline unsigned getRegAsGRH32(unsigned Reg) {
-   return GRH32Regs[getFirstReg(Reg)];
- }
-+
-+// Return the given register as a VR128.
-+inline unsigned getRegAsVR128(unsigned Reg) {
-+  return VR128Regs[getFirstReg(Reg)];
-+}
- } // end namespace SystemZMC
- 
- MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII,
-Index: llvm-36/lib/Target/SystemZ/SystemZ.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZ.h
-+++ llvm-36/lib/Target/SystemZ/SystemZ.h
-@@ -21,6 +21,7 @@
- namespace llvm {
- class SystemZTargetMachine;
- class FunctionPass;
-+class ImmutablePass;
- 
- namespace SystemZ {
- // Condition-code mask values.
-@@ -68,6 +69,25 @@ const unsigned CCMASK_TM_MSB_0       = C
- const unsigned CCMASK_TM_MSB_1       = CCMASK_2 | CCMASK_3;
- const unsigned CCMASK_TM             = CCMASK_ANY;
- 
-+// Condition-code mask assignments for TRANSACTION_BEGIN.
-+const unsigned CCMASK_TBEGIN_STARTED       = CCMASK_0;
-+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1;
-+const unsigned CCMASK_TBEGIN_TRANSIENT     = CCMASK_2;
-+const unsigned CCMASK_TBEGIN_PERSISTENT    = CCMASK_3;
-+const unsigned CCMASK_TBEGIN               = CCMASK_ANY;
-+
-+// Condition-code mask assignments for TRANSACTION_END.
-+const unsigned CCMASK_TEND_TX   = CCMASK_0;
-+const unsigned CCMASK_TEND_NOTX = CCMASK_2;
-+const unsigned CCMASK_TEND      = CCMASK_TEND_TX | CCMASK_TEND_NOTX;
-+
-+// Condition-code mask assignments for vector comparisons (and similar
-+// operations).
-+const unsigned CCMASK_VCMP_ALL       = CCMASK_0;
-+const unsigned CCMASK_VCMP_MIXED     = CCMASK_1;
-+const unsigned CCMASK_VCMP_NONE      = CCMASK_3;
-+const unsigned CCMASK_VCMP           = CCMASK_0 | CCMASK_1 | CCMASK_3;
-+
- // The position of the low CC bit in an IPM result.
- const unsigned IPM_CC = 28;
- 
-@@ -75,6 +95,13 @@ const unsigned IPM_CC = 28;
- const unsigned PFD_READ  = 1;
- const unsigned PFD_WRITE = 2;
- 
-+// Number of bits in a vector register.
-+const unsigned VectorBits = 128;
-+
-+// Number of bytes in a vector register (and consequently the number of
-+// bytes in a general permute vector).
-+const unsigned VectorBytes = VectorBits / 8;
-+
- // Return true if Val fits an LLILL operand.
- static inline bool isImmLL(uint64_t Val) {
-   return (Val & ~0x000000000000ffffULL) == 0;
-@@ -111,6 +138,9 @@ FunctionPass *createSystemZISelDag(Syste
- FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM);
- FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM);
- FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM);
-+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM);
-+ImmutablePass *createSystemZTargetTransformInfoPass(
-+  const SystemZTargetMachine *TM);
- } // end namespace llvm
- 
- #endif
-Index: llvm-36/lib/Target/SystemZ/SystemZ.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZ.td
-+++ llvm-36/lib/Target/SystemZ/SystemZ.td
-@@ -40,6 +40,7 @@ include "SystemZOperands.td"
- include "SystemZPatterns.td"
- include "SystemZInstrFormats.td"
- include "SystemZInstrInfo.td"
-+include "SystemZInstrVector.td"
- include "SystemZInstrFP.td"
- 
- def SystemZInstrInfo : InstrInfo {}
-Index: llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZAsmPrinter.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp
-@@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const Machine
-     .addImm(MI->getOperand(5).getImm());
- }
- 
-+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) {
-+  StringRef Name = "__tls_get_offset";
-+  return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name),
-+                                 MCSymbolRefExpr::VK_PLT,
-+                                 Context);
-+}
-+
-+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) {
-+  StringRef Name = "_GLOBAL_OFFSET_TABLE_";
-+  return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name),
-+                                 MCSymbolRefExpr::VK_None,
-+                                 Context);
-+}
-+
-+// MI loads the high part of a vector from memory.  Return an instruction
-+// that uses replicating vector load Opcode to do the same thing.
-+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) {
-+  return MCInstBuilder(Opcode)
-+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
-+    .addReg(MI->getOperand(1).getReg())
-+    .addImm(MI->getOperand(2).getImm())
-+    .addReg(MI->getOperand(3).getReg());
-+}
-+
-+// MI stores the high part of a vector to memory.  Return an instruction
-+// that uses elemental vector store Opcode to do the same thing.
-+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) {
-+  return MCInstBuilder(Opcode)
-+    .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
-+    .addReg(MI->getOperand(1).getReg())
-+    .addImm(MI->getOperand(2).getImm())
-+    .addReg(MI->getOperand(3).getReg())
-+    .addImm(0);
-+}
-+
- void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) {
-   SystemZMCInstLower Lower(MF->getContext(), *this);
-   MCInst LoweredMI;
-@@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction(
-     LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D);
-     break;
- 
-+  case SystemZ::TLS_GDCALL:
-+    LoweredMI = MCInstBuilder(SystemZ::BRASL)
-+      .addReg(SystemZ::R14D)
-+      .addExpr(getTLSGetOffset(MF->getContext()))
-+      .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD));
-+    break;
-+
-+  case SystemZ::TLS_LDCALL:
-+    LoweredMI = MCInstBuilder(SystemZ::BRASL)
-+      .addReg(SystemZ::R14D)
-+      .addExpr(getTLSGetOffset(MF->getContext()))
-+      .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM));
-+    break;
-+
-+  case SystemZ::GOT:
-+    LoweredMI = MCInstBuilder(SystemZ::LARL)
-+      .addReg(MI->getOperand(0).getReg())
-+      .addExpr(getGlobalOffsetTable(MF->getContext()));
-+    break;
-+
-   case SystemZ::IILF64:
-     LoweredMI = MCInstBuilder(SystemZ::IILF)
-       .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg()))
-@@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction(
-     LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG);
-     break;
- 
-+  case SystemZ::VLVGP32:
-+    LoweredMI = MCInstBuilder(SystemZ::VLVGP)
-+      .addReg(MI->getOperand(0).getReg())
-+      .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg()))
-+      .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg()));
-+    break;
-+
-+  case SystemZ::VLR32:
-+  case SystemZ::VLR64:
-+    LoweredMI = MCInstBuilder(SystemZ::VLR)
-+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
-+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()));
-+    break;
-+
-+  case SystemZ::VL32:
-+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF);
-+    break;
-+
-+  case SystemZ::VL64:
-+    LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG);
-+    break;
-+
-+  case SystemZ::VST32:
-+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF);
-+    break;
-+
-+  case SystemZ::VST64:
-+    LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG);
-+    break;
-+
-+  case SystemZ::LFER:
-+    LoweredMI = MCInstBuilder(SystemZ::VLGVF)
-+      .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg()))
-+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg()))
-+      .addReg(0).addImm(0);
-+    break;
-+
-+  case SystemZ::LEFR:
-+    LoweredMI = MCInstBuilder(SystemZ::VLVGF)
-+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
-+      .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg()))
-+      .addReg(MI->getOperand(1).getReg())
-+      .addReg(0).addImm(0);
-+    break;
-+
- #define LOWER_LOW(NAME)                                                 \
-   case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break
- 
-@@ -172,6 +272,9 @@ void SystemZAsmPrinter::EmitInstruction(
- static MCSymbolRefExpr::VariantKind
- getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) {
-   switch (Modifier) {
-+  case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD;
-+  case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM;
-+  case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF;
-   case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF;
-   }
-   llvm_unreachable("Invalid SystemCPModifier!");
-Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.h
-+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.h
-@@ -10,6 +10,9 @@
- #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
- #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
- 
-+#include "llvm/ADT/SmallVector.h"
-+#include "llvm/CodeGen/CallingConvLower.h"
-+
- namespace llvm {
- namespace SystemZ {
-   const unsigned NumArgGPRs = 5;
-@@ -18,6 +21,64 @@ namespace SystemZ {
-   const unsigned NumArgFPRs = 4;
-   extern const unsigned ArgFPRs[NumArgFPRs];
- } // end namespace SystemZ
-+
-+class SystemZCCState : public CCState {
-+private:
-+  /// Records whether the value was a fixed argument.
-+  /// See ISD::OutputArg::IsFixed.
-+  SmallVector<bool, 4> ArgIsFixed;
-+
-+  /// Records whether the value was widened from a short vector type.
-+  SmallVector<bool, 4> ArgIsShortVector;
-+
-+  // Check whether ArgVT is a short vector type.
-+  bool IsShortVectorType(EVT ArgVT) {
-+    return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
-+  }
-+
-+public:
-+  SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
-+                 SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
-+      : CCState(CC, isVarArg, MF, locs, C) {}
-+
-+  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
-+                              CCAssignFn Fn) {
-+    // Formal arguments are always fixed.
-+    ArgIsFixed.clear();
-+    for (unsigned i = 0; i < Ins.size(); ++i)
-+      ArgIsFixed.push_back(true);
-+    // Record whether the call operand was a short vector.
-+    ArgIsShortVector.clear();
-+    for (unsigned i = 0; i < Ins.size(); ++i)
-+      ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
-+
-+    CCState::AnalyzeFormalArguments(Ins, Fn);
-+  }
-+
-+  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
-+                           CCAssignFn Fn) {
-+    // Record whether the call operand was a fixed argument.
-+    ArgIsFixed.clear();
-+    for (unsigned i = 0; i < Outs.size(); ++i)
-+      ArgIsFixed.push_back(Outs[i].IsFixed);
-+    // Record whether the call operand was a short vector.
-+    ArgIsShortVector.clear();
-+    for (unsigned i = 0; i < Outs.size(); ++i)
-+      ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
-+
-+    CCState::AnalyzeCallOperands(Outs, Fn);
-+  }
-+
-+  // This version of AnalyzeCallOperands in the base class is not usable
-+  // since we must provide a means of accessing ISD::OutputArg::IsFixed.
-+  void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
-+                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
-+                           CCAssignFn Fn) = delete;
-+
-+  bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
-+  bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
-+};
-+
- } // end namespace llvm
- 
- #endif
-Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td
-+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
-@@ -12,6 +12,20 @@
- class CCIfExtend<CCAction A>
-   : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
- 
-+class CCIfSubtarget<string F, CCAction A>
-+  : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
-+                    "(State.getMachineFunction().getSubtarget()).", F),
-+         A>;
-+
-+// Match if this specific argument is a fixed (i.e. named) argument.
-+class CCIfFixed<CCAction A>
-+    : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
-+
-+// Match if this specific argument was widened from a short vector type.
-+class CCIfShortVector<CCAction A>
-+    : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
-+
-+
- //===----------------------------------------------------------------------===//
- // z/Linux return value calling convention
- //===----------------------------------------------------------------------===//
-@@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[
-   // doesn't care about the ABI.  All floating-point argument registers
-   // are call-clobbered, so we can use all of them here.
-   CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
--  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
-+  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
-+
-+  // Similarly for vectors, with V24 being the ABI-compliant choice.
-+  // Sub-128 vectors are returned in the same way, but they're widened
-+  // to one of these types during type legalization.
-+  CCIfSubtarget<"hasVector()",
-+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-+             CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
- 
-   // ABI-compliant code returns long double by reference, but that conversion
-   // is left to higher-level code.  Perhaps we could add an f128 definition
-@@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[
-   CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
-   CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
- 
-+  // The first 8 named vector arguments are passed in V24-V31.  Sub-128 vectors
-+  // are passed in the same way, but they're widened to one of these types
-+  // during type legalization.
-+  CCIfSubtarget<"hasVector()",
-+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-+             CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
-+                                      V25, V27, V29, V31]>>>>,
-+
-+  // However, sub-128 vectors which need to go on the stack occupy just a
-+  // single 8-byte-aligned 8-byte stack slot.  Pass as i64.
-+  CCIfSubtarget<"hasVector()",
-+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-+             CCIfShortVector<CCBitConvertToType<i64>>>>,
-+
-+  // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
-+  CCIfSubtarget<"hasVector()",
-+    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
-+             CCAssignToStack<16, 8>>>,
-+
-   // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
-   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
- ]>;
-Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
-@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const G
- 
- unsigned SystemZConstantPoolValue::getRelocationInfo() const {
-   switch (Modifier) {
-+  case SystemZCP::TLSGD:
-+  case SystemZCP::TLSLDM:
-+  case SystemZCP::DTPOFF:
-+    // May require a dynamic relocation.
-+    return 2;
-   case SystemZCP::NTPOFF:
-     // May require a relocation, but the relocations are always resolved
-     // by the static linker.
-Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.h
-+++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h
-@@ -19,13 +19,17 @@ class GlobalValue;
- 
- namespace SystemZCP {
- enum SystemZCPModifier {
-+  TLSGD,
-+  TLSLDM,
-+  DTPOFF,
-   NTPOFF
- };
- } // end namespace SystemZCP
- 
- /// A SystemZ-specific constant pool value.  At present, the only
--/// defined constant pool values are offsets of thread-local variables
--/// (written x@NTPOFF).
-+/// defined constant pool values are module IDs or offsets of
-+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF,
-+/// or x@NTPOFF).
- class SystemZConstantPoolValue : public MachineConstantPoolValue {
-   const GlobalValue *GV;
-   SystemZCP::SystemZCPModifier Modifier;
-Index: llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
-@@ -256,6 +256,13 @@ class SystemZDAGToDAGISel : public Selec
-                          Addr, Base, Disp, Index);
-   }
- 
-+  // Try to match Addr as an address with a base, 12-bit displacement
-+  // and index, where the index is element Elem of a vector.
-+  // Return true on success, storing the base, displacement and vector
-+  // in Base, Disp and Index respectively.
-+  bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base,
-+                           SDValue &Disp, SDValue &Index) const;
-+
-   // Check whether (or Op (and X InsertMask)) is effectively an insertion
-   // of X into bits InsertMask of some Y != Op.  Return true if so and
-   // set Op to that Y.
-@@ -293,6 +300,12 @@ class SystemZDAGToDAGISel : public Selec
-   SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0,
-                               uint64_t UpperVal, uint64_t LowerVal);
- 
-+  // Try to use gather instruction Opcode to implement vector insertion N.
-+  SDNode *tryGather(SDNode *N, unsigned Opcode);
-+
-+  // Try to use scatter instruction Opcode to implement store Store.
-+  SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode);
-+
-   // Return true if Load and Store are loads and stores of the same size
-   // and are guaranteed not to overlap.  Such operations can be implemented
-   // using block (SS-format) instructions.
-@@ -643,6 +656,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr(
-   return true;
- }
- 
-+bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem,
-+                                              SDValue &Base,
-+                                              SDValue &Disp,
-+                                              SDValue &Index) const {
-+  SDValue Regs[2];
-+  if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) &&
-+      Regs[0].getNode() && Regs[1].getNode()) {
-+    for (unsigned int I = 0; I < 2; ++I) {
-+      Base = Regs[I];
-+      Index = Regs[1 - I];
-+      // We can't tell here whether the index vector has the right type
-+      // for the access; the caller needs to do that instead.
-+      if (Index.getOpcode() == ISD::ZERO_EXTEND)
-+        Index = Index.getOperand(0);
-+      if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-+          Index.getOperand(1) == Elem) {
-+        Index = Index.getOperand(0);
-+        return true;
-+      }
-+    }
-+  }
-+  return false;
-+}
-+
- bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op,
-                                                uint64_t InsertMask) const {
-   // We're only interested in cases where the insertion is into some operand
-@@ -896,6 +933,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZer
-   }  
- 
-   unsigned Opcode = SystemZ::RISBG;
-+  // Prefer RISBGN if available, since it does not clobber CC.
-+  if (Subtarget.hasMiscellaneousExtensions())
-+    Opcode = SystemZ::RISBGN;
-   EVT OpcodeVT = MVT::i64;
-   if (VT == MVT::i32 && Subtarget.hasHighWord()) {
-     Opcode = SystemZ::RISBMux;
-@@ -943,9 +983,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SD
- 
-   // See whether we can avoid an AND in the first operand by converting
-   // ROSBG to RISBG.
--  if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask))
-+  if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) {
-     Opcode = SystemZ::RISBG;
--           
-+    // Prefer RISBGN if available, since it does not clobber CC.
-+    if (Subtarget.hasMiscellaneousExtensions())
-+      Opcode = SystemZ::RISBGN;
-+  }
-+
-   EVT VT = N->getValueType(0);
-   SDValue Ops[5] = {
-     convertTo(SDLoc(N), MVT::i64, Op0),
-@@ -973,6 +1017,71 @@ SDNode *SystemZDAGToDAGISel::splitLargeI
-   return Or.getNode();
- }
- 
-+SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) {
-+  SDValue ElemV = N->getOperand(2);
-+  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
-+  if (!ElemN)
-+    return 0;
-+
-+  unsigned Elem = ElemN->getZExtValue();
-+  EVT VT = N->getValueType(0);
-+  if (Elem >= VT.getVectorNumElements())
-+    return 0;
-+
-+  auto *Load = dyn_cast<LoadSDNode>(N->getOperand(1));
-+  if (!Load || !Load->hasOneUse())
-+    return 0;
-+  if (Load->getMemoryVT().getSizeInBits() !=
-+      Load->getValueType(0).getSizeInBits())
-+    return 0;
-+
-+  SDValue Base, Disp, Index;
-+  if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) ||
-+      Index.getValueType() != VT.changeVectorElementTypeToInteger())
-+    return 0;
-+
-+  SDLoc DL(Load);
-+  SDValue Ops[] = {
-+    N->getOperand(0), Base, Disp, Index,
-+    CurDAG->getTargetConstant(Elem, MVT::i32), Load->getChain()
-+  };
-+  SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops);
-+  ReplaceUses(SDValue(Load, 1), SDValue(Res, 1));
-+  return Res;
-+}
-+
-+SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) {
-+  SDValue Value = Store->getValue();
-+  if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT)
-+    return 0;
-+  if (Store->getMemoryVT().getSizeInBits() !=
-+      Value.getValueType().getSizeInBits())
-+    return 0;
-+
-+  SDValue ElemV = Value.getOperand(1);
-+  auto *ElemN = dyn_cast<ConstantSDNode>(ElemV);
-+  if (!ElemN)
-+    return 0;
-+
-+  SDValue Vec = Value.getOperand(0);
-+  EVT VT = Vec.getValueType();
-+  unsigned Elem = ElemN->getZExtValue();
-+  if (Elem >= VT.getVectorNumElements())
-+    return 0;
-+
-+  SDValue Base, Disp, Index;
-+  if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) ||
-+      Index.getValueType() != VT.changeVectorElementTypeToInteger())
-+    return 0;
-+
-+  SDLoc DL(Store);
-+  SDValue Ops[] = {
-+    Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, MVT::i32),
-+    Store->getChain()
-+  };
-+  return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops);
-+}
-+
- bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store,
-                                                LoadSDNode *Load) const {
-   // Check that the two memory operands have the same size.
-@@ -1109,6 +1218,26 @@ SDNode *SystemZDAGToDAGISel::Select(SDNo
-     }
-     break;
-   }
-+
-+  case ISD::INSERT_VECTOR_ELT: {
-+    EVT VT = Node->getValueType(0);
-+    unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
-+    if (ElemBitSize == 32)
-+      ResNode = tryGather(Node, SystemZ::VGEF);
-+    else if (ElemBitSize == 64)
-+      ResNode = tryGather(Node, SystemZ::VGEG);
-+    break;
-+  }
-+
-+  case ISD::STORE: {
-+    auto *Store = cast<StoreSDNode>(Node);
-+    unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits();
-+    if (ElemBitSize == 32)
-+      ResNode = tryScatter(Store, SystemZ::VSCEF);
-+    else if (ElemBitSize == 64)
-+      ResNode = tryScatter(Store, SystemZ::VSCEG);
-+    break;
-+  }
-   }
- 
-   // Select the default instruction
-Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
-@@ -20,6 +20,7 @@
- #include "llvm/CodeGen/MachineInstrBuilder.h"
- #include "llvm/CodeGen/MachineRegisterInfo.h"
- #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-+#include "llvm/IR/Intrinsics.h"
- #include <cctype>
- 
- using namespace llvm;
-@@ -90,11 +91,25 @@ SystemZTargetLowering::SystemZTargetLowe
-     addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass);
-   else
-     addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass);
--  addRegisterClass(MVT::i64,  &SystemZ::GR64BitRegClass);
--  addRegisterClass(MVT::f32,  &SystemZ::FP32BitRegClass);
--  addRegisterClass(MVT::f64,  &SystemZ::FP64BitRegClass);
-+  addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass);
-+  if (Subtarget.hasVector()) {
-+    addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass);
-+    addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass);
-+  } else {
-+    addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass);
-+    addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass);
-+  }
-   addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass);
- 
-+  if (Subtarget.hasVector()) {
-+    addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass);
-+    addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass);
-+    addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass);
-+    addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass);
-+    addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass);
-+    addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass);
-+  }
-+
-   // Compute derived properties from the register classes
-   computeRegisterProperties();
- 
-@@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowe
-   setSchedulingPreference(Sched::RegPressure);
- 
-   setBooleanContents(ZeroOrOneBooleanContent);
--  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
-+  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
- 
-   // Instructions are strings of 2-byte aligned 2-byte values.
-   setMinFunctionAlignment(2);
-@@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowe
-       // available, or if the operand is constant.
-       setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom);
- 
-+      // Use POPCNT on z196 and above.
-+      if (Subtarget.hasPopulationCount())
-+        setOperationAction(ISD::CTPOP, VT, Custom);
-+      else
-+        setOperationAction(ISD::CTPOP, VT, Expand);
-+
-       // No special instructions for these.
--      setOperationAction(ISD::CTPOP,           VT, Expand);
-       setOperationAction(ISD::CTTZ,            VT, Expand);
-       setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
-       setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);
-@@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowe
-   // Handle prefetches with PFD or PFDRL.
-   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- 
-+  for (MVT VT : MVT::vector_valuetypes()) {
-+    // Assume by default that all vector operations need to be expanded.
-+    for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode)
-+      if (getOperationAction(Opcode, VT) == Legal)
-+        setOperationAction(Opcode, VT, Expand);
-+
-+    // Likewise all truncating stores and extending loads.
-+    for (MVT InnerVT : MVT::vector_valuetypes()) {
-+      setTruncStoreAction(VT, InnerVT, Expand);
-+      setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand);
-+      setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand);
-+      setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand);
-+    }
-+
-+    if (isTypeLegal(VT)) {
-+      // These operations are legal for anything that can be stored in a
-+      // vector register, even if there is no native support for the format
-+      // as such.  In particular, we can do these for v4f32 even though there
-+      // are no specific instructions for that format.
-+      setOperationAction(ISD::LOAD, VT, Legal);
-+      setOperationAction(ISD::STORE, VT, Legal);
-+      setOperationAction(ISD::VSELECT, VT, Legal);
-+      setOperationAction(ISD::BITCAST, VT, Legal);
-+      setOperationAction(ISD::UNDEF, VT, Legal);
-+
-+      // Likewise, except that we need to replace the nodes with something
-+      // more specific.
-+      setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
-+      setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
-+    }
-+  }
-+
-+  // Handle integer vector types.
-+  for (MVT VT : MVT::integer_vector_valuetypes()) {
-+    if (isTypeLegal(VT)) {
-+      // These operations have direct equivalents.
-+      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal);
-+      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal);
-+      setOperationAction(ISD::ADD, VT, Legal);
-+      setOperationAction(ISD::SUB, VT, Legal);
-+      if (VT != MVT::v2i64)
-+        setOperationAction(ISD::MUL, VT, Legal);
-+      setOperationAction(ISD::AND, VT, Legal);
-+      setOperationAction(ISD::OR, VT, Legal);
-+      setOperationAction(ISD::XOR, VT, Legal);
-+      setOperationAction(ISD::CTPOP, VT, Custom);
-+      setOperationAction(ISD::CTTZ, VT, Legal);
-+      setOperationAction(ISD::CTLZ, VT, Legal);
-+      setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom);
-+      setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
-+
-+      // Convert a GPR scalar to a vector by inserting it into element 0.
-+      setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom);
-+
-+      // Use a series of unpacks for extensions.
-+      setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom);
-+      setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom);
-+
-+      // Detect shifts by a scalar amount and convert them into
-+      // V*_BY_SCALAR.
-+      setOperationAction(ISD::SHL, VT, Custom);
-+      setOperationAction(ISD::SRA, VT, Custom);
-+      setOperationAction(ISD::SRL, VT, Custom);
-+
-+      // At present ROTL isn't matched by DAGCombiner.  ROTR should be
-+      // converted into ROTL.
-+      setOperationAction(ISD::ROTL, VT, Expand);
-+      setOperationAction(ISD::ROTR, VT, Expand);
-+
-+      // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands
-+      // and inverting the result as necessary.
-+      setOperationAction(ISD::SETCC, VT, Custom);
-+    }
-+  }
-+
-+  if (Subtarget.hasVector()) {
-+    // There should be no need to check for float types other than v2f64
-+    // since <2 x f32> isn't a legal type.
-+    setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal);
-+    setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal);
-+    setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal);
-+    setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal);
-+  }
-+
-   // Handle floating-point types.
-   for (unsigned I = MVT::FIRST_FP_VALUETYPE;
-        I <= MVT::LAST_FP_VALUETYPE;
-@@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowe
-     }
-   }
- 
-+  // Handle floating-point vector types.
-+  if (Subtarget.hasVector()) {
-+    // Scalar-to-vector conversion is just a subreg.
-+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal);
-+    setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal);
-+
-+    // Some insertions and extractions can be done directly but others
-+    // need to go via integers.
-+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);
-+    setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
-+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);
-+    setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom);
-+
-+    // These operations have direct equivalents.
-+    setOperationAction(ISD::FADD, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FNEG, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FSUB, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FMUL, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FMA, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FDIV, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FABS, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FSQRT, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
-+    setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
-+  }
-+
-   // We have fused multiply-addition for f32 and f64 but not f128.
-   setOperationAction(ISD::FMA, MVT::f32,  Legal);
-   setOperationAction(ISD::FMA, MVT::f64,  Legal);
-@@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowe
- 
-   // We have 64-bit FPR<->GPR moves, but need special handling for
-   // 32-bit forms.
--  setOperationAction(ISD::BITCAST, MVT::i32, Custom);
--  setOperationAction(ISD::BITCAST, MVT::f32, Custom);
-+  if (!Subtarget.hasVector()) {
-+    setOperationAction(ISD::BITCAST, MVT::i32, Custom);
-+    setOperationAction(ISD::BITCAST, MVT::f32, Custom);
-+  }
- 
-   // VASTART and VACOPY need to deal with the SystemZ-specific varargs
-   // structure, but VAEND is a no-op.
-@@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowe
- 
-   // Codes for which we want to perform some z-specific combinations.
-   setTargetDAGCombine(ISD::SIGN_EXTEND);
-+  setTargetDAGCombine(ISD::STORE);
-+  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
-+  setTargetDAGCombine(ISD::FP_ROUND);
-+
-+  // Handle intrinsics.
-+  setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
-+  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
- 
-   // We want to use MVC in preference to even a single load/store pair.
-   MaxStoresPerMemcpy = 0;
-@@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal
-   return Imm.isZero() || Imm.isNegZero();
- }
- 
-+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
-+  // We can use CGFI or CLGFI.
-+  return isInt<32>(Imm) || isUInt<32>(Imm);
-+}
-+
-+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const {
-+  // We can use ALGFI or SLGFI.
-+  return isUInt<32>(Imm) || isUInt<32>(-Imm);
-+}
-+
- bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
-                                                            unsigned,
-                                                            unsigned,
-@@ -623,6 +776,24 @@ bool SystemZTargetLowering::mayBeEmitted
-   return true;
- }
- 
-+// We do not yet support 128-bit single-element vector types.  If the user
-+// attempts to use such types as function argument or return type, prefer
-+// to error out instead of emitting code violating the ABI.
-+static void VerifyVectorType(MVT VT, EVT ArgVT) {
-+  if (ArgVT.isVector() && !VT.isVector())
-+    report_fatal_error("Unsupported vector argument or return type");
-+}
-+
-+static void VerifyVectorTypes(const SmallVectorImpl<ISD::InputArg> &Ins) {
-+  for (unsigned i = 0; i < Ins.size(); ++i)
-+    VerifyVectorType(Ins[i].VT, Ins[i].ArgVT);
-+}
-+
-+static void VerifyVectorTypes(const SmallVectorImpl<ISD::OutputArg> &Outs) {
-+  for (unsigned i = 0; i < Outs.size(); ++i)
-+    VerifyVectorType(Outs[i].VT, Outs[i].ArgVT);
-+}
-+
- // Value is a value that has been passed to us in the location described by VA
- // (and so has type VA.getLocVT()).  Convert Value to VA.getValVT(), chaining
- // any loads onto Chain.
-@@ -643,7 +814,15 @@ static SDValue convertLocVTToValVT(Selec
-   else if (VA.getLocInfo() == CCValAssign::Indirect)
-     Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value,
-                         MachinePointerInfo(), false, false, false, 0);
--  else
-+  else if (VA.getLocInfo() == CCValAssign::BCvt) {
-+    // If this is a short vector argument loaded from the stack,
-+    // extend from i64 to full vector size and then bitcast.
-+    assert(VA.getLocVT() == MVT::i64);
-+    assert(VA.getValVT().isVector());
-+    Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64,
-+                        Value, DAG.getUNDEF(MVT::i64));
-+    Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value);
-+  } else
-     assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo");
-   return Value;
- }
-@@ -660,6 +839,14 @@ static SDValue convertValVTToLocVT(Selec
-     return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
-   case CCValAssign::AExt:
-     return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
-+  case CCValAssign::BCvt:
-+    // If this is a short vector argument to be stored to the stack,
-+    // bitcast to v2i64 and then extract first element.
-+    assert(VA.getLocVT() == MVT::i64);
-+    assert(VA.getValVT().isVector());
-+    Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
-+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
-+                       DAG.getConstant(0, MVT::i32));
-   case CCValAssign::Full:
-     return Value;
-   default:
-@@ -680,9 +867,13 @@ LowerFormalArguments(SDValue Chain, Call
-   auto *TFL = static_cast<const SystemZFrameLowering *>(
-       DAG.getSubtarget().getFrameLowering());
- 
-+  // Detect unsupported vector argument types.
-+  if (Subtarget.hasVector())
-+    VerifyVectorTypes(Ins);
-+
-   // Assign locations to all of the incoming arguments.
-   SmallVector<CCValAssign, 16> ArgLocs;
--  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-+  SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-   CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ);
- 
-   unsigned NumFixedGPRs = 0;
-@@ -714,6 +905,14 @@ LowerFormalArguments(SDValue Chain, Call
-         NumFixedFPRs += 1;
-         RC = &SystemZ::FP64BitRegClass;
-         break;
-+      case MVT::v16i8:
-+      case MVT::v8i16:
-+      case MVT::v4i32:
-+      case MVT::v2i64:
-+      case MVT::v4f32:
-+      case MVT::v2f64:
-+        RC = &SystemZ::VR128BitRegClass;
-+        break;
-       }
- 
-       unsigned VReg = MRI.createVirtualRegister(RC);
-@@ -818,9 +1017,15 @@ SystemZTargetLowering::LowerCall(CallLow
-   MachineFunction &MF = DAG.getMachineFunction();
-   EVT PtrVT = getPointerTy();
- 
-+  // Detect unsupported vector argument and return types.
-+  if (Subtarget.hasVector()) {
-+    VerifyVectorTypes(Outs);
-+    VerifyVectorTypes(Ins);
-+  }
-+
-   // Analyze the operands of the call, assigning locations to each operand.
-   SmallVector<CCValAssign, 16> ArgLocs;
--  CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-+  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
-   ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
- 
-   // We don't support GuaranteedTailCallOpt, only automatically-detected
-@@ -972,6 +1177,10 @@ SystemZTargetLowering::LowerReturn(SDVal
-                                    SDLoc DL, SelectionDAG &DAG) const {
-   MachineFunction &MF = DAG.getMachineFunction();
- 
-+  // Detect unsupported vector return types.
-+  if (Subtarget.hasVector())
-+    VerifyVectorTypes(Outs);
-+
-   // Assign locations to each returned value.
-   SmallVector<CCValAssign, 16> RetLocs;
-   CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
-@@ -1015,6 +1224,207 @@ prepareVolatileOrAtomicLoad(SDValue Chai
-   return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain);
- }
- 
-+// Return true if Op is an intrinsic node with chain that returns the CC value
-+// as its only (other) argument.  Provide the associated SystemZISD opcode and
-+// the mask of valid CC values if so.
-+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode,
-+                                      unsigned &CCValid) {
-+  unsigned Id = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+  switch (Id) {
-+  case Intrinsic::s390_tbegin:
-+    Opcode = SystemZISD::TBEGIN;
-+    CCValid = SystemZ::CCMASK_TBEGIN;
-+    return true;
-+
-+  case Intrinsic::s390_tbegin_nofloat:
-+    Opcode = SystemZISD::TBEGIN_NOFLOAT;
-+    CCValid = SystemZ::CCMASK_TBEGIN;
-+    return true;
-+
-+  case Intrinsic::s390_tend:
-+    Opcode = SystemZISD::TEND;
-+    CCValid = SystemZ::CCMASK_TEND;
-+    return true;
-+
-+  default:
-+    return false;
-+  }
-+}
-+
-+// Return true if Op is an intrinsic node without chain that returns the
-+// CC value as its final argument.  Provide the associated SystemZISD
-+// opcode and the mask of valid CC values if so.
-+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) {
-+  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+  switch (Id) {
-+  case Intrinsic::s390_vpkshs:
-+  case Intrinsic::s390_vpksfs:
-+  case Intrinsic::s390_vpksgs:
-+    Opcode = SystemZISD::PACKS_CC;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vpklshs:
-+  case Intrinsic::s390_vpklsfs:
-+  case Intrinsic::s390_vpklsgs:
-+    Opcode = SystemZISD::PACKLS_CC;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vceqbs:
-+  case Intrinsic::s390_vceqhs:
-+  case Intrinsic::s390_vceqfs:
-+  case Intrinsic::s390_vceqgs:
-+    Opcode = SystemZISD::VICMPES;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vchbs:
-+  case Intrinsic::s390_vchhs:
-+  case Intrinsic::s390_vchfs:
-+  case Intrinsic::s390_vchgs:
-+    Opcode = SystemZISD::VICMPHS;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vchlbs:
-+  case Intrinsic::s390_vchlhs:
-+  case Intrinsic::s390_vchlfs:
-+  case Intrinsic::s390_vchlgs:
-+    Opcode = SystemZISD::VICMPHLS;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vtm:
-+    Opcode = SystemZISD::VTM;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vfaebs:
-+  case Intrinsic::s390_vfaehs:
-+  case Intrinsic::s390_vfaefs:
-+    Opcode = SystemZISD::VFAE_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfaezbs:
-+  case Intrinsic::s390_vfaezhs:
-+  case Intrinsic::s390_vfaezfs:
-+    Opcode = SystemZISD::VFAEZ_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfeebs:
-+  case Intrinsic::s390_vfeehs:
-+  case Intrinsic::s390_vfeefs:
-+    Opcode = SystemZISD::VFEE_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfeezbs:
-+  case Intrinsic::s390_vfeezhs:
-+  case Intrinsic::s390_vfeezfs:
-+    Opcode = SystemZISD::VFEEZ_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfenebs:
-+  case Intrinsic::s390_vfenehs:
-+  case Intrinsic::s390_vfenefs:
-+    Opcode = SystemZISD::VFENE_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfenezbs:
-+  case Intrinsic::s390_vfenezhs:
-+  case Intrinsic::s390_vfenezfs:
-+    Opcode = SystemZISD::VFENEZ_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vistrbs:
-+  case Intrinsic::s390_vistrhs:
-+  case Intrinsic::s390_vistrfs:
-+    Opcode = SystemZISD::VISTR_CC;
-+    CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3;
-+    return true;
-+
-+  case Intrinsic::s390_vstrcbs:
-+  case Intrinsic::s390_vstrchs:
-+  case Intrinsic::s390_vstrcfs:
-+    Opcode = SystemZISD::VSTRC_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vstrczbs:
-+  case Intrinsic::s390_vstrczhs:
-+  case Intrinsic::s390_vstrczfs:
-+    Opcode = SystemZISD::VSTRCZ_CC;
-+    CCValid = SystemZ::CCMASK_ANY;
-+    return true;
-+
-+  case Intrinsic::s390_vfcedbs:
-+    Opcode = SystemZISD::VFCMPES;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vfchdbs:
-+    Opcode = SystemZISD::VFCMPHS;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vfchedbs:
-+    Opcode = SystemZISD::VFCMPHES;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  case Intrinsic::s390_vftcidb:
-+    Opcode = SystemZISD::VFTCI;
-+    CCValid = SystemZ::CCMASK_VCMP;
-+    return true;
-+
-+  default:
-+    return false;
-+  }
-+}
-+
-+// Emit an intrinsic with chain with a glued value instead of its CC result.
-+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op,
-+                                             unsigned Opcode) {
-+  // Copy all operands except the intrinsic ID.
-+  unsigned NumOps = Op.getNumOperands();
-+  SmallVector<SDValue, 6> Ops;
-+  Ops.reserve(NumOps - 1);
-+  Ops.push_back(Op.getOperand(0));
-+  for (unsigned I = 2; I < NumOps; ++I)
-+    Ops.push_back(Op.getOperand(I));
-+
-+  assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
-+  SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue);
-+  SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
-+  SDValue OldChain = SDValue(Op.getNode(), 1);
-+  SDValue NewChain = SDValue(Intr.getNode(), 0);
-+  DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain);
-+  return Intr;
-+}
-+
-+// Emit an intrinsic with a glued value instead of its CC result.
-+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op,
-+                                     unsigned Opcode) {
-+  // Copy all operands except the intrinsic ID.
-+  unsigned NumOps = Op.getNumOperands();
-+  SmallVector<SDValue, 6> Ops;
-+  Ops.reserve(NumOps - 1);
-+  for (unsigned I = 1; I < NumOps; ++I)
-+    Ops.push_back(Op.getOperand(I));
-+
-+  if (Op->getNumValues() == 1)
-+    return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops);
-+  assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result");
-+  SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue);
-+  return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops);
-+}
-+
- // CC is a comparison that will be implemented using an integer or
- // floating-point comparison.  Return the condition code mask for
- // a branch on true.  In the integer case, CCMASK_CMP_UO is set for
-@@ -1529,6 +1939,8 @@ static void adjustForTestUnderMask(Selec
-     MaskVal = -(CmpVal & -CmpVal);
-     NewC.ICmpType = SystemZICMP::UnsignedOnly;
-   }
-+  if (!MaskVal)
-+    return;
- 
-   // Check whether the combination of mask, comparison value and comparison
-   // type are suitable.
-@@ -1570,9 +1982,57 @@ static void adjustForTestUnderMask(Selec
-   C.CCMask = NewCCMask;
- }
- 
-+// Return a Comparison that tests the condition-code result of intrinsic
-+// node Call against constant integer CC using comparison code Cond.
-+// Opcode is the opcode of the SystemZISD operation for the intrinsic
-+// and CCValid is the set of possible condition-code results.
-+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode,
-+                                  SDValue Call, unsigned CCValid, uint64_t CC,
-+                                  ISD::CondCode Cond) {
-+  Comparison C(Call, SDValue());
-+  C.Opcode = Opcode;
-+  C.CCValid = CCValid;
-+  if (Cond == ISD::SETEQ)
-+    // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3.
-+    C.CCMask = CC < 4 ? 1 << (3 - CC) : 0;
-+  else if (Cond == ISD::SETNE)
-+    // ...and the inverse of that.
-+    C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1;
-+  else if (Cond == ISD::SETLT || Cond == ISD::SETULT)
-+    // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3,
-+    // always true for CC>3.
-+    C.CCMask = CC < 4 ? -1 << (4 - CC) : -1;
-+  else if (Cond == ISD::SETGE || Cond == ISD::SETUGE)
-+    // ...and the inverse of that.
-+    C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0;
-+  else if (Cond == ISD::SETLE || Cond == ISD::SETULE)
-+    // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true),
-+    // always true for CC>3.
-+    C.CCMask = CC < 4 ? -1 << (3 - CC) : -1;
-+  else if (Cond == ISD::SETGT || Cond == ISD::SETUGT)
-+    // ...and the inverse of that.
-+    C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0;
-+  else
-+    llvm_unreachable("Unexpected integer comparison type");
-+  C.CCMask &= CCValid;
-+  return C;
-+}
-+
- // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
- static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
-                          ISD::CondCode Cond) {
-+  if (CmpOp1.getOpcode() == ISD::Constant) {
-+    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
-+    unsigned Opcode, CCValid;
-+    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
-+        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
-+        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
-+      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
-+    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
-+        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
-+        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
-+      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
-+  }
-   Comparison C(CmpOp0, CmpOp1);
-   C.CCMask = CCMaskForCondCode(Cond);
-   if (C.Op0.getValueType().isFloatingPoint()) {
-@@ -1614,6 +2074,20 @@ static Comparison getCmp(SelectionDAG &D
- 
- // Emit the comparison instruction described by C.
- static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
-+  if (!C.Op1.getNode()) {
-+    SDValue Op;
-+    switch (C.Op0.getOpcode()) {
-+    case ISD::INTRINSIC_W_CHAIN:
-+      Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
-+      break;
-+    case ISD::INTRINSIC_WO_CHAIN:
-+      Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
-+      break;
-+    default:
-+      llvm_unreachable("Invalid comparison operands");
-+    }
-+    return SDValue(Op.getNode(), Op->getNumValues() - 1);
-+  }
-   if (C.Opcode == SystemZISD::ICMP)
-     return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
-                        DAG.getConstant(C.ICmpType, MVT::i32));
-@@ -1682,12 +2156,142 @@ static SDValue emitSETCC(SelectionDAG &D
-   return Result;
- }
- 
-+// Return the SystemISD vector comparison operation for CC, or 0 if it cannot
-+// be done directly.  IsFP is true if CC is for a floating-point rather than
-+// integer comparison.
-+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
-+  switch (CC) {
-+  case ISD::SETOEQ:
-+  case ISD::SETEQ:
-+    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
-+
-+  case ISD::SETOGE:
-+  case ISD::SETGE:
-+    return IsFP ? SystemZISD::VFCMPHE : 0;
-+
-+  case ISD::SETOGT:
-+  case ISD::SETGT:
-+    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
-+
-+  case ISD::SETUGT:
-+    return IsFP ? 0 : SystemZISD::VICMPHL;
-+
-+  default:
-+    return 0;
-+  }
-+}
-+
-+// Return the SystemZISD vector comparison operation for CC or its inverse,
-+// or 0 if neither can be done directly.  Indicate in Invert whether the
-+// result is for the inverse of CC.  IsFP is true if CC is for a
-+// floating-point rather than integer comparison.
-+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
-+                                            bool &Invert) {
-+  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
-+    Invert = false;
-+    return Opcode;
-+  }
-+
-+  CC = ISD::getSetCCInverse(CC, !IsFP);
-+  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
-+    Invert = true;
-+    return Opcode;
-+  }
-+
-+  return 0;
-+}
-+
-+// Return a v2f64 that contains the extended form of elements Start and Start+1
-+// of v4f32 value Op.
-+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL,
-+                                  SDValue Op) {
-+  int Mask[] = { Start, -1, Start + 1, -1 };
-+  Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask);
-+  return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op);
-+}
-+
-+// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode,
-+// producing a result of type VT.
-+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL,
-+                            EVT VT, SDValue CmpOp0, SDValue CmpOp1) {
-+  // There is no hardware support for v4f32, so extend the vector into
-+  // two v2f64s and compare those.
-+  if (CmpOp0.getValueType() == MVT::v4f32) {
-+    SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0);
-+    SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0);
-+    SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1);
-+    SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1);
-+    SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1);
-+    SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1);
-+    return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes);
-+  }
-+  return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1);
-+}
-+
-+// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing
-+// an integer mask of type VT.
-+static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT,
-+                                ISD::CondCode CC, SDValue CmpOp0,
-+                                SDValue CmpOp1) {
-+  bool IsFP = CmpOp0.getValueType().isFloatingPoint();
-+  bool Invert = false;
-+  SDValue Cmp;
-+  switch (CC) {
-+    // Handle tests for order using (or (ogt y x) (oge x y)).
-+  case ISD::SETUO:
-+    Invert = true;
-+  case ISD::SETO: {
-+    assert(IsFP && "Unexpected integer comparison");
-+    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-+    SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1);
-+    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE);
-+    break;
-+  }
-+
-+    // Handle <> tests using (or (ogt y x) (ogt x y)).
-+  case ISD::SETUEQ:
-+    Invert = true;
-+  case ISD::SETONE: {
-+    assert(IsFP && "Unexpected integer comparison");
-+    SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0);
-+    SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1);
-+    Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT);
-+    break;
-+  }
-+
-+    // Otherwise a single comparison is enough.  It doesn't really
-+    // matter whether we try the inversion or the swap first, since
-+    // there are no cases where both work.
-+  default:
-+    if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-+      Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1);
-+    else {
-+      CC = ISD::getSetCCSwappedOperands(CC);
-+      if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert))
-+        Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0);
-+      else
-+        llvm_unreachable("Unhandled comparison");
-+    }
-+    break;
-+  }
-+  if (Invert) {
-+    SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-+                               DAG.getConstant(65535, MVT::i32));
-+    Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask);
-+    Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask);
-+  }
-+  return Cmp;
-+}
-+
- SDValue SystemZTargetLowering::lowerSETCC(SDValue Op,
-                                           SelectionDAG &DAG) const {
-   SDValue CmpOp0   = Op.getOperand(0);
-   SDValue CmpOp1   = Op.getOperand(1);
-   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
-   SDLoc DL(Op);
-+  EVT VT = Op.getValueType();
-+  if (VT.isVector())
-+    return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1);
- 
-   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
-   SDValue Glue = emitCmp(DAG, DL, C);
-@@ -1695,7 +2299,6 @@ SDValue SystemZTargetLowering::lowerSETC
- }
- 
- SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
--  SDValue Chain    = Op.getOperand(0);
-   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
-   SDValue CmpOp0   = Op.getOperand(2);
-   SDValue CmpOp1   = Op.getOperand(3);
-@@ -1705,7 +2308,7 @@ SDValue SystemZTargetLowering::lowerBR_C
-   Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC));
-   SDValue Glue = emitCmp(DAG, DL, C);
-   return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(),
--                     Chain, DAG.getConstant(C.CCValid, MVT::i32),
-+                     Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32),
-                      DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue);
- }
- 
-@@ -1831,6 +2434,52 @@ SDValue SystemZTargetLowering::lowerGlob
-   return Result;
- }
- 
-+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node,
-+                                                 SelectionDAG &DAG,
-+                                                 unsigned Opcode,
-+                                                 SDValue GOTOffset) const {
-+  SDLoc DL(Node);
-+  EVT PtrVT = getPointerTy();
-+  SDValue Chain = DAG.getEntryNode();
-+  SDValue Glue;
-+
-+  // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12.
-+  SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
-+  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue);
-+  Glue = Chain.getValue(1);
-+  Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue);
-+  Glue = Chain.getValue(1);
-+
-+  // The first call operand is the chain and the second is the TLS symbol.
-+  SmallVector<SDValue, 8> Ops;
-+  Ops.push_back(Chain);
-+  Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL,
-+                                           Node->getValueType(0),
-+                                           0, 0));
-+
-+  // Add argument registers to the end of the list so that they are
-+  // known live into the call.
-+  Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT));
-+  Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT));
-+
-+  // Add a register mask operand representing the call-preserved registers.
-+  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
-+  const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C);
-+  assert(Mask && "Missing call preserved mask for calling convention");
-+  Ops.push_back(DAG.getRegisterMask(Mask));
-+
-+  // Glue the call to the argument copies.
-+  Ops.push_back(Glue);
-+
-+  // Emit the call.
-+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
-+  Chain = DAG.getNode(Opcode, DL, NodeTys, Ops);
-+  Glue = Chain.getValue(1);
-+
-+  // Copy the return value from %r2.
-+  return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue);
-+}
-+
- SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
- 						     SelectionDAG &DAG) const {
-   SDLoc DL(Node);
-@@ -1838,9 +2487,6 @@ SDValue SystemZTargetLowering::lowerGlob
-   EVT PtrVT = getPointerTy();
-   TLSModel::Model model = DAG.getTarget().getTLSModel(GV);
- 
--  if (model != TLSModel::LocalExec)
--    llvm_unreachable("only local-exec TLS mode supported");
--
-   // The high part of the thread pointer is in access register 0.
-   SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32,
-                              DAG.getConstant(0, MVT::i32));
-@@ -1856,15 +2502,79 @@ SDValue SystemZTargetLowering::lowerGlob
- 				    DAG.getConstant(32, PtrVT));
-   SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo);
- 
--  // Get the offset of GA from the thread pointer.
--  SystemZConstantPoolValue *CPV =
--    SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
--
--  // Force the offset into the constant pool and load it from there.
--  SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8);
--  SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
--			       CPAddr, MachinePointerInfo::getConstantPool(),
--			       false, false, false, 0);
-+  // Get the offset of GA from the thread pointer, based on the TLS model.
-+  SDValue Offset;
-+  switch (model) {
-+    case TLSModel::GeneralDynamic: {
-+      // Load the GOT offset of the tls_index (module ID / per-symbol offset).
-+      SystemZConstantPoolValue *CPV =
-+        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD);
-+
-+      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
-+      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
-+                           Offset, MachinePointerInfo::getConstantPool(),
-+                           false, false, false, 0);
-+
-+      // Call __tls_get_offset to retrieve the offset.
-+      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset);
-+      break;
-+    }
-+
-+    case TLSModel::LocalDynamic: {
-+      // Load the GOT offset of the module ID.
-+      SystemZConstantPoolValue *CPV =
-+        SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM);
-+
-+      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
-+      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
-+                           Offset, MachinePointerInfo::getConstantPool(),
-+                           false, false, false, 0);
-+
-+      // Call __tls_get_offset to retrieve the module base offset.
-+      Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset);
-+
-+      // Note: The SystemZLDCleanupPass will remove redundant computations
-+      // of the module base offset.  Count total number of local-dynamic
-+      // accesses to trigger execution of that pass.
-+      SystemZMachineFunctionInfo* MFI =
-+        DAG.getMachineFunction().getInfo<SystemZMachineFunctionInfo>();
-+      MFI->incNumLocalDynamicTLSAccesses();
-+
-+      // Add the per-symbol offset.
-+      CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF);
-+
-+      SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8);
-+      DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
-+                              DTPOffset, MachinePointerInfo::getConstantPool(),
-+                              false, false, false, 0);
-+
-+      Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset);
-+      break;
-+    }
-+
-+    case TLSModel::InitialExec: {
-+      // Load the offset from the GOT.
-+      Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0,
-+                                          SystemZII::MO_INDNTPOFF);
-+      Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset);
-+      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
-+                           Offset, MachinePointerInfo::getGOT(),
-+                           false, false, false, 0);
-+      break;
-+    }
-+
-+    case TLSModel::LocalExec: {
-+      // Force the offset into the constant pool and load it from there.
-+      SystemZConstantPoolValue *CPV =
-+        SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF);
-+
-+      Offset = DAG.getConstantPool(CPV, PtrVT, 8);
-+      Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(),
-+                           Offset, MachinePointerInfo::getConstantPool(),
-+                           false, false, false, 0);
-+      break;
-+    }
-+  }
- 
-   // Add the base and offset together.
-   return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset);
-@@ -1916,6 +2626,13 @@ SDValue SystemZTargetLowering::lowerBITC
-   EVT InVT = In.getValueType();
-   EVT ResVT = Op.getValueType();
- 
-+  // Convert loads directly.  This is normally done by DAGCombiner,
-+  // but we need this case for bitcasts that are created during lowering
-+  // and which are then lowered themselves.
-+  if (auto *LoadN = dyn_cast<LoadSDNode>(In))
-+    return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(),
-+                       LoadN->getMemOperand());
-+
-   if (InVT == MVT::i32 && ResVT == MVT::f32) {
-     SDValue In64;
-     if (Subtarget.hasHighWord()) {
-@@ -1929,12 +2646,12 @@ SDValue SystemZTargetLowering::lowerBITC
-                          DAG.getConstant(32, MVT::i64));
-     }
-     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64);
--    return DAG.getTargetExtractSubreg(SystemZ::subreg_h32,
-+    return DAG.getTargetExtractSubreg(SystemZ::subreg_r32,
-                                       DL, MVT::f32, Out64);
-   }
-   if (InVT == MVT::f32 && ResVT == MVT::i32) {
-     SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64);
--    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL,
-+    SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL,
-                                              MVT::f64, SDValue(U64, 0), In);
-     SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64);
-     if (Subtarget.hasHighWord())
-@@ -2187,6 +2904,80 @@ SDValue SystemZTargetLowering::lowerOR(S
-                                    MVT::i64, HighOp, Low32);
- }
- 
-+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op,
-+                                          SelectionDAG &DAG) const {
-+  EVT VT = Op.getValueType();
-+  SDLoc DL(Op);
-+  Op = Op.getOperand(0);
-+
-+  // Handle vector types via VPOPCT.
-+  if (VT.isVector()) {
-+    Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op);
-+    Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op);
-+    switch (VT.getVectorElementType().getSizeInBits()) {
-+    case 8:
-+      break;
-+    case 16: {
-+      Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+      SDValue Shift = DAG.getConstant(8, MVT::i32);
-+      SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift);
-+      Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
-+      Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift);
-+      break;
-+    }
-+    case 32: {
-+      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-+                                DAG.getConstant(0, MVT::i32));
-+      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
-+      break;
-+    }
-+    case 64: {
-+      SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-+                                DAG.getConstant(0, MVT::i32));
-+      Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp);
-+      Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp);
-+      break;
-+    }
-+    default:
-+      llvm_unreachable("Unexpected type");
-+    }
-+    return Op;
-+  }
-+
-+  // Get the known-zero mask for the operand.
-+  APInt KnownZero, KnownOne;
-+  DAG.computeKnownBits(Op, KnownZero, KnownOne);
-+  unsigned NumSignificantBits = (~KnownZero).getActiveBits();
-+  if (NumSignificantBits == 0)
-+    return DAG.getConstant(0, VT);
-+
-+  // Skip known-zero high parts of the operand.
-+  int64_t OrigBitSize = VT.getSizeInBits();
-+  int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits);
-+  BitSize = std::min(BitSize, OrigBitSize);
-+
-+  // The POPCNT instruction counts the number of bits in each byte.
-+  Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op);
-+  Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op);
-+  Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
-+
-+  // Add up per-byte counts in a binary tree.  All bits of Op at
-+  // position larger than BitSize remain zero throughout.
-+  for (int64_t I = BitSize / 2; I >= 8; I = I / 2) {
-+    SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT));
-+    if (BitSize != OrigBitSize)
-+      Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp,
-+                        DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT));
-+    Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp);
-+  }
-+
-+  // Extract overall result from high byte.
-+  if (BitSize > 8)
-+    Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT));
-+
-+  return Op;
-+}
-+
- // Op is an atomic load.  Lower it into a normal volatile load.
- SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op,
-                                                 SelectionDAG &DAG) const {
-@@ -2400,6 +3191,1069 @@ SDValue SystemZTargetLowering::lowerPREF
-                                  Node->getMemoryVT(), Node->getMemOperand());
- }
- 
-+// Return an i32 that contains the value of CC immediately after After,
-+// whose final operand must be MVT::Glue.
-+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) {
-+  SDValue Glue = SDValue(After, After->getNumValues() - 1);
-+  SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue);
-+  return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM,
-+                     DAG.getConstant(SystemZ::IPM_CC, MVT::i32));
-+}
-+
-+SDValue
-+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op,
-+                                              SelectionDAG &DAG) const {
-+  unsigned Opcode, CCValid;
-+  if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) {
-+    assert(Op->getNumValues() == 2 && "Expected only CC result and chain");
-+    SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode);
-+    SDValue CC = getCCResult(DAG, Glued.getNode());
-+    DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC);
-+    return SDValue();
-+  }
-+
-+  return SDValue();
-+}
-+
-+SDValue
-+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
-+                                               SelectionDAG &DAG) const {
-+  unsigned Opcode, CCValid;
-+  if (isIntrinsicWithCC(Op, Opcode, CCValid)) {
-+    SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode);
-+    SDValue CC = getCCResult(DAG, Glued.getNode());
-+    if (Op->getNumValues() == 1)
-+      return CC;
-+    assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result");
-+    return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(),
-+		    Glued, CC);
-+  }
-+
-+  unsigned Id = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
-+  switch (Id) {
-+  case Intrinsic::s390_vpdi:
-+    return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-+
-+  case Intrinsic::s390_vperm:
-+    return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1), Op.getOperand(2), Op.getOperand(3));
-+
-+  case Intrinsic::s390_vuphb:
-+  case Intrinsic::s390_vuphh:
-+  case Intrinsic::s390_vuphf:
-+    return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1));
-+
-+  case Intrinsic::s390_vuplhb:
-+  case Intrinsic::s390_vuplhh:
-+  case Intrinsic::s390_vuplhf:
-+    return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1));
-+
-+  case Intrinsic::s390_vuplb:
-+  case Intrinsic::s390_vuplhw:
-+  case Intrinsic::s390_vuplf:
-+    return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1));
-+
-+  case Intrinsic::s390_vupllb:
-+  case Intrinsic::s390_vupllh:
-+  case Intrinsic::s390_vupllf:
-+    return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1));
-+
-+  case Intrinsic::s390_vsumb:
-+  case Intrinsic::s390_vsumh:
-+  case Intrinsic::s390_vsumgh:
-+  case Intrinsic::s390_vsumgf:
-+  case Intrinsic::s390_vsumqf:
-+  case Intrinsic::s390_vsumqg:
-+    return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(),
-+                       Op.getOperand(1), Op.getOperand(2));
-+  }
-+
-+  return SDValue();
-+}
-+
-+namespace {
-+// Says that SystemZISD operation Opcode can be used to perform the equivalent
-+// of a VPERM with permute vector Bytes.  If Opcode takes three operands,
-+// Operand is the constant third operand, otherwise it is the number of
-+// bytes in each element of the result.
-+struct Permute {
-+  unsigned Opcode;
-+  unsigned Operand;
-+  unsigned char Bytes[SystemZ::VectorBytes];
-+};
-+}
-+
-+static const Permute PermuteForms[] = {
-+  // VMRHG
-+  { SystemZISD::MERGE_HIGH, 8,
-+    { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } },
-+  // VMRHF
-+  { SystemZISD::MERGE_HIGH, 4,
-+    { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
-+  // VMRHH
-+  { SystemZISD::MERGE_HIGH, 2,
-+    { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
-+  // VMRHB
-+  { SystemZISD::MERGE_HIGH, 1,
-+    { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
-+  // VMRLG
-+  { SystemZISD::MERGE_LOW, 8,
-+    { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } },
-+  // VMRLF
-+  { SystemZISD::MERGE_LOW, 4,
-+    { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
-+  // VMRLH
-+  { SystemZISD::MERGE_LOW, 2,
-+    { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
-+  // VMRLB
-+  { SystemZISD::MERGE_LOW, 1,
-+    { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
-+  // VPKG
-+  { SystemZISD::PACK, 4,
-+    { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } },
-+  // VPKF
-+  { SystemZISD::PACK, 2,
-+    { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
-+  // VPKH
-+  { SystemZISD::PACK, 1,
-+    { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
-+  // VPDI V1, V2, 4  (low half of V1, high half of V2)
-+  { SystemZISD::PERMUTE_DWORDS, 4,
-+    { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } },
-+  // VPDI V1, V2, 1  (high half of V1, low half of V2)
-+  { SystemZISD::PERMUTE_DWORDS, 1,
-+    { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } }
-+};
-+
-+// Called after matching a vector shuffle against a particular pattern.
-+// Both the original shuffle and the pattern have two vector operands.
-+// OpNos[0] is the operand of the original shuffle that should be used for
-+// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything.
-+// OpNos[1] is the same for operand 1 of the pattern.  Resolve these -1s and
-+// set OpNo0 and OpNo1 to the shuffle operands that should actually be used
-+// for operands 0 and 1 of the pattern.
-+static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) {
-+  if (OpNos[0] < 0) {
-+    if (OpNos[1] < 0)
-+      return false;
-+    OpNo0 = OpNo1 = OpNos[1];
-+  } else if (OpNos[1] < 0) {
-+    OpNo0 = OpNo1 = OpNos[0];
-+  } else {
-+    OpNo0 = OpNos[0];
-+    OpNo1 = OpNos[1];
-+  }
-+  return true;
-+}
-+
-+// Bytes is a VPERM-like permute vector, except that -1 is used for
-+// undefined bytes.  Return true if the VPERM can be implemented using P.
-+// When returning true set OpNo0 to the VPERM operand that should be
-+// used for operand 0 of P and likewise OpNo1 for operand 1 of P.
-+//
-+// For example, if swapping the VPERM operands allows P to match, OpNo0
-+// will be 1 and OpNo1 will be 0.  If instead Bytes only refers to one
-+// operand, but rewriting it to use two duplicated operands allows it to
-+// match P, then OpNo0 and OpNo1 will be the same.
-+static bool matchPermute(const SmallVectorImpl<int> &Bytes, const Permute &P,
-+                         unsigned &OpNo0, unsigned &OpNo1) {
-+  int OpNos[] = { -1, -1 };
-+  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) {
-+    int Elt = Bytes[I];
-+    if (Elt >= 0) {
-+      // Make sure that the two permute vectors use the same suboperand
-+      // byte number.  Only the operand numbers (the high bits) are
-+      // allowed to differ.
-+      if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1))
-+        return false;
-+      int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes;
-+      int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes;
-+      // Make sure that the operand mappings are consistent with previous
-+      // elements.
-+      if (OpNos[ModelOpNo] == 1 - RealOpNo)
-+        return false;
-+      OpNos[ModelOpNo] = RealOpNo;
-+    }
-+  }
-+  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
-+}
-+
-+// As above, but search for a matching permute.
-+static const Permute *matchPermute(const SmallVectorImpl<int> &Bytes,
-+                                   unsigned &OpNo0, unsigned &OpNo1) {
-+  for (auto &P : PermuteForms)
-+    if (matchPermute(Bytes, P, OpNo0, OpNo1))
-+      return &P;
-+  return nullptr;
-+}
-+
-+// Bytes is a VPERM-like permute vector, except that -1 is used for
-+// undefined bytes.  This permute is an operand of an outer permute.
-+// See whether redistributing the -1 bytes gives a shuffle that can be
-+// implemented using P.  If so, set Transform to a VPERM-like permute vector
-+// that, when applied to the result of P, gives the original permute in Bytes.
-+static bool matchDoublePermute(const SmallVectorImpl<int> &Bytes,
-+                               const Permute &P,
-+                               SmallVectorImpl<int> &Transform) {
-+  unsigned To = 0;
-+  for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) {
-+    int Elt = Bytes[From];
-+    if (Elt < 0)
-+      // Byte number From of the result is undefined.
-+      Transform[From] = -1;
-+    else {
-+      while (P.Bytes[To] != Elt) {
-+        To += 1;
-+        if (To == SystemZ::VectorBytes)
-+          return false;
-+      }
-+      Transform[From] = To;
-+    }
-+  }
-+  return true;
-+}
-+
-+// As above, but search for a matching permute.
-+static const Permute *matchDoublePermute(const SmallVectorImpl<int> &Bytes,
-+                                         SmallVectorImpl<int> &Transform) {
-+  for (auto &P : PermuteForms)
-+    if (matchDoublePermute(Bytes, P, Transform))
-+      return &P;
-+  return nullptr;
-+}
-+
-+// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask,
-+// as if it had type vNi8.
-+static void getVPermMask(ShuffleVectorSDNode *VSN,
-+                         SmallVectorImpl<int> &Bytes) {
-+  EVT VT = VSN->getValueType(0);
-+  unsigned NumElements = VT.getVectorNumElements();
-+  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
-+  Bytes.resize(NumElements * BytesPerElement, -1);
-+  for (unsigned I = 0; I < NumElements; ++I) {
-+    int Index = VSN->getMaskElt(I);
-+    if (Index >= 0)
-+      for (unsigned J = 0; J < BytesPerElement; ++J)
-+        Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J;
-+  }
-+}
-+
-+// Bytes is a VPERM-like permute vector, except that -1 is used for
-+// undefined bytes.  See whether bytes [Start, Start + BytesPerElement) of
-+// the result come from a contiguous sequence of bytes from one input.
-+// Set Base to the selector for the first byte if so.
-+static bool getShuffleInput(const SmallVectorImpl<int> &Bytes, unsigned Start,
-+                            unsigned BytesPerElement, int &Base) {
-+  Base = -1;
-+  for (unsigned I = 0; I < BytesPerElement; ++I) {
-+    if (Bytes[Start + I] >= 0) {
-+      unsigned Elem = Bytes[Start + I];
-+      if (Base < 0) {
-+        Base = Elem - I;
-+        // Make sure the bytes would come from one input operand.
-+        if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size())
-+          return false;
-+      } else if (unsigned(Base) != Elem - I)
-+        return false;
-+    }
-+  }
-+  return true;
-+}
-+
-+// Bytes is a VPERM-like permute vector, except that -1 is used for
-+// undefined bytes.  Return true if it can be performed using VSLDI.
-+// When returning true, set StartIndex to the shift amount and OpNo0
-+// and OpNo1 to the VPERM operands that should be used as the first
-+// and second shift operand respectively.
-+static bool isShlDoublePermute(const SmallVectorImpl<int> &Bytes,
-+                               unsigned &StartIndex, unsigned &OpNo0,
-+                               unsigned &OpNo1) {
-+  int OpNos[] = { -1, -1 };
-+  int Shift = -1;
-+  for (unsigned I = 0; I < 16; ++I) {
-+    int Index = Bytes[I];
-+    if (Index >= 0) {
-+      int ExpectedShift = (Index - I) % SystemZ::VectorBytes;
-+      int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes;
-+      int RealOpNo = unsigned(Index) / SystemZ::VectorBytes;
-+      if (Shift < 0)
-+        Shift = ExpectedShift;
-+      else if (Shift != ExpectedShift)
-+        return false;
-+      // Make sure that the operand mappings are consistent with previous
-+      // elements.
-+      if (OpNos[ModelOpNo] == 1 - RealOpNo)
-+        return false;
-+      OpNos[ModelOpNo] = RealOpNo;
-+    }
-+  }
-+  StartIndex = Shift;
-+  return chooseShuffleOpNos(OpNos, OpNo0, OpNo1);
-+}
-+
-+// Create a node that performs P on operands Op0 and Op1, casting the
-+// operands to the appropriate type.  The type of the result is determined by P.
-+static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL,
-+                              const Permute &P, SDValue Op0, SDValue Op1) {
-+  // VPDI (PERMUTE_DWORDS) always operates on v2i64s.  The input
-+  // elements of a PACK are twice as wide as the outputs.
-+  unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 :
-+                      P.Opcode == SystemZISD::PACK ? P.Operand * 2 :
-+                      P.Operand);
-+  // Cast both operands to the appropriate type.
-+  MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8),
-+                              SystemZ::VectorBytes / InBytes);
-+  Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0);
-+  Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1);
-+  SDValue Op;
-+  if (P.Opcode == SystemZISD::PERMUTE_DWORDS) {
-+    SDValue Op2 = DAG.getConstant(P.Operand, MVT::i32);
-+    Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2);
-+  } else if (P.Opcode == SystemZISD::PACK) {
-+    MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8),
-+                                 SystemZ::VectorBytes / P.Operand);
-+    Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1);
-+  } else {
-+    Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1);
-+  }
-+  return Op;
-+}
-+
-+// Bytes is a VPERM-like permute vector, except that -1 is used for
-+// undefined bytes.  Implement it on operands Ops[0] and Ops[1] using
-+// VSLDI or VPERM.
-+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops,
-+                                     const SmallVectorImpl<int> &Bytes) {
-+  for (unsigned I = 0; I < 2; ++I)
-+    Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]);
-+
-+  // First see whether VSLDI can be used.
-+  unsigned StartIndex, OpNo0, OpNo1;
-+  if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1))
-+    return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0],
-+                       Ops[OpNo1], DAG.getConstant(StartIndex, MVT::i32));
-+
-+  // Fall back on VPERM.  Construct an SDNode for the permute vector.
-+  SDValue IndexNodes[SystemZ::VectorBytes];
-+  for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
-+    if (Bytes[I] >= 0)
-+      IndexNodes[I] = DAG.getConstant(Bytes[I], MVT::i32);
-+    else
-+      IndexNodes[I] = DAG.getUNDEF(MVT::i32);
-+  SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes);
-+  return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2);
-+}
-+
-+namespace {
-+// Describes a general N-operand vector shuffle.
-+struct GeneralShuffle {
-+  GeneralShuffle(EVT vt) : VT(vt) {}
-+  void addUndef();
-+  void add(SDValue, unsigned);
-+  SDValue getNode(SelectionDAG &, SDLoc);
-+
-+  // The operands of the shuffle.
-+  SmallVector<SDValue, SystemZ::VectorBytes> Ops;
-+
-+  // Index I is -1 if byte I of the result is undefined.  Otherwise the
-+  // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand
-+  // Bytes[I] / SystemZ::VectorBytes.
-+  SmallVector<int, SystemZ::VectorBytes> Bytes;
-+
-+  // The type of the shuffle result.
-+  EVT VT;
-+};
-+}
-+
-+// Add an extra undefined element to the shuffle.
-+void GeneralShuffle::addUndef() {
-+  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
-+  for (unsigned I = 0; I < BytesPerElement; ++I)
-+    Bytes.push_back(-1);
-+}
-+
-+// Add an extra element to the shuffle, taking it from element Elem of Op.
-+// A null Op indicates a vector input whose value will be calculated later;
-+// there is at most one such input per shuffle and it always has the same
-+// type as the result.
-+void GeneralShuffle::add(SDValue Op, unsigned Elem) {
-+  unsigned BytesPerElement = VT.getVectorElementType().getStoreSize();
-+
-+  // The source vector can have wider elements than the result,
-+  // either through an explicit TRUNCATE or because of type legalization.
-+  // We want the least significant part.
-+  EVT FromVT = Op.getNode() ? Op.getValueType() : VT;
-+  unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize();
-+  assert(FromBytesPerElement >= BytesPerElement &&
-+         "Invalid EXTRACT_VECTOR_ELT");
-+  unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes +
-+                   (FromBytesPerElement - BytesPerElement));
-+
-+  // Look through things like shuffles and bitcasts.
-+  while (Op.getNode()) {
-+    if (Op.getOpcode() == ISD::BITCAST)
-+      Op = Op.getOperand(0);
-+    else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) {
-+      // See whether the bytes we need come from a contiguous part of one
-+      // operand.
-+      SmallVector<int, SystemZ::VectorBytes> OpBytes;
-+      getVPermMask(cast<ShuffleVectorSDNode>(Op), OpBytes);
-+      int NewByte;
-+      if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte))
-+        break;
-+      if (NewByte < 0) {
-+        addUndef();
-+        return;
-+      }
-+      Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes);
-+      Byte = unsigned(NewByte) % SystemZ::VectorBytes;
-+    } else if (Op.getOpcode() == ISD::UNDEF) {
-+      addUndef();
-+      return;
-+    } else
-+      break;
-+  }
-+
-+  // Make sure that the source of the extraction is in Ops.
-+  unsigned OpNo = 0;
-+  for (; OpNo < Ops.size(); ++OpNo)
-+    if (Ops[OpNo] == Op)
-+      break;
-+  if (OpNo == Ops.size())
-+    Ops.push_back(Op);
-+
-+  // Add the element to Bytes.
-+  unsigned Base = OpNo * SystemZ::VectorBytes + Byte;
-+  for (unsigned I = 0; I < BytesPerElement; ++I)
-+    Bytes.push_back(Base + I);
-+}
-+
-+// Return SDNodes for the completed shuffle.
-+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) {
-+  assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector");
-+
-+  if (Ops.size() == 0)
-+    return DAG.getUNDEF(VT);
-+
-+  // Make sure that there are at least two shuffle operands.
-+  if (Ops.size() == 1)
-+    Ops.push_back(DAG.getUNDEF(MVT::v16i8));
-+
-+  // Create a tree of shuffles, deferring root node until after the loop.
-+  // Try to redistribute the undefined elements of non-root nodes so that
-+  // the non-root shuffles match something like a pack or merge, then adjust
-+  // the parent node's permute vector to compensate for the new order.
-+  // Among other things, this copes with vectors like <2 x i16> that were
-+  // padded with undefined elements during type legalization.
-+  //
-+  // In the best case this redistribution will lead to the whole tree
-+  // using packs and merges.  It should rarely be a loss in other cases.
-+  unsigned Stride = 1;
-+  for (; Stride * 2 < Ops.size(); Stride *= 2) {
-+    for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) {
-+      SDValue SubOps[] = { Ops[I], Ops[I + Stride] };
-+
-+      // Create a mask for just these two operands.
-+      SmallVector<int, SystemZ::VectorBytes> NewBytes(SystemZ::VectorBytes);
-+      for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
-+        unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes;
-+        unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes;
-+        if (OpNo == I)
-+          NewBytes[J] = Byte;
-+        else if (OpNo == I + Stride)
-+          NewBytes[J] = SystemZ::VectorBytes + Byte;
-+        else
-+          NewBytes[J] = -1;
-+      }
-+      // See if it would be better to reorganize NewMask to avoid using VPERM.
-+      SmallVector<int, SystemZ::VectorBytes> NewBytesMap(SystemZ::VectorBytes);
-+      if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) {
-+        Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]);
-+        // Applying NewBytesMap to Ops[I] gets back to NewBytes.
-+        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) {
-+          if (NewBytes[J] >= 0) {
-+            assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes &&
-+                   "Invalid double permute");
-+            Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J];
-+          } else
-+            assert(NewBytesMap[J] < 0 && "Invalid double permute");
-+        }
-+      } else {
-+        // Just use NewBytes on the operands.
-+        Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes);
-+        for (unsigned J = 0; J < SystemZ::VectorBytes; ++J)
-+          if (NewBytes[J] >= 0)
-+            Bytes[J] = I * SystemZ::VectorBytes + J;
-+      }
-+    }
-+  }
-+
-+  // Now we just have 2 inputs.  Put the second operand in Ops[1].
-+  if (Stride > 1) {
-+    Ops[1] = Ops[Stride];
-+    for (unsigned I = 0; I < SystemZ::VectorBytes; ++I)
-+      if (Bytes[I] >= int(SystemZ::VectorBytes))
-+        Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes;
-+  }
-+
-+  // Look for an instruction that can do the permute without resorting
-+  // to VPERM.
-+  unsigned OpNo0, OpNo1;
-+  SDValue Op;
-+  if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1))
-+    Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]);
-+  else
-+    Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes);
-+  return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+}
-+
-+// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion.
-+static bool isScalarToVector(SDValue Op) {
-+  for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I)
-+    if (Op.getOperand(I).getOpcode() != ISD::UNDEF)
-+      return false;
-+  return true;
-+}
-+
-+// Return a vector of type VT that contains Value in the first element.
-+// The other elements don't matter.
-+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
-+                                   SDValue Value) {
-+  // If we have a constant, replicate it to all elements and let the
-+  // BUILD_VECTOR lowering take care of it.
-+  if (Value.getOpcode() == ISD::Constant ||
-+      Value.getOpcode() == ISD::ConstantFP) {
-+    SmallVector<SDValue, 16> Ops(VT.getVectorNumElements(), Value);
-+    return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
-+  }
-+  if (Value.getOpcode() == ISD::UNDEF)
-+    return DAG.getUNDEF(VT);
-+  return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value);
-+}
-+
-+// Return a vector of type VT in which Op0 is in element 0 and Op1 is in
-+// element 1.  Used for cases in which replication is cheap.
-+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT,
-+                                 SDValue Op0, SDValue Op1) {
-+  if (Op0.getOpcode() == ISD::UNDEF) {
-+    if (Op1.getOpcode() == ISD::UNDEF)
-+      return DAG.getUNDEF(VT);
-+    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1);
-+  }
-+  if (Op1.getOpcode() == ISD::UNDEF)
-+    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0);
-+  return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT,
-+                     buildScalarToVector(DAG, DL, VT, Op0),
-+                     buildScalarToVector(DAG, DL, VT, Op1));
-+}
-+
-+// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64
-+// vector for them.
-+static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0,
-+                          SDValue Op1) {
-+  if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF)
-+    return DAG.getUNDEF(MVT::v2i64);
-+  // If one of the two inputs is undefined then replicate the other one,
-+  // in order to avoid using another register unnecessarily.
-+  if (Op0.getOpcode() == ISD::UNDEF)
-+    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
-+  else if (Op1.getOpcode() == ISD::UNDEF)
-+    Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
-+  else {
-+    Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
-+    Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1);
-+  }
-+  return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1);
-+}
-+
-+// Try to represent constant BUILD_VECTOR node BVN using a
-+// SystemZISD::BYTE_MASK-style mask.  Store the mask value in Mask
-+// on success.
-+static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) {
-+  EVT ElemVT = BVN->getValueType(0).getVectorElementType();
-+  unsigned BytesPerElement = ElemVT.getStoreSize();
-+  for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) {
-+    SDValue Op = BVN->getOperand(I);
-+    if (Op.getOpcode() != ISD::UNDEF) {
-+      uint64_t Value;
-+      if (Op.getOpcode() == ISD::Constant)
-+        Value = dyn_cast<ConstantSDNode>(Op)->getZExtValue();
-+      else if (Op.getOpcode() == ISD::ConstantFP)
-+        Value = (dyn_cast<ConstantFPSDNode>(Op)->getValueAPF().bitcastToAPInt()
-+                 .getZExtValue());
-+      else
-+        return false;
-+      for (unsigned J = 0; J < BytesPerElement; ++J) {
-+        uint64_t Byte = (Value >> (J * 8)) & 0xff;
-+        if (Byte == 0xff)
-+          Mask |= 1 << ((E - I - 1) * BytesPerElement + J);
-+        else if (Byte != 0)
-+          return false;
-+      }
-+    }
-+  }
-+  return true;
-+}
-+
-+// Try to load a vector constant in which BitsPerElement-bit value Value
-+// is replicated to fill the vector.  VT is the type of the resulting
-+// constant, which may have elements of a different size from BitsPerElement.
-+// Return the SDValue of the constant on success, otherwise return
-+// an empty value.
-+static SDValue tryBuildVectorReplicate(SelectionDAG &DAG,
-+                                       const SystemZInstrInfo *TII,
-+                                       SDLoc DL, EVT VT, uint64_t Value,
-+                                       unsigned BitsPerElement) {
-+  // Signed 16-bit values can be replicated using VREPI.
-+  int64_t SignedValue = SignExtend64(Value, BitsPerElement);
-+  if (isInt<16>(SignedValue)) {
-+    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-+                                 SystemZ::VectorBits / BitsPerElement);
-+    SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT,
-+                             DAG.getConstant(SignedValue, MVT::i32));
-+    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+  }
-+  // See whether rotating the constant left some N places gives a value that
-+  // is one less than a power of 2 (i.e. all zeros followed by all ones).
-+  // If so we can use VGM.
-+  unsigned Start, End;
-+  if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) {
-+    // isRxSBGMask returns the bit numbers for a full 64-bit value,
-+    // with 0 denoting 1 << 63 and 63 denoting 1.  Convert them to
-+    // bit numbers for an BitsPerElement value, so that 0 denotes
-+    // 1 << (BitsPerElement-1).
-+    Start -= 64 - BitsPerElement;
-+    End -= 64 - BitsPerElement;
-+    MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement),
-+                                 SystemZ::VectorBits / BitsPerElement);
-+    SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT,
-+                             DAG.getConstant(Start, MVT::i32),
-+                             DAG.getConstant(End, MVT::i32));
-+    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+  }
-+  return SDValue();
-+}
-+
-+// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually
-+// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for
-+// the non-EXTRACT_VECTOR_ELT elements.  See if the given BUILD_VECTOR
-+// would benefit from this representation and return it if so.
-+static SDValue tryBuildVectorShuffle(SelectionDAG &DAG,
-+                                     BuildVectorSDNode *BVN) {
-+  EVT VT = BVN->getValueType(0);
-+  unsigned NumElements = VT.getVectorNumElements();
-+
-+  // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation
-+  // on byte vectors.  If there are non-EXTRACT_VECTOR_ELT elements that still
-+  // need a BUILD_VECTOR, add an additional placeholder operand for that
-+  // BUILD_VECTOR and store its operands in ResidueOps.
-+  GeneralShuffle GS(VT);
-+  SmallVector<SDValue, SystemZ::VectorBytes> ResidueOps;
-+  bool FoundOne = false;
-+  for (unsigned I = 0; I < NumElements; ++I) {
-+    SDValue Op = BVN->getOperand(I);
-+    if (Op.getOpcode() == ISD::TRUNCATE)
-+      Op = Op.getOperand(0);
-+    if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-+        Op.getOperand(1).getOpcode() == ISD::Constant) {
-+      unsigned Elem = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
-+      GS.add(Op.getOperand(0), Elem);
-+      FoundOne = true;
-+    } else if (Op.getOpcode() == ISD::UNDEF) {
-+      GS.addUndef();
-+    } else {
-+      GS.add(SDValue(), ResidueOps.size());
-+      ResidueOps.push_back(Op);
-+    }
-+  }
-+
-+  // Nothing to do if there are no EXTRACT_VECTOR_ELTs.
-+  if (!FoundOne)
-+    return SDValue();
-+
-+  // Create the BUILD_VECTOR for the remaining elements, if any.
-+  if (!ResidueOps.empty()) {
-+    while (ResidueOps.size() < NumElements)
-+      ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType()));
-+    for (auto &Op : GS.Ops) {
-+      if (!Op.getNode()) {
-+        Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps);
-+        break;
-+      }
-+    }
-+  }
-+  return GS.getNode(DAG, SDLoc(BVN));
-+}
-+
-+// Combine GPR scalar values Elems into a vector of type VT.
-+static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT,
-+                           SmallVectorImpl<SDValue> &Elems) {
-+  // See whether there is a single replicated value.
-+  SDValue Single;
-+  unsigned int NumElements = Elems.size();
-+  unsigned int Count = 0;
-+  for (auto Elem : Elems) {
-+    if (Elem.getOpcode() != ISD::UNDEF) {
-+      if (!Single.getNode())
-+        Single = Elem;
-+      else if (Elem != Single) {
-+        Single = SDValue();
-+        break;
-+      }
-+      Count += 1;
-+    }
-+  }
-+  // There are three cases here:
-+  //
-+  // - if the only defined element is a loaded one, the best sequence
-+  //   is a replicating load.
-+  //
-+  // - otherwise, if the only defined element is an i64 value, we will
-+  //   end up with the same VLVGP sequence regardless of whether we short-cut
-+  //   for replication or fall through to the later code.
-+  //
-+  // - otherwise, if the only defined element is an i32 or smaller value,
-+  //   we would need 2 instructions to replicate it: VLVGP followed by VREPx.
-+  //   This is only a win if the single defined element is used more than once.
-+  //   In other cases we're better off using a single VLVGx.
-+  if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD))
-+    return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single);
-+
-+  // The best way of building a v2i64 from two i64s is to use VLVGP.
-+  if (VT == MVT::v2i64)
-+    return joinDwords(DAG, DL, Elems[0], Elems[1]);
-+
-+  // Use a 64-bit merge high to combine two doubles.
-+  if (VT == MVT::v2f64)
-+    return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
-+
-+  // Build v4f32 values directly from the FPRs:
-+  //
-+  //   <Axxx> <Bxxx> <Cxxxx> <Dxxx>
-+  //         V              V         VMRHF
-+  //      <ABxx>         <CDxx>
-+  //                V                 VMRHG
-+  //              <ABCD>
-+  if (VT == MVT::v4f32) {
-+    SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]);
-+    SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]);
-+    // Avoid unnecessary undefs by reusing the other operand.
-+    if (Op01.getOpcode() == ISD::UNDEF)
-+      Op01 = Op23;
-+    else if (Op23.getOpcode() == ISD::UNDEF)
-+      Op23 = Op01;
-+    // Merging identical replications is a no-op.
-+    if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23)
-+      return Op01;
-+    Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01);
-+    Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23);
-+    SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH,
-+                             DL, MVT::v2i64, Op01, Op23);
-+    return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+  }
-+
-+  // Collect the constant terms.
-+  SmallVector<SDValue, SystemZ::VectorBytes> Constants(NumElements, SDValue());
-+  SmallVector<bool, SystemZ::VectorBytes> Done(NumElements, false);
-+
-+  unsigned NumConstants = 0;
-+  for (unsigned I = 0; I < NumElements; ++I) {
-+    SDValue Elem = Elems[I];
-+    if (Elem.getOpcode() == ISD::Constant ||
-+        Elem.getOpcode() == ISD::ConstantFP) {
-+      NumConstants += 1;
-+      Constants[I] = Elem;
-+      Done[I] = true;
-+    }
-+  }
-+  // If there was at least one constant, fill in the other elements of
-+  // Constants with undefs to get a full vector constant and use that
-+  // as the starting point.
-+  SDValue Result;
-+  if (NumConstants > 0) {
-+    for (unsigned I = 0; I < NumElements; ++I)
-+      if (!Constants[I].getNode())
-+        Constants[I] = DAG.getUNDEF(Elems[I].getValueType());
-+    Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants);
-+  } else {
-+    // Otherwise try to use VLVGP to start the sequence in order to
-+    // avoid a false dependency on any previous contents of the vector
-+    // register.  This only makes sense if one of the associated elements
-+    // is defined.
-+    unsigned I1 = NumElements / 2 - 1;
-+    unsigned I2 = NumElements - 1;
-+    bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF);
-+    bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF);
-+    if (Def1 || Def2) {
-+      SDValue Elem1 = Elems[Def1 ? I1 : I2];
-+      SDValue Elem2 = Elems[Def2 ? I2 : I1];
-+      Result = DAG.getNode(ISD::BITCAST, DL, VT,
-+                           joinDwords(DAG, DL, Elem1, Elem2));
-+      Done[I1] = true;
-+      Done[I2] = true;
-+    } else
-+      Result = DAG.getUNDEF(VT);
-+  }
-+
-+  // Use VLVGx to insert the other elements.
-+  for (unsigned I = 0; I < NumElements; ++I)
-+    if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF)
-+      Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I],
-+                           DAG.getConstant(I, MVT::i32));
-+  return Result;
-+}
-+
-+SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op,
-+                                                 SelectionDAG &DAG) const {
-+  const SystemZInstrInfo *TII =
-+    static_cast<const SystemZInstrInfo *>(Subtarget.getInstrInfo());
-+  auto *BVN = cast<BuildVectorSDNode>(Op.getNode());
-+  SDLoc DL(Op);
-+  EVT VT = Op.getValueType();
-+
-+  if (BVN->isConstant()) {
-+    // Try using VECTOR GENERATE BYTE MASK.  This is the architecturally-
-+    // preferred way of creating all-zero and all-one vectors so give it
-+    // priority over other methods below.
-+    uint64_t Mask = 0;
-+    if (tryBuildVectorByteMask(BVN, Mask)) {
-+      SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8,
-+                               DAG.getConstant(Mask, MVT::i32));
-+      return DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+    }
-+
-+    // Try using some form of replication.
-+    APInt SplatBits, SplatUndef;
-+    unsigned SplatBitSize;
-+    bool HasAnyUndefs;
-+    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
-+                             8, true) &&
-+        SplatBitSize <= 64) {
-+      // First try assuming that any undefined bits above the highest set bit
-+      // and below the lowest set bit are 1s.  This increases the likelihood of
-+      // being able to use a sign-extended element value in VECTOR REPLICATE
-+      // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK.
-+      uint64_t SplatBitsZ = SplatBits.getZExtValue();
-+      uint64_t SplatUndefZ = SplatUndef.getZExtValue();
-+      uint64_t Lower = (SplatUndefZ
-+                        & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1));
-+      uint64_t Upper = (SplatUndefZ
-+                        & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1));
-+      uint64_t Value = SplatBitsZ | Upper | Lower;
-+      SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value,
-+                                           SplatBitSize);
-+      if (Op.getNode())
-+        return Op;
-+
-+      // Now try assuming that any undefined bits between the first and
-+      // last defined set bits are set.  This increases the chances of
-+      // using a non-wraparound mask.
-+      uint64_t Middle = SplatUndefZ & ~Upper & ~Lower;
-+      Value = SplatBitsZ | Middle;
-+      Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize);
-+      if (Op.getNode())
-+        return Op;
-+    }
-+
-+    // Fall back to loading it from memory.
-+    return SDValue();
-+  }
-+
-+  // See if we should use shuffles to construct the vector from other vectors.
-+  SDValue Res = tryBuildVectorShuffle(DAG, BVN);
-+  if (Res.getNode())
-+    return Res;
-+
-+  // Detect SCALAR_TO_VECTOR conversions.
-+  if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op))
-+    return buildScalarToVector(DAG, DL, VT, Op.getOperand(0));
-+
-+  // Otherwise use buildVector to build the vector up from GPRs.
-+  unsigned NumElements = Op.getNumOperands();
-+  SmallVector<SDValue, SystemZ::VectorBytes> Ops(NumElements);
-+  for (unsigned I = 0; I < NumElements; ++I)
-+    Ops[I] = Op.getOperand(I);
-+  return buildVector(DAG, DL, VT, Ops);
-+}
-+
-+SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
-+                                                   SelectionDAG &DAG) const {
-+  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
-+  SDLoc DL(Op);
-+  EVT VT = Op.getValueType();
-+  unsigned NumElements = VT.getVectorNumElements();
-+
-+  if (VSN->isSplat()) {
-+    SDValue Op0 = Op.getOperand(0);
-+    unsigned Index = VSN->getSplatIndex();
-+    assert(Index < VT.getVectorNumElements() &&
-+           "Splat index should be defined and in first operand");
-+    // See whether the value we're splatting is directly available as a scalar.
-+    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
-+        Op0.getOpcode() == ISD::BUILD_VECTOR)
-+      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
-+    // Otherwise keep it as a vector-to-vector operation.
-+    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
-+                       DAG.getConstant(Index, MVT::i32));
-+  }
-+
-+  GeneralShuffle GS(VT);
-+  for (unsigned I = 0; I < NumElements; ++I) {
-+    int Elt = VSN->getMaskElt(I);
-+    if (Elt < 0)
-+      GS.addUndef();
-+    else
-+      GS.add(Op.getOperand(unsigned(Elt) / NumElements),
-+             unsigned(Elt) % NumElements);
-+  }
-+  return GS.getNode(DAG, SDLoc(VSN));
-+}
-+
-+SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
-+                                                     SelectionDAG &DAG) const {
-+  SDLoc DL(Op);
-+  // Just insert the scalar into element 0 of an undefined vector.
-+  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
-+                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
-+                     Op.getOperand(0), DAG.getConstant(0, MVT::i32));
-+}
-+
-+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
-+                                                      SelectionDAG &DAG) const {
-+  // Handle insertions of floating-point values.
-+  SDLoc DL(Op);
-+  SDValue Op0 = Op.getOperand(0);
-+  SDValue Op1 = Op.getOperand(1);
-+  SDValue Op2 = Op.getOperand(2);
-+  EVT VT = Op.getValueType();
-+
-+  // Insertions into constant indices of a v2f64 can be done using VPDI.
-+  // However, if the inserted value is a bitcast or a constant then it's
-+  // better to use GPRs, as below.
-+  if (VT == MVT::v2f64 &&
-+      Op1.getOpcode() != ISD::BITCAST &&
-+      Op1.getOpcode() != ISD::ConstantFP &&
-+      Op2.getOpcode() == ISD::Constant) {
-+    uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
-+    unsigned Mask = VT.getVectorNumElements() - 1;
-+    if (Index <= Mask)
-+      return Op;
-+  }
-+
-+  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
-+  MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
-+  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
-+  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
-+                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
-+                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
-+  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
-+}
-+
-+SDValue
-+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
-+                                               SelectionDAG &DAG) const {
-+  // Handle extractions of floating-point values.
-+  SDLoc DL(Op);
-+  SDValue Op0 = Op.getOperand(0);
-+  SDValue Op1 = Op.getOperand(1);
-+  EVT VT = Op.getValueType();
-+  EVT VecVT = Op0.getValueType();
-+
-+  // Extractions of constant indices can be done directly.
-+  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
-+    uint64_t Index = CIndexN->getZExtValue();
-+    unsigned Mask = VecVT.getVectorNumElements() - 1;
-+    if (Index <= Mask)
-+      return Op;
-+  }
-+
-+  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
-+  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
-+  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
-+  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
-+                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
-+  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
-+}
-+
-+SDValue
-+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
-+					      unsigned UnpackHigh) const {
-+  SDValue PackedOp = Op.getOperand(0);
-+  EVT OutVT = Op.getValueType();
-+  EVT InVT = PackedOp.getValueType();
-+  unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
-+  unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
-+  do {
-+    FromBits *= 2;
-+    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
-+                                 SystemZ::VectorBits / FromBits);
-+    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
-+  } while (FromBits != ToBits);
-+  return PackedOp;
-+}
-+
-+SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
-+                                          unsigned ByScalar) const {
-+  // Look for cases where a vector shift can use the *_BY_SCALAR form.
-+  SDValue Op0 = Op.getOperand(0);
-+  SDValue Op1 = Op.getOperand(1);
-+  SDLoc DL(Op);
-+  EVT VT = Op.getValueType();
-+  unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
-+
-+  // See whether the shift vector is a splat represented as BUILD_VECTOR.
-+  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
-+    APInt SplatBits, SplatUndef;
-+    unsigned SplatBitSize;
-+    bool HasAnyUndefs;
-+    // Check for constant splats.  Use ElemBitSize as the minimum element
-+    // width and reject splats that need wider elements.
-+    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
-+                             ElemBitSize, true) &&
-+        SplatBitSize == ElemBitSize) {
-+      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
-+                                      MVT::i32);
-+      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
-+    }
-+    // Check for variable splats.
-+    BitVector UndefElements;
-+    SDValue Splat = BVN->getSplatValue(&UndefElements);
-+    if (Splat) {
-+      // Since i32 is the smallest legal type, we either need a no-op
-+      // or a truncation.
-+      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
-+      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
-+    }
-+  }
-+
-+  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
-+  // and the shift amount is directly available in a GPR.
-+  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
-+    if (VSN->isSplat()) {
-+      SDValue VSNOp0 = VSN->getOperand(0);
-+      unsigned Index = VSN->getSplatIndex();
-+      assert(Index < VT.getVectorNumElements() &&
-+             "Splat index should be defined and in first operand");
-+      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
-+          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
-+        // Since i32 is the smallest legal type, we either need a no-op
-+        // or a truncation.
-+        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
-+                                    VSNOp0.getOperand(Index));
-+        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
-+      }
-+    }
-+  }
-+
-+  // Otherwise just treat the current form as legal.
-+  return Op;
-+}
-+
- SDValue SystemZTargetLowering::LowerOperation(SDValue Op,
-                                               SelectionDAG &DAG) const {
-   switch (Op.getOpcode()) {
-@@ -2437,6 +4291,14 @@ SDValue SystemZTargetLowering::LowerOper
-     return lowerUDIVREM(Op, DAG);
-   case ISD::OR:
-     return lowerOR(Op, DAG);
-+  case ISD::CTPOP:
-+    return lowerCTPOP(Op, DAG);
-+  case ISD::CTLZ_ZERO_UNDEF:
-+    return DAG.getNode(ISD::CTLZ, SDLoc(Op),
-+                       Op.getValueType(), Op.getOperand(0));
-+  case ISD::CTTZ_ZERO_UNDEF:
-+    return DAG.getNode(ISD::CTTZ, SDLoc(Op),
-+                       Op.getValueType(), Op.getOperand(0));
-   case ISD::ATOMIC_SWAP:
-     return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW);
-   case ISD::ATOMIC_STORE:
-@@ -2471,6 +4333,30 @@ SDValue SystemZTargetLowering::LowerOper
-     return lowerSTACKRESTORE(Op, DAG);
-   case ISD::PREFETCH:
-     return lowerPREFETCH(Op, DAG);
-+  case ISD::INTRINSIC_W_CHAIN:
-+    return lowerINTRINSIC_W_CHAIN(Op, DAG);
-+  case ISD::INTRINSIC_WO_CHAIN:
-+    return lowerINTRINSIC_WO_CHAIN(Op, DAG);
-+  case ISD::BUILD_VECTOR:
-+    return lowerBUILD_VECTOR(Op, DAG);
-+  case ISD::VECTOR_SHUFFLE:
-+    return lowerVECTOR_SHUFFLE(Op, DAG);
-+  case ISD::SCALAR_TO_VECTOR:
-+    return lowerSCALAR_TO_VECTOR(Op, DAG);
-+  case ISD::INSERT_VECTOR_ELT:
-+    return lowerINSERT_VECTOR_ELT(Op, DAG);
-+  case ISD::EXTRACT_VECTOR_ELT:
-+    return lowerEXTRACT_VECTOR_ELT(Op, DAG);
-+  case ISD::SIGN_EXTEND_VECTOR_INREG:
-+    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH);
-+  case ISD::ZERO_EXTEND_VECTOR_INREG:
-+    return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH);
-+  case ISD::SHL:
-+    return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR);
-+  case ISD::SRL:
-+    return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR);
-+  case ISD::SRA:
-+    return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR);
-   default:
-     llvm_unreachable("Unexpected node to lower");
-   }
-@@ -2482,6 +4368,8 @@ const char *SystemZTargetLowering::getTa
-     OPCODE(RET_FLAG);
-     OPCODE(CALL);
-     OPCODE(SIBCALL);
-+    OPCODE(TLS_GDCALL);
-+    OPCODE(TLS_LDCALL);
-     OPCODE(PCREL_WRAPPER);
-     OPCODE(PCREL_OFFSET);
-     OPCODE(IABS);
-@@ -2492,7 +4380,9 @@ const char *SystemZTargetLowering::getTa
-     OPCODE(SELECT_CCMASK);
-     OPCODE(ADJDYNALLOC);
-     OPCODE(EXTRACT_ACCESS);
-+    OPCODE(POPCNT);
-     OPCODE(UMUL_LOHI64);
-+    OPCODE(SDIVREM32);
-     OPCODE(SDIVREM64);
-     OPCODE(UDIVREM32);
-     OPCODE(UDIVREM64);
-@@ -2506,11 +4396,60 @@ const char *SystemZTargetLowering::getTa
-     OPCODE(XC_LOOP);
-     OPCODE(CLC);
-     OPCODE(CLC_LOOP);
--    OPCODE(STRCMP);
-     OPCODE(STPCPY);
-+    OPCODE(STRCMP);
-     OPCODE(SEARCH_STRING);
-     OPCODE(IPM);
-     OPCODE(SERIALIZE);
-+    OPCODE(TBEGIN);
-+    OPCODE(TBEGIN_NOFLOAT);
-+    OPCODE(TEND);
-+    OPCODE(BYTE_MASK);
-+    OPCODE(ROTATE_MASK);
-+    OPCODE(REPLICATE);
-+    OPCODE(JOIN_DWORDS);
-+    OPCODE(SPLAT);
-+    OPCODE(MERGE_HIGH);
-+    OPCODE(MERGE_LOW);
-+    OPCODE(SHL_DOUBLE);
-+    OPCODE(PERMUTE_DWORDS);
-+    OPCODE(PERMUTE);
-+    OPCODE(PACK);
-+    OPCODE(PACKS_CC);
-+    OPCODE(PACKLS_CC);
-+    OPCODE(UNPACK_HIGH);
-+    OPCODE(UNPACKL_HIGH);
-+    OPCODE(UNPACK_LOW);
-+    OPCODE(UNPACKL_LOW);
-+    OPCODE(VSHL_BY_SCALAR);
-+    OPCODE(VSRL_BY_SCALAR);
-+    OPCODE(VSRA_BY_SCALAR);
-+    OPCODE(VSUM);
-+    OPCODE(VICMPE);
-+    OPCODE(VICMPH);
-+    OPCODE(VICMPHL);
-+    OPCODE(VICMPES);
-+    OPCODE(VICMPHS);
-+    OPCODE(VICMPHLS);
-+    OPCODE(VFCMPE);
-+    OPCODE(VFCMPH);
-+    OPCODE(VFCMPHE);
-+    OPCODE(VFCMPES);
-+    OPCODE(VFCMPHS);
-+    OPCODE(VFCMPHES);
-+    OPCODE(VFTCI);
-+    OPCODE(VEXTEND);
-+    OPCODE(VROUND);
-+    OPCODE(VTM);
-+    OPCODE(VFAE_CC);
-+    OPCODE(VFAEZ_CC);
-+    OPCODE(VFEE_CC);
-+    OPCODE(VFEEZ_CC);
-+    OPCODE(VFENE_CC);
-+    OPCODE(VFENEZ_CC);
-+    OPCODE(VISTR_CC);
-+    OPCODE(VSTRC_CC);
-+    OPCODE(VSTRCZ_CC);
-     OPCODE(ATOMIC_SWAPW);
-     OPCODE(ATOMIC_LOADW_ADD);
-     OPCODE(ATOMIC_LOADW_SUB);
-@@ -2529,6 +4468,157 @@ const char *SystemZTargetLowering::getTa
- #undef OPCODE
- }
- 
-+// Return true if VT is a vector whose elements are a whole number of bytes
-+// in width.
-+static bool canTreatAsByteVector(EVT VT) {
-+  return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
-+}
-+
-+// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
-+// producing a result of type ResVT.  Op is a possibly bitcast version
-+// of the input vector and Index is the index (based on type VecVT) that
-+// should be extracted.  Return the new extraction if a simplification
-+// was possible or if Force is true.
-+SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
-+                                              SDValue Op, unsigned Index,
-+                                              DAGCombinerInfo &DCI,
-+                                              bool Force) const {
-+  SelectionDAG &DAG = DCI.DAG;
-+
-+  // The number of bytes being extracted.
-+  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
-+
-+  for (;;) {
-+    unsigned Opcode = Op.getOpcode();
-+    if (Opcode == ISD::BITCAST)
-+      // Look through bitcasts.
-+      Op = Op.getOperand(0);
-+    else if (Opcode == ISD::VECTOR_SHUFFLE &&
-+             canTreatAsByteVector(Op.getValueType())) {
-+      // Get a VPERM-like permute mask and see whether the bytes covered
-+      // by the extracted element are a contiguous sequence from one
-+      // source operand.
-+      SmallVector<int, SystemZ::VectorBytes> Bytes;
-+      getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
-+      int First;
-+      if (!getShuffleInput(Bytes, Index * BytesPerElement,
-+                           BytesPerElement, First))
-+        break;
-+      if (First < 0)
-+        return DAG.getUNDEF(ResVT);
-+      // Make sure the contiguous sequence starts at a multiple of the
-+      // original element size.
-+      unsigned Byte = unsigned(First) % Bytes.size();
-+      if (Byte % BytesPerElement != 0)
-+        break;
-+      // We can get the extracted value directly from an input.
-+      Index = Byte / BytesPerElement;
-+      Op = Op.getOperand(unsigned(First) / Bytes.size());
-+      Force = true;
-+    } else if (Opcode == ISD::BUILD_VECTOR &&
-+               canTreatAsByteVector(Op.getValueType())) {
-+      // We can only optimize this case if the BUILD_VECTOR elements are
-+      // at least as wide as the extracted value.
-+      EVT OpVT = Op.getValueType();
-+      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
-+      if (OpBytesPerElement < BytesPerElement)
-+        break;
-+      // Make sure that the least-significant bit of the extracted value
-+      // is the least significant bit of an input.
-+      unsigned End = (Index + 1) * BytesPerElement;
-+      if (End % OpBytesPerElement != 0)
-+        break;
-+      // We're extracting the low part of one operand of the BUILD_VECTOR.
-+      Op = Op.getOperand(End / OpBytesPerElement - 1);
-+      if (!Op.getValueType().isInteger()) {
-+        EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
-+        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
-+        DCI.AddToWorklist(Op.getNode());
-+      }
-+      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
-+      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
-+      if (VT != ResVT) {
-+        DCI.AddToWorklist(Op.getNode());
-+        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
-+      }
-+      return Op;
-+    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
-+		Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
-+		Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
-+	       canTreatAsByteVector(Op.getValueType()) &&
-+               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
-+      // Make sure that only the unextended bits are significant.
-+      EVT ExtVT = Op.getValueType();
-+      EVT OpVT = Op.getOperand(0).getValueType();
-+      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
-+      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
-+      unsigned Byte = Index * BytesPerElement;
-+      unsigned SubByte = Byte % ExtBytesPerElement;
-+      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
-+      if (SubByte < MinSubByte ||
-+	  SubByte + BytesPerElement > ExtBytesPerElement)
-+	break;
-+      // Get the byte offset of the unextended element
-+      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
-+      // ...then add the byte offset relative to that element.
-+      Byte += SubByte - MinSubByte;
-+      if (Byte % BytesPerElement != 0)
-+	break;
-+      Op = Op.getOperand(0);
-+      Index = Byte / BytesPerElement;
-+      Force = true;
-+    } else
-+      break;
-+  }
-+  if (Force) {
-+    if (Op.getValueType() != VecVT) {
-+      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
-+      DCI.AddToWorklist(Op.getNode());
-+    }
-+    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
-+                       DAG.getConstant(Index, MVT::i32));
-+  }
-+  return SDValue();
-+}
-+
-+// Optimize vector operations in scalar value Op on the basis that Op
-+// is truncated to TruncVT.
-+SDValue
-+SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
-+                                              DAGCombinerInfo &DCI) const {
-+  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
-+  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
-+  // of type TruncVT.
-+  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-+      TruncVT.getSizeInBits() % 8 == 0) {
-+    SDValue Vec = Op.getOperand(0);
-+    EVT VecVT = Vec.getValueType();
-+    if (canTreatAsByteVector(VecVT)) {
-+      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
-+        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
-+        unsigned TruncBytes = TruncVT.getStoreSize();
-+        if (BytesPerElement % TruncBytes == 0) {
-+          // Calculate the value of Y' in the above description.  We are
-+          // splitting the original elements into Scale equal-sized pieces
-+          // and for truncation purposes want the last (least-significant)
-+          // of these pieces for IndexN.  This is easiest to do by calculating
-+          // the start index of the following element and then subtracting 1.
-+          unsigned Scale = BytesPerElement / TruncBytes;
-+          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
-+
-+          // Defer the creation of the bitcast from X to combineExtract,
-+          // which might be able to optimize the extraction.
-+          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
-+                                   VecVT.getStoreSize() / TruncBytes);
-+          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
-+          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
-+        }
-+      }
-+    }
-+  }
-+  return SDValue();
-+}
-+
- SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
-                                                  DAGCombinerInfo &DCI) const {
-   SelectionDAG &DAG = DCI.DAG;
-@@ -2559,6 +4649,114 @@ SDValue SystemZTargetLowering::PerformDA
-       }
-     }
-   }
-+  if (Opcode == SystemZISD::MERGE_HIGH ||
-+      Opcode == SystemZISD::MERGE_LOW) {
-+    SDValue Op0 = N->getOperand(0);
-+    SDValue Op1 = N->getOperand(1);
-+    if (Op0.getOpcode() == ISD::BITCAST)
-+      Op0 = Op0.getOperand(0);
-+    if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
-+        cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
-+      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
-+      // for v4f32.
-+      if (Op1 == N->getOperand(0))
-+        return Op1;
-+      // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
-+      EVT VT = Op1.getValueType();
-+      unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
-+      if (ElemBytes <= 4) {
-+        Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
-+                  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
-+        EVT InVT = VT.changeVectorElementTypeToInteger();
-+        EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
-+                                     SystemZ::VectorBytes / ElemBytes / 2);
-+        if (VT != InVT) {
-+          Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
-+          DCI.AddToWorklist(Op1.getNode());
-+        }
-+        SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
-+        DCI.AddToWorklist(Op.getNode());
-+        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
-+      }
-+    }
-+  }
-+  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
-+  // for the extraction to be done on a vMiN value, so that we can use VSTE.
-+  // If X has wider elements then convert it to:
-+  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
-+  if (Opcode == ISD::STORE) {
-+    auto *SN = cast<StoreSDNode>(N);
-+    EVT MemVT = SN->getMemoryVT();
-+    if (MemVT.isInteger()) {
-+      SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
-+                                             SN->getValue(), DCI);
-+      if (Value.getNode()) {
-+        DCI.AddToWorklist(Value.getNode());
-+
-+        // Rewrite the store with the new form of stored value.
-+        return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
-+                                 SN->getBasePtr(), SN->getMemoryVT(),
-+                                 SN->getMemOperand());
-+      }
-+    }
-+  }
-+  // Try to simplify a vector extraction.
-+  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
-+    if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
-+      SDValue Op0 = N->getOperand(0);
-+      EVT VecVT = Op0.getValueType();
-+      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
-+                            IndexN->getZExtValue(), DCI, false);
-+    }
-+  }
-+  // (join_dwords X, X) == (replicate X)
-+  if (Opcode == SystemZISD::JOIN_DWORDS &&
-+      N->getOperand(0) == N->getOperand(1))
-+    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
-+                       N->getOperand(0));
-+  // (fround (extract_vector_elt X 0))
-+  // (fround (extract_vector_elt X 1)) ->
-+  // (extract_vector_elt (VROUND X) 0)
-+  // (extract_vector_elt (VROUND X) 1)
-+  //
-+  // This is a special case since the target doesn't really support v2f32s.
-+  if (Opcode == ISD::FP_ROUND) {
-+    SDValue Op0 = N->getOperand(0);
-+    if (N->getValueType(0) == MVT::f32 &&
-+        Op0.hasOneUse() &&
-+        Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-+        Op0.getOperand(0).getValueType() == MVT::v2f64 &&
-+        Op0.getOperand(1).getOpcode() == ISD::Constant &&
-+        cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
-+      SDValue Vec = Op0.getOperand(0);
-+      for (auto *U : Vec->uses()) {
-+        if (U != Op0.getNode() &&
-+            U->hasOneUse() &&
-+            U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
-+            U->getOperand(0) == Vec &&
-+            U->getOperand(1).getOpcode() == ISD::Constant &&
-+            cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
-+          SDValue OtherRound = SDValue(*U->use_begin(), 0);
-+          if (OtherRound.getOpcode() == ISD::FP_ROUND &&
-+              OtherRound.getOperand(0) == SDValue(U, 0) &&
-+              OtherRound.getValueType() == MVT::f32) {
-+            SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
-+                                         MVT::v4f32, Vec);
-+            DCI.AddToWorklist(VRound.getNode());
-+            SDValue Extract1 =
-+              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
-+                          VRound, DAG.getConstant(2, MVT::i32));
-+            DCI.AddToWorklist(Extract1.getNode());
-+            DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
-+            SDValue Extract0 =
-+              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
-+                          VRound, DAG.getConstant(0, MVT::i32));
-+            return Extract0;
-+          }
-+        }
-+      }
-+    }
-+  }
-   return SDValue();
- }
- 
-@@ -3338,6 +5536,57 @@ SystemZTargetLowering::emitStringWrapper
-   return DoneMBB;
- }
- 
-+// Update TBEGIN instruction with final opcode and register clobbers.
-+MachineBasicBlock *
-+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
-+                                            MachineBasicBlock *MBB,
-+                                            unsigned Opcode,
-+                                            bool NoFloat) const {
-+  MachineFunction &MF = *MBB->getParent();
-+  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
-+  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
-+
-+  // Update opcode.
-+  MI->setDesc(TII->get(Opcode));
-+
-+  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
-+  // Make sure to add the corresponding GRSM bits if they are missing.
-+  uint64_t Control = MI->getOperand(2).getImm();
-+  static const unsigned GPRControlBit[16] = {
-+    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
-+    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
-+  };
-+  Control |= GPRControlBit[15];
-+  if (TFI->hasFP(MF))
-+    Control |= GPRControlBit[11];
-+  MI->getOperand(2).setImm(Control);
-+
-+  // Add GPR clobbers.
-+  for (int I = 0; I < 16; I++) {
-+    if ((Control & GPRControlBit[I]) == 0) {
-+      unsigned Reg = SystemZMC::GR64Regs[I];
-+      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
-+    }
-+  }
-+
-+  // Add FPR/VR clobbers.
-+  if (!NoFloat && (Control & 4) != 0) {
-+    if (Subtarget.hasVector()) {
-+      for (int I = 0; I < 32; I++) {
-+        unsigned Reg = SystemZMC::VR128Regs[I];
-+        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
-+      }
-+    } else {
-+      for (int I = 0; I < 16; I++) {
-+        unsigned Reg = SystemZMC::FP64Regs[I];
-+        MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
-+      }
-+    }
-+  }
-+
-+  return MBB;
-+}
-+
- MachineBasicBlock *SystemZTargetLowering::
- EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const {
-   switch (MI->getOpcode()) {
-@@ -3579,6 +5828,12 @@ EmitInstrWithCustomInserter(MachineInstr
-     return emitStringWrapper(MI, MBB, SystemZ::MVST);
-   case SystemZ::SRSTLoop:
-     return emitStringWrapper(MI, MBB, SystemZ::SRST);
-+  case SystemZ::TBEGIN:
-+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false);
-+  case SystemZ::TBEGIN_nofloat:
-+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true);
-+  case SystemZ::TBEGINC:
-+    return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true);
-   default:
-     llvm_unreachable("Unexpected instr type to insert");
-   }
-Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h
-+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
-@@ -34,6 +34,11 @@ enum {
-   CALL,
-   SIBCALL,
- 
-+  // TLS calls.  Like regular calls, except operand 1 is the TLS symbol.
-+  // (The call target is implicitly __tls_get_offset.)
-+  TLS_GDCALL,
-+  TLS_LDCALL,
-+
-   // Wraps a TargetGlobalAddress that should be loaded using PC-relative
-   // accesses (LARL).  Operand 0 is the address.
-   PCREL_WRAPPER,
-@@ -82,6 +87,9 @@ enum {
-   // the number of the register.
-   EXTRACT_ACCESS,
- 
-+  // Count number of bits set in operand 0 per byte.
-+  POPCNT,
-+
-   // Wrappers around the ISD opcodes of the same name.  The output and
-   // first input operands are GR128s.  The trailing numbers are the
-   // widths of the second operand in bits.
-@@ -138,6 +146,135 @@ enum {
-   // Perform a serialization operation.  (BCR 15,0 or BCR 14,0.)
-   SERIALIZE,
- 
-+  // Transaction begin.  The first operand is the chain, the second
-+  // the TDB pointer, and the third the immediate control field.
-+  // Returns chain and glue.
-+  TBEGIN,
-+  TBEGIN_NOFLOAT,
-+
-+  // Transaction end.  Just the chain operand.  Returns chain and glue.
-+  TEND,
-+
-+  // Create a vector constant by filling byte N of the result with bit
-+  // 15-N of the single operand.
-+  BYTE_MASK,
-+
-+  // Create a vector constant by replicating an element-sized RISBG-style mask.
-+  // The first operand specifies the starting set bit and the second operand
-+  // specifies the ending set bit.  Both operands count from the MSB of the
-+  // element.
-+  ROTATE_MASK,
-+
-+  // Replicate a GPR scalar value into all elements of a vector.
-+  REPLICATE,
-+
-+  // Create a vector from two i64 GPRs.
-+  JOIN_DWORDS,
-+
-+  // Replicate one element of a vector into all elements.  The first operand
-+  // is the vector and the second is the index of the element to replicate.
-+  SPLAT,
-+
-+  // Interleave elements from the high half of operand 0 and the high half
-+  // of operand 1.
-+  MERGE_HIGH,
-+
-+  // Likewise for the low halves.
-+  MERGE_LOW,
-+
-+  // Concatenate the vectors in the first two operands, shift them left
-+  // by the third operand, and take the first half of the result.
-+  SHL_DOUBLE,
-+
-+  // Take one element of the first v2i64 operand and the one element of
-+  // the second v2i64 operand and concatenate them to form a v2i64 result.
-+  // The third operand is a 4-bit value of the form 0A0B, where A and B
-+  // are the element selectors for the first operand and second operands
-+  // respectively.
-+  PERMUTE_DWORDS,
-+
-+  // Perform a general vector permute on vector operands 0 and 1.
-+  // Each byte of operand 2 controls the corresponding byte of the result,
-+  // in the same way as a byte-level VECTOR_SHUFFLE mask.
-+  PERMUTE,
-+
-+  // Pack vector operands 0 and 1 into a single vector with half-sized elements.
-+  PACK,
-+
-+  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
-+  // saturation and PACKLS_CC does unsigned saturation.
-+  PACKS_CC,
-+  PACKLS_CC,
-+
-+  // Unpack the first half of vector operand 0 into double-sized elements.
-+  // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
-+  UNPACK_HIGH,
-+  UNPACKL_HIGH,
-+
-+  // Likewise for the second half.
-+  UNPACK_LOW,
-+  UNPACKL_LOW,
-+
-+  // Shift each element of vector operand 0 by the number of bits specified
-+  // by scalar operand 1.
-+  VSHL_BY_SCALAR,
-+  VSRL_BY_SCALAR,
-+  VSRA_BY_SCALAR,
-+
-+  // For each element of the output type, sum across all sub-elements of
-+  // operand 0 belonging to the corresponding element, and add in the
-+  // rightmost sub-element of the corresponding element of operand 1.
-+  VSUM,
-+
-+  // Compare integer vector operands 0 and 1 to produce the usual 0/-1
-+  // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
-+  // and VICMPHL for "unsigned greater than".
-+  VICMPE,
-+  VICMPH,
-+  VICMPHL,
-+
-+  // Likewise, but also set the condition codes on the result.
-+  VICMPES,
-+  VICMPHS,
-+  VICMPHLS,
-+
-+  // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1
-+  // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
-+  // greater than" and VFCMPHE for "ordered and greater than or equal to".
-+  VFCMPE,
-+  VFCMPH,
-+  VFCMPHE,
-+
-+  // Likewise, but also set the condition codes on the result.
-+  VFCMPES,
-+  VFCMPHS,
-+  VFCMPHES,
-+
-+  // Test floating-point data class for vectors.
-+  VFTCI,
-+
-+  // Extend the even f32 elements of vector operand 0 to produce a vector
-+  // of f64 elements.
-+  VEXTEND,
-+
-+  // Round the f64 elements of vector operand 0 to f32s and store them in the
-+  // even elements of the result.
-+  VROUND,
-+
-+  // AND the two vector operands together and set CC based on the result.
-+  VTM,
-+
-+  // String operations that set CC as a side-effect.
-+  VFAE_CC,
-+  VFAEZ_CC,
-+  VFEE_CC,
-+  VFEEZ_CC,
-+  VFENE_CC,
-+  VFENEZ_CC,
-+  VISTR_CC,
-+  VSTRC_CC,
-+  VSTRCZ_CC,
-+
-   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
-   // ATOMIC_LOAD_<op>.
-   //
-@@ -204,9 +341,33 @@ public:
-   MVT getScalarShiftAmountTy(EVT LHSTy) const override {
-     return MVT::i32;
-   }
-+  MVT getVectorIdxTy() const override {
-+    // Only the lower 12 bits of an element index are used, so we don't
-+    // want to clobber the upper 32 bits of a GPR unnecessarily.
-+    return MVT::i32;
-+  }
-+  TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
-+    const override {
-+    // Widen subvectors to the full width rather than promoting integer
-+    // elements.  This is better because:
-+    //
-+    // (a) it means that we can handle the ABI for passing and returning
-+    //     sub-128 vectors without having to handle them as legal types.
-+    //
-+    // (b) we don't have instructions to extend on load and truncate on store,
-+    //     so promoting the integers is less efficient.
-+    //
-+    // (c) there are no multiplication instructions for the widest integer
-+    //     type (v2i64).
-+    if (VT.getVectorElementType().getSizeInBits() % 8 == 0)
-+      return TypeWidenVector;
-+    return TargetLoweringBase::getPreferredVectorAction(VT);
-+  }
-   EVT getSetCCResultType(LLVMContext &, EVT) const override;
-   bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
-   bool isFPImmLegal(const APFloat &Imm, EVT VT) const override;
-+  bool isLegalICmpImmediate(int64_t Imm) const override;
-+  bool isLegalAddImmediate(int64_t Imm) const override;
-   bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override;
-   bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS,
-                                       unsigned Align,
-@@ -257,6 +418,9 @@ private:
-   SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerGlobalAddress(GlobalAddressSDNode *Node,
-                              SelectionDAG &DAG) const;
-+  SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node,
-+                            SelectionDAG &DAG, unsigned Opcode,
-+                            SDValue GOTOffset) const;
-   SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node,
-                                 SelectionDAG &DAG) const;
-   SDValue lowerBlockAddress(BlockAddressSDNode *Node,
-@@ -272,6 +436,7 @@ private:
-   SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG,
-@@ -282,6 +447,22 @@ private:
-   SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const;
-   SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const;
-+  SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
-+				 unsigned UnpackHigh) const;
-+  SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const;
-+
-+  SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp,
-+                         unsigned Index, DAGCombinerInfo &DCI,
-+                         bool Force) const;
-+  SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
-+                                 DAGCombinerInfo &DCI) const;
- 
-   // If the last instruction before MBBI in MBB was some form of COMPARE,
-   // try to replace it with a COMPARE AND BRANCH just before MBBI.
-@@ -319,6 +500,10 @@ private:
-   MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
-                                        MachineBasicBlock *BB,
-                                        unsigned Opcode) const;
-+  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
-+                                          MachineBasicBlock *MBB,
-+                                          unsigned Opcode,
-+                                          bool NoFloat) const;
- };
- } // end namespace llvm
- 
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrFP.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFP.td
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrFP.td
-@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, Compare
-   defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
-   defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
- }
--defm : CompareZeroFP<LTEBRCompare, FP32>;
--defm : CompareZeroFP<LTDBRCompare, FP64>;
--defm : CompareZeroFP<LTXBRCompare, FP128>;
-+// Note that the comparison against zero operation is not available if we
-+// have vector support, since load-and-test instructions will partially
-+// clobber the target (vector) register.
-+let Predicates = [FeatureNoVector] in {
-+  defm : CompareZeroFP<LTEBRCompare, FP32>;
-+  defm : CompareZeroFP<LTDBRCompare, FP64>;
-+  defm : CompareZeroFP<LTXBRCompare, FP128>;
-+}
- 
- // Moves between 64-bit integer and floating-point registers.
- def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
-@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1
-   defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
-   defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
- 
-+  // For z13 we prefer LDE over LE to avoid partial register dependencies.
-+  def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
-+
-   // These instructions are split after register allocation, so we don't
-   // want a custom inserter.
-   let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
-@@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345,
-              Requires<[FeatureFPExtension]>;
- 
- def : Pat<(f32 (fround FP128:$src)),
--          (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
-+          (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
- def : Pat<(f64 (fround FP128:$src)),
-           (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
- 
-@@ -345,13 +353,13 @@ def MDB  : BinaryRXE<"mdb",  0xED1C, fmu
- def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
- def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
-           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
--                                FP32:$src1, subreg_h32), FP32:$src2)>;
-+                                FP32:$src1, subreg_r32), FP32:$src2)>;
- 
- // f64 multiplication of an FP32 register and an f32 memory.
- def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
- def : Pat<(fmul (f64 (fextend FP32:$src1)),
-                 (f64 (extloadf32 bdxaddr12only:$addr))),
--          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
-+          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
-                 bdxaddr12only:$addr)>;
- 
- // f128 multiplication of two FP64 registers.
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFormats.td
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td
-@@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping
- // Formats are specified using operand field declarations of the form:
- //
- //   bits<4> Rn   : register input or output for operand n
-+//   bits<5> Vn   : vector register input or output for operand n
- //   bits<m> In   : immediate value of width m for operand n
- //   bits<4> BDn  : address operand n, which has a base and a displacement
- //   bits<m> XBDn : address operand n, which has an index, a base and a
- //                  displacement
-+//   bits<m> VBDn : address operand n, which has a vector index, a base and a
-+//                  displacement
- //   bits<4> Xn   : index register for address operand n
- //   bits<4> Mn   : mode value for operand n
- //
-@@ -339,11 +342,13 @@ class InstRXE<bits<16> op, dag outs, dag
- 
-   bits<4> R1;
-   bits<20> XBD2;
-+  bits<4> M3;
- 
-   let Inst{47-40} = op{15-8};
-   let Inst{39-36} = R1;
-   let Inst{35-16} = XBD2;
--  let Inst{15-8}  = 0;
-+  let Inst{15-12} = M3;
-+  let Inst{11-8}  = 0;
-   let Inst{7-0}   = op{7-0};
- 
-   let HasIndex = 1;
-@@ -473,6 +478,393 @@ class InstSS<bits<8> op, dag outs, dag i
-   let Inst{15-0}  = BD2;
- }
- 
-+class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<4, outs, ins, asmstr, pattern> {
-+  field bits<32> Inst;
-+  field bits<32> SoftFail = 0;
-+
-+  bits<16> BD2;
-+
-+  let Inst{31-16} = op;
-+  let Inst{15-0}  = BD2;
-+}
-+
-+class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<16> I2;
-+  bits<4> M3;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = 0;
-+  let Inst{31-16} = I2;
-+  let Inst{15-12} = M3;
-+  let Inst{11}    = V1{4};
-+  let Inst{10-8}  = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<8> I2;
-+  bits<8> I3;
-+  bits<4> M4;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = 0;
-+  let Inst{31-24} = I2;
-+  let Inst{23-16} = I3;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10-8}  = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V3;
-+  bits<16> I2;
-+  bits<4> M4;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V3{3-0};
-+  let Inst{31-16} = I2;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V3{4};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRId<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<5> V3;
-+  bits<8> I4;
-+  bits<4> M5;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-28} = V3{3-0};
-+  let Inst{27-24} = 0;
-+  let Inst{23-16} = I4;
-+  let Inst{15-12} = M5;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9}     = V3{4};
-+  let Inst{8}     = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRIe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<12> I3;
-+  bits<4> M4;
-+  bits<4> M5;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-20} = I3;
-+  let Inst{19-16} = M5;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+// Depending on the instruction mnemonic, certain bits may be or-ed into
-+// the M4 value provided as explicit operand.  These are passed as m4or.
-+class InstVRRa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-+               bits<4> m4or = 0>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<4> M3;
-+  bits<4> M4;
-+  bits<4> M5;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-24} = 0;
-+  let Inst{23-20} = M5;
-+  let Inst{19}    = !if (!eq (m4or{3}, 1), 1, M4{3});
-+  let Inst{18}    = !if (!eq (m4or{2}, 1), 1, M4{2});
-+  let Inst{17}    = !if (!eq (m4or{1}, 1), 1, M4{1});
-+  let Inst{16}    = !if (!eq (m4or{0}, 1), 1, M4{0});
-+  let Inst{15-12} = M3;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+// Depending on the instruction mnemonic, certain bits may be or-ed into
-+// the M5 value provided as explicit operand.  These are passed as m5or.
-+class InstVRRb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-+               bits<4> m5or = 0>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<5> V3;
-+  bits<4> M4;
-+  bits<4> M5;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-28} = V3{3-0};
-+  let Inst{27-24} = 0;
-+  let Inst{23}    = !if (!eq (m5or{3}, 1), 1, M5{3});
-+  let Inst{22}    = !if (!eq (m5or{2}, 1), 1, M5{2});
-+  let Inst{21}    = !if (!eq (m5or{1}, 1), 1, M5{1});
-+  let Inst{20}    = !if (!eq (m5or{0}, 1), 1, M5{0});
-+  let Inst{19-16} = 0;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9}     = V3{4};
-+  let Inst{8}     = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRRc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<5> V3;
-+  bits<4> M4;
-+  bits<4> M5;
-+  bits<4> M6;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-28} = V3{3-0};
-+  let Inst{27-24} = 0;
-+  let Inst{23-20} = M6;
-+  let Inst{19-16} = M5;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9}     = V3{4};
-+  let Inst{8}     = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+// Depending on the instruction mnemonic, certain bits may be or-ed into
-+// the M6 value provided as explicit operand.  These are passed as m6or.
-+class InstVRRd<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern,
-+               bits<4> m6or = 0>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<5> V3;
-+  bits<5> V4;
-+  bits<4> M5;
-+  bits<4> M6;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-28} = V3{3-0};
-+  let Inst{27-24} = M5;
-+  let Inst{23}    = !if (!eq (m6or{3}, 1), 1, M6{3});
-+  let Inst{22}    = !if (!eq (m6or{2}, 1), 1, M6{2});
-+  let Inst{21}    = !if (!eq (m6or{1}, 1), 1, M6{1});
-+  let Inst{20}    = !if (!eq (m6or{0}, 1), 1, M6{0});
-+  let Inst{19-16} = 0;
-+  let Inst{15-12} = V4{3-0};
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9}     = V3{4};
-+  let Inst{8}     = V4{4};
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRRe<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<5> V2;
-+  bits<5> V3;
-+  bits<5> V4;
-+  bits<4> M5;
-+  bits<4> M6;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V2{3-0};
-+  let Inst{31-28} = V3{3-0};
-+  let Inst{27-24} = M6;
-+  let Inst{23-20} = 0;
-+  let Inst{19-16} = M5;
-+  let Inst{15-12} = V4{3-0};
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V2{4};
-+  let Inst{9}     = V3{4};
-+  let Inst{8}     = V4{4};
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRRf<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<4> R2;
-+  bits<4> R3;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = R2;
-+  let Inst{31-28} = R3;
-+  let Inst{27-12} = 0;
-+  let Inst{11}    = V1{4};
-+  let Inst{10-8}  = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRSa<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<16> BD2;
-+  bits<5> V3;
-+  bits<4> M4;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = V3{3-0};
-+  let Inst{31-16} = BD2;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = V3{4};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRSb<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<16> BD2;
-+  bits<4> R3;
-+  bits<4> M4;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-32} = R3;
-+  let Inst{31-16} = BD2;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = V1{4};
-+  let Inst{10-8}  = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRSc<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<4> R1;
-+  bits<16> BD2;
-+  bits<5> V3;
-+  bits<4> M4;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = R1;
-+  let Inst{35-32} = V3{3-0};
-+  let Inst{31-16} = BD2;
-+  let Inst{15-12} = M4;
-+  let Inst{11}    = 0;
-+  let Inst{10}    = V3{4};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRV<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<21> VBD2;
-+  bits<4> M3;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-16} = VBD2{19-0};
-+  let Inst{15-12} = M3;
-+  let Inst{11}    = V1{4};
-+  let Inst{10}    = VBD2{20};
-+  let Inst{9-8}   = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
-+class InstVRX<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
-+  : InstSystemZ<6, outs, ins, asmstr, pattern> {
-+  field bits<48> Inst;
-+  field bits<48> SoftFail = 0;
-+
-+  bits<5> V1;
-+  bits<20> XBD2;
-+  bits<4> M3;
-+
-+  let Inst{47-40} = op{15-8};
-+  let Inst{39-36} = V1{3-0};
-+  let Inst{35-16} = XBD2;
-+  let Inst{15-12} = M3;
-+  let Inst{11}    = V1{4};
-+  let Inst{10-8}  = 0;
-+  let Inst{7-0}   = op{7-0};
-+}
-+
- //===----------------------------------------------------------------------===//
- // Instruction definitions with semantics
- //===----------------------------------------------------------------------===//
-@@ -492,12 +884,6 @@ class InstSS<bits<8> op, dag outs, dag i
- //     form of the source register in the destination register and
- //     branches on the result.
- //
--//   Store:
--//     One register or immediate input operand and one address input operand.
--//     The instruction stores the first operand to the address.
--//
--//     This category is used for both pure and truncating stores.
--//
- //   LoadMultiple:
- //     One address input operand and two explicit output operands.
- //     The instruction loads a range of registers from the address,
-@@ -510,18 +896,35 @@ class InstSS<bits<8> op, dag outs, dag i
- //     with the explicit operands giving the first and last register
- //     to store.  Other stored registers are added as implicit uses.
- //
-+//   StoreLength:
-+//     One value operand, one length operand and one address operand.
-+//     The instruction stores the value operand to the address but
-+//     doesn't write more than the number of bytes specified by the
-+//     length operand.
-+//
- //   Unary:
- //     One register output operand and one input operand.
- //
-+//   Store:
-+//     One address operand and one other input operand.  The instruction
-+//     stores to the address.
-+//
- //   Binary:
- //     One register output operand and two input operands.
- //
-+//   StoreBinary:
-+//     One address operand and two other input operands.  The instruction
-+//     stores to the address.
-+//
- //   Compare:
- //     Two input operands and an implicit CC output operand.
- //
- //   Ternary:
- //     One register output operand and three input operands.
- //
-+//   Quaternary:
-+//     One register output operand and four input operands.
-+//
- //   LoadAndOp:
- //     One output operand and two input operands, one of which is an address.
- //     The instruction both reads from and writes to the address.
-@@ -556,6 +959,12 @@ class InherentRRE<string mnemonic, bits<
-   let R2 = 0;
- }
- 
-+class InherentVRIa<string mnemonic, bits<16> opcode, bits<16> value>
-+  : InstVRIa<opcode, (outs VR128:$V1), (ins), mnemonic#"\t$V1", []> {
-+  let I2 = value;
-+  let M3 = 0;
-+}
-+
- class BranchUnaryRI<string mnemonic, bits<12> opcode, RegisterOperand cls>
-   : InstRI<opcode, (outs cls:$R1), (ins cls:$R1src, brtarget16:$I2),
-            mnemonic##"\t$R1, $I2", []> {
-@@ -571,6 +980,13 @@ class LoadMultipleRSY<string mnemonic, b
-   let mayLoad = 1;
- }
- 
-+class LoadMultipleVRSa<string mnemonic, bits<16> opcode>
-+  : InstVRSa<opcode, (outs VR128:$V1, VR128:$V3), (ins bdaddr12only:$BD2),
-+             mnemonic#"\t$V1, $V3, $BD2", []> {
-+  let M4 = 0;
-+  let mayLoad = 1;
-+}
-+
- class StoreRILPC<string mnemonic, bits<12> opcode, SDPatternOperator operator,
-                  RegisterOperand cls>
-   : InstRIL<opcode, (outs), (ins cls:$R1, pcrel32:$I2),
-@@ -619,12 +1035,39 @@ multiclass StoreRXPair<string mnemonic,
-   }
- }
- 
-+class StoreVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+               TypedReg tr, bits<5> bytes, bits<4> type = 0>
-+  : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2),
-+            mnemonic#"\t$V1, $XBD2",
-+            [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
-+  let M3 = type;
-+  let mayStore = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class StoreLengthVRSb<string mnemonic, bits<16> opcode,
-+                      SDPatternOperator operator, bits<5> bytes>
-+  : InstVRSb<opcode, (outs), (ins VR128:$V1, GR32:$R3, bdaddr12only:$BD2),
-+             mnemonic#"\t$V1, $R3, $BD2",
-+             [(operator VR128:$V1, GR32:$R3, bdaddr12only:$BD2)]> {
-+  let M4 = 0;
-+  let mayStore = 1;
-+  let AccessBytes = bytes;
-+}
-+
- class StoreMultipleRSY<string mnemonic, bits<16> opcode, RegisterOperand cls>
-   : InstRSY<opcode, (outs), (ins cls:$R1, cls:$R3, bdaddr20only:$BD2),
-             mnemonic#"\t$R1, $R3, $BD2", []> {
-   let mayStore = 1;
- }
- 
-+class StoreMultipleVRSa<string mnemonic, bits<16> opcode>
-+  : InstVRSa<opcode, (outs), (ins VR128:$V1, VR128:$V3, bdaddr12only:$BD2),
-+             mnemonic#"\t$V1, $V3, $BD2", []> {
-+  let M4 = 0;
-+  let mayStore = 1;
-+}
-+
- // StoreSI* instructions are used to store an integer to memory, but the
- // addresses are more restricted than for normal stores.  If we are in the
- // situation of having to force either the address into a register or the
-@@ -857,6 +1300,7 @@ class UnaryRXE<string mnemonic, bits<16>
-   let OpType = "mem";
-   let mayLoad = 1;
-   let AccessBytes = bytes;
-+  let M3 = 0;
- }
- 
- class UnaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-@@ -883,6 +1327,46 @@ multiclass UnaryRXPair<string mnemonic,
-   }
- }
- 
-+class UnaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                TypedReg tr, Immediate imm, bits<4> type = 0>
-+  : InstVRIa<opcode, (outs tr.op:$V1), (ins imm:$I2),
-+             mnemonic#"\t$V1, $I2",
-+             [(set tr.op:$V1, (tr.vt (operator imm:$I2)))]> {
-+  let M3 = type;
-+}
-+
-+class UnaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0,
-+                bits<4> m5 = 0>
-+  : InstVRRa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2),
-+             mnemonic#"\t$V1, $V2",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]> {
-+  let M3 = type;
-+  let M4 = m4;
-+  let M5 = m5;
-+}
-+
-+multiclass UnaryVRRaSPair<string mnemonic, bits<16> opcode,
-+                          SDPatternOperator operator,
-+                          SDPatternOperator operator_cc, TypedReg tr1,
-+                          TypedReg tr2, bits<4> type, bits<4> modifier = 0,
-+                          bits<4> modifier_cc = 1> {
-+  def "" : UnaryVRRa<mnemonic, opcode, operator, tr1, tr2, type, 0, modifier>;
-+  let Defs = [CC] in
-+    def S : UnaryVRRa<mnemonic##"s", opcode, operator_cc, tr1, tr2, type, 0,
-+                      modifier_cc>;
-+}
-+
-+class UnaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+               TypedReg tr, bits<5> bytes, bits<4> type = 0>
-+  : InstVRX<opcode, (outs tr.op:$V1), (ins bdxaddr12only:$XBD2),
-+            mnemonic#"\t$V1, $XBD2",
-+            [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2)))]> {
-+  let M3 = type;
-+  let mayLoad = 1;
-+  let AccessBytes = bytes;
-+}
-+
- class BinaryRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
-                RegisterOperand cls1, RegisterOperand cls2>
-   : InstRR<opcode, (outs cls1:$R1), (ins cls1:$R1src, cls2:$R2),
-@@ -1036,6 +1520,7 @@ class BinaryRXE<string mnemonic, bits<16
-   let DisableEncoding = "$R1src";
-   let mayLoad = 1;
-   let AccessBytes = bytes;
-+  let M3 = 0;
- }
- 
- class BinaryRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-@@ -1094,6 +1579,148 @@ multiclass BinarySIPair<string mnemonic,
-   }
- }
- 
-+class BinaryVRIb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr, bits<4> type>
-+  : InstVRIb<opcode, (outs tr.op:$V1), (ins imm32zx8:$I2, imm32zx8:$I3),
-+             mnemonic#"\t$V1, $I2, $I3",
-+             [(set tr.op:$V1, (tr.vt (operator imm32zx8:$I2, imm32zx8:$I3)))]> {
-+  let M4 = type;
-+}
-+
-+class BinaryVRIc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<4> type>
-+  : InstVRIc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, imm32zx16:$I2),
-+             mnemonic#"\t$V1, $V3, $I2",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
-+                                                 imm32zx16:$I2)))]> {
-+  let M4 = type;
-+}
-+
-+class BinaryVRIe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5>
-+  : InstVRIe<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, imm32zx12:$I3),
-+             mnemonic#"\t$V1, $V2, $I3",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 imm32zx12:$I3)))]> {
-+  let M4 = type;
-+  let M5 = m5;
-+}
-+
-+class BinaryVRRa<string mnemonic, bits<16> opcode>
-+  : InstVRRa<opcode, (outs VR128:$V1), (ins VR128:$V2, imm32zx4:$M3),
-+             mnemonic#"\t$V1, $V2, $M3", []> {
-+  let M4 = 0;
-+  let M5 = 0;
-+}
-+
-+class BinaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<4> type = 0,
-+                 bits<4> modifier = 0>
-+  : InstVRRb<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
-+             mnemonic#"\t$V1, $V2, $V3",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3))))]> {
-+  let M4 = type;
-+  let M5 = modifier;
-+}
-+
-+// Declare a pair of instructions, one which sets CC and one which doesn't.
-+// The CC-setting form ends with "S" and sets the low bit of M5.
-+multiclass BinaryVRRbSPair<string mnemonic, bits<16> opcode,
-+                           SDPatternOperator operator,
-+                           SDPatternOperator operator_cc, TypedReg tr1,
-+                           TypedReg tr2, bits<4> type,
-+                           bits<4> modifier = 0, bits<4> modifier_cc = 1> {
-+  def "" : BinaryVRRb<mnemonic, opcode, operator, tr1, tr2, type, modifier>;
-+  let Defs = [CC] in
-+    def S : BinaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
-+                       modifier_cc>;
-+}
-+
-+class BinaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0,
-+                 bits<4> m6 = 0>
-+  : InstVRRc<opcode, (outs tr1.op:$V1), (ins tr2.op:$V2, tr2.op:$V3),
-+             mnemonic#"\t$V1, $V2, $V3",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3))))]> {
-+  let M4 = type;
-+  let M5 = m5;
-+  let M6 = m6;
-+}
-+
-+multiclass BinaryVRRcSPair<string mnemonic, bits<16> opcode,
-+                           SDPatternOperator operator,
-+                           SDPatternOperator operator_cc, TypedReg tr1,
-+                           TypedReg tr2, bits<4> type, bits<4> m5,
-+                           bits<4> modifier = 0, bits<4> modifier_cc = 1> {
-+  def "" : BinaryVRRc<mnemonic, opcode, operator, tr1, tr2, type, m5, modifier>;
-+  let Defs = [CC] in
-+    def S : BinaryVRRc<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
-+                       m5, modifier_cc>;
-+}
-+
-+class BinaryVRRf<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr>
-+  : InstVRRf<opcode, (outs tr.op:$V1), (ins GR64:$R2, GR64:$R3),
-+             mnemonic#"\t$V1, $R2, $R3",
-+             [(set tr.op:$V1, (tr.vt (operator GR64:$R2, GR64:$R3)))]>;
-+
-+class BinaryVRSa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<4> type>
-+  : InstVRSa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V3, shift12only:$BD2),
-+             mnemonic#"\t$V1, $V3, $BD2",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V3),
-+                                                 shift12only:$BD2)))]> {
-+  let M4 = type;
-+}
-+
-+class BinaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 bits<5> bytes>
-+  : InstVRSb<opcode, (outs VR128:$V1), (ins GR32:$R3, bdaddr12only:$BD2),
-+             mnemonic#"\t$V1, $R3, $BD2",
-+             [(set VR128:$V1, (operator GR32:$R3, bdaddr12only:$BD2))]> {
-+  let M4 = 0;
-+  let mayLoad = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class BinaryVRSc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr, bits<4> type>
-+  : InstVRSc<opcode, (outs GR64:$R1), (ins tr.op:$V3, shift12only:$BD2),
-+           mnemonic#"\t$R1, $V3, $BD2",
-+           [(set GR64:$R1, (operator (tr.vt tr.op:$V3), shift12only:$BD2))]> {
-+  let M4 = type;
-+}
-+
-+class BinaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                TypedReg tr, bits<5> bytes>
-+  : InstVRX<opcode, (outs VR128:$V1), (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
-+            mnemonic#"\t$V1, $XBD2, $M3",
-+            [(set tr.op:$V1, (tr.vt (operator bdxaddr12only:$XBD2,
-+                                              imm32zx4:$M3)))]> {
-+  let mayLoad = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class StoreBinaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
-+                     Immediate index>
-+  : InstVRV<opcode, (outs), (ins VR128:$V1, bdvaddr12only:$VBD2, index:$M3),
-+            mnemonic#"\t$V1, $VBD2, $M3", []> {
-+  let mayStore = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class StoreBinaryVRX<string mnemonic, bits<16> opcode,
-+                     SDPatternOperator operator, TypedReg tr, bits<5> bytes,
-+                     Immediate index>
-+  : InstVRX<opcode, (outs), (ins tr.op:$V1, bdxaddr12only:$XBD2, index:$M3),
-+            mnemonic#"\t$V1, $XBD2, $M3",
-+            [(operator (tr.vt tr.op:$V1), bdxaddr12only:$XBD2, index:$M3)]> {
-+  let mayStore = 1;
-+  let AccessBytes = bytes;
-+}
-+
- class CompareRR<string mnemonic, bits<8> opcode, SDPatternOperator operator,
-                 RegisterOperand cls1, RegisterOperand cls2>
-   : InstRR<opcode, (outs), (ins cls1:$R1, cls2:$R2),
-@@ -1166,6 +1793,7 @@ class CompareRXE<string mnemonic, bits<1
-   let isCompare = 1;
-   let mayLoad = 1;
-   let AccessBytes = bytes;
-+  let M3 = 0;
- }
- 
- class CompareRXY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-@@ -1235,6 +1863,17 @@ multiclass CompareSIPair<string mnemonic
-   }
- }
- 
-+class CompareVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr, bits<4> type>
-+  : InstVRRa<opcode, (outs), (ins tr.op:$V1, tr.op:$V2),
-+             mnemonic#"\t$V1, $V2",
-+             [(operator (tr.vt tr.op:$V1), (tr.vt tr.op:$V2))]> {
-+  let isCompare = 1;
-+  let M3 = type;
-+  let M4 = 0;
-+  let M5 = 0;
-+}
-+
- class TernaryRRD<string mnemonic, bits<16> opcode,
-                  SDPatternOperator operator, RegisterOperand cls>
-   : InstRRD<opcode, (outs cls:$R1), (ins cls:$R1src, cls:$R3, cls:$R2),
-@@ -1261,6 +1900,188 @@ class TernaryRXF<string mnemonic, bits<1
-   let AccessBytes = bytes;
- }
- 
-+class TernaryVRIa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index>
-+  : InstVRIa<opcode, (outs tr1.op:$V1), (ins tr2.op:$V1src, imm:$I2, index:$M3),
-+             mnemonic#"\t$V1, $I2, $M3",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
-+                                                 imm:$I2, index:$M3)))]> {
-+  let Constraints = "$V1 = $V1src";
-+  let DisableEncoding = "$V1src";
-+}
-+
-+class TernaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, bits<4> type>
-+  : InstVRId<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
-+             mnemonic#"\t$V1, $V2, $V3, $I4",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 imm32zx8:$I4)))]> {
-+  let M5 = type;
-+}
-+
-+class TernaryVRRa<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or>
-+  : InstVRRa<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, imm32zx4:$M4, imm32zx4:$M5),
-+             mnemonic#"\t$V1, $V2, $M4, $M5",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 imm32zx4:$M4,
-+                                                 imm32zx4:$M5)))],
-+             m4or> {
-+  let M3 = type;
-+}
-+
-+class TernaryVRRb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, bits<4> type,
-+                  SDPatternOperator m5mask, bits<4> m5or>
-+  : InstVRRb<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, m5mask:$M5),
-+             mnemonic#"\t$V1, $V2, $V3, $M5",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 m5mask:$M5)))],
-+             m5or> {
-+  let M4 = type;
-+}
-+
-+multiclass TernaryVRRbSPair<string mnemonic, bits<16> opcode,
-+                            SDPatternOperator operator,
-+                            SDPatternOperator operator_cc, TypedReg tr1,
-+                            TypedReg tr2, bits<4> type, bits<4> m5or> {
-+  def "" : TernaryVRRb<mnemonic, opcode, operator, tr1, tr2, type,
-+                       imm32zx4even, !and (m5or, 14)>;
-+  def : InstAlias<mnemonic#"\t$V1, $V2, $V3",
-+                  (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
-+                                            tr2.op:$V3, 0)>;
-+  let Defs = [CC] in
-+    def S : TernaryVRRb<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
-+                        imm32zx4even, !add(!and (m5or, 14), 1)>;
-+  def : InstAlias<mnemonic#"s\t$V1, $V2, $V3",
-+                  (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
-+                                                tr2.op:$V3, 0)>;
-+}
-+
-+class TernaryVRRc<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2>
-+  : InstVRRc<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, imm32zx4:$M4),
-+             mnemonic#"\t$V1, $V2, $V3, $M4",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 imm32zx4:$M4)))]> {
-+  let M5 = 0;
-+  let M6 = 0;
-+}
-+
-+class TernaryVRRd<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, bits<4> type = 0>
-+  : InstVRRd<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
-+             mnemonic#"\t$V1, $V2, $V3, $V4",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 (tr1.vt tr1.op:$V4))))]> {
-+  let M5 = type;
-+  let M6 = 0;
-+}
-+
-+class TernaryVRRe<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0>
-+  : InstVRRe<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, tr1.op:$V4),
-+             mnemonic#"\t$V1, $V2, $V3, $V4",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 (tr1.vt tr1.op:$V4))))]> {
-+  let M5 = m5;
-+  let M6 = type;
-+}
-+
-+class TernaryVRSb<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                  TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type>
-+  : InstVRSb<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V1src, cls:$R3, shift12only:$BD2),
-+             mnemonic#"\t$V1, $R3, $BD2",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
-+                                                 cls:$R3,
-+                                                 shift12only:$BD2)))]> {
-+  let Constraints = "$V1 = $V1src";
-+  let DisableEncoding = "$V1src";
-+  let M4 = type;
-+}
-+
-+class TernaryVRV<string mnemonic, bits<16> opcode, bits<5> bytes,
-+                 Immediate index>
-+  : InstVRV<opcode, (outs VR128:$V1),
-+           (ins VR128:$V1src, bdvaddr12only:$VBD2, index:$M3),
-+           mnemonic#"\t$V1, $VBD2, $M3", []> {
-+  let Constraints = "$V1 = $V1src";
-+  let DisableEncoding = "$V1src";
-+  let mayLoad = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class TernaryVRX<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                 TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index>
-+  : InstVRX<opcode, (outs tr1.op:$V1),
-+           (ins tr2.op:$V1src, bdxaddr12only:$XBD2, index:$M3),
-+           mnemonic#"\t$V1, $XBD2, $M3",
-+           [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
-+                                               bdxaddr12only:$XBD2,
-+                                               index:$M3)))]> {
-+  let Constraints = "$V1 = $V1src";
-+  let DisableEncoding = "$V1src";
-+  let mayLoad = 1;
-+  let AccessBytes = bytes;
-+}
-+
-+class QuaternaryVRId<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-+                     TypedReg tr1, TypedReg tr2, bits<4> type>
-+  : InstVRId<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V1src, tr2.op:$V2, tr2.op:$V3, imm32zx8:$I4),
-+             mnemonic#"\t$V1, $V2, $V3, $I4",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V1src),
-+                                                 (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 imm32zx8:$I4)))]> {
-+  let Constraints = "$V1 = $V1src";
-+  let DisableEncoding = "$V1src";
-+  let M5 = type;
-+}
-+
-+class QuaternaryVRRd<string mnemonic, bits<16> opcode,
-+                     SDPatternOperator operator, TypedReg tr1, TypedReg tr2,
-+                     bits<4> type, SDPatternOperator m6mask, bits<4> m6or>
-+  : InstVRRd<opcode, (outs tr1.op:$V1),
-+             (ins tr2.op:$V2, tr2.op:$V3, tr2.op:$V4, m6mask:$M6),
-+             mnemonic#"\t$V1, $V2, $V3, $V4, $M6",
-+             [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2),
-+                                                 (tr2.vt tr2.op:$V3),
-+                                                 (tr2.vt tr2.op:$V4),
-+                                                 m6mask:$M6)))],
-+             m6or> {
-+  let M5 = type;
-+}
-+
-+multiclass QuaternaryVRRdSPair<string mnemonic, bits<16> opcode,
-+                               SDPatternOperator operator,
-+                               SDPatternOperator operator_cc, TypedReg tr1,
-+                               TypedReg tr2, bits<4> type, bits<4> m6or> {
-+  def "" : QuaternaryVRRd<mnemonic, opcode, operator, tr1, tr2, type,
-+                          imm32zx4even, !and (m6or, 14)>;
-+  def : InstAlias<mnemonic#"\t$V1, $V2, $V3, $V4",
-+                  (!cast<Instruction>(NAME) tr1.op:$V1, tr2.op:$V2,
-+                                            tr2.op:$V3, tr2.op:$V4, 0)>;
-+  let Defs = [CC] in
-+    def S : QuaternaryVRRd<mnemonic##"s", opcode, operator_cc, tr1, tr2, type,
-+                           imm32zx4even, !add (!and (m6or, 14), 1)>;
-+  def : InstAlias<mnemonic#"s\t$V1, $V2, $V3, $V4",
-+                  (!cast<Instruction>(NAME#"S") tr1.op:$V1, tr2.op:$V2,
-+                                                tr2.op:$V3, tr2.op:$V4, 0)>;
-+}
-+
- class LoadAndOpRSY<string mnemonic, bits<16> opcode, SDPatternOperator operator,
-                   RegisterOperand cls, AddressingMode mode = bdaddr20only>
-   : InstRSY<opcode, (outs cls:$R1), (ins cls:$R3, mode:$BD2),
-@@ -1330,10 +2151,13 @@ class PrefetchRILPC<string mnemonic, bit
- 
- // A floating-point load-and test operation.  Create both a normal unary
- // operation and one that acts as a comparison against zero.
-+// Note that the comparison against zero operation is not available if we
-+// have vector support, since load-and-test instructions will partially
-+// clobber the target (vector) register.
- multiclass LoadAndTestRRE<string mnemonic, bits<16> opcode,
-                           RegisterOperand cls> {
-   def "" : UnaryRRE<mnemonic, opcode, null_frag, cls, cls>;
--  let isCodeGenOnly = 1 in
-+  let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in
-     def Compare : CompareRRE<mnemonic, opcode, null_frag, cls, cls>;
- }
- 
-@@ -1577,6 +2401,26 @@ class Alias<int size, dag outs, dag ins,
-   let isCodeGenOnly = 1;
- }
- 
-+class UnaryAliasVRS<RegisterOperand cls1, RegisterOperand cls2>
-+ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>;
-+
-+// An alias of a UnaryVRR*, but with different register sizes.
-+class UnaryAliasVRR<SDPatternOperator operator, TypedReg tr1, TypedReg tr2>
-+  : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2),
-+          [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>;
-+
-+// An alias of a UnaryVRX, but with different register sizes.
-+class UnaryAliasVRX<SDPatternOperator operator, TypedReg tr,
-+                    AddressingMode mode = bdxaddr12only>
-+  : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2),
-+          [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>;
-+
-+// An alias of a StoreVRX, but with different register sizes.
-+class StoreAliasVRX<SDPatternOperator operator, TypedReg tr,
-+                    AddressingMode mode = bdxaddr12only>
-+  : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2),
-+          [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>;
-+
- // An alias of a BinaryRI, but with different register sizes.
- class BinaryAliasRI<SDPatternOperator operator, RegisterOperand cls,
-                     Immediate imm>
-@@ -1593,6 +2437,10 @@ class BinaryAliasRIL<SDPatternOperator o
-   let Constraints = "$R1 = $R1src";
- }
- 
-+// An alias of a BinaryVRRf, but with different register sizes.
-+class BinaryAliasVRRf<RegisterOperand cls>
-+  : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>;
-+
- // An alias of a CompareRI, but with different register sizes.
- class CompareAliasRI<SDPatternOperator operator, RegisterOperand cls,
-                      Immediate imm>
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp
-@@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBas
-     Opcode = SystemZ::LDR;
-   else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg))
-     Opcode = SystemZ::LXR;
-+  else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg))
-+    Opcode = SystemZ::VLR32;
-+  else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg))
-+    Opcode = SystemZ::VLR64;
-+  else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg))
-+    Opcode = SystemZ::VLR;
-   else
-     llvm_unreachable("Impossible reg-to-reg copy");
- 
-@@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress(
-     unsigned Start, End;
-     if (isRxSBGMask(Imm, And.RegSize, Start, End)) {
-       unsigned NewOpcode;
--      if (And.RegSize == 64)
-+      if (And.RegSize == 64) {
-         NewOpcode = SystemZ::RISBG;
--      else {
-+        // Prefer RISBGN if available, since it does not clobber CC.
-+        if (STI.hasMiscellaneousExtensions())
-+          NewOpcode = SystemZ::RISBGN;
-+      } else {
-         NewOpcode = SystemZ::RISBMux;
-         Start &= 31;
-         End &= 31;
-@@ -1114,6 +1123,16 @@ void SystemZInstrInfo::getLoadStoreOpcod
-   } else if (RC == &SystemZ::FP128BitRegClass) {
-     LoadOpcode = SystemZ::LX;
-     StoreOpcode = SystemZ::STX;
-+  } else if (RC == &SystemZ::VR32BitRegClass) {
-+    LoadOpcode = SystemZ::VL32;
-+    StoreOpcode = SystemZ::VST32;
-+  } else if (RC == &SystemZ::VR64BitRegClass) {
-+    LoadOpcode = SystemZ::VL64;
-+    StoreOpcode = SystemZ::VST64;
-+  } else if (RC == &SystemZ::VF128BitRegClass ||
-+             RC == &SystemZ::VR128BitRegClass) {
-+    LoadOpcode = SystemZ::VL;
-+    StoreOpcode = SystemZ::VST;
-   } else
-     llvm_unreachable("Unsupported regclass to load or store");
- }
-@@ -1147,17 +1166,22 @@ unsigned SystemZInstrInfo::getOpcodeForO
- 
- unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const {
-   switch (Opcode) {
--  case SystemZ::L:    return SystemZ::LT;
--  case SystemZ::LY:   return SystemZ::LT;
--  case SystemZ::LG:   return SystemZ::LTG;
--  case SystemZ::LGF:  return SystemZ::LTGF;
--  case SystemZ::LR:   return SystemZ::LTR;
--  case SystemZ::LGFR: return SystemZ::LTGFR;
--  case SystemZ::LGR:  return SystemZ::LTGR;
--  case SystemZ::LER:  return SystemZ::LTEBR;
--  case SystemZ::LDR:  return SystemZ::LTDBR;
--  case SystemZ::LXR:  return SystemZ::LTXBR;
--  default:            return 0;
-+  case SystemZ::L:      return SystemZ::LT;
-+  case SystemZ::LY:     return SystemZ::LT;
-+  case SystemZ::LG:     return SystemZ::LTG;
-+  case SystemZ::LGF:    return SystemZ::LTGF;
-+  case SystemZ::LR:     return SystemZ::LTR;
-+  case SystemZ::LGFR:   return SystemZ::LTGFR;
-+  case SystemZ::LGR:    return SystemZ::LTGR;
-+  case SystemZ::LER:    return SystemZ::LTEBR;
-+  case SystemZ::LDR:    return SystemZ::LTDBR;
-+  case SystemZ::LXR:    return SystemZ::LTXBR;
-+  // On zEC12 we prefer to use RISBGN.  But if there is a chance to
-+  // actually use the condition code, we may turn it back into RISGB.
-+  // Note that RISBG is not really a "load-and-test" instruction,
-+  // but sets the same condition code values, so is OK to use here.
-+  case SystemZ::RISBGN: return SystemZ::RISBG;
-+  default:              return 0;
-   }
- }
- 
-@@ -1178,6 +1202,7 @@ static bool isStringOfOnes(uint64_t Mask
- bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize,
-                                    unsigned &Start, unsigned &End) const {
-   // Reject trivial all-zero masks.
-+  Mask &= allOnes(BitSize);
-   if (Mask == 0)
-     return false;
- 
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.h
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h
-@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCM
- // SystemZ MachineOperand target flags.
- enum {
-   // Masks out the bits for the access model.
--  MO_SYMBOL_MODIFIER = (1 << 0),
-+  MO_SYMBOL_MODIFIER = (3 << 0),
- 
-   // @GOT (aka @GOTENT)
--  MO_GOT = (1 << 0)
-+  MO_GOT = (1 << 0),
-+
-+  // @INDNTPOFF
-+  MO_INDNTPOFF = (2 << 0)
- };
- // Classifies a branch.
- enum BranchType {
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.td
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td
-@@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isRetu
-     def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>;
- }
- 
-+// TLS calls.  These will be lowered into a call to __tls_get_offset,
-+// with an extra relocation specifying the TLS symbol.
-+let isCall = 1, Defs = [R14D, CC] in {
-+  def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
-+                         [(z_tls_gdcall tglobaltlsaddr:$I2)]>;
-+  def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops),
-+                         [(z_tls_ldcall tglobaltlsaddr:$I2)]>;
-+}
-+
- // Define the general form of the call instructions for the asm parser.
- // These instructions don't hard-code %r14 as the return address register.
--def BRAS  : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2),
-+// Allow an optional TLS marker symbol to generate TLS call relocations.
-+def BRAS  : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2),
-                    "bras\t$R1, $I2", []>;
--def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2),
-+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2),
-                     "brasl\t$R1, $I2", []>;
- def BASR  : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2),
-                    "basr\t$R1, $R2", []>;
-@@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove
-                      [(set GR64:$R1, pcrel32:$I2)]>;
- }
- 
-+// Load the Global Offset Table address.  This will be lowered into a
-+//     larl $R1, _GLOBAL_OFFSET_TABLE_
-+// instruction.
-+def GOT : Alias<6, (outs GR64:$R1), (ins),
-+                [(set GR64:$R1, (global_offset_table))]>;
-+
- //===----------------------------------------------------------------------===//
- // Absolute and Negation
- //===----------------------------------------------------------------------===//
-@@ -1045,6 +1061,10 @@ let Defs = [CC] in {
-     def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>;
- }
- 
-+// On zEC12 we have a variant of RISBG that does not set CC.
-+let Predicates = [FeatureMiscellaneousExtensions] in
-+  def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>;
-+
- // Forms of RISBG that only affect one word of the destination register.
- // They do not set CC.
- let Predicates = [FeatureHighWord] in {
-@@ -1342,6 +1362,60 @@ let Defs = [CC] in {
- }
- 
- //===----------------------------------------------------------------------===//
-+// Transactional execution
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureTransactionalExecution] in {
-+  // Transaction Begin
-+  let hasSideEffects = 1, mayStore = 1,
-+      usesCustomInserter = 1, Defs = [CC] in {
-+    def TBEGIN : InstSIL<0xE560,
-+                         (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
-+                         "tbegin\t$BD1, $I2",
-+                         [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>;
-+    def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
-+                                [(z_tbegin_nofloat bdaddr12only:$BD1,
-+                                                   imm32zx16:$I2)]>;
-+    def TBEGINC : InstSIL<0xE561,
-+                          (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2),
-+                          "tbeginc\t$BD1, $I2",
-+                          [(int_s390_tbeginc bdaddr12only:$BD1,
-+                                             imm32zx16:$I2)]>;
-+  }
-+
-+  // Transaction End
-+  let hasSideEffects = 1, Defs = [CC], BD2 = 0 in
-+    def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>;
-+
-+  // Transaction Abort
-+  let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in
-+    def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2),
-+                       "tabort\t$BD2",
-+                       [(int_s390_tabort bdaddr12only:$BD2)]>;
-+
-+  // Nontransactional Store
-+  let hasSideEffects = 1 in
-+    def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>;
-+
-+  // Extract Transaction Nesting Depth
-+  let hasSideEffects = 1 in
-+    def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Processor assist
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureProcessorAssist] in {
-+  let hasSideEffects = 1, R4 = 0 in
-+    def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3),
-+                      "ppa\t$R1, $R2, $R3", []>;
-+  def : Pat<(int_s390_ppa_txassist GR32:$src),
-+            (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32),
-+                 0, 1)>;
-+}
-+
-+//===----------------------------------------------------------------------===//
- // Miscellaneous Instructions.
- //===----------------------------------------------------------------------===//
- 
-@@ -1366,6 +1440,13 @@ let Defs = [CC] in {
- def : Pat<(ctlz GR64:$src),
-           (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>;
- 
-+// Population count.  Counts bits set per byte.
-+let Predicates = [FeaturePopulationCount], Defs = [CC] in {
-+  def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2),
-+                       "popcnt\t$R1, $R2",
-+                       [(set GR64:$R1, (z_popcnt GR64:$R2))]>;
-+}
-+
- // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext.
- def : Pat<(i64 (anyext GR32:$src)),
-           (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>;
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrVector.td
-===================================================================
---- /dev/null
-+++ llvm-36/lib/Target/SystemZ/SystemZInstrVector.td
-@@ -0,0 +1,1097 @@
-+//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+//===----------------------------------------------------------------------===//
-+// Move instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Register move.
-+  def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>;
-+  def VLR32 : UnaryAliasVRR<null_frag, v32eb, v32eb>;
-+  def VLR64 : UnaryAliasVRR<null_frag, v64db, v64db>;
-+
-+  // Load GR from VR element.
-+  def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>;
-+  def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>;
-+  def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>;
-+  def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>;
-+
-+  // Load VR element from GR.
-+  def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert,
-+                          v128b, v128b, GR32, 0>;
-+  def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert,
-+                          v128h, v128h, GR32, 1>;
-+  def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert,
-+                          v128f, v128f, GR32, 2>;
-+  def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert,
-+                          v128g, v128g, GR64, 3>;
-+
-+  // Load VR from GRs disjoint.
-+  def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>;
-+  def VLVGP32 : BinaryAliasVRRf<GR32>;
-+}
-+
-+// Extractions always assign to the full GR64, even if the element would
-+// fit in the lower 32 bits.  Sub-i64 extracts therefore need to take a
-+// subreg of the result.
-+class VectorExtractSubreg<ValueType type, Instruction insn>
-+  : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)),
-+        (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>;
-+
-+def : VectorExtractSubreg<v16i8, VLGVB>;
-+def : VectorExtractSubreg<v8i16, VLGVH>;
-+def : VectorExtractSubreg<v4i32, VLGVF>;
-+
-+//===----------------------------------------------------------------------===//
-+// Immediate instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Generate byte mask.
-+  def VZERO : InherentVRIa<"vzero", 0xE744, 0>;
-+  def VONE  : InherentVRIa<"vone", 0xE744, 0xffff>;
-+  def VGBM  : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>;
-+
-+  // Generate mask.
-+  def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>;
-+  def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>;
-+  def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>;
-+  def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>;
-+
-+  // Load element immediate.
-+  //
-+  // We want these instructions to be used ahead of VLVG* where possible.
-+  // However, VLVG* takes a variable BD-format index whereas VLEI takes
-+  // a plain immediate index.  This means that VLVG* has an extra "base"
-+  // register operand and is 3 units more complex.  Bumping the complexity
-+  // of the VLEI* instructions by 4 means that they are strictly better
-+  // than VLVG* in cases where both forms match.
-+  let AddedComplexity = 4 in {
-+    def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert,
-+                            v128b, v128b, imm32sx16trunc, imm32zx4>;
-+    def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert,
-+                            v128h, v128h, imm32sx16trunc, imm32zx3>;
-+    def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert,
-+                            v128f, v128f, imm32sx16, imm32zx2>;
-+    def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert,
-+                            v128g, v128g, imm64sx16, imm32zx1>;
-+  }
-+
-+  // Replicate immediate.
-+  def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>;
-+  def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>;
-+  def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>;
-+  def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Loads
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Load.
-+  def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>;
-+
-+  // Load to block boundary.  The number of loaded bytes is only known
-+  // at run time.  The instruction is really polymorphic, but v128b matches
-+  // the return type of the associated intrinsic.
-+  def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>;
-+
-+  // Load count to block boundary.
-+  let Defs = [CC] in
-+    def LCBB : InstRXE<0xE727, (outs GR32:$R1),
-+                               (ins bdxaddr12only:$XBD2, imm32zx4:$M3),
-+                       "lcbb\t$R1, $XBD2, $M3",
-+                       [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2,
-+                                                      imm32zx4:$M3))]>;
-+
-+  // Load with length.  The number of loaded bytes is only known at run time.
-+  def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>;
-+
-+  // Load multiple.
-+  def VLM : LoadMultipleVRSa<"vlm", 0xE736>;
-+
-+  // Load and replicate
-+  def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8,  v128b, 1, 0>;
-+  def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>;
-+  def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>;
-+  def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>;
-+  def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)),
-+            (VLREPF bdxaddr12only:$addr)>;
-+  def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)),
-+            (VLREPG bdxaddr12only:$addr)>;
-+
-+  // Use VLREP to load subvectors.  These patterns use "12pair" because
-+  // LEY and LDY offer full 20-bit displacement fields.  It's often better
-+  // to use those instructions rather than force a 20-bit displacement
-+  // into a GPR temporary.
-+  def VL32 : UnaryAliasVRX<load, v32eb, bdxaddr12pair>;
-+  def VL64 : UnaryAliasVRX<load, v64db, bdxaddr12pair>;
-+
-+  // Load logical element and zero.
-+  def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8,  v128b, 1, 0>;
-+  def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>;
-+  def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>;
-+  def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>;
-+  def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)),
-+            (VLLEZF bdxaddr12only:$addr)>;
-+  def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)),
-+            (VLLEZG bdxaddr12only:$addr)>;
-+
-+  // Load element.
-+  def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8,  v128b, v128b, 1, imm32zx4>;
-+  def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>;
-+  def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>;
-+  def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>;
-+  def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index),
-+            (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
-+  def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index),
-+            (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
-+
-+  // Gather element.
-+  def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>;
-+  def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>;
-+}
-+
-+// Use replicating loads if we're inserting a single element into an
-+// undefined vector.  This avoids a false dependency on the previous
-+// register contents.
-+multiclass ReplicatePeephole<Instruction vlrep, ValueType vectype,
-+                             SDPatternOperator load, ValueType scalartype> {
-+  def : Pat<(vectype (z_vector_insert
-+                      (undef), (scalartype (load bdxaddr12only:$addr)), 0)),
-+            (vlrep bdxaddr12only:$addr)>;
-+  def : Pat<(vectype (scalar_to_vector
-+                      (scalartype (load bdxaddr12only:$addr)))),
-+            (vlrep bdxaddr12only:$addr)>;
-+}
-+defm : ReplicatePeephole<VLREPB, v16i8, anyextloadi8, i32>;
-+defm : ReplicatePeephole<VLREPH, v8i16, anyextloadi16, i32>;
-+defm : ReplicatePeephole<VLREPF, v4i32, load, i32>;
-+defm : ReplicatePeephole<VLREPG, v2i64, load, i64>;
-+defm : ReplicatePeephole<VLREPF, v4f32, load, f32>;
-+defm : ReplicatePeephole<VLREPG, v2f64, load, f64>;
-+
-+//===----------------------------------------------------------------------===//
-+// Stores
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Store.
-+  def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>;
-+
-+  // Store with length.  The number of stored bytes is only known at run time.
-+  def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>;
-+
-+  // Store multiple.
-+  def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>;
-+
-+  // Store element.
-+  def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8,  v128b, 1, imm32zx4>;
-+  def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>;
-+  def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>;
-+  def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>;
-+  def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr,
-+                       imm32zx2:$index),
-+            (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>;
-+  def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr,
-+                       imm32zx1:$index),
-+            (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>;
-+
-+  // Use VSTE to store subvectors.  These patterns use "12pair" because
-+  // STEY and STDY offer full 20-bit displacement fields.  It's often better
-+  // to use those instructions rather than force a 20-bit displacement
-+  // into a GPR temporary.
-+  def VST32 : StoreAliasVRX<store, v32eb, bdxaddr12pair>;
-+  def VST64 : StoreAliasVRX<store, v64db, bdxaddr12pair>;
-+
-+  // Scatter element.
-+  def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>;
-+  def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Selects and permutes
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Merge high.
-+  def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>;
-+  def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>;
-+  def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>;
-+  def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>;
-+  def : BinaryRRWithType<VMRHF, VR128, z_merge_high, v4f32>;
-+  def : BinaryRRWithType<VMRHG, VR128, z_merge_high, v2f64>;
-+
-+  // Merge low.
-+  def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>;
-+  def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>;
-+  def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>;
-+  def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>;
-+  def : BinaryRRWithType<VMRLF, VR128, z_merge_low, v4f32>;
-+  def : BinaryRRWithType<VMRLG, VR128, z_merge_low, v2f64>;
-+
-+  // Permute.
-+  def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>;
-+
-+  // Permute doubleword immediate.
-+  def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>;
-+
-+  // Replicate.
-+  def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>;
-+  def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>;
-+  def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>;
-+  def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>;
-+  def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)),
-+            (VREPF VR128:$vec, imm32zx16:$index)>;
-+  def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)),
-+            (VREPG VR128:$vec, imm32zx16:$index)>;
-+
-+  // Select.
-+  def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Widening and narrowing
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Pack
-+  def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>;
-+  def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>;
-+  def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>;
-+
-+  // Pack saturate.
-+  defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc,
-+                               v128b, v128h, 1>;
-+  defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc,
-+                               v128h, v128f, 2>;
-+  defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc,
-+                               v128f, v128g, 3>;
-+
-+  // Pack saturate logical.
-+  defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc,
-+                                v128b, v128h, 1>;
-+  defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc,
-+                                v128h, v128f, 2>;
-+  defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc,
-+                                v128f, v128g, 3>;
-+
-+  // Sign-extend to doubleword.
-+  def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8,  v128g, v128g, 0>;
-+  def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>;
-+  def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>;
-+  def : Pat<(z_vsei8_by_parts  (v16i8 VR128:$src)), (VSEGB VR128:$src)>;
-+  def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>;
-+  def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>;
-+
-+  // Unpack high.
-+  def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>;
-+  def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>;
-+  def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>;
-+
-+  // Unpack logical high.
-+  def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>;
-+  def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>;
-+  def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>;
-+
-+  // Unpack low.
-+  def VUPLB  : UnaryVRRa<"vuplb",  0xE7D6, z_unpack_low, v128h, v128b, 0>;
-+  def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>;
-+  def VUPLF  : UnaryVRRa<"vuplf",  0xE7D6, z_unpack_low, v128g, v128f, 2>;
-+
-+  // Unpack logical low.
-+  def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>;
-+  def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>;
-+  def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Instantiating generic operations for specific types.
-+//===----------------------------------------------------------------------===//
-+
-+multiclass GenericVectorOps<ValueType type, ValueType inttype> {
-+  let Predicates = [FeatureVector] in {
-+    def : Pat<(type (load bdxaddr12only:$addr)),
-+              (VL bdxaddr12only:$addr)>;
-+    def : Pat<(store (type VR128:$src), bdxaddr12only:$addr),
-+              (VST VR128:$src, bdxaddr12only:$addr)>;
-+    def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)),
-+              (VSEL VR128:$y, VR128:$z, VR128:$x)>;
-+    def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)),
-+              (VSEL VR128:$z, VR128:$y, VR128:$x)>;
-+  }
-+}
-+
-+defm : GenericVectorOps<v16i8, v16i8>;
-+defm : GenericVectorOps<v8i16, v8i16>;
-+defm : GenericVectorOps<v4i32, v4i32>;
-+defm : GenericVectorOps<v2i64, v2i64>;
-+defm : GenericVectorOps<v4f32, v4i32>;
-+defm : GenericVectorOps<v2f64, v2i64>;
-+
-+//===----------------------------------------------------------------------===//
-+// Integer arithmetic
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Add.
-+  def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>;
-+  def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>;
-+  def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>;
-+  def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>;
-+  def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>;
-+
-+  // Add compute carry.
-+  def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>;
-+  def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>;
-+  def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>;
-+  def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>;
-+  def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>;
-+
-+  // Add with carry.
-+  def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>;
-+
-+  // Add with carry compute carry.
-+  def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>;
-+
-+  // And.
-+  def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>;
-+
-+  // And with complement.
-+  def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>;
-+
-+  // Average.
-+  def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>;
-+  def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>;
-+  def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>;
-+  def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>;
-+
-+  // Average logical.
-+  def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>;
-+  def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>;
-+  def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>;
-+  def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>;
-+
-+  // Checksum.
-+  def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>;
-+
-+  // Count leading zeros.
-+  def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>;
-+  def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>;
-+  def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>;
-+  def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>;
-+
-+  // Count trailing zeros.
-+  def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>;
-+  def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>;
-+  def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>;
-+  def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>;
-+
-+  // Exclusive or.
-+  def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>;
-+
-+  // Galois field multiply sum.
-+  def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>;
-+  def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>;
-+  def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>;
-+  def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>;
-+
-+  // Galois field multiply sum and accumulate.
-+  def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>;
-+  def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>;
-+  def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>;
-+  def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>;
-+
-+  // Load complement.
-+  def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>;
-+  def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>;
-+  def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>;
-+  def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>;
-+
-+  // Load positive.
-+  def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8,  v128b, v128b, 0>;
-+  def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>;
-+  def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>;
-+  def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>;
-+
-+  // Maximum.
-+  def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>;
-+  def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>;
-+  def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>;
-+  def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>;
-+
-+  // Maximum logical.
-+  def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>;
-+  def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>;
-+  def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>;
-+  def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>;
-+
-+  // Minimum.
-+  def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>;
-+  def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>;
-+  def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>;
-+  def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>;
-+
-+  // Minimum logical.
-+  def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>;
-+  def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>;
-+  def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>;
-+  def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>;
-+
-+  // Multiply and add low.
-+  def VMALB  : TernaryVRRd<"vmalb",  0xE7AA, z_muladd, v128b, v128b, 0>;
-+  def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>;
-+  def VMALF  : TernaryVRRd<"vmalf",  0xE7AA, z_muladd, v128f, v128f, 2>;
-+
-+  // Multiply and add high.
-+  def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>;
-+  def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>;
-+  def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>;
-+
-+  // Multiply and add logical high.
-+  def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>;
-+  def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>;
-+  def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>;
-+
-+  // Multiply and add even.
-+  def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>;
-+  def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>;
-+  def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>;
-+
-+  // Multiply and add logical even.
-+  def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>;
-+  def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>;
-+  def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>;
-+
-+  // Multiply and add odd.
-+  def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>;
-+  def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>;
-+  def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>;
-+
-+  // Multiply and add logical odd.
-+  def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>;
-+  def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>;
-+  def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>;
-+
-+  // Multiply high.
-+  def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>;
-+  def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>;
-+  def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>;
-+
-+  // Multiply logical high.
-+  def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>;
-+  def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>;
-+  def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>;
-+
-+  // Multiply low.
-+  def VMLB  : BinaryVRRc<"vmlb",  0xE7A2, mul, v128b, v128b, 0>;
-+  def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>;
-+  def VMLF  : BinaryVRRc<"vmlf",  0xE7A2, mul, v128f, v128f, 2>;
-+
-+  // Multiply even.
-+  def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>;
-+  def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>;
-+  def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>;
-+
-+  // Multiply logical even.
-+  def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>;
-+  def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>;
-+  def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>;
-+
-+  // Multiply odd.
-+  def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>;
-+  def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>;
-+  def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>;
-+
-+  // Multiply logical odd.
-+  def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>;
-+  def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>;
-+  def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>;
-+
-+  // Nor.
-+  def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>;
-+
-+  // Or.
-+  def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>;
-+
-+  // Population count.
-+  def VPOPCT : BinaryVRRa<"vpopct", 0xE750>;
-+  def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>;
-+
-+  // Element rotate left logical (with vector shift amount).
-+  def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb,
-+                           v128b, v128b, 0>;
-+  def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh,
-+                           v128h, v128h, 1>;
-+  def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf,
-+                           v128f, v128f, 2>;
-+  def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg,
-+                           v128g, v128g, 3>;
-+
-+  // Element rotate left logical (with scalar shift amount).
-+  def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>;
-+  def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>;
-+  def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>;
-+  def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>;
-+
-+  // Element rotate and insert under mask.
-+  def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>;
-+  def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>;
-+  def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>;
-+  def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>;
-+
-+  // Element shift left (with vector shift amount).
-+  def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>;
-+  def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>;
-+  def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>;
-+  def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>;
-+
-+  // Element shift left (with scalar shift amount).
-+  def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>;
-+  def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>;
-+  def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>;
-+  def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>;
-+
-+  // Element shift right arithmetic (with vector shift amount).
-+  def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>;
-+  def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>;
-+  def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>;
-+  def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>;
-+
-+  // Element shift right arithmetic (with scalar shift amount).
-+  def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>;
-+  def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>;
-+  def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>;
-+  def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>;
-+
-+  // Element shift right logical (with vector shift amount).
-+  def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>;
-+  def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>;
-+  def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>;
-+  def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>;
-+
-+  // Element shift right logical (with scalar shift amount).
-+  def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>;
-+  def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>;
-+  def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>;
-+  def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>;
-+
-+  // Shift left.
-+  def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>;
-+
-+  // Shift left by byte.
-+  def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>;
-+
-+  // Shift left double by byte.
-+  def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>;
-+  def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z),
-+            (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>;
-+
-+  // Shift right arithmetic.
-+  def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>;
-+
-+  // Shift right arithmetic by byte.
-+  def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>;
-+
-+  // Shift right logical.
-+  def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>;
-+
-+  // Shift right logical by byte.
-+  def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>;
-+
-+  // Subtract.
-+  def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>;
-+  def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>;
-+  def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>;
-+  def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>;
-+  def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>;
-+
-+  // Subtract compute borrow indication.
-+  def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>;
-+  def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>;
-+  def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>;
-+  def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>;
-+  def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>;
-+
-+  // Subtract with borrow indication.
-+  def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>;
-+
-+  // Subtract with borrow compute borrow indication.
-+  def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq,
-+                            v128q, v128q, 4>;
-+
-+  // Sum across doubleword.
-+  def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>;
-+  def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>;
-+
-+  // Sum across quadword.
-+  def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>;
-+  def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>;
-+
-+  // Sum across word.
-+  def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>;
-+  def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>;
-+}
-+
-+// Instantiate the bitwise ops for type TYPE.
-+multiclass BitwiseVectorOps<ValueType type> {
-+  let Predicates = [FeatureVector] in {
-+    def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>;
-+    def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))),
-+              (VNC VR128:$x, VR128:$y)>;
-+    def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>;
-+    def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>;
-+    def : Pat<(type (or (and VR128:$x, VR128:$z),
-+                        (and VR128:$y, (z_vnot VR128:$z)))),
-+              (VSEL VR128:$x, VR128:$y, VR128:$z)>;
-+    def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))),
-+              (VNO VR128:$x, VR128:$y)>;
-+    def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>;
-+  }
-+}
-+
-+defm : BitwiseVectorOps<v16i8>;
-+defm : BitwiseVectorOps<v8i16>;
-+defm : BitwiseVectorOps<v4i32>;
-+defm : BitwiseVectorOps<v2i64>;
-+
-+// Instantiate additional patterns for absolute-related expressions on
-+// type TYPE.  LC is the negate instruction for TYPE and LP is the absolute
-+// instruction.
-+multiclass IntegerAbsoluteVectorOps<ValueType type, Instruction lc,
-+                                    Instruction lp, int shift> {
-+  let Predicates = [FeatureVector] in {
-+    def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)),
-+                             (z_vneg VR128:$x), VR128:$x)),
-+              (lc (lp VR128:$x))>;
-+    def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))),
-+                             VR128:$x, (z_vneg VR128:$x))),
-+              (lc (lp VR128:$x))>;
-+    def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)),
-+                             VR128:$x, (z_vneg VR128:$x))),
-+              (lc (lp VR128:$x))>;
-+    def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))),
-+                             (z_vneg VR128:$x), VR128:$x)),
-+              (lc (lp VR128:$x))>;
-+    def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
-+                             (z_vneg VR128:$x)),
-+                        (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
-+                             VR128:$x))),
-+              (lp VR128:$x)>;
-+    def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)),
-+                             VR128:$x),
-+                        (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))),
-+                             (z_vneg VR128:$x)))),
-+              (lc (lp VR128:$x))>;
-+  }
-+}
-+
-+defm : IntegerAbsoluteVectorOps<v16i8, VLCB, VLPB, 7>;
-+defm : IntegerAbsoluteVectorOps<v8i16, VLCH, VLPH, 15>;
-+defm : IntegerAbsoluteVectorOps<v4i32, VLCF, VLPF, 31>;
-+defm : IntegerAbsoluteVectorOps<v2i64, VLCG, VLPG, 63>;
-+
-+// Instantiate minimum- and maximum-related patterns for TYPE.  CMPH is the
-+// signed or unsigned "set if greater than" comparison instruction and
-+// MIN and MAX are the associated minimum and maximum instructions.
-+multiclass IntegerMinMaxVectorOps<ValueType type, SDPatternOperator cmph,
-+                                  Instruction min, Instruction max> {
-+  let Predicates = [FeatureVector] in {
-+    def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)),
-+              (max VR128:$x, VR128:$y)>;
-+    def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)),
-+              (min VR128:$x, VR128:$y)>;
-+    def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
-+                             VR128:$x, VR128:$y)),
-+              (min VR128:$x, VR128:$y)>;
-+    def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)),
-+                             VR128:$y, VR128:$x)),
-+              (max VR128:$x, VR128:$y)>;
-+  }
-+}
-+
-+// Signed min/max.
-+defm : IntegerMinMaxVectorOps<v16i8, z_vicmph, VMNB, VMXB>;
-+defm : IntegerMinMaxVectorOps<v8i16, z_vicmph, VMNH, VMXH>;
-+defm : IntegerMinMaxVectorOps<v4i32, z_vicmph, VMNF, VMXF>;
-+defm : IntegerMinMaxVectorOps<v2i64, z_vicmph, VMNG, VMXG>;
-+
-+// Unsigned min/max.
-+defm : IntegerMinMaxVectorOps<v16i8, z_vicmphl, VMNLB, VMXLB>;
-+defm : IntegerMinMaxVectorOps<v8i16, z_vicmphl, VMNLH, VMXLH>;
-+defm : IntegerMinMaxVectorOps<v4i32, z_vicmphl, VMNLF, VMXLF>;
-+defm : IntegerMinMaxVectorOps<v2i64, z_vicmphl, VMNLG, VMXLG>;
-+
-+//===----------------------------------------------------------------------===//
-+// Integer comparison
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Element compare.
-+  let Defs = [CC] in {
-+    def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>;
-+    def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>;
-+    def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>;
-+    def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>;
-+  }
-+
-+  // Element compare logical.
-+  let Defs = [CC] in {
-+    def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>;
-+    def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>;
-+    def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>;
-+    def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>;
-+  }
-+
-+  // Compare equal.
-+  defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes,
-+                               v128b, v128b, 0>;
-+  defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes,
-+                               v128h, v128h, 1>;
-+  defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes,
-+                               v128f, v128f, 2>;
-+  defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes,
-+                               v128g, v128g, 3>;
-+
-+  // Compare high.
-+  defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs,
-+                              v128b, v128b, 0>;
-+  defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs,
-+                              v128h, v128h, 1>;
-+  defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs,
-+                              v128f, v128f, 2>;
-+  defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs,
-+                              v128g, v128g, 3>;
-+
-+  // Compare high logical.
-+  defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls,
-+                               v128b, v128b, 0>;
-+  defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls,
-+                               v128h, v128h, 1>;
-+  defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls,
-+                               v128f, v128f, 2>;
-+  defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls,
-+                               v128g, v128g, 3>;
-+
-+  // Test under mask.
-+  let Defs = [CC] in
-+    def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Floating-point arithmetic
-+//===----------------------------------------------------------------------===//
-+
-+// See comments in SystemZInstrFP.td for the suppression flags and
-+// rounding modes.
-+multiclass VectorRounding<Instruction insn, TypedReg tr> {
-+  def : FPConversion<insn, frint,      tr, tr, 0, 0>;
-+  def : FPConversion<insn, fnearbyint, tr, tr, 4, 0>;
-+  def : FPConversion<insn, ffloor,     tr, tr, 4, 7>;
-+  def : FPConversion<insn, fceil,      tr, tr, 4, 6>;
-+  def : FPConversion<insn, ftrunc,     tr, tr, 4, 5>;
-+  def : FPConversion<insn, frnd,       tr, tr, 4, 1>;
-+}
-+
-+let Predicates = [FeatureVector] in {
-+  // Add.
-+  def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>;
-+  def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>;
-+
-+  // Convert from fixed 64-bit.
-+  def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>;
-+  def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>;
-+  def : FPConversion<VCDGB, sint_to_fp, v128db, v128g, 0, 0>;
-+
-+  // Convert from logical 64-bit.
-+  def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>;
-+  def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>;
-+  def : FPConversion<VCDLGB, uint_to_fp, v128db, v128g, 0, 0>;
-+
-+  // Convert to fixed 64-bit.
-+  def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>;
-+  def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>;
-+  // Rounding mode should agree with SystemZInstrFP.td.
-+  def : FPConversion<VCGDB, fp_to_sint, v128g, v128db, 0, 5>;
-+
-+  // Convert to logical 64-bit.
-+  def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>;
-+  def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>;
-+  // Rounding mode should agree with SystemZInstrFP.td.
-+  def : FPConversion<VCLGDB, fp_to_uint, v128g, v128db, 0, 5>;
-+
-+  // Divide.
-+  def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>;
-+  def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>;
-+
-+  // Load FP integer.
-+  def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>;
-+  def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>;
-+  defm : VectorRounding<VFIDB, v128db>;
-+  defm : VectorRounding<WFIDB, v64db>;
-+
-+  // Load lengthened.
-+  def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>;
-+  def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>;
-+
-+  // Load rounded,
-+  def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>;
-+  def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>;
-+  def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>;
-+  def : FPConversion<WLEDB, fround, v32eb, v64db, 0, 0>;
-+
-+  // Multiply.
-+  def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>;
-+  def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>;
-+
-+  // Multiply and add.
-+  def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>;
-+  def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>;
-+
-+  // Multiply and subtract.
-+  def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>;
-+  def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>;
-+
-+  // Load complement,
-+  def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>;
-+  def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>;
-+
-+  // Load negative.
-+  def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>;
-+  def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>;
-+
-+  // Load positive.
-+  def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>;
-+  def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>;
-+
-+  // Square root.
-+  def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>;
-+  def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>;
-+
-+  // Subtract.
-+  def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>;
-+  def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>;
-+
-+  // Test data class immediate.
-+  let Defs = [CC] in {
-+    def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>;
-+    def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>;
-+  }
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Floating-point comparison
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  // Compare scalar.
-+  let Defs = [CC] in
-+    def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>;
-+
-+  // Compare and signal scalar.
-+  let Defs = [CC] in
-+    def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>;
-+
-+  // Compare equal.
-+  defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes,
-+                                v128g, v128db, 3, 0>;
-+  defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag,
-+                                v64g, v64db, 3, 8>;
-+
-+  // Compare high.
-+  defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs,
-+                                v128g, v128db, 3, 0>;
-+  defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag,
-+                                v64g, v64db, 3, 8>;
-+
-+  // Compare high or equal.
-+  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
-+                                 v128g, v128db, 3, 0>;
-+  defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
-+                                 v64g, v64db, 3, 8>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// Conversions
-+//===----------------------------------------------------------------------===//
-+
-+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
-+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
-+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
-+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
-+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
-+
-+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
-+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
-+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
-+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
-+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
-+
-+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
-+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
-+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
-+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
-+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
-+
-+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
-+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
-+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
-+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
-+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
-+
-+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
-+def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
-+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
-+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
-+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
-+
-+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
-+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
-+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
-+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
-+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
-+
-+//===----------------------------------------------------------------------===//
-+// Replicating scalars
-+//===----------------------------------------------------------------------===//
-+
-+// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
-+// INDEX is 8 minus the element size in bytes.
-+class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
-+  : Pat<(type (z_replicate GR32:$scalar)),
-+        (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
-+
-+def : VectorReplicateScalar<v16i8, VREPB, 7>;
-+def : VectorReplicateScalar<v8i16, VREPH, 3>;
-+def : VectorReplicateScalar<v4i32, VREPF, 1>;
-+
-+// i64 replications are just a single isntruction.
-+def : Pat<(v2i64 (z_replicate GR64:$scalar)),
-+          (VLVGP GR64:$scalar, GR64:$scalar)>;
-+
-+//===----------------------------------------------------------------------===//
-+// Floating-point insertion and extraction
-+//===----------------------------------------------------------------------===//
-+
-+// Moving 32-bit values between GPRs and FPRs can be done using VLVGF
-+// and VLGVF.
-+def LEFR : UnaryAliasVRS<VR32, GR32>;
-+def LFER : UnaryAliasVRS<GR64, VR32>;
-+def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>;
-+def : Pat<(i32 (bitconvert (f32 VR32:$src))),
-+          (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>;
-+
-+// Floating-point values are stored in element 0 of the corresponding
-+// vector register.  Scalar to vector conversion is just a subreg and
-+// scalar replication can just replicate element 0 of the vector register.
-+multiclass ScalarToVectorFP<Instruction vrep, ValueType vt, RegisterOperand cls,
-+                            SubRegIndex subreg> {
-+  def : Pat<(vt (scalar_to_vector cls:$scalar)),
-+            (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>;
-+  def : Pat<(vt (z_replicate cls:$scalar)),
-+            (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar,
-+                                 subreg), 0)>;
-+}
-+defm : ScalarToVectorFP<VREPF, v4f32, FP32, subreg_r32>;
-+defm : ScalarToVectorFP<VREPG, v2f64, FP64, subreg_r64>;
-+
-+// Match v2f64 insertions.  The AddedComplexity counters the 3 added by
-+// TableGen for the base register operand in VLVG-based integer insertions
-+// and ensures that this version is strictly better.
-+let AddedComplexity = 4 in {
-+  def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0),
-+            (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
-+                                 subreg_r64), VR128:$vec, 1)>;
-+  def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1),
-+            (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt,
-+                                             subreg_r64), 0)>;
-+}
-+
-+// We extract floating-point element X by replicating (for elements other
-+// than 0) and then taking a high subreg.  The AddedComplexity counters the
-+// 3 added by TableGen for the base register operand in VLGV-based integer
-+// extractions and ensures that this version is strictly better.
-+let AddedComplexity = 4 in {
-+  def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)),
-+            (EXTRACT_SUBREG VR128:$vec, subreg_r32)>;
-+  def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)),
-+            (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>;
-+
-+  def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)),
-+            (EXTRACT_SUBREG VR128:$vec, subreg_r64)>;
-+  def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)),
-+            (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>;
-+}
-+
-+//===----------------------------------------------------------------------===//
-+// String instructions
-+//===----------------------------------------------------------------------===//
-+
-+let Predicates = [FeatureVector] in {
-+  defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc,
-+                                v128b, v128b, 0, 0>;
-+  defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc,
-+                                v128h, v128h, 1, 0>;
-+  defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc,
-+                                v128f, v128f, 2, 0>;
-+  defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc,
-+                                 v128b, v128b, 0, 2>;
-+  defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc,
-+                                 v128h, v128h, 1, 2>;
-+  defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc,
-+                                 v128f, v128f, 2, 2>;
-+
-+  defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc,
-+                               v128b, v128b, 0, 0, 1>;
-+  defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc,
-+                               v128h, v128h, 1, 0, 1>;
-+  defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc,
-+                               v128f, v128f, 2, 0, 1>;
-+  defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc,
-+                                v128b, v128b, 0, 2, 3>;
-+  defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc,
-+                                v128h, v128h, 1, 2, 3>;
-+  defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc,
-+                                v128f, v128f, 2, 2, 3>;
-+
-+  defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc,
-+                                v128b, v128b, 0, 0, 1>;
-+  defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc,
-+                                v128h, v128h, 1, 0, 1>;
-+  defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc,
-+                                v128f, v128f, 2, 0, 1>;
-+  defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb,
-+                                 z_vfenez_cc, v128b, v128b, 0, 2, 3>;
-+  defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh,
-+                                 z_vfenez_cc, v128h, v128h, 1, 2, 3>;
-+  defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf,
-+                                 z_vfenez_cc, v128f, v128f, 2, 2, 3>;
-+
-+  defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc,
-+                               v128b, v128b, 0>;
-+  defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc,
-+                               v128h, v128h, 1>;
-+  defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc,
-+                               v128f, v128f, 2>;
-+
-+  defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb,
-+                                    z_vstrc_cc, v128b, v128b, 0, 0>;
-+  defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch,
-+                                    z_vstrc_cc, v128h, v128h, 1, 0>;
-+  defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf,
-+                                    z_vstrc_cc, v128f, v128f, 2, 0>;
-+  defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb,
-+                                     z_vstrcz_cc, v128b, v128b, 0, 2>;
-+  defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh,
-+                                     z_vstrcz_cc, v128h, v128h, 1, 2>;
-+  defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
-+                                     z_vstrcz_cc, v128f, v128f, 2, 2>;
-+}
-Index: llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp
-===================================================================
---- /dev/null
-+++ llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp
-@@ -0,0 +1,143 @@
-+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This pass combines multiple accesses to local-dynamic TLS variables so that
-+// the TLS base address for the module is only fetched once per execution path
-+// through the function.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "SystemZTargetMachine.h"
-+#include "SystemZMachineFunctionInfo.h"
-+#include "llvm/CodeGen/MachineDominators.h"
-+#include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
-+#include "llvm/CodeGen/MachineRegisterInfo.h"
-+#include "llvm/Target/TargetInstrInfo.h"
-+#include "llvm/Target/TargetMachine.h"
-+#include "llvm/Target/TargetRegisterInfo.h"
-+
-+using namespace llvm;
-+
-+namespace {
-+
-+class SystemZLDCleanup : public MachineFunctionPass {
-+public:
-+  static char ID;
-+  SystemZLDCleanup(const SystemZTargetMachine &tm)
-+    : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
-+
-+  const char *getPassName() const override {
-+    return "SystemZ Local Dynamic TLS Access Clean-up";
-+  }
-+
-+  bool runOnMachineFunction(MachineFunction &MF) override;
-+  void getAnalysisUsage(AnalysisUsage &AU) const override;
-+
-+private:
-+  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
-+  MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
-+  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
-+
-+  const SystemZInstrInfo *TII;
-+  MachineFunction *MF;
-+};
-+
-+char SystemZLDCleanup::ID = 0;
-+
-+} // end anonymous namespace
-+
-+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
-+  return new SystemZLDCleanup(TM);
-+}
-+
-+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
-+  AU.setPreservesCFG();
-+  AU.addRequired<MachineDominatorTree>();
-+  MachineFunctionPass::getAnalysisUsage(AU);
-+}
-+
-+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
-+  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
-+  MF = &F;
-+
-+  SystemZMachineFunctionInfo* MFI = F.getInfo<SystemZMachineFunctionInfo>();
-+  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
-+    // No point folding accesses if there isn't at least two.
-+    return false;
-+  }
-+
-+  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
-+  return VisitNode(DT->getRootNode(), 0);
-+}
-+
-+// Visit the dominator subtree rooted at Node in pre-order.
-+// If TLSBaseAddrReg is non-null, then use that to replace any
-+// TLS_LDCALL instructions. Otherwise, create the register
-+// when the first such instruction is seen, and then use it
-+// as we encounter more instructions.
-+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
-+                                 unsigned TLSBaseAddrReg) {
-+  MachineBasicBlock *BB = Node->getBlock();
-+  bool Changed = false;
-+
-+  // Traverse the current block.
-+  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
-+    switch (I->getOpcode()) {
-+      case SystemZ::TLS_LDCALL:
-+        if (TLSBaseAddrReg)
-+          I = ReplaceTLSCall(I, TLSBaseAddrReg);
-+        else
-+          I = SetRegister(I, &TLSBaseAddrReg);
-+        Changed = true;
-+        break;
-+      default:
-+        break;
-+    }
-+  }
-+
-+  // Visit the children of this block in the dominator tree.
-+  for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
-+    Changed |= VisitNode(*I, TLSBaseAddrReg);
-+
-+  return Changed;
-+}
-+
-+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
-+// returning the new instruction.
-+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
-+                                               unsigned TLSBaseAddrReg) {
-+  // Insert a Copy from TLSBaseAddrReg to R2.
-+  MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
-+                               TII->get(TargetOpcode::COPY), SystemZ::R2D)
-+                               .addReg(TLSBaseAddrReg);
-+
-+  // Erase the TLS_LDCALL instruction.
-+  I->eraseFromParent();
-+
-+  return Copy;
-+}
-+
-+// Create a virtal register in *TLSBaseAddrReg, and populate it by
-+// inserting a copy instruction after I. Returns the new instruction.
-+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
-+                                            unsigned *TLSBaseAddrReg) {
-+  // Create a virtual register for the TLS base address.
-+  MachineRegisterInfo &RegInfo = MF->getRegInfo();
-+  *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
-+
-+  // Insert a copy from R2 to TLSBaseAddrReg.
-+  MachineInstr *Next = I->getNextNode();
-+  MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
-+                               TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
-+                               .addReg(SystemZ::R2D);
-+
-+  return Copy;
-+}
-+
-Index: llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZMCInstLower.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp
-@@ -22,6 +22,8 @@ static MCSymbolRefExpr::VariantKind getV
-       return MCSymbolRefExpr::VK_None;
-     case SystemZII::MO_GOT:
-       return MCSymbolRefExpr::VK_GOT;
-+    case SystemZII::MO_INDNTPOFF:
-+      return MCSymbolRefExpr::VK_INDNTPOFF;
-   }
-   llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
- }
-Index: llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
-+++ llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
-@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : publi
-   unsigned VarArgsFrameIndex;
-   unsigned RegSaveFrameIndex;
-   bool ManipulatesSP;
-+  unsigned NumLocalDynamics;
- 
- public:
-   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
-     : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
--      VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
-+      VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
-+      NumLocalDynamics(0) {}
- 
-   // Get and set the first call-saved GPR that should be saved and restored
-   // by this function.  This is 0 if no GPRs need to be saved or restored.
-@@ -61,6 +63,10 @@ public:
-   // e.g. through STACKSAVE or STACKRESTORE.
-   bool getManipulatesSP() const { return ManipulatesSP; }
-   void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
-+
-+  // Count number of local-dynamic TLS symbols used.
-+  unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
-+  void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
- };
- 
- } // end namespace llvm
-Index: llvm-36/lib/Target/SystemZ/SystemZOperands.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZOperands.td
-+++ llvm-36/lib/Target/SystemZ/SystemZOperands.td
-@@ -16,6 +16,11 @@ class ImmediateAsmOperand<string name>
-   let Name = name;
-   let RenderMethod = "addImmOperands";
- }
-+class ImmediateTLSAsmOperand<string name>
-+  : AsmOperandClass {
-+  let Name = name;
-+  let RenderMethod = "addImmTLSOperands";
-+}
- 
- // Constructs both a DAG pattern and instruction operand for an immediate
- // of type VT.  PRED returns true if a node is acceptable and XFORM returns
-@@ -34,6 +39,11 @@ class PCRelAsmOperand<string size> : Imm
-   let PredicateMethod = "isImm";
-   let ParserMethod = "parsePCRel"##size;
- }
-+class PCRelTLSAsmOperand<string size>
-+  : ImmediateTLSAsmOperand<"PCRelTLS"##size> {
-+  let PredicateMethod = "isImmTLS";
-+  let ParserMethod = "parsePCRelTLS"##size;
-+}
- 
- // Constructs an operand for a PC-relative address with address type VT.
- // ASMOP is the associated asm operand.
-@@ -41,6 +51,10 @@ class PCRelOperand<ValueType vt, AsmOper
-   let PrintMethod = "printPCRelOperand";
-   let ParserMatchClass = asmop;
- }
-+class PCRelTLSOperand<ValueType vt, AsmOperandClass asmop> : Operand<vt> {
-+  let PrintMethod = "printPCRelTLSOperand";
-+  let ParserMatchClass = asmop;
-+}
- 
- // Constructs both a DAG pattern and instruction operand for a PC-relative
- // address with address size VT.  SELF is the name of the operand and
-@@ -64,6 +78,22 @@ class AddressAsmOperand<string format, s
-   let RenderMethod = "add"##format##"Operands";
- }
- 
-+// Constructs an instruction operand for an addressing mode.  FORMAT,
-+// BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
-+// AddressAsmOperand.  OPERANDS is a list of individual operands
-+// (base register, displacement, etc.).
-+class AddressOperand<string bitsize, string dispsize, string length,
-+                     string format, dag operands>
-+  : Operand<!cast<ValueType>("i"##bitsize)> {
-+  let PrintMethod = "print"##format##"Operand";
-+  let EncoderMethod = "get"##format##dispsize##length##"Encoding";
-+  let DecoderMethod =
-+    "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
-+  let MIOperandInfo = operands;
-+  let ParserMatchClass =
-+    !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
-+}
-+
- // Constructs both a DAG pattern and instruction operand for an addressing mode.
- // FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated
- // AddressAsmOperand.  OPERANDS is a list of NUMOPS individual operands
-@@ -79,15 +109,7 @@ class AddressingMode<string seltype, str
-   : ComplexPattern<!cast<ValueType>("i"##bitsize), numops,
-                    "select"##seltype##dispsize##suffix##length,
-                    [add, sub, or, frameindex, z_adjdynalloc]>,
--    Operand<!cast<ValueType>("i"##bitsize)> {
--  let PrintMethod = "print"##format##"Operand";
--  let EncoderMethod = "get"##format##dispsize##length##"Encoding";
--  let DecoderMethod =
--    "decode"##format##bitsize##"Disp"##dispsize##length##"Operand";
--  let MIOperandInfo = operands;
--  let ParserMatchClass =
--    !cast<AddressAsmOperand>(format##bitsize##"Disp"##dispsize##length);
--}
-+    AddressOperand<bitsize, dispsize, length, format, operands>;
- 
- // An addressing mode with a base and displacement but no index.
- class BDMode<string type, string bitsize, string dispsize, string suffix>
-@@ -111,6 +133,13 @@ class BDLMode<string type, string bitsiz
-                         !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
-                         !cast<Immediate>("imm"##bitsize))>;
- 
-+// An addressing mode with a base, displacement and a vector index.
-+class BDVMode<string bitsize, string dispsize>
-+  : AddressOperand<bitsize, dispsize, "", "BDVAddr",
-+                   (ops !cast<RegisterOperand>("ADDR"##bitsize),
-+                        !cast<Immediate>("disp"##dispsize##"imm"##bitsize),
-+                        !cast<RegisterOperand>("VR128"))>;
-+
- //===----------------------------------------------------------------------===//
- // Extracting immediate operands from nodes
- // These all create MVT::i64 nodes to ensure the value is not sign-extended
-@@ -163,6 +192,16 @@ def UIMM8 : SDNodeXForm<imm, [{
-   return CurDAG->getTargetConstant(uint8_t(N->getZExtValue()), MVT::i64);
- }]>;
- 
-+// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit.
-+def UIMM8EVEN : SDNodeXForm<imm, [{
-+  return CurDAG->getTargetConstant(N->getZExtValue() & 0xfe, MVT::i64);
-+}]>;
-+
-+// Truncate an immediate to a 12-bit unsigned quantity.
-+def UIMM12 : SDNodeXForm<imm, [{
-+  return CurDAG->getTargetConstant(N->getZExtValue() & 0xfff, MVT::i64);
-+}]>;
-+
- // Truncate an immediate to a 16-bit signed quantity.
- def SIMM16 : SDNodeXForm<imm, [{
-   return CurDAG->getTargetConstant(int16_t(N->getZExtValue()), MVT::i64);
-@@ -192,10 +231,14 @@ def NEGIMM32 : SDNodeXForm<imm, [{
- // Immediate asm operands.
- //===----------------------------------------------------------------------===//
- 
-+def U1Imm  : ImmediateAsmOperand<"U1Imm">;
-+def U2Imm  : ImmediateAsmOperand<"U2Imm">;
-+def U3Imm  : ImmediateAsmOperand<"U3Imm">;
- def U4Imm  : ImmediateAsmOperand<"U4Imm">;
- def U6Imm  : ImmediateAsmOperand<"U6Imm">;
- def S8Imm  : ImmediateAsmOperand<"S8Imm">;
- def U8Imm  : ImmediateAsmOperand<"U8Imm">;
-+def U12Imm : ImmediateAsmOperand<"U12Imm">;
- def S16Imm : ImmediateAsmOperand<"S16Imm">;
- def U16Imm : ImmediateAsmOperand<"U16Imm">;
- def S32Imm : ImmediateAsmOperand<"S32Imm">;
-@@ -226,10 +269,28 @@ def imm32lh16c : Immediate<i32, [{
- }], LH16, "U16Imm">;
- 
- // Short immediates
-+def imm32zx1 : Immediate<i32, [{
-+  return isUInt<1>(N->getZExtValue());
-+}], NOOP_SDNodeXForm, "U1Imm">;
-+
-+def imm32zx2 : Immediate<i32, [{
-+  return isUInt<2>(N->getZExtValue());
-+}], NOOP_SDNodeXForm, "U2Imm">;
-+
-+def imm32zx3 : Immediate<i32, [{
-+  return isUInt<3>(N->getZExtValue());
-+}], NOOP_SDNodeXForm, "U3Imm">;
-+
- def imm32zx4 : Immediate<i32, [{
-   return isUInt<4>(N->getZExtValue());
- }], NOOP_SDNodeXForm, "U4Imm">;
- 
-+// Note: this enforces an even value during code generation only.
-+// When used from the assembler, any 4-bit value is allowed.
-+def imm32zx4even : Immediate<i32, [{
-+  return isUInt<4>(N->getZExtValue());
-+}], UIMM8EVEN, "U4Imm">;
-+
- def imm32zx6 : Immediate<i32, [{
-   return isUInt<6>(N->getZExtValue());
- }], NOOP_SDNodeXForm, "U6Imm">;
-@@ -244,6 +305,10 @@ def imm32zx8 : Immediate<i32, [{
- 
- def imm32zx8trunc : Immediate<i32, [{}], UIMM8, "U8Imm">;
- 
-+def imm32zx12 : Immediate<i32, [{
-+  return isUInt<12>(N->getZExtValue());
-+}], UIMM12, "U12Imm">;
-+
- def imm32sx16 : Immediate<i32, [{
-   return isInt<16>(N->getSExtValue());
- }], SIMM16, "S16Imm">;
-@@ -370,6 +435,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ retu
- // PC-relative asm operands.
- def PCRel16 : PCRelAsmOperand<"16">;
- def PCRel32 : PCRelAsmOperand<"32">;
-+def PCRelTLS16 : PCRelTLSAsmOperand<"16">;
-+def PCRelTLS32 : PCRelTLSAsmOperand<"32">;
- 
- // PC-relative offsets of a basic block.  The offset is sign-extended
- // and multiplied by 2.
-@@ -382,6 +449,20 @@ def brtarget32 : PCRelOperand<OtherVT, P
-   let DecoderMethod = "decodePC32DBLOperand";
- }
- 
-+// Variants of brtarget16/32 with an optional additional TLS symbol.
-+// These are used to annotate calls to __tls_get_offset.
-+def tlssym : Operand<i64> { }
-+def brtarget16tls : PCRelTLSOperand<OtherVT, PCRelTLS16> {
-+  let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym);
-+  let EncoderMethod = "getPC16DBLTLSEncoding";
-+  let DecoderMethod = "decodePC16DBLOperand";
-+}
-+def brtarget32tls : PCRelTLSOperand<OtherVT, PCRelTLS32> {
-+  let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym);
-+  let EncoderMethod = "getPC32DBLTLSEncoding";
-+  let DecoderMethod = "decodePC32DBLOperand";
-+}
-+
- // A PC-relative offset of a global value.  The offset is sign-extended
- // and multiplied by 2.
- def pcrel32 : PCRelAddress<i64, "pcrel32", PCRel32> {
-@@ -408,6 +489,7 @@ def BDAddr64Disp20      : AddressAsmOper
- def BDXAddr64Disp12     : AddressAsmOperand<"BDXAddr",  "64", "12">;
- def BDXAddr64Disp20     : AddressAsmOperand<"BDXAddr",  "64", "20">;
- def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr",  "64", "12", "Len8">;
-+def BDVAddr64Disp12     : AddressAsmOperand<"BDVAddr",  "64", "12">;
- 
- // DAG patterns and operands for addressing modes.  Each mode has
- // the form <type><range><group>[<len>] where:
-@@ -420,6 +502,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOper
- //   laaddr   : like bdxaddr, but used for Load Address operations
- //   dynalloc : base + displacement + index + ADJDYNALLOC
- //   bdladdr  : base + displacement with a length field
-+//   bdvaddr  : base + displacement with a vector index
- //
- // <range> is one of:
- //   12       : the displacement is an unsigned 12-bit value
-@@ -452,6 +535,7 @@ def dynalloc12only    : BDXMode<"DynAllo
- def laaddr12pair      : BDXMode<"LAAddr",   "64", "12", "Pair">;
- def laaddr20pair      : BDXMode<"LAAddr",   "64", "20", "Pair">;
- def bdladdr12onlylen8 : BDLMode<"BDLAddr",  "64", "12", "Only", "8">;
-+def bdvaddr12only     : BDVMode<            "64", "12">;
- 
- //===----------------------------------------------------------------------===//
- // Miscellaneous
-Index: llvm-36/lib/Target/SystemZ/SystemZOperators.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZOperators.td
-+++ llvm-36/lib/Target/SystemZ/SystemZOperators.td
-@@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic       : SDTypeProf
- def SDT_ZPrefetch           : SDTypeProfile<0, 2,
-                                             [SDTCisVT<0, i32>,
-                                              SDTCisPtrTy<1>]>;
-+def SDT_ZTBegin             : SDTypeProfile<0, 2,
-+                                            [SDTCisPtrTy<0>,
-+                                             SDTCisVT<1, i32>]>;
-+def SDT_ZInsertVectorElt    : SDTypeProfile<1, 3,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisVT<3, i32>]>;
-+def SDT_ZExtractVectorElt   : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<1>,
-+                                             SDTCisVT<2, i32>]>;
-+def SDT_ZReplicate          : SDTypeProfile<1, 1,
-+                                            [SDTCisVec<0>]>;
-+def SDT_ZVecUnaryConv       : SDTypeProfile<1, 1,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisVec<1>]>;
-+def SDT_ZVecUnary           : SDTypeProfile<1, 1,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>]>;
-+def SDT_ZVecBinary          : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisSameAs<0, 2>]>;
-+def SDT_ZVecBinaryInt       : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisVT<2, i32>]>;
-+def SDT_ZVecBinaryConv      : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisVec<1>,
-+                                             SDTCisSameAs<1, 2>]>;
-+def SDT_ZVecBinaryConvInt   : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisVec<1>,
-+                                             SDTCisVT<2, i32>]>;
-+def SDT_ZRotateMask         : SDTypeProfile<1, 2,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisVT<1, i32>,
-+                                             SDTCisVT<2, i32>]>;
-+def SDT_ZJoinDwords         : SDTypeProfile<1, 2,
-+                                            [SDTCisVT<0, v2i64>,
-+                                             SDTCisVT<1, i64>,
-+                                             SDTCisVT<2, i64>]>;
-+def SDT_ZVecTernary         : SDTypeProfile<1, 3,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisSameAs<0, 2>,
-+                                             SDTCisSameAs<0, 3>]>;
-+def SDT_ZVecTernaryInt      : SDTypeProfile<1, 3,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisSameAs<0, 2>,
-+                                             SDTCisVT<3, i32>]>;
-+def SDT_ZVecQuaternaryInt   : SDTypeProfile<1, 4,
-+                                            [SDTCisVec<0>,
-+                                             SDTCisSameAs<0, 1>,
-+                                             SDTCisSameAs<0, 2>,
-+                                             SDTCisSameAs<0, 3>,
-+                                             SDTCisVT<4, i32>]>;
- 
- //===----------------------------------------------------------------------===//
- // Node definitions
-@@ -90,6 +148,7 @@ def callseq_start       : SDNode<"ISD::C
- def callseq_end         : SDNode<"ISD::CALLSEQ_END",   SDT_CallSeqEnd,
-                                  [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue,
-                                   SDNPOutGlue]>;
-+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>;
- 
- // Nodes for SystemZISD::*.  See SystemZISelLowering.h for more details.
- def z_retflag           : SDNode<"SystemZISD::RET_FLAG", SDTNone,
-@@ -100,6 +159,12 @@ def z_call              : SDNode<"System
- def z_sibcall           : SDNode<"SystemZISD::SIBCALL", SDT_ZCall,
-                                  [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue,
-                                   SDNPVariadic]>;
-+def z_tls_gdcall        : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall,
-+                                 [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
-+                                  SDNPVariadic]>;
-+def z_tls_ldcall        : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall,
-+                                 [SDNPHasChain, SDNPInGlue, SDNPOutGlue,
-+                                  SDNPVariadic]>;
- def z_pcrel_wrapper     : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>;
- def z_pcrel_offset      : SDNode<"SystemZISD::PCREL_OFFSET",
-                                  SDT_ZWrapOffset, []>;
-@@ -114,6 +179,7 @@ def z_select_ccmask     : SDNode<"System
- def z_adjdynalloc       : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>;
- def z_extract_access    : SDNode<"SystemZISD::EXTRACT_ACCESS",
-                                  SDT_ZExtractAccess>;
-+def z_popcnt            : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>;
- def z_umul_lohi64       : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>;
- def z_sdivrem32         : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>;
- def z_sdivrem64         : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>;
-@@ -123,6 +189,80 @@ def z_udivrem64         : SDNode<"System
- def z_serialize         : SDNode<"SystemZISD::SERIALIZE", SDTNone,
-                                  [SDNPHasChain, SDNPMayStore]>;
- 
-+// Defined because the index is an i32 rather than a pointer.
-+def z_vector_insert     : SDNode<"ISD::INSERT_VECTOR_ELT",
-+                                 SDT_ZInsertVectorElt>;
-+def z_vector_extract    : SDNode<"ISD::EXTRACT_VECTOR_ELT",
-+                                 SDT_ZExtractVectorElt>;
-+def z_byte_mask         : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>;
-+def z_rotate_mask       : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>;
-+def z_replicate         : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>;
-+def z_join_dwords       : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>;
-+def z_splat             : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>;
-+def z_merge_high        : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>;
-+def z_merge_low         : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>;
-+def z_shl_double        : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>;
-+def z_permute_dwords    : SDNode<"SystemZISD::PERMUTE_DWORDS",
-+                                 SDT_ZVecTernaryInt>;
-+def z_permute           : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>;
-+def z_pack              : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>;
-+def z_packs_cc          : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv,
-+                                 [SDNPOutGlue]>;
-+def z_packls_cc         : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv,
-+                                 [SDNPOutGlue]>;
-+def z_unpack_high       : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>;
-+def z_unpackl_high      : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>;
-+def z_unpack_low        : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>;
-+def z_unpackl_low       : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>;
-+def z_vshl_by_scalar    : SDNode<"SystemZISD::VSHL_BY_SCALAR",
-+                                 SDT_ZVecBinaryInt>;
-+def z_vsrl_by_scalar    : SDNode<"SystemZISD::VSRL_BY_SCALAR",
-+                                 SDT_ZVecBinaryInt>;
-+def z_vsra_by_scalar    : SDNode<"SystemZISD::VSRA_BY_SCALAR",
-+                                 SDT_ZVecBinaryInt>;
-+def z_vsum              : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>;
-+def z_vicmpe            : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>;
-+def z_vicmph            : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>;
-+def z_vicmphl           : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>;
-+def z_vicmpes           : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vicmphs           : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vicmphls          : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vfcmpe            : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>;
-+def z_vfcmph            : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>;
-+def z_vfcmphe           : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>;
-+def z_vfcmpes           : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv,
-+                                 [SDNPOutGlue]>;
-+def z_vfcmphs           : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv,
-+                                 [SDNPOutGlue]>;
-+def z_vfcmphes          : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv,
-+                                 [SDNPOutGlue]>;
-+def z_vextend           : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>;
-+def z_vround            : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>;
-+def z_vtm               : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>;
-+def z_vfae_cc           : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt,
-+                                 [SDNPOutGlue]>;
-+def z_vfaez_cc          : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt,
-+                                 [SDNPOutGlue]>;
-+def z_vfee_cc           : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vfeez_cc          : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vfene_cc          : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vfenez_cc         : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary,
-+                                 [SDNPOutGlue]>;
-+def z_vistr_cc          : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary,
-+                                 [SDNPOutGlue]>;
-+def z_vstrc_cc          : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt,
-+                                 [SDNPOutGlue]>;
-+def z_vstrcz_cc         : SDNode<"SystemZISD::VSTRCZ_CC",
-+                                 SDT_ZVecQuaternaryInt, [SDNPOutGlue]>;
-+def z_vftci             : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt,
-+                                 [SDNPOutGlue]>;
-+
- class AtomicWOp<string name, SDTypeProfile profile = SDT_ZAtomicLoadBinaryW>
-   : SDNode<"SystemZISD::"##name, profile,
-            [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>;
-@@ -172,6 +312,19 @@ def z_prefetch          : SDNode<"System
-                                  [SDNPHasChain, SDNPMayLoad, SDNPMayStore,
-                                   SDNPMemOperand]>;
- 
-+def z_tbegin            : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin,
-+                                 [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
-+                                  SDNPSideEffect]>;
-+def z_tbegin_nofloat    : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin,
-+                                 [SDNPHasChain, SDNPOutGlue, SDNPMayStore,
-+                                  SDNPSideEffect]>;
-+def z_tend              : SDNode<"SystemZISD::TEND", SDTNone,
-+                                 [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
-+
-+def z_vshl              : SDNode<"ISD::SHL", SDT_ZVecBinary>;
-+def z_vsra              : SDNode<"ISD::SRA", SDT_ZVecBinary>;
-+def z_vsrl              : SDNode<"ISD::SRL", SDT_ZVecBinary>;
-+
- //===----------------------------------------------------------------------===//
- // Pattern fragments
- //===----------------------------------------------------------------------===//
-@@ -195,11 +348,21 @@ def sext8  : PatFrag<(ops node:$src), (s
- def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>;
- def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>;
- 
-+// Match extensions of an i32 to an i64, followed by an in-register sign
-+// extension from a sub-i32 value.
-+def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>;
-+def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>;
-+
- // Register zero-extend operations.  Sub-32-bit values are represented as i32s.
- def zext8  : PatFrag<(ops node:$src), (and node:$src, 0xff)>;
- def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>;
- def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>;
- 
-+// Match extensions of an i32 to an i64, followed by an AND of the low
-+// i8 or i16 part.
-+def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>;
-+def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>;
-+
- // Typed floating-point loads.
- def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>;
- def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>;
-@@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src),
- def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>;
- def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>;
- 
-+// Integer multiply-and-add
-+def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-+                       (add (mul node:$src1, node:$src2), node:$src3)>;
-+
-+// Fused multiply-subtract, using the natural operand order.
-+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-+                  (fma node:$src1, node:$src2, (fneg node:$src3))>;
-+
- // Fused multiply-add and multiply-subtract, but with the order of the
- // operands matching SystemZ's MA and MS instructions.
- def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3),
-@@ -383,3 +554,110 @@ class loadu<SDPatternOperator operator,
- class storeu<SDPatternOperator operator, SDPatternOperator store = store>
-   : PatFrag<(ops node:$value, node:$addr),
-             (store (operator node:$value), node:$addr)>;
-+
-+// Vector representation of all-zeros and all-ones.
-+def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>;
-+def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>;
-+
-+// Load a scalar and replicate it in all elements of a vector.
-+class z_replicate_load<ValueType scalartype, SDPatternOperator load>
-+  : PatFrag<(ops node:$addr),
-+            (z_replicate (scalartype (load node:$addr)))>;
-+def z_replicate_loadi8  : z_replicate_load<i32, anyextloadi8>;
-+def z_replicate_loadi16 : z_replicate_load<i32, anyextloadi16>;
-+def z_replicate_loadi32 : z_replicate_load<i32, load>;
-+def z_replicate_loadi64 : z_replicate_load<i64, load>;
-+def z_replicate_loadf32 : z_replicate_load<f32, load>;
-+def z_replicate_loadf64 : z_replicate_load<f64, load>;
-+
-+// Load a scalar and insert it into a single element of a vector.
-+class z_vle<ValueType scalartype, SDPatternOperator load>
-+  : PatFrag<(ops node:$vec, node:$addr, node:$index),
-+            (z_vector_insert node:$vec, (scalartype (load node:$addr)),
-+                             node:$index)>;
-+def z_vlei8  : z_vle<i32, anyextloadi8>;
-+def z_vlei16 : z_vle<i32, anyextloadi16>;
-+def z_vlei32 : z_vle<i32, load>;
-+def z_vlei64 : z_vle<i64, load>;
-+def z_vlef32 : z_vle<f32, load>;
-+def z_vlef64 : z_vle<f64, load>;
-+
-+// Load a scalar and insert it into the low element of the high i64 of a
-+// zeroed vector.
-+class z_vllez<ValueType scalartype, SDPatternOperator load, int index>
-+  : PatFrag<(ops node:$addr),
-+            (z_vector_insert (z_vzero),
-+                             (scalartype (load node:$addr)), (i32 index))>;
-+def z_vllezi8  : z_vllez<i32, anyextloadi8, 7>;
-+def z_vllezi16 : z_vllez<i32, anyextloadi16, 3>;
-+def z_vllezi32 : z_vllez<i32, load, 1>;
-+def z_vllezi64 : PatFrag<(ops node:$addr),
-+                         (z_join_dwords (i64 (load node:$addr)), (i64 0))>;
-+// We use high merges to form a v4f32 from four f32s.  Propagating zero
-+// into all elements but index 1 gives this expression.
-+def z_vllezf32 : PatFrag<(ops node:$addr),
-+                         (bitconvert
-+                          (z_merge_high
-+                           (v2i64
-+                            (z_unpackl_high
-+                             (v4i32
-+                              (bitconvert
-+                               (v4f32 (scalar_to_vector
-+                                       (f32 (load node:$addr)))))))),
-+                           (v2i64 (z_vzero))))>;
-+def z_vllezf64 : PatFrag<(ops node:$addr),
-+                         (z_merge_high
-+                          (scalar_to_vector (f64 (load node:$addr))),
-+                          (z_vzero))>;
-+
-+// Store one element of a vector.
-+class z_vste<ValueType scalartype, SDPatternOperator store>
-+  : PatFrag<(ops node:$vec, node:$addr, node:$index),
-+            (store (scalartype (z_vector_extract node:$vec, node:$index)),
-+                   node:$addr)>;
-+def z_vstei8  : z_vste<i32, truncstorei8>;
-+def z_vstei16 : z_vste<i32, truncstorei16>;
-+def z_vstei32 : z_vste<i32, store>;
-+def z_vstei64 : z_vste<i64, store>;
-+def z_vstef32 : z_vste<f32, store>;
-+def z_vstef64 : z_vste<f64, store>;
-+
-+// Arithmetic negation on vectors.
-+def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>;
-+
-+// Bitwise negation on vectors.
-+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>;
-+
-+// Signed "integer greater than zero" on vectors.
-+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>;
-+
-+// Signed "integer less than zero" on vectors.
-+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>;
-+
-+// Integer absolute on vectors.
-+class z_viabs<int shift>
-+  : PatFrag<(ops node:$src),
-+            (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))),
-+                 (z_vsra_by_scalar node:$src, (i32 shift)))>;
-+def z_viabs8  : z_viabs<7>;
-+def z_viabs16 : z_viabs<15>;
-+def z_viabs32 : z_viabs<31>;
-+def z_viabs64 : z_viabs<63>;
-+
-+// Sign-extend the i64 elements of a vector.
-+class z_vse<int shift>
-+  : PatFrag<(ops node:$src),
-+            (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>;
-+def z_vsei8  : z_vse<56>;
-+def z_vsei16 : z_vse<48>;
-+def z_vsei32 : z_vse<32>;
-+
-+// ...and again with the extensions being done on individual i64 scalars.
-+class z_vse_by_parts<SDPatternOperator operator, int index1, int index2>
-+  : PatFrag<(ops node:$src),
-+            (z_join_dwords
-+             (operator (z_vector_extract node:$src, index1)),
-+             (operator (z_vector_extract node:$src, index2)))>;
-+def z_vsei8_by_parts  : z_vse_by_parts<sext8dbl, 7, 15>;
-+def z_vsei16_by_parts : z_vse_by_parts<sext16dbl, 3, 7>;
-+def z_vsei32_by_parts : z_vse_by_parts<sext32, 1, 3>;
-Index: llvm-36/lib/Target/SystemZ/SystemZPatterns.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZPatterns.td
-+++ llvm-36/lib/Target/SystemZ/SystemZPatterns.td
-@@ -153,3 +153,17 @@ multiclass CompareZeroFP<Instruction ins
-   // The sign of the zero makes no difference.
-   def : Pat<(z_fcmp cls:$reg, (fpimmneg0)), (insn cls:$reg, cls:$reg)>;
- }
-+
-+// Use INSN for performing binary operation OPERATION of type VT
-+// on registers of class CLS.
-+class BinaryRRWithType<Instruction insn, RegisterOperand cls,
-+                       SDPatternOperator operator, ValueType vt>
-+  : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>;
-+
-+// Use INSN to perform conversion operation OPERATOR, with the input being
-+// TR2 and the output being TR1.  SUPPRESS is 4 to suppress inexact conditions
-+// and 0 to allow them.  MODE is the rounding mode to use.
-+class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
-+                   TypedReg tr2, bits<3> suppress, bits<4> mode>
-+  : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
-+        (insn tr2.op:$vec, suppress, mode)>;
-Index: llvm-36/lib/Target/SystemZ/SystemZProcessors.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZProcessors.td
-+++ llvm-36/lib/Target/SystemZ/SystemZProcessors.td
-@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
-   "Assume that the floating-point extension facility is installed"
- >;
- 
-+def FeaturePopulationCount : SystemZFeature<
-+  "population-count", "PopulationCount",
-+  "Assume that the population-count facility is installed"
-+>;
-+
- def FeatureFastSerialization : SystemZFeature<
-   "fast-serialization", "FastSerialization",
-   "Assume that the fast-serialization facility is installed"
-@@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZF
- >;
- def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
- 
-+def FeatureMiscellaneousExtensions : SystemZFeature<
-+  "miscellaneous-extensions", "MiscellaneousExtensions",
-+  "Assume that the miscellaneous-extensions facility is installed"
-+>;
-+
-+def FeatureTransactionalExecution : SystemZFeature<
-+  "transactional-execution", "TransactionalExecution",
-+  "Assume that the transactional-execution facility is installed"
-+>;
-+
-+def FeatureProcessorAssist : SystemZFeature<
-+  "processor-assist", "ProcessorAssist",
-+  "Assume that the processor-assist facility is installed"
-+>;
-+
-+def FeatureVector : SystemZFeature<
-+  "vector", "Vector",
-+  "Assume that the vectory facility is installed"
-+>;
-+def FeatureNoVector : SystemZMissingFeature<"Vector">;
-+
- def : Processor<"generic", NoItineraries, []>;
- def : Processor<"z10", NoItineraries, []>;
- def : Processor<"z196", NoItineraries,
-                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
--                 FeatureFPExtension, FeatureFastSerialization,
--                 FeatureInterlockedAccess1]>;
-+                 FeatureFPExtension, FeaturePopulationCount,
-+                 FeatureFastSerialization, FeatureInterlockedAccess1]>;
- def : Processor<"zEC12", NoItineraries,
-                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
--                 FeatureFPExtension, FeatureFastSerialization,
--                 FeatureInterlockedAccess1]>;
-+                 FeatureFPExtension, FeaturePopulationCount,
-+                 FeatureFastSerialization, FeatureInterlockedAccess1,
-+                 FeatureMiscellaneousExtensions,
-+                 FeatureTransactionalExecution, FeatureProcessorAssist]>;
-+def : Processor<"z13", NoItineraries,
-+                [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
-+                 FeatureFPExtension, FeaturePopulationCount,
-+                 FeatureFastSerialization, FeatureInterlockedAccess1,
-+                 FeatureTransactionalExecution, FeatureProcessorAssist,
-+                 FeatureVector]>;
-Index: llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZRegisterInfo.td
-+++ llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td
-@@ -25,20 +25,24 @@ def subreg_l32   : SubRegIndex<32, 0>;
- def subreg_h32   : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
- def subreg_l64   : SubRegIndex<64, 0>;
- def subreg_h64   : SubRegIndex<64, 64>;
-+def subreg_r32   : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
-+def subreg_r64   : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
- def subreg_hh32  : ComposedSubRegIndex<subreg_h64, subreg_h32>;
- def subreg_hl32  : ComposedSubRegIndex<subreg_h64, subreg_l32>;
-+def subreg_hr32  : ComposedSubRegIndex<subreg_h64, subreg_r32>;
- }
- 
--// Define a register class that contains values of type TYPE and an
-+// Define a register class that contains values of types TYPES and an
- // associated operand called NAME.  SIZE is the size and alignment
- // of the registers and REGLIST is the list of individual registers.
--multiclass SystemZRegClass<string name, ValueType type, int size, dag regList> {
-+multiclass SystemZRegClass<string name, list<ValueType> types, int size,
-+                           dag regList> {
-   def AsmOperand : AsmOperandClass {
-     let Name = name;
-     let ParserMethod = "parse"##name;
-     let RenderMethod = "addRegOperands";
-   }
--  def Bit : RegisterClass<"SystemZ", [type], size, regList> {
-+  def Bit : RegisterClass<"SystemZ", types, size, regList> {
-     let Size = size;
-   }
-   def "" : RegisterOperand<!cast<RegisterClass>(name##"Bit")> {
-@@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14]
- 
- /// Allocate the callee-saved R6-R13 backwards. That way they can be saved
- /// together with R14 and R15 in one prolog instruction.
--defm GR32  : SystemZRegClass<"GR32",  i32, 32, (add (sequence "R%uL",  0, 5),
--                                                    (sequence "R%uL", 15, 6))>;
--defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH",  0, 5),
--                                                    (sequence "R%uH", 15, 6))>;
--defm GR64  : SystemZRegClass<"GR64",  i64, 64, (add (sequence "R%uD",  0, 5),
--                                                    (sequence "R%uD", 15, 6))>;
-+defm GR32  : SystemZRegClass<"GR32",  [i32], 32,
-+                             (add (sequence "R%uL",  0, 5),
-+                                  (sequence "R%uL", 15, 6))>;
-+defm GRH32 : SystemZRegClass<"GRH32", [i32], 32,
-+                             (add (sequence "R%uH",  0, 5),
-+                                  (sequence "R%uH", 15, 6))>;
-+defm GR64  : SystemZRegClass<"GR64",  [i64], 64,
-+                             (add (sequence "R%uD",  0, 5),
-+                                  (sequence "R%uD", 15, 6))>;
- 
- // Combine the low and high GR32s into a single class.  This can only be
- // used for virtual registers if the high-word facility is available.
--defm GRX32 : SystemZRegClass<"GRX32", i32, 32,
-+defm GRX32 : SystemZRegClass<"GRX32", [i32], 32,
-                              (add (sequence "R%uL",  0, 5),
-                                   (sequence "R%uH",  0, 5),
-                                   R15L, R15H, R14L, R14H, R13L, R13H,
-@@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i3
- 
- // The architecture doesn't really have any i128 support, so model the
- // register pairs as untyped instead.
--defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q,
--                                                         R12Q, R10Q, R8Q, R6Q,
--                                                         R14Q)>;
-+defm GR128 : SystemZRegClass<"GR128", [untyped], 128,
-+                             (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>;
- 
- // Base and index registers.  Everything except R0, which in an address
- // context evaluates as 0.
--defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>;
--defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>;
-+defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>;
-+defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>;
- 
- // Not used directly, but needs to exist for ADDR32 and ADDR64 subregs
- // of a GR128.
--defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>;
-+defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>;
- 
- //===----------------------------------------------------------------------===//
- // Floating-point registers
-@@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>;
- def F13Dwarf : DwarfMapping<30>;
- def F15Dwarf : DwarfMapping<31>;
- 
--// Lower 32 bits of one of the 16 64-bit floating-point registers
-+def F16Dwarf : DwarfMapping<68>;
-+def F18Dwarf : DwarfMapping<69>;
-+def F20Dwarf : DwarfMapping<70>;
-+def F22Dwarf : DwarfMapping<71>;
-+
-+def F17Dwarf : DwarfMapping<72>;
-+def F19Dwarf : DwarfMapping<73>;
-+def F21Dwarf : DwarfMapping<74>;
-+def F23Dwarf : DwarfMapping<75>;
-+
-+def F24Dwarf : DwarfMapping<76>;
-+def F26Dwarf : DwarfMapping<77>;
-+def F28Dwarf : DwarfMapping<78>;
-+def F30Dwarf : DwarfMapping<79>;
-+
-+def F25Dwarf : DwarfMapping<80>;
-+def F27Dwarf : DwarfMapping<81>;
-+def F29Dwarf : DwarfMapping<82>;
-+def F31Dwarf : DwarfMapping<83>;
-+
-+// Upper 32 bits of one of the floating-point registers
- class FPR32<bits<16> num, string n> : SystemZReg<n> {
-   let HWEncoding = num;
- }
- 
--// One of the 16 64-bit floating-point registers
--class FPR64<bits<16> num, string n, FPR32 low>
-- : SystemZRegWithSubregs<n, [low]> {
-+// One of the floating-point registers.
-+class FPR64<bits<16> num, string n, FPR32 high>
-+ : SystemZRegWithSubregs<n, [high]> {
-   let HWEncoding = num;
--  let SubRegIndices = [subreg_h32];
-+  let SubRegIndices = [subreg_r32];
- }
- 
- // 8 pairs of FPR64s, with a one-register gap inbetween.
-@@ -161,12 +187,17 @@ class FPR128<bits<16> num, string n, FPR
-   let SubRegIndices = [subreg_l64, subreg_h64];
- }
- 
--// Floating-point registers
-+// Floating-point registers.  Registers 16-31 require the vector facility.
- foreach I = 0-15 in {
-   def F#I#S : FPR32<I, "f"#I>;
-   def F#I#D : FPR64<I, "f"#I, !cast<FPR32>("F"#I#"S")>,
-               DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
- }
-+foreach I = 16-31 in {
-+  def F#I#S : FPR32<I, "v"#I>;
-+  def F#I#D : FPR64<I, "v"#I, !cast<FPR32>("F"#I#"S")>,
-+              DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
-+}
- 
- foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in {
-   def F#I#Q  : FPR128<I, "f"#I, !cast<FPR64>("F"#!add(I, 2)#"D"),
-@@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] i
- 
- // There's no store-multiple instruction for FPRs, so we're not fussy
- // about the order in which call-saved registers are allocated.
--defm FP32  : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>;
--defm FP64  : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>;
--defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q,
--                                                      F8Q, F9Q, F12Q, F13Q)>;
-+defm FP32  : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>;
-+defm FP64  : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>;
-+defm FP128 : SystemZRegClass<"FP128", [f128], 128,
-+                             (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>;
-+
-+//===----------------------------------------------------------------------===//
-+// Vector registers
-+//===----------------------------------------------------------------------===//
-+
-+// A full 128-bit vector register, with an FPR64 as its high part.
-+class VR128<bits<16> num, string n, FPR64 high>
-+  : SystemZRegWithSubregs<n, [high]> {
-+  let HWEncoding = num;
-+  let SubRegIndices = [subreg_r64];
-+}
-+
-+// Full vector registers.
-+foreach I = 0-31 in {
-+  def V#I : VR128<I, "v"#I, !cast<FPR64>("F"#I#"D")>,
-+            DwarfRegNum<[!cast<DwarfMapping>("F"#I#"Dwarf").Id]>;
-+}
-+
-+// Class used to store 32-bit values in the first element of a vector
-+// register.  f32 scalars are used for the WLEDB and WLDEB instructions.
-+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32,
-+                            (add (sequence "F%uS", 0, 7),
-+                                 (sequence "F%uS", 16, 31),
-+                                 (sequence "F%uS", 8, 15))>;
-+
-+// Class used to store 64-bit values in the upper half of a vector register.
-+// The vector facility also includes scalar f64 instructions that operate
-+// on the full vector register set.
-+defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64,
-+                            (add (sequence "F%uD", 0, 7),
-+                                 (sequence "F%uD", 16, 31),
-+                                 (sequence "F%uD", 8, 15))>;
-+
-+// The subset of vector registers that can be used for floating-point
-+// operations too.
-+defm VF128 : SystemZRegClass<"VF128",
-+                             [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
-+                             (sequence "V%u", 0, 15)>;
-+
-+// All vector registers.
-+defm VR128 : SystemZRegClass<"VR128",
-+                             [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128,
-+                             (add (sequence "V%u", 0, 7),
-+                                  (sequence "V%u", 16, 31),
-+                                  (sequence "V%u", 8, 15))>;
-+
-+// Attaches a ValueType to a register operand, to make the instruction
-+// definitions easier.
-+class TypedReg<ValueType vtin, RegisterOperand opin> {
-+  ValueType vt = vtin;
-+  RegisterOperand op = opin;
-+}
-+
-+def v32eb   : TypedReg<f32,     VR32>;
-+def v64g    : TypedReg<i64,     VR64>;
-+def v64db   : TypedReg<f64,     VR64>;
-+def v128b   : TypedReg<v16i8,   VR128>;
-+def v128h   : TypedReg<v8i16,   VR128>;
-+def v128f   : TypedReg<v4i32,   VR128>;
-+def v128g   : TypedReg<v2i64,   VR128>;
-+def v128q   : TypedReg<v16i8,   VR128>;
-+def v128eb  : TypedReg<v4f32,   VR128>;
-+def v128db  : TypedReg<v2f64,   VR128>;
-+def v128any : TypedReg<untyped, VR128>;
- 
- //===----------------------------------------------------------------------===//
- // Other registers
-Index: llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZShortenInst.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp
-@@ -15,6 +15,7 @@
- 
- #include "SystemZTargetMachine.h"
- #include "llvm/CodeGen/MachineFunctionPass.h"
-+#include "llvm/CodeGen/MachineInstrBuilder.h"
- 
- using namespace llvm;
- 
-@@ -36,6 +37,10 @@ public:
- private:
-   bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther,
-                   unsigned LLIxL, unsigned LLIxH);
-+  bool shortenOn0(MachineInstr &MI, unsigned Opcode);
-+  bool shortenOn01(MachineInstr &MI, unsigned Opcode);
-+  bool shortenOn001(MachineInstr &MI, unsigned Opcode);
-+  bool shortenFPConv(MachineInstr &MI, unsigned Opcode);
- 
-   const SystemZInstrInfo *TII;
- 
-@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(Mach
-   return false;
- }
- 
-+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding.
-+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) {
-+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) {
-+    MI.setDesc(TII->get(Opcode));
-+    return true;
-+  }
-+  return false;
-+}
-+
-+// Change MI's opcode to Opcode if register operands 0 and 1 have a
-+// 4-bit encoding.
-+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) {
-+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
-+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
-+    MI.setDesc(TII->get(Opcode));
-+    return true;
-+  }
-+  return false;
-+}
-+
-+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a
-+// 4-bit encoding and if operands 0 and 1 are tied.
-+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
-+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
-+      MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
-+      SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
-+    MI.setDesc(TII->get(Opcode));
-+    return true;
-+  }
-+  return false;
-+}
-+
-+// MI is a vector-style conversion instruction with the operand order:
-+// destination, source, exact-suppress, rounding-mode.  If both registers
-+// have a 4-bit encoding then change it to Opcode, which has operand order:
-+// destination, rouding-mode, source, exact-suppress.
-+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
-+  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
-+      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
-+    MachineOperand Dest(MI.getOperand(0));
-+    MachineOperand Src(MI.getOperand(1));
-+    MachineOperand Suppress(MI.getOperand(2));
-+    MachineOperand Mode(MI.getOperand(3));
-+    MI.RemoveOperand(3);
-+    MI.RemoveOperand(2);
-+    MI.RemoveOperand(1);
-+    MI.RemoveOperand(0);
-+    MI.setDesc(TII->get(Opcode));
-+    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
-+      .addOperand(Dest)
-+      .addOperand(Mode)
-+      .addOperand(Src)
-+      .addOperand(Suppress);
-+    return true;
-+  }
-+  return false;
-+}
-+
- // Process all instructions in MBB.  Return true if something changed.
- bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
-   bool Changed = false;
-@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(Ma
-   // Iterate backwards through the block looking for instructions to change.
-   for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
-     MachineInstr &MI = *MBBI;
--    unsigned Opcode = MI.getOpcode();
--    if (Opcode == SystemZ::IILF)
-+    switch (MI.getOpcode()) {
-+    case SystemZ::IILF:
-       Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
-                             SystemZ::LLILH);
--    else if (Opcode == SystemZ::IIHF)
-+      break;
-+
-+    case SystemZ::IIHF:
-       Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
-                             SystemZ::LLIHH);
-+      break;
-+
-+    case SystemZ::WFADB:
-+      Changed |= shortenOn001(MI, SystemZ::ADBR);
-+      break;
-+
-+    case SystemZ::WFDDB:
-+      Changed |= shortenOn001(MI, SystemZ::DDBR);
-+      break;
-+
-+    case SystemZ::WFIDB:
-+      Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
-+      break;
-+
-+    case SystemZ::WLDEB:
-+      Changed |= shortenOn01(MI, SystemZ::LDEBR);
-+      break;
-+
-+    case SystemZ::WLEDB:
-+      Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
-+      break;
-+
-+    case SystemZ::WFMDB:
-+      Changed |= shortenOn001(MI, SystemZ::MDBR);
-+      break;
-+
-+    case SystemZ::WFLCDB:
-+      Changed |= shortenOn01(MI, SystemZ::LCDBR);
-+      break;
-+
-+    case SystemZ::WFLNDB:
-+      Changed |= shortenOn01(MI, SystemZ::LNDBR);
-+      break;
-+
-+    case SystemZ::WFLPDB:
-+      Changed |= shortenOn01(MI, SystemZ::LPDBR);
-+      break;
-+
-+    case SystemZ::WFSQDB:
-+      Changed |= shortenOn01(MI, SystemZ::SQDBR);
-+      break;
-+
-+    case SystemZ::WFSDB:
-+      Changed |= shortenOn001(MI, SystemZ::SDBR);
-+      break;
-+
-+    case SystemZ::WFCDB:
-+      Changed |= shortenOn01(MI, SystemZ::CDBR);
-+      break;
-+
-+    case SystemZ::VL32:
-+      // For z13 we prefer LDE over LE to avoid partial register dependencies.
-+      Changed |= shortenOn0(MI, SystemZ::LDE32);
-+      break;
-+
-+    case SystemZ::VST32:
-+      Changed |= shortenOn0(MI, SystemZ::STE);
-+      break;
-+
-+    case SystemZ::VL64:
-+      Changed |= shortenOn0(MI, SystemZ::LD);
-+      break;
-+
-+    case SystemZ::VST64:
-+      Changed |= shortenOn0(MI, SystemZ::STD);
-+      break;
-+    }
-+
-     unsigned UsedLow = 0;
-     unsigned UsedHigh = 0;
-     for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
-Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp
-@@ -10,7 +10,6 @@
- #include "SystemZSubtarget.h"
- #include "MCTargetDesc/SystemZMCTargetDesc.h"
- #include "llvm/IR/GlobalValue.h"
--#include "llvm/Support/Host.h"
- 
- using namespace llvm;
- 
-@@ -23,15 +22,69 @@ using namespace llvm;
- // Pin the vtable to this file.
- void SystemZSubtarget::anchor() {}
- 
-+// Determine whether we use the vector ABI.
-+static bool UsesVectorABI(StringRef CPU, StringRef FS) {
-+  // We use the vector ABI whenever the vector facility is avaiable.
-+  // This is the case by default if CPU is z13 or later, and can be
-+  // overridden via "[+-]vector" feature string elements.
-+  bool VectorABI = true;
-+  if (CPU.empty() || CPU == "generic" ||
-+      CPU == "z10" || CPU == "z196" || CPU == "zEC12")
-+    VectorABI = false;
-+
-+  SmallVector<StringRef, 3> Features;
-+  FS.split(Features, ",", -1, false /* KeepEmpty */);
-+  for (auto &Feature : Features) {
-+    if (Feature == "vector" || Feature == "+vector")
-+      VectorABI = true;
-+    if (Feature == "-vector")
-+      VectorABI = false;
-+  }
-+
-+  return VectorABI;
-+}
-+
-+static std::string computeDataLayout(StringRef TT, StringRef CPU,
-+                                     StringRef FS) {
-+  const Triple Triple(TT);
-+  bool VectorABI = UsesVectorABI(CPU, FS);
-+  std::string Ret = "";
-+
-+  // Big endian.
-+  Ret += "E";
-+
-+  // Data mangling.
-+  Ret += DataLayout::getManglingComponent(Triple);
-+
-+  // Make sure that global data has at least 16 bits of alignment by
-+  // default, so that we can refer to it using LARL.  We don't have any
-+  // special requirements for stack variables though.
-+  Ret += "-i1:8:16-i8:8:16";
-+
-+  // 64-bit integers are naturally aligned.
-+  Ret += "-i64:64";
-+
-+  // 128-bit floats are aligned only to 64 bits.
-+  Ret += "-f128:64";
-+
-+  // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
-+  if (VectorABI)
-+    Ret += "-v128:64";
-+
-+  // We prefer 16 bits of aligned for all globals; see above.
-+  Ret += "-a:8:16";
-+
-+  // Integer registers are 32 or 64 bits.
-+  Ret += "-n32:64";
-+
-+  return Ret;
-+}
-+
- SystemZSubtarget &
- SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
-   std::string CPUName = CPU;
-   if (CPUName.empty())
-     CPUName = "generic";
--#if defined(__linux__) && defined(__s390x__)
--  if (CPUName == "generic")
--    CPUName = sys::getHostCPUName();
--#endif
-   // Parse features string.
-   ParseSubtargetFeatures(CPUName, FS);
-   return *this;
-@@ -43,12 +96,12 @@ SystemZSubtarget::SystemZSubtarget(const
-                                    const TargetMachine &TM)
-     : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false),
-       HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false),
--      HasFastSerialization(false), HasInterlockedAccess1(false),
-+      HasPopulationCount(false), HasFastSerialization(false),
-+      HasInterlockedAccess1(false), HasMiscellaneousExtensions(false),
-+      HasTransactionalExecution(false), HasProcessorAssist(false),
-+      HasVector(false),
-       TargetTriple(TT),
--      // Make sure that global data has at least 16 bits of alignment by
--      // default, so that we can refer to it using LARL.  We don't have any
--      // special requirements for stack variables though.
--      DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"),
-+      DL(computeDataLayout(TT, CPU, FS)),
-       InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM),
-       TSInfo(DL), FrameLowering() {}
- 
-Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.h
-+++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.h
-@@ -38,8 +38,13 @@ protected:
-   bool HasLoadStoreOnCond;
-   bool HasHighWord;
-   bool HasFPExtension;
-+  bool HasPopulationCount;
-   bool HasFastSerialization;
-   bool HasInterlockedAccess1;
-+  bool HasMiscellaneousExtensions;
-+  bool HasTransactionalExecution;
-+  bool HasProcessorAssist;
-+  bool HasVector;
- 
- private:
-   Triple TargetTriple;
-@@ -88,12 +93,29 @@ public:
-   // Return true if the target has the floating-point extension facility.
-   bool hasFPExtension() const { return HasFPExtension; }
- 
-+  // Return true if the target has the population-count facility.
-+  bool hasPopulationCount() const { return HasPopulationCount; }
-+
-   // Return true if the target has the fast-serialization facility.
-   bool hasFastSerialization() const { return HasFastSerialization; }
- 
-   // Return true if the target has interlocked-access facility 1.
-   bool hasInterlockedAccess1() const { return HasInterlockedAccess1; }
- 
-+  // Return true if the target has the miscellaneous-extensions facility.
-+  bool hasMiscellaneousExtensions() const {
-+    return HasMiscellaneousExtensions;
-+  }
-+
-+  // Return true if the target has the transactional-execution facility.
-+  bool hasTransactionalExecution() const { return HasTransactionalExecution; }
-+
-+  // Return true if the target has the processor-assist facility.
-+  bool hasProcessorAssist() const { return HasProcessorAssist; }
-+
-+  // Return true if the target has the vector facility.
-+  bool hasVector() const { return HasVector; }
-+
-   // Return true if GV can be accessed using LARL for reloc model RM
-   // and code model CM.
-   bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM,
-Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.cpp
-+++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp
-@@ -9,6 +9,7 @@
- 
- #include "SystemZTargetMachine.h"
- #include "llvm/CodeGen/Passes.h"
-+#include "llvm/PassManager.h"
- #include "llvm/Support/TargetRegistry.h"
- #include "llvm/Transforms/Scalar.h"
- #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
-@@ -57,6 +58,10 @@ void SystemZPassConfig::addIRPasses() {
- 
- bool SystemZPassConfig::addInstSelector() {
-   addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel()));
-+
-+ if (getOptLevel() != CodeGenOpt::None)
-+    addPass(createSystemZLDCleanupPass(getSystemZTargetMachine()));
-+
-   return false;
- }
- 
-@@ -100,3 +105,12 @@ void SystemZPassConfig::addPreEmitPass()
- TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) {
-   return new SystemZPassConfig(this, PM);
- }
-+
-+void SystemZTargetMachine::addAnalysisPasses(PassManagerBase &PM) {
-+  // Add first the target-independent BasicTTI pass, then our SystemZ pass.
-+  // This allows the SystemZ pass to delegate to the target independent layer
-+  // when appropriate.
-+  PM.add(createBasicTargetTransformInfoPass(this));
-+  PM.add(createSystemZTargetTransformInfoPass(this));
-+}
-+
-Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h
-===================================================================
---- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.h
-+++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h
-@@ -39,6 +39,7 @@ public:
-   }
-   // Override LLVMTargetMachine
-   TargetPassConfig *createPassConfig(PassManagerBase &PM) override;
-+  void addAnalysisPasses(PassManagerBase &PM) override;
-   TargetLoweringObjectFile *getObjFileLowering() const override {
-     return TLOF.get();
-   }
-Index: llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
-===================================================================
---- /dev/null
-+++ llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp
-@@ -0,0 +1,334 @@
-+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===//
-+//
-+//                     The LLVM Compiler Infrastructure
-+//
-+// This file is distributed under the University of Illinois Open Source
-+// License. See LICENSE.TXT for details.
-+//
-+//===----------------------------------------------------------------------===//
-+//
-+// This file implements a TargetTransformInfo analysis pass specific to the
-+// SystemZ target machine. It uses the target's detailed information to provide
-+// more precise answers to certain TTI queries, while letting the target
-+// independent and default TTI implementations handle the rest.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+#include "SystemZTargetMachine.h"
-+#include "llvm/Analysis/TargetTransformInfo.h"
-+#include "llvm/IR/IntrinsicInst.h"
-+#include "llvm/Support/Debug.h"
-+#include "llvm/Target/CostTable.h"
-+#include "llvm/Target/TargetLowering.h"
-+using namespace llvm;
-+
-+#define DEBUG_TYPE "systemztti"
-+
-+// Declare the pass initialization routine locally as target-specific passes
-+// don't have a target-wide initialization entry point, and so we rely on the
-+// pass constructor initialization.
-+namespace llvm {
-+void initializeSystemZTTIPass(PassRegistry &);
-+}
-+
-+namespace {
-+
-+class SystemZTTI : public ImmutablePass, public TargetTransformInfo {
-+  const SystemZSubtarget *ST;
-+  const SystemZTargetLowering *TLI;
-+
-+public:
-+  SystemZTTI() : ImmutablePass(ID), ST(0), TLI(0) {
-+    llvm_unreachable("This pass cannot be directly constructed");
-+  }
-+
-+  SystemZTTI(const SystemZTargetMachine *TM)
-+      : ImmutablePass(ID), ST(TM->getSubtargetImpl()),
-+        TLI(TM->getSubtargetImpl()->getTargetLowering()) {
-+    initializeSystemZTTIPass(*PassRegistry::getPassRegistry());
-+  }
-+
-+  void initializePass() override {
-+    pushTTIStack(this);
-+  }
-+
-+  void getAnalysisUsage(AnalysisUsage &AU) const override {
-+    TargetTransformInfo::getAnalysisUsage(AU);
-+  }
-+
-+  // Pass identification.
-+  static char ID;
-+
-+  // Provide necessary pointer adjustments for the two base classes.
-+  void *getAdjustedAnalysisPointer(const void *ID) override {
-+    if (ID == &TargetTransformInfo::ID)
-+      return (TargetTransformInfo*)this;
-+    return this;
-+  }
-+
-+  /// \name Scalar TTI Implementations
-+  /// @{
-+
-+  unsigned getIntImmCost(const APInt &Imm, Type *Ty);
-+
-+  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
-+                         Type *Ty);
-+  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
-+                         Type *Ty);
-+
-+  PopcntSupportKind getPopcntSupport(unsigned TyWidth);
-+
-+  /// @}
-+
-+  /// \name Vector TTI Implementations
-+  /// @{
-+
-+  unsigned getNumberOfRegisters(bool Vector);
-+  unsigned getRegisterBitWidth(bool Vector);
-+
-+  /// @}
-+};
-+
-+} // end anonymous namespace
-+
-+INITIALIZE_AG_PASS(SystemZTTI, TargetTransformInfo, "systemztti",
-+                   "SystemZ Target Transform Info", true, true, false)
-+char SystemZTTI::ID = 0;
-+
-+ImmutablePass *
-+llvm::createSystemZTargetTransformInfoPass(const SystemZTargetMachine *TM) {
-+  return new SystemZTTI(TM);
-+}
-+
-+
-+//===----------------------------------------------------------------------===//
-+//
-+// SystemZ cost model.
-+//
-+//===----------------------------------------------------------------------===//
-+
-+unsigned SystemZTTI::getIntImmCost(const APInt &Imm, Type *Ty) {
-+  assert(Ty->isIntegerTy());
-+
-+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
-+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
-+  // here, so that constant hoisting will ignore this constant.
-+  if (BitSize == 0)
-+    return TCC_Free;
-+  // No cost model for operations on integers larger than 64 bit implemented yet.
-+  if (BitSize > 64)
-+    return TCC_Free;
-+
-+  if (Imm == 0)
-+    return TCC_Free;
-+
-+  if (Imm.getBitWidth() <= 64) {
-+    // Constants loaded via lgfi.
-+    if (isInt<32>(Imm.getSExtValue()))
-+      return TCC_Basic;
-+    // Constants loaded via llilf.
-+    if (isUInt<32>(Imm.getZExtValue()))
-+      return TCC_Basic;
-+    // Constants loaded via llihf:
-+    if ((Imm.getZExtValue() & 0xffffffff) == 0)
-+      return TCC_Basic;
-+
-+    return 2 * TCC_Basic;
-+  }
-+
-+  return 4 * TCC_Basic;
-+}
-+
-+unsigned SystemZTTI::getIntImmCost(unsigned Opcode, unsigned Idx,
-+                                       const APInt &Imm, Type *Ty) {
-+  assert(Ty->isIntegerTy());
-+
-+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
-+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
-+  // here, so that constant hoisting will ignore this constant.
-+  if (BitSize == 0)
-+    return TCC_Free;
-+  // No cost model for operations on integers larger than 64 bit implemented yet.
-+  if (BitSize > 64)
-+    return TCC_Free;
-+
-+  switch (Opcode) {
-+  default:
-+    return TCC_Free;
-+  case Instruction::GetElementPtr:
-+    // Always hoist the base address of a GetElementPtr. This prevents the
-+    // creation of new constants for every base constant that gets constant
-+    // folded with the offset.
-+    if (Idx == 0)
-+      return 2 * TCC_Basic;
-+    return TCC_Free;
-+  case Instruction::Store:
-+    if (Idx == 0 && Imm.getBitWidth() <= 64) {
-+      // Any 8-bit immediate store can by implemented via mvi.
-+      if (BitSize == 8)
-+        return TCC_Free;
-+      // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi.
-+      if (isInt<16>(Imm.getSExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::ICmp:
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      // Comparisons against signed 32-bit immediates implemented via cgfi.
-+      if (isInt<32>(Imm.getSExtValue()))
-+        return TCC_Free;
-+      // Comparisons against unsigned 32-bit immediates implemented via clgfi.
-+      if (isUInt<32>(Imm.getZExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::Add:
-+  case Instruction::Sub:
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      // We use algfi/slgfi to add/subtract 32-bit unsigned immediates.
-+      if (isUInt<32>(Imm.getZExtValue()))
-+        return TCC_Free;
-+      // Or their negation, by swapping addition vs. subtraction.
-+      if (isUInt<32>(-Imm.getSExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::Mul:
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      // We use msgfi to multiply by 32-bit signed immediates.
-+      if (isInt<32>(Imm.getSExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::Or:
-+  case Instruction::Xor:
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      // Masks supported by oilf/xilf.
-+      if (isUInt<32>(Imm.getZExtValue()))
-+        return TCC_Free;
-+      // Masks supported by oihf/xihf.
-+      if ((Imm.getZExtValue() & 0xffffffff) == 0)
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::And:
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      // Any 32-bit AND operation can by implemented via nilf.
-+      if (BitSize <= 32)
-+        return TCC_Free;
-+      // 64-bit masks supported by nilf.
-+      if (isUInt<32>(~Imm.getZExtValue()))
-+        return TCC_Free;
-+      // 64-bit masks supported by nilh.
-+      if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff)
-+        return TCC_Free;
-+      // Some 64-bit AND operations can be implemented via risbg.
-+      const SystemZInstrInfo *TII = ST->getInstrInfo();
-+      unsigned Start, End;
-+      if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Instruction::Shl:
-+  case Instruction::LShr:
-+  case Instruction::AShr:
-+    // Always return TCC_Free for the shift value of a shift instruction.
-+    if (Idx == 1)
-+      return TCC_Free;
-+    break;
-+  case Instruction::UDiv:
-+  case Instruction::SDiv:
-+  case Instruction::URem:
-+  case Instruction::SRem:
-+  case Instruction::Trunc:
-+  case Instruction::ZExt:
-+  case Instruction::SExt:
-+  case Instruction::IntToPtr:
-+  case Instruction::PtrToInt:
-+  case Instruction::BitCast:
-+  case Instruction::PHI:
-+  case Instruction::Call:
-+  case Instruction::Select:
-+  case Instruction::Ret:
-+  case Instruction::Load:
-+    break;
-+  }
-+
-+  return SystemZTTI::getIntImmCost(Imm, Ty);
-+}
-+
-+unsigned SystemZTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
-+                                       const APInt &Imm, Type *Ty) {
-+  assert(Ty->isIntegerTy());
-+
-+  unsigned BitSize = Ty->getPrimitiveSizeInBits();
-+  // There is no cost model for constants with a bit size of 0. Return TCC_Free
-+  // here, so that constant hoisting will ignore this constant.
-+  if (BitSize == 0)
-+    return TCC_Free;
-+  // No cost model for operations on integers larger than 64 bit implemented yet.
-+  if (BitSize > 64)
-+    return TCC_Free;
-+
-+  switch (IID) {
-+  default:
-+    return TCC_Free;
-+  case Intrinsic::sadd_with_overflow:
-+  case Intrinsic::uadd_with_overflow:
-+  case Intrinsic::ssub_with_overflow:
-+  case Intrinsic::usub_with_overflow:
-+    // These get expanded to include a normal addition/subtraction.
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      if (isUInt<32>(Imm.getZExtValue()))
-+        return TCC_Free;
-+      if (isUInt<32>(-Imm.getSExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Intrinsic::smul_with_overflow:
-+  case Intrinsic::umul_with_overflow:
-+    // These get expanded to include a normal multiplication.
-+    if (Idx == 1 && Imm.getBitWidth() <= 64) {
-+      if (isInt<32>(Imm.getSExtValue()))
-+        return TCC_Free;
-+    }
-+    break;
-+  case Intrinsic::experimental_stackmap:
-+    if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
-+      return TCC_Free;
-+    break;
-+  case Intrinsic::experimental_patchpoint_void:
-+  case Intrinsic::experimental_patchpoint_i64:
-+    if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue())))
-+      return TCC_Free;
-+    break;
-+  }
-+  return SystemZTTI::getIntImmCost(Imm, Ty);
-+}
-+
-+SystemZTTI::PopcntSupportKind
-+SystemZTTI::getPopcntSupport(unsigned TyWidth) {
-+  assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2");
-+  if (ST->hasPopulationCount() && TyWidth <= 64)
-+    return PSK_FastHardware;
-+  return PSK_Software;
-+}
-+
-+unsigned SystemZTTI::getNumberOfRegisters(bool Vector) {
-+  if (!Vector)
-+    // Discount the stack pointer.  Also leave out %r0, since it can't
-+    // be used in an address.
-+    return 14;
-+  if (ST->hasVector())
-+    return 32;
-+  return 0;
-+}
-+
-+unsigned SystemZTTI::getRegisterBitWidth(bool Vector) {
-+  if (!Vector)
-+    return 64;
-+  if (ST->hasVector())
-+    return 128;
-+  return 0;
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/ctpop-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/ctpop-01.ll
-@@ -0,0 +1,96 @@
-+; Test population-count instruction
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
-+
-+declare i32 @llvm.ctpop.i32(i32 %a)
-+declare i64 @llvm.ctpop.i64(i64 %a)
-+
-+define i32 @f1(i32 %a) {
-+; CHECK-LABEL: f1:
-+; CHECK: popcnt  %r0, %r2
-+; CHECK: sllk    %r1, %r0, 16
-+; CHECK: ar      %r1, %r0
-+; CHECK: sllk    %r2, %r1, 8
-+; CHECK: ar      %r2, %r1
-+; CHECK: srl     %r2, 24
-+; CHECK: br      %r14
-+
-+  %popcnt = call i32 @llvm.ctpop.i32(i32 %a)
-+  ret i32 %popcnt
-+}
-+
-+define i32 @f2(i32 %a) {
-+; CHECK-LABEL: f2:
-+; CHECK: llhr    %r0, %r2
-+; CHECK: popcnt  %r0, %r0
-+; CHECK: risblg  %r2, %r0, 16, 151, 8
-+; CHECK: ar      %r2, %r0
-+; CHECK: srl     %r2, 8
-+; CHECK: br      %r14
-+  %and = and i32 %a, 65535
-+  %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
-+  ret i32 %popcnt
-+}
-+
-+define i32 @f3(i32 %a) {
-+; CHECK-LABEL: f3:
-+; CHECK: llcr    %r0, %r2
-+; CHECK: popcnt  %r2, %r0
-+; CHECK: br      %r14
-+  %and = and i32 %a, 255
-+  %popcnt = call i32 @llvm.ctpop.i32(i32 %and)
-+  ret i32 %popcnt
-+}
-+
-+define i64 @f4(i64 %a) {
-+; CHECK-LABEL: f4:
-+; CHECK: popcnt  %r0, %r2
-+; CHECK: sllg    %r1, %r0, 32
-+; CHECK: agr     %r1, %r0
-+; CHECK: sllg    %r0, %r1, 16
-+; CHECK: agr     %r0, %r1
-+; CHECK: sllg    %r1, %r0, 8
-+; CHECK: agr     %r1, %r0
-+; CHECK: srlg    %r2, %r1, 56
-+; CHECK: br      %r14
-+  %popcnt = call i64 @llvm.ctpop.i64(i64 %a)
-+  ret i64 %popcnt
-+}
-+
-+define i64 @f5(i64 %a) {
-+; CHECK-LABEL: f5:
-+; CHECK: llgfr   %r0, %r2
-+; CHECK: popcnt  %r0, %r0
-+; CHECK: sllg    %r1, %r0, 16
-+; CHECK: algfr   %r0, %r1
-+; CHECK: sllg    %r1, %r0, 8
-+; CHECK: algfr   %r0, %r1
-+; CHECK: srlg    %r2, %r0, 24
-+  %and = and i64 %a, 4294967295
-+  %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
-+  ret i64 %popcnt
-+}
-+
-+define i64 @f6(i64 %a) {
-+; CHECK-LABEL: f6:
-+; CHECK: llghr   %r0, %r2
-+; CHECK: popcnt  %r0, %r0
-+; CHECK: risbg   %r1, %r0, 48, 183, 8
-+; CHECK: agr     %r1, %r0
-+; CHECK: srlg    %r2, %r1, 8
-+; CHECK: br      %r14
-+  %and = and i64 %a, 65535
-+  %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
-+  ret i64 %popcnt
-+}
-+
-+define i64 @f7(i64 %a) {
-+; CHECK-LABEL: f7:
-+; CHECK: llgcr   %r0, %r2
-+; CHECK: popcnt  %r2, %r0
-+; CHECK: br      %r14
-+  %and = and i64 %a, 255
-+  %popcnt = call i64 @llvm.ctpop.i64(i64 %and)
-+  ret i64 %popcnt
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-01.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll
-@@ -1,6 +1,7 @@
- ; Test floating-point absolute.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test f32.
- declare float @llvm.fabs.f32(float %f)
-Index: llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll
-@@ -1,6 +1,7 @@
- ; Test negated floating-point absolute.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test f32.
- declare float @llvm.fabs.f32(float %f)
-Index: llvm-36/test/CodeGen/SystemZ/fp-add-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-add-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-add-02.ll
-@@ -1,7 +1,8 @@
- ; Test 64-bit floating-point addition.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
--
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- declare double @foo()
- 
- ; Check register addition.
-@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%b
- define double @f7(double *%ptr0) {
- ; CHECK-LABEL: f7:
- ; CHECK: brasl %r14, foo@PLT
--; CHECK: adb %f0, 160(%r15)
-+; CHECK-SCALAR: adb %f0, 160(%r15)
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%ptr0, i64 2
-   %ptr2 = getelementptr double *%ptr0, i64 4
-Index: llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-cmp-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll
-@@ -1,7 +1,10 @@
- ; Test 64-bit floating-point comparison.  The tests assume a z10 implementation
- ; of select, using conditional branches rather than LOCGR.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
- 
- declare double @foo()
- 
-@@ -9,8 +12,9 @@ declare double @foo()
- define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) {
- ; CHECK-LABEL: f1:
- ; CHECK: cdbr %f0, %f2
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %cond = fcmp oeq double %f1, %f2
-   %res = select i1 %cond, i64 %a, i64 %b
-@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f
- define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) {
- ; CHECK-LABEL: f2:
- ; CHECK: cdb %f0, 0(%r4)
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %f2 = load double *%ptr
-   %cond = fcmp oeq double %f1, %f2
-@@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f
- define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) {
- ; CHECK-LABEL: f3:
- ; CHECK: cdb %f0, 4088(%r4)
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %ptr = getelementptr double *%base, i64 511
-   %f2 = load double *%ptr
-@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f
- ; CHECK-LABEL: f4:
- ; CHECK: aghi %r4, 4096
- ; CHECK: cdb %f0, 0(%r4)
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %ptr = getelementptr double *%base, i64 512
-   %f2 = load double *%ptr
-@@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f
- ; CHECK-LABEL: f5:
- ; CHECK: aghi %r4, -8
- ; CHECK: cdb %f0, 0(%r4)
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %ptr = getelementptr double *%base, i64 -1
-   %f2 = load double *%ptr
-@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f
- ; CHECK-LABEL: f6:
- ; CHECK: sllg %r1, %r5, 3
- ; CHECK: cdb %f0, 800(%r1,%r4)
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%base, i64 %index
-   %ptr2 = getelementptr double *%ptr1, i64 100
-@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f
- define double @f7(double *%ptr0) {
- ; CHECK-LABEL: f7:
- ; CHECK: brasl %r14, foo@PLT
--; CHECK: cdb {{%f[0-9]+}}, 160(%r15)
-+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15)
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%ptr0, i64 2
-   %ptr2 = getelementptr double *%ptr0, i64 4
-@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) {
- ; Check comparison with zero.
- define i64 @f8(i64 %a, i64 %b, double %f) {
- ; CHECK-LABEL: f8:
--; CHECK: ltdbr %f0, %f0
--; CHECK-NEXT: je
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR: ltdbr %f0, %f0
-+; CHECK-SCALAR-NEXT: je
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR: lzdr %f1
-+; CHECK-VECTOR-NEXT: cdbr %f0, %f1
-+; CHECK-VECTOR-NEXT: locgrne %r2, %r3
- ; CHECK: br %r14
-   %cond = fcmp oeq double %f, 0.0
-   %res = select i1 %cond, i64 %a, i64 %b
-@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f
- define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) {
- ; CHECK-LABEL: f9:
- ; CHECK: cdb %f0, 0(%r4)
--; CHECK-NEXT: jl {{\.L.*}}
--; CHECK: lgr %r2, %r3
-+; CHECK-SCALAR-NEXT: jl
-+; CHECK-SCALAR: lgr %r2, %r3
-+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3
- ; CHECK: br %r14
-   %f1 = load double *%ptr
-   %cond = fcmp ogt double %f1, %f2
-Index: llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-01.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll
-@@ -1,11 +1,15 @@
- ; Test floating-point truncations.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
- 
- ; Test f64->f32.
- define float @f1(double %d1, double %d2) {
- ; CHECK-LABEL: f1:
--; CHECK: ledbr %f0, %f2
-+; CHECK-SCALAR: ledbr %f0, %f2
-+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0
- ; CHECK: br %r14
-   %res = fptrunc double %d2 to float
-   ret float %res
-@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) {
- define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) {
- ; CHECK-LABEL: f5:
- ; CHECK: ldxbr %f1, %f1
--; CHECK: adbr %f1, %f2
--; CHECK: std %f1, 0(%r2)
-+; CHECK-SCALAR: adbr %f1, %f2
-+; CHECK-SCALAR: std %f1, 0(%r2)
-+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2
-+; CHECK-VECTOR: std [[REG]], 0(%r2)
- ; CHECK: br %r14
-   %val = load fp128 *%ptr
-   %conv = fptrunc fp128 %val to double
-Index: llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll
-@@ -1,6 +1,8 @@
- ; Test extensions of f32 to f64.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Check register extension.
- define double @f1(float %val) {
-@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %ind
- ; to use LDEB if possible.
- define void @f7(double *%ptr1, float *%ptr2) {
- ; CHECK-LABEL: f7:
--; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
-+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15)
- ; CHECK: br %r14
-   %val0 = load volatile float *%ptr2
-   %val1 = load volatile float *%ptr2
-Index: llvm-36/test/CodeGen/SystemZ/fp-div-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-div-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-div-02.ll
-@@ -1,6 +1,8 @@
- ; Test 64-bit floating-point division.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- declare double @foo()
- 
-@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b
- define double @f7(double *%ptr0) {
- ; CHECK-LABEL: f7:
- ; CHECK: brasl %r14, foo@PLT
--; CHECK: ddb %f0, 160(%r15)
-+; CHECK-SCALAR: ddb %f0, 160(%r15)
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%ptr0, i64 2
-   %ptr2 = getelementptr double *%ptr0, i64 4
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-01.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-01.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-01.ll
-@@ -1,11 +1,13 @@
- ; Test moves between FPRs.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test f32 moves.
- define float @f1(float %a, float %b) {
- ; CHECK-LABEL: f1:
- ; CHECK: ler %f0, %f2
-+; CHECK: br %r14
-   ret float %b
- }
- 
-@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) {
- define double @f2(double %a, double %b) {
- ; CHECK-LABEL: f2:
- ; CHECK: ldr %f0, %f2
-+; CHECK: br %r14
-   ret double %b
- }
- 
-@@ -22,6 +25,7 @@ define void @f3(fp128 *%x) {
- ; CHECK-LABEL: f3:
- ; CHECK: lxr
- ; CHECK: axbr
-+; CHECK: br %r14
-   %val = load volatile fp128 *%x
-   %sum = fadd fp128 %val, %val
-   store volatile fp128 %sum, fp128 *%x
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-04.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-04.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-04.ll
-@@ -1,6 +1,7 @@
- ; Test 64-bit floating-point loads.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test the low end of the LD range.
- define double @f1(double *%src) {
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-07.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-07.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-07.ll
-@@ -1,6 +1,7 @@
- ; Test 64-bit floating-point stores.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test the low end of the STD range.
- define void @f1(double *%src, double %val) {
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-09.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-09.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-09.ll
-@@ -1,4 +1,4 @@
--; Test moves between FPRs and GPRs for z196 and above.
-+; Test moves between FPRs and GPRs for z196 and zEC12.
- ;
- ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
- 
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-10.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-10.ll
-@@ -0,0 +1,61 @@
-+; Test moves between FPRs and GPRs for z13 and above.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Check that moves from i32s to floats use a low GR32 and vector operation.
-+define float @f1(i16 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: llh [[REG:%r[0-5]]], 0(%r2)
-+; CHECK: oilh [[REG]], 16256
-+; CHECK: vlvgf %v0, [[REG]], 0
-+; CHECK: br %r14
-+  %base = load i16 *%ptr
-+  %ext = zext i16 %base to i32
-+  %full = or i32 %ext, 1065353216
-+  %res = bitcast i32 %full to float
-+  ret float %res
-+}
-+
-+; Check that moves from floats to i32s use a low GR32 and vector operation.
-+define void @f2(float %val, i8 *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: stc [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %res = bitcast float %val to i32
-+  %trunc = trunc i32 %res to i8
-+  store i8 %trunc, i8 *%ptr
-+  ret void
-+}
-+
-+; Like f2, but with a conditional store.
-+define void @f3(float %val, i8 *%ptr, i32 %which) {
-+; CHECK-LABEL: f3:
-+; CHECK-DAG: cijlh %r3, 0,
-+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: stc [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %int = bitcast float %val to i32
-+  %trunc = trunc i32 %int to i8
-+  %old = load i8 *%ptr
-+  %cmp = icmp eq i32 %which, 0
-+  %res = select i1 %cmp, i8 %trunc, i8 %old
-+  store i8 %res, i8 *%ptr
-+  ret void
-+}
-+
-+; ...and again with 16-bit memory.
-+define void @f4(float %val, i16 *%ptr, i32 %which) {
-+; CHECK-LABEL: f4:
-+; CHECK-DAG: cijlh %r3, 0,
-+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: sth [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %int = bitcast float %val to i32
-+  %trunc = trunc i32 %int to i16
-+  %old = load i16 *%ptr
-+  %cmp = icmp eq i32 %which, 0
-+  %res = select i1 %cmp, i16 %trunc, i16 %old
-+  store i16 %res, i16 *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/fp-move-11.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/fp-move-11.ll
-@@ -0,0 +1,110 @@
-+; Test 32-bit floating-point loads for z13.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test that we use LDE instead of LE - low end of the LE range.
-+define float @f1(float *%src) {
-+; CHECK-LABEL: f1:
-+; CHECK: lde %f0, 0(%r2)
-+; CHECK: br %r14
-+  %val = load float *%src
-+  ret float %val
-+}
-+
-+; Test that we use LDE instead of LE - high end of the LE range.
-+define float @f2(float *%src) {
-+; CHECK-LABEL: f2:
-+; CHECK: lde %f0, 4092(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 1023
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the next word up, which should use LEY instead of LDE.
-+define float @f3(float *%src) {
-+; CHECK-LABEL: f3:
-+; CHECK: ley %f0, 4096(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 1024
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the high end of the aligned LEY range.
-+define float @f4(float *%src) {
-+; CHECK-LABEL: f4:
-+; CHECK: ley %f0, 524284(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 131071
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the next word up, which needs separate address logic.
-+; Other sequences besides this one would be OK.
-+define float @f5(float *%src) {
-+; CHECK-LABEL: f5:
-+; CHECK: agfi %r2, 524288
-+; CHECK: lde %f0, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 131072
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the high end of the negative aligned LEY range.
-+define float @f6(float *%src) {
-+; CHECK-LABEL: f6:
-+; CHECK: ley %f0, -4(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 -1
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the low end of the LEY range.
-+define float @f7(float *%src) {
-+; CHECK-LABEL: f7:
-+; CHECK: ley %f0, -524288(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 -131072
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check the next word down, which needs separate address logic.
-+; Other sequences besides this one would be OK.
-+define float @f8(float *%src) {
-+; CHECK-LABEL: f8:
-+; CHECK: agfi %r2, -524292
-+; CHECK: lde %f0, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%src, i64 -131073
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check that LDE allows an index.
-+define float @f9(i64 %src, i64 %index) {
-+; CHECK-LABEL: f9:
-+; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}})
-+; CHECK: br %r14
-+  %add1 = add i64 %src, %index
-+  %add2 = add i64 %add1, 4092
-+  %ptr = inttoptr i64 %add2 to float *
-+  %val = load float *%ptr
-+  ret float %val
-+}
-+
-+; Check that LEY allows an index.
-+define float @f10(i64 %src, i64 %index) {
-+; CHECK-LABEL: f10:
-+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}})
-+; CHECK: br %r14
-+  %add1 = add i64 %src, %index
-+  %add2 = add i64 %add1, 4096
-+  %ptr = inttoptr i64 %add2 to float *
-+  %val = load float *%ptr
-+  ret float %val
-+}
-Index: llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-03.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll
-@@ -1,6 +1,8 @@
- ; Test multiplication of two f64s, producing an f64 result.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- declare double @foo()
- 
-@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b
- define double @f7(double *%ptr0) {
- ; CHECK-LABEL: f7:
- ; CHECK: brasl %r14, foo@PLT
--; CHECK: mdb %f0, 160(%r15)
-+; CHECK-SCALAR: mdb %f0, 160(%r15)
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%ptr0, i64 2
-   %ptr2 = getelementptr double *%ptr0, i64 4
-Index: llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-07.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll
-@@ -1,11 +1,15 @@
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
- 
- declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
- 
- define double @f1(double %f1, double %f2, double %acc) {
- ; CHECK-LABEL: f1:
--; CHECK: madbr %f4, %f0, %f2
--; CHECK: ldr %f0, %f4
-+; CHECK-SCALAR: madbr %f4, %f0, %f2
-+; CHECK-SCALAR: ldr %f0, %f4
-+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4
- ; CHECK: br %r14
-   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc)
-   ret double %res
-Index: llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-09.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll
-@@ -1,11 +1,15 @@
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
- 
- declare double @llvm.fma.f64(double %f1, double %f2, double %f3)
- 
- define double @f1(double %f1, double %f2, double %acc) {
- ; CHECK-LABEL: f1:
--; CHECK: msdbr %f4, %f0, %f2
--; CHECK: ldr %f0, %f4
-+; CHECK-SCALAR: msdbr %f4, %f0, %f2
-+; CHECK-SCALAR: ldr %f0, %f4
-+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4
- ; CHECK: br %r14
-   %negacc = fsub double -0.0, %acc
-   %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc)
-Index: llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-neg-01.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll
-@@ -1,6 +1,7 @@
- ; Test floating-point negation.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- ; Test f32.
- define float @f1(float %f) {
-Index: llvm-36/test/CodeGen/SystemZ/fp-round-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-round-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-round-02.ll
-@@ -1,6 +1,9 @@
- ; Test rounding functions for z196 and above.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s
- 
- ; Test rint for f32.
- declare float @llvm.rint.f32(float %f)
-@@ -16,7 +19,8 @@ define float @f1(float %f) {
- declare double @llvm.rint.f64(double %f)
- define double @f2(double %f) {
- ; CHECK-LABEL: f2:
--; CHECK: fidbr %f0, 0, %f0
-+; CHECK-SCALAR: fidbr %f0, 0, %f0
-+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0
- ; CHECK: br %r14
-   %res = call double @llvm.rint.f64(double %f)
-   ret double %res
-Index: llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-sqrt-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll
-@@ -1,6 +1,8 @@
- ; Test 64-bit square root.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- declare double @llvm.sqrt.f64(double %f)
- declare double @sqrt(double)
-@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %in
- ; to use SQDB if possible.
- define void @f7(double *%ptr) {
- ; CHECK-LABEL: f7:
--; CHECK: sqdb {{%f[0-9]+}}, 160(%r15)
-+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15)
- ; CHECK: br %r14
-   %val0 = load volatile double *%ptr
-   %val1 = load volatile double *%ptr
-Index: llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/fp-sub-02.ll
-+++ llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll
-@@ -1,6 +1,8 @@
- ; Test 64-bit floating-point subtraction.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \
-+; RUN:   | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
- 
- declare double @foo()
- 
-@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b
- define double @f7(double *%ptr0) {
- ; CHECK-LABEL: f7:
- ; CHECK: brasl %r14, foo@PLT
--; CHECK: sdb %f0, 16{{[04]}}(%r15)
-+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15)
- ; CHECK: br %r14
-   %ptr1 = getelementptr double *%ptr0, i64 2
-   %ptr2 = getelementptr double *%ptr0, i64 4
-Index: llvm-36/test/CodeGen/SystemZ/frame-03.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/frame-03.ll
-+++ llvm-36/test/CodeGen/SystemZ/frame-03.ll
-@@ -2,7 +2,7 @@
- ; uses a different register class, but the set of saved and restored
- ; registers should be the same.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
- 
- ; This function should require all FPRs, but no other spill slots.
- ; We need to save and restore 8 of the 16 FPRs, so the frame size
-Index: llvm-36/test/CodeGen/SystemZ/frame-07.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/frame-07.ll
-+++ llvm-36/test/CodeGen/SystemZ/frame-07.ll
-@@ -1,7 +1,7 @@
- ; Test the saving and restoring of FPRs in large frames.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s
--; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s
- 
- ; Test a frame size that requires some FPRs to be saved and loaded using
- ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD.
-Index: llvm-36/test/CodeGen/SystemZ/frame-17.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/frame-17.ll
-+++ llvm-36/test/CodeGen/SystemZ/frame-17.ll
-@@ -1,6 +1,6 @@
- ; Test spilling of FPRs.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s
- 
- ; We need to save and restore 8 of the 16 FPRs and allocate an additional
- ; 4-byte spill slot, rounded to 8 bytes.  The frame size should be exactly
-Index: llvm-36/test/CodeGen/SystemZ/frame-19.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/frame-19.ll
-@@ -0,0 +1,314 @@
-+; Test spilling of vector registers.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; We need to allocate a 16-byte spill slot and save the 8 call-saved FPRs.
-+; The frame size should be exactly 160 + 16 + 8 * 8 = 240.
-+define void @f1(<16 x i8> *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: aghi %r15, -240
-+; CHECK-DAG: std %f8,
-+; CHECK-DAG: std %f9,
-+; CHECK-DAG: std %f10,
-+; CHECK-DAG: std %f11,
-+; CHECK-DAG: std %f12,
-+; CHECK-DAG: std %f13,
-+; CHECK-DAG: std %f14,
-+; CHECK-DAG: std %f15,
-+; CHECK: vst {{%v[0-9]+}}, 160(%r15)
-+; CHECK: vl {{%v[0-9]+}}, 160(%r15)
-+; CHECK-DAG: ld %f8,
-+; CHECK-DAG: ld %f9,
-+; CHECK-DAG: ld %f10,
-+; CHECK-DAG: ld %f11,
-+; CHECK-DAG: ld %f12,
-+; CHECK-DAG: ld %f13,
-+; CHECK-DAG: ld %f14,
-+; CHECK-DAG: ld %f15,
-+; CHECK: aghi %r15, 240
-+; CHECK: br %r14
-+  %v0 = load volatile <16 x i8> *%ptr
-+  %v1 = load volatile <16 x i8> *%ptr
-+  %v2 = load volatile <16 x i8> *%ptr
-+  %v3 = load volatile <16 x i8> *%ptr
-+  %v4 = load volatile <16 x i8> *%ptr
-+  %v5 = load volatile <16 x i8> *%ptr
-+  %v6 = load volatile <16 x i8> *%ptr
-+  %v7 = load volatile <16 x i8> *%ptr
-+  %v8 = load volatile <16 x i8> *%ptr
-+  %v9 = load volatile <16 x i8> *%ptr
-+  %v10 = load volatile <16 x i8> *%ptr
-+  %v11 = load volatile <16 x i8> *%ptr
-+  %v12 = load volatile <16 x i8> *%ptr
-+  %v13 = load volatile <16 x i8> *%ptr
-+  %v14 = load volatile <16 x i8> *%ptr
-+  %v15 = load volatile <16 x i8> *%ptr
-+  %v16 = load volatile <16 x i8> *%ptr
-+  %v17 = load volatile <16 x i8> *%ptr
-+  %v18 = load volatile <16 x i8> *%ptr
-+  %v19 = load volatile <16 x i8> *%ptr
-+  %v20 = load volatile <16 x i8> *%ptr
-+  %v21 = load volatile <16 x i8> *%ptr
-+  %v22 = load volatile <16 x i8> *%ptr
-+  %v23 = load volatile <16 x i8> *%ptr
-+  %v24 = load volatile <16 x i8> *%ptr
-+  %v25 = load volatile <16 x i8> *%ptr
-+  %v26 = load volatile <16 x i8> *%ptr
-+  %v27 = load volatile <16 x i8> *%ptr
-+  %v28 = load volatile <16 x i8> *%ptr
-+  %v29 = load volatile <16 x i8> *%ptr
-+  %v30 = load volatile <16 x i8> *%ptr
-+  %v31 = load volatile <16 x i8> *%ptr
-+  %vx = load volatile <16 x i8> *%ptr
-+  store volatile <16 x i8> %vx, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v31, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v30, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v29, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v28, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v27, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v26, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v25, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v24, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v23, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v22, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v21, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v20, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v19, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v18, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v17, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v16, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v15, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v14, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v13, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v12, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v11, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v10, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v9, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v8, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v7, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v6, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v5, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v4, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v3, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v2, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v1, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v0, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Like f1, but no 16-byte slot should be needed.
-+define void @f2(<16 x i8> *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: aghi %r15, -224
-+; CHECK-DAG: std %f8,
-+; CHECK-DAG: std %f9,
-+; CHECK-DAG: std %f10,
-+; CHECK-DAG: std %f11,
-+; CHECK-DAG: std %f12,
-+; CHECK-DAG: std %f13,
-+; CHECK-DAG: std %f14,
-+; CHECK-DAG: std %f15,
-+; CHECK-NOT: vst {{.*}}(%r15)
-+; CHECK-NOT: vl {{.*}}(%r15)
-+; CHECK-DAG: ld %f8,
-+; CHECK-DAG: ld %f9,
-+; CHECK-DAG: ld %f10,
-+; CHECK-DAG: ld %f11,
-+; CHECK-DAG: ld %f12,
-+; CHECK-DAG: ld %f13,
-+; CHECK-DAG: ld %f14,
-+; CHECK-DAG: ld %f15,
-+; CHECK: aghi %r15, 224
-+; CHECK: br %r14
-+  %v0 = load volatile <16 x i8> *%ptr
-+  %v1 = load volatile <16 x i8> *%ptr
-+  %v2 = load volatile <16 x i8> *%ptr
-+  %v3 = load volatile <16 x i8> *%ptr
-+  %v4 = load volatile <16 x i8> *%ptr
-+  %v5 = load volatile <16 x i8> *%ptr
-+  %v6 = load volatile <16 x i8> *%ptr
-+  %v7 = load volatile <16 x i8> *%ptr
-+  %v8 = load volatile <16 x i8> *%ptr
-+  %v9 = load volatile <16 x i8> *%ptr
-+  %v10 = load volatile <16 x i8> *%ptr
-+  %v11 = load volatile <16 x i8> *%ptr
-+  %v12 = load volatile <16 x i8> *%ptr
-+  %v13 = load volatile <16 x i8> *%ptr
-+  %v14 = load volatile <16 x i8> *%ptr
-+  %v15 = load volatile <16 x i8> *%ptr
-+  %v16 = load volatile <16 x i8> *%ptr
-+  %v17 = load volatile <16 x i8> *%ptr
-+  %v18 = load volatile <16 x i8> *%ptr
-+  %v19 = load volatile <16 x i8> *%ptr
-+  %v20 = load volatile <16 x i8> *%ptr
-+  %v21 = load volatile <16 x i8> *%ptr
-+  %v22 = load volatile <16 x i8> *%ptr
-+  %v23 = load volatile <16 x i8> *%ptr
-+  %v24 = load volatile <16 x i8> *%ptr
-+  %v25 = load volatile <16 x i8> *%ptr
-+  %v26 = load volatile <16 x i8> *%ptr
-+  %v27 = load volatile <16 x i8> *%ptr
-+  %v28 = load volatile <16 x i8> *%ptr
-+  %v29 = load volatile <16 x i8> *%ptr
-+  %v30 = load volatile <16 x i8> *%ptr
-+  %v31 = load volatile <16 x i8> *%ptr
-+  store volatile <16 x i8> %v31, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v30, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v29, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v28, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v27, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v26, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v25, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v24, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v23, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v22, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v21, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v20, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v19, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v18, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v17, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v16, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v15, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v14, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v13, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v12, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v11, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v10, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v9, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v8, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v7, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v6, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v5, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v4, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v3, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v2, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v1, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v0, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Like f2, but only %f8 should be saved.
-+define void @f3(<16 x i8> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: aghi %r15, -168
-+; CHECK-DAG: std %f8,
-+; CHECK-NOT: vst {{.*}}(%r15)
-+; CHECK-NOT: vl {{.*}}(%r15)
-+; CHECK-NOT: %v9
-+; CHECK-NOT: %v10
-+; CHECK-NOT: %v11
-+; CHECK-NOT: %v12
-+; CHECK-NOT: %v13
-+; CHECK-NOT: %v14
-+; CHECK-NOT: %v15
-+; CHECK-DAG: ld %f8,
-+; CHECK: aghi %r15, 168
-+; CHECK: br %r14
-+  %v0 = load volatile <16 x i8> *%ptr
-+  %v1 = load volatile <16 x i8> *%ptr
-+  %v2 = load volatile <16 x i8> *%ptr
-+  %v3 = load volatile <16 x i8> *%ptr
-+  %v4 = load volatile <16 x i8> *%ptr
-+  %v5 = load volatile <16 x i8> *%ptr
-+  %v6 = load volatile <16 x i8> *%ptr
-+  %v7 = load volatile <16 x i8> *%ptr
-+  %v8 = load volatile <16 x i8> *%ptr
-+  %v16 = load volatile <16 x i8> *%ptr
-+  %v17 = load volatile <16 x i8> *%ptr
-+  %v18 = load volatile <16 x i8> *%ptr
-+  %v19 = load volatile <16 x i8> *%ptr
-+  %v20 = load volatile <16 x i8> *%ptr
-+  %v21 = load volatile <16 x i8> *%ptr
-+  %v22 = load volatile <16 x i8> *%ptr
-+  %v23 = load volatile <16 x i8> *%ptr
-+  %v24 = load volatile <16 x i8> *%ptr
-+  %v25 = load volatile <16 x i8> *%ptr
-+  %v26 = load volatile <16 x i8> *%ptr
-+  %v27 = load volatile <16 x i8> *%ptr
-+  %v28 = load volatile <16 x i8> *%ptr
-+  %v29 = load volatile <16 x i8> *%ptr
-+  %v30 = load volatile <16 x i8> *%ptr
-+  %v31 = load volatile <16 x i8> *%ptr
-+  store volatile <16 x i8> %v31, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v30, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v29, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v28, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v27, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v26, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v25, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v24, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v23, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v22, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v21, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v20, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v19, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v18, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v17, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v16, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v8, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v7, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v6, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v5, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v4, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v3, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v2, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v1, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v0, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Like f2, but no registers should be saved.
-+define void @f4(<16 x i8> *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK-NOT: %r15
-+; CHECK: br %r14
-+  %v0 = load volatile <16 x i8> *%ptr
-+  %v1 = load volatile <16 x i8> *%ptr
-+  %v2 = load volatile <16 x i8> *%ptr
-+  %v3 = load volatile <16 x i8> *%ptr
-+  %v4 = load volatile <16 x i8> *%ptr
-+  %v5 = load volatile <16 x i8> *%ptr
-+  %v6 = load volatile <16 x i8> *%ptr
-+  %v7 = load volatile <16 x i8> *%ptr
-+  %v16 = load volatile <16 x i8> *%ptr
-+  %v17 = load volatile <16 x i8> *%ptr
-+  %v18 = load volatile <16 x i8> *%ptr
-+  %v19 = load volatile <16 x i8> *%ptr
-+  %v20 = load volatile <16 x i8> *%ptr
-+  %v21 = load volatile <16 x i8> *%ptr
-+  %v22 = load volatile <16 x i8> *%ptr
-+  %v23 = load volatile <16 x i8> *%ptr
-+  %v24 = load volatile <16 x i8> *%ptr
-+  %v25 = load volatile <16 x i8> *%ptr
-+  %v26 = load volatile <16 x i8> *%ptr
-+  %v27 = load volatile <16 x i8> *%ptr
-+  %v28 = load volatile <16 x i8> *%ptr
-+  %v29 = load volatile <16 x i8> *%ptr
-+  %v30 = load volatile <16 x i8> *%ptr
-+  %v31 = load volatile <16 x i8> *%ptr
-+  store volatile <16 x i8> %v31, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v30, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v29, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v28, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v27, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v26, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v25, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v24, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v23, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v22, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v21, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v20, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v19, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v18, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v17, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v16, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v7, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v6, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v5, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v4, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v3, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v2, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v1, <16 x i8> *%ptr
-+  store volatile <16 x i8> %v0, <16 x i8> *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/frame-20.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/frame-20.ll
-@@ -0,0 +1,445 @@
-+; Like frame-03.ll, but for z13.  In this case we have 16 more registers
-+; available.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; This function should require all FPRs, but no other spill slots.
-+; We need to save and restore 8 of the 16 FPRs, so the frame size
-+; should be exactly 160 + 8 * 8 = 224.  The CFA offset is 160
-+; (the caller-allocated part of the frame) + 224.
-+define void @f1(double *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: aghi %r15, -224
-+; CHECK: .cfi_def_cfa_offset 384
-+; CHECK: std %f8, 216(%r15)
-+; CHECK: std %f9, 208(%r15)
-+; CHECK: std %f10, 200(%r15)
-+; CHECK: std %f11, 192(%r15)
-+; CHECK: std %f12, 184(%r15)
-+; CHECK: std %f13, 176(%r15)
-+; CHECK: std %f14, 168(%r15)
-+; CHECK: std %f15, 160(%r15)
-+; CHECK: .cfi_offset %f8, -168
-+; CHECK: .cfi_offset %f9, -176
-+; CHECK: .cfi_offset %f10, -184
-+; CHECK: .cfi_offset %f11, -192
-+; CHECK: .cfi_offset %f12, -200
-+; CHECK: .cfi_offset %f13, -208
-+; CHECK: .cfi_offset %f14, -216
-+; CHECK: .cfi_offset %f15, -224
-+; CHECK-DAG: ld %f0, 0(%r2)
-+; CHECK-DAG: ld %f7, 0(%r2)
-+; CHECK-DAG: ld %f8, 0(%r2)
-+; CHECK-DAG: ld %f15, 0(%r2)
-+; CHECK-DAG: vlrepg %v16, 0(%r2)
-+; CHECK-DAG: vlrepg %v23, 0(%r2)
-+; CHECK-DAG: vlrepg %v24, 0(%r2)
-+; CHECK-DAG: vlrepg %v31, 0(%r2)
-+; CHECK: ld %f8, 216(%r15)
-+; CHECK: ld %f9, 208(%r15)
-+; CHECK: ld %f10, 200(%r15)
-+; CHECK: ld %f11, 192(%r15)
-+; CHECK: ld %f12, 184(%r15)
-+; CHECK: ld %f13, 176(%r15)
-+; CHECK: ld %f14, 168(%r15)
-+; CHECK: ld %f15, 160(%r15)
-+; CHECK: aghi %r15, 224
-+; CHECK: br %r14
-+  %l0 = load volatile double *%ptr
-+  %l1 = load volatile double *%ptr
-+  %l2 = load volatile double *%ptr
-+  %l3 = load volatile double *%ptr
-+  %l4 = load volatile double *%ptr
-+  %l5 = load volatile double *%ptr
-+  %l6 = load volatile double *%ptr
-+  %l7 = load volatile double *%ptr
-+  %l8 = load volatile double *%ptr
-+  %l9 = load volatile double *%ptr
-+  %l10 = load volatile double *%ptr
-+  %l11 = load volatile double *%ptr
-+  %l12 = load volatile double *%ptr
-+  %l13 = load volatile double *%ptr
-+  %l14 = load volatile double *%ptr
-+  %l15 = load volatile double *%ptr
-+  %l16 = load volatile double *%ptr
-+  %l17 = load volatile double *%ptr
-+  %l18 = load volatile double *%ptr
-+  %l19 = load volatile double *%ptr
-+  %l20 = load volatile double *%ptr
-+  %l21 = load volatile double *%ptr
-+  %l22 = load volatile double *%ptr
-+  %l23 = load volatile double *%ptr
-+  %l24 = load volatile double *%ptr
-+  %l25 = load volatile double *%ptr
-+  %l26 = load volatile double *%ptr
-+  %l27 = load volatile double *%ptr
-+  %l28 = load volatile double *%ptr
-+  %l29 = load volatile double *%ptr
-+  %l30 = load volatile double *%ptr
-+  %l31 = load volatile double *%ptr
-+  %acc0 = fsub double %l0, %l0
-+  %acc1 = fsub double %l1, %acc0
-+  %acc2 = fsub double %l2, %acc1
-+  %acc3 = fsub double %l3, %acc2
-+  %acc4 = fsub double %l4, %acc3
-+  %acc5 = fsub double %l5, %acc4
-+  %acc6 = fsub double %l6, %acc5
-+  %acc7 = fsub double %l7, %acc6
-+  %acc8 = fsub double %l8, %acc7
-+  %acc9 = fsub double %l9, %acc8
-+  %acc10 = fsub double %l10, %acc9
-+  %acc11 = fsub double %l11, %acc10
-+  %acc12 = fsub double %l12, %acc11
-+  %acc13 = fsub double %l13, %acc12
-+  %acc14 = fsub double %l14, %acc13
-+  %acc15 = fsub double %l15, %acc14
-+  %acc16 = fsub double %l16, %acc15
-+  %acc17 = fsub double %l17, %acc16
-+  %acc18 = fsub double %l18, %acc17
-+  %acc19 = fsub double %l19, %acc18
-+  %acc20 = fsub double %l20, %acc19
-+  %acc21 = fsub double %l21, %acc20
-+  %acc22 = fsub double %l22, %acc21
-+  %acc23 = fsub double %l23, %acc22
-+  %acc24 = fsub double %l24, %acc23
-+  %acc25 = fsub double %l25, %acc24
-+  %acc26 = fsub double %l26, %acc25
-+  %acc27 = fsub double %l27, %acc26
-+  %acc28 = fsub double %l28, %acc27
-+  %acc29 = fsub double %l29, %acc28
-+  %acc30 = fsub double %l30, %acc29
-+  %acc31 = fsub double %l31, %acc30
-+  store volatile double %acc0, double *%ptr
-+  store volatile double %acc1, double *%ptr
-+  store volatile double %acc2, double *%ptr
-+  store volatile double %acc3, double *%ptr
-+  store volatile double %acc4, double *%ptr
-+  store volatile double %acc5, double *%ptr
-+  store volatile double %acc6, double *%ptr
-+  store volatile double %acc7, double *%ptr
-+  store volatile double %acc8, double *%ptr
-+  store volatile double %acc9, double *%ptr
-+  store volatile double %acc10, double *%ptr
-+  store volatile double %acc11, double *%ptr
-+  store volatile double %acc12, double *%ptr
-+  store volatile double %acc13, double *%ptr
-+  store volatile double %acc14, double *%ptr
-+  store volatile double %acc15, double *%ptr
-+  store volatile double %acc16, double *%ptr
-+  store volatile double %acc17, double *%ptr
-+  store volatile double %acc18, double *%ptr
-+  store volatile double %acc19, double *%ptr
-+  store volatile double %acc20, double *%ptr
-+  store volatile double %acc21, double *%ptr
-+  store volatile double %acc22, double *%ptr
-+  store volatile double %acc23, double *%ptr
-+  store volatile double %acc24, double *%ptr
-+  store volatile double %acc25, double *%ptr
-+  store volatile double %acc26, double *%ptr
-+  store volatile double %acc27, double *%ptr
-+  store volatile double %acc28, double *%ptr
-+  store volatile double %acc29, double *%ptr
-+  store volatile double %acc30, double *%ptr
-+  store volatile double %acc31, double *%ptr
-+  ret void
-+}
-+
-+; Like f1, but requires one fewer FPR.  We allocate in numerical order,
-+; so %f15 is the one that gets dropped.
-+define void @f2(double *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: aghi %r15, -216
-+; CHECK: .cfi_def_cfa_offset 376
-+; CHECK: std %f8, 208(%r15)
-+; CHECK: std %f9, 200(%r15)
-+; CHECK: std %f10, 192(%r15)
-+; CHECK: std %f11, 184(%r15)
-+; CHECK: std %f12, 176(%r15)
-+; CHECK: std %f13, 168(%r15)
-+; CHECK: std %f14, 160(%r15)
-+; CHECK: .cfi_offset %f8, -168
-+; CHECK: .cfi_offset %f9, -176
-+; CHECK: .cfi_offset %f10, -184
-+; CHECK: .cfi_offset %f11, -192
-+; CHECK: .cfi_offset %f12, -200
-+; CHECK: .cfi_offset %f13, -208
-+; CHECK: .cfi_offset %f14, -216
-+; CHECK-NOT: %v15
-+; CHECK-NOT: %f15
-+; CHECK: ld %f8, 208(%r15)
-+; CHECK: ld %f9, 200(%r15)
-+; CHECK: ld %f10, 192(%r15)
-+; CHECK: ld %f11, 184(%r15)
-+; CHECK: ld %f12, 176(%r15)
-+; CHECK: ld %f13, 168(%r15)
-+; CHECK: ld %f14, 160(%r15)
-+; CHECK: aghi %r15, 216
-+; CHECK: br %r14
-+  %l0 = load volatile double *%ptr
-+  %l1 = load volatile double *%ptr
-+  %l2 = load volatile double *%ptr
-+  %l3 = load volatile double *%ptr
-+  %l4 = load volatile double *%ptr
-+  %l5 = load volatile double *%ptr
-+  %l6 = load volatile double *%ptr
-+  %l7 = load volatile double *%ptr
-+  %l8 = load volatile double *%ptr
-+  %l9 = load volatile double *%ptr
-+  %l10 = load volatile double *%ptr
-+  %l11 = load volatile double *%ptr
-+  %l12 = load volatile double *%ptr
-+  %l13 = load volatile double *%ptr
-+  %l14 = load volatile double *%ptr
-+  %l16 = load volatile double *%ptr
-+  %l17 = load volatile double *%ptr
-+  %l18 = load volatile double *%ptr
-+  %l19 = load volatile double *%ptr
-+  %l20 = load volatile double *%ptr
-+  %l21 = load volatile double *%ptr
-+  %l22 = load volatile double *%ptr
-+  %l23 = load volatile double *%ptr
-+  %l24 = load volatile double *%ptr
-+  %l25 = load volatile double *%ptr
-+  %l26 = load volatile double *%ptr
-+  %l27 = load volatile double *%ptr
-+  %l28 = load volatile double *%ptr
-+  %l29 = load volatile double *%ptr
-+  %l30 = load volatile double *%ptr
-+  %l31 = load volatile double *%ptr
-+  %acc0 = fsub double %l0, %l0
-+  %acc1 = fsub double %l1, %acc0
-+  %acc2 = fsub double %l2, %acc1
-+  %acc3 = fsub double %l3, %acc2
-+  %acc4 = fsub double %l4, %acc3
-+  %acc5 = fsub double %l5, %acc4
-+  %acc6 = fsub double %l6, %acc5
-+  %acc7 = fsub double %l7, %acc6
-+  %acc8 = fsub double %l8, %acc7
-+  %acc9 = fsub double %l9, %acc8
-+  %acc10 = fsub double %l10, %acc9
-+  %acc11 = fsub double %l11, %acc10
-+  %acc12 = fsub double %l12, %acc11
-+  %acc13 = fsub double %l13, %acc12
-+  %acc14 = fsub double %l14, %acc13
-+  %acc16 = fsub double %l16, %acc14
-+  %acc17 = fsub double %l17, %acc16
-+  %acc18 = fsub double %l18, %acc17
-+  %acc19 = fsub double %l19, %acc18
-+  %acc20 = fsub double %l20, %acc19
-+  %acc21 = fsub double %l21, %acc20
-+  %acc22 = fsub double %l22, %acc21
-+  %acc23 = fsub double %l23, %acc22
-+  %acc24 = fsub double %l24, %acc23
-+  %acc25 = fsub double %l25, %acc24
-+  %acc26 = fsub double %l26, %acc25
-+  %acc27 = fsub double %l27, %acc26
-+  %acc28 = fsub double %l28, %acc27
-+  %acc29 = fsub double %l29, %acc28
-+  %acc30 = fsub double %l30, %acc29
-+  %acc31 = fsub double %l31, %acc30
-+  store volatile double %acc0, double *%ptr
-+  store volatile double %acc1, double *%ptr
-+  store volatile double %acc2, double *%ptr
-+  store volatile double %acc3, double *%ptr
-+  store volatile double %acc4, double *%ptr
-+  store volatile double %acc5, double *%ptr
-+  store volatile double %acc6, double *%ptr
-+  store volatile double %acc7, double *%ptr
-+  store volatile double %acc8, double *%ptr
-+  store volatile double %acc9, double *%ptr
-+  store volatile double %acc10, double *%ptr
-+  store volatile double %acc11, double *%ptr
-+  store volatile double %acc12, double *%ptr
-+  store volatile double %acc13, double *%ptr
-+  store volatile double %acc14, double *%ptr
-+  store volatile double %acc16, double *%ptr
-+  store volatile double %acc17, double *%ptr
-+  store volatile double %acc18, double *%ptr
-+  store volatile double %acc19, double *%ptr
-+  store volatile double %acc20, double *%ptr
-+  store volatile double %acc21, double *%ptr
-+  store volatile double %acc22, double *%ptr
-+  store volatile double %acc23, double *%ptr
-+  store volatile double %acc24, double *%ptr
-+  store volatile double %acc25, double *%ptr
-+  store volatile double %acc26, double *%ptr
-+  store volatile double %acc27, double *%ptr
-+  store volatile double %acc28, double *%ptr
-+  store volatile double %acc29, double *%ptr
-+  store volatile double %acc30, double *%ptr
-+  store volatile double %acc31, double *%ptr
-+  ret void
-+}
-+
-+; Like f1, but should require only one call-saved FPR.
-+define void @f3(double *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: aghi %r15, -168
-+; CHECK: .cfi_def_cfa_offset 328
-+; CHECK: std %f8, 160(%r15)
-+; CHECK: .cfi_offset %f8, -168
-+; CHECK-NOT: {{%[fv]9}}
-+; CHECK-NOT: {{%[fv]1[0-5]}}
-+; CHECK: ld %f8, 160(%r15)
-+; CHECK: aghi %r15, 168
-+; CHECK: br %r14
-+  %l0 = load volatile double *%ptr
-+  %l1 = load volatile double *%ptr
-+  %l2 = load volatile double *%ptr
-+  %l3 = load volatile double *%ptr
-+  %l4 = load volatile double *%ptr
-+  %l5 = load volatile double *%ptr
-+  %l6 = load volatile double *%ptr
-+  %l7 = load volatile double *%ptr
-+  %l8 = load volatile double *%ptr
-+  %l16 = load volatile double *%ptr
-+  %l17 = load volatile double *%ptr
-+  %l18 = load volatile double *%ptr
-+  %l19 = load volatile double *%ptr
-+  %l20 = load volatile double *%ptr
-+  %l21 = load volatile double *%ptr
-+  %l22 = load volatile double *%ptr
-+  %l23 = load volatile double *%ptr
-+  %l24 = load volatile double *%ptr
-+  %l25 = load volatile double *%ptr
-+  %l26 = load volatile double *%ptr
-+  %l27 = load volatile double *%ptr
-+  %l28 = load volatile double *%ptr
-+  %l29 = load volatile double *%ptr
-+  %l30 = load volatile double *%ptr
-+  %l31 = load volatile double *%ptr
-+  %acc0 = fsub double %l0, %l0
-+  %acc1 = fsub double %l1, %acc0
-+  %acc2 = fsub double %l2, %acc1
-+  %acc3 = fsub double %l3, %acc2
-+  %acc4 = fsub double %l4, %acc3
-+  %acc5 = fsub double %l5, %acc4
-+  %acc6 = fsub double %l6, %acc5
-+  %acc7 = fsub double %l7, %acc6
-+  %acc8 = fsub double %l8, %acc7
-+  %acc16 = fsub double %l16, %acc8
-+  %acc17 = fsub double %l17, %acc16
-+  %acc18 = fsub double %l18, %acc17
-+  %acc19 = fsub double %l19, %acc18
-+  %acc20 = fsub double %l20, %acc19
-+  %acc21 = fsub double %l21, %acc20
-+  %acc22 = fsub double %l22, %acc21
-+  %acc23 = fsub double %l23, %acc22
-+  %acc24 = fsub double %l24, %acc23
-+  %acc25 = fsub double %l25, %acc24
-+  %acc26 = fsub double %l26, %acc25
-+  %acc27 = fsub double %l27, %acc26
-+  %acc28 = fsub double %l28, %acc27
-+  %acc29 = fsub double %l29, %acc28
-+  %acc30 = fsub double %l30, %acc29
-+  %acc31 = fsub double %l31, %acc30
-+  store volatile double %acc0, double *%ptr
-+  store volatile double %acc1, double *%ptr
-+  store volatile double %acc2, double *%ptr
-+  store volatile double %acc3, double *%ptr
-+  store volatile double %acc4, double *%ptr
-+  store volatile double %acc5, double *%ptr
-+  store volatile double %acc6, double *%ptr
-+  store volatile double %acc7, double *%ptr
-+  store volatile double %acc8, double *%ptr
-+  store volatile double %acc16, double *%ptr
-+  store volatile double %acc17, double *%ptr
-+  store volatile double %acc18, double *%ptr
-+  store volatile double %acc19, double *%ptr
-+  store volatile double %acc20, double *%ptr
-+  store volatile double %acc21, double *%ptr
-+  store volatile double %acc22, double *%ptr
-+  store volatile double %acc23, double *%ptr
-+  store volatile double %acc24, double *%ptr
-+  store volatile double %acc25, double *%ptr
-+  store volatile double %acc26, double *%ptr
-+  store volatile double %acc27, double *%ptr
-+  store volatile double %acc28, double *%ptr
-+  store volatile double %acc29, double *%ptr
-+  store volatile double %acc30, double *%ptr
-+  store volatile double %acc31, double *%ptr
-+  ret void
-+}
-+
-+; This function should use all call-clobbered FPRs and vector registers
-+; but no call-saved ones.  It shouldn't need to create a frame.
-+define void @f4(double *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK-NOT: %r15
-+; CHECK-NOT: {{%[fv][89]}}
-+; CHECK-NOT: {{%[fv]1[0-5]}}
-+; CHECK: br %r14
-+  %l0 = load volatile double *%ptr
-+  %l1 = load volatile double *%ptr
-+  %l2 = load volatile double *%ptr
-+  %l3 = load volatile double *%ptr
-+  %l4 = load volatile double *%ptr
-+  %l5 = load volatile double *%ptr
-+  %l6 = load volatile double *%ptr
-+  %l7 = load volatile double *%ptr
-+  %l16 = load volatile double *%ptr
-+  %l17 = load volatile double *%ptr
-+  %l18 = load volatile double *%ptr
-+  %l19 = load volatile double *%ptr
-+  %l20 = load volatile double *%ptr
-+  %l21 = load volatile double *%ptr
-+  %l22 = load volatile double *%ptr
-+  %l23 = load volatile double *%ptr
-+  %l24 = load volatile double *%ptr
-+  %l25 = load volatile double *%ptr
-+  %l26 = load volatile double *%ptr
-+  %l27 = load volatile double *%ptr
-+  %l28 = load volatile double *%ptr
-+  %l29 = load volatile double *%ptr
-+  %l30 = load volatile double *%ptr
-+  %l31 = load volatile double *%ptr
-+  %acc0 = fsub double %l0, %l0
-+  %acc1 = fsub double %l1, %acc0
-+  %acc2 = fsub double %l2, %acc1
-+  %acc3 = fsub double %l3, %acc2
-+  %acc4 = fsub double %l4, %acc3
-+  %acc5 = fsub double %l5, %acc4
-+  %acc6 = fsub double %l6, %acc5
-+  %acc7 = fsub double %l7, %acc6
-+  %acc16 = fsub double %l16, %acc7
-+  %acc17 = fsub double %l17, %acc16
-+  %acc18 = fsub double %l18, %acc17
-+  %acc19 = fsub double %l19, %acc18
-+  %acc20 = fsub double %l20, %acc19
-+  %acc21 = fsub double %l21, %acc20
-+  %acc22 = fsub double %l22, %acc21
-+  %acc23 = fsub double %l23, %acc22
-+  %acc24 = fsub double %l24, %acc23
-+  %acc25 = fsub double %l25, %acc24
-+  %acc26 = fsub double %l26, %acc25
-+  %acc27 = fsub double %l27, %acc26
-+  %acc28 = fsub double %l28, %acc27
-+  %acc29 = fsub double %l29, %acc28
-+  %acc30 = fsub double %l30, %acc29
-+  %acc31 = fsub double %l31, %acc30
-+  store volatile double %acc0, double *%ptr
-+  store volatile double %acc1, double *%ptr
-+  store volatile double %acc2, double *%ptr
-+  store volatile double %acc3, double *%ptr
-+  store volatile double %acc4, double *%ptr
-+  store volatile double %acc5, double *%ptr
-+  store volatile double %acc6, double *%ptr
-+  store volatile double %acc7, double *%ptr
-+  store volatile double %acc16, double *%ptr
-+  store volatile double %acc17, double *%ptr
-+  store volatile double %acc18, double *%ptr
-+  store volatile double %acc19, double *%ptr
-+  store volatile double %acc20, double *%ptr
-+  store volatile double %acc21, double *%ptr
-+  store volatile double %acc22, double *%ptr
-+  store volatile double %acc23, double *%ptr
-+  store volatile double %acc24, double *%ptr
-+  store volatile double %acc25, double *%ptr
-+  store volatile double %acc26, double *%ptr
-+  store volatile double %acc27, double *%ptr
-+  store volatile double %acc28, double *%ptr
-+  store volatile double %acc29, double *%ptr
-+  store volatile double %acc30, double *%ptr
-+  store volatile double %acc31, double *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll
-@@ -0,0 +1,352 @@
-+; Test transactional-execution intrinsics.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
-+
-+declare i32 @llvm.s390.tbegin(i8 *, i32)
-+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32)
-+declare void @llvm.s390.tbeginc(i8 *, i32)
-+declare i32 @llvm.s390.tend()
-+declare void @llvm.s390.tabort(i64)
-+declare void @llvm.s390.ntstg(i64, i64 *)
-+declare i32 @llvm.s390.etnd()
-+declare void @llvm.s390.ppa.txassist(i32)
-+
-+; TBEGIN.
-+define void @test_tbegin() {
-+; CHECK-LABEL: test_tbegin:
-+; CHECK-NOT: stmg
-+; CHECK: std %f8,
-+; CHECK: std %f9,
-+; CHECK: std %f10,
-+; CHECK: std %f11,
-+; CHECK: std %f12,
-+; CHECK: std %f13,
-+; CHECK: std %f14,
-+; CHECK: std %f15,
-+; CHECK: tbegin 0, 65292
-+; CHECK: ld %f8,
-+; CHECK: ld %f9,
-+; CHECK: ld %f10,
-+; CHECK: ld %f11,
-+; CHECK: ld %f12,
-+; CHECK: ld %f13,
-+; CHECK: ld %f14,
-+; CHECK: ld %f15,
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat).
-+define void @test_tbegin_nofloat1() {
-+; CHECK-LABEL: test_tbegin_nofloat1:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65292
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with integer CC return value.
-+define i32 @test_tbegin_nofloat2() {
-+; CHECK-LABEL: test_tbegin_nofloat2:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65292
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
-+  ret i32 %res
-+}
-+
-+; TBEGIN (nofloat) with implicit CC check.
-+define void @test_tbegin_nofloat3(i32 *%ptr) {
-+; CHECK-LABEL: test_tbegin_nofloat3:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65292
-+; CHECK: jnh  {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
-+  %cmp = icmp eq i32 %res, 2
-+  br i1 %cmp, label %if.then, label %if.end
-+
-+if.then:                                          ; preds = %entry
-+  store i32 0, i32* %ptr, align 4
-+  br label %if.end
-+
-+if.end:                                           ; preds = %if.then, %entry
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with dual CC use.
-+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) {
-+; CHECK-LABEL: test_tbegin_nofloat4:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65292
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: cijlh %r2, 2,  {{\.L*}}
-+; CHECK: mvhi 0(%r3), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292)
-+  %cmp = icmp eq i32 %res, 2
-+  br i1 %cmp, label %if.then, label %if.end
-+
-+if.then:                                          ; preds = %entry
-+  store i32 0, i32* %ptr, align 4
-+  br label %if.end
-+
-+if.end:                                           ; preds = %if.then, %entry
-+  ret i32 %res
-+}
-+
-+; TBEGIN (nofloat) with register.
-+define void @test_tbegin_nofloat5(i8 *%ptr) {
-+; CHECK-LABEL: test_tbegin_nofloat5:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0(%r2), 65292
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with GRSM 0x0f00.
-+define void @test_tbegin_nofloat6() {
-+; CHECK-LABEL: test_tbegin_nofloat6:
-+; CHECK: stmg %r6, %r15,
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 3840
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with GRSM 0xf100.
-+define void @test_tbegin_nofloat7() {
-+; CHECK-LABEL: test_tbegin_nofloat7:
-+; CHECK: stmg %r8, %r15,
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 61696
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically.
-+define void @test_tbegin_nofloat8() {
-+; CHECK-LABEL: test_tbegin_nofloat8:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65280
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed.
-+define void @test_tbegin_nofloat9() {
-+; CHECK-LABEL: test_tbegin_nofloat9:
-+; CHECK: stmg %r10, %r15,
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 64256
-+; CHECK: br %r14
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
-+  ret void
-+}
-+
-+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically.
-+define void @test_tbegin_nofloat10(i64 %n) {
-+; CHECK-LABEL: test_tbegin_nofloat10:
-+; CHECK: stmg %r11, %r15,
-+; CHECK-NOT: std
-+; CHECK: tbegin 0, 65280
-+; CHECK: br %r14
-+  %buf = alloca i8, i64 %n
-+  call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256)
-+  ret void
-+}
-+
-+; TBEGINC.
-+define void @test_tbeginc() {
-+; CHECK-LABEL: test_tbeginc:
-+; CHECK-NOT: stmg
-+; CHECK-NOT: std
-+; CHECK: tbeginc 0, 65288
-+; CHECK: br %r14
-+  call void @llvm.s390.tbeginc(i8 *null, i32 65288)
-+  ret void
-+}
-+
-+; TEND with integer CC return value.
-+define i32 @test_tend1() {
-+; CHECK-LABEL: test_tend1:
-+; CHECK: tend
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tend()
-+  ret i32 %res
-+}
-+
-+; TEND with implicit CC check.
-+define void @test_tend3(i32 *%ptr) {
-+; CHECK-LABEL: test_tend3:
-+; CHECK: tend
-+; CHECK: je  {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tend()
-+  %cmp = icmp eq i32 %res, 2
-+  br i1 %cmp, label %if.then, label %if.end
-+
-+if.then:                                          ; preds = %entry
-+  store i32 0, i32* %ptr, align 4
-+  br label %if.end
-+
-+if.end:                                           ; preds = %if.then, %entry
-+  ret void
-+}
-+
-+; TEND with dual CC use.
-+define i32 @test_tend2(i32 %pad, i32 *%ptr) {
-+; CHECK-LABEL: test_tend2:
-+; CHECK: tend
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: cijlh %r2, 2,  {{\.L*}}
-+; CHECK: mvhi 0(%r3), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.tend()
-+  %cmp = icmp eq i32 %res, 2
-+  br i1 %cmp, label %if.then, label %if.end
-+
-+if.then:                                          ; preds = %entry
-+  store i32 0, i32* %ptr, align 4
-+  br label %if.end
-+
-+if.end:                                           ; preds = %if.then, %entry
-+  ret i32 %res
-+}
-+
-+; TABORT with register only.
-+define void @test_tabort1(i64 %val) {
-+; CHECK-LABEL: test_tabort1:
-+; CHECK: tabort 0(%r2)
-+; CHECK: br %r14
-+  call void @llvm.s390.tabort(i64 %val)
-+  ret void
-+}
-+
-+; TABORT with immediate only.
-+define void @test_tabort2(i64 %val) {
-+; CHECK-LABEL: test_tabort2:
-+; CHECK: tabort 1234
-+; CHECK: br %r14
-+  call void @llvm.s390.tabort(i64 1234)
-+  ret void
-+}
-+
-+; TABORT with register + immediate.
-+define void @test_tabort3(i64 %val) {
-+; CHECK-LABEL: test_tabort3:
-+; CHECK: tabort 1234(%r2)
-+; CHECK: br %r14
-+  %sum = add i64 %val, 1234
-+  call void @llvm.s390.tabort(i64 %sum)
-+  ret void
-+}
-+
-+; TABORT with out-of-range immediate.
-+define void @test_tabort4(i64 %val) {
-+; CHECK-LABEL: test_tabort4:
-+; CHECK: tabort 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  call void @llvm.s390.tabort(i64 4096)
-+  ret void
-+}
-+
-+; NTSTG with base pointer only.
-+define void @test_ntstg1(i64 *%ptr, i64 %val) {
-+; CHECK-LABEL: test_ntstg1:
-+; CHECK: ntstg %r3, 0(%r2)
-+; CHECK: br %r14
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; NTSTG with base and index.
-+; Check that VSTL doesn't allow an index.
-+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
-+; CHECK-LABEL: test_ntstg2:
-+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
-+; CHECK: ntstg %r4, 0([[REG]],%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i64 %index
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; NTSTG with the highest in-range displacement.
-+define void @test_ntstg3(i64 *%base, i64 %val) {
-+; CHECK-LABEL: test_ntstg3:
-+; CHECK: ntstg %r3, 524280(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i64 65535
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; NTSTG with an out-of-range positive displacement.
-+define void @test_ntstg4(i64 *%base, i64 %val) {
-+; CHECK-LABEL: test_ntstg4:
-+; CHECK: ntstg %r3, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i64 65536
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; NTSTG with the lowest in-range displacement.
-+define void @test_ntstg5(i64 *%base, i64 %val) {
-+; CHECK-LABEL: test_ntstg5:
-+; CHECK: ntstg %r3, -524288(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i64 -65536
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; NTSTG with an out-of-range negative displacement.
-+define void @test_ntstg6(i64 *%base, i64 %val) {
-+; CHECK-LABEL: test_ntstg6:
-+; CHECK: ntstg %r3, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i64 -65537
-+  call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
-+  ret void
-+}
-+
-+; ETND.
-+define i32 @test_etnd() {
-+; CHECK-LABEL: test_etnd:
-+; CHECK: etnd %r2
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.etnd()
-+  ret i32 %res
-+}
-+
-+; PPA (Transaction-Abort Assist)
-+define void @test_ppa_txassist(i32 %val) {
-+; CHECK-LABEL: test_ppa_txassist:
-+; CHECK: ppa %r2, 0, 1
-+; CHECK: br %r14
-+  call void @llvm.s390.ppa.txassist(i32 %val)
-+  ret void
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-12.ll
-+++ llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll
-@@ -49,13 +49,24 @@ define double @f4(double %a, double %b,
-   ret double %res
- }
- 
--; Check the next value up, which must use a register comparison.
-+; Check the next value up, which can use a shifted comparison
- define double @f5(double %a, double %b, i64 %i1) {
- ; CHECK-LABEL: f5:
--; CHECK: clgrjl %r2,
-+; CHECK: srlg [[REG:%r[0-5]]], %r2, 32
-+; CHECK: cgije [[REG]], 0
- ; CHECK: ldr %f0, %f2
- ; CHECK: br %r14
-   %cond = icmp ult i64 %i1, 4294967296
-   %res = select i1 %cond, double %a, double %b
-   ret double %res
- }
-+; Check the next value up, which must use a register comparison.
-+define double @f6(double %a, double %b, i64 %i1) {
-+; CHECK-LABEL: f6:
-+; CHECK: clgrjl %r2,
-+; CHECK: ldr %f0, %f2
-+; CHECK: br %r14
-+  %cond = icmp ult i64 %i1, 4294967297
-+  %res = select i1 %cond, double %a, double %b
-+  ret double %res
-+}
-Index: llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-47.ll
-+++ llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll
-@@ -309,7 +309,8 @@ exit:
- define void @f17(i64 %a) {
- ; CHECK-LABEL: f17:
- ; CHECK-NOT: tmhh
--; CHECK: llihh {{%r[0-5]}}, 49151
-+; CHECK: srlg [[REG:%r[0-5]]], %r2, 48
-+; CHECK: cgfi [[REG]], 49151
- ; CHECK-NOT: tmhh
- ; CHECK: br %r14
- entry:
-Index: llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll
-@@ -0,0 +1,30 @@
-+; Verify that we do not crash on always-true conditions
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0
-+;
-+; This test was compiled using clang -O0 from the following source code:
-+;
-+; int test(unsigned long x)
-+; {
-+;   return x >= 0 && x <= 15;
-+; }
-+
-+define signext i32 @test(i64 %x) {
-+entry:
-+  %x.addr = alloca i64, align 8
-+  store i64 %x, i64* %x.addr, align 8
-+  %0 = load i64 *%x.addr, align 8
-+  %cmp = icmp uge i64 %0, 0
-+  br i1 %cmp, label %land.rhs, label %land.end
-+
-+land.rhs:                                         ; preds = %entry
-+  %1 = load i64 *%x.addr, align 8
-+  %cmp1 = icmp ule i64 %1, 15
-+  br label %land.end
-+
-+land.end:                                         ; preds = %land.rhs, %entry
-+  %2 = phi i1 [ false, %entry ], [ %cmp1, %land.rhs ]
-+  %land.ext = zext i1 %2 to i32
-+  ret i32 %land.ext
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/risbg-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/risbg-03.ll
-@@ -0,0 +1,30 @@
-+; Test use of RISBG vs RISBGN on zEC12.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s
-+
-+; On zEC12, we generally prefer RISBGN.
-+define i64 @f1(i64 %a, i64 %b) {
-+; CHECK-LABEL: f1:
-+; CHECK: risbgn %r2, %r3, 60, 62, 0
-+; CHECK: br %r14
-+  %anda = and i64 %a, -15
-+  %andb = and i64 %b, 14
-+  %or = or i64 %anda, %andb
-+  ret i64 %or
-+}
-+
-+; But we may fall back to RISBG if we can use the condition code.
-+define i64 @f2(i64 %a, i64 %b, i32* %c) {
-+; CHECK-LABEL: f2:
-+; CHECK: risbg %r2, %r3, 60, 62, 0
-+; CHECK-NEXT: ipm
-+; CHECK: br %r14
-+  %anda = and i64 %a, -15
-+  %andb = and i64 %b, 14
-+  %or = or i64 %anda, %andb
-+  %cmp = icmp sgt i64 %or, 0
-+  %conv = zext i1 %cmp to i32
-+  store i32 %conv, i32* %c, align 4
-+  ret i64 %or
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/tls-01.ll
-===================================================================
---- llvm-36.orig/test/CodeGen/SystemZ/tls-01.ll
-+++ llvm-36/test/CodeGen/SystemZ/tls-01.ll
-@@ -1,7 +1,7 @@
--; Test initial-exec TLS accesses.
-+; Test local-exec TLS accesses.
- ;
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
--; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP
- 
- @x = thread_local global i32 0
- 
-Index: llvm-36/test/CodeGen/SystemZ/tls-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-02.ll
-@@ -0,0 +1,18 @@
-+; Test initial-exec TLS accesses.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
-+
-+@x = thread_local(initialexec) global i32 0
-+
-+; The offset must be loaded from the GOT.  This TLS access model does
-+; not use literal pool constants.
-+define i32 *@foo() {
-+; CHECK-MAIN-LABEL: foo:
-+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
-+; CHECK-MAIN: sllg %r2, [[HIGH]], 32
-+; CHECK-MAIN: ear %r2, %a1
-+; CHECK-MAIN: larl %r1, x@INDNTPOFF
-+; CHECK-MAIN: ag %r2, 0(%r1)
-+; CHECK-MAIN: br %r14
-+  ret i32 *@x
-+}
-Index: llvm-36/test/CodeGen/SystemZ/tls-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-03.ll
-@@ -0,0 +1,23 @@
-+; Test general-dynamic TLS accesses.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
-+
-+@x = thread_local global i32 0
-+
-+; Call __tls_get_offset to retrieve the symbol's TLS offset.
-+define i32 *@foo() {
-+; CHECK-CP: .LCP{{.*}}:
-+; CHECK-CP: .quad x@TLSGD
-+;
-+; CHECK-MAIN-LABEL: foo:
-+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
-+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}
-+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
-+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
-+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
-+; CHECK-MAIN: ear [[TP]], %a1
-+; CHECK-MAIN: agr %r2, [[TP]]
-+; CHECK-MAIN: br %r14
-+  ret i32 *@x
-+}
-Index: llvm-36/test/CodeGen/SystemZ/tls-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-04.ll
-@@ -0,0 +1,28 @@
-+; Test local-dynamic TLS accesses.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP
-+
-+@x = thread_local(localdynamic) global i32 0
-+
-+; Call __tls_get_offset to retrieve the module's TLS base offset.
-+; Add the per-symbol offset and the thread pointer.
-+define i32 *@foo() {
-+; CHECK-CP: .LCP{{.*}}_0:
-+; CHECK-CP: .quad x@TLSLDM
-+; CHECK-CP: .LCP{{.*}}_1:
-+; CHECK-CP: .quad x@DTPOFF
-+;
-+; CHECK-MAIN-LABEL: foo:
-+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
-+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0
-+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x
-+; CHECK-MAIN: larl %r1, .LCP{{.*}}_1
-+; CHECK-MAIN: ag %r2, 0(%r1)
-+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
-+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
-+; CHECK-MAIN: ear [[TP]], %a1
-+; CHECK-MAIN: agr %r2, [[TP]]
-+; CHECK-MAIN: br %r14
-+  ret i32 *@x
-+}
-Index: llvm-36/test/CodeGen/SystemZ/tls-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-05.ll
-@@ -0,0 +1,15 @@
-+; Test general-dynamic TLS access optimizations.
-+;
-+; If we access the same TLS variable twice, there should only be
-+; a single call to __tls_get_offset.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
-+
-+@x = thread_local global i32 0
-+
-+define i32 @foo() {
-+  %val = load i32* @x
-+  %inc = add nsw i32 %val, 1
-+  store i32 %inc, i32* @x
-+  ret i32 %val
-+}
-Index: llvm-36/test/CodeGen/SystemZ/tls-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-06.ll
-@@ -0,0 +1,17 @@
-+; Test general-dynamic TLS access optimizations.
-+;
-+; If we access two different TLS variables, we need two calls to
-+; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE only once.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1
-+
-+@x = thread_local global i32 0
-+@y = thread_local global i32 0
-+
-+define i32 @foo() {
-+  %valx = load i32* @x
-+  %valy = load i32* @y
-+  %add = add nsw i32 %valx, %valy
-+  ret i32 %add
-+}
-Index: llvm-36/test/CodeGen/SystemZ/tls-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/tls-07.ll
-@@ -0,0 +1,16 @@
-+; Test local-dynamic TLS access optimizations.
-+;
-+; If we access two different local-dynamic TLS variables, we only
-+; need a single call to __tls_get_offset.
-+;
-+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
-+
-+@x = thread_local(localdynamic) global i32 0
-+@y = thread_local(localdynamic) global i32 0
-+
-+define i32 @foo() {
-+  %valx = load i32* @x
-+  %valy = load i32* @y
-+  %add = add nsw i32 %valx, %valy
-+  ret i32 %add
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll
-@@ -0,0 +1,49 @@
-+; Verify that we use the vector ABI datalayout if and only if
-+; the vector facility is present.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
-+; RUN:   FileCheck -check-prefix=CHECK-NOVECTOR %s
-+
-+%struct.S = type { i8, <2 x i64> }
-+
-+define void @test(%struct.S* %s) nounwind {
-+; CHECK-VECTOR-LABEL: @test
-+; CHECK-VECTOR: vl %v0, 8(%r2)
-+; CHECK-NOVECTOR-LABEL: @test
-+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
-+; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
-+  %ptr = getelementptr %struct.S* %s, i64 0, i32 1
-+  %vec = load <2 x i64>* %ptr
-+  %add = add <2 x i64> %vec, <i64 1, i64 1>
-+  store <2 x i64> %add, <2 x i64>* %ptr
-+  ret void
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll
-@@ -0,0 +1,146 @@
-+; Test v16i8 absolute.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <16 x i8> @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlpb %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp slt <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sle.
-+define <16 x i8> @f2(<16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlpb %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sle <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sgt.
-+define <16 x i8> @f3(<16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlpb %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sgt <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sge.
-+define <16 x i8> @f4(<16 x i8> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlpb %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sge <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
-+  ret <16 x i8> %ret
-+}
-+
-+; Test that negative absolute uses VLPB too.  There is no vector equivalent
-+; of LOAD NEGATIVE.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
-+  %ret = sub <16 x i8> zeroinitializer, %abs
-+  ret <16 x i8> %ret
-+}
-+
-+; Try another form of negative absolute (slt version).
-+define <16 x i8> @f6(<16 x i8> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sle.
-+define <16 x i8> @f7(<16 x i8> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sle <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sgt.
-+define <16 x i8> @f8(<16 x i8> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sgt <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sge.
-+define <16 x i8> @f9(<16 x i8> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sge <16 x i8> %val, zeroinitializer
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with an SRA-based boolean vector.
-+define <16 x i8> @f10(<16 x i8> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlpb %v24, %v24
-+; CHECK: br %r14
-+  %shr = ashr <16 x i8> %val,
-+              <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
-+               i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %and1 = and <16 x i8> %shr, %neg
-+  %not = xor <16 x i8> %shr,
-+             <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
-+              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-+  %and2 = and <16 x i8> %not, %val
-+  %ret = or <16 x i8> %and1, %and2
-+  ret <16 x i8> %ret
-+}
-+
-+; ...and again in reverse
-+define <16 x i8> @f11(<16 x i8> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlpb [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcb %v24, [[REG]]
-+; CHECK: br %r14
-+  %shr = ashr <16 x i8> %val,
-+              <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
-+               i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
-+  %and1 = and <16 x i8> %shr, %val
-+  %not = xor <16 x i8> %shr,
-+             <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
-+              i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-+  %neg = sub <16 x i8> zeroinitializer, %val
-+  %and2 = and <16 x i8> %not, %neg
-+  %ret = or <16 x i8> %and1, %and2
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll
-@@ -0,0 +1,142 @@
-+; Test v8i16 absolute.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <8 x i16> @f1(<8 x i16> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlph %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp slt <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sle.
-+define <8 x i16> @f2(<8 x i16> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlph %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sle <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sgt.
-+define <8 x i16> @f3(<8 x i16> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlph %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sgt <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sge.
-+define <8 x i16> @f4(<8 x i16> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlph %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sge <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
-+  ret <8 x i16> %ret
-+}
-+
-+; Test that negative absolute uses VLPH too.  There is no vector equivalent
-+; of LOAD NEGATIVE.
-+define <8 x i16> @f5(<8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %abs = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
-+  %ret = sub <8 x i16> zeroinitializer, %abs
-+  ret <8 x i16> %ret
-+}
-+
-+; Try another form of negative absolute (slt version).
-+define <8 x i16> @f6(<8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sle.
-+define <8 x i16> @f7(<8 x i16> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sle <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sgt.
-+define <8 x i16> @f8(<8 x i16> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sgt <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sge.
-+define <8 x i16> @f9(<8 x i16> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sge <8 x i16> %val, zeroinitializer
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with an SRA-based boolean vector.
-+define <8 x i16> @f10(<8 x i16> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlph %v24, %v24
-+; CHECK: br %r14
-+  %shr = ashr <8 x i16> %val,
-+              <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %and1 = and <8 x i16> %shr, %neg
-+  %not = xor <8 x i16> %shr,
-+             <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-+  %and2 = and <8 x i16> %not, %val
-+  %ret = or <8 x i16> %and1, %and2
-+  ret <8 x i16> %ret
-+}
-+
-+; ...and again in reverse
-+define <8 x i16> @f11(<8 x i16> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlph [[REG:%v[0-9]+]], %v24
-+; CHECK: vlch %v24, [[REG]]
-+; CHECK: br %r14
-+  %shr = ashr <8 x i16> %val,
-+              <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-+  %and1 = and <8 x i16> %shr, %val
-+  %not = xor <8 x i16> %shr,
-+             <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
-+  %neg = sub <8 x i16> zeroinitializer, %val
-+  %and2 = and <8 x i16> %not, %neg
-+  %ret = or <8 x i16> %and1, %and2
-+  ret <8 x i16> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll
-@@ -0,0 +1,138 @@
-+; Test v4i32 absolute.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <4 x i32> @f1(<4 x i32> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlpf %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp slt <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sle.
-+define <4 x i32> @f2(<4 x i32> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlpf %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sle <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sgt.
-+define <4 x i32> @f3(<4 x i32> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlpf %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sgt <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sge.
-+define <4 x i32> @f4(<4 x i32> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlpf %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sge <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
-+  ret <4 x i32> %ret
-+}
-+
-+; Test that negative absolute uses VLPF too.  There is no vector equivalent
-+; of LOAD NEGATIVE.
-+define <4 x i32> @f5(<4 x i32> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %abs = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
-+  %ret = sub <4 x i32> zeroinitializer, %abs
-+  ret <4 x i32> %ret
-+}
-+
-+; Try another form of negative absolute (slt version).
-+define <4 x i32> @f6(<4 x i32> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sle.
-+define <4 x i32> @f7(<4 x i32> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sle <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sgt.
-+define <4 x i32> @f8(<4 x i32> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sgt <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sge.
-+define <4 x i32> @f9(<4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sge <4 x i32> %val, zeroinitializer
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with an SRA-based boolean vector.
-+define <4 x i32> @f10(<4 x i32> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlpf %v24, %v24
-+; CHECK: br %r14
-+  %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %and1 = and <4 x i32> %shr, %neg
-+  %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %and2 = and <4 x i32> %not, %val
-+  %ret = or <4 x i32> %and1, %and2
-+  ret <4 x i32> %ret
-+}
-+
-+; ...and again in reverse
-+define <4 x i32> @f11(<4 x i32> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlpf [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcf %v24, [[REG]]
-+; CHECK: br %r14
-+  %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
-+  %and1 = and <4 x i32> %shr, %val
-+  %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %neg = sub <4 x i32> zeroinitializer, %val
-+  %and2 = and <4 x i32> %not, %neg
-+  %ret = or <4 x i32> %and1, %and2
-+  ret <4 x i32> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll
-@@ -0,0 +1,138 @@
-+; Test v2i64 absolute.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <2 x i64> @f1(<2 x i64> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlpg %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp slt <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sle.
-+define <2 x i64> @f2(<2 x i64> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlpg %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sle <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sgt.
-+define <2 x i64> @f3(<2 x i64> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlpg %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sgt <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sge.
-+define <2 x i64> @f4(<2 x i64> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlpg %v24, %v24
-+; CHECK: br %r14
-+  %cmp = icmp sge <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
-+  ret <2 x i64> %ret
-+}
-+
-+; Test that negative absolute uses VLPG too.  There is no vector equivalent
-+; of LOAD NEGATIVE.
-+define <2 x i64> @f5(<2 x i64> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %abs = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
-+  %ret = sub <2 x i64> zeroinitializer, %abs
-+  ret <2 x i64> %ret
-+}
-+
-+; Try another form of negative absolute (slt version).
-+define <2 x i64> @f6(<2 x i64> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp slt <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sle.
-+define <2 x i64> @f7(<2 x i64> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sle <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sgt.
-+define <2 x i64> @f8(<2 x i64> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sgt <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sge.
-+define <2 x i64> @f9(<2 x i64> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %cmp = icmp sge <2 x i64> %val, zeroinitializer
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with an SRA-based boolean vector.
-+define <2 x i64> @f10(<2 x i64> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlpg %v24, %v24
-+; CHECK: br %r14
-+  %shr = ashr <2 x i64> %val, <i64 63, i64 63>
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %and1 = and <2 x i64> %shr, %neg
-+  %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
-+  %and2 = and <2 x i64> %not, %val
-+  %ret = or <2 x i64> %and1, %and2
-+  ret <2 x i64> %ret
-+}
-+
-+; ...and again in reverse
-+define <2 x i64> @f11(<2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlpg [[REG:%v[0-9]+]], %v24
-+; CHECK: vlcg %v24, [[REG]]
-+; CHECK: br %r14
-+  %shr = ashr <2 x i64> %val, <i64 63, i64 63>
-+  %and1 = and <2 x i64> %shr, %val
-+  %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
-+  %neg = sub <2 x i64> zeroinitializer, %val
-+  %and2 = and <2 x i64> %not, %neg
-+  %ret = or <2 x i64> %and1, %and2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll
-@@ -0,0 +1,46 @@
-+; Test f64 and v2f64 absolute.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare double @llvm.fabs.f64(double)
-+declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
-+
-+; Test a plain absolute.
-+define <2 x double> @f1(<2 x double> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vflpdb %v24, %v24
-+; CHECK: br %r14
-+  %ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
-+  ret <2 x double> %ret
-+}
-+
-+; Test a negative absolute.
-+define <2 x double> @f2(<2 x double> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vflndb %v24, %v24
-+; CHECK: br %r14
-+  %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
-+  %ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 absolute that uses vector registers.
-+define double @f3(<2 x double> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: wflpdb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %ret = call double @llvm.fabs.f64(double %scalar)
-+  ret double %ret
-+}
-+
-+; Test an f64 negative absolute that uses vector registers.
-+define double @f4(<2 x double> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: wflndb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %abs = call double @llvm.fabs.f64(double %scalar)
-+  %ret = fsub double -0.0, %abs
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-add-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-add-01.ll
-@@ -0,0 +1,60 @@
-+; Test vector addition.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 addition.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vab %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = add <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 addition.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vah %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = add <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 addition.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vaf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = add <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 addition.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vag %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = add <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2f64 addition.
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfadb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = fadd <2 x double> %val1, %val2
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 addition that uses vector registers.
-+define double @f6(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: wfadb %f0, %v24, %v26
-+; CHECK: br %r14
-+  %scalar1 = extractelement <2 x double> %val1, i32 0
-+  %scalar2 = extractelement <2 x double> %val2, i32 0
-+  %ret = fadd double %scalar1, %scalar2
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-and-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-and-01.ll
-@@ -0,0 +1,39 @@
-+; Test vector AND.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 AND.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vn %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = and <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 AND.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vn %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = and <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 AND.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vn %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = and <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 AND.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vn %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = and <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-and-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-and-02.ll
-@@ -0,0 +1,91 @@
-+; Test vector AND-NOT.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 AND-NOT.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vnc %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1>
-+  %ret = and <16 x i8> %val1, %not
-+  ret <16 x i8> %ret
-+}
-+
-+; ...and again with the reverse.
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vnc %v24, %v28, %v26
-+; CHECK: br %r14
-+  %not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1>
-+  %ret = and <16 x i8> %not, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 AND-NOT.
-+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vnc %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
-+                               i16 -1, i16 -1, i16 -1, i16 -1>
-+  %ret = and <8 x i16> %val1, %not
-+  ret <8 x i16> %ret
-+}
-+
-+; ...and again with the reverse.
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vnc %v24, %v28, %v26
-+; CHECK: br %r14
-+  %not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
-+                               i16 -1, i16 -1, i16 -1, i16 -1>
-+  %ret = and <8 x i16> %not, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 AND-NOT.
-+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vnc %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %ret = and <4 x i32> %val1, %not
-+  ret <4 x i32> %ret
-+}
-+
-+; ...and again with the reverse.
-+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vnc %v24, %v28, %v26
-+; CHECK: br %r14
-+  %not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %ret = and <4 x i32> %not, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 AND-NOT.
-+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vnc %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <2 x i64> %val2, <i64 -1, i64 -1>
-+  %ret = and <2 x i64> %val1, %not
-+  ret <2 x i64> %ret
-+}
-+
-+; ...and again with the reverse.
-+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vnc %v24, %v28, %v26
-+; CHECK: br %r14
-+  %not = xor <2 x i64> %val1, <i64 -1, i64 -1>
-+  %ret = and <2 x i64> %not, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-and-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-and-03.ll
-@@ -0,0 +1,113 @@
-+; Test vector zero extensions, which need to be implemented as ANDs.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i1->v16i8 extension.
-+define <16 x i8> @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib [[REG:%v[0-9]+]], 1
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <16 x i8> %val to <16 x i1>
-+  %ret = zext <16 x i1> %trunc to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i1->v8i16 extension.
-+define <8 x i16> @f2(<8 x i16> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepih [[REG:%v[0-9]+]], 1
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <8 x i16> %val to <8 x i1>
-+  %ret = zext <8 x i1> %trunc to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i8->v8i16 extension.
-+define <8 x i16> @f3(<8 x i16> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 21845
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <8 x i16> %val to <8 x i8>
-+  %ret = zext <8 x i8> %trunc to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i1->v4i32 extension.
-+define <4 x i32> @f4(<4 x i32> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepif [[REG:%v[0-9]+]], 1
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i1>
-+  %ret = zext <4 x i1> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i8->v4i32 extension.
-+define <4 x i32> @f5(<4 x i32> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 4369
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i8>
-+  %ret = zext <4 x i8> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i16->v4i32 extension.
-+define <4 x i32> @f6(<4 x i32> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 13107
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i16>
-+  %ret = zext <4 x i16> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i1->v2i64 extension.
-+define <2 x i64> @f7(<2 x i64> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepig [[REG:%v[0-9]+]], 1
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i1>
-+  %ret = zext <2 x i1> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i8->v2i64 extension.
-+define <2 x i64> @f8(<2 x i64> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 257
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i8>
-+  %ret = zext <2 x i8> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i16->v2i64 extension.
-+define <2 x i64> @f9(<2 x i64> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 771
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i16>
-+  %ret = zext <2 x i16> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i32->v2i64 extension.
-+define <2 x i64> @f10(<2 x i64> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 3855
-+; CHECK: vn %v24, %v24, [[REG]]
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i32>
-+  %ret = zext <2 x i32> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-01.ll
-@@ -0,0 +1,48 @@
-+; Test the handling of named vector arguments.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
-+
-+; This routine has 6 integer arguments, which fill up r2-r5 and
-+; the stack slot at offset 160, and 10 vector arguments, which
-+; fill up v24-v31 and the two double-wide stack slots at 168
-+; and 184.
-+declare void @bar(i64, i64, i64, i64, i64, i64,
-+                  <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
-+                  <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
-+                  <4 x i32>, <4 x i32>)
-+
-+define void @foo() {
-+; CHECK-VEC-LABEL: foo:
-+; CHECK-VEC-DAG: vrepif %v24, 1
-+; CHECK-VEC-DAG: vrepif %v26, 2
-+; CHECK-VEC-DAG: vrepif %v28, 3
-+; CHECK-VEC-DAG: vrepif %v30, 4
-+; CHECK-VEC-DAG: vrepif %v25, 5
-+; CHECK-VEC-DAG: vrepif %v27, 6
-+; CHECK-VEC-DAG: vrepif %v29, 7
-+; CHECK-VEC-DAG: vrepif %v31, 8
-+; CHECK-VEC: brasl %r14, bar@PLT
-+;
-+; CHECK-STACK-LABEL: foo:
-+; CHECK-STACK: aghi %r15, -200
-+; CHECK-STACK-DAG: mvghi 160(%r15), 6
-+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 9
-+; CHECK-STACK-DAG: vst [[REG1]], 168(%r15)
-+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 10
-+; CHECK-STACK-DAG: vst [[REG2]], 184(%r15)
-+; CHECK-STACK: brasl %r14, bar@PLT
-+
-+  call void @bar (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
-+                  <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
-+                  <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
-+                  <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
-+                  <4 x i32> <i32 4, i32 4, i32 4, i32 4>,
-+                  <4 x i32> <i32 5, i32 5, i32 5, i32 5>,
-+                  <4 x i32> <i32 6, i32 6, i32 6, i32 6>,
-+                  <4 x i32> <i32 7, i32 7, i32 7, i32 7>,
-+                  <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
-+                  <4 x i32> <i32 9, i32 9, i32 9, i32 9>,
-+                  <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-02.ll
-@@ -0,0 +1,31 @@
-+; Test the handling of unnamed vector arguments.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
-+
-+; This routine is called with two named vector argument (passed
-+; in %v24 and %v26) and two unnamed vector arguments (passed
-+; in the double-wide stack slots at 160 and 176).
-+declare void @bar(<4 x i32>, <4 x i32>, ...)
-+
-+define void @foo() {
-+; CHECK-VEC-LABEL: foo:
-+; CHECK-VEC-DAG: vrepif %v24, 1
-+; CHECK-VEC-DAG: vrepif %v26, 2
-+; CHECK-VEC: brasl %r14, bar@PLT
-+;
-+; CHECK-STACK-LABEL: foo:
-+; CHECK-STACK: aghi %r15, -192
-+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 3
-+; CHECK-STACK-DAG: vst [[REG1]], 160(%r15)
-+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 4
-+; CHECK-STACK-DAG: vst [[REG2]], 176(%r15)
-+; CHECK-STACK: brasl %r14, bar@PLT
-+
-+  call void (<4 x i32>, <4 x i32>, ...)* @bar
-+              (<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
-+               <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
-+               <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
-+               <4 x i32> <i32 4, i32 4, i32 4, i32 4>)
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-03.ll
-@@ -0,0 +1,30 @@
-+; Test the handling of incoming vector arguments.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; This routine has 10 vector arguments, which fill up %v24-%v31 and
-+; the two double-wide stack slots at 160 and 176.
-+define <4 x i32> @foo(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4,
-+                      <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8,
-+                      <4 x i32> %v9, <4 x i32> %v10) {
-+; CHECK-LABEL: foo:
-+; CHECK: vl [[REG1:%v[0-9]+]], 176(%r15)
-+; CHECK: vsf %v24, %v26, [[REG1]]
-+; CHECK: br %r14
-+  %y = sub <4 x i32> %v2, %v10
-+  ret <4 x i32> %y
-+}
-+
-+; This routine has 10 vector arguments, which fill up %v24-%v31 and
-+; the two single-wide stack slots at 160 and 168.
-+define <4 x i8> @bar(<4 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3, <4 x i8> %v4,
-+                     <4 x i8> %v5, <4 x i8> %v6, <4 x i8> %v7, <4 x i8> %v8,
-+                     <4 x i8> %v9, <4 x i8> %v10) {
-+; CHECK-LABEL: bar:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 168(%r15)
-+; CHECK: vsb %v24, %v26, [[REG1]]
-+; CHECK: br %r14
-+  %y = sub <4 x i8> %v2, %v10
-+  ret <4 x i8> %y
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-04.ll
-@@ -0,0 +1,50 @@
-+; Test the handling of named short vector arguments.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
-+
-+; This routine has 12 vector arguments, which fill up %v24-%v31
-+; and the four single-wide stack slots starting at 160.
-+declare void @bar(<1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
-+                  <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
-+                  <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>)
-+
-+define void @foo() {
-+; CHECK-VEC-LABEL: foo:
-+; CHECK-VEC-DAG: vrepib %v24, 1
-+; CHECK-VEC-DAG: vrepib %v26, 2
-+; CHECK-VEC-DAG: vrepib %v28, 3
-+; CHECK-VEC-DAG: vrepib %v30, 4
-+; CHECK-VEC-DAG: vrepib %v25, 5
-+; CHECK-VEC-DAG: vrepib %v27, 6
-+; CHECK-VEC-DAG: vrepib %v29, 7
-+; CHECK-VEC-DAG: vrepib %v31, 8
-+; CHECK-VEC: brasl %r14, bar@PLT
-+;
-+; CHECK-STACK-LABEL: foo:
-+; CHECK-STACK: aghi %r15, -192
-+; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304
-+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-+; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570
-+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
-+; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099
-+; CHECK-STACK-DAG: stg [[REG3]], 176(%r15)
-+; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108
-+; CHECK-STACK-DAG: oilf [[REG4]], 202116108
-+; CHECK-STACK-DAG: stg [[REG4]], 176(%r15)
-+; CHECK-STACK: brasl %r14, bar@PLT
-+
-+  call void @bar (<1 x i8> <i8 1>,
-+                  <2 x i8> <i8 2, i8 2>,
-+                  <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
-+                  <8 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>,
-+                  <1 x i8> <i8 5>,
-+                  <2 x i8> <i8 6, i8 6>,
-+                  <4 x i8> <i8 7, i8 7, i8 7, i8 7>,
-+                  <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
-+                  <1 x i8> <i8 9>,
-+                  <2 x i8> <i8 10, i8 10>,
-+                  <4 x i8> <i8 11, i8 11, i8 11, i8 11>,
-+                  <8 x i8> <i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>)
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-05.ll
-@@ -0,0 +1,32 @@
-+; Test the handling of unnamed short vector arguments.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
-+
-+; This routine is called with two named vector argument (passed
-+; in %v24 and %v26) and two unnamed vector arguments (passed
-+; in the single-wide stack slots at 160 and 168).
-+declare void @bar(<4 x i8>, <4 x i8>, ...)
-+
-+define void @foo() {
-+; CHECK-VEC-LABEL: foo:
-+; CHECK-VEC-DAG: vrepib %v24, 1
-+; CHECK-VEC-DAG: vrepib %v26, 2
-+; CHECK-VEC: brasl %r14, bar@PLT
-+;
-+; CHECK-STACK-LABEL: foo:
-+; CHECK-STACK: aghi %r15, -176
-+; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027
-+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
-+; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036
-+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
-+; CHECK-STACK: brasl %r14, bar@PLT
-+
-+  call void (<4 x i8>, <4 x i8>, ...)* @bar
-+              (<4 x i8> <i8 1, i8 1, i8 1, i8 1>,
-+               <4 x i8> <i8 2, i8 2, i8 2, i8 2>,
-+               <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
-+               <4 x i8> <i8 4, i8 4, i8 4, i8 4>)
-+  ret void
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll
-@@ -0,0 +1,9 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+define void @foo(<1 x i128>) {
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll
-@@ -0,0 +1,9 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+define <1 x i128> @foo() {
-+  ret <1 x i128><i128 0>
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll
-@@ -0,0 +1,12 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+declare void @bar(<1 x i128>)
-+
-+define void @foo() {
-+  call void @bar (<1 x i128> <i128 0>)
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll
-@@ -0,0 +1,12 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+declare <1 x i128> @bar()
-+
-+define void @foo() {
-+  %res = call <1 x i128> @bar ()
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll
-@@ -0,0 +1,9 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+define void @foo(<1 x fp128>) {
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll
-@@ -0,0 +1,9 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+define <1 x fp128> @foo() {
-+  ret <1 x fp128><fp128 0xL00000000000000000000000000000000>
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll
-@@ -0,0 +1,12 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+declare void @bar(<1 x fp128>)
-+
-+define void @foo() {
-+  call void @bar (<1 x fp128> <fp128 0xL00000000000000000000000000000000>)
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll
-@@ -0,0 +1,12 @@
-+; Verify that we detect unsupported single-element vector types.
-+
-+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
-+
-+declare <1 x fp128> @bar()
-+
-+define void @foo() {
-+  %res = call <1 x fp128> @bar ()
-+  ret void
-+}
-+
-+; CHECK: LLVM ERROR: Unsupported vector argument or return type
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll
-@@ -0,0 +1,228 @@
-+; Test v16i8 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test eq.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vceqb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ne.
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vceqb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sgt.
-+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vchb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sge.
-+define <16 x i8> @f4(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sle.
-+define <16 x i8> @f5(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test slt.
-+define <16 x i8> @f6(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vchb %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ugt.
-+define <16 x i8> @f7(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vchlb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test uge.
-+define <16 x i8> @f8(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ule.
-+define <16 x i8> @f9(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ult.
-+define <16 x i8> @f10(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vchlb %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <16 x i8> %val1, %val2
-+  %ret = sext <16 x i1> %cmp to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test eq selects.
-+define <16 x i8> @f11(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f11:
-+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ne selects.
-+define <16 x i8> @f12(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f12:
-+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sgt selects.
-+define <16 x i8> @f13(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f13:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sge selects.
-+define <16 x i8> @f14(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f14:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test sle selects.
-+define <16 x i8> @f15(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test slt selects.
-+define <16 x i8> @f16(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ugt selects.
-+define <16 x i8> @f17(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test uge selects.
-+define <16 x i8> @f18(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ule selects.
-+define <16 x i8> @f19(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-+
-+; Test ult selects.
-+define <16 x i8> @f20(<16 x i8> %val1, <16 x i8> %val2,
-+                      <16 x i8> %val3, <16 x i8> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll
-@@ -0,0 +1,228 @@
-+; Test v8i16 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test eq.
-+define <8 x i16> @f1(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vceqh %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ne.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vceqh [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sgt.
-+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vchh %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sge.
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sle.
-+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test slt.
-+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vchh %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ugt.
-+define <8 x i16> @f7(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vchlh %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test uge.
-+define <8 x i16> @f8(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ule.
-+define <8 x i16> @f9(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ult.
-+define <8 x i16> @f10(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vchlh %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <8 x i16> %val1, %val2
-+  %ret = sext <8 x i1> %cmp to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test eq selects.
-+define <8 x i16> @f11(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f11:
-+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ne selects.
-+define <8 x i16> @f12(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f12:
-+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sgt selects.
-+define <8 x i16> @f13(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f13:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sge selects.
-+define <8 x i16> @f14(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f14:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test sle selects.
-+define <8 x i16> @f15(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test slt selects.
-+define <8 x i16> @f16(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ugt selects.
-+define <8 x i16> @f17(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test uge selects.
-+define <8 x i16> @f18(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ule selects.
-+define <8 x i16> @f19(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test ult selects.
-+define <8 x i16> @f20(<8 x i16> %val1, <8 x i16> %val2,
-+                      <8 x i16> %val3, <8 x i16> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4
-+  ret <8 x i16> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll
-@@ -0,0 +1,228 @@
-+; Test v4i32 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test eq.
-+define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vceqf %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ne.
-+define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vceqf [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sgt.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vchf %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sge.
-+define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sle.
-+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test slt.
-+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vchf %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ugt.
-+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vchlf %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test uge.
-+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ule.
-+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ult.
-+define <4 x i32> @f10(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vchlf %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <4 x i32> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test eq selects.
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f11:
-+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ne selects.
-+define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f12:
-+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sgt selects.
-+define <4 x i32> @f13(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f13:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sge selects.
-+define <4 x i32> @f14(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f14:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test sle selects.
-+define <4 x i32> @f15(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test slt selects.
-+define <4 x i32> @f16(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ugt selects.
-+define <4 x i32> @f17(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test uge selects.
-+define <4 x i32> @f18(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ule selects.
-+define <4 x i32> @f19(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ult selects.
-+define <4 x i32> @f20(<4 x i32> %val1, <4 x i32> %val2,
-+                      <4 x i32> %val3, <4 x i32> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4
-+  ret <4 x i32> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll
-@@ -0,0 +1,228 @@
-+; Test v2i64 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test eq.
-+define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vceqg %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ne.
-+define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vceqg [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sgt.
-+define <2 x i64> @f3(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vchg %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sge.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sle.
-+define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test slt.
-+define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vchg %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ugt.
-+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vchlg %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test uge.
-+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ule.
-+define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ult.
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vchlg %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <2 x i64> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test eq selects.
-+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f11:
-+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp eq <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ne selects.
-+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f12:
-+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ne <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sgt selects.
-+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f13:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sgt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sge selects.
-+define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f14:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sge <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test sle selects.
-+define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp sle <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test slt selects.
-+define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp slt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ugt selects.
-+define <2 x i64> @f17(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ugt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test uge selects.
-+define <2 x i64> @f18(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp uge <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ule selects.
-+define <2 x i64> @f19(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ule <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ult selects.
-+define <2 x i64> @f20(<2 x i64> %val1, <2 x i64> %val2,
-+                      <2 x i64> %val3, <2 x i64> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = icmp ult <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll
-@@ -0,0 +1,472 @@
-+; Test v4f32 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test oeq.
-+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oeq <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test one.
-+define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
-+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
-+; CHECK: vo %v24, [[RES1]], [[RES0]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp one <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ogt.
-+define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ogt <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test oge.
-+define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oge <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ole.
-+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ole <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test olt.
-+define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp olt <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ueq.
-+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
-+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
-+; CHECK: vno %v24, [[RES1]], [[RES0]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ueq <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test une.
-+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp une <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ugt.
-+define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ugt <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test uge.
-+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uge <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ule.
-+define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ule <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ult.
-+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]]
-+; CHECK-NEXT: vno %v24, [[RES]], [[RES]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ult <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test ord.
-+define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
-+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
-+; CHECK: vo %v24, [[RES1]], [[RES0]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ord <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test uno.
-+define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f14:
-+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24
-+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26
-+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]]
-+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]]
-+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]]
-+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]]
-+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]]
-+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]]
-+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]]
-+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]]
-+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]]
-+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]]
-+; CHECK: vno %v24, [[RES1]], [[RES0]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uno <4 x float> %val1, %val2
-+  %ret = sext <4 x i1> %cmp to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test oeq selects.
-+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oeq <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test one selects.
-+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK: vo [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp one <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ogt selects.
-+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ogt <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test oge selects.
-+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oge <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ole selects.
-+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ole <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test olt selects.
-+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp olt <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ueq selects.
-+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f21:
-+; CHECK: vo [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ueq <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test une selects.
-+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f22:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp une <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ugt selects.
-+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f23:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ugt <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test uge selects.
-+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f24:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uge <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ule selects.
-+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f25:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ule <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ult selects.
-+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f26:
-+; CHECK: vpkg [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ult <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test ord selects.
-+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f27:
-+; CHECK: vo [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ord <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-+
-+; Test uno selects.
-+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2,
-+                        <4 x float> %val3, <4 x float> %val4) {
-+; CHECK-LABEL: f28:
-+; CHECK: vo [[REG:%v[0-9]+]],
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uno <4 x float> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4
-+  ret <4 x float> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll
-@@ -0,0 +1,349 @@
-+; Test f64 and v2f64 comparisons.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test oeq.
-+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vfcedb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oeq <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test one.
-+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
-+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
-+; CHECK: vo %v24, [[REG1]], [[REG2]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp one <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ogt.
-+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vfchdb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ogt <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test oge.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vfchedb %v24, %v26, %v28
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oge <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ole.
-+define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfchedb %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ole <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test olt.
-+define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vfchdb %v24, %v28, %v26
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp olt <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ueq.
-+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
-+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28
-+; CHECK: vno %v24, [[REG1]], [[REG2]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ueq <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test une.
-+define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp une <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ugt.
-+define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ugt <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test uge.
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1,
-+                      <2 x double> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uge <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ule.
-+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1,
-+                      <2 x double> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ule <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ult.
-+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1,
-+                      <2 x double> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28
-+; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ult <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test ord.
-+define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1,
-+                      <2 x double> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
-+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
-+; CHECK: vo %v24, [[REG1]], [[REG2]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ord <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test uno.
-+define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1,
-+                      <2 x double> %val2) {
-+; CHECK-LABEL: f14:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26
-+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28
-+; CHECK: vno %v24, [[REG1]], [[REG2]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uno <2 x double> %val1, %val2
-+  %ret = sext <2 x i1> %cmp to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test oeq selects.
-+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f15:
-+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oeq <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test one selects.
-+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f16:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
-+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
-+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp one <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ogt selects.
-+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f17:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ogt <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test oge selects.
-+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f18:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp oge <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ole selects.
-+define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f19:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ole <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test olt selects.
-+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f20:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp olt <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ueq selects.
-+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f21:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
-+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26
-+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ueq <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test une selects.
-+define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f22:
-+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp une <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ugt selects.
-+define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f23:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ugt <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test uge selects.
-+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f24:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uge <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ule selects.
-+define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f25:
-+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ule <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ult selects.
-+define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f26:
-+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ult <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test ord selects.
-+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f27:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
-+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
-+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
-+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp ord <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test uno selects.
-+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2,
-+                         <2 x double> %val3, <2 x double> %val4) {
-+; CHECK-LABEL: f28:
-+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24
-+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26
-+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]]
-+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]]
-+; CHECK-NEXT: br %r14
-+  %cmp = fcmp uno <2 x double> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 comparison that uses vector registers.
-+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) {
-+; CHECK-LABEL: f29:
-+; CHECK: wfcdb %f0, %v24
-+; CHECK-NEXT: locgrne %r2, %r3
-+; CHECK: br %r14
-+  %f2 = extractelement <2 x double> %vec, i32 0
-+  %cond = fcmp oeq double %f1, %f2
-+  %res = select i1 %cond, i64 %a, i64 %b
-+  ret i64 %res
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll
-@@ -0,0 +1,155 @@
-+; Test various target-specific DAG combiner patterns.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Check that an extraction followed by a truncation is effectively treated
-+; as a bitcast.
-+define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
-+; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
-+; CHECK: br %r14
-+  %add = add <4 x i32> %v1, %v2
-+  %elem1 = extractelement <4 x i32> %add, i32 0
-+  %elem2 = extractelement <4 x i32> %add, i32 3
-+  %trunc1 = trunc i32 %elem1 to i8
-+  %trunc2 = trunc i32 %elem2 to i8
-+  store i8 %trunc1, i8 *%ptr1
-+  store i8 %trunc2, i8 *%ptr2
-+  ret void
-+}
-+
-+; Test a case where a pack-type shuffle can be eliminated.
-+define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
-+; CHECK-LABEL: f2:
-+; CHECK-NOT: vpk
-+; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
-+; CHECK: br %r14
-+  %add1 = add <4 x i32> %v1, %v2
-+  %add2 = add <4 x i32> %v2, %v3
-+  %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
-+                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %bitcast, i32 1
-+  %elem2 = extractelement <8 x i16> %bitcast, i32 7
-+  %res = add i16 %elem1, %elem2
-+  ret i16 %res
-+}
-+
-+; ...and again in a case where there's also a splat and a bitcast.
-+define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
-+; CHECK-LABEL: f3:
-+; CHECK-NOT: vrepg
-+; CHECK-NOT: vpk
-+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
-+; CHECK: br %r14
-+  %add = add <4 x i32> %v1, %v2
-+  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
-+                         <2 x i32> <i32 0, i32 0>
-+  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
-+  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
-+                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %bitcast, i32 2
-+  %elem2 = extractelement <8 x i16> %bitcast, i32 7
-+  %res = add i16 %elem1, %elem2
-+  ret i16 %res
-+}
-+
-+; ...and again with a merge low instead of a pack.
-+define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
-+; CHECK-LABEL: f4:
-+; CHECK-NOT: vrepg
-+; CHECK-NOT: vmr
-+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
-+; CHECK: br %r14
-+  %add = add <4 x i32> %v1, %v2
-+  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
-+                         <2 x i32> <i32 0, i32 0>
-+  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
-+  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
-+                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-+  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %bitcast, i32 4
-+  %elem2 = extractelement <8 x i16> %bitcast, i32 7
-+  %res = add i16 %elem1, %elem2
-+  ret i16 %res
-+}
-+
-+; ...and again with a merge high.
-+define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vrepg
-+; CHECK-NOT: vmr
-+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
-+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
-+; CHECK: br %r14
-+  %add = add <4 x i32> %v1, %v2
-+  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
-+                         <2 x i32> <i32 0, i32 0>
-+  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
-+  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
-+                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-+  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %bitcast, i32 4
-+  %elem2 = extractelement <8 x i16> %bitcast, i32 7
-+  %res = add i16 %elem1, %elem2
-+  ret i16 %res
-+}
-+
-+; Test a case where an unpack high can be eliminated from the usual
-+; load-extend sequence.
-+define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
-+; CHECK-NOT: vup
-+; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
-+; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
-+; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
-+; CHECK: br %r14
-+  %vec = load <8 x i8> *%ptr1
-+  %ext = sext <8 x i8> %vec to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %ext, i32 1
-+  %elem2 = extractelement <8 x i16> %ext, i32 2
-+  %elem3 = extractelement <8 x i16> %ext, i32 7
-+  %trunc1 = trunc i16 %elem1 to i8
-+  %trunc2 = trunc i16 %elem2 to i8
-+  %trunc3 = trunc i16 %elem3 to i8
-+  store i8 %trunc1, i8 *%ptr2
-+  store i8 %trunc2, i8 *%ptr3
-+  store i8 %trunc3, i8 *%ptr4
-+  ret void
-+}
-+
-+; ...and again with a bitcast inbetween.
-+define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
-+; CHECK-NOT: vup
-+; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
-+; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
-+; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
-+; CHECK: br %r14
-+  %vec = load <4 x i8> *%ptr1
-+  %ext = sext <4 x i8> %vec to <4 x i32>
-+  %bitcast = bitcast <4 x i32> %ext to <8 x i16>
-+  %elem1 = extractelement <8 x i16> %bitcast, i32 1
-+  %elem2 = extractelement <8 x i16> %bitcast, i32 3
-+  %elem3 = extractelement <8 x i16> %bitcast, i32 7
-+  %trunc1 = trunc i16 %elem1 to i8
-+  %trunc2 = trunc i16 %elem2 to i8
-+  %trunc3 = trunc i16 %elem3 to i8
-+  store i8 %trunc1, i8 *%ptr2
-+  store i8 %trunc2, i8 *%ptr3
-+  store i8 %trunc3, i8 *%ptr4
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll
-@@ -0,0 +1,433 @@
-+; Test various representations of pack-like operations.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; One way of writing a <4 x i32> -> <8 x i16> pack.
-+define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) {
-+; CHECK-LABEL: f1:
-+; CHECK: vpkf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %elem0 = extractelement <4 x i32> %val0, i32 0
-+  %elem1 = extractelement <4 x i32> %val0, i32 1
-+  %elem2 = extractelement <4 x i32> %val0, i32 2
-+  %elem3 = extractelement <4 x i32> %val0, i32 3
-+  %elem4 = extractelement <4 x i32> %val1, i32 0
-+  %elem5 = extractelement <4 x i32> %val1, i32 1
-+  %elem6 = extractelement <4 x i32> %val1, i32 2
-+  %elem7 = extractelement <4 x i32> %val1, i32 3
-+  %hboth0 = bitcast i32 %elem0 to <2 x i16>
-+  %hboth1 = bitcast i32 %elem1 to <2 x i16>
-+  %hboth2 = bitcast i32 %elem2 to <2 x i16>
-+  %hboth3 = bitcast i32 %elem3 to <2 x i16>
-+  %hboth4 = bitcast i32 %elem4 to <2 x i16>
-+  %hboth5 = bitcast i32 %elem5 to <2 x i16>
-+  %hboth6 = bitcast i32 %elem6 to <2 x i16>
-+  %hboth7 = bitcast i32 %elem7 to <2 x i16>
-+  %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1,
-+                         <2 x i32> <i32 1, i32 3>
-+  %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3,
-+                         <2 x i32> <i32 1, i32 3>
-+  %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5,
-+                         <2 x i32> <i32 1, i32 3>
-+  %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7,
-+                         <2 x i32> <i32 1, i32 3>
-+  %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1,
-+                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-+  %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3,
-+                         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-+  %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1,
-+                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                  i32 4, i32 5, i32 6, i32 7>
-+  ret <8 x i16> %ret
-+}
-+
-+; A different way of writing a <4 x i32> -> <8 x i16> pack.
-+define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) {
-+; CHECK-LABEL: f2:
-+; CHECK: vpkf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %elem0 = extractelement <4 x i32> %val0, i32 0
-+  %elem1 = extractelement <4 x i32> %val0, i32 1
-+  %elem2 = extractelement <4 x i32> %val0, i32 2
-+  %elem3 = extractelement <4 x i32> %val0, i32 3
-+  %elem4 = extractelement <4 x i32> %val1, i32 0
-+  %elem5 = extractelement <4 x i32> %val1, i32 1
-+  %elem6 = extractelement <4 x i32> %val1, i32 2
-+  %elem7 = extractelement <4 x i32> %val1, i32 3
-+  %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0
-+  %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0
-+  %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0
-+  %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0
-+  %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0
-+  %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0
-+  %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0
-+  %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0
-+  %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16>
-+  %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16>
-+  %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16>
-+  %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16>
-+  %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16>
-+  %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16>
-+  %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16>
-+  %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16>
-+  %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1,
-+                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3,
-+                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5,
-+                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7,
-+                         <8 x i32> <i32 1, i32 9, i32 undef, i32 undef,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1,
-+                         <8 x i32> <i32 0, i32 1, i32 8, i32 9,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3,
-+                         <8 x i32> <i32 0, i32 1, i32 8, i32 9,
-+                                    i32 undef, i32 undef, i32 undef, i32 undef>
-+  %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1,
-+                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                  i32 8, i32 9, i32 10, i32 11>
-+  ret <8 x i16> %ret
-+}
-+
-+; A direct pack operation.
-+define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) {
-+; CHECK-LABEL: f3:
-+; CHECK: vpkf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
-+  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
-+  %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
-+                       <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                  i32 9, i32 11, i32 13, i32 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; One way of writing a <4 x i32> -> <16 x i8> pack.  It doesn't matter
-+; whether the first pack is VPKF or VPKH since the even bytes of the
-+; result are discarded.
-+define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1,
-+                     <4 x i32> %val2, <4 x i32> %val3) {
-+; CHECK-LABEL: f4:
-+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
-+; CHECK: vpkh %v24, [[REG1]], [[REG2]]
-+; CHECK: br %r14
-+  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
-+  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
-+  %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
-+  %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
-+  %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
-+  %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
-+  %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
-+                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Check the same operation, but with elements being extracted from the result.
-+define void @f5(<4 x i32> %val0, <4 x i32> %val1,
-+                <4 x i32> %val2, <4 x i32> %val3,
-+                i8 *%base) {
-+; CHECK-LABEL: f5:
-+; CHECK-DAG: vsteb %v24, 0(%r2), 11
-+; CHECK-DAG: vsteb %v26, 1(%r2), 15
-+; CHECK-DAG: vsteb %v28, 2(%r2), 3
-+; CHECK-DAG: vsteb %v30, 3(%r2), 7
-+; CHECK: br %r14
-+  %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
-+  %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
-+  %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16>
-+  %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16>
-+  %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8>
-+  %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8>
-+  %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5,
-+                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+
-+  %ptr0 = getelementptr i8 *%base, i64 0
-+  %ptr1 = getelementptr i8 *%base, i64 1
-+  %ptr2 = getelementptr i8 *%base, i64 2
-+  %ptr3 = getelementptr i8 *%base, i64 3
-+
-+  %byte0 = extractelement <16 x i8> %vec, i32 2
-+  %byte1 = extractelement <16 x i8> %vec, i32 7
-+  %byte2 = extractelement <16 x i8> %vec, i32 8
-+  %byte3 = extractelement <16 x i8> %vec, i32 13
-+
-+  store i8 %byte0, i8 *%ptr0
-+  store i8 %byte1, i8 *%ptr1
-+  store i8 %byte2, i8 *%ptr2
-+  store i8 %byte3, i8 *%ptr3
-+
-+  ret void
-+}
-+
-+; A different way of writing a <4 x i32> -> <16 x i8> pack.
-+define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1,
-+                     <4 x i32> %val2, <4 x i32> %val3) {
-+; CHECK-LABEL: f6:
-+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30
-+; CHECK: vpkh %v24, [[REG1]], [[REG2]]
-+; CHECK: br %r14
-+  %elem0 = extractelement <4 x i32> %val0, i32 0
-+  %elem1 = extractelement <4 x i32> %val0, i32 1
-+  %elem2 = extractelement <4 x i32> %val0, i32 2
-+  %elem3 = extractelement <4 x i32> %val0, i32 3
-+  %elem4 = extractelement <4 x i32> %val1, i32 0
-+  %elem5 = extractelement <4 x i32> %val1, i32 1
-+  %elem6 = extractelement <4 x i32> %val1, i32 2
-+  %elem7 = extractelement <4 x i32> %val1, i32 3
-+  %elem8 = extractelement <4 x i32> %val2, i32 0
-+  %elem9 = extractelement <4 x i32> %val2, i32 1
-+  %elem10 = extractelement <4 x i32> %val2, i32 2
-+  %elem11 = extractelement <4 x i32> %val2, i32 3
-+  %elem12 = extractelement <4 x i32> %val3, i32 0
-+  %elem13 = extractelement <4 x i32> %val3, i32 1
-+  %elem14 = extractelement <4 x i32> %val3, i32 2
-+  %elem15 = extractelement <4 x i32> %val3, i32 3
-+  %bitcast0 = bitcast i32 %elem0 to <2 x i16>
-+  %bitcast1 = bitcast i32 %elem1 to <2 x i16>
-+  %bitcast2 = bitcast i32 %elem2 to <2 x i16>
-+  %bitcast3 = bitcast i32 %elem3 to <2 x i16>
-+  %bitcast4 = bitcast i32 %elem4 to <2 x i16>
-+  %bitcast5 = bitcast i32 %elem5 to <2 x i16>
-+  %bitcast6 = bitcast i32 %elem6 to <2 x i16>
-+  %bitcast7 = bitcast i32 %elem7 to <2 x i16>
-+  %bitcast8 = bitcast i32 %elem8 to <2 x i16>
-+  %bitcast9 = bitcast i32 %elem9 to <2 x i16>
-+  %bitcast10 = bitcast i32 %elem10 to <2 x i16>
-+  %bitcast11 = bitcast i32 %elem11 to <2 x i16>
-+  %bitcast12 = bitcast i32 %elem12 to <2 x i16>
-+  %bitcast13 = bitcast i32 %elem13 to <2 x i16>
-+  %bitcast14 = bitcast i32 %elem14 to <2 x i16>
-+  %bitcast15 = bitcast i32 %elem15 to <2 x i16>
-+  %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> %bitcast3,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15,
-+                        <2 x i32> <i32 1, i32 3>
-+  %bytes0 = bitcast <2 x i16> %low0 to <4 x i8>
-+  %bytes1 = bitcast <2 x i16> %low1 to <4 x i8>
-+  %bytes2 = bitcast <2 x i16> %low2 to <4 x i8>
-+  %bytes3 = bitcast <2 x i16> %low3 to <4 x i8>
-+  %bytes4 = bitcast <2 x i16> %low4 to <4 x i8>
-+  %bytes5 = bitcast <2 x i16> %low5 to <4 x i8>
-+  %bytes6 = bitcast <2 x i16> %low6 to <4 x i8>
-+  %bytes7 = bitcast <2 x i16> %low7 to <4 x i8>
-+  %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1,
-+                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                    i32 4, i32 5, i32 6, i32 7>
-+  %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3,
-+                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                    i32 4, i32 5, i32 6, i32 7>
-+  %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
-+                       <16 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                   i32 4, i32 5, i32 6, i32 7,
-+                                   i32 8, i32 9, i32 10, i32 11,
-+                                   i32 12, i32 13, i32 14, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; One way of writing a <2 x i64> -> <16 x i8> pack.
-+define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1,
-+                     <2 x i64> %val2, <2 x i64> %val3,
-+                     <2 x i64> %val4, <2 x i64> %val5,
-+                     <2 x i64> %val6, <2 x i64> %val7) {
-+; CHECK-LABEL: f7:
-+; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26
-+; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30
-+; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27
-+; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31
-+; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]]
-+; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]]
-+; CHECK: vpkh %v24, [[REG5]], [[REG6]]
-+; CHECK: br %r14
-+  %elem0 = extractelement <2 x i64> %val0, i32 0
-+  %elem1 = extractelement <2 x i64> %val0, i32 1
-+  %elem2 = extractelement <2 x i64> %val1, i32 0
-+  %elem3 = extractelement <2 x i64> %val1, i32 1
-+  %elem4 = extractelement <2 x i64> %val2, i32 0
-+  %elem5 = extractelement <2 x i64> %val2, i32 1
-+  %elem6 = extractelement <2 x i64> %val3, i32 0
-+  %elem7 = extractelement <2 x i64> %val3, i32 1
-+  %elem8 = extractelement <2 x i64> %val4, i32 0
-+  %elem9 = extractelement <2 x i64> %val4, i32 1
-+  %elem10 = extractelement <2 x i64> %val5, i32 0
-+  %elem11 = extractelement <2 x i64> %val5, i32 1
-+  %elem12 = extractelement <2 x i64> %val6, i32 0
-+  %elem13 = extractelement <2 x i64> %val6, i32 1
-+  %elem14 = extractelement <2 x i64> %val7, i32 0
-+  %elem15 = extractelement <2 x i64> %val7, i32 1
-+  %bitcast0 = bitcast i64 %elem0 to <2 x i32>
-+  %bitcast1 = bitcast i64 %elem1 to <2 x i32>
-+  %bitcast2 = bitcast i64 %elem2 to <2 x i32>
-+  %bitcast3 = bitcast i64 %elem3 to <2 x i32>
-+  %bitcast4 = bitcast i64 %elem4 to <2 x i32>
-+  %bitcast5 = bitcast i64 %elem5 to <2 x i32>
-+  %bitcast6 = bitcast i64 %elem6 to <2 x i32>
-+  %bitcast7 = bitcast i64 %elem7 to <2 x i32>
-+  %bitcast8 = bitcast i64 %elem8 to <2 x i32>
-+  %bitcast9 = bitcast i64 %elem9 to <2 x i32>
-+  %bitcast10 = bitcast i64 %elem10 to <2 x i32>
-+  %bitcast11 = bitcast i64 %elem11 to <2 x i32>
-+  %bitcast12 = bitcast i64 %elem12 to <2 x i32>
-+  %bitcast13 = bitcast i64 %elem13 to <2 x i32>
-+  %bitcast14 = bitcast i64 %elem14 to <2 x i32>
-+  %bitcast15 = bitcast i64 %elem15 to <2 x i32>
-+  %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13,
-+                        <2 x i32> <i32 1, i32 3>
-+  %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15,
-+                        <2 x i32> <i32 1, i32 3>
-+  %half0 = bitcast <2 x i32> %low0 to <4 x i16>
-+  %half1 = bitcast <2 x i32> %low1 to <4 x i16>
-+  %half2 = bitcast <2 x i32> %low2 to <4 x i16>
-+  %half3 = bitcast <2 x i32> %low3 to <4 x i16>
-+  %half4 = bitcast <2 x i32> %low4 to <4 x i16>
-+  %half5 = bitcast <2 x i32> %low5 to <4 x i16>
-+  %half6 = bitcast <2 x i32> %low6 to <4 x i16>
-+  %half7 = bitcast <2 x i32> %low7 to <4 x i16>
-+  %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7,
-+                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %bytes0 = bitcast <4 x i16> %hlow0 to <8 x i8>
-+  %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8>
-+  %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8>
-+  %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8>
-+  %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3,
-+                         <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                    i32 9, i32 11, i32 13, i32 15>
-+  %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1,
-+                       <16 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                   i32 4, i32 5, i32 6, i32 7,
-+                                   i32 8, i32 9, i32 10, i32 11,
-+                                   i32 12, i32 13, i32 14, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a <2 x i64> -> <4 x f32> pack in which only individual elements are
-+; needed.
-+define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) {
-+; CHECK-LABEL: f8:
-+; CHECK-NOT: vperm
-+; CHECK-NOT: vpk
-+; CHECK-NOT: vmrh
-+; CHECK: aebr {{%f[0-7]}},
-+; CHECK: aebr {{%f[0-7]}},
-+; CHECK: meebr %f0,
-+; CHECK: br %r14
-+  %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0
-+  %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0
-+  %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0
-+  %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0
-+  %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1,
-+                         <2 x i32> <i32 0, i32 2>
-+  %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3,
-+                         <2 x i32> <i32 0, i32 2>
-+  %bitcast0 = bitcast <2 x i64> %join0 to <4 x float>
-+  %bitcast1 = bitcast <2 x i64> %join1 to <4 x float>
-+  %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1,
-+                        <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %elt0 = extractelement <4 x float> %pack, i32 0
-+  %elt1 = extractelement <4 x float> %pack, i32 1
-+  %elt2 = extractelement <4 x float> %pack, i32 2
-+  %elt3 = extractelement <4 x float> %pack, i32 3
-+  %add0 = fadd float %elt0, %elt2
-+  %add1 = fadd float %elt1, %elt3
-+  %ret = fmul float %add0, %add1
-+  ret float %ret
-+}
-+
-+; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are
-+; needed.
-+define i32 @f9(double %scalar0, double %scalar1, double %scalar2,
-+               double %scalar3) {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vperm
-+; CHECK-NOT: vpk
-+; CHECK-NOT: vmrh
-+; CHECK: ar {{%r[0-5]}},
-+; CHECK: ar {{%r[0-5]}},
-+; CHECK: or %r2,
-+; CHECK: br %r14
-+  %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0
-+  %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0
-+  %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0
-+  %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0
-+  %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1,
-+                         <2 x i32> <i32 0, i32 2>
-+  %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3,
-+                         <2 x i32> <i32 0, i32 2>
-+  %bitcast0 = bitcast <2 x double> %join0 to <4 x i32>
-+  %bitcast1 = bitcast <2 x double> %join1 to <4 x i32>
-+  %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1,
-+                        <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  %elt0 = extractelement <4 x i32> %pack, i32 0
-+  %elt1 = extractelement <4 x i32> %pack, i32 1
-+  %elt2 = extractelement <4 x i32> %pack, i32 2
-+  %elt3 = extractelement <4 x i32> %pack, i32 3
-+  %add0 = add i32 %elt0, %elt2
-+  %add1 = add i32 %elt1, %elt3
-+  %ret = or i32 %add0, %add1
-+  ret i32 %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-01.ll
-@@ -0,0 +1,103 @@
-+; Test vector byte masks, v16i8 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <16 x i8> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <16 x i8> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <16 x i8> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1,
-+                 i8 -1, i8 -1, i8 -1, i8 -1,
-+                 i8 -1, i8 -1, i8 -1, i8 -1,
-+                 i8 -1, i8 -1, i8 -1, i8 -1>
-+}
-+
-+; Test a mixed vector (mask 0x8c75).
-+define <16 x i8> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 35957
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
-+                 i8 -1, i8 -1, i8 0, i8 0,
-+                 i8 0, i8 -1, i8 -1, i8 -1,
-+                 i8 0, i8 -1, i8 0, i8 -1>
-+}
-+
-+; Test that undefs are treated as zero.
-+define <16 x i8> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 35957
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 -1, i8 undef, i8 undef, i8 undef,
-+                 i8 -1, i8 -1, i8 undef, i8 undef,
-+                 i8 undef, i8 -1, i8 -1, i8 -1,
-+                 i8 undef, i8 -1, i8 undef, i8 -1>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <16 x i8> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0,
-+                 i8 -1, i8 -1, i8 0, i8 1,
-+                 i8 0, i8 -1, i8 -1, i8 -1,
-+                 i8 0, i8 -1, i8 0, i8 -1>
-+}
-+
-+; Test an all-zeros v2i8 that gets promoted to v16i8.
-+define <2 x i8> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x i8> zeroinitializer
-+}
-+
-+; Test a mixed v2i8 that gets promoted to v16i8 (mask 0x8000).
-+define <2 x i8> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgbm %v24, 32768
-+; CHECK: br %r14
-+  ret <2 x i8> <i8 255, i8 0>
-+}
-+
-+; Test an all-zeros v4i8 that gets promoted to v16i8.
-+define <4 x i8> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <4 x i8> zeroinitializer
-+}
-+
-+; Test a mixed v4i8 that gets promoted to v16i8 (mask 0x9000).
-+define <4 x i8> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgbm %v24, 36864
-+; CHECK: br %r14
-+  ret <4 x i8> <i8 255, i8 0, i8 0, i8 255>
-+}
-+
-+; Test an all-zeros v8i8 that gets promoted to v16i8.
-+define <8 x i8> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <8 x i8> zeroinitializer
-+}
-+
-+; Test a mixed v8i8 that gets promoted to v16i8 (mask 0xE500).
-+define <8 x i8> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK: vgbm %v24, 58624
-+; CHECK: br %r14
-+  ret <8 x i8> <i8 255, i8 255, i8 255, i8 0, i8 0, i8 255, i8 0, i8 255>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-02.ll
-@@ -0,0 +1,79 @@
-+; Test vector byte masks, v8i16 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <8 x i16> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <8 x i16> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <8 x i16> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -1,
-+                 i16 -1, i16 -1, i16 -1, i16 -1>
-+}
-+
-+; Test a mixed vector (mask 0x8c76).
-+define <8 x i16> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 35958
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
-+                 i16 255, i16 65535, i16 255, i16 65280>
-+}
-+
-+; Test that undefs are treated as zero.
-+define <8 x i16> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 35958
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 65280, i16 undef, i16 65535, i16 undef,
-+                 i16 255, i16 65535, i16 255, i16 65280>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <8 x i16> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 65280, i16 0, i16 65535, i16 0,
-+                 i16 255, i16 65535, i16 256, i16 65280>
-+}
-+
-+; Test an all-zeros v2i16 that gets promoted to v8i16.
-+define <2 x i16> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x i16> zeroinitializer
-+}
-+
-+; Test a mixed v2i16 that gets promoted to v8i16 (mask 0xc000).
-+define <2 x i16> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgbm %v24, 49152
-+; CHECK: br %r14
-+  ret <2 x i16> <i16 65535, i16 0>
-+}
-+
-+; Test an all-zeros v4i16 that gets promoted to v8i16.
-+define <4 x i16> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <4 x i16> zeroinitializer
-+}
-+
-+; Test a mixed v4i16 that gets promoted to v8i16 (mask 0x7200).
-+define <4 x i16> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgbm %v24, 29184
-+; CHECK: br %r14
-+  ret <4 x i16> <i16 255, i16 65535, i16 0, i16 65280>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-03.ll
-@@ -0,0 +1,59 @@
-+; Test vector byte masks, v4i32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <4 x i32> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <4 x i32> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <4 x i32> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-+}
-+
-+; Test a mixed vector (mask 0x8c76).
-+define <4 x i32> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 35958
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 4278190080, i32 4294901760, i32 16777215, i32 16776960>
-+}
-+
-+; Test that undefs are treated as zero (mask 0x8076).
-+define <4 x i32> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 32886
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 4278190080, i32 undef, i32 16777215, i32 16776960>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <4 x i32> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 4278190080, i32 1, i32 16777215, i32 16776960>
-+}
-+
-+; Test an all-zeros v2i32 that gets promoted to v4i32.
-+define <2 x i32> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x i32> zeroinitializer
-+}
-+
-+; Test a mixed v2i32 that gets promoted to v4i32 (mask 0xae00).
-+define <2 x i32> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgbm %v24, 44544
-+; CHECK: br %r14
-+  ret <2 x i32> <i32 4278255360, i32 -256>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-04.ll
-@@ -0,0 +1,43 @@
-+; Test vector byte masks, v2i64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <2 x i64> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x i64> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <2 x i64> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -1, i64 -1>
-+}
-+
-+; Test a mixed vector (mask 0x8c76).
-+define <2 x i64> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 35958
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 18374686483966525440, i64 72057589759737600>
-+}
-+
-+; Test that undefs are treated as zero (mask 0x8c00).
-+define <2 x i64> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 35840
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 18374686483966525440, i64 undef>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <2 x i64> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 18374686483966525441, i64 72057589759737600>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-05.ll
-@@ -0,0 +1,63 @@
-+; Test vector byte masks, v4f32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <4 x float> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <4 x float> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <4 x float> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffe0000000,
-+                   float 0xffffffffe0000000, float 0xffffffffe0000000>
-+}
-+
-+; Test a mixed vector (mask 0xc731).
-+define <4 x float> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 50993
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffe0000000,
-+                   float 0x379fffe000000000, float 0x371fe00000000000>
-+}
-+
-+; Test that undefs are treated as zero (mask 0xc031).
-+define <4 x float> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 49201
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffe00000000000, float undef,
-+                   float 0x379fffe000000000, float 0x371fe00000000000>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <4 x float> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffe00000000000, float 0x381fffffc0000000,
-+                   float 0x379fffe000000000, float 0x371fe00000000000>
-+}
-+
-+; Test an all-zeros v2f32 that gets promoted to v4f32.
-+define <2 x float> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x float> zeroinitializer
-+}
-+
-+; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700).
-+define <2 x float> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgbm %v24, 50944
-+; CHECK: br %r14
-+  ret <2 x float> <float 0xffffe00000000000, float 0x381fffffe0000000>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-06.ll
-@@ -0,0 +1,43 @@
-+; Test vector byte masks, v2f64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test an all-zeros vector.
-+define <2 x double> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK: br %r14
-+  ret <2 x double> zeroinitializer
-+}
-+
-+; Test an all-ones vector.
-+define <2 x double> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 65535
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffffffffffffffff, double 0xffffffffffffffff>
-+}
-+
-+; Test a mixed vector (mask 0x8c76).
-+define <2 x double> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 35958
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xff000000ffff0000, double 0x00ffffff00ffff00>
-+}
-+
-+; Test that undefs are treated as zero (mask 0x8c00).
-+define <2 x double> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgbm %v24, 35840
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xff000000ffff0000, double undef>
-+}
-+
-+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff.
-+define <2 x double> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vgbm
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfe000000ffff0000, double 0x00ffffff00ffff00>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-07.ll
-@@ -0,0 +1,229 @@
-+; Test vector replicates, v16i8 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <16 x i8> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 1, i8 1, i8 1, i8 1,
-+                 i8 1, i8 1, i8 1, i8 1,
-+                 i8 1, i8 1, i8 1, i8 1,
-+                 i8 1, i8 1, i8 1, i8 1>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <16 x i8> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 201, i8 201, i8 201, i8 201,
-+                 i8 201, i8 201, i8 201, i8 201,
-+                 i8 201, i8 201, i8 201, i8 201,
-+                 i8 201, i8 201, i8 201, i8 201>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <16 x i8> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 254, i8 254, i8 254, i8 254,
-+                 i8 254, i8 254, i8 254, i8 254,
-+                 i8 254, i8 254, i8 254, i8 254,
-+                 i8 254, i8 254, i8 254, i8 254>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <16 x i8> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 1, i8 0, i8 1,
-+                 i8 0, i8 1, i8 0, i8 1,
-+                 i8 0, i8 1, i8 0, i8 1,
-+                 i8 0, i8 1, i8 0, i8 1>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <16 x i8> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 100, i8 50, i8 100, i8 50,
-+                 i8 100, i8 50, i8 100, i8 50,
-+                 i8 100, i8 50, i8 100, i8 50,
-+                 i8 100, i8 50, i8 100, i8 50>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <16 x i8> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 254, i8 255, i8 254,
-+                 i8 255, i8 254, i8 255, i8 254,
-+                 i8 255, i8 254, i8 255, i8 254,
-+                 i8 255, i8 254, i8 255, i8 254>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <16 x i8> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
-+                 i8 0, i8 0, i8 0, i8 1,
-+                 i8 0, i8 0, i8 0, i8 1,
-+                 i8 0, i8 0, i8 0, i8 1>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <16 x i8> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 127, i8 255,
-+                 i8 0, i8 0, i8 127, i8 255,
-+                 i8 0, i8 0, i8 127, i8 255,
-+                 i8 0, i8 0, i8 127, i8 255>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <16 x i8> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <16 x i8> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 128, i8 0,
-+                 i8 255, i8 255, i8 128, i8 0,
-+                 i8 255, i8 255, i8 128, i8 0,
-+                 i8 255, i8 255, i8 128, i8 0>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <16 x i8> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <16 x i8> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 255, i8 254,
-+                 i8 255, i8 255, i8 255, i8 254,
-+                 i8 255, i8 255, i8 255, i8 254,
-+                 i8 255, i8 255, i8 255, i8 254>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <16 x i8> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 0, i8 1,
-+                 i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 0, i8 1>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <16 x i8> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 127, i8 255,
-+                 i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 127, i8 255>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <16 x i8> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <16 x i8> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 128, i8 0,
-+                 i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 128, i8 0>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <16 x i8> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <16 x i8> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 255, i8 254,
-+                 i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 255, i8 254>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 0.
-+define <16 x i8> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 undef, i8 0, i8 0,
-+                 i8 0, i8 0, i8 127, i8 255,
-+                 i8 undef, i8 0, i8 undef, i8 0,
-+                 i8 0, i8 0, i8 127, i8 255>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -1.
-+define <16 x i8> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 undef, i8 255, i8 255, i8 255,
-+                 i8 255, i8 255, i8 undef, i8 254,
-+                 i8 255, i8 255, i8 255, i8 undef,
-+                 i8 255, i8 undef, i8 255, i8 254>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-08.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-08.ll
-@@ -0,0 +1,189 @@
-+; Test vector replicates, v8i16 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <8 x i16> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 257, i16 257, i16 257, i16 257,
-+                 i16 257, i16 257, i16 257, i16 257>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <8 x i16> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 51657, i16 51657, i16 51657, i16 51657,
-+                 i16 51657, i16 51657, i16 51657, i16 51657>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <8 x i16> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -258, i16 -258, i16 -258, i16 -258,
-+                 i16 -258, i16 -258, i16 -258, i16 -258>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <8 x i16> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 1, i16 1, i16 1, i16 1,
-+                 i16 1, i16 1, i16 1, i16 1>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <8 x i16> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 25650, i16 25650, i16 25650, i16 25650,
-+                 i16 25650, i16 25650, i16 25650, i16 25650>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <8 x i16> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 65534, i16 65534, i16 65534, i16 65534,
-+                 i16 65534, i16 65534, i16 65534, i16 65534>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <8 x i16> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 1, i16 0, i16 1,
-+                 i16 0, i16 1, i16 0, i16 1>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <8 x i16> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 32767, i16 0, i16 32767,
-+                 i16 0, i16 32767, i16 0, i16 32767>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <8 x i16> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
-+                 i16 0, i16 32768, i16 0, i16 32768>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <8 x i16> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -32768, i16 -1, i16 -32768,
-+                 i16 -1, i16 -32768, i16 -1, i16 -32768>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <8 x i16> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -32769, i16 -1, i16 -32769,
-+                 i16 -1, i16 -32769, i16 -1, i16 -32769>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <8 x i16> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -2, i16 -1, i16 -2,
-+                 i16 -1, i16 -2, i16 -1, i16 -2>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <8 x i16> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 0, i16 0, i16 1,
-+                 i16 0, i16 0, i16 0, i16 1>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <8 x i16> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 0, i16 0, i16 32767,
-+                 i16 0, i16 0, i16 0, i16 32767>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <8 x i16> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
-+                 i16 0, i16 0, i16 0, i16 32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <8 x i16> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32768,
-+                 i16 -1, i16 -1, i16 -1, i16 -32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <8 x i16> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -32769,
-+                 i16 -1, i16 -1, i16 -1, i16 -32769>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <8 x i16> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 -1, i16 -2,
-+                 i16 -1, i16 -1, i16 -1, i16 -2>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 0.
-+define <8 x i16> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 undef, i16 0, i16 32767,
-+                 i16 undef, i16 0, i16 undef, i16 32767>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -1.
-+define <8 x i16> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 undef, i16 -2,
-+                 i16 undef, i16 undef, i16 -1, i16 -2>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-09.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-09.ll
-@@ -0,0 +1,169 @@
-+; Test vector replicates, v4i32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <4 x i32> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 16843009, i32 16843009, i32 16843009, i32 16843009>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <4 x i32> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 3385444809, i32 3385444809, i32 3385444809, i32 3385444809>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <4 x i32> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 4278124286, i32 4278124286, i32 4278124286, i32 4278124286>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <4 x i32> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 65537, i32 65537, i32 65537, i32 65537>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <4 x i32> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 1681024050, i32 1681024050, i32 1681024050, i32 1681024050>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <4 x i32> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -65538, i32 -65538, i32 -65538, i32 -65538>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <4 x i32> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 1, i32 1, i32 1, i32 1>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <4 x i32> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 32767, i32 32767, i32 32767, i32 32767>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <4 x i32> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <4 x i32> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -32768, i32 -32768, i32 -32768, i32 -32768>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <4 x i32> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <4 x i32> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -2, i32 -2, i32 -2, i32 -2>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <4 x i32> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <4 x i32> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 0, i32 32767, i32 0, i32 32767>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <4 x i32> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <4 x i32> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 -32768, i32 -1, i32 -32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <4 x i32> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 -32769, i32 -1, i32 -32769>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <4 x i32> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 -2, i32 -1, i32 -2>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 0, 32767.
-+define <4 x i32> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 undef, i32 undef, i32 0, i32 32767>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -2, -1.
-+define <4 x i32> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 undef, i32 undef, i32 -2>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-10.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-10.ll
-@@ -0,0 +1,169 @@
-+; Test vector replicates, v2i64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <2 x i64> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 72340172838076673, i64 72340172838076673>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <2 x i64> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -3906369333256140343, i64 -3906369333256140343>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <2 x i64> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -72340172838076674, i64 -72340172838076674>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <2 x i64> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 281479271743489, i64 281479271743489>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <2 x i64> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 7219943320220492850, i64 7219943320220492850>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <2 x i64> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -281479271743490, i64 -281479271743490>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <2 x i64> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 4294967297, i64 4294967297>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <2 x i64> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 140733193420799, i64 140733193420799>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <2 x i64> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 140737488388096, i64 140737488388096>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <2 x i64> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -140733193420800, i64 -140733193420800>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <2 x i64> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <2 x i64> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -4294967298, i64 -4294967298>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <2 x i64> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 1, i64 1>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <2 x i64> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 32767, i64 32767>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <2 x i64> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 32768, i64 32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <2 x i64> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -32768, i64 -32768>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <2 x i64> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -32769, i64 -32769>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <2 x i64> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -2, i64 -2>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 32767.
-+define <2 x i64> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 undef, i64 32767>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -2.
-+define <2 x i64> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 undef, i64 -2>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-11.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-11.ll
-@@ -0,0 +1,189 @@
-+; Test vector replicates, v4f32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <4 x float> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x3820202020000000, float 0x3820202020000000,
-+                   float 0x3820202020000000, float 0x3820202020000000>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <4 x float> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xc139393920000000, float 0xc139393920000000,
-+                   float 0xc139393920000000, float 0xc139393920000000>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <4 x float> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xc7dfdfdfc0000000, float 0xc7dfdfdfc0000000,
-+                   float 0xc7dfdfdfc0000000, float 0xc7dfdfdfc0000000>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <4 x float> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x37a0001000000000, float 0x37a0001000000000,
-+                   float 0x37a0001000000000, float 0x37a0001000000000>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <4 x float> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x44864c8640000000, float 0x44864c8640000000,
-+                   float 0x44864c8640000000, float 0x44864c8640000000>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <4 x float> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffdfffc0000000, float 0xffffdfffc0000000,
-+                   float 0xffffdfffc0000000, float 0xffffdfffc0000000>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <4 x float> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x36a0000000000000, float 0x36a0000000000000,
-+                   float 0x36a0000000000000, float 0x36a0000000000000>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <4 x float> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x378fffc000000000, float 0x378fffc000000000,
-+                   float 0x378fffc000000000, float 0x378fffc000000000>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <4 x float> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x3790000000000000, float 0x3790000000000000,
-+                   float 0x3790000000000000, float 0x3790000000000000>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <4 x float> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xfffff00000000000, float 0xfffff00000000000,
-+                   float 0xfffff00000000000, float 0xfffff00000000000>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <4 x float> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffefffe0000000, float 0xffffefffe0000000,
-+                   float 0xffffefffe0000000, float 0xffffefffe0000000>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <4 x float> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffc0000000, float 0xffffffffc0000000,
-+                   float 0xffffffffc0000000, float 0xffffffffc0000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <4 x float> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x36a0000000000000,
-+                   float 0.0, float 0x36a0000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <4 x float> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x378fffc000000000,
-+                   float 0.0, float 0x378fffc000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <4 x float> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x3790000000000000,
-+                   float 0.0, float 0x3790000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <4 x float> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float 0xfffff00000000000,
-+                   float 0xffffffffe0000000, float 0xfffff00000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <4 x float> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float 0xffffefffe0000000,
-+                   float 0xffffffffe0000000, float 0xffffefffe0000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <4 x float> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float 0xffffffffc0000000,
-+                   float 0xffffffffe0000000, float 0xffffffffc0000000>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 0, 32767.
-+define <4 x float> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <4 x float> <float undef, float undef,
-+                   float 0.0, float 0x378fffc000000000>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -2, -1.
-+define <4 x float> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float undef,
-+                   float undef, float 0xffffffffc0000000>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-12.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-12.ll
-@@ -0,0 +1,169 @@
-+; Test vector replicates, v2f64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a byte-granularity replicate with the lowest useful value.
-+define <2 x double> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepib %v24, 1
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0101010101010101, double 0x0101010101010101>
-+}
-+
-+; Test a byte-granularity replicate with an arbitrary value.
-+define <2 x double> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepib %v24, -55
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xc9c9c9c9c9c9c9c9, double 0xc9c9c9c9c9c9c9c9>
-+}
-+
-+; Test a byte-granularity replicate with the highest useful value.
-+define <2 x double> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepib %v24, -2
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfefefefefefefefe, double 0xfefefefefefefefe>
-+}
-+
-+; Test a halfword-granularity replicate with the lowest useful value.
-+define <2 x double> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vrepih %v24, 1
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0001000100010001, double 0x0001000100010001>
-+}
-+
-+; Test a halfword-granularity replicate with an arbitrary value.
-+define <2 x double> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vrepih %v24, 25650
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x6432643264326432, double 0x6432643264326432>
-+}
-+
-+; Test a halfword-granularity replicate with the highest useful value.
-+define <2 x double> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vrepih %v24, -2
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfffefffefffefffe, double 0xfffefffefffefffe>
-+}
-+
-+; Test a word-granularity replicate with the lowest useful positive value.
-+define <2 x double> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepif %v24, 1
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000000100000001, double 0x0000000100000001>
-+}
-+
-+; Test a word-granularity replicate with the highest in-range value.
-+define <2 x double> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepif %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x00007fff00007fff, double 0x00007fff00007fff>
-+}
-+
-+; Test a word-granularity replicate with the next highest value.
-+; This cannot use VREPIF.
-+define <2 x double> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
-+}
-+
-+; Test a word-granularity replicate with the lowest in-range value.
-+define <2 x double> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepif %v24, -32768
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffff8000ffff8000, double 0xffff8000ffff8000>
-+}
-+
-+; Test a word-granularity replicate with the next lowest value.
-+; This cannot use VREPIF.
-+define <2 x double> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vrepif
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
-+}
-+
-+; Test a word-granularity replicate with the highest useful negative value.
-+define <2 x double> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepif %v24, -2
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfffffffefffffffe, double 0xfffffffefffffffe>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest useful positive
-+; value.
-+define <2 x double> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepig %v24, 1
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000000000000001, double 0x0000000000000001>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest in-range value.
-+define <2 x double> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000000000007fff, double 0x0000000000007fff>
-+}
-+
-+; Test a doubleword-granularity replicate with the next highest value.
-+; This cannot use VREPIG.
-+define <2 x double> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest in-range value.
-+define <2 x double> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepig %v24, -32768
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffffffffffff8000, double 0xffffffffffff8000>
-+}
-+
-+; Test a doubleword-granularity replicate with the next lowest value.
-+; This cannot use VREPIG.
-+define <2 x double> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK-NOT: vrepig
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffffffffffff7fff, double 0xffffffffffff7fff>
-+}
-+
-+; Test a doubleword-granularity replicate with the highest useful negative
-+; value.
-+define <2 x double> @f18() {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfffffffffffffffe, double 0xfffffffffffffffe>
-+}
-+
-+; Repeat f14 with undefs optimistically treated as 32767.
-+define <2 x double> @f19() {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepig %v24, 32767
-+; CHECK: br %r14
-+  ret <2 x double> <double undef, double 0x0000000000007fff>
-+}
-+
-+; Repeat f18 with undefs optimistically treated as -2.
-+define <2 x double> @f20() {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepig %v24, -2
-+; CHECK: br %r14
-+  ret <2 x double> <double undef, double 0xfffffffffffffffe>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-13.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-13.ll
-@@ -0,0 +1,193 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v16i8 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <16 x i8> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <16 x i8> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 1, i8 255, i8 255,
-+                 i8 0, i8 1, i8 255, i8 255,
-+                 i8 0, i8 1, i8 255, i8 255,
-+                 i8 0, i8 1, i8 255, i8 255>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <16 x i8> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 254, i8 0, i8 0,
-+                 i8 255, i8 254, i8 0, i8 0,
-+                 i8 255, i8 254, i8 0, i8 0,
-+                 i8 255, i8 254, i8 0, i8 0>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <16 x i8> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 12, 17
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 15, i8 192, i8 0,
-+                 i8 0, i8 15, i8 192, i8 0,
-+                 i8 0, i8 15, i8 192, i8 0,
-+                 i8 0, i8 15, i8 192, i8 0>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <16 x i8> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 17, 15
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255,
-+                 i8 255, i8 255, i8 127, i8 255>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <16 x i8> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0,
-+                 i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 0, i8 128, i8 0>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <16 x i8> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 63, i8 255, i8 255,
-+                 i8 0, i8 0, i8 0, i8 0,
-+                 i8 0, i8 63, i8 255, i8 255>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <16 x i8> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 248, i8 0, i8 0,
-+                 i8 255, i8 255, i8 255, i8 255,
-+                 i8 255, i8 248, i8 0, i8 0>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <16 x i8> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 0, i8 0, i8 1,
-+                 i8 255, i8 224, i8 0, i8 0,
-+                 i8 0, i8 0, i8 0, i8 1,
-+                 i8 255, i8 224, i8 0, i8 0>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <16 x i8> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 18, 0
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 128, i8 0, i8 63, i8 255,
-+                 i8 255, i8 255, i8 255, i8 255,
-+                 i8 128, i8 0, i8 63, i8 255,
-+                 i8 255, i8 255, i8 255, i8 255>
-+}
-+
-+; Retest f1 with arbitrary undefs instead of 0s.
-+define <16 x i8> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 undef, i8 128, i8 0,
-+                 i8 0, i8 0, i8 128, i8 undef,
-+                 i8 undef, i8 0, i8 128, i8 0,
-+                 i8 undef, i8 undef, i8 128, i8 0>
-+}
-+
-+; Try a case where we want consistent undefs to be treated as 0.
-+define <16 x i8> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vgmf %v24, 15, 23
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 undef, i8 1, i8 255, i8 0,
-+                 i8 undef, i8 1, i8 255, i8 0,
-+                 i8 undef, i8 1, i8 255, i8 0,
-+                 i8 undef, i8 1, i8 255, i8 0>
-+}
-+
-+; ...and again with the lower bits of the replicated constant.
-+define <16 x i8> @f13() {
-+; CHECK-LABEL: f13:
-+; CHECK: vgmf %v24, 15, 22
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 1, i8 254, i8 undef,
-+                 i8 0, i8 1, i8 254, i8 undef,
-+                 i8 0, i8 1, i8 254, i8 undef,
-+                 i8 0, i8 1, i8 254, i8 undef>
-+}
-+
-+; Try a case where we want consistent undefs to be treated as -1.
-+define <16 x i8> @f14() {
-+; CHECK-LABEL: f14:
-+; CHECK: vgmf %v24, 28, 8
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 undef, i8 128, i8 0, i8 15,
-+                 i8 undef, i8 128, i8 0, i8 15,
-+                 i8 undef, i8 128, i8 0, i8 15,
-+                 i8 undef, i8 128, i8 0, i8 15>
-+}
-+
-+; ...and again with the lower bits of the replicated constant.
-+define <16 x i8> @f15() {
-+; CHECK-LABEL: f15:
-+; CHECK: vgmf %v24, 18, 3
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 240, i8 0, i8 63, i8 undef,
-+                 i8 240, i8 0, i8 63, i8 undef,
-+                 i8 240, i8 0, i8 63, i8 undef,
-+                 i8 240, i8 0, i8 63, i8 undef>
-+}
-+
-+; Repeat f9 with arbitrary undefs.
-+define <16 x i8> @f16() {
-+; CHECK-LABEL: f16:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 undef, i8 0, i8 undef, i8 1,
-+                 i8 255, i8 undef, i8 0, i8 0,
-+                 i8 0, i8 0, i8 0, i8 1,
-+                 i8 undef, i8 224, i8 undef, i8 undef>
-+}
-+
-+; Try a case where we want some consistent undefs to be treated as 0
-+; and some to be treated as 255.
-+define <16 x i8> @f17() {
-+; CHECK-LABEL: f17:
-+; CHECK: vgmg %v24, 23, 35
-+; CHECK: br %r14
-+  ret <16 x i8> <i8 0, i8 undef, i8 1, i8 undef,
-+                 i8 240, i8 undef, i8 0, i8 0,
-+                 i8 0, i8 undef, i8 1, i8 undef,
-+                 i8 240, i8 undef, i8 0, i8 0>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-14.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-14.ll
-@@ -0,0 +1,113 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v8i16 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <8 x i16> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 32768, i16 0, i16 32768,
-+                 i16 0, i16 32768, i16 0, i16 32768>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <8 x i16> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 1, i16 -1, i16 1, i16 -1,
-+                 i16 1, i16 -1, i16 1, i16 -1>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <8 x i16> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -2, i16 0, i16 -2, i16 0,
-+                 i16 -2, i16 0, i16 -2, i16 0>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <8 x i16> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 12, 17
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 15, i16 49152, i16 15, i16 49152,
-+                 i16 15, i16 49152, i16 15, i16 49152>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <8 x i16> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 17, 15
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 32767, i16 -1, i16 32767,
-+                 i16 -1, i16 32767, i16 -1, i16 32767>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <8 x i16> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 0, i16 0, i16 32768,
-+                 i16 0, i16 0, i16 0, i16 32768>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <8 x i16> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 0, i16 63, i16 -1,
-+                 i16 0, i16 0, i16 63, i16 -1>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <8 x i16> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 -1, i16 -1, i16 -8, i16 0,
-+                 i16 -1, i16 -1, i16 -8, i16 0>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <8 x i16> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 0, i16 1, i16 -32, i16 0,
-+                 i16 0, i16 1, i16 -32, i16 0>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <8 x i16> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 18, 0
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 32768, i16 16383, i16 -1, i16 -1,
-+                 i16 32768, i16 16383, i16 -1, i16 -1>
-+}
-+
-+; Retest f1 with arbitrary undefs instead of 0s.
-+define <8 x i16> @f11() {
-+; CHECK-LABEL: f11:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 undef, i16 32768, i16 0, i16 32768,
-+                 i16 0, i16 32768, i16 undef, i16 32768>
-+}
-+
-+; ...likewise f9.
-+define <8 x i16> @f12() {
-+; CHECK-LABEL: f12:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <8 x i16> <i16 undef, i16 1, i16 -32, i16 0,
-+                 i16 0, i16 1, i16 -32, i16 undef>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-15.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-15.ll
-@@ -0,0 +1,85 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v4i32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <4 x i32> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 32768, i32 32768, i32 32768, i32 32768>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <4 x i32> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 131071, i32 131071, i32 131071, i32 131071>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <4 x i32> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -131072, i32 -131072, i32 -131072, i32 -131072>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <4 x i32> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 12, 17
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 1032192, i32 1032192, i32 1032192, i32 1032192>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <4 x i32> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 17, 15
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -32769, i32 -32769, i32 -32769, i32 -32769>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <4 x i32> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 0, i32 32768, i32 0, i32 32768>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <4 x i32> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 0, i32 4194303, i32 0, i32 4194303>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <4 x i32> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -1, i32 -524288, i32 -1, i32 -524288>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <4 x i32> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 1, i32 -2097152, i32 1, i32 -2097152>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <4 x i32> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 18, 0
-+; CHECK: br %r14
-+  ret <4 x i32> <i32 -2147467265, i32 -1, i32 -2147467265, i32 -1>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-16.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-16.ll
-@@ -0,0 +1,85 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v2i64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <2 x i64> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 140737488388096, i64 140737488388096>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <2 x i64> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 562945658585087, i64 562945658585087>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <2 x i64> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -562945658585088, i64 -562945658585088>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <2 x i64> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 12, 17
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 4433230884225024, i64 4433230884225024>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <2 x i64> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 17, 15
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -140737488388097, i64 -140737488388097>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <2 x i64> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 32768, i64 32768>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <2 x i64> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 4194303, i64 4194303>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <2 x i64> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -524288, i64 -524288>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <2 x i64> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 31, 42
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 8587837440, i64 8587837440>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <2 x i64> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 18, 0
-+; CHECK: br %r14
-+  ret <2 x i64> <i64 -9223301668110598145, i64 -9223301668110598145>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-17.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-17.ll
-@@ -0,0 +1,95 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v4f32 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <4 x float> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x3790000000000000, float 0x3790000000000000,
-+                   float 0x3790000000000000, float 0x3790000000000000>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <4 x float> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x37affff000000000, float 0x37affff000000000,
-+                   float 0x37affff000000000, float 0x37affff000000000>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <4 x float> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffc00000000000, float 0xffffc00000000000,
-+                   float 0xffffc00000000000, float 0xffffc00000000000>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <4 x float> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 2, 8
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x3ff0000000000000, float 0x3ff0000000000000,
-+                   float 0x3ff0000000000000, float 0x3ff0000000000000>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <4 x float> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 9, 1
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xc00fffffe0000000, float 0xc00fffffe0000000,
-+                   float 0xc00fffffe0000000, float 0xc00fffffe0000000>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <4 x float> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x3790000000000000,
-+                   float 0.0, float 0x3790000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <4 x float> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x37ffffff80000000,
-+                   float 0.0, float 0x37ffffff80000000>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <4 x float> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <4 x float> <float 0xffffffffe0000000, float 0xffff000000000000,
-+                   float 0xffffffffe0000000, float 0xffff000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <4 x float> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 34, 41
-+; CHECK: br %r14
-+  ret <4 x float> <float 0.0, float 0x3ff8000000000000,
-+                   float 0.0, float 0x3ff8000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <4 x float> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 32, 0
-+; CHECK: br %r14
-+  ret <4 x float> <float 0x8000000000000000, float 0xffffffffe0000000,
-+                   float 0x8000000000000000, float 0xffffffffe0000000>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-const-18.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-const-18.ll
-@@ -0,0 +1,85 @@
-+; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a word-granularity replicate with the lowest value that cannot use
-+; VREPIF.
-+define <2 x double> @f1() {
-+; CHECK-LABEL: f1:
-+; CHECK: vgmf %v24, 16, 16
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000800000008000, double 0x0000800000008000>
-+}
-+
-+; Test a word-granularity replicate that has the lower 17 bits set.
-+define <2 x double> @f2() {
-+; CHECK-LABEL: f2:
-+; CHECK: vgmf %v24, 15, 31
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0001ffff0001ffff, double 0x0001ffff0001ffff>
-+}
-+
-+; Test a word-granularity replicate that has the upper 15 bits set.
-+define <2 x double> @f3() {
-+; CHECK-LABEL: f3:
-+; CHECK: vgmf %v24, 0, 14
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfffe0000fffe0000, double 0xfffe0000fffe0000>
-+}
-+
-+; Test a word-granularity replicate that has middle bits set.
-+define <2 x double> @f4() {
-+; CHECK-LABEL: f4:
-+; CHECK: vgmf %v24, 2, 11
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x3ff000003ff00000, double 0x3ff000003ff00000>
-+}
-+
-+; Test a word-granularity replicate with a wrap-around mask.
-+define <2 x double> @f5() {
-+; CHECK-LABEL: f5:
-+; CHECK: vgmf %v24, 17, 15
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xffff7fffffff7fff, double 0xffff7fffffff7fff>
-+}
-+
-+; Test a doubleword-granularity replicate with the lowest value that cannot
-+; use VREPIG.
-+define <2 x double> @f6() {
-+; CHECK-LABEL: f6:
-+; CHECK: vgmg %v24, 48, 48
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x0000000000008000, double 0x0000000000008000>
-+}
-+
-+; Test a doubleword-granularity replicate that has the lower 22 bits set.
-+define <2 x double> @f7() {
-+; CHECK-LABEL: f7:
-+; CHECK: vgmg %v24, 42, 63
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x000000000003fffff, double 0x000000000003fffff>
-+}
-+
-+; Test a doubleword-granularity replicate that has the upper 45 bits set.
-+define <2 x double> @f8() {
-+; CHECK-LABEL: f8:
-+; CHECK: vgmg %v24, 0, 44
-+; CHECK: br %r14
-+  ret <2 x double> <double 0xfffffffffff80000, double 0xfffffffffff80000>
-+}
-+
-+; Test a doubleword-granularity replicate that has middle bits set.
-+define <2 x double> @f9() {
-+; CHECK-LABEL: f9:
-+; CHECK: vgmg %v24, 2, 11
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x3ff0000000000000, double 0x3ff0000000000000>
-+}
-+
-+; Test a doubleword-granularity replicate with a wrap-around mask.
-+define <2 x double> @f10() {
-+; CHECK-LABEL: f10:
-+; CHECK: vgmg %v24, 10, 0
-+; CHECK: br %r14
-+  ret <2 x double> <double 0x803fffffffffffff, double 0x803fffffffffffff>
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll
-@@ -0,0 +1,95 @@
-+; Test conversions between integer and float elements.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test conversion of f64s to signed i64s.
-+define <2 x i64> @f1(<2 x double> %doubles) {
-+; CHECK-LABEL: f1:
-+; CHECK: vcgdb %v24, %v24, 0, 5
-+; CHECK: br %r14
-+  %dwords = fptosi <2 x double> %doubles to <2 x i64>
-+  ret <2 x i64> %dwords
-+}
-+
-+; Test conversion of f64s to unsigned i64s.
-+define <2 x i64> @f2(<2 x double> %doubles) {
-+; CHECK-LABEL: f2:
-+; CHECK: vclgdb %v24, %v24, 0, 5
-+; CHECK: br %r14
-+  %dwords = fptoui <2 x double> %doubles to <2 x i64>
-+  ret <2 x i64> %dwords
-+}
-+
-+; Test conversion of signed i64s to f64s.
-+define <2 x double> @f3(<2 x i64> %dwords) {
-+; CHECK-LABEL: f3:
-+; CHECK: vcdgb %v24, %v24, 0, 0
-+; CHECK: br %r14
-+  %doubles = sitofp <2 x i64> %dwords to <2 x double>
-+  ret <2 x double> %doubles
-+}
-+
-+; Test conversion of unsigned i64s to f64s.
-+define <2 x double> @f4(<2 x i64> %dwords) {
-+; CHECK-LABEL: f4:
-+; CHECK: vcdlgb %v24, %v24, 0, 0
-+; CHECK: br %r14
-+  %doubles = uitofp <2 x i64> %dwords to <2 x double>
-+  ret <2 x double> %doubles
-+}
-+
-+; Test conversion of f64s to signed i32s, which must compile.
-+define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) {
-+  %words = fptosi <2 x double> %doubles to <2 x i32>
-+  store <2 x i32> %words, <2 x i32> *%ptr
-+  ret void
-+}
-+
-+; Test conversion of f64s to unsigned i32s, which must compile.
-+define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) {
-+  %words = fptoui <2 x double> %doubles to <2 x i32>
-+  store <2 x i32> %words, <2 x i32> *%ptr
-+  ret void
-+}
-+
-+; Test conversion of signed i32s to f64s, which must compile.
-+define <2 x double> @f7(<2 x i32> *%ptr) {
-+  %words = load <2 x i32> *%ptr
-+  %doubles = sitofp <2 x i32> %words to <2 x double>
-+  ret <2 x double> %doubles
-+}
-+
-+; Test conversion of unsigned i32s to f64s, which must compile.
-+define <2 x double> @f8(<2 x i32> *%ptr) {
-+  %words = load <2 x i32> *%ptr
-+  %doubles = uitofp <2 x i32> %words to <2 x double>
-+  ret <2 x double> %doubles
-+}
-+
-+; Test conversion of f32s to signed i64s, which must compile.
-+define <2 x i64> @f9(<2 x float> *%ptr) {
-+  %floats = load <2 x float> *%ptr
-+  %dwords = fptosi <2 x float> %floats to <2 x i64>
-+  ret <2 x i64> %dwords
-+}
-+
-+; Test conversion of f32s to unsigned i64s, which must compile.
-+define <2 x i64> @f10(<2 x float> *%ptr) {
-+  %floats = load <2 x float> *%ptr
-+  %dwords = fptoui <2 x float> %floats to <2 x i64>
-+  ret <2 x i64> %dwords
-+}
-+
-+; Test conversion of signed i64s to f32, which must compile.
-+define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) {
-+  %floats = sitofp <2 x i64> %dwords to <2 x float>
-+  store <2 x float> %floats, <2 x float> *%ptr
-+  ret void
-+}
-+
-+; Test conversion of unsigned i64s to f32, which must compile.
-+define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) {
-+  %floats = uitofp <2 x i64> %dwords to <2 x float>
-+  store <2 x float> %floats, <2 x float> *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll
-@@ -0,0 +1,33 @@
-+; Test conversions between different-sized float elements.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test cases where both elements of a v2f64 are converted to f32s.
-+define void @f1(<2 x double> %val, <2 x float> *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0
-+; CHECK: br %r14
-+  %res = fptrunc <2 x double> %val to <2 x float>
-+  store <2 x float> %res, <2 x float> *%ptr
-+  ret void
-+}
-+
-+; Test conversion of an f64 in a vector register to an f32.
-+define float @f2(<2 x double> %vec) {
-+; CHECK-LABEL: f2:
-+; CHECK: wledb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %vec, i32 0
-+  %ret = fptrunc double %scalar to float
-+  ret float %ret
-+}
-+
-+; Test conversion of an f32 in a vector register to an f64.
-+define double @f3(<4 x float> %vec) {
-+; CHECK-LABEL: f3:
-+; CHECK: wldeb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <4 x float> %vec, i32 0
-+  %ret = fpext float %scalar to double
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll
-@@ -0,0 +1,81 @@
-+; Test vector count leading zeros
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
-+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
-+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
-+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
-+
-+define <16 x i8> @f1(<16 x i8> %a) {
-+; CHECK-LABEL: f1:
-+; CHECK: vclzb %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false)
-+  ret <16 x i8> %res
-+}
-+
-+define <16 x i8> @f2(<16 x i8> %a) {
-+; CHECK-LABEL: f2:
-+; CHECK: vclzb %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true)
-+  ret <16 x i8> %res
-+}
-+
-+define <8 x i16> @f3(<8 x i16> %a) {
-+; CHECK-LABEL: f3:
-+; CHECK: vclzh %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false)
-+  ret <8 x i16> %res
-+}
-+
-+define <8 x i16> @f4(<8 x i16> %a) {
-+; CHECK-LABEL: f4:
-+; CHECK: vclzh %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true)
-+  ret <8 x i16> %res
-+}
-+
-+define <4 x i32> @f5(<4 x i32> %a) {
-+; CHECK-LABEL: f5:
-+; CHECK: vclzf %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false)
-+  ret <4 x i32> %res
-+}
-+
-+define <4 x i32> @f6(<4 x i32> %a) {
-+; CHECK-LABEL: f6:
-+; CHECK: vclzf %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true)
-+  ret <4 x i32> %res
-+}
-+
-+define <2 x i64> @f7(<2 x i64> %a) {
-+; CHECK-LABEL: f7:
-+; CHECK: vclzg %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false)
-+  ret <2 x i64> %res
-+}
-+
-+define <2 x i64> @f8(<2 x i64> %a) {
-+; CHECK-LABEL: f8:
-+; CHECK: vclzg %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true)
-+  ret <2 x i64> %res
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll
-@@ -0,0 +1,53 @@
-+; Test vector population-count instruction
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
-+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
-+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
-+
-+define <16 x i8> @f1(<16 x i8> %a) {
-+; CHECK-LABEL: f1:
-+; CHECK: vpopct  %v24, %v24, 0
-+; CHECK: br      %r14
-+
-+  %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a)
-+  ret <16 x i8> %popcnt
-+}
-+
-+define <8 x i16> @f2(<8 x i16> %a) {
-+; CHECK-LABEL: f2:
-+; CHECK: vpopct  [[T1:%v[0-9]+]], %v24, 0
-+; CHECK: veslh   [[T2:%v[0-9]+]], [[T1]], 8
-+; CHECK: vah     [[T3:%v[0-9]+]], [[T1]], [[T2]]
-+; CHECK: vesrlh  %v24, [[T3]], 8
-+; CHECK: br      %r14
-+
-+  %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a)
-+  ret <8 x i16> %popcnt
-+}
-+
-+define <4 x i32> @f3(<4 x i32> %a) {
-+; CHECK-LABEL: f3:
-+; CHECK: vpopct  [[T1:%v[0-9]+]], %v24, 0
-+; CHECK: vgbm    [[T2:%v[0-9]+]], 0
-+; CHECK: vsumb   %v24, [[T1]], [[T2]]
-+; CHECK: br      %r14
-+
-+  %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a)
-+  ret <4 x i32> %popcnt
-+}
-+
-+define <2 x i64> @f4(<2 x i64> %a) {
-+; CHECK-LABEL: f4:
-+; CHECK: vpopct  [[T1:%v[0-9]+]], %v24, 0
-+; CHECK: vgbm    [[T2:%v[0-9]+]], 0
-+; CHECK: vsumb   [[T3:%v[0-9]+]], [[T1]], [[T2]]
-+; CHECK: vsumgf  %v24, [[T3]], [[T2]]
-+; CHECK: br      %r14
-+
-+  %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a)
-+  ret <2 x i64> %popcnt
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll
-@@ -0,0 +1,81 @@
-+; Test vector count trailing zeros
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 %is_zero_undef)
-+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 %is_zero_undef)
-+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 %is_zero_undef)
-+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 %is_zero_undef)
-+
-+define <16 x i8> @f1(<16 x i8> %a) {
-+; CHECK-LABEL: f1:
-+; CHECK: vctzb %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false)
-+  ret <16 x i8> %res
-+}
-+
-+define <16 x i8> @f2(<16 x i8> %a) {
-+; CHECK-LABEL: f2:
-+; CHECK: vctzb %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true)
-+  ret <16 x i8> %res
-+}
-+
-+define <8 x i16> @f3(<8 x i16> %a) {
-+; CHECK-LABEL: f3:
-+; CHECK: vctzh %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false)
-+  ret <8 x i16> %res
-+}
-+
-+define <8 x i16> @f4(<8 x i16> %a) {
-+; CHECK-LABEL: f4:
-+; CHECK: vctzh %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true)
-+  ret <8 x i16> %res
-+}
-+
-+define <4 x i32> @f5(<4 x i32> %a) {
-+; CHECK-LABEL: f5:
-+; CHECK: vctzf %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false)
-+  ret <4 x i32> %res
-+}
-+
-+define <4 x i32> @f6(<4 x i32> %a) {
-+; CHECK-LABEL: f6:
-+; CHECK: vctzf %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true)
-+  ret <4 x i32> %res
-+}
-+
-+define <2 x i64> @f7(<2 x i64> %a) {
-+; CHECK-LABEL: f7:
-+; CHECK: vctzg %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false)
-+  ret <2 x i64> %res
-+}
-+
-+define <2 x i64> @f8(<2 x i64> %a) {
-+; CHECK-LABEL: f8:
-+; CHECK: vctzg %v24, %v24
-+; CHECK: br    %r14
-+
-+  %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true)
-+  ret <2 x i64> %res
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-div-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-div-01.ll
-@@ -0,0 +1,83 @@
-+; Test vector division.  There is no native integer support for this,
-+; so the integer cases are really a test of the operation legalization code.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 division.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgp [[REG:%v[0-9]+]],
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 0
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 1
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 2
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 3
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 4
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 5
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 6
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 8
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 9
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 10
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 11
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 12
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 13
-+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 14
-+; CHECK: br %r14
-+  %ret = sdiv <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 division.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlvgp [[REG:%v[0-9]+]],
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 0
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 1
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 2
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 4
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 5
-+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 6
-+; CHECK: br %r14
-+  %ret = sdiv <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 division.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlvgp [[REG:%v[0-9]+]],
-+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 0
-+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 2
-+; CHECK: br %r14
-+  %ret = sdiv <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 division.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlvgp %v24,
-+; CHECK: br %r14
-+  %ret = sdiv <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2f64 division.
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfddb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = fdiv <2 x double> %val1, %val2
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 division that uses vector registers.
-+define double @f6(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: wfddb %f0, %v24, %v26
-+; CHECK: br %r14
-+  %scalar1 = extractelement <2 x double> %val1, i32 0
-+  %scalar2 = extractelement <2 x double> %val2, i32 0
-+  %ret = fdiv double %scalar1, %scalar2
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll
-@@ -0,0 +1,13 @@
-+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a memory copy of a v2i32 (via the constant pool).
-+define void @f1(<2 x i32> *%dest) {
-+; CHECK-LABEL: f1:
-+; CHECK: lgrl [[REG:%r[0-5]]], {{[._A-Za-z0-9]}}
-+; CHECK: stg [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  store <2 x i32> <i32 1000000, i32 99999>, <2 x i32> *%dest
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll
-@@ -0,0 +1,15 @@
-+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a case where a vector extraction can be simplified to a scalar load.
-+; The index must be extended from i32 to i64.
-+define i32 @f1(<4 x i32> *%ptr, i32 %index) {
-+; CHECK-LABEL: f1:
-+; CHECK: risbg {{%r[0-5]}}, %r3, 30, 189, 2
-+; CHECK: l %r2,
-+; CHECK: br %r14
-+  %vec = load <4 x i32> *%ptr
-+  %res = extractelement <4 x i32> %vec, i32 %index
-+  ret i32 %res
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll
-@@ -0,0 +1,3335 @@
-+; Test vector intrinsics.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare i32 @llvm.s390.lcbb(i8 *, i32)
-+declare <16 x i8> @llvm.s390.vlbb(i8 *, i32)
-+declare <16 x i8> @llvm.s390.vll(i32, i8 *)
-+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32)
-+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>)
-+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>)
-+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>)
-+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>)
-+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>)
-+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>)
-+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>)
-+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>)
-+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>)
-+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>)
-+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>)
-+declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *)
-+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>)
-+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>)
-+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>)
-+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>)
-+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>)
-+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>)
-+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>)
-+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>)
-+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>)
-+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>)
-+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>)
-+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>)
-+declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vaq(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vavgf(<4 x i32>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>)
-+declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>)
-+declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>)
-+declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>)
-+declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>)
-+declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>)
-+declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32)
-+declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32)
-+declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
-+declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32)
-+declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32)
-+declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>)
-+declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>)
-+declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>)
-+declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>)
-+declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>)
-+declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>)
-+declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>)
-+declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>)
-+declare {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32>, <4 x i32>)
-+declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>)
-+declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>)
-+declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>)
-+declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32)
-+declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32)
-+declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32)
-+declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32)
-+declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32)
-+declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32)
-+declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32)
-+declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32)
-+declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vfeef(<4 x i32>, <4 x i32>)
-+declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>)
-+declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>)
-+declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>)
-+declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>)
-+declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>)
-+declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>)
-+declare <16 x i8> @llvm.s390.vistrb(<16 x i8>)
-+declare <8 x i16> @llvm.s390.vistrh(<8 x i16>)
-+declare <4 x i32> @llvm.s390.vistrf(<4 x i32>)
-+declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>)
-+declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>)
-+declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>)
-+declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
-+declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>,
-+                                            i32)
-+declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>,
-+                                            i32)
-+declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>,
-+                                            i32)
-+declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32)
-+declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32)
-+declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32)
-+declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x i8>,
-+                                             i32)
-+declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>,
-+                                             i32)
-+declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>,
-+                                             i32)
-+declare {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double>, <2 x double>)
-+declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>)
-+declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>)
-+declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32)
-+declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32)
-+
-+; LCBB with the lowest M3 operand.
-+define i32 @test_lcbb1(i8 *%ptr) {
-+; CHECK-LABEL: test_lcbb1:
-+; CHECK: lcbb %r2, 0(%r2), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0)
-+  ret i32 %res
-+}
-+
-+; LCBB with the highest M3 operand.
-+define i32 @test_lcbb2(i8 *%ptr) {
-+; CHECK-LABEL: test_lcbb2:
-+; CHECK: lcbb %r2, 0(%r2), 15
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15)
-+  ret i32 %res
-+}
-+
-+; LCBB with a displacement and index.
-+define i32 @test_lcbb3(i8 *%base, i64 %index) {
-+; CHECK-LABEL: test_lcbb3:
-+; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4
-+; CHECK: br %r14
-+  %add = add i64 %index, 4095
-+  %ptr = getelementptr i8 *%base, i64 %add
-+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4)
-+  ret i32 %res
-+}
-+
-+; LCBB with an out-of-range displacement.
-+define i32 @test_lcbb4(i8 *%base) {
-+; CHECK-LABEL: test_lcbb4:
-+; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5)
-+  ret i32 %res
-+}
-+
-+; VLBB with the lowest M3 operand.
-+define <16 x i8> @test_vlbb1(i8 *%ptr) {
-+; CHECK-LABEL: test_vlbb1:
-+; CHECK: vlbb %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0)
-+  ret <16 x i8> %res
-+}
-+
-+; VLBB with the highest M3 operand.
-+define <16 x i8> @test_vlbb2(i8 *%ptr) {
-+; CHECK-LABEL: test_vlbb2:
-+; CHECK: vlbb %v24, 0(%r2), 15
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15)
-+  ret <16 x i8> %res
-+}
-+
-+; VLBB with a displacement and index.
-+define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) {
-+; CHECK-LABEL: test_vlbb3:
-+; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4
-+; CHECK: br %r14
-+  %add = add i64 %index, 4095
-+  %ptr = getelementptr i8 *%base, i64 %add
-+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4)
-+  ret <16 x i8> %res
-+}
-+
-+; VLBB with an out-of-range displacement.
-+define <16 x i8> @test_vlbb4(i8 *%base) {
-+; CHECK-LABEL: test_vlbb4:
-+; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5)
-+  ret <16 x i8> %res
-+}
-+
-+; VLL with the lowest in-range displacement.
-+define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) {
-+; CHECK-LABEL: test_vll1:
-+; CHECK: vll %v24, %r3, 0(%r2)
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
-+  ret <16 x i8> %res
-+}
-+
-+; VLL with the highest in-range displacement.
-+define <16 x i8> @test_vll2(i8 *%base, i32 %length) {
-+; CHECK-LABEL: test_vll2:
-+; CHECK: vll %v24, %r3, 4095(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4095
-+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
-+  ret <16 x i8> %res
-+}
-+
-+; VLL with an out-of-range displacementa.
-+define <16 x i8> @test_vll3(i8 *%base, i32 %length) {
-+; CHECK-LABEL: test_vll3:
-+; CHECK: vll %v24, %r3, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
-+  ret <16 x i8> %res
-+}
-+
-+; Check that VLL doesn't allow an index.
-+define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) {
-+; CHECK-LABEL: test_vll4:
-+; CHECK: vll %v24, %r4, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 %index
-+  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
-+  ret <16 x i8> %res
-+}
-+
-+; VPDI taking element 0 from each half.
-+define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vpdi1:
-+; CHECK: vpdi %v24, %v24, %v26, 0
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0)
-+  ret <2 x i64> %res
-+}
-+
-+; VPDI taking element 1 from each half.
-+define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vpdi2:
-+; CHECK: vpdi %v24, %v24, %v26, 10
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10)
-+  ret <2 x i64> %res
-+}
-+
-+; VPERM.
-+define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vperm:
-+; CHECK: vperm %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b,
-+                                         <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VPKSH.
-+define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vpksh:
-+; CHECK: vpksh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VPKSF.
-+define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vpksf:
-+; CHECK: vpksf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VPKSG.
-+define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vpksg:
-+; CHECK: vpksg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VPKSHS with no processing of the result.
-+define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpkshs:
-+; CHECK: vpkshs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VPKSHS, storing to %ptr if all values were saturated.
-+define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vpkshs_all_store:
-+; CHECK: vpkshs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp uge i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <16 x i8> %res
-+}
-+
-+; VPKSFS with no processing of the result.
-+define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpksfs:
-+; CHECK: vpksfs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VPKSFS, storing to %ptr if any values were saturated.
-+define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vpksfs_any_store:
-+; CHECK: vpksfs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp ugt i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <8 x i16> %res
-+}
-+
-+; VPKSGS with no processing of the result.
-+define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpksgs:
-+; CHECK: vpksgs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VPKSGS, storing to %ptr if no elements were saturated
-+define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vpksgs_none_store:
-+; CHECK: vpksgs %v24, %v24, %v26
-+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp sle i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <4 x i32> %res
-+}
-+
-+; VPKLSH.
-+define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vpklsh:
-+; CHECK: vpklsh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VPKLSF.
-+define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vpklsf:
-+; CHECK: vpklsf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VPKLSG.
-+define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vpklsg:
-+; CHECK: vpklsg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VPKLSHS with no processing of the result.
-+define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpklshs:
-+; CHECK: vpklshs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VPKLSHS, storing to %ptr if all values were saturated.
-+define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vpklshs_all_store:
-+; CHECK: vpklshs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp eq i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <16 x i8> %res
-+}
-+
-+; VPKLSFS with no processing of the result.
-+define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpklsfs:
-+; CHECK: vpklsfs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VPKLSFS, storing to %ptr if any values were saturated.
-+define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vpklsfs_any_store:
-+; CHECK: vpklsfs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp ne i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <8 x i16> %res
-+}
-+
-+; VPKLSGS with no processing of the result.
-+define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vpklsgs:
-+; CHECK: vpklsgs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VPKLSGS, storing to %ptr if no elements were saturated
-+define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b,
-+                                          i32 *%ptr) {
-+; CHECK-LABEL: test_vpklsgs_none_store:
-+; CHECK: vpklsgs %v24, %v24, %v26
-+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp eq i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <4 x i32> %res
-+}
-+
-+; VSTL with the lowest in-range displacement.
-+define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) {
-+; CHECK-LABEL: test_vstl1:
-+; CHECK: vstl %v24, %r3, 0(%r2)
-+; CHECK: br %r14
-+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
-+  ret void
-+}
-+
-+; VSTL with the highest in-range displacement.
-+define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) {
-+; CHECK-LABEL: test_vstl2:
-+; CHECK: vstl %v24, %r3, 4095(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4095
-+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
-+  ret void
-+}
-+
-+; VSTL with an out-of-range displacement.
-+define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) {
-+; CHECK-LABEL: test_vstl3:
-+; CHECK: vstl %v24, %r3, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
-+  ret void
-+}
-+
-+; Check that VSTL doesn't allow an index.
-+define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) {
-+; CHECK-LABEL: test_vstl4:
-+; CHECK: vstl %v24, %r4, 0({{%r[1-5]}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 %index
-+  call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr)
-+  ret void
-+}
-+
-+; VUPHB.
-+define <8 x i16> @test_vuphb(<16 x i8> %a) {
-+; CHECK-LABEL: test_vuphb:
-+; CHECK: vuphb %v24, %v24
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a)
-+  ret <8 x i16> %res
-+}
-+
-+; VUPHH.
-+define <4 x i32> @test_vuphh(<8 x i16> %a) {
-+; CHECK-LABEL: test_vuphh:
-+; CHECK: vuphh %v24, %v24
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a)
-+  ret <4 x i32> %res
-+}
-+
-+; VUPHF.
-+define <2 x i64> @test_vuphf(<4 x i32> %a) {
-+; CHECK-LABEL: test_vuphf:
-+; CHECK: vuphf %v24, %v24
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a)
-+  ret <2 x i64> %res
-+}
-+
-+; VUPLHB.
-+define <8 x i16> @test_vuplhb(<16 x i8> %a) {
-+; CHECK-LABEL: test_vuplhb:
-+; CHECK: vuplhb %v24, %v24
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a)
-+  ret <8 x i16> %res
-+}
-+
-+; VUPLHH.
-+define <4 x i32> @test_vuplhh(<8 x i16> %a) {
-+; CHECK-LABEL: test_vuplhh:
-+; CHECK: vuplhh %v24, %v24
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a)
-+  ret <4 x i32> %res
-+}
-+
-+; VUPLHF.
-+define <2 x i64> @test_vuplhf(<4 x i32> %a) {
-+; CHECK-LABEL: test_vuplhf:
-+; CHECK: vuplhf %v24, %v24
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a)
-+  ret <2 x i64> %res
-+}
-+
-+; VUPLB.
-+define <8 x i16> @test_vuplb(<16 x i8> %a) {
-+; CHECK-LABEL: test_vuplb:
-+; CHECK: vuplb %v24, %v24
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a)
-+  ret <8 x i16> %res
-+}
-+
-+; VUPLHW.
-+define <4 x i32> @test_vuplhw(<8 x i16> %a) {
-+; CHECK-LABEL: test_vuplhw:
-+; CHECK: vuplhw %v24, %v24
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a)
-+  ret <4 x i32> %res
-+}
-+
-+; VUPLF.
-+define <2 x i64> @test_vuplf(<4 x i32> %a) {
-+; CHECK-LABEL: test_vuplf:
-+; CHECK: vuplf %v24, %v24
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a)
-+  ret <2 x i64> %res
-+}
-+
-+; VUPLLB.
-+define <8 x i16> @test_vupllb(<16 x i8> %a) {
-+; CHECK-LABEL: test_vupllb:
-+; CHECK: vupllb %v24, %v24
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a)
-+  ret <8 x i16> %res
-+}
-+
-+; VUPLLH.
-+define <4 x i32> @test_vupllh(<8 x i16> %a) {
-+; CHECK-LABEL: test_vupllh:
-+; CHECK: vupllh %v24, %v24
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a)
-+  ret <4 x i32> %res
-+}
-+
-+; VUPLLF.
-+define <2 x i64> @test_vupllf(<4 x i32> %a) {
-+; CHECK-LABEL: test_vupllf:
-+; CHECK: vupllf %v24, %v24
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a)
-+  ret <2 x i64> %res
-+}
-+
-+; VACCB.
-+define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vaccb:
-+; CHECK: vaccb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VACCH.
-+define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vacch:
-+; CHECK: vacch %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VACCF.
-+define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vaccf:
-+; CHECK: vaccf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VACCG.
-+define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vaccg:
-+; CHECK: vaccg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VAQ.
-+define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vaq:
-+; CHECK: vaq %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VACQ.
-+define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vacq:
-+; CHECK: vacq %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b,
-+                                        <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VACCQ.
-+define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vaccq:
-+; CHECK: vaccq %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VACCCQ.
-+define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vacccq:
-+; CHECK: vacccq %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VAVGB.
-+define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vavgb:
-+; CHECK: vavgb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VAVGH.
-+define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vavgh:
-+; CHECK: vavgh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VAVGF.
-+define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vavgf:
-+; CHECK: vavgf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VAVGG.
-+define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vavgg:
-+; CHECK: vavgg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VAVGLB.
-+define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vavglb:
-+; CHECK: vavglb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VAVGLH.
-+define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vavglh:
-+; CHECK: vavglh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VAVGLF.
-+define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vavglf:
-+; CHECK: vavglf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VAVGLG.
-+define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vavglg:
-+; CHECK: vavglg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VCKSM.
-+define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vcksm:
-+; CHECK: vcksm %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VGFMB.
-+define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vgfmb:
-+; CHECK: vgfmb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VGFMH.
-+define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vgfmh:
-+; CHECK: vgfmh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VGFMF.
-+define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vgfmf:
-+; CHECK: vgfmf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VGFMG.
-+define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vgfmg:
-+; CHECK: vgfmg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VGFMAB.
-+define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vgfmab:
-+; CHECK: vgfmab %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b,
-+                                          <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VGFMAH.
-+define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vgfmah:
-+; CHECK: vgfmah %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b,
-+                                          <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VGFMAF.
-+define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_vgfmaf:
-+; CHECK: vgfmaf %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b,
-+                                          <2 x i64> %c)
-+  ret <2 x i64> %res
-+}
-+
-+; VGFMAG.
-+define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vgfmag:
-+; CHECK: vgfmag %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b,
-+                                          <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VMAHB.
-+define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vmahb:
-+; CHECK: vmahb %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b,
-+                                         <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VMAHH.
-+define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmahh:
-+; CHECK: vmahh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b,
-+                                         <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMAHF.
-+define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmahf:
-+; CHECK: vmahf %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b,
-+                                         <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMALHB.
-+define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vmalhb:
-+; CHECK: vmalhb %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VMALHH.
-+define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmalhh:
-+; CHECK: vmalhh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b,
-+                                          <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMALHF.
-+define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmalhf:
-+; CHECK: vmalhf %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b,
-+                                          <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMAEB.
-+define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmaeb:
-+; CHECK: vmaeb %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b,
-+                                         <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMAEH.
-+define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmaeh:
-+; CHECK: vmaeh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b,
-+                                         <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMAEF.
-+define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_vmaef:
-+; CHECK: vmaef %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b,
-+                                         <2 x i64> %c)
-+  ret <2 x i64> %res
-+}
-+
-+; VMALEB.
-+define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmaleb:
-+; CHECK: vmaleb %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b,
-+                                          <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMALEH.
-+define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmaleh:
-+; CHECK: vmaleh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b,
-+                                          <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMALEF.
-+define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_vmalef:
-+; CHECK: vmalef %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b,
-+                                          <2 x i64> %c)
-+  ret <2 x i64> %res
-+}
-+
-+; VMAOB.
-+define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmaob:
-+; CHECK: vmaob %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b,
-+                                         <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMAOH.
-+define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmaoh:
-+; CHECK: vmaoh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b,
-+                                         <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMAOF.
-+define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_vmaof:
-+; CHECK: vmaof %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b,
-+                                         <2 x i64> %c)
-+  ret <2 x i64> %res
-+}
-+
-+; VMALOB.
-+define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vmalob:
-+; CHECK: vmalob %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b,
-+                                          <8 x i16> %c)
-+  ret <8 x i16> %res
-+}
-+
-+; VMALOH.
-+define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vmaloh:
-+; CHECK: vmaloh %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b,
-+                                          <4 x i32> %c)
-+  ret <4 x i32> %res
-+}
-+
-+; VMALOF.
-+define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_vmalof:
-+; CHECK: vmalof %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b,
-+                                          <2 x i64> %c)
-+  ret <2 x i64> %res
-+}
-+
-+; VMHB.
-+define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmhb:
-+; CHECK: vmhb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VMHH.
-+define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmhh:
-+; CHECK: vmhh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMHF.
-+define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmhf:
-+; CHECK: vmhf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMLHB.
-+define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmlhb:
-+; CHECK: vmlhb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VMLHH.
-+define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmlhh:
-+; CHECK: vmlhh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMLHF.
-+define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmlhf:
-+; CHECK: vmlhf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMEB.
-+define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmeb:
-+; CHECK: vmeb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMEH.
-+define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmeh:
-+; CHECK: vmeh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMEF.
-+define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmef:
-+; CHECK: vmef %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VMLEB.
-+define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmleb:
-+; CHECK: vmleb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMLEH.
-+define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmleh:
-+; CHECK: vmleh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMLEF.
-+define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmlef:
-+; CHECK: vmlef %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VMOB.
-+define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmob:
-+; CHECK: vmob %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMOH.
-+define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmoh:
-+; CHECK: vmoh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMOF.
-+define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmof:
-+; CHECK: vmof %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VMLOB.
-+define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vmlob:
-+; CHECK: vmlob %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VMLOH.
-+define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vmloh:
-+; CHECK: vmloh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VMLOF.
-+define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vmlof:
-+; CHECK: vmlof %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VERLLVB.
-+define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_verllvb:
-+; CHECK: verllvb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VERLLVH.
-+define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_verllvh:
-+; CHECK: verllvh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VERLLVF.
-+define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_verllvf:
-+; CHECK: verllvf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VERLLVG.
-+define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_verllvg:
-+; CHECK: verllvg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VERLLB.
-+define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) {
-+; CHECK-LABEL: test_verllb:
-+; CHECK: verllb %v24, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VERLLH.
-+define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) {
-+; CHECK-LABEL: test_verllh:
-+; CHECK: verllh %v24, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VERLLF.
-+define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) {
-+; CHECK-LABEL: test_verllf:
-+; CHECK: verllf %v24, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VERLLG.
-+define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) {
-+; CHECK-LABEL: test_verllg:
-+; CHECK: verllg %v24, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VERLLB with the smallest count.
-+define <16 x i8> @test_verllb_1(<16 x i8> %a) {
-+; CHECK-LABEL: test_verllb_1:
-+; CHECK: verllb %v24, %v24, 1
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VERLLB with the largest count.
-+define <16 x i8> @test_verllb_4095(<16 x i8> %a) {
-+; CHECK-LABEL: test_verllb_4095:
-+; CHECK: verllb %v24, %v24, 4095
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095)
-+  ret <16 x i8> %res
-+}
-+
-+; VERLLB with the largest count + 1.
-+define <16 x i8> @test_verllb_4096(<16 x i8> %a) {
-+; CHECK-LABEL: test_verllb_4096:
-+; CHECK: lhi [[REG:%r[1-5]]], 4096
-+; CHECK: verllb %v24, %v24, 0([[REG]])
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096)
-+  ret <16 x i8> %res
-+}
-+
-+; VERIMB.
-+define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_verimb:
-+; CHECK: verimb %v24, %v26, %v28, 1
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VERIMH.
-+define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_verimh:
-+; CHECK: verimh %v24, %v26, %v28, 1
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1)
-+  ret <8 x i16> %res
-+}
-+
-+; VERIMF.
-+define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_verimf:
-+; CHECK: verimf %v24, %v26, %v28, 1
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1)
-+  ret <4 x i32> %res
-+}
-+
-+; VERIMG.
-+define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) {
-+; CHECK-LABEL: test_verimg:
-+; CHECK: verimg %v24, %v26, %v28, 1
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1)
-+  ret <2 x i64> %res
-+}
-+
-+; VERIMB with a different mask.
-+define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_verimb_254:
-+; CHECK: verimb %v24, %v26, %v28, 254
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254)
-+  ret <16 x i8> %res
-+}
-+
-+; VSL.
-+define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsl:
-+; CHECK: vsl %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSLB.
-+define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vslb:
-+; CHECK: vslb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSRA.
-+define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsra:
-+; CHECK: vsra %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSRAB.
-+define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsrab:
-+; CHECK: vsrab %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSRL.
-+define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsrl:
-+; CHECK: vsrl %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSRLB.
-+define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsrlb:
-+; CHECK: vsrlb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSLDB with the minimum useful value.
-+define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsldb_1:
-+; CHECK: vsldb %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VSLDB with the maximum value.
-+define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsldb_15:
-+; CHECK: vsldb %v24, %v24, %v26, 15
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15)
-+  ret <16 x i8> %res
-+}
-+
-+; VSCBIB.
-+define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vscbib:
-+; CHECK: vscbib %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSCBIH.
-+define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vscbih:
-+; CHECK: vscbih %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VSCBIF.
-+define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vscbif:
-+; CHECK: vscbif %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VSCBIG.
-+define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vscbig:
-+; CHECK: vscbig %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VSQ.
-+define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsq:
-+; CHECK: vsq %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSBIQ.
-+define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vsbiq:
-+; CHECK: vsbiq %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b,
-+                                         <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VSCBIQ.
-+define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vscbiq:
-+; CHECK: vscbiq %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSBCBIQ.
-+define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vsbcbiq:
-+; CHECK: vsbcbiq %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c)
-+  ret <16 x i8> %res
-+}
-+
-+; VSUMB.
-+define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vsumb:
-+; CHECK: vsumb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VSUMH.
-+define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vsumh:
-+; CHECK: vsumh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VSUMGH.
-+define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vsumgh:
-+; CHECK: vsumgh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VSUMGF.
-+define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vsumgf:
-+; CHECK: vsumgf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b)
-+  ret <2 x i64> %res
-+}
-+
-+; VSUMQF.
-+define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vsumqf:
-+; CHECK: vsumqf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VSUMQG.
-+define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vsumqg:
-+; CHECK: vsumqg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VTM with no processing of the result.
-+define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vtm:
-+; CHECK: vtm %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
-+  ret i32 %res
-+}
-+
-+; VTM, storing to %ptr if all bits are set.
-+define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vtm_all_store:
-+; CHECK-NOT: %r
-+; CHECK: vtm %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b)
-+  %cmp = icmp sge i32 %res, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret void
-+}
-+
-+; VCEQBS with no processing of the result.
-+define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vceqbs:
-+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCEQBS, returning 1 if any elements are equal (CC != 3).
-+define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vceqbs_any_bool:
-+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -536870912
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp ne i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCEQBS, storing to %ptr if any elements are equal.
-+define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vceqbs_any_store:
-+; CHECK-NOT: %r
-+; CHECK: vceqbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp ule i32 %cc, 2
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <16 x i8> %res
-+}
-+
-+; VCEQHS with no processing of the result.
-+define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vceqhs:
-+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCEQHS, returning 1 if not all elements are equal.
-+define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vceqhs_notall_bool:
-+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 36
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp sge i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCEQHS, storing to %ptr if not all elements are equal.
-+define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b,
-+                                           i32 *%ptr) {
-+; CHECK-LABEL: test_vceqhs_notall_store:
-+; CHECK-NOT: %r
-+; CHECK: vceqhs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp ugt i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <8 x i16> %res
-+}
-+
-+; VCEQFS with no processing of the result.
-+define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vceqfs:
-+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCEQFS, returning 1 if no elements are equal.
-+define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vceqfs_none_bool:
-+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 35
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp eq i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCEQFS, storing to %ptr if no elements are equal.
-+define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vceqfs_none_store:
-+; CHECK-NOT: %r
-+; CHECK: vceqfs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp uge i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <4 x i32> %res
-+}
-+
-+; VCEQGS with no processing of the result.
-+define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vceqgs:
-+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCEQGS returning 1 if all elements are equal (CC == 0).
-+define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vceqgs_all_bool:
-+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -268435456
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ult i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCEQGS, storing to %ptr if all elements are equal.
-+define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vceqgs_all_store:
-+; CHECK-NOT: %r
-+; CHECK: vceqgs %v24, %v24, %v26
-+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp sle i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VCHBS with no processing of the result.
-+define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vchbs:
-+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHBS, returning 1 if any elements are higher (CC != 3).
-+define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vchbs_any_bool:
-+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -536870912
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp ne i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHBS, storing to %ptr if any elements are higher.
-+define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vchbs_any_store:
-+; CHECK-NOT: %r
-+; CHECK: vchbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp ule i32 %cc, 2
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <16 x i8> %res
-+}
-+
-+; VCHHS with no processing of the result.
-+define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vchhs:
-+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHHS, returning 1 if not all elements are higher.
-+define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vchhs_notall_bool:
-+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 36
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp sge i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHHS, storing to %ptr if not all elements are higher.
-+define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b,
-+                                          i32 *%ptr) {
-+; CHECK-LABEL: test_vchhs_notall_store:
-+; CHECK-NOT: %r
-+; CHECK: vchhs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp ugt i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <8 x i16> %res
-+}
-+
-+; VCHFS with no processing of the result.
-+define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vchfs:
-+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHFS, returning 1 if no elements are higher.
-+define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vchfs_none_bool:
-+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 35
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp eq i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHFS, storing to %ptr if no elements are higher.
-+define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vchfs_none_store:
-+; CHECK-NOT: %r
-+; CHECK: vchfs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp uge i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <4 x i32> %res
-+}
-+
-+; VCHGS with no processing of the result.
-+define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vchgs:
-+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHGS returning 1 if all elements are higher (CC == 0).
-+define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vchgs_all_bool:
-+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -268435456
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ult i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHGS, storing to %ptr if all elements are higher.
-+define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vchgs_all_store:
-+; CHECK-NOT: %r
-+; CHECK: vchgs %v24, %v24, %v26
-+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp sle i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VCHLBS with no processing of the result.
-+define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vchlbs:
-+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHLBS, returning 1 if any elements are higher (CC != 3).
-+define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vchlbs_any_bool:
-+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -536870912
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp ne i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHLBS, storing to %ptr if any elements are higher.
-+define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vchlbs_any_store:
-+; CHECK-NOT: %r
-+; CHECK: vchlbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  %cmp = icmp sle i32 %cc, 2
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <16 x i8> %res
-+}
-+
-+; VCHLHS with no processing of the result.
-+define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vchlhs:
-+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHLHS, returning 1 if not all elements are higher.
-+define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vchlhs_notall_bool:
-+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 36
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp uge i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHLHS, storing to %ptr if not all elements are higher.
-+define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b,
-+                                           i32 *%ptr) {
-+; CHECK-LABEL: test_vchlhs_notall_store:
-+; CHECK-NOT: %r
-+; CHECK: vchlhs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  %cmp = icmp sgt i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <8 x i16> %res
-+}
-+
-+; VCHLFS with no processing of the result.
-+define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vchlfs:
-+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHLFS, returning 1 if no elements are higher.
-+define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vchlfs_none_bool:
-+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 35
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp eq i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHLFS, storing to %ptr if no elements are higher.
-+define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vchlfs_none_store:
-+; CHECK-NOT: %r
-+; CHECK: vchlfs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  %cmp = icmp sge i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <4 x i32> %res
-+}
-+
-+; VCHLGS with no processing of the result.
-+define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vchlgs:
-+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VCHLGS returning 1 if all elements are higher (CC == 0).
-+define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) {
-+; CHECK-LABEL: test_vchlgs_all_bool:
-+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -268435456
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp slt i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VCHLGS, storing to %ptr if all elements are higher.
-+define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) {
-+; CHECK-LABEL: test_vchlgs_all_store:
-+; CHECK-NOT: %r
-+; CHECK: vchlgs %v24, %v24, %v26
-+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ule i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VFAEB with !IN !RT.
-+define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaeb_0:
-+; CHECK: vfaeb %v24, %v24, %v26, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEB with !IN RT.
-+define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaeb_4:
-+; CHECK: vfaeb %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEB with IN !RT.
-+define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaeb_8:
-+; CHECK: vfaeb %v24, %v24, %v26, 8
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEB with IN RT.
-+define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaeb_12:
-+; CHECK: vfaeb %v24, %v24, %v26, 12
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEB with CS -- should be ignored.
-+define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaeb_1:
-+; CHECK: vfaeb %v24, %v24, %v26, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEH.
-+define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfaeh:
-+; CHECK: vfaeh %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4)
-+  ret <8 x i16> %res
-+}
-+
-+; VFAEF.
-+define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfaef:
-+; CHECK: vfaef %v24, %v24, %v26, 8
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8)
-+  ret <4 x i32> %res
-+}
-+
-+; VFAEBS.
-+define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaebs:
-+; CHECK: vfaebs %v24, %v24, %v26, 0
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b,
-+                                                  i32 0)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEHS.
-+define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaehs:
-+; CHECK: vfaehs %v24, %v24, %v26, 4
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b,
-+                                                  i32 4)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFAEFS.
-+define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaefs:
-+; CHECK: vfaefs %v24, %v24, %v26, 8
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b,
-+                                                  i32 8)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFAEZB with !IN !RT.
-+define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaezb_0:
-+; CHECK: vfaezb %v24, %v24, %v26, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZB with !IN RT.
-+define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaezb_4:
-+; CHECK: vfaezb %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZB with IN !RT.
-+define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaezb_8:
-+; CHECK: vfaezb %v24, %v24, %v26, 8
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZB with IN RT.
-+define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaezb_12:
-+; CHECK: vfaezb %v24, %v24, %v26, 12
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZB with CS -- should be ignored.
-+define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfaezb_1:
-+; CHECK: vfaezb %v24, %v24, %v26, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZH.
-+define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfaezh:
-+; CHECK: vfaezh %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4)
-+  ret <8 x i16> %res
-+}
-+
-+; VFAEZF.
-+define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfaezf:
-+; CHECK: vfaezf %v24, %v24, %v26, 8
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8)
-+  ret <4 x i32> %res
-+}
-+
-+; VFAEZBS.
-+define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaezbs:
-+; CHECK: vfaezbs %v24, %v24, %v26, 0
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b,
-+                                                   i32 0)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFAEZHS.
-+define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaezhs:
-+; CHECK: vfaezhs %v24, %v24, %v26, 4
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b,
-+                                                   i32 4)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFAEZFS.
-+define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfaezfs:
-+; CHECK: vfaezfs %v24, %v24, %v26, 8
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b,
-+                                                   i32 8)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFEEB.
-+define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfeeb_0:
-+; CHECK: vfeeb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VFEEH.
-+define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfeeh:
-+; CHECK: vfeeh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VFEEF.
-+define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfeef:
-+; CHECK: vfeef %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VFEEBS.
-+define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeebs:
-+; CHECK: vfeebs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFEEHS.
-+define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeehs:
-+; CHECK: vfeehs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFEEFS.
-+define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeefs:
-+; CHECK: vfeefs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFEEZB.
-+define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfeezb:
-+; CHECK: vfeezb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VFEEZH.
-+define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfeezh:
-+; CHECK: vfeezh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VFEEZF.
-+define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfeezf:
-+; CHECK: vfeezf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VFEEZBS.
-+define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeezbs:
-+; CHECK: vfeezbs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFEEZHS.
-+define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeezhs:
-+; CHECK: vfeezhs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFEEZFS.
-+define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfeezfs:
-+; CHECK: vfeezfs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFENEB.
-+define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfeneb_0:
-+; CHECK: vfeneb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VFENEH.
-+define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfeneh:
-+; CHECK: vfeneh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VFENEF.
-+define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfenef:
-+; CHECK: vfenef %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VFENEBS.
-+define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenebs:
-+; CHECK: vfenebs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFENEHS.
-+define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenehs:
-+; CHECK: vfenehs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFENEFS.
-+define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenefs:
-+; CHECK: vfenefs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFENEZB.
-+define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) {
-+; CHECK-LABEL: test_vfenezb:
-+; CHECK: vfenezb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b)
-+  ret <16 x i8> %res
-+}
-+
-+; VFENEZH.
-+define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) {
-+; CHECK-LABEL: test_vfenezh:
-+; CHECK: vfenezh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b)
-+  ret <8 x i16> %res
-+}
-+
-+; VFENEZF.
-+define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) {
-+; CHECK-LABEL: test_vfenezf:
-+; CHECK: vfenezf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b)
-+  ret <4 x i32> %res
-+}
-+
-+; VFENEZBS.
-+define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenezbs:
-+; CHECK: vfenezbs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VFENEZHS.
-+define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenezhs:
-+; CHECK: vfenezhs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VFENEZFS.
-+define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) {
-+; CHECK-LABEL: test_vfenezfs:
-+; CHECK: vfenezfs %v24, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VISTRB.
-+define <16 x i8> @test_vistrb(<16 x i8> %a) {
-+; CHECK-LABEL: test_vistrb:
-+; CHECK: vistrb %v24, %v24
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a)
-+  ret <16 x i8> %res
-+}
-+
-+; VISTRH.
-+define <8 x i16> @test_vistrh(<8 x i16> %a) {
-+; CHECK-LABEL: test_vistrh:
-+; CHECK: vistrh %v24, %v24
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a)
-+  ret <8 x i16> %res
-+}
-+
-+; VISTRF.
-+define <4 x i32> @test_vistrf(<4 x i32> %a) {
-+; CHECK-LABEL: test_vistrf:
-+; CHECK: vistrf %v24, %v24
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a)
-+  ret <4 x i32> %res
-+}
-+
-+; VISTRBS.
-+define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) {
-+; CHECK-LABEL: test_vistrbs:
-+; CHECK: vistrbs %v24, %v24
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VISTRHS.
-+define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) {
-+; CHECK-LABEL: test_vistrhs:
-+; CHECK: vistrhs %v24, %v24
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VISTRFS.
-+define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) {
-+; CHECK-LABEL: test_vistrfs:
-+; CHECK: vistrfs %v24, %v24
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VSTRCB with !IN !RT.
-+define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrcb_0:
-+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c, i32 0)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCB with !IN RT.
-+define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrcb_4:
-+; CHECK: vstrcb %v24, %v24, %v26, %v28, 4
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c, i32 4)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCB with IN !RT.
-+define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrcb_8:
-+; CHECK: vstrcb %v24, %v24, %v26, %v28, 8
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c, i32 8)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCB with IN RT.
-+define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrcb_12:
-+; CHECK: vstrcb %v24, %v24, %v26, %v28, 12
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c, i32 12)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCB with CS -- should be ignored.
-+define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrcb_1:
-+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b,
-+                                          <16 x i8> %c, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCH.
-+define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vstrch:
-+; CHECK: vstrch %v24, %v24, %v26, %v28, 4
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b,
-+                                          <8 x i16> %c, i32 4)
-+  ret <8 x i16> %res
-+}
-+
-+; VSTRCF.
-+define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vstrcf:
-+; CHECK: vstrcf %v24, %v24, %v26, %v28, 8
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b,
-+                                          <4 x i32> %c, i32 8)
-+  ret <4 x i32> %res
-+}
-+
-+; VSTRCBS.
-+define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
-+                               i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrcbs:
-+; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b,
-+                                                   <16 x i8> %c, i32 0)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCHS.
-+define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
-+                               i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrchs:
-+; CHECK: vstrchs %v24, %v24, %v26, %v28, 4
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b,
-+                                                   <8 x i16> %c, i32 4)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VSTRCFS.
-+define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
-+                               i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrcfs:
-+; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b,
-+                                                   <4 x i32> %c, i32 8)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VSTRCZB with !IN !RT.
-+define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrczb_0:
-+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c, i32 0)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZB with !IN RT.
-+define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrczb_4:
-+; CHECK: vstrczb %v24, %v24, %v26, %v28, 4
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c, i32 4)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZB with IN !RT.
-+define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrczb_8:
-+; CHECK: vstrczb %v24, %v24, %v26, %v28, 8
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c, i32 8)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZB with IN RT.
-+define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrczb_12:
-+; CHECK: vstrczb %v24, %v24, %v26, %v28, 12
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c, i32 12)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZB with CS -- should be ignored.
-+define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
-+; CHECK-LABEL: test_vstrczb_1:
-+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0
-+; CHECK: br %r14
-+  %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b,
-+                                           <16 x i8> %c, i32 1)
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZH.
-+define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
-+; CHECK-LABEL: test_vstrczh:
-+; CHECK: vstrczh %v24, %v24, %v26, %v28, 4
-+; CHECK: br %r14
-+  %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b,
-+                                           <8 x i16> %c,  i32 4)
-+  ret <8 x i16> %res
-+}
-+
-+; VSTRCZF.
-+define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
-+; CHECK-LABEL: test_vstrczf:
-+; CHECK: vstrczf %v24, %v24, %v26, %v28, 8
-+; CHECK: br %r14
-+  %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b,
-+                                           <4 x i32> %c, i32 8)
-+  ret <4 x i32> %res
-+}
-+
-+; VSTRCZBS.
-+define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c,
-+                                i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrczbs:
-+; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b,
-+                                                    <16 x i8> %c, i32 0)
-+  %res = extractvalue {<16 x i8>, i32} %call, 0
-+  %cc = extractvalue {<16 x i8>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <16 x i8> %res
-+}
-+
-+; VSTRCZHS.
-+define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c,
-+                                i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrczhs:
-+; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b,
-+                                                    <8 x i16> %c, i32 4)
-+  %res = extractvalue {<8 x i16>, i32} %call, 0
-+  %cc = extractvalue {<8 x i16>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <8 x i16> %res
-+}
-+
-+; VSTRCZFS.
-+define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c,
-+                                i32 *%ccptr) {
-+; CHECK-LABEL: test_vstrczfs:
-+; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: srl [[REG]], 28
-+; CHECK: st [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b,
-+                                                    <4 x i32> %c, i32 8)
-+  %res = extractvalue {<4 x i32>, i32} %call, 0
-+  %cc = extractvalue {<4 x i32>, i32} %call, 1
-+  store i32 %cc, i32 *%ccptr
-+  ret <4 x i32> %res
-+}
-+
-+; VFCEDBS with no processing of the result.
-+define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfcedbs:
-+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VFCEDBS, returning 1 if any elements are equal (CC != 3).
-+define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfcedbs_any_bool:
-+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: afi %r2, -536870912
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ne i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VFCEDBS, storing to %ptr if any elements are equal.
-+define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b,
-+                                         i32 *%ptr) {
-+; CHECK-LABEL: test_vfcedbs_any_store:
-+; CHECK-NOT: %r
-+; CHECK: vfcedbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jo|jnle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ule i32 %cc, 2
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VFCHDBS with no processing of the result.
-+define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfchdbs:
-+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VFCHDBS, returning 1 if not all elements are higher.
-+define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfchdbs_notall_bool:
-+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 36
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp sge i32 %res, 1
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VFCHDBS, storing to %ptr if not all elements are higher.
-+define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b,
-+                                            i32 *%ptr) {
-+; CHECK-LABEL: test_vfchdbs_notall_store:
-+; CHECK-NOT: %r
-+; CHECK: vfchdbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jhe|je}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a,
-+                                                   <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp ugt i32 %cc, 0
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VFCHEDBS with no processing of the result.
-+define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfchedbs:
-+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
-+						    <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VFCHEDBS, returning 1 if neither element is higher or equal.
-+define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) {
-+; CHECK-LABEL: test_vfchedbs_none_bool:
-+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26
-+; CHECK: ipm [[REG:%r[0-5]]]
-+; CHECK: risblg %r2, [[REG]], 31, 159, 35
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
-+						    <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp eq i32 %res, 3
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VFCHEDBS, storing to %ptr if neither element is higher or equal.
-+define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b,
-+                                           i32 *%ptr) {
-+; CHECK-LABEL: test_vfchedbs_none_store:
-+; CHECK-NOT: %r
-+; CHECK: vfchedbs %v24, %v24, %v26
-+; CHECK-NEXT: {{jno|jle}} {{\.L*}}
-+; CHECK: mvhi 0(%r2), 0
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a,
-+						    <2 x double> %b)
-+  %res = extractvalue {<2 x i64>, i32} %call, 0
-+  %cc = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp uge i32 %cc, 3
-+  br i1 %cmp, label %store, label %exit
-+
-+store:
-+  store i32 0, i32 *%ptr
-+  br label %exit
-+
-+exit:
-+  ret <2 x i64> %res
-+}
-+
-+; VFTCIDB with the lowest useful class selector and no processing of the result.
-+define i32 @test_vftcidb(<2 x double> %a) {
-+; CHECK-LABEL: test_vftcidb:
-+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1
-+; CHECK: ipm %r2
-+; CHECK: srl %r2, 28
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  ret i32 %res
-+}
-+
-+; VFTCIDB with the highest useful class selector, returning 1 if all elements
-+; have the right class (CC == 0).
-+define i32 @test_vftcidb_all_bool(<2 x double> %a) {
-+; CHECK-LABEL: test_vftcidb_all_bool:
-+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094
-+; CHECK: afi %r2, -268435456
-+; CHECK: srl %r2, 31
-+; CHECK: br %r14
-+  %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094)
-+  %res = extractvalue {<2 x i64>, i32} %call, 1
-+  %cmp = icmp eq i32 %res, 0
-+  %ext = zext i1 %cmp to i32
-+  ret i32 %ext
-+}
-+
-+; VFIDB with a rounding mode not usable via standard intrinsics.
-+define <2 x double> @test_vfidb_0_4(<2 x double> %a) {
-+; CHECK-LABEL: test_vfidb_0_4:
-+; CHECK: vfidb %v24, %v24, 0, 4
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4)
-+  ret <2 x double> %res
-+}
-+
-+; VFIDB with IEEE-inexact exception suppressed.
-+define <2 x double> @test_vfidb_4_0(<2 x double> %a) {
-+; CHECK-LABEL: test_vfidb_4_0:
-+; CHECK: vfidb %v24, %v24, 4, 0
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0)
-+  ret <2 x double> %res
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-log-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-log-01.ll
-@@ -0,0 +1,15 @@
-+; Test v2f64 logarithm.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare <2 x double> @llvm.log.v2f64(<2 x double>)
-+
-+define <2 x double> @f1(<2 x double> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: brasl %r14, log@PLT
-+; CHECK: brasl %r14, log@PLT
-+; CHECK: vmrhg %v24,
-+; CHECK: br %r14
-+  %ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val)
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-max-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-max-01.ll
-@@ -0,0 +1,83 @@
-+; Test v16i8 maximum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sle.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sgt.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sge.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ult.
-+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ule.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ugt.
-+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with uge.
-+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <16 x i8> %val1, %val2
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-max-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-max-02.ll
-@@ -0,0 +1,83 @@
-+; Test v8i16 maximum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sle.
-+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sgt.
-+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sge.
-+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ult.
-+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ule.
-+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ugt.
-+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with uge.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <8 x i16> %val1, %val2
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-max-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-max-03.ll
-@@ -0,0 +1,83 @@
-+; Test v4i32 maximum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sle.
-+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sgt.
-+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sge.
-+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ult.
-+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ule.
-+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ugt.
-+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with uge.
-+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <4 x i32> %val1, %val2
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-max-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-max-04.ll
-@@ -0,0 +1,83 @@
-+; Test v2i64 maximum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sle.
-+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sgt.
-+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sge.
-+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ult.
-+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ule.
-+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ugt.
-+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with uge.
-+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <2 x i64> %val1, %val2
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-min-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-min-01.ll
-@@ -0,0 +1,83 @@
-+; Test v16i8 minimum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sle.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sgt.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with sge.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ult.
-+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ule.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with ugt.
-+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test with uge.
-+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <16 x i8> %val2, %val1
-+  %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-min-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-min-02.ll
-@@ -0,0 +1,83 @@
-+; Test v8i16 minimum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sle.
-+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sgt.
-+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with sge.
-+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ult.
-+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ule.
-+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with ugt.
-+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test with uge.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <8 x i16> %val2, %val1
-+  %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2
-+  ret <8 x i16> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-min-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-min-03.ll
-@@ -0,0 +1,83 @@
-+; Test v4i32 minimum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sle.
-+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sgt.
-+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with sge.
-+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ult.
-+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ule.
-+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with ugt.
-+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test with uge.
-+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <4 x i32> %val2, %val1
-+  %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2
-+  ret <4 x i32> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-min-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-min-04.ll
-@@ -0,0 +1,83 @@
-+; Test v2i64 minimum.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test with slt.
-+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp slt <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sle.
-+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sle <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sgt.
-+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sgt <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with sge.
-+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp sge <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ult.
-+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ult <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ule.
-+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ule <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with ugt.
-+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp ugt <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test with uge.
-+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}}
-+; CHECK: br %r14
-+  %cmp = icmp uge <2 x i64> %val2, %val1
-+  %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-01.ll
-@@ -0,0 +1,107 @@
-+; Test vector register moves.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 moves.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <16 x i8> %val2
-+}
-+
-+; Test v8i16 moves.
-+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <8 x i16> %val2
-+}
-+
-+; Test v4i32 moves.
-+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <4 x i32> %val2
-+}
-+
-+; Test v2i64 moves.
-+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x i64> %val2
-+}
-+
-+; Test v4f32 moves.
-+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <4 x float> %val2
-+}
-+
-+; Test v2f64 moves.
-+define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x double> %val2
-+}
-+
-+; Test v2i8 moves.
-+define <2 x i8> @f7(<2 x i8> %val1, <2 x i8> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x i8> %val2
-+}
-+
-+; Test v4i8 moves.
-+define <4 x i8> @f8(<4 x i8> %val1, <4 x i8> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <4 x i8> %val2
-+}
-+
-+; Test v8i8 moves.
-+define <8 x i8> @f9(<8 x i8> %val1, <8 x i8> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <8 x i8> %val2
-+}
-+
-+; Test v2i16 moves.
-+define <2 x i16> @f10(<2 x i16> %val1, <2 x i16> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x i16> %val2
-+}
-+
-+; Test v4i16 moves.
-+define <4 x i16> @f11(<4 x i16> %val1, <4 x i16> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <4 x i16> %val2
-+}
-+
-+; Test v2i32 moves.
-+define <2 x i32> @f12(<2 x i32> %val1, <2 x i32> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x i32> %val2
-+}
-+
-+; Test v2f32 moves.
-+define <2 x float> @f13(<2 x float> %val1, <2 x float> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlr %v24, %v26
-+; CHECK: br %r14
-+  ret <2 x float> %val2
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-02.ll
-@@ -0,0 +1,174 @@
-+; Test vector loads.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 loads.
-+define <16 x i8> @f1(<16 x i8> *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <16 x i8> *%ptr
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 loads.
-+define <8 x i16> @f2(<8 x i16> *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <8 x i16> *%ptr
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 loads.
-+define <4 x i32> @f3(<4 x i32> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <4 x i32> *%ptr
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 loads.
-+define <2 x i64> @f4(<2 x i64> *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x i64> *%ptr
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 loads.
-+define <4 x float> @f5(<4 x float> *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <4 x float> *%ptr
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 loads.
-+define <2 x double> @f6(<2 x double> *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x double> *%ptr
-+  ret <2 x double> %ret
-+}
-+
-+; Test the highest aligned in-range offset.
-+define <16 x i8> @f7(<16 x i8> *%base) {
-+; CHECK-LABEL: f7:
-+; CHECK: vl %v24, 4080(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 255
-+  %ret = load <16 x i8> *%ptr
-+  ret <16 x i8> %ret
-+}
-+
-+; Test the highest unaligned in-range offset.
-+define <16 x i8> @f8(i8 *%base) {
-+; CHECK-LABEL: f8:
-+; CHECK: vl %v24, 4095(%r2)
-+; CHECK: br %r14
-+  %addr = getelementptr i8 *%base, i64 4095
-+  %ptr = bitcast i8 *%addr to <16 x i8> *
-+  %ret = load <16 x i8> *%ptr, align 1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test the next offset up, which requires separate address logic,
-+define <16 x i8> @f9(<16 x i8> *%base) {
-+; CHECK-LABEL: f9:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 256
-+  %ret = load <16 x i8> *%ptr
-+  ret <16 x i8> %ret
-+}
-+
-+; Test negative offsets, which also require separate address logic,
-+define <16 x i8> @f10(<16 x i8> *%base) {
-+; CHECK-LABEL: f10:
-+; CHECK: aghi %r2, -16
-+; CHECK: vl %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 -1
-+  %ret = load <16 x i8> *%ptr
-+  ret <16 x i8> %ret
-+}
-+
-+; Check that indexes are allowed.
-+define <16 x i8> @f11(i8 *%base, i64 %index) {
-+; CHECK-LABEL: f11:
-+; CHECK: vl %v24, 0(%r3,%r2)
-+; CHECK: br %r14
-+  %addr = getelementptr i8 *%base, i64 %index
-+  %ptr = bitcast i8 *%addr to <16 x i8> *
-+  %ret = load <16 x i8> *%ptr, align 1
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v2i8 loads.
-+define <2 x i8> @f12(<2 x i8> *%ptr) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlreph %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x i8> *%ptr
-+  ret <2 x i8> %ret
-+}
-+
-+; Test v4i8 loads.
-+define <4 x i8> @f13(<4 x i8> *%ptr) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <4 x i8> *%ptr
-+  ret <4 x i8> %ret
-+}
-+
-+; Test v8i8 loads.
-+define <8 x i8> @f14(<8 x i8> *%ptr) {
-+; CHECK-LABEL: f14:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <8 x i8> *%ptr
-+  ret <8 x i8> %ret
-+}
-+
-+; Test v2i16 loads.
-+define <2 x i16> @f15(<2 x i16> *%ptr) {
-+; CHECK-LABEL: f15:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x i16> *%ptr
-+  ret <2 x i16> %ret
-+}
-+
-+; Test v4i16 loads.
-+define <4 x i16> @f16(<4 x i16> *%ptr) {
-+; CHECK-LABEL: f16:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <4 x i16> *%ptr
-+  ret <4 x i16> %ret
-+}
-+
-+; Test v2i32 loads.
-+define <2 x i32> @f17(<2 x i32> *%ptr) {
-+; CHECK-LABEL: f17:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x i32> *%ptr
-+  ret <2 x i32> %ret
-+}
-+
-+; Test v2f32 loads.
-+define <2 x float> @f18(<2 x float> *%ptr) {
-+; CHECK-LABEL: f18:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = load <2 x float> *%ptr
-+  ret <2 x float> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-03.ll
-@@ -0,0 +1,174 @@
-+; Test vector stores.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 stores.
-+define void @f1(<16 x i8> %val, <16 x i8> *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <16 x i8> %val, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 stores.
-+define void @f2(<8 x i16> %val, <8 x i16> *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <8 x i16> %val, <8 x i16> *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 stores.
-+define void @f3(<4 x i32> %val, <4 x i32> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <4 x i32> %val, <4 x i32> *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 stores.
-+define void @f4(<2 x i64> %val, <2 x i64> *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <2 x i64> %val, <2 x i64> *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 stores.
-+define void @f5(<4 x float> %val, <4 x float> *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <4 x float> %val, <4 x float> *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 stores.
-+define void @f6(<2 x double> %val, <2 x double> *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <2 x double> %val, <2 x double> *%ptr
-+  ret void
-+}
-+
-+; Test the highest aligned in-range offset.
-+define void @f7(<16 x i8> %val, <16 x i8> *%base) {
-+; CHECK-LABEL: f7:
-+; CHECK: vst %v24, 4080(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 255
-+  store <16 x i8> %val, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test the highest unaligned in-range offset.
-+define void @f8(<16 x i8> %val, i8 *%base) {
-+; CHECK-LABEL: f8:
-+; CHECK: vst %v24, 4095(%r2)
-+; CHECK: br %r14
-+  %addr = getelementptr i8 *%base, i64 4095
-+  %ptr = bitcast i8 *%addr to <16 x i8> *
-+  store <16 x i8> %val, <16 x i8> *%ptr, align 1
-+  ret void
-+}
-+
-+; Test the next offset up, which requires separate address logic,
-+define void @f9(<16 x i8> %val, <16 x i8> *%base) {
-+; CHECK-LABEL: f9:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 256
-+  store <16 x i8> %val, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test negative offsets, which also require separate address logic,
-+define void @f10(<16 x i8> %val, <16 x i8> *%base) {
-+; CHECK-LABEL: f10:
-+; CHECK: aghi %r2, -16
-+; CHECK: vst %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr <16 x i8> *%base, i64 -1
-+  store <16 x i8> %val, <16 x i8> *%ptr
-+  ret void
-+}
-+
-+; Check that indexes are allowed.
-+define void @f11(<16 x i8> %val, i8 *%base, i64 %index) {
-+; CHECK-LABEL: f11:
-+; CHECK: vst %v24, 0(%r3,%r2)
-+; CHECK: br %r14
-+  %addr = getelementptr i8 *%base, i64 %index
-+  %ptr = bitcast i8 *%addr to <16 x i8> *
-+  store <16 x i8> %val, <16 x i8> *%ptr, align 1
-+  ret void
-+}
-+
-+; Test v2i8 stores.
-+define void @f12(<2 x i8> %val, <2 x i8> *%ptr) {
-+; CHECK-LABEL: f12:
-+; CHECK: vsteh %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  store <2 x i8> %val, <2 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test v4i8 stores.
-+define void @f13(<4 x i8> %val, <4 x i8> *%ptr) {
-+; CHECK-LABEL: f13:
-+; CHECK: vstef %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <4 x i8> %val, <4 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test v8i8 stores.
-+define void @f14(<8 x i8> %val, <8 x i8> *%ptr) {
-+; CHECK-LABEL: f14:
-+; CHECK: vsteg %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <8 x i8> %val, <8 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test v2i16 stores.
-+define void @f15(<2 x i16> %val, <2 x i16> *%ptr) {
-+; CHECK-LABEL: f15:
-+; CHECK: vstef %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  store <2 x i16> %val, <2 x i16> *%ptr
-+  ret void
-+}
-+
-+; Test v4i16 stores.
-+define void @f16(<4 x i16> %val, <4 x i16> *%ptr) {
-+; CHECK-LABEL: f16:
-+; CHECK: vsteg %v24, 0(%r2)
-+; CHECK: br %r14
-+  store <4 x i16> %val, <4 x i16> *%ptr
-+  ret void
-+}
-+
-+; Test v2i32 stores.
-+define void @f17(<2 x i32> %val, <2 x i32> *%ptr) {
-+; CHECK-LABEL: f17:
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  store <2 x i32> %val, <2 x i32> *%ptr
-+  ret void
-+}
-+
-+; Test v2f32 stores.
-+define void @f18(<2 x float> %val, <2 x float> *%ptr) {
-+; CHECK-LABEL: f18:
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  store <2 x float> %val, <2 x float> *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-04.ll
-@@ -0,0 +1,179 @@
-+; Test vector insertion of register variables.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into the first element.
-+define <16 x i8> @f1(<16 x i8> %val, i8 %element) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgb %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 0
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into the last element.
-+define <16 x i8> @f2(<16 x i8> %val, i8 %element) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlvgb %v24, %r2, 15
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 15
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into a variable element.
-+define <16 x i8> @f3(<16 x i8> %val, i8 %element, i32 %index) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlvgb %v24, %r2, 0(%r3)
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 insertion into the first element.
-+define <8 x i16> @f4(<8 x i16> %val, i16 %element) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlvgh %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 0
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into the last element.
-+define <8 x i16> @f5(<8 x i16> %val, i16 %element) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlvgh %v24, %r2, 7
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 7
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into a variable element.
-+define <8 x i16> @f6(<8 x i16> %val, i16 %element, i32 %index) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlvgh %v24, %r2, 0(%r3)
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 insertion into the first element.
-+define <4 x i32> @f7(<4 x i32> %val, i32 %element) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlvgf %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into the last element.
-+define <4 x i32> @f8(<4 x i32> %val, i32 %element) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlvgf %v24, %r2, 3
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into a variable element.
-+define <4 x i32> @f9(<4 x i32> %val, i32 %element, i32 %index) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlvgf %v24, %r2, 0(%r3)
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into the first element.
-+define <2 x i64> @f10(<2 x i64> %val, i64 %element) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlvgg %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into the last element.
-+define <2 x i64> @f11(<2 x i64> %val, i64 %element) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlvgg %v24, %r2, 1
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into a variable element.
-+define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlvgg %v24, %r2, 0(%r3)
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion into the first element.
-+define <4 x float> @f13(<4 x float> %val, float %element) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: vlvgf %v24, [[REG]], 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float %element, i32 0
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion into the last element.
-+define <4 x float> @f14(<4 x float> %val, float %element) {
-+; CHECK-LABEL: f14:
-+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: vlvgf %v24, [[REG]], 3
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float %element, i32 3
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion into a variable element.
-+define <4 x float> @f15(<4 x float> %val, float %element, i32 %index) {
-+; CHECK-LABEL: f15:
-+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0
-+; CHECK: vlvgf %v24, [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float %element, i32 %index
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion into the first element.
-+define <2 x double> @f16(<2 x double> %val, double %element) {
-+; CHECK-LABEL: f16:
-+; CHECK: vpdi %v24, %v0, %v24, 1
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double %element, i32 0
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion into the last element.
-+define <2 x double> @f17(<2 x double> %val, double %element) {
-+; CHECK-LABEL: f17:
-+; CHECK: vpdi %v24, %v24, %v0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double %element, i32 1
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion into a variable element.
-+define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) {
-+; CHECK-LABEL: f18:
-+; CHECK: lgdr [[REG:%r[0-5]]], %f0
-+; CHECK: vlvgg %v24, [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double %element, i32 %index
-+  ret <2 x double> %ret
-+}
-+
-+; Test v16i8 insertion into a variable element plus one.
-+define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) {
-+; CHECK-LABEL: f19:
-+; CHECK: vlvgb %v24, %r2, 1(%r3)
-+; CHECK: br %r14
-+  %add = add i32 %index, 1
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 %add
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-05.ll
-@@ -0,0 +1,249 @@
-+; Test vector extraction.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 extraction of the first element.
-+define i8 @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlgvb %r2, %v24, 0
-+; CHECK: br %r14
-+  %ret = extractelement <16 x i8> %val, i32 0
-+  ret i8 %ret
-+}
-+
-+; Test v16i8 extraction of the last element.
-+define i8 @f2(<16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlgvb %r2, %v24, 15
-+; CHECK: br %r14
-+  %ret = extractelement <16 x i8> %val, i32 15
-+  ret i8 %ret
-+}
-+
-+; Test v16i8 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define i8 @f3(<16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK-NOT: vlgvb %r2, %v24, 100000
-+; CHECK: br %r14
-+  %ret = extractelement <16 x i8> %val, i32 100000
-+  ret i8 %ret
-+}
-+
-+; Test v16i8 extraction of a variable element.
-+define i8 @f4(<16 x i8> %val, i32 %index) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlgvb %r2, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = extractelement <16 x i8> %val, i32 %index
-+  ret i8 %ret
-+}
-+
-+; Test v8i16 extraction of the first element.
-+define i16 @f5(<8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlgvh %r2, %v24, 0
-+; CHECK: br %r14
-+  %ret = extractelement <8 x i16> %val, i32 0
-+  ret i16 %ret
-+}
-+
-+; Test v8i16 extraction of the last element.
-+define i16 @f6(<8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlgvh %r2, %v24, 7
-+; CHECK: br %r14
-+  %ret = extractelement <8 x i16> %val, i32 7
-+  ret i16 %ret
-+}
-+
-+; Test v8i16 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define i16 @f7(<8 x i16> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK-NOT: vlgvh %r2, %v24, 100000
-+; CHECK: br %r14
-+  %ret = extractelement <8 x i16> %val, i32 100000
-+  ret i16 %ret
-+}
-+
-+; Test v8i16 extraction of a variable element.
-+define i16 @f8(<8 x i16> %val, i32 %index) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlgvh %r2, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = extractelement <8 x i16> %val, i32 %index
-+  ret i16 %ret
-+}
-+
-+; Test v4i32 extraction of the first element.
-+define i32 @f9(<4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlgvf %r2, %v24, 0
-+; CHECK: br %r14
-+  %ret = extractelement <4 x i32> %val, i32 0
-+  ret i32 %ret
-+}
-+
-+; Test v4i32 extraction of the last element.
-+define i32 @f10(<4 x i32> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlgvf %r2, %v24, 3
-+; CHECK: br %r14
-+  %ret = extractelement <4 x i32> %val, i32 3
-+  ret i32 %ret
-+}
-+
-+; Test v4i32 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define i32 @f11(<4 x i32> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK-NOT: vlgvf %r2, %v24, 100000
-+; CHECK: br %r14
-+  %ret = extractelement <4 x i32> %val, i32 100000
-+  ret i32 %ret
-+}
-+
-+; Test v4i32 extraction of a variable element.
-+define i32 @f12(<4 x i32> %val, i32 %index) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlgvf %r2, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = extractelement <4 x i32> %val, i32 %index
-+  ret i32 %ret
-+}
-+
-+; Test v2i64 extraction of the first element.
-+define i64 @f13(<2 x i64> %val) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlgvg %r2, %v24, 0
-+; CHECK: br %r14
-+  %ret = extractelement <2 x i64> %val, i32 0
-+  ret i64 %ret
-+}
-+
-+; Test v2i64 extraction of the last element.
-+define i64 @f14(<2 x i64> %val) {
-+; CHECK-LABEL: f14:
-+; CHECK: vlgvg %r2, %v24, 1
-+; CHECK: br %r14
-+  %ret = extractelement <2 x i64> %val, i32 1
-+  ret i64 %ret
-+}
-+
-+; Test v2i64 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define i64 @f15(<2 x i64> %val) {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vlgvg %r2, %v24, 100000
-+; CHECK: br %r14
-+  %ret = extractelement <2 x i64> %val, i32 100000
-+  ret i64 %ret
-+}
-+
-+; Test v2i64 extraction of a variable element.
-+define i64 @f16(<2 x i64> %val, i32 %index) {
-+; CHECK-LABEL: f16:
-+; CHECK: vlgvg %r2, %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ret = extractelement <2 x i64> %val, i32 %index
-+  ret i64 %ret
-+}
-+
-+; Test v4f32 extraction of element 0.
-+define float @f17(<4 x float> %val) {
-+; CHECK-LABEL: f17:
-+; CHECK: vlr %v0, %v24
-+; CHECK: br %r14
-+  %ret = extractelement <4 x float> %val, i32 0
-+  ret float %ret
-+}
-+
-+; Test v4f32 extraction of element 1.
-+define float @f18(<4 x float> %val) {
-+; CHECK-LABEL: f18:
-+; CHECK: vrepf %v0, %v24, 1
-+; CHECK: br %r14
-+  %ret = extractelement <4 x float> %val, i32 1
-+  ret float %ret
-+}
-+
-+; Test v4f32 extraction of element 2.
-+define float @f19(<4 x float> %val) {
-+; CHECK-LABEL: f19:
-+; CHECK: vrepf %v0, %v24, 2
-+; CHECK: br %r14
-+  %ret = extractelement <4 x float> %val, i32 2
-+  ret float %ret
-+}
-+
-+; Test v4f32 extraction of element 3.
-+define float @f20(<4 x float> %val) {
-+; CHECK-LABEL: f20:
-+; CHECK: vrepf %v0, %v24, 3
-+; CHECK: br %r14
-+  %ret = extractelement <4 x float> %val, i32 3
-+  ret float %ret
-+}
-+
-+; Test v4f32 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define float @f21(<4 x float> %val) {
-+  %ret = extractelement <4 x float> %val, i32 100000
-+  ret float %ret
-+}
-+
-+; Test v4f32 extraction of a variable element.
-+define float @f22(<4 x float> %val, i32 %index) {
-+; CHECK-LABEL: f22:
-+; CHECK: vlgvf [[REG:%r[0-5]]], %v24, 0(%r2)
-+; CHECK: vlvgf %v0, [[REG]], 0
-+; CHECK: br %r14
-+  %ret = extractelement <4 x float> %val, i32 %index
-+  ret float %ret
-+}
-+
-+; Test v2f64 extraction of the first element.
-+define double @f23(<2 x double> %val) {
-+; CHECK-LABEL: f23:
-+; CHECK: vlr %v0, %v24
-+; CHECK: br %r14
-+  %ret = extractelement <2 x double> %val, i32 0
-+  ret double %ret
-+}
-+
-+; Test v2f64 extraction of the last element.
-+define double @f24(<2 x double> %val) {
-+; CHECK-LABEL: f24:
-+; CHECK: vrepg %v0, %v24, 1
-+; CHECK: br %r14
-+  %ret = extractelement <2 x double> %val, i32 1
-+  ret double %ret
-+}
-+
-+; Test v2f64 extractions of an absurd element number.  This must compile
-+; but we don't care what it does.
-+define double @f25(<2 x double> %val) {
-+  %ret = extractelement <2 x double> %val, i32 100000
-+  ret double %ret
-+}
-+
-+; Test v2f64 extraction of a variable element.
-+define double @f26(<2 x double> %val, i32 %index) {
-+; CHECK-LABEL: f26:
-+; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2)
-+; CHECK: ldgr %f0, [[REG]]
-+; CHECK: br %r14
-+  %ret = extractelement <2 x double> %val, i32 %index
-+  ret double %ret
-+}
-+
-+; Test v16i8 extraction of a variable element with an offset.
-+define i8 @f27(<16 x i8> %val, i32 %index) {
-+; CHECK-LABEL: f27:
-+; CHECK: vlgvb %r2, %v24, 1(%r2)
-+; CHECK: br %r14
-+  %add = add i32 %index, 1
-+  %ret = extractelement <16 x i8> %val, i32 %add
-+  ret i8 %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-06.ll
-@@ -0,0 +1,13 @@
-+; Test vector builds using VLVGP.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test the basic v2i64 usage.
-+define <2 x i64> @f1(i64 %a, i64 %b) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgp %v24, %r2, %r3
-+; CHECK: br %r14
-+  %veca = insertelement <2 x i64> undef, i64 %a, i32 0
-+  %vecb = insertelement <2 x i64> %veca, i64 %b, i32 1
-+  ret <2 x i64> %vecb
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-07.ll
-@@ -0,0 +1,57 @@
-+; Test scalar_to_vector expansion.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8.
-+define <16 x i8> @f1(i8 %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgb %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 0
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16.
-+define <8 x i16> @f2(i16 %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlvgh %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 0
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32.
-+define <4 x i32> @f3(i32 %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlvgf %v24, %r2, 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 0
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64.  Here we load %val into both halves.
-+define <2 x i64> @f4(i64 %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> undef, i64 %val, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32, which is just a move.
-+define <4 x float> @f5(float %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlr %v24, %v0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> undef, float %val, i32 0
-+  ret <4 x float> %ret
-+}
-+
-+; Likewise v2f64.
-+define <2 x double> @f6(double %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlr %v24, %v0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> undef, double %val, i32 0
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-08.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-08.ll
-@@ -0,0 +1,444 @@
-+; Test vector insertion of memory values.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into the first element.
-+define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vleb %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load i8 *%ptr
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 0
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into the last element.
-+define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: vleb %v24, 0(%r2), 15
-+; CHECK: br %r14
-+  %element = load i8 *%ptr
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 15
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion with the highest in-range offset.
-+define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
-+; CHECK-LABEL: f3:
-+; CHECK: vleb %v24, 4095(%r2), 10
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i32 4095
-+  %element = load i8 *%ptr
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 10
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion with the first ouf-of-range offset.
-+define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
-+; CHECK-LABEL: f4:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vleb %v24, 0(%r2), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i32 4096
-+  %element = load i8 *%ptr
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 5
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into a variable element.
-+define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
-+; CHECK-LABEL: f5:
-+; CHECK-NOT: vleb
-+; CHECK: br %r14
-+  %element = load i8 *%ptr
-+  %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 insertion into the first element.
-+define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vleh %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load i16 *%ptr
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 0
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into the last element.
-+define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
-+; CHECK-LABEL: f7:
-+; CHECK: vleh %v24, 0(%r2), 7
-+; CHECK: br %r14
-+  %element = load i16 *%ptr
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 7
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion with the highest in-range offset.
-+define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
-+; CHECK-LABEL: f8:
-+; CHECK: vleh %v24, 4094(%r2), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i32 2047
-+  %element = load i16 *%ptr
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 5
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion with the first ouf-of-range offset.
-+define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
-+; CHECK-LABEL: f9:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vleh %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i32 2048
-+  %element = load i16 *%ptr
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 1
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into a variable element.
-+define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
-+; CHECK-LABEL: f10:
-+; CHECK-NOT: vleh
-+; CHECK: br %r14
-+  %element = load i16 *%ptr
-+  %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 insertion into the first element.
-+define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlef %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into the last element.
-+define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlef %v24, 0(%r2), 3
-+; CHECK: br %r14
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the highest in-range offset.
-+define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlef %v24, 4092(%r2), 2
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i32 1023
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the first ouf-of-range offset.
-+define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
-+; CHECK-LABEL: f14:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlef %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i32 1024
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into a variable element.
-+define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vlef
-+; CHECK: br %r14
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into the first element.
-+define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
-+; CHECK-LABEL: f16:
-+; CHECK: vleg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into the last element.
-+define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
-+; CHECK-LABEL: f17:
-+; CHECK: vleg %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the highest in-range offset.
-+define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
-+; CHECK-LABEL: f18:
-+; CHECK: vleg %v24, 4088(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 511
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the first ouf-of-range offset.
-+define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
-+; CHECK-LABEL: f19:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vleg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 512
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into a variable element.
-+define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
-+; CHECK-LABEL: f20:
-+; CHECK-NOT: vleg
-+; CHECK: br %r14
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion into the first element.
-+define <4 x float> @f21(<4 x float> %val, float *%ptr) {
-+; CHECK-LABEL: f21:
-+; CHECK: vlef %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 0
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion into the last element.
-+define <4 x float> @f22(<4 x float> %val, float *%ptr) {
-+; CHECK-LABEL: f22:
-+; CHECK: vlef %v24, 0(%r2), 3
-+; CHECK: br %r14
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 3
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion with the highest in-range offset.
-+define <4 x float> @f23(<4 x float> %val, float *%base) {
-+; CHECK-LABEL: f23:
-+; CHECK: vlef %v24, 4092(%r2), 2
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i32 1023
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 2
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion with the first ouf-of-range offset.
-+define <4 x float> @f24(<4 x float> %val, float *%base) {
-+; CHECK-LABEL: f24:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlef %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i32 1024
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 1
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion into a variable element.
-+define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) {
-+; CHECK-LABEL: f25:
-+; CHECK-NOT: vlef
-+; CHECK: br %r14
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 %index
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion into the first element.
-+define <2 x double> @f26(<2 x double> %val, double *%ptr) {
-+; CHECK-LABEL: f26:
-+; CHECK: vleg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 0
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion into the last element.
-+define <2 x double> @f27(<2 x double> %val, double *%ptr) {
-+; CHECK-LABEL: f27:
-+; CHECK: vleg %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 1
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion with the highest in-range offset.
-+define <2 x double> @f28(<2 x double> %val, double *%base) {
-+; CHECK-LABEL: f28:
-+; CHECK: vleg %v24, 4088(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 511
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 1
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion with the first ouf-of-range offset.
-+define <2 x double> @f29(<2 x double> %val, double *%base) {
-+; CHECK-LABEL: f29:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vleg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 512
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 0
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion into a variable element.
-+define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
-+; CHECK-LABEL: f30:
-+; CHECK-NOT: vleg
-+; CHECK: br %r14
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 %index
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v4i32 gather of the first element.
-+define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f31:
-+; CHECK: vgef %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 0
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to i32 *
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 gather of the last element.
-+define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f32:
-+; CHECK: vgef %v24, 0(%v26,%r2), 3
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 3
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to i32 *
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 gather with the highest in-range offset.
-+define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f33:
-+; CHECK: vgef %v24, 4095(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 1
-+  %ext = zext i32 %elem to i64
-+  %add1 = add i64 %base, %ext
-+  %add2 = add i64 %add1, 4095
-+  %ptr = inttoptr i64 %add2 to i32 *
-+  %element = load i32 *%ptr
-+  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 gather of the first element.
-+define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f34:
-+; CHECK: vgeg %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 0
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to i64 *
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 gather of the last element.
-+define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f35:
-+; CHECK: vgeg %v24, 0(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 1
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to i64 *
-+  %element = load i64 *%ptr
-+  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v4f32 gather of the first element.
-+define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f36:
-+; CHECK: vgef %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 0
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to float *
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 0
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v4f32 gather of the last element.
-+define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f37:
-+; CHECK: vgef %v24, 0(%v26,%r2), 3
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 3
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to float *
-+  %element = load float *%ptr
-+  %ret = insertelement <4 x float> %val, float %element, i32 3
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v2f64 gather of the first element.
-+define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f38:
-+; CHECK: vgeg %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 0
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to double *
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 0
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v2f64 gather of the last element.
-+define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f39:
-+; CHECK: vgeg %v24, 0(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 1
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to double *
-+  %element = load double *%ptr
-+  %ret = insertelement <2 x double> %val, double %element, i32 1
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-09.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-09.ll
-@@ -0,0 +1,291 @@
-+; Test vector insertion of constants.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into the first element.
-+define <16 x i8> @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vleib %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 0, i32 0
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into the last element.
-+define <16 x i8> @f2(<16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vleib %v24, 100, 15
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 100, i32 15
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion with the maximum signed value.
-+define <16 x i8> @f3(<16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vleib %v24, 127, 10
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 127, i32 10
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion with the minimum signed value.
-+define <16 x i8> @f4(<16 x i8> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vleib %v24, -128, 11
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 128, i32 11
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion with the maximum unsigned value.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vleib %v24, -1, 12
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 255, i32 12
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into a variable element.
-+define <16 x i8> @f6(<16 x i8> %val, i32 %index) {
-+; CHECK-LABEL: f6:
-+; CHECK-NOT: vleib
-+; CHECK: br %r14
-+  %ret = insertelement <16 x i8> %val, i8 0, i32 %index
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 insertion into the first element.
-+define <8 x i16> @f7(<8 x i16> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vleih %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 0, i32 0
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into the last element.
-+define <8 x i16> @f8(<8 x i16> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vleih %v24, 0, 7
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 0, i32 7
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion with the maximum signed value.
-+define <8 x i16> @f9(<8 x i16> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vleih %v24, 32767, 4
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 32767, i32 4
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion with the minimum signed value.
-+define <8 x i16> @f10(<8 x i16> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vleih %v24, -32768, 5
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 32768, i32 5
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion with the maximum unsigned value.
-+define <8 x i16> @f11(<8 x i16> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vleih %v24, -1, 6
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 65535, i32 6
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into a variable element.
-+define <8 x i16> @f12(<8 x i16> %val, i32 %index) {
-+; CHECK-LABEL: f12:
-+; CHECK-NOT: vleih
-+; CHECK: br %r14
-+  %ret = insertelement <8 x i16> %val, i16 0, i32 %index
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 insertion into the first element.
-+define <4 x i32> @f13(<4 x i32> %val) {
-+; CHECK-LABEL: f13:
-+; CHECK: vleif %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 0, i32 0
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into the last element.
-+define <4 x i32> @f14(<4 x i32> %val) {
-+; CHECK-LABEL: f14:
-+; CHECK: vleif %v24, 0, 3
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 0, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the maximum value allowed by VLEIF.
-+define <4 x i32> @f15(<4 x i32> %val) {
-+; CHECK-LABEL: f15:
-+; CHECK: vleif %v24, 32767, 1
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 32767, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the next value up.
-+define <4 x i32> @f16(<4 x i32> %val) {
-+; CHECK-LABEL: f16:
-+; CHECK-NOT: vleif
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 32768, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the minimum value allowed by VLEIF.
-+define <4 x i32> @f17(<4 x i32> %val) {
-+; CHECK-LABEL: f17:
-+; CHECK: vleif %v24, -32768, 2
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 -32768, i32 2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion with the next value down.
-+define <4 x i32> @f18(<4 x i32> %val) {
-+; CHECK-LABEL: f18:
-+; CHECK-NOT: vleif
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 -32769, i32 2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into a variable element.
-+define <4 x i32> @f19(<4 x i32> %val, i32 %index) {
-+; CHECK-LABEL: f19:
-+; CHECK-NOT: vleif
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> %val, i32 0, i32 %index
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into the first element.
-+define <2 x i64> @f20(<2 x i64> %val) {
-+; CHECK-LABEL: f20:
-+; CHECK: vleig %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 0, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into the last element.
-+define <2 x i64> @f21(<2 x i64> %val) {
-+; CHECK-LABEL: f21:
-+; CHECK: vleig %v24, 0, 1
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 0, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the maximum value allowed by VLEIG.
-+define <2 x i64> @f22(<2 x i64> %val) {
-+; CHECK-LABEL: f22:
-+; CHECK: vleig %v24, 32767, 1
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 32767, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the next value up.
-+define <2 x i64> @f23(<2 x i64> %val) {
-+; CHECK-LABEL: f23:
-+; CHECK-NOT: vleig
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 32768, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the minimum value allowed by VLEIG.
-+define <2 x i64> @f24(<2 x i64> %val) {
-+; CHECK-LABEL: f24:
-+; CHECK: vleig %v24, -32768, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 -32768, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion with the next value down.
-+define <2 x i64> @f25(<2 x i64> %val) {
-+; CHECK-LABEL: f25:
-+; CHECK-NOT: vleig
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 -32769, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 insertion into a variable element.
-+define <2 x i64> @f26(<2 x i64> %val, i32 %index) {
-+; CHECK-LABEL: f26:
-+; CHECK-NOT: vleig
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> %val, i64 0, i32 %index
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion of 0 into the first element.
-+define <4 x float> @f27(<4 x float> %val) {
-+; CHECK-LABEL: f27:
-+; CHECK: vleif %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float 0.0, i32 0
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion of 0 into the last element.
-+define <4 x float> @f28(<4 x float> %val) {
-+; CHECK-LABEL: f28:
-+; CHECK: vleif %v24, 0, 3
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float 0.0, i32 3
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 insertion of a nonzero value.
-+define <4 x float> @f29(<4 x float> %val) {
-+; CHECK-LABEL: f29:
-+; CHECK-NOT: vleif
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> %val, float 1.0, i32 1
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion of 0 into the first element.
-+define <2 x double> @f30(<2 x double> %val) {
-+; CHECK-LABEL: f30:
-+; CHECK: vleig %v24, 0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double 0.0, i32 0
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion of 0 into the last element.
-+define <2 x double> @f31(<2 x double> %val) {
-+; CHECK-LABEL: f31:
-+; CHECK: vleig %v24, 0, 1
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double 0.0, i32 1
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 insertion of a nonzero value.
-+define <2 x double> @f32(<2 x double> %val) {
-+; CHECK-LABEL: f32:
-+; CHECK-NOT: vleig
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> %val, double 1.0, i32 1
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-10.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-10.ll
-@@ -0,0 +1,499 @@
-+; Test vector extraction to memory.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 extraction from the first element.
-+define void @f1(<16 x i8> %val, i8 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vsteb %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <16 x i8> %val, i32 0
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v16i8 extraction from the last element.
-+define void @f2(<16 x i8> %val, i8 *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: vsteb %v24, 0(%r2), 15
-+; CHECK: br %r14
-+  %element = extractelement <16 x i8> %val, i32 15
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v16i8 extraction of an invalid element.  This must compile,
-+; but we don't care what it does.
-+define void @f3(<16 x i8> %val, i8 *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK-NOT: vsteb %v24, 0(%r2), 16
-+; CHECK: br %r14
-+  %element = extractelement <16 x i8> %val, i32 16
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v16i8 extraction with the highest in-range offset.
-+define void @f4(<16 x i8> %val, i8 *%base) {
-+; CHECK-LABEL: f4:
-+; CHECK: vsteb %v24, 4095(%r2), 10
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i32 4095
-+  %element = extractelement <16 x i8> %val, i32 10
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v16i8 extraction with the first ouf-of-range offset.
-+define void @f5(<16 x i8> %val, i8 *%base) {
-+; CHECK-LABEL: f5:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vsteb %v24, 0(%r2), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i32 4096
-+  %element = extractelement <16 x i8> %val, i32 5
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v16i8 extraction from a variable element.
-+define void @f6(<16 x i8> %val, i8 *%ptr, i32 %index) {
-+; CHECK-LABEL: f6:
-+; CHECK-NOT: vsteb
-+; CHECK: br %r14
-+  %element = extractelement <16 x i8> %val, i32 %index
-+  store i8 %element, i8 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction from the first element.
-+define void @f7(<8 x i16> %val, i16 *%ptr) {
-+; CHECK-LABEL: f7:
-+; CHECK: vsteh %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <8 x i16> %val, i32 0
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction from the last element.
-+define void @f8(<8 x i16> %val, i16 *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: vsteh %v24, 0(%r2), 7
-+; CHECK: br %r14
-+  %element = extractelement <8 x i16> %val, i32 7
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction of an invalid element.  This must compile,
-+; but we don't care what it does.
-+define void @f9(<8 x i16> %val, i16 *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK-NOT: vsteh %v24, 0(%r2), 8
-+; CHECK: br %r14
-+  %element = extractelement <8 x i16> %val, i32 8
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction with the highest in-range offset.
-+define void @f10(<8 x i16> %val, i16 *%base) {
-+; CHECK-LABEL: f10:
-+; CHECK: vsteh %v24, 4094(%r2), 5
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i32 2047
-+  %element = extractelement <8 x i16> %val, i32 5
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction with the first ouf-of-range offset.
-+define void @f11(<8 x i16> %val, i16 *%base) {
-+; CHECK-LABEL: f11:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vsteh %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i32 2048
-+  %element = extractelement <8 x i16> %val, i32 1
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v8i16 extraction from a variable element.
-+define void @f12(<8 x i16> %val, i16 *%ptr, i32 %index) {
-+; CHECK-LABEL: f12:
-+; CHECK-NOT: vsteh
-+; CHECK: br %r14
-+  %element = extractelement <8 x i16> %val, i32 %index
-+  store i16 %element, i16 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction from the first element.
-+define void @f13(<4 x i32> %val, i32 *%ptr) {
-+; CHECK-LABEL: f13:
-+; CHECK: vstef %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <4 x i32> %val, i32 0
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction from the last element.
-+define void @f14(<4 x i32> %val, i32 *%ptr) {
-+; CHECK-LABEL: f14:
-+; CHECK: vstef %v24, 0(%r2), 3
-+; CHECK: br %r14
-+  %element = extractelement <4 x i32> %val, i32 3
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction of an invalid element.  This must compile,
-+; but we don't care what it does.
-+define void @f15(<4 x i32> %val, i32 *%ptr) {
-+; CHECK-LABEL: f15:
-+; CHECK-NOT: vstef %v24, 0(%r2), 4
-+; CHECK: br %r14
-+  %element = extractelement <4 x i32> %val, i32 4
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction with the highest in-range offset.
-+define void @f16(<4 x i32> %val, i32 *%base) {
-+; CHECK-LABEL: f16:
-+; CHECK: vstef %v24, 4092(%r2), 2
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i32 1023
-+  %element = extractelement <4 x i32> %val, i32 2
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction with the first ouf-of-range offset.
-+define void @f17(<4 x i32> %val, i32 *%base) {
-+; CHECK-LABEL: f17:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vstef %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i32 1024
-+  %element = extractelement <4 x i32> %val, i32 1
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v4i32 extraction from a variable element.
-+define void @f18(<4 x i32> %val, i32 *%ptr, i32 %index) {
-+; CHECK-LABEL: f18:
-+; CHECK-NOT: vstef
-+; CHECK: br %r14
-+  %element = extractelement <4 x i32> %val, i32 %index
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction from the first element.
-+define void @f19(<2 x i64> %val, i64 *%ptr) {
-+; CHECK-LABEL: f19:
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <2 x i64> %val, i32 0
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction from the last element.
-+define void @f20(<2 x i64> %val, i64 *%ptr) {
-+; CHECK-LABEL: f20:
-+; CHECK: vsteg %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %element = extractelement <2 x i64> %val, i32 1
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction of an invalid element.  This must compile,
-+; but we don't care what it does.
-+define void @f21(<2 x i64> %val, i64 *%ptr) {
-+; CHECK-LABEL: f21:
-+; CHECK-NOT: vsteg %v24, 0(%r2), 2
-+; CHECK: br %r14
-+  %element = extractelement <2 x i64> %val, i32 2
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction with the highest in-range offset.
-+define void @f22(<2 x i64> %val, i64 *%base) {
-+; CHECK-LABEL: f22:
-+; CHECK: vsteg %v24, 4088(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 511
-+  %element = extractelement <2 x i64> %val, i32 1
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction with the first ouf-of-range offset.
-+define void @f23(<2 x i64> %val, i64 *%base) {
-+; CHECK-LABEL: f23:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 512
-+  %element = extractelement <2 x i64> %val, i32 0
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v2i64 extraction from a variable element.
-+define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) {
-+; CHECK-LABEL: f24:
-+; CHECK-NOT: vsteg
-+; CHECK: br %r14
-+  %element = extractelement <2 x i64> %val, i32 %index
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction from the first element.
-+define void @f25(<4 x float> %val, float *%ptr) {
-+; CHECK-LABEL: f25:
-+; CHECK: vstef %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <4 x float> %val, i32 0
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction from the last element.
-+define void @f26(<4 x float> %val, float *%ptr) {
-+; CHECK-LABEL: f26:
-+; CHECK: vstef %v24, 0(%r2), 3
-+; CHECK: br %r14
-+  %element = extractelement <4 x float> %val, i32 3
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction of an invalid element.  This must compile,
-+; but we don't care what it does.
-+define void @f27(<4 x float> %val, float *%ptr) {
-+; CHECK-LABEL: f27:
-+; CHECK-NOT: vstef %v24, 0(%r2), 4
-+; CHECK: br %r14
-+  %element = extractelement <4 x float> %val, i32 4
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction with the highest in-range offset.
-+define void @f28(<4 x float> %val, float *%base) {
-+; CHECK-LABEL: f28:
-+; CHECK: vstef %v24, 4092(%r2), 2
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i32 1023
-+  %element = extractelement <4 x float> %val, i32 2
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction with the first ouf-of-range offset.
-+define void @f29(<4 x float> %val, float *%base) {
-+; CHECK-LABEL: f29:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vstef %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i32 1024
-+  %element = extractelement <4 x float> %val, i32 1
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v4f32 extraction from a variable element.
-+define void @f30(<4 x float> %val, float *%ptr, i32 %index) {
-+; CHECK-LABEL: f30:
-+; CHECK-NOT: vstef
-+; CHECK: br %r14
-+  %element = extractelement <4 x float> %val, i32 %index
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 extraction from the first element.
-+define void @f32(<2 x double> %val, double *%ptr) {
-+; CHECK-LABEL: f32:
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %element = extractelement <2 x double> %val, i32 0
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 extraction from the last element.
-+define void @f33(<2 x double> %val, double *%ptr) {
-+; CHECK-LABEL: f33:
-+; CHECK: vsteg %v24, 0(%r2), 1
-+; CHECK: br %r14
-+  %element = extractelement <2 x double> %val, i32 1
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 extraction with the highest in-range offset.
-+define void @f34(<2 x double> %val, double *%base) {
-+; CHECK-LABEL: f34:
-+; CHECK: vsteg %v24, 4088(%r2), 1
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 511
-+  %element = extractelement <2 x double> %val, i32 1
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 extraction with the first ouf-of-range offset.
-+define void @f35(<2 x double> %val, double *%base) {
-+; CHECK-LABEL: f35:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vsteg %v24, 0(%r2), 0
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 512
-+  %element = extractelement <2 x double> %val, i32 0
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test v2f64 extraction from a variable element.
-+define void @f36(<2 x double> %val, double *%ptr, i32 %index) {
-+; CHECK-LABEL: f36:
-+; CHECK-NOT: vsteg
-+; CHECK: br %r14
-+  %element = extractelement <2 x double> %val, i32 %index
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32 scatter of the first element.
-+define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f37:
-+; CHECK: vscef %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 0
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to i32 *
-+  %element = extractelement <4 x i32> %val, i32 0
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32 scatter of the last element.
-+define void @f38(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f38:
-+; CHECK: vscef %v24, 0(%v26,%r2), 3
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 3
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to i32 *
-+  %element = extractelement <4 x i32> %val, i32 3
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32 scatter with the highest in-range offset.
-+define void @f39(<4 x i32> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f39:
-+; CHECK: vscef %v24, 4095(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 1
-+  %ext = zext i32 %elem to i64
-+  %add1 = add i64 %base, %ext
-+  %add2 = add i64 %add1, 4095
-+  %ptr = inttoptr i64 %add2 to i32 *
-+  %element = extractelement <4 x i32> %val, i32 1
-+  store i32 %element, i32 *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64 scatter of the first element.
-+define void @f40(<2 x i64> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f40:
-+; CHECK: vsceg %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 0
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to i64 *
-+  %element = extractelement <2 x i64> %val, i32 0
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64 scatter of the last element.
-+define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f41:
-+; CHECK: vsceg %v24, 0(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 1
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to i64 *
-+  %element = extractelement <2 x i64> %val, i32 1
-+  store i64 %element, i64 *%ptr
-+  ret void
-+}
-+
-+; Test a v4f32 scatter of the first element.
-+define void @f42(<4 x float> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f42:
-+; CHECK: vscef %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 0
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to float *
-+  %element = extractelement <4 x float> %val, i32 0
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test a v4f32 scatter of the last element.
-+define void @f43(<4 x float> %val, <4 x i32> %index, i64 %base) {
-+; CHECK-LABEL: f43:
-+; CHECK: vscef %v24, 0(%v26,%r2), 3
-+; CHECK: br %r14
-+  %elem = extractelement <4 x i32> %index, i32 3
-+  %ext = zext i32 %elem to i64
-+  %add = add i64 %base, %ext
-+  %ptr = inttoptr i64 %add to float *
-+  %element = extractelement <4 x float> %val, i32 3
-+  store float %element, float *%ptr
-+  ret void
-+}
-+
-+; Test a v2f64 scatter of the first element.
-+define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f44:
-+; CHECK: vsceg %v24, 0(%v26,%r2), 0
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 0
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to double *
-+  %element = extractelement <2 x double> %val, i32 0
-+  store double %element, double *%ptr
-+  ret void
-+}
-+
-+; Test a v2f64 scatter of the last element.
-+define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) {
-+; CHECK-LABEL: f45:
-+; CHECK: vsceg %v24, 0(%v26,%r2), 1
-+; CHECK: br %r14
-+  %elem = extractelement <2 x i64> %index, i32 1
-+  %add = add i64 %base, %elem
-+  %ptr = inttoptr i64 %add to double *
-+  %element = extractelement <2 x double> %val, i32 1
-+  store double %element, double *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-11.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-11.ll
-@@ -0,0 +1,111 @@
-+; Test insertions of register values into a nonzero index of an undef.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into an undef, with an arbitrary index.
-+define <16 x i8> @f1(i8 %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgb %v24, %r2, 12
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 12
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into an undef, with the first good index for VLVGP.
-+define <16 x i8> @f2(i8 %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into an undef, with the second good index for VLVGP.
-+define <16 x i8> @f3(i8 %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 15
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with an arbitrary index.
-+define <8 x i16> @f4(i16 %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlvgh %v24, %r2, 5
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 5
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with the first good index for VLVGP.
-+define <8 x i16> @f5(i16 %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 3
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with the second good index for VLVGP.
-+define <8 x i16> @f6(i16 %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 7
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with an arbitrary index.
-+define <4 x i32> @f7(i32 %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlvgf %v24, %r2, 2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with the first good index for VLVGP.
-+define <4 x i32> @f8(i32 %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with the second good index for VLVGP.
-+define <4 x i32> @f9(i32 %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into an undef.
-+define <2 x i64> @f10(i64 %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK-NEXT: br %r14
-+  %ret = insertelement <2 x i64> undef, i64 %val, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion into an undef.
-+define <4 x float> @f11(float %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vrepf %v24, %v0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> undef, float %val, i32 2
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion into an undef.
-+define <2 x double> @f12(double %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepg %v24, %v0, 0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> undef, double %val, i32 1
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-12.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-12.ll
-@@ -0,0 +1,123 @@
-+; Test insertions of memory values into a nonzero index of an undef.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into an undef, with an arbitrary index.
-+define <16 x i8> @f1(i8 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlrepb %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 12
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into an undef, with the first good index for VLVGP.
-+define <16 x i8> @f2(i8 *%ptr) {
-+; CHECK-LABEL: f2:
-+; CHECK: {{vlrepb|vllezb}} %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 insertion into an undef, with the second good index for VLVGP.
-+define <16 x i8> @f3(i8 *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlrepb %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> undef, i8 %val, i32 15
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with an arbitrary index.
-+define <8 x i16> @f4(i16 *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlreph %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i16 *%ptr
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 5
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with the first good index for VLVGP.
-+define <8 x i16> @f5(i16 *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: {{vlreph|vllezh}} %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i16 *%ptr
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 3
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 insertion into an undef, with the second good index for VLVGP.
-+define <8 x i16> @f6(i16 *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlreph %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i16 *%ptr
-+  %ret = insertelement <8 x i16> undef, i16 %val, i32 7
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with an arbitrary index.
-+define <4 x i32> @f7(i32 *%ptr) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i32 *%ptr
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with the first good index for VLVGP.
-+define <4 x i32> @f8(i32 *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: {{vlrepf|vllezf}} %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i32 *%ptr
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 insertion into an undef, with the second good index for VLVGP.
-+define <4 x i32> @f9(i32 *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i32 *%ptr
-+  %ret = insertelement <4 x i32> undef, i32 %val, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into an undef.
-+define <2 x i64> @f10(i64 *%ptr) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK-NEXT: br %r14
-+  %val = load i64 *%ptr
-+  %ret = insertelement <2 x i64> undef, i64 %val, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion into an undef.
-+define <4 x float> @f11(float *%ptr) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load float *%ptr
-+  %ret = insertelement <4 x float> undef, float %val, i32 2
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion into an undef.
-+define <2 x double> @f12(double *%ptr) {
-+; CHECK-LABEL: f12:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load double *%ptr
-+  %ret = insertelement <2 x double> undef, double %val, i32 1
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-13.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-13.ll
-@@ -0,0 +1,69 @@
-+; Test insertions of register values into 0.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 insertion into 0.
-+define <16 x i8> @f1(i8 %val1, i8 %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vgbm %v24, 0
-+; CHECK-DAG: vlvgb %v24, %r2, 2
-+; CHECK-DAG: vlvgb %v24, %r3, 12
-+; CHECK: br %r14
-+  %vec1 = insertelement <16 x i8> zeroinitializer, i8 %val1, i32 2
-+  %vec2 = insertelement <16 x i8> %vec1, i8 %val2, i32 12
-+  ret <16 x i8> %vec2
-+}
-+
-+; Test v8i16 insertion into 0.
-+define <8 x i16> @f2(i16 %val1, i16 %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vgbm %v24, 0
-+; CHECK-DAG: vlvgh %v24, %r2, 3
-+; CHECK-DAG: vlvgh %v24, %r3, 5
-+; CHECK: br %r14
-+  %vec1 = insertelement <8 x i16> zeroinitializer, i16 %val1, i32 3
-+  %vec2 = insertelement <8 x i16> %vec1, i16 %val2, i32 5
-+  ret <8 x i16> %vec2
-+}
-+
-+; Test v4i32 insertion into 0.
-+define <4 x i32> @f3(i32 %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vgbm %v24, 0
-+; CHECK: vlvgf %v24, %r2, 3
-+; CHECK: br %r14
-+  %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 insertion into 0.
-+define <2 x i64> @f4(i64 %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: lghi [[REG:%r[0-5]]], 0
-+; CHECK: vlvgp %v24, [[REG]], %r2
-+; CHECK: br %r14
-+  %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 insertion into 0.
-+define <4 x float> @f5(float %val) {
-+; CHECK-LABEL: f5:
-+; CHECK-DAG: vuplhf [[REG:%v[0-9]+]], %v0
-+; CHECK-DAG: vgbm [[ZERO:%v[0-9]+]], 0
-+; CHECK: vmrhg %v24, [[ZERO]], [[REG]]
-+; CHECK: br %r14
-+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 3
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 insertion into 0.
-+define <2 x double> @f6(double %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vgbm [[REG:%v[0-9]+]], 0
-+; CHECK: vmrhg %v24, [[REG]], %v0
-+; CHECK: br %r14
-+  %ret = insertelement <2 x double> zeroinitializer, double %val, i32 1
-+  ret <2 x double> %ret
-+}
-+
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-14.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-14.ll
-@@ -0,0 +1,96 @@
-+; Test insertions of memory values into 0.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test VLLEZB.
-+define <16 x i8> @f1(i8 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vllezb %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test VLLEZB with the highest in-range offset.
-+define <16 x i8> @f2(i8 *%base) {
-+; CHECK-LABEL: f2:
-+; CHECK: vllezb %v24, 4095(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4095
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test VLLEZB with the next highest offset.
-+define <16 x i8> @f3(i8 *%base) {
-+; CHECK-LABEL: f3:
-+; CHECK-NOT: vllezb %v24, 4096(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test that VLLEZB allows an index.
-+define <16 x i8> @f4(i8 *%base, i64 %index) {
-+; CHECK-LABEL: f4:
-+; CHECK: vllezb %v24, 0({{%r2,%r3|%r3,%r2}})
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 %index
-+  %val = load i8 *%ptr
-+  %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7
-+  ret <16 x i8> %ret
-+}
-+
-+; Test VLLEZH.
-+define <8 x i16> @f5(i16 *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vllezh %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load i16 *%ptr
-+  %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3
-+  ret <8 x i16> %ret
-+}
-+
-+; Test VLLEZF.
-+define <4 x i32> @f6(i32 *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vllezf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load i32 *%ptr
-+  %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1
-+  ret <4 x i32> %ret
-+}
-+
-+; Test VLLEZG.
-+define <2 x i64> @f7(i64 *%ptr) {
-+; CHECK-LABEL: f7:
-+; CHECK: vllezg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load i64 *%ptr
-+  %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0
-+  ret <2 x i64> %ret
-+}
-+
-+; Test VLLEZF with a float.
-+define <4 x float> @f8(float *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: vllezf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load float *%ptr
-+  %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1
-+  ret <4 x float> %ret
-+}
-+
-+; Test VLLEZG with a double.
-+define <2 x double> @f9(double *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK: vllezg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %val = load double *%ptr
-+  %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-15.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-15.ll
-@@ -0,0 +1,105 @@
-+; Test vector sign-extending loads.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i1->v16i8 extension.
-+define <16 x i8> @f1(<16 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <16 x i1> *%ptr
-+  %ret = sext <16 x i1> %val to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i1->v8i16 extension.
-+define <8 x i16> @f2(<8 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <8 x i1> *%ptr
-+  %ret = sext <8 x i1> %val to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i8->v8i16 extension.
-+define <8 x i16> @f3(<8 x i8> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphb %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <8 x i8> *%ptr
-+  %ret = sext <8 x i8> %val to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i1->v4i32 extension.
-+define <4 x i32> @f4(<4 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <4 x i1> *%ptr
-+  %ret = sext <4 x i1> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i8->v4i32 extension.
-+define <4 x i32> @f5(<4 x i8> *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuphh %v24, [[REG2]]
-+; CHECK: br %r14
-+  %val = load <4 x i8> *%ptr
-+  %ret = sext <4 x i8> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i16->v4i32 extension.
-+define <4 x i32> @f6(<4 x i16> *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphh %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <4 x i16> *%ptr
-+  %ret = sext <4 x i16> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i1->v2i64 extension.
-+define <2 x i64> @f7(<2 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <2 x i1> *%ptr
-+  %ret = sext <2 x i1> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i8->v2i64 extension.
-+define <2 x i64> @f8(<2 x i8> *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]]
-+; CHECK: vuphf %v24, [[REG3]]
-+; CHECK: br %r14
-+  %val = load <2 x i8> *%ptr
-+  %ret = sext <2 x i8> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i16->v2i64 extension.
-+define <2 x i64> @f9(<2 x i16> *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphh [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuphf %v24, [[REG2]]
-+; CHECK: br %r14
-+  %val = load <2 x i16> *%ptr
-+  %ret = sext <2 x i16> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i32->v2i64 extension.
-+define <2 x i64> @f10(<2 x i32> *%ptr) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuphf %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <2 x i32> *%ptr
-+  %ret = sext <2 x i32> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-16.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-16.ll
-@@ -0,0 +1,105 @@
-+; Test vector zero-extending loads.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i1->v16i8 extension.
-+define <16 x i8> @f1(<16 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <16 x i1> *%ptr
-+  %ret = zext <16 x i1> %val to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i1->v8i16 extension.
-+define <8 x i16> @f2(<8 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <8 x i1> *%ptr
-+  %ret = zext <8 x i1> %val to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i8->v8i16 extension.
-+define <8 x i16> @f3(<8 x i8> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhb %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <8 x i8> *%ptr
-+  %ret = zext <8 x i8> %val to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i1->v4i32 extension.
-+define <4 x i32> @f4(<4 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <4 x i1> *%ptr
-+  %ret = zext <4 x i1> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i8->v4i32 extension.
-+define <4 x i32> @f5(<4 x i8> *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuplhh %v24, [[REG2]]
-+; CHECK: br %r14
-+  %val = load <4 x i8> *%ptr
-+  %ret = zext <4 x i8> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i16->v4i32 extension.
-+define <4 x i32> @f6(<4 x i16> *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhh %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <4 x i16> *%ptr
-+  %ret = zext <4 x i16> %val to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i1->v2i64 extension.
-+define <2 x i64> @f7(<2 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %val = load <2 x i1> *%ptr
-+  %ret = zext <2 x i1> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i8->v2i64 extension.
-+define <2 x i64> @f8(<2 x i8> *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]]
-+; CHECK: vuplhf %v24, [[REG3]]
-+; CHECK: br %r14
-+  %val = load <2 x i8> *%ptr
-+  %ret = zext <2 x i8> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i16->v2i64 extension.
-+define <2 x i64> @f9(<2 x i16> *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]]
-+; CHECK: vuplhf %v24, [[REG2]]
-+; CHECK: br %r14
-+  %val = load <2 x i16> *%ptr
-+  %ret = zext <2 x i16> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i32->v2i64 extension.
-+define <2 x i64> @f10(<2 x i32> *%ptr) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2)
-+; CHECK: vuplhf %v24, [[REG1]]
-+; CHECK: br %r14
-+  %val = load <2 x i32> *%ptr
-+  %ret = zext <2 x i32> %val to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-move-17.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-move-17.ll
-@@ -0,0 +1,104 @@
-+; Test vector truncating stores.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8->v16i1 truncation.
-+define void @f1(<16 x i8> %val, <16 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %trunc = trunc <16 x i8> %val to <16 x i1>
-+  store <16 x i1> %trunc, <16 x i1> *%ptr
-+  ret void
-+}
-+
-+; Test a v8i16->v8i1 truncation.
-+define void @f2(<8 x i16> %val, <8 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %trunc = trunc <8 x i16> %val to <8 x i1>
-+  store <8 x i1> %trunc, <8 x i1> *%ptr
-+  ret void
-+}
-+
-+; Test a v8i16->v8i8 truncation.
-+define void @f3(<8 x i16> %val, <8 x i8> *%ptr) {
-+; CHECK-LABEL: f3:
-+; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24
-+; CHECK: vsteg [[REG1]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <8 x i16> %val to <8 x i8>
-+  store <8 x i8> %trunc, <8 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32->v4i1 truncation.
-+define void @f4(<4 x i32> %val, <4 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %trunc = trunc <4 x i32> %val to <4 x i1>
-+  store <4 x i1> %trunc, <4 x i1> *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32->v4i8 truncation.  At the moment we use a VPERM rather than
-+; a chain of packs.
-+define void @f5(<4 x i32> %val, <4 x i8> *%ptr) {
-+; CHECK-LABEL: f5:
-+; CHECK: vperm [[REG:%v[0-9]+]],
-+; CHECK: vstef [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i8>
-+  store <4 x i8> %trunc, <4 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test a v4i32->v4i16 truncation.
-+define void @f6(<4 x i32> %val, <4 x i16> *%ptr) {
-+; CHECK-LABEL: f6:
-+; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24
-+; CHECK: vsteg [[REG1]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i16>
-+  store <4 x i16> %trunc, <4 x i16> *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64->v2i1 truncation.
-+define void @f7(<2 x i64> %val, <2 x i1> *%ptr) {
-+; No expected output, but must compile.
-+  %trunc = trunc <2 x i64> %val to <2 x i1>
-+  store <2 x i1> %trunc, <2 x i1> *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64->v2i8 truncation.  At the moment we use a VPERM rather than
-+; a chain of packs.
-+define void @f8(<2 x i64> %val, <2 x i8> *%ptr) {
-+; CHECK-LABEL: f8:
-+; CHECK: vperm [[REG:%v[0-9]+]],
-+; CHECK: vsteh [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i8>
-+  store <2 x i8> %trunc, <2 x i8> *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64->v2i16 truncation.  At the moment we use a VPERM rather than
-+; a chain of packs.
-+define void @f9(<2 x i64> %val, <2 x i16> *%ptr) {
-+; CHECK-LABEL: f9:
-+; CHECK: vperm [[REG:%v[0-9]+]],
-+; CHECK: vstef [[REG]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i16>
-+  store <2 x i16> %trunc, <2 x i16> *%ptr
-+  ret void
-+}
-+
-+; Test a v2i64->v2i32 truncation.
-+define void @f10(<2 x i64> %val, <2 x i32> *%ptr) {
-+; CHECK-LABEL: f10:
-+; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24
-+; CHECK: vsteg [[REG1]], 0(%r2)
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i32>
-+  store <2 x i32> %trunc, <2 x i32> *%ptr
-+  ret void
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll
-@@ -0,0 +1,60 @@
-+; Test vector multiplication.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 multiplication.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmlb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = mul <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 multiplication.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmlhw %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = mul <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 multiplication.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmlf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = mul <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 multiplication.  There's no vector equivalent.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK-NOT: vmlg
-+; CHECK: br %r14
-+  %ret = mul <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2f64 multiplication.
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfmdb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = fmul <2 x double> %val1, %val2
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 multiplication that uses vector registers.
-+define double @f6(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: wfmdb %f0, %v24, %v26
-+; CHECK: br %r14
-+  %scalar1 = extractelement <2 x double> %val1, i32 0
-+  %scalar2 = extractelement <2 x double> %val2, i32 0
-+  %ret = fmul double %scalar1, %scalar2
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll
-@@ -0,0 +1,63 @@
-+; Test vector multiply-and-add.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
-+
-+; Test a v16i8 multiply-and-add.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2,
-+                     <16 x i8> %val3) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmalb %v24, %v26, %v28, %v30
-+; CHECK: br %r14
-+  %mul = mul <16 x i8> %val1, %val2
-+  %ret = add <16 x i8> %mul, %val3
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 multiply-and-add.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2,
-+                     <8 x i16> %val3) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmalhw %v24, %v26, %v28, %v30
-+; CHECK: br %r14
-+  %mul = mul <8 x i16> %val1, %val2
-+  %ret = add <8 x i16> %mul, %val3
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 multiply-and-add.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2,
-+                     <4 x i32> %val3) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmalf %v24, %v26, %v28, %v30
-+; CHECK: br %r14
-+  %mul = mul <4 x i32> %val1, %val2
-+  %ret = add <4 x i32> %mul, %val3
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2f64 multiply-and-add.
-+define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2, <2 x double> %val3) {
-+; CHECK-LABEL: f4:
-+; CHECK: vfmadb %v24, %v26, %v28, %v30
-+; CHECK: br %r14
-+  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
-+                                            <2 x double> %val2,
-+                                            <2 x double> %val3)
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v2f64 multiply-and-subtract.
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2, <2 x double> %val3) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfmsdb %v24, %v26, %v28, %v30
-+; CHECK: br %r14
-+  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
-+  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
-+                                            <2 x double> %val2,
-+                                            <2 x double> %negval3)
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll
-@@ -0,0 +1,58 @@
-+; Test vector negation.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 negation.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlcb %v24, %v26
-+; CHECK: br %r14
-+  %ret = sub <16 x i8> zeroinitializer, %val
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 negation.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlch %v24, %v26
-+; CHECK: br %r14
-+  %ret = sub <8 x i16> zeroinitializer, %val
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 negation.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlcf %v24, %v26
-+; CHECK: br %r14
-+  %ret = sub <4 x i32> zeroinitializer, %val
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 negation.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlcg %v24, %v26
-+; CHECK: br %r14
-+  %ret = sub <2 x i64> zeroinitializer, %val
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2f64 negation.
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vflcdb %v24, %v26
-+; CHECK: br %r14
-+  %ret = fsub <2 x double> <double -0.0, double -0.0>, %val
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 negation that uses vector registers.
-+define double @f6(<2 x double> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: wflcdb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %ret = fsub double -0.0, %scalar
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-or-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-or-01.ll
-@@ -0,0 +1,39 @@
-+; Test vector OR.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 OR.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vo %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = or <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 OR.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vo %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = or <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 OR.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vo %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = or <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 OR.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vo %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = or <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-or-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-or-02.ll
-@@ -0,0 +1,107 @@
-+; Test vector (or (and X, Z), (and Y, (not Z))) patterns.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
-+; CHECK-LABEL: f1:
-+; CHECK: vsel %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1>
-+  %and1 = and <16 x i8> %val1, %val3
-+  %and2 = and <16 x i8> %val2, %not
-+  %ret = or <16 x i8> %and1, %and2
-+  ret <16 x i8> %ret
-+}
-+
-+; ...and again with the XOR applied to the other operand of the AND.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
-+; CHECK-LABEL: f2:
-+; CHECK: vsel %v24, %v26, %v24, %v28
-+; CHECK: br %r14
-+  %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1,
-+                               i8 -1, i8 -1, i8 -1, i8 -1>
-+  %and1 = and <16 x i8> %val1, %not
-+  %and2 = and <16 x i8> %val2, %val3
-+  %ret = or <16 x i8> %and1, %and2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16.
-+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
-+; CHECK-LABEL: f3:
-+; CHECK: vsel %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
-+                               i16 -1, i16 -1, i16 -1, i16 -1>
-+  %and1 = and <8 x i16> %val1, %val3
-+  %and2 = and <8 x i16> %val2, %not
-+  %ret = or <8 x i16> %and1, %and2
-+  ret <8 x i16> %ret
-+}
-+
-+; ...and again with the XOR applied to the other operand of the AND.
-+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
-+; CHECK-LABEL: f4:
-+; CHECK: vsel %v24, %v26, %v24, %v28
-+; CHECK: br %r14
-+  %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
-+                               i16 -1, i16 -1, i16 -1, i16 -1>
-+  %and1 = and <8 x i16> %val1, %not
-+  %and2 = and <8 x i16> %val2, %val3
-+  %ret = or <8 x i16> %and1, %and2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32.
-+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
-+; CHECK-LABEL: f5:
-+; CHECK: vsel %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %and1 = and <4 x i32> %val1, %val3
-+  %and2 = and <4 x i32> %val2, %not
-+  %ret = or <4 x i32> %and1, %and2
-+  ret <4 x i32> %ret
-+}
-+
-+; ...and again with the XOR applied to the other operand of the AND.
-+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
-+; CHECK-LABEL: f6:
-+; CHECK: vsel %v24, %v26, %v24, %v28
-+; CHECK: br %r14
-+  %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
-+  %and1 = and <4 x i32> %val1, %not
-+  %and2 = and <4 x i32> %val2, %val3
-+  %ret = or <4 x i32> %and1, %and2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64.
-+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
-+; CHECK-LABEL: f7:
-+; CHECK: vsel %v24, %v24, %v26, %v28
-+; CHECK: br %r14
-+  %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
-+  %and1 = and <2 x i64> %val1, %val3
-+  %and2 = and <2 x i64> %val2, %not
-+  %ret = or <2 x i64> %and1, %and2
-+  ret <2 x i64> %ret
-+}
-+
-+; ...and again with the XOR applied to the other operand of the AND.
-+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
-+; CHECK-LABEL: f8:
-+; CHECK: vsel %v24, %v26, %v24, %v28
-+; CHECK: br %r14
-+  %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
-+  %and1 = and <2 x i64> %val1, %not
-+  %and2 = and <2 x i64> %val2, %val3
-+  %ret = or <2 x i64> %and1, %and2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll
-@@ -0,0 +1,175 @@
-+; Test vector splat.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 splat of the first element.
-+define <16 x i8> @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vrepb %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 splat of the last element.
-+define <16 x i8> @f2(<16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vrepb %v24, %v24, 15
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> <i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <16 x i8> @f3(<16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vrepb %v24, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
-+                       <16 x i32> <i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 splat of the first element.
-+define <8 x i16> @f4(<8 x i16> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vreph %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> zeroinitializer
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 splat of the last element.
-+define <8 x i16> @f5(<8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vreph %v24, %v24, 7
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> <i32 7, i32 7, i32 7, i32 7,
-+                                  i32 7, i32 7, i32 7, i32 7>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <8 x i16> @f6(<8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vreph %v24, %v24, 2
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
-+                       <8 x i32> <i32 10, i32 10, i32 10, i32 10,
-+                                  i32 10, i32 10, i32 10, i32 10>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 splat of the first element.
-+define <4 x i32> @f7(<4 x i32> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vrepf %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 splat of the last element.
-+define <4 x i32> @f8(<4 x i32> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vrepf %v24, %v24, 3
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <4 x i32> @f9(<4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vrepf %v24, %v24, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
-+                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 splat of the first element.
-+define <2 x i64> @f10(<2 x i64> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vrepg %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 splat of the last element.
-+define <2 x i64> @f11(<2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vrepg %v24, %v24, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> <i32 1, i32 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 splat of the first element.
-+define <4 x float> @f12(<4 x float> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepf %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 splat of the last element.
-+define <4 x float> @f13(<4 x float> %val) {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepf %v24, %v24, 3
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <4 x float> @f14(<4 x float> %val) {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepf %v24, %v24, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> undef, <4 x float> %val,
-+                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 splat of the first element.
-+define <2 x double> @f15(<2 x double> %val) {
-+; CHECK-LABEL: f15:
-+; CHECK: vrepg %v24, %v24, 0
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 splat of the last element.
-+define <2 x double> @f16(<2 x double> %val) {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepg %v24, %v24, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> <i32 1, i32 1>
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll
-@@ -0,0 +1,200 @@
-+; Test replications of a scalar register value, represented as splats.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test v16i8 splat of the first element.
-+define <16 x i8> @f1(i8 %scalar) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepb %v24, [[REG]], 7
-+; CHECK: br %r14
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 splat of the last element.
-+define <16 x i8> @f2(i8 %scalar) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepb %v24, [[REG]], 7
-+; CHECK: br %r14
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 15
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> <i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15,
-+                                   i32 15, i32 15, i32 15, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v16i8 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <16 x i8> @f3(i8 %scalar) {
-+; CHECK-LABEL: f3:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepb %v24, [[REG]], 7
-+; CHECK: br %r14
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 4
-+  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
-+                       <16 x i32> <i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20,
-+                                   i32 20, i32 20, i32 20, i32 20>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test v8i16 splat of the first element.
-+define <8 x i16> @f4(i16 %scalar) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vreph %v24, [[REG]], 3
-+; CHECK: br %r14
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> zeroinitializer
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 splat of the last element.
-+define <8 x i16> @f5(i16 %scalar) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vreph %v24, [[REG]], 3
-+; CHECK: br %r14
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 7
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> <i32 7, i32 7, i32 7, i32 7,
-+                                  i32 7, i32 7, i32 7, i32 7>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v8i16 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <8 x i16> @f6(i16 %scalar) {
-+; CHECK-LABEL: f6:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vreph %v24, [[REG]], 3
-+; CHECK: br %r14
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 2
-+  %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
-+                       <8 x i32> <i32 10, i32 10, i32 10, i32 10,
-+                                  i32 10, i32 10, i32 10, i32 10>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test v4i32 splat of the first element.
-+define <4 x i32> @f7(i32 %scalar) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepf %v24, [[REG]], 1
-+; CHECK: br %r14
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 splat of the last element.
-+define <4 x i32> @f8(i32 %scalar) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepf %v24, [[REG]], 1
-+; CHECK: br %r14
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 3
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v4i32 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <4 x i32> @f9(i32 %scalar) {
-+; CHECK-LABEL: f9:
-+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
-+; CHECK: vrepf %v24, [[REG]], 1
-+; CHECK: br %r14
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 1
-+  %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
-+                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test v2i64 splat of the first element.
-+define <2 x i64> @f10(i64 %scalar) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK: br %r14
-+  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v2i64 splat of the last element.
-+define <2 x i64> @f11(i64 %scalar) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlvgp %v24, %r2, %r2
-+; CHECK: br %r14
-+  %val = insertelement <2 x i64> undef, i64 %scalar, i32 1
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> <i32 1, i32 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test v4f32 splat of the first element.
-+define <4 x float> @f12(float %scalar) {
-+; CHECK-LABEL: f12:
-+; CHECK: vrepf %v24, %v0, 0
-+; CHECK: br %r14
-+  %val = insertelement <4 x float> undef, float %scalar, i32 0
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 splat of the last element.
-+define <4 x float> @f13(float %scalar) {
-+; CHECK-LABEL: f13:
-+; CHECK: vrepf %v24, %v0, 0
-+; CHECK: br %r14
-+  %val = insertelement <4 x float> undef, float %scalar, i32 3
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
-+  ret <4 x float> %ret
-+}
-+
-+; Test v4f32 splat of an arbitrary element, using the second operand of
-+; the shufflevector.
-+define <4 x float> @f14(float %scalar) {
-+; CHECK-LABEL: f14:
-+; CHECK: vrepf %v24, %v0, 0
-+; CHECK: br %r14
-+  %val = insertelement <4 x float> undef, float %scalar, i32 1
-+  %ret = shufflevector <4 x float> undef, <4 x float> %val,
-+                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
-+  ret <4 x float> %ret
-+}
-+
-+; Test v2f64 splat of the first element.
-+define <2 x double> @f15(double %scalar) {
-+; CHECK-LABEL: f15:
-+; CHECK: vrepg %v24, %v0, 0
-+; CHECK: br %r14
-+  %val = insertelement <2 x double> undef, double %scalar, i32 0
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x double> %ret
-+}
-+
-+; Test v2f64 splat of the last element.
-+define <2 x double> @f16(double %scalar) {
-+; CHECK-LABEL: f16:
-+; CHECK: vrepg %v24, %v0, 0
-+; CHECK: br %r14
-+  %val = insertelement <2 x double> undef, double %scalar, i32 1
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> <i32 1, i32 1>
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll
-@@ -0,0 +1,251 @@
-+; Test replications of a scalar memory value, represented as splats.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 replicating load with no offset.
-+define <16 x i8> @f1(i8 *%ptr) {
-+; CHECK-LABEL: f1:
-+; CHECK: vlrepb %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load i8 *%ptr
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 replicating load with the maximum in-range offset.
-+define <16 x i8> @f2(i8 *%base) {
-+; CHECK-LABEL: f2:
-+; CHECK: vlrepb %v24, 4095(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4095
-+  %scalar = load i8 *%ptr
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 replicating load with the first out-of-range offset.
-+define <16 x i8> @f3(i8 *%base) {
-+; CHECK-LABEL: f3:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlrepb %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i8 *%base, i64 4096
-+  %scalar = load i8 *%ptr
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 replicating load with no offset.
-+define <8 x i16> @f4(i16 *%ptr) {
-+; CHECK-LABEL: f4:
-+; CHECK: vlreph %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load i16 *%ptr
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> zeroinitializer
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 replicating load with the maximum in-range offset.
-+define <8 x i16> @f5(i16 *%base) {
-+; CHECK-LABEL: f5:
-+; CHECK: vlreph %v24, 4094(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i64 2047
-+  %scalar = load i16 *%ptr
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> zeroinitializer
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 replicating load with the first out-of-range offset.
-+define <8 x i16> @f6(i16 *%base) {
-+; CHECK-LABEL: f6:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlreph %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i16 *%base, i64 2048
-+  %scalar = load i16 *%ptr
-+  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
-+  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
-+                       <8 x i32> zeroinitializer
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 replicating load with no offset.
-+define <4 x i32> @f7(i32 *%ptr) {
-+; CHECK-LABEL: f7:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load i32 *%ptr
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 replicating load with the maximum in-range offset.
-+define <4 x i32> @f8(i32 *%base) {
-+; CHECK-LABEL: f8:
-+; CHECK: vlrepf %v24, 4092(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i64 1023
-+  %scalar = load i32 *%ptr
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 replicating load with the first out-of-range offset.
-+define <4 x i32> @f9(i32 *%base) {
-+; CHECK-LABEL: f9:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i32 *%base, i64 1024
-+  %scalar = load i32 *%ptr
-+  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
-+  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 replicating load with no offset.
-+define <2 x i64> @f10(i64 *%ptr) {
-+; CHECK-LABEL: f10:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load i64 *%ptr
-+  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 replicating load with the maximum in-range offset.
-+define <2 x i64> @f11(i64 *%base) {
-+; CHECK-LABEL: f11:
-+; CHECK: vlrepg %v24, 4088(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 511
-+  %scalar = load i64 *%ptr
-+  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 replicating load with the first out-of-range offset.
-+define <2 x i64> @f12(i64 *%base) {
-+; CHECK-LABEL: f12:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr i64 *%base, i32 512
-+  %scalar = load i64 *%ptr
-+  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
-+  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v4f32 replicating load with no offset.
-+define <4 x float> @f13(float *%ptr) {
-+; CHECK-LABEL: f13:
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load float *%ptr
-+  %val = insertelement <4 x float> undef, float %scalar, i32 0
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v4f32 replicating load with the maximum in-range offset.
-+define <4 x float> @f14(float *%base) {
-+; CHECK-LABEL: f14:
-+; CHECK: vlrepf %v24, 4092(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i64 1023
-+  %scalar = load float *%ptr
-+  %val = insertelement <4 x float> undef, float %scalar, i32 0
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v4f32 replicating load with the first out-of-range offset.
-+define <4 x float> @f15(float *%base) {
-+; CHECK-LABEL: f15:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlrepf %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr float *%base, i64 1024
-+  %scalar = load float *%ptr
-+  %val = insertelement <4 x float> undef, float %scalar, i32 0
-+  %ret = shufflevector <4 x float> %val, <4 x float> undef,
-+                       <4 x i32> zeroinitializer
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v2f64 replicating load with no offset.
-+define <2 x double> @f16(double *%ptr) {
-+; CHECK-LABEL: f16:
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %scalar = load double *%ptr
-+  %val = insertelement <2 x double> undef, double %scalar, i32 0
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v2f64 replicating load with the maximum in-range offset.
-+define <2 x double> @f17(double *%base) {
-+; CHECK-LABEL: f17:
-+; CHECK: vlrepg %v24, 4088(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 511
-+  %scalar = load double *%ptr
-+  %val = insertelement <2 x double> undef, double %scalar, i32 0
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v2f64 replicating load with the first out-of-range offset.
-+define <2 x double> @f18(double *%base) {
-+; CHECK-LABEL: f18:
-+; CHECK: aghi %r2, 4096
-+; CHECK: vlrepg %v24, 0(%r2)
-+; CHECK: br %r14
-+  %ptr = getelementptr double *%base, i32 512
-+  %scalar = load double *%ptr
-+  %val = insertelement <2 x double> undef, double %scalar, i32 0
-+  %ret = shufflevector <2 x double> %val, <2 x double> undef,
-+                       <2 x i32> zeroinitializer
-+  ret <2 x double> %ret
-+}
-+
-+; Test a v16i8 replicating load with an index.
-+define <16 x i8> @f19(i8 *%base, i64 %index) {
-+; CHECK-LABEL: f19:
-+; CHECK: vlrepb %v24, 1023(%r3,%r2)
-+; CHECK: br %r14
-+  %ptr1 = getelementptr i8 *%base, i64 %index
-+  %ptr = getelementptr i8 *%ptr1, i64 1023
-+  %scalar = load i8 *%ptr
-+  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> zeroinitializer
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll
-@@ -0,0 +1,200 @@
-+; Test vector merge high.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a canonical v16i8 merge high.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmrhb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 0, i32 16, i32 1, i32 17,
-+                                   i32 2, i32 18, i32 3, i32 19,
-+                                   i32 4, i32 20, i32 5, i32 21,
-+                                   i32 6, i32 22, i32 7, i32 23>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a reversed v16i8 merge high.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmrhb %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 16, i32 0, i32 17, i32 1,
-+                                   i32 18, i32 2, i32 19, i32 3,
-+                                   i32 20, i32 4, i32 21, i32 5,
-+                                   i32 22, i32 6, i32 23, i32 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge high with only the first operand being used.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmrhb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 0, i32 0, i32 1, i32 1,
-+                                   i32 2, i32 2, i32 3, i32 3,
-+                                   i32 4, i32 4, i32 5, i32 5,
-+                                   i32 6, i32 6, i32 7, i32 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge high with only the second operand being used.
-+; This is converted into @f3 by target-independent code.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmrhb %v24, %v26, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 16, i32 16, i32 17, i32 17,
-+                                   i32 18, i32 18, i32 19, i32 19,
-+                                   i32 20, i32 20, i32 21, i32 21,
-+                                   i32 22, i32 22, i32 23, i32 23>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge with both operands being the same.  This too is
-+; converted into @f3 by target-independent code.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmrhb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
-+                       <16 x i32> <i32 0, i32 16, i32 17, i32 17,
-+                                   i32 18, i32 2, i32 3, i32 3,
-+                                   i32 20, i32 20, i32 5, i32 5,
-+                                   i32 6, i32 22, i32 23, i32 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge in which some of the indices are don't care.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmrhb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 0, i32 undef, i32 1, i32 17,
-+                                   i32 undef, i32 18, i32 undef, i32 undef,
-+                                   i32 undef, i32 20, i32 5, i32 21,
-+                                   i32 undef, i32 22, i32 7, i32 undef>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge in which one of the operands is undefined and where
-+; indices for that operand are "don't care".  Target-independent code
-+; converts the indices themselves into "undef"s.
-+define <16 x i8> @f7(<16 x i8> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmrhb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
-+                       <16 x i32> <i32 11, i32 16, i32 17, i32 5,
-+                                   i32 18, i32 10, i32 19, i32 19,
-+                                   i32 20, i32 20, i32 21, i32 3,
-+                                   i32 2, i32 22, i32 9, i32 23>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a canonical v8i16 merge high.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmrhh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 0, i32 8, i32 1, i32 9,
-+                                  i32 2, i32 10, i32 3, i32 11>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a reversed v8i16 merge high.
-+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vmrhh %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 8, i32 0, i32 9, i32 1,
-+                                  i32 10, i32 2, i32 11, i32 3>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a canonical v4i32 merge high.
-+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vmrhf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a reversed v4i32 merge high.
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vmrhf %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 4, i32 0, i32 5, i32 1>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a canonical v2i64 merge high.
-+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vmrhg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 0, i32 2>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a reversed v2i64 merge high.
-+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vmrhg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 2, i32 0>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a canonical v4f32 merge high.
-+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f14:
-+; CHECK: vmrhf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a reversed v4f32 merge high.
-+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f15:
-+; CHECK: vmrhf %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 4, i32 0, i32 5, i32 1>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a canonical v2f64 merge high.
-+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f16:
-+; CHECK: vmrhg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 0, i32 2>
-+  ret <2 x double> %ret
-+}
-+
-+; Test a reversed v2f64 merge high.
-+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f17:
-+; CHECK: vmrhg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 2, i32 0>
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll
-@@ -0,0 +1,200 @@
-+; Test vector merge low.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a canonical v16i8 merge low.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vmrlb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 8, i32 24, i32 9, i32 25,
-+                                   i32 10, i32 26, i32 11, i32 27,
-+                                   i32 12, i32 28, i32 13, i32 29,
-+                                   i32 14, i32 30, i32 15, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a reversed v16i8 merge low.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vmrlb %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 24, i32 8, i32 25, i32 9,
-+                                   i32 26, i32 10, i32 27, i32 11,
-+                                   i32 28, i32 12, i32 29, i32 13,
-+                                   i32 30, i32 14, i32 31, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge low with only the first operand being used.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vmrlb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 8, i32 8, i32 9, i32 9,
-+                                   i32 10, i32 10, i32 11, i32 11,
-+                                   i32 12, i32 12, i32 13, i32 13,
-+                                   i32 14, i32 14, i32 15, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge low with only the second operand being used.
-+; This is converted into @f3 by target-independent code.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vmrlb %v24, %v26, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 24, i32 24, i32 25, i32 25,
-+                                   i32 26, i32 26, i32 27, i32 27,
-+                                   i32 28, i32 28, i32 29, i32 29,
-+                                   i32 30, i32 30, i32 31, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge with both operands being the same.  This too is
-+; converted into @f3 by target-independent code.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vmrlb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
-+                       <16 x i32> <i32 8, i32 24, i32 25, i32 25,
-+                                   i32 26, i32 10, i32 11, i32 11,
-+                                   i32 28, i32 28, i32 13, i32 13,
-+                                   i32 14, i32 30, i32 31, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge in which some of the indices are don't care.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vmrlb %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 8, i32 undef, i32 9, i32 25,
-+                                   i32 undef, i32 26, i32 undef, i32 undef,
-+                                   i32 undef, i32 28, i32 13, i32 29,
-+                                   i32 undef, i32 30, i32 15, i32 undef>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 merge in which one of the operands is undefined and where
-+; indices for that operand are "don't care".  Target-independent code
-+; converts the indices themselves into "undef"s.
-+define <16 x i8> @f7(<16 x i8> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vmrlb %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
-+                       <16 x i32> <i32 11, i32 24, i32 25, i32 5,
-+                                   i32 26, i32 10, i32 27, i32 27,
-+                                   i32 28, i32 28, i32 29, i32 3,
-+                                   i32 2, i32 30, i32 9, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a canonical v8i16 merge low.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vmrlh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 4, i32 12, i32 5, i32 13,
-+                                  i32 6, i32 14, i32 7, i32 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a reversed v8i16 merge low.
-+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vmrlh %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 12, i32 4, i32 13, i32 5,
-+                                  i32 14, i32 6, i32 15, i32 7>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a canonical v4i32 merge low.
-+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vmrlf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a reversed v4i32 merge low.
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vmrlf %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 6, i32 2, i32 7, i32 3>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a canonical v2i64 merge low.
-+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vmrlg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 1, i32 3>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a reversed v2i64 merge low.
-+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vmrlg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 3, i32 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a canonical v4f32 merge low.
-+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f14:
-+; CHECK: vmrlf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 2, i32 6, i32 3, i32 7>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a reversed v4f32 merge low.
-+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f15:
-+; CHECK: vmrlf %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 6, i32 2, i32 7, i32 3>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a canonical v2f64 merge low.
-+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f16:
-+; CHECK: vmrlg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 1, i32 3>
-+  ret <2 x double> %ret
-+}
-+
-+; Test a reversed v2f64 merge low.
-+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f17:
-+; CHECK: vmrlg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 3, i32 1>
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll
-@@ -0,0 +1,160 @@
-+; Test vector pack.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a canonical v16i8 pack.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vpkh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a reversed v16i8 pack.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vpkh %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31,
-+                                   i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 pack with only the first operand being used.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vpkh %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15,
-+                                   i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 pack with only the second operand being used.
-+; This is converted into @f3 by target-independent code.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vpkh %v24, %v26, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 pack with both operands being the same.  This too is
-+; converted into @f3 by target-independent code.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vpkh %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
-+                       <16 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                   i32 9, i32 11, i32 13, i32 15,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 pack in which some of the indices are don't care.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vpkh %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 1, i32 undef, i32 5, i32 7,
-+                                   i32 undef, i32 11, i32 undef, i32 undef,
-+                                   i32 undef, i32 19, i32 21, i32 23,
-+                                   i32 undef, i32 27, i32 29, i32 undef>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 pack in which one of the operands is undefined and where
-+; indices for that operand are "don't care".  Target-independent code
-+; converts the indices themselves into "undef"s.
-+define <16 x i8> @f7(<16 x i8> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: vpkh %v24, %v24, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
-+                       <16 x i32> <i32 7, i32 1, i32 9, i32 15,
-+                                   i32 15, i32 3, i32 5, i32 1,
-+                                   i32 17, i32 19, i32 21, i32 23,
-+                                   i32 25, i32 27, i32 29, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a canonical v8i16 pack.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vpkf %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 1, i32 3, i32 5, i32 7,
-+                                  i32 9, i32 11, i32 13, i32 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a reversed v8i16 pack.
-+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vpkf %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 9, i32 11, i32 13, i32 15,
-+                                  i32 1, i32 3, i32 5, i32 7>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a canonical v4i32 pack.
-+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vpkg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a reversed v4i32 pack.
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vpkg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 5, i32 7, i32 1, i32 3>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a canonical v4f32 pack.
-+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vpkg %v24, %v24, %v26
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 1, i32 3, i32 5, i32 7>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a reversed v4f32 pack.
-+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vpkg %v24, %v26, %v24
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 5, i32 7, i32 1, i32 3>
-+  ret <4 x float> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll
-@@ -0,0 +1,145 @@
-+; Test vector shift left double immediate.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift with the lowest useful shift amount.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vsldb %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 1, i32 2, i32 3, i32 4,
-+                                   i32 5, i32 6, i32 7, i32 8,
-+                                   i32 9, i32 10, i32 11, i32 12,
-+                                   i32 13, i32 14, i32 15, i32 16>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift with the highest shift amount.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vsldb %v24, %v24, %v26, 15
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 15, i32 16, i32 17, i32 18,
-+                                   i32 19, i32 20, i32 21, i32 22,
-+                                   i32 23, i32 24, i32 25, i32 26,
-+                                   i32 27, i32 28, i32 29, i32 30>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift in which the operands need to be reversed.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vsldb %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 20, i32 21, i32 22, i32 23,
-+                                   i32 24, i32 25, i32 26, i32 27,
-+                                   i32 28, i32 29, i32 30, i32 31,
-+                                   i32 0, i32 1, i32 2, i32 3>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift in which the operands need to be duplicated.
-+define <16 x i8> @f4(<16 x i8> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vsldb %v24, %v24, %v24, 7
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> <i32 7, i32 8, i32 9, i32 10,
-+                                   i32 11, i32 12, i32 13, i32 14,
-+                                   i32 15, i32 0, i32 1, i32 2,
-+                                   i32 3, i32 4, i32 5, i32 6>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift in which some of the indices are undefs.
-+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK: vsldb %v24, %v24, %v26, 11
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 undef, i32 undef, i32 undef, i32 undef,
-+                                   i32 15, i32 16, i32 undef, i32 18,
-+                                   i32 19, i32 20, i32 21, i32 22,
-+                                   i32 23, i32 24, i32 25, i32 26>
-+  ret <16 x i8> %ret
-+}
-+
-+; ...and again with reversed operands.
-+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vsldb %v24, %v26, %v24, 13
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 undef, i32 undef, i32 31, i32 0,
-+                                   i32 1, i32 2, i32 3, i32 4,
-+                                   i32 5, i32 6, i32 7, i32 8,
-+                                   i32 9, i32 10, i32 11, i32 12>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift with the lowest useful shift amount.
-+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vsldb %v24, %v24, %v26, 2
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 1, i32 2, i32 3, i32 4,
-+                                  i32 5, i32 6, i32 7, i32 8>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift with the highest useful shift amount.
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vsldb %v24, %v24, %v26, 14
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 7, i32 8, i32 9, i32 10,
-+                                  i32 11, i32 12, i32 13, i32 14>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift with the lowest useful shift amount.
-+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vsldb %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift with the highest useful shift amount.
-+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vsldb %v24, %v24, %v26, 12
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4f32 shift with the lowest useful shift amount.
-+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vsldb %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v4f32 shift with the highest useful shift amount.
-+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vsldb %v24, %v24, %v26, 12
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 3, i32 4, i32 5, i32 6>
-+  ret <4 x float> %ret
-+}
-+
-+; We use VPDI for v2i64 shuffles.
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll
-@@ -0,0 +1,170 @@
-+; Test vector permutes using VPDI.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a high1/low2 permute for v16i8.
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                   i32 4, i32 5, i32 6, i32 7,
-+                                   i32 24, i32 25, i32 26, i32 27,
-+                                   i32 28, i32 29, i32 30, i32 31>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a low2/high1 permute for v16i8.
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 24, i32 25, i32 26, i32 27,
-+                                   i32 28, i32 29, i32 30, i32 31,
-+                                   i32 0, i32 1, i32 2, i32 3,
-+                                   i32 4, i32 5, i32 6, i32 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a low1/high2 permute for v16i8.
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vpdi %v24, %v24, %v26, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 8, i32 9, i32 10, i32 undef,
-+                                   i32 12, i32 undef, i32 14, i32 15,
-+                                   i32 16, i32 17, i32 undef, i32 19,
-+                                   i32 20, i32 21, i32 22, i32 undef>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a high2/low1 permute for v16i8.
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vpdi %v24, %v26, %v24, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 16, i32 17, i32 18, i32 19,
-+                                   i32 20, i32 21, i32 22, i32 23,
-+                                   i32 8, i32 9, i32 10, i32 11,
-+                                   i32 12, i32 13, i32 14, i32 15>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test reversing two doublewords in a v16i8.
-+define <16 x i8> @f5(<16 x i8> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vpdi %v24, %v24, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
-+                       <16 x i32> <i32 8, i32 9, i32 10, i32 11,
-+                                   i32 12, i32 13, i32 14, i32 15,
-+                                   i32 0, i32 1, i32 2, i32 3,
-+                                   i32 4, i32 5, i32 6, i32 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a high1/low2 permute for v8i16.
-+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-+                                  i32 12, i32 13, i32 14, i32 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a low2/high1 permute for v8i16.
-+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 12, i32 13, i32 14, i32 15,
-+                                  i32 0, i32 1, i32 2, i32 3>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a high1/low2 permute for v4i32.
-+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a low2/high1 permute for v4i32.
-+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a high1/low2 permute for v2i64.
-+define <2 x i64> @f10(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 0, i32 3>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test low2/high1 permute for v2i64.
-+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
-+                       <2 x i32> <i32 3, i32 0>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a high1/low2 permute for v4f32.
-+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a low2/high1 permute for v4f32.
-+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
-+                       <4 x i32> <i32 6, i32 7, i32 0, i32 1>
-+  ret <4 x float> %ret
-+}
-+
-+; Test a high1/low2 permute for v2f64.
-+define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f14:
-+; CHECK: vpdi %v24, %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 0, i32 3>
-+  ret <2 x double> %ret
-+}
-+
-+; Test a low2/high1 permute for v2f64.
-+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f15:
-+; CHECK: vpdi %v24, %v26, %v24, 4
-+; CHECK: br %r14
-+  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
-+                       <2 x i32> <i32 3, i32 0>
-+  ret <2 x double> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll
-@@ -0,0 +1,38 @@
-+; Test general vector permute of a v16i8.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-CODE %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-CODE-LABEL: f1:
-+; CHECK-CODE: larl [[REG:%r[0-5]]],
-+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
-+; CHECK-CODE: vperm %v24, %v24, %v26, [[MASK]]
-+; CHECK-CODE: br %r14
-+;
-+; CHECK-VECTOR: .byte 1
-+; CHECK-VECTOR-NEXT: .byte 19
-+; CHECK-VECTOR-NEXT: .byte 6
-+; CHECK-VECTOR-NEXT: .byte 5
-+; CHECK-VECTOR-NEXT: .byte 20
-+; CHECK-VECTOR-NEXT: .byte 22
-+; CHECK-VECTOR-NEXT: .byte 1
-+; CHECK-VECTOR-NEXT: .byte 1
-+; CHECK-VECTOR-NEXT: .byte 25
-+; CHECK-VECTOR-NEXT: .byte 29
-+; CHECK-VECTOR-NEXT: .byte 11
-+; Any byte would be OK here
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .byte 31
-+; CHECK-VECTOR-NEXT: .byte 4
-+; CHECK-VECTOR-NEXT: .byte 15
-+; CHECK-VECTOR-NEXT: .byte 19
-+  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
-+                       <16 x i32> <i32 1, i32 19, i32 6, i32 5,
-+                                   i32 20, i32 22, i32 1, i32 1,
-+                                   i32 25, i32 29, i32 11, i32 undef,
-+                                   i32 31, i32 4, i32 15, i32 19>
-+  ret <16 x i8> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll
-@@ -0,0 +1,36 @@
-+; Test general vector permute of a v8i16.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-CODE %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+
-+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-CODE-LABEL: f1:
-+; CHECK-CODE: larl [[REG:%r[0-5]]],
-+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
-+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
-+; CHECK-CODE: br %r14
-+;
-+; CHECK-VECTOR: .byte 0
-+; CHECK-VECTOR-NEXT: .byte 1
-+; CHECK-VECTOR-NEXT: .byte 26
-+; CHECK-VECTOR-NEXT: .byte 27
-+; Any 2 bytes would be OK here
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .byte 28
-+; CHECK-VECTOR-NEXT: .byte 29
-+; CHECK-VECTOR-NEXT: .byte 6
-+; CHECK-VECTOR-NEXT: .byte 7
-+; CHECK-VECTOR-NEXT: .byte 14
-+; CHECK-VECTOR-NEXT: .byte 15
-+; CHECK-VECTOR-NEXT: .byte 8
-+; CHECK-VECTOR-NEXT: .byte 9
-+; CHECK-VECTOR-NEXT: .byte 16
-+; CHECK-VECTOR-NEXT: .byte 17
-+  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
-+                       <8 x i32> <i32 8, i32 5, i32 undef, i32 6,
-+                                  i32 11, i32 15, i32 12, i32 0>
-+  ret <8 x i16> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll
-@@ -0,0 +1,35 @@
-+; Test general vector permute of a v4i32.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-CODE %s
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
-+; RUN:   FileCheck -check-prefix=CHECK-VECTOR %s
-+
-+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-CODE-LABEL: f1:
-+; CHECK-CODE: larl [[REG:%r[0-5]]],
-+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]])
-+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]]
-+; CHECK-CODE: br %r14
-+;
-+; CHECK-VECTOR: .byte 4
-+; CHECK-VECTOR-NEXT: .byte 5
-+; CHECK-VECTOR-NEXT: .byte 6
-+; CHECK-VECTOR-NEXT: .byte 7
-+; CHECK-VECTOR-NEXT: .byte 20
-+; CHECK-VECTOR-NEXT: .byte 21
-+; CHECK-VECTOR-NEXT: .byte 22
-+; CHECK-VECTOR-NEXT: .byte 23
-+; Any 4 bytes would be OK here
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .space 1
-+; CHECK-VECTOR-NEXT: .byte 12
-+; CHECK-VECTOR-NEXT: .byte 13
-+; CHECK-VECTOR-NEXT: .byte 14
-+; CHECK-VECTOR-NEXT: .byte 15
-+  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
-+                       <4 x i32> <i32 5, i32 1, i32 undef, i32 7>
-+  ret <4 x i32> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-round-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-round-01.ll
-@@ -0,0 +1,118 @@
-+; Test v2f64 rounding.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare double @llvm.rint.f64(double)
-+declare double @llvm.nearbyint.f64(double)
-+declare double @llvm.floor.f64(double)
-+declare double @llvm.ceil.f64(double)
-+declare double @llvm.trunc.f64(double)
-+declare double @llvm.round.f64(double)
-+declare <2 x double> @llvm.rint.v2f64(<2 x double>)
-+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
-+declare <2 x double> @llvm.floor.v2f64(<2 x double>)
-+declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
-+declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
-+declare <2 x double> @llvm.round.v2f64(<2 x double>)
-+
-+define <2 x double> @f1(<2 x double> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vfidb %v24, %v24, 0, 0
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define <2 x double> @f2(<2 x double> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vfidb %v24, %v24, 4, 0
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define <2 x double> @f3(<2 x double> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vfidb %v24, %v24, 4, 7
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define <2 x double> @f4(<2 x double> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: vfidb %v24, %v24, 4, 6
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define <2 x double> @f5(<2 x double> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vfidb %v24, %v24, 4, 5
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define <2 x double> @f6(<2 x double> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vfidb %v24, %v24, 4, 1
-+; CHECK: br %r14
-+  %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val)
-+  ret <2 x double> %res
-+}
-+
-+define double @f7(<2 x double> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: wfidb %f0, %v24, 0, 0
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.rint.f64(double %scalar)
-+  ret double %res
-+}
-+
-+define double @f8(<2 x double> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: wfidb %f0, %v24, 4, 0
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.nearbyint.f64(double %scalar)
-+  ret double %res
-+}
-+
-+define double @f9(<2 x double> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: wfidb %f0, %v24, 4, 7
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.floor.f64(double %scalar)
-+  ret double %res
-+}
-+
-+define double @f10(<2 x double> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: wfidb %f0, %v24, 4, 6
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.ceil.f64(double %scalar)
-+  ret double %res
-+}
-+
-+define double @f11(<2 x double> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: wfidb %f0, %v24, 4, 5
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.trunc.f64(double %scalar)
-+  ret double %res
-+}
-+
-+define double @f12(<2 x double> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: wfidb %f0, %v24, 4, 1
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %res = call double @llvm.round.f64(double %scalar)
-+  ret double %res
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll
-@@ -0,0 +1,39 @@
-+; Test vector shift left with vector shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: veslvb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = shl <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: veslvh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = shl <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: veslvf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = shl <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: veslvg %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = shl <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll
-@@ -0,0 +1,39 @@
-+; Test vector arithmetic shift right with vector shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vesravb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = ashr <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vesravh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = ashr <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vesravf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = ashr <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vesravg %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = ashr <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll
-@@ -0,0 +1,39 @@
-+; Test vector logical shift right with vector shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vesrlvb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = lshr <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vesrlvh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = lshr <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vesrlvf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = lshr <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vesrlvg %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = lshr <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll
-@@ -0,0 +1,134 @@
-+; Test vector shift left with scalar shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift by a variable.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
-+; CHECK-LABEL: f1:
-+; CHECK: veslb %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i8
-+  %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
-+  %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
-+                        <16 x i32> zeroinitializer
-+  %ret = shl <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the lowest useful constant.
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: veslb %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shl <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
-+                              i8 1, i8 1, i8 1, i8 1,
-+                              i8 1, i8 1, i8 1, i8 1,
-+                              i8 1, i8 1, i8 1, i8 1>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the highest useful constant.
-+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: veslb %v24, %v26, 7
-+; CHECK: br %r14
-+  %ret = shl <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
-+                              i8 7, i8 7, i8 7, i8 7,
-+                              i8 7, i8 7, i8 7, i8 7,
-+                              i8 7, i8 7, i8 7, i8 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift by a variable.
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
-+; CHECK-LABEL: f4:
-+; CHECK: veslh %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i16
-+  %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
-+  %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
-+                        <8 x i32> zeroinitializer
-+  %ret = shl <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the lowest useful constant.
-+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: veslh %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shl <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
-+                              i16 1, i16 1, i16 1, i16 1>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the highest useful constant.
-+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: veslh %v24, %v26, 15
-+; CHECK: br %r14
-+  %ret = shl <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
-+                              i16 15, i16 15, i16 15, i16 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift by a variable.
-+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
-+; CHECK-LABEL: f7:
-+; CHECK: veslf %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
-+  %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
-+                        <4 x i32> zeroinitializer
-+  %ret = shl <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the lowest useful constant.
-+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: veslf %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shl <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the highest useful constant.
-+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: veslf %v24, %v26, 31
-+; CHECK: br %r14
-+  %ret = shl <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift by a variable.
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
-+; CHECK-LABEL: f10:
-+; CHECK: veslg %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %extshift = sext i32 %shift to i64
-+  %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
-+  %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
-+                        <2 x i32> zeroinitializer
-+  %ret = shl <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the lowest useful constant.
-+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: veslg %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = shl <2 x i64> %val, <i64 1, i64 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the highest useful constant.
-+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: veslg %v24, %v26, 63
-+; CHECK: br %r14
-+  %ret = shl <2 x i64> %val, <i64 63, i64 63>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll
-@@ -0,0 +1,134 @@
-+; Test vector arithmetic shift right with scalar shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift by a variable.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
-+; CHECK-LABEL: f1:
-+; CHECK: vesrab %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i8
-+  %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
-+  %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
-+                        <16 x i32> zeroinitializer
-+  %ret = ashr <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the lowest useful constant.
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vesrab %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = ashr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the highest useful constant.
-+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vesrab %v24, %v26, 7
-+; CHECK: br %r14
-+  %ret = ashr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift by a variable.
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
-+; CHECK-LABEL: f4:
-+; CHECK: vesrah %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i16
-+  %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
-+  %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
-+                        <8 x i32> zeroinitializer
-+  %ret = ashr <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the lowest useful constant.
-+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vesrah %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = ashr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
-+                               i16 1, i16 1, i16 1, i16 1>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the highest useful constant.
-+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vesrah %v24, %v26, 15
-+; CHECK: br %r14
-+  %ret = ashr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
-+                               i16 15, i16 15, i16 15, i16 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift by a variable.
-+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
-+; CHECK-LABEL: f7:
-+; CHECK: vesraf %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
-+  %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
-+                        <4 x i32> zeroinitializer
-+  %ret = ashr <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the lowest useful constant.
-+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vesraf %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = ashr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the highest useful constant.
-+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vesraf %v24, %v26, 31
-+; CHECK: br %r14
-+  %ret = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift by a variable.
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
-+; CHECK-LABEL: f10:
-+; CHECK: vesrag %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %extshift = sext i32 %shift to i64
-+  %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
-+  %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
-+                        <2 x i32> zeroinitializer
-+  %ret = ashr <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the lowest useful constant.
-+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vesrag %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = ashr <2 x i64> %val, <i64 1, i64 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the highest useful constant.
-+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: vesrag %v24, %v26, 63
-+; CHECK: br %r14
-+  %ret = ashr <2 x i64> %val, <i64 63, i64 63>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll
-@@ -0,0 +1,134 @@
-+; Test vector logical shift right with scalar shift amount.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 shift by a variable.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) {
-+; CHECK-LABEL: f1:
-+; CHECK: vesrlb %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i8
-+  %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0
-+  %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef,
-+                        <16 x i32> zeroinitializer
-+  %ret = lshr <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the lowest useful constant.
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: vesrlb %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = lshr <16 x i8> %val, <i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1,
-+                               i8 1, i8 1, i8 1, i8 1>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v16i8 shift by the highest useful constant.
-+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: vesrlb %v24, %v26, 7
-+; CHECK: br %r14
-+  %ret = lshr <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7,
-+                               i8 7, i8 7, i8 7, i8 7>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 shift by a variable.
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) {
-+; CHECK-LABEL: f4:
-+; CHECK: vesrlh %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %truncshift = trunc i32 %shift to i16
-+  %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0
-+  %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef,
-+                        <8 x i32> zeroinitializer
-+  %ret = lshr <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the lowest useful constant.
-+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: vesrlh %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = lshr <8 x i16> %val, <i16 1, i16 1, i16 1, i16 1,
-+                               i16 1, i16 1, i16 1, i16 1>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i16 shift by the highest useful constant.
-+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: vesrlh %v24, %v26, 15
-+; CHECK: br %r14
-+  %ret = lshr <8 x i16> %val, <i16 15, i16 15, i16 15, i16 15,
-+                               i16 15, i16 15, i16 15, i16 15>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 shift by a variable.
-+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) {
-+; CHECK-LABEL: f7:
-+; CHECK: vesrlf %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0
-+  %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef,
-+                        <4 x i32> zeroinitializer
-+  %ret = lshr <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the lowest useful constant.
-+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vesrlf %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = lshr <4 x i32> %val, <i32 1, i32 1, i32 1, i32 1>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i32 shift by the highest useful constant.
-+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vesrlf %v24, %v26, 31
-+; CHECK: br %r14
-+  %ret = lshr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 shift by a variable.
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) {
-+; CHECK-LABEL: f10:
-+; CHECK: vesrlg %v24, %v26, 0(%r2)
-+; CHECK: br %r14
-+  %extshift = sext i32 %shift to i64
-+  %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0
-+  %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef,
-+                        <2 x i32> zeroinitializer
-+  %ret = lshr <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the lowest useful constant.
-+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vesrlg %v24, %v26, 1
-+; CHECK: br %r14
-+  %ret = lshr <2 x i64> %val, <i64 1, i64 1>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i64 shift by the highest useful constant.
-+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: vesrlg %v24, %v26, 63
-+; CHECK: br %r14
-+  %ret = lshr <2 x i64> %val, <i64 63, i64 63>
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll
-@@ -0,0 +1,182 @@
-+; Test vector sign extensions.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i1->v16i8 extension.
-+define <16 x i8> @f1(<16 x i8> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: veslb [[REG:%v[0-9]+]], %v24, 7
-+; CHECK: vesrab %v24, [[REG]], 7
-+; CHECK: br %r14
-+  %trunc = trunc <16 x i8> %val to <16 x i1>
-+  %ret = sext <16 x i1> %trunc to <16 x i8>
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i1->v8i16 extension.
-+define <8 x i16> @f2(<8 x i16> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 15
-+; CHECK: vesrah %v24, [[REG]], 15
-+; CHECK: br %r14
-+  %trunc = trunc <8 x i16> %val to <8 x i1>
-+  %ret = sext <8 x i1> %trunc to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v8i8->v8i16 extension.
-+define <8 x i16> @f3(<8 x i16> %val) {
-+; CHECK-LABEL: f3:
-+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 8
-+; CHECK: vesrah %v24, [[REG]], 8
-+; CHECK: br %r14
-+  %trunc = trunc <8 x i16> %val to <8 x i8>
-+  %ret = sext <8 x i8> %trunc to <8 x i16>
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i1->v4i32 extension.
-+define <4 x i32> @f4(<4 x i32> %val) {
-+; CHECK-LABEL: f4:
-+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 31
-+; CHECK: vesraf %v24, [[REG]], 31
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i1>
-+  %ret = sext <4 x i1> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i8->v4i32 extension.
-+define <4 x i32> @f5(<4 x i32> %val) {
-+; CHECK-LABEL: f5:
-+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 24
-+; CHECK: vesraf %v24, [[REG]], 24
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i8>
-+  %ret = sext <4 x i8> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v4i16->v4i32 extension.
-+define <4 x i32> @f6(<4 x i32> %val) {
-+; CHECK-LABEL: f6:
-+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 16
-+; CHECK: vesraf %v24, [[REG]], 16
-+; CHECK: br %r14
-+  %trunc = trunc <4 x i32> %val to <4 x i16>
-+  %ret = sext <4 x i16> %trunc to <4 x i32>
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i1->v2i64 extension.
-+define <2 x i64> @f7(<2 x i64> %val) {
-+; CHECK-LABEL: f7:
-+; CHECK: veslg [[REG:%v[0-9]+]], %v24, 63
-+; CHECK: vesrag %v24, [[REG]], 63
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i1>
-+  %ret = sext <2 x i1> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i8->v2i64 extension.
-+define <2 x i64> @f8(<2 x i64> %val) {
-+; CHECK-LABEL: f8:
-+; CHECK: vsegb %v24, %v24
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i8>
-+  %ret = sext <2 x i8> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i16->v2i64 extension.
-+define <2 x i64> @f9(<2 x i64> %val) {
-+; CHECK-LABEL: f9:
-+; CHECK: vsegh %v24, %v24
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i16>
-+  %ret = sext <2 x i16> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v2i32->v2i64 extension.
-+define <2 x i64> @f10(<2 x i64> %val) {
-+; CHECK-LABEL: f10:
-+; CHECK: vsegf %v24, %v24
-+; CHECK: br %r14
-+  %trunc = trunc <2 x i64> %val to <2 x i32>
-+  %ret = sext <2 x i32> %trunc to <2 x i64>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test an alternative v2i8->v2i64 extension.
-+define <2 x i64> @f11(<2 x i64> %val) {
-+; CHECK-LABEL: f11:
-+; CHECK: vsegb %v24, %v24
-+; CHECK: br %r14
-+  %shl = shl <2 x i64> %val, <i64 56, i64 56>
-+  %ret = ashr <2 x i64> %shl, <i64 56, i64 56>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test an alternative v2i16->v2i64 extension.
-+define <2 x i64> @f12(<2 x i64> %val) {
-+; CHECK-LABEL: f12:
-+; CHECK: vsegh %v24, %v24
-+; CHECK: br %r14
-+  %shl = shl <2 x i64> %val, <i64 48, i64 48>
-+  %ret = ashr <2 x i64> %shl, <i64 48, i64 48>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test an alternative v2i32->v2i64 extension.
-+define <2 x i64> @f13(<2 x i64> %val) {
-+; CHECK-LABEL: f13:
-+; CHECK: vsegf %v24, %v24
-+; CHECK: br %r14
-+  %shl = shl <2 x i64> %val, <i64 32, i64 32>
-+  %ret = ashr <2 x i64> %shl, <i64 32, i64 32>
-+  ret <2 x i64> %ret
-+}
-+
-+; Test an extraction-based v2i8->v2i64 extension.
-+define <2 x i64> @f14(<16 x i8> %val) {
-+; CHECK-LABEL: f14:
-+; CHECK: vsegb %v24, %v24
-+; CHECK: br %r14
-+  %elt0 = extractelement <16 x i8> %val, i32 7
-+  %elt1 = extractelement <16 x i8> %val, i32 15
-+  %ext0 = sext i8 %elt0 to i64
-+  %ext1 = sext i8 %elt1 to i64
-+  %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
-+  %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
-+  ret <2 x i64> %vec1
-+}
-+
-+; Test an extraction-based v2i16->v2i64 extension.
-+define <2 x i64> @f15(<16 x i16> %val) {
-+; CHECK-LABEL: f15:
-+; CHECK: vsegh %v24, %v24
-+; CHECK: br %r14
-+  %elt0 = extractelement <16 x i16> %val, i32 3
-+  %elt1 = extractelement <16 x i16> %val, i32 7
-+  %ext0 = sext i16 %elt0 to i64
-+  %ext1 = sext i16 %elt1 to i64
-+  %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
-+  %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
-+  ret <2 x i64> %vec1
-+}
-+
-+; Test an extraction-based v2i32->v2i64 extension.
-+define <2 x i64> @f16(<16 x i32> %val) {
-+; CHECK-LABEL: f16:
-+; CHECK: vsegf %v24, %v24
-+; CHECK: br %r14
-+  %elt0 = extractelement <16 x i32> %val, i32 1
-+  %elt1 = extractelement <16 x i32> %val, i32 3
-+  %ext0 = sext i32 %elt0 to i64
-+  %ext1 = sext i32 %elt1 to i64
-+  %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
-+  %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1
-+  ret <2 x i64> %vec1
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll
-@@ -0,0 +1,23 @@
-+; Test f64 and v2f64 square root.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+declare double @llvm.sqrt.f64(double)
-+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>)
-+
-+define <2 x double> @f1(<2 x double> %val) {
-+; CHECK-LABEL: f1:
-+; CHECK: vfsqdb %v24, %v24
-+; CHECK: br %r14
-+  %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val)
-+  ret <2 x double> %ret
-+}
-+
-+define double @f2(<2 x double> %val) {
-+; CHECK-LABEL: f2:
-+; CHECK: wfsqdb %f0, %v24
-+; CHECK: br %r14
-+  %scalar = extractelement <2 x double> %val, i32 0
-+  %ret = call double @llvm.sqrt.f64(double %scalar)
-+  ret double %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll
-@@ -0,0 +1,148 @@
-+; Test vector subtraction.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 subtraction.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vsb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 subtraction.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vsh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 subtraction.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vsf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 subtraction.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vsg %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-+
-+; Test a v4f32 subtraction, as an example of an operation that needs to be
-+; scalarized and reassembled.  At present there's an unnecessary move that
-+; could be avoided with smarter ordering.  It also isn't important whether
-+; the VSLDBs use the result of the VLRs or use %v24 and %v26 directly.
-+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) {
-+; CHECK-LABEL: f5:
-+; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24
-+; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26
-+; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1
-+; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1
-+; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2
-+; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2
-+; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3
-+; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3
-+; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]]
-+; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]]
-+; CHECK-DAG: sebr %f[[B1]], %f[[B2]]
-+; CHECK-DAG: sebr %f[[C1]], %f[[C2]]
-+; CHECK-DAG: sebr %f[[D1]], %f[[D2]]
-+; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]]
-+; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]]
-+; CHECK: vmrhg %v24, [[HIGH]], [[LOW]]
-+; CHECK: br %r14
-+  %ret = fsub <4 x float> %val1, %val2
-+  ret <4 x float> %ret
-+}
-+
-+; Test a v2f64 subtraction.
-+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1,
-+                        <2 x double> %val2) {
-+; CHECK-LABEL: f6:
-+; CHECK: vfsdb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = fsub <2 x double> %val1, %val2
-+  ret <2 x double> %ret
-+}
-+
-+; Test an f64 subtraction that uses vector registers.
-+define double @f7(<2 x double> %val1, <2 x double> %val2) {
-+; CHECK-LABEL: f7:
-+; CHECK: wfsdb %f0, %v24, %v26
-+; CHECK: br %r14
-+  %scalar1 = extractelement <2 x double> %val1, i32 0
-+  %scalar2 = extractelement <2 x double> %val2, i32 0
-+  %ret = fsub double %scalar1, %scalar2
-+  ret double %ret
-+}
-+
-+; Test a v2i8 subtraction, which gets promoted to v16i8.
-+define <2 x i8> @f8(<2 x i8> %dummy, <2 x i8> %val1, <2 x i8> %val2) {
-+; CHECK-LABEL: f8:
-+; CHECK: vsb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <2 x i8> %val1, %val2
-+  ret <2 x i8> %ret
-+}
-+
-+; Test a v4i8 subtraction, which gets promoted to v16i8.
-+define <4 x i8> @f9(<4 x i8> %dummy, <4 x i8> %val1, <4 x i8> %val2) {
-+; CHECK-LABEL: f9:
-+; CHECK: vsb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <4 x i8> %val1, %val2
-+  ret <4 x i8> %ret
-+}
-+
-+; Test a v8i8 subtraction, which gets promoted to v16i8.
-+define <8 x i8> @f10(<8 x i8> %dummy, <8 x i8> %val1, <8 x i8> %val2) {
-+; CHECK-LABEL: f10:
-+; CHECK: vsb %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <8 x i8> %val1, %val2
-+  ret <8 x i8> %ret
-+}
-+
-+; Test a v2i16 subtraction, which gets promoted to v8i16.
-+define <2 x i16> @f11(<2 x i16> %dummy, <2 x i16> %val1, <2 x i16> %val2) {
-+; CHECK-LABEL: f11:
-+; CHECK: vsh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <2 x i16> %val1, %val2
-+  ret <2 x i16> %ret
-+}
-+
-+; Test a v4i16 subtraction, which gets promoted to v8i16.
-+define <4 x i16> @f12(<4 x i16> %dummy, <4 x i16> %val1, <4 x i16> %val2) {
-+; CHECK-LABEL: f12:
-+; CHECK: vsh %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <4 x i16> %val1, %val2
-+  ret <4 x i16> %ret
-+}
-+
-+; Test a v2i32 subtraction, which gets promoted to v4i32.
-+define <2 x i32> @f13(<2 x i32> %dummy, <2 x i32> %val1, <2 x i32> %val2) {
-+; CHECK-LABEL: f13:
-+; CHECK: vsf %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = sub <2 x i32> %val1, %val2
-+  ret <2 x i32> %ret
-+}
-+
-+; Test a v2f32 subtraction, which gets promoted to v4f32.
-+define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) {
-+; No particular output expected, but must compile.
-+  %ret = fsub <2 x float> %val1, %val2
-+  ret <2 x float> %ret
-+}
-Index: llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll
-===================================================================
---- /dev/null
-+++ llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll
-@@ -0,0 +1,39 @@
-+; Test vector XOR.
-+;
-+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
-+
-+; Test a v16i8 XOR.
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
-+; CHECK-LABEL: f1:
-+; CHECK: vx %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = xor <16 x i8> %val1, %val2
-+  ret <16 x i8> %ret
-+}
-+
-+; Test a v8i16 XOR.
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
-+; CHECK-LABEL: f2:
-+; CHECK: vx %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = xor <8 x i16> %val1, %val2
-+  ret <8 x i16> %ret
-+}
-+
-+; Test a v4i32 XOR.
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
-+; CHECK-LABEL: f3:
-+; CHECK: vx %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = xor <4 x i32> %val1, %val2
-+  ret <4 x i32> %ret
-+}
-+
-+; Test a v2i64 XOR.
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
-+; CHECK-LABEL: f4:
-+; CHECK: vx %v24, %v26, %v28
-+; CHECK: br %r14
-+  %ret = xor <2 x i64> %val1, %val2
-+  ret <2 x i64> %ret
-+}
-Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt
-@@ -0,0 +1,39 @@
-+# Test z13 instructions that don't have PC-relative operands.
-+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 2>&1 \
-+# RUN:   | FileCheck %s
-+
-+# This would be "vlef %v0, 0, 4", but element 4 is invalid.
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x40 0x03
-+0xe7 0x00 0x00 0x00 0x40 0x03
-+
-+# ...and again with element 15
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x03
-+0xe7 0x00 0x00 0x00 0xf0 0x03
-+
-+# This would be "vleg %v0, 0, 2", but element 2 is invalid.
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x20 0x02
-+0xe7 0x00 0x00 0x00 0x20 0x02
-+
-+# ...and again with element 15
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x02
-+0xe7 0x00 0x00 0x00 0xf0 0x02
-+
-+# This would be "vleh %v0, 0, 8", but element 8 is invalid.
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x80 0x01
-+0xe7 0x00 0x00 0x00 0x80 0x01
-+
-+# ...and again with element 15
-+#
-+#CHECK: warning: invalid instruction encoding
-+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x01
-+0xe7 0x00 0x00 0x00 0xf0 0x01
-Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt
-@@ -0,0 +1,3315 @@
-+# Test z13 instructions that don't have PC-relative operands.
-+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 \
-+# RUN:   | FileCheck %s
-+
-+#CHECK: lcbb    %r0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x27
-+
-+#CHECK: lcbb    %r1, 2475(%r7,%r8), 12
-+0xe7 0x17 0x89 0xab 0xc0 0x27
-+
-+#CHECK: lcbb    %r15, 4095(%r15,%r15), 15
-+0xe7 0xff 0xff 0xff 0xf0 0x27
-+
-+#CHECK: vab     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf3
-+
-+#CHECK: vab     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf3
-+
-+#CHECK: vab     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf3
-+
-+#CHECK: vaccb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf1
-+
-+#CHECK: vaccb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf1
-+
-+#CHECK: vaccb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf1
-+
-+#CHECK: vacccq  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x04 0x00 0x00 0xb9
-+
-+#CHECK: vacccq  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x54 0x00 0x65 0xb9
-+
-+#CHECK: vacccq  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf4 0x00 0xff 0xb9
-+
-+#CHECK: vaccf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf1
-+
-+#CHECK: vaccf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf1
-+
-+#CHECK: vaccf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf1
-+
-+#CHECK: vaccg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf1
-+
-+#CHECK: vaccg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf1
-+
-+#CHECK: vaccg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf1
-+
-+#CHECK: vacch   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf1
-+
-+#CHECK: vacch   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf1
-+
-+#CHECK: vacch   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf1
-+
-+#CHECK: vaccq   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x40 0xf1
-+
-+#CHECK: vaccq   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x4a 0xf1
-+
-+#CHECK: vaccq   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x4e 0xf1
-+
-+#CHECK: vacq    %v0, %v0, %v0, %v0
-+0xe7 0x00 0x04 0x00 0x00 0xbb
-+
-+#CHECK: vacq    %v3, %v20, %v5, %v22
-+0xe7 0x34 0x54 0x00 0x65 0xbb
-+
-+#CHECK: vacq    %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf4 0x00 0xff 0xbb
-+
-+#CHECK: vaf     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf3
-+
-+#CHECK: vaf     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf3
-+
-+#CHECK: vaf     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf3
-+
-+#CHECK: vag     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf3
-+
-+#CHECK: vag     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf3
-+
-+#CHECK: vag     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf3
-+
-+#CHECK: vah     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf3
-+
-+#CHECK: vah     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf3
-+
-+#CHECK: vah     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf3
-+
-+#CHECK: vaq     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x40 0xf3
-+
-+#CHECK: vaq     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x4a 0xf3
-+
-+#CHECK: vaq     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x4e 0xf3
-+
-+#CHECK: vavgb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf2
-+
-+#CHECK: vavgb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf2
-+
-+#CHECK: vavgb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf2
-+
-+#CHECK: vavgf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf2
-+
-+#CHECK: vavgf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf2
-+
-+#CHECK: vavgf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf2
-+
-+#CHECK: vavgg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf2
-+
-+#CHECK: vavgg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf2
-+
-+#CHECK: vavgg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf2
-+
-+#CHECK: vavgh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf2
-+
-+#CHECK: vavgh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf2
-+
-+#CHECK: vavgh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf2
-+
-+#CHECK: vavglb  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf0
-+
-+#CHECK: vavglb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf0
-+
-+#CHECK: vavglb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf0
-+
-+#CHECK: vavglf  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf0
-+
-+#CHECK: vavglf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf0
-+
-+#CHECK: vavglf  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf0
-+
-+#CHECK: vavglg  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf0
-+
-+#CHECK: vavglg  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf0
-+
-+#CHECK: vavglg  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf0
-+
-+#CHECK: vavglh  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf0
-+
-+#CHECK: vavglh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf0
-+
-+#CHECK: vavglh  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf0
-+
-+#CHECK: vcdgb   %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc3
-+
-+#CHECK: vcdgb   %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc3
-+
-+#CHECK: vcdgb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc3
-+
-+#CHECK: vcdlgb  %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc1
-+
-+#CHECK: vcdlgb  %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc1
-+
-+#CHECK: vcdlgb  %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc1
-+
-+#CHECK: vceqb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf8
-+
-+#CHECK: vceqb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf8
-+
-+#CHECK: vceqbs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x04 0xf8
-+
-+#CHECK: vceqb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf8
-+
-+#CHECK: vceqf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf8
-+
-+#CHECK: vceqf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf8
-+
-+#CHECK: vceqfs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0xf8
-+
-+#CHECK: vceqf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf8
-+
-+#CHECK: vceqg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf8
-+
-+#CHECK: vceqg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf8
-+
-+#CHECK: vceqgs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x34 0xf8
-+
-+#CHECK: vceqg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf8
-+
-+#CHECK: vceqh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf8
-+
-+#CHECK: vceqh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf8
-+
-+#CHECK: vceqhs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0xf8
-+
-+#CHECK: vceqh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf8
-+
-+#CHECK: vcgdb   %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc2
-+
-+#CHECK: vcgdb   %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc2
-+
-+#CHECK: vcgdb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc2
-+
-+#CHECK: vchb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xfb
-+
-+#CHECK: vchb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xfb
-+
-+#CHECK: vchbs   %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x04 0xfb
-+
-+#CHECK: vchb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xfb
-+
-+#CHECK: vchf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xfb
-+
-+#CHECK: vchf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xfb
-+
-+#CHECK: vchfs   %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0xfb
-+
-+#CHECK: vchf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xfb
-+
-+#CHECK: vchg    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xfb
-+
-+#CHECK: vchg    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xfb
-+
-+#CHECK: vchgs   %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x34 0xfb
-+
-+#CHECK: vchg    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xfb
-+
-+#CHECK: vchh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xfb
-+
-+#CHECK: vchh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xfb
-+
-+#CHECK: vchhs   %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0xfb
-+
-+#CHECK: vchh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xfb
-+
-+#CHECK: vchlb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf9
-+
-+#CHECK: vchlb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf9
-+
-+#CHECK: vchlbs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x04 0xf9
-+
-+#CHECK: vchlb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf9
-+
-+#CHECK: vchlf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf9
-+
-+#CHECK: vchlf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf9
-+
-+#CHECK: vchlfs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0xf9
-+
-+#CHECK: vchlf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf9
-+
-+#CHECK: vchlg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf9
-+
-+#CHECK: vchlg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf9
-+
-+#CHECK: vchlgs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x34 0xf9
-+
-+#CHECK: vchlg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf9
-+
-+#CHECK: vchlh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf9
-+
-+#CHECK: vchlh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf9
-+
-+#CHECK: vchlhs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0xf9
-+
-+#CHECK: vchlh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf9
-+
-+#CHECK: vcksm   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x66
-+
-+#CHECK: vcksm   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x66
-+
-+#CHECK: vcksm   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x66
-+
-+#CHECK: vclgdb  %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc0
-+
-+#CHECK: vclgdb  %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc0
-+
-+#CHECK: vclgdb  %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc0
-+
-+#CHECK: vclzb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x53
-+
-+#CHECK: vclzb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0x53
-+
-+#CHECK: vclzb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x53
-+
-+#CHECK: vclzf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x53
-+
-+#CHECK: vclzf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0x53
-+
-+#CHECK: vclzf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0x53
-+
-+#CHECK: vclzg   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x53
-+
-+#CHECK: vclzg   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0x53
-+
-+#CHECK: vclzg   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0x53
-+
-+#CHECK: vclzh   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x53
-+
-+#CHECK: vclzh   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0x53
-+
-+#CHECK: vclzh   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0x53
-+
-+#CHECK: vctzb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x52
-+
-+#CHECK: vctzb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0x52
-+
-+#CHECK: vctzb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x52
-+
-+#CHECK: vctzf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x52
-+
-+#CHECK: vctzf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0x52
-+
-+#CHECK: vctzf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0x52
-+
-+#CHECK: vctzg   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x52
-+
-+#CHECK: vctzg   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0x52
-+
-+#CHECK: vctzg   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0x52
-+
-+#CHECK: vctzh   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x52
-+
-+#CHECK: vctzh   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0x52
-+
-+#CHECK: vctzh   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0x52
-+
-+#CHECK: vecb    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xdb
-+
-+#CHECK: vecb    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xdb
-+
-+#CHECK: vecb    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xdb
-+
-+#CHECK: vecf    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xdb
-+
-+#CHECK: vecf    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xdb
-+
-+#CHECK: vecf    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xdb
-+
-+#CHECK: vecg    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xdb
-+
-+#CHECK: vecg    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xdb
-+
-+#CHECK: vecg    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xdb
-+
-+#CHECK: vech    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xdb
-+
-+#CHECK: vech    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xdb
-+
-+#CHECK: vech    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xdb
-+
-+#CHECK: veclb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd9
-+
-+#CHECK: veclb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd9
-+
-+#CHECK: veclb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd9
-+
-+#CHECK: veclf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xd9
-+
-+#CHECK: veclf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xd9
-+
-+#CHECK: veclf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xd9
-+
-+#CHECK: veclg   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xd9
-+
-+#CHECK: veclg   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xd9
-+
-+#CHECK: veclg   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xd9
-+
-+#CHECK: veclh   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xd9
-+
-+#CHECK: veclh   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xd9
-+
-+#CHECK: veclh   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xd9
-+
-+#CHECK: verimb  %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x72
-+
-+#CHECK: verimb  %v3, %v20, %v5, 103
-+0xe7 0x34 0x50 0x67 0x04 0x72
-+
-+#CHECK: verimb  %v31, %v31, %v31, 255
-+0xe7 0xff 0xf0 0xff 0x0e 0x72
-+
-+#CHECK: verimf  %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x72
-+
-+#CHECK: verimf  %v3, %v20, %v5, 103
-+0xe7 0x34 0x50 0x67 0x24 0x72
-+
-+#CHECK: verimf  %v31, %v31, %v31, 255
-+0xe7 0xff 0xf0 0xff 0x2e 0x72
-+
-+#CHECK: verimg  %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x72
-+
-+#CHECK: verimg  %v3, %v20, %v5, 103
-+0xe7 0x34 0x50 0x67 0x34 0x72
-+
-+#CHECK: verimg  %v31, %v31, %v31, 255
-+0xe7 0xff 0xf0 0xff 0x3e 0x72
-+
-+#CHECK: verimh  %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x72
-+
-+#CHECK: verimh  %v3, %v20, %v5, 103
-+0xe7 0x34 0x50 0x67 0x14 0x72
-+
-+#CHECK: verimh  %v31, %v31, %v31, 255
-+0xe7 0xff 0xf0 0xff 0x1e 0x72
-+
-+#CHECK: verllvb %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x73
-+
-+#CHECK: verllvb %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x73
-+
-+#CHECK: verllvb %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x73
-+
-+#CHECK: verllvf %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x73
-+
-+#CHECK: verllvf %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x73
-+
-+#CHECK: verllvf %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x73
-+
-+#CHECK: verllvg %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x73
-+
-+#CHECK: verllvg %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x73
-+
-+#CHECK: verllvg %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x73
-+
-+#CHECK: verllvh %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x73
-+
-+#CHECK: verllvh %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x73
-+
-+#CHECK: verllvh %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x73
-+
-+#CHECK: verllb  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x33
-+
-+#CHECK: verllb  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x33
-+
-+#CHECK: verllb  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x33
-+
-+#CHECK: verllf  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x33
-+
-+#CHECK: verllf  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x24 0x33
-+
-+#CHECK: verllf  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x2c 0x33
-+
-+#CHECK: verllg  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x33
-+
-+#CHECK: verllg  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x34 0x33
-+
-+#CHECK: verllg  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x3c 0x33
-+
-+#CHECK: verllh  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x33
-+
-+#CHECK: verllh  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x14 0x33
-+
-+#CHECK: verllh  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x1c 0x33
-+
-+#CHECK: veslvb  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x70
-+
-+#CHECK: veslvb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x70
-+
-+#CHECK: veslvb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x70
-+
-+#CHECK: veslvf  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x70
-+
-+#CHECK: veslvf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x70
-+
-+#CHECK: veslvf  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x70
-+
-+#CHECK: veslvg  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x70
-+
-+#CHECK: veslvg  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x70
-+
-+#CHECK: veslvg  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x70
-+
-+#CHECK: veslvh  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x70
-+
-+#CHECK: veslvh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x70
-+
-+#CHECK: veslvh  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x70
-+
-+#CHECK: veslb   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x30
-+
-+#CHECK: veslb   %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x30
-+
-+#CHECK: veslb   %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x30
-+
-+#CHECK: veslf   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x30
-+
-+#CHECK: veslf   %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x24 0x30
-+
-+#CHECK: veslf   %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x2c 0x30
-+
-+#CHECK: veslg   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x30
-+
-+#CHECK: veslg   %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x34 0x30
-+
-+#CHECK: veslg   %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x3c 0x30
-+
-+#CHECK: veslh   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x30
-+
-+#CHECK: veslh   %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x14 0x30
-+
-+#CHECK: veslh   %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x1c 0x30
-+
-+#CHECK: vesravb %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x7a
-+
-+#CHECK: vesravb %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x7a
-+
-+#CHECK: vesravb %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x7a
-+
-+#CHECK: vesravf %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x7a
-+
-+#CHECK: vesravf %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x7a
-+
-+#CHECK: vesravf %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x7a
-+
-+#CHECK: vesravg %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x7a
-+
-+#CHECK: vesravg %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x7a
-+
-+#CHECK: vesravg %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x7a
-+
-+#CHECK: vesravh %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x7a
-+
-+#CHECK: vesravh %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x7a
-+
-+#CHECK: vesravh %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x7a
-+
-+#CHECK: vesrab  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x3a
-+
-+#CHECK: vesrab  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x3a
-+
-+#CHECK: vesrab  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x3a
-+
-+#CHECK: vesraf  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x3a
-+
-+#CHECK: vesraf  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x24 0x3a
-+
-+#CHECK: vesraf  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x2c 0x3a
-+
-+#CHECK: vesrag  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x3a
-+
-+#CHECK: vesrag  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x34 0x3a
-+
-+#CHECK: vesrag  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x3c 0x3a
-+
-+#CHECK: vesrah  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x3a
-+
-+#CHECK: vesrah  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x14 0x3a
-+
-+#CHECK: vesrah  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x1c 0x3a
-+
-+#CHECK: vesrlvb %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x78
-+
-+#CHECK: vesrlvb %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x78
-+
-+#CHECK: vesrlvb %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x78
-+
-+#CHECK: vesrlvf %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x78
-+
-+#CHECK: vesrlvf %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x78
-+
-+#CHECK: vesrlvf %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x78
-+
-+#CHECK: vesrlvg %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x78
-+
-+#CHECK: vesrlvg %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x78
-+
-+#CHECK: vesrlvg %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x78
-+
-+#CHECK: vesrlvh %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x78
-+
-+#CHECK: vesrlvh %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x78
-+
-+#CHECK: vesrlvh %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x78
-+
-+#CHECK: vesrlb  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x38
-+
-+#CHECK: vesrlb  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x38
-+
-+#CHECK: vesrlb  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x38
-+
-+#CHECK: vesrlf  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x38
-+
-+#CHECK: vesrlf  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x24 0x38
-+
-+#CHECK: vesrlf  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x2c 0x38
-+
-+#CHECK: vesrlg  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x38
-+
-+#CHECK: vesrlg  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x34 0x38
-+
-+#CHECK: vesrlg  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x3c 0x38
-+
-+#CHECK: vesrlh  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x38
-+
-+#CHECK: vesrlh  %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x14 0x38
-+
-+#CHECK: vesrlh  %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x1c 0x38
-+
-+#CHECK: vfadb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xe3
-+
-+#CHECK: vfadb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xe3
-+
-+#CHECK: vfadb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xe3
-+
-+#CHECK: vfaeb   %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x82
-+
-+#CHECK: vfaeb   %v0, %v0, %v0, 12
-+0xe7 0x00 0x00 0xc0 0x00 0x82
-+
-+#CHECK: vfaeb   %v18, %v3, %v20, 0
-+0xe7 0x23 0x40 0x00 0x0a 0x82
-+
-+#CHECK: vfaeb   %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x40 0x0e 0x82
-+
-+#CHECK: vfaebs  %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0x90 0x0e 0x82
-+
-+#CHECK: vfaezb  %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x60 0x0e 0x82
-+
-+#CHECK: vfaezbs %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0xb0 0x0e 0x82
-+
-+#CHECK: vfaef   %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x82
-+
-+#CHECK: vfaef   %v0, %v0, %v0, 12
-+0xe7 0x00 0x00 0xc0 0x20 0x82
-+
-+#CHECK: vfaef   %v18, %v3, %v20, 0
-+0xe7 0x23 0x40 0x00 0x2a 0x82
-+
-+#CHECK: vfaef   %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x40 0x2e 0x82
-+
-+#CHECK: vfaefs  %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0x90 0x2e 0x82
-+
-+#CHECK: vfaezf  %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x60 0x2e 0x82
-+
-+#CHECK: vfaezfs %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0xb0 0x2e 0x82
-+
-+#CHECK: vfaeh   %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x82
-+
-+#CHECK: vfaeh   %v0, %v0, %v0, 12
-+0xe7 0x00 0x00 0xc0 0x10 0x82
-+
-+#CHECK: vfaeh   %v18, %v3, %v20, 0
-+0xe7 0x23 0x40 0x00 0x1a 0x82
-+
-+#CHECK: vfaeh   %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x40 0x1e 0x82
-+
-+#CHECK: vfaehs  %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0x90 0x1e 0x82
-+
-+#CHECK: vfaezh  %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x60 0x1e 0x82
-+
-+#CHECK: vfaezhs %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0xb0 0x1e 0x82
-+
-+#CHECK: vfcedb  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xe8
-+
-+#CHECK: vfcedb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xe8
-+
-+#CHECK: vfcedb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xe8
-+
-+#CHECK: vfcedbs %v0, %v0, %v0
-+0xe7 0x00 0x00 0x10 0x30 0xe8
-+
-+#CHECK: vfcedbs %v18, %v3, %v20
-+0xe7 0x23 0x40 0x10 0x3a 0xe8
-+
-+#CHECK: vfcedbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x10 0x3e 0xe8
-+
-+#CHECK: vfchdb  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xeb
-+
-+#CHECK: vfchdb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xeb
-+
-+#CHECK: vfchdb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xeb
-+
-+#CHECK: vfchdbs %v0, %v0, %v0
-+0xe7 0x00 0x00 0x10 0x30 0xeb
-+
-+#CHECK: vfchdbs %v18, %v3, %v20
-+0xe7 0x23 0x40 0x10 0x3a 0xeb
-+
-+#CHECK: vfchdbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x10 0x3e 0xeb
-+
-+#CHECK: vfchedb %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xea
-+
-+#CHECK: vfchedb %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xea
-+
-+#CHECK: vfchedb %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xea
-+
-+#CHECK: vfchedbs %v0, %v0, %v0
-+0xe7 0x00 0x00 0x10 0x30 0xea
-+
-+#CHECK: vfchedbs %v18, %v3, %v20
-+0xe7 0x23 0x40 0x10 0x3a 0xea
-+
-+#CHECK: vfchedbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x10 0x3e 0xea
-+
-+#CHECK: vfddb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xe5
-+
-+#CHECK: vfddb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xe5
-+
-+#CHECK: vfddb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xe5
-+
-+#CHECK: vfeeb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x80
-+
-+#CHECK: vfeeb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x80
-+
-+#CHECK: vfeebs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x04 0x80
-+
-+#CHECK: vfeezb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x0a 0x80
-+
-+#CHECK: vfeezbs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x04 0x80
-+
-+#CHECK: vfeeb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x80
-+
-+#CHECK: vfeef   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x80
-+
-+#CHECK: vfeef   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x80
-+
-+#CHECK: vfeefs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0x80
-+
-+#CHECK: vfeezf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x2a 0x80
-+
-+#CHECK: vfeezfs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x24 0x80
-+
-+#CHECK: vfeef   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x80
-+
-+#CHECK: vfeeh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x80
-+
-+#CHECK: vfeeh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x80
-+
-+#CHECK: vfeehs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0x80
-+
-+#CHECK: vfeezh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x1a 0x80
-+
-+#CHECK: vfeezhs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x14 0x80
-+
-+#CHECK: vfeeh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x80
-+
-+#CHECK: vfeneb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x81
-+
-+#CHECK: vfeneb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x81
-+
-+#CHECK: vfenebs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x04 0x81
-+
-+#CHECK: vfenezb  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x0a 0x81
-+
-+#CHECK: vfenezbs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x04 0x81
-+
-+#CHECK: vfeneb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x81
-+
-+#CHECK: vfenef   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x81
-+
-+#CHECK: vfenef   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x81
-+
-+#CHECK: vfenefs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0x81
-+
-+#CHECK: vfenezf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x2a 0x81
-+
-+#CHECK: vfenezfs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x24 0x81
-+
-+#CHECK: vfenef   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x81
-+
-+#CHECK: vfeneh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x81
-+
-+#CHECK: vfeneh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x81
-+
-+#CHECK: vfenehs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0x81
-+
-+#CHECK: vfenezh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x20 0x1a 0x81
-+
-+#CHECK: vfenezhs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x30 0x14 0x81
-+
-+#CHECK: vfeneh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x81
-+
-+#CHECK: vfidb   %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc7
-+
-+#CHECK: vfidb   %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc7
-+
-+#CHECK: vfidb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc7
-+
-+#CHECK: vistrb  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x5c
-+
-+#CHECK: vistrb  %v18, %v3
-+0xe7 0x23 0x00 0x00 0x08 0x5c
-+
-+#CHECK: vistrbs %v7, %v24
-+0xe7 0x78 0x00 0x10 0x04 0x5c
-+
-+#CHECK: vistrb  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x5c
-+
-+#CHECK: vistrf  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x5c
-+
-+#CHECK: vistrf  %v18, %v3
-+0xe7 0x23 0x00 0x00 0x28 0x5c
-+
-+#CHECK: vistrfs %v7, %v24
-+0xe7 0x78 0x00 0x10 0x24 0x5c
-+
-+#CHECK: vistrf  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0x5c
-+
-+#CHECK: vistrh  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x5c
-+
-+#CHECK: vistrh  %v18, %v3
-+0xe7 0x23 0x00 0x00 0x18 0x5c
-+
-+#CHECK: vistrhs %v7, %v24
-+0xe7 0x78 0x00 0x10 0x14 0x5c
-+
-+#CHECK: vistrh  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0x5c
-+
-+#CHECK: vfmadb  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x03 0x00 0x00 0x8f
-+
-+#CHECK: vfmadb  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x53 0x00 0x65 0x8f
-+
-+#CHECK: vfmadb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf3 0x00 0xff 0x8f
-+
-+#CHECK: vfmdb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xe7
-+
-+#CHECK: vfmdb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xe7
-+
-+#CHECK: vfmdb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xe7
-+
-+#CHECK: vfmsdb  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x03 0x00 0x00 0x8e
-+
-+#CHECK: vfmsdb  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x53 0x00 0x65 0x8e
-+
-+#CHECK: vfmsdb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf3 0x00 0xff 0x8e
-+
-+#CHECK: vfsdb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xe2
-+
-+#CHECK: vfsdb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xe2
-+
-+#CHECK: vfsdb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xe2
-+
-+#CHECK: vzero   %v0
-+0xe7 0x00 0x00 0x00 0x00 0x44
-+
-+#CHECK: vgbm    %v0, 1
-+0xe7 0x00 0x00 0x01 0x00 0x44
-+
-+#CHECK: vgbm    %v0, 65534
-+0xe7 0x00 0xff 0xfe 0x00 0x44
-+
-+#CHECK: vone    %v0
-+0xe7 0x00 0xff 0xff 0x00 0x44
-+
-+#CHECK: vgbm    %v17, 4660
-+0xe7 0x10 0x12 0x34 0x08 0x44
-+
-+#CHECK: vone    %v31
-+0xe7 0xf0 0xff 0xff 0x08 0x44
-+
-+#CHECK: vgef    %v0, 0(%v0), 0
-+0xe7 0x00 0x00 0x00 0x00 0x13
-+
-+#CHECK: vgef    %v10, 1000(%v19,%r7), 2
-+0xe7 0xa3 0x73 0xe8 0x24 0x13
-+
-+#CHECK: vgef    %v31, 4095(%v31,%r15), 3
-+0xe7 0xff 0xff 0xff 0x3c 0x13
-+
-+#CHECK: vgeg    %v0, 0(%v0), 0
-+0xe7 0x00 0x00 0x00 0x00 0x12
-+
-+#CHECK: vgeg    %v10, 1000(%v19,%r7), 1
-+0xe7 0xa3 0x73 0xe8 0x14 0x12
-+
-+#CHECK: vgeg    %v31, 4095(%v31,%r15), 1
-+0xe7 0xff 0xff 0xff 0x1c 0x12
-+
-+#CHECK: vgfmab  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xbc
-+
-+#CHECK: vgfmab  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xbc
-+
-+#CHECK: vgfmab  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xbc
-+
-+#CHECK: vgfmaf  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xbc
-+
-+#CHECK: vgfmaf  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xbc
-+
-+#CHECK: vgfmaf  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xbc
-+
-+#CHECK: vgfmag  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x03 0x00 0x00 0xbc
-+
-+#CHECK: vgfmag  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x53 0x00 0x65 0xbc
-+
-+#CHECK: vgfmag  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf3 0x00 0xff 0xbc
-+
-+#CHECK: vgfmah  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xbc
-+
-+#CHECK: vgfmah  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xbc
-+
-+#CHECK: vgfmah  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xbc
-+
-+#CHECK: vgfmb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xb4
-+
-+#CHECK: vgfmb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xb4
-+
-+#CHECK: vgfmb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xb4
-+
-+#CHECK: vgfmf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xb4
-+
-+#CHECK: vgfmf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xb4
-+
-+#CHECK: vgfmf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xb4
-+
-+#CHECK: vgfmg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xb4
-+
-+#CHECK: vgfmg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xb4
-+
-+#CHECK: vgfmg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xb4
-+
-+#CHECK: vgfmh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xb4
-+
-+#CHECK: vgfmh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xb4
-+
-+#CHECK: vgfmh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xb4
-+
-+#CHECK: vgmb    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x46
-+
-+#CHECK: vgmb    %v22, 55, 66
-+0xe7 0x60 0x37 0x42 0x08 0x46
-+
-+#CHECK: vgmb    %v31, 255, 255
-+0xe7 0xf0 0xff 0xff 0x08 0x46
-+
-+#CHECK: vgmf    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x46
-+
-+#CHECK: vgmf    %v22, 55, 66
-+0xe7 0x60 0x37 0x42 0x28 0x46
-+
-+#CHECK: vgmf    %v31, 255, 255
-+0xe7 0xf0 0xff 0xff 0x28 0x46
-+
-+#CHECK: vgmg    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x46
-+
-+#CHECK: vgmg    %v22, 55, 66
-+0xe7 0x60 0x37 0x42 0x38 0x46
-+
-+#CHECK: vgmg    %v31, 255, 255
-+0xe7 0xf0 0xff 0xff 0x38 0x46
-+
-+#CHECK: vgmh    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x46
-+
-+#CHECK: vgmh    %v22, 55, 66
-+0xe7 0x60 0x37 0x42 0x18 0x46
-+
-+#CHECK: vgmh    %v31, 255, 255
-+0xe7 0xf0 0xff 0xff 0x18 0x46
-+
-+#CHECK: vl      %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x06
-+
-+#CHECK: vl      %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x08 0x06
-+
-+#CHECK: vl      %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x06
-+
-+#CHECK: vlbb    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x07
-+
-+#CHECK: vlbb    %v17, 2475(%r7,%r8), 12
-+0xe7 0x17 0x89 0xab 0xc8 0x07
-+
-+#CHECK: vlbb    %v31, 4095(%r15,%r15), 15
-+0xe7 0xff 0xff 0xff 0xf8 0x07
-+
-+#CHECK: vlcb    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xde
-+
-+#CHECK: vlcb    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xde
-+
-+#CHECK: vlcb    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xde
-+
-+#CHECK: vlcf    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xde
-+
-+#CHECK: vlcf    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xde
-+
-+#CHECK: vlcf    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xde
-+
-+#CHECK: vlcg    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xde
-+
-+#CHECK: vlcg    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xde
-+
-+#CHECK: vlcg    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xde
-+
-+#CHECK: vlch    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xde
-+
-+#CHECK: vlch    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xde
-+
-+#CHECK: vlch    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xde
-+
-+#CHECK: vldeb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xc4
-+
-+#CHECK: vldeb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xc4
-+
-+#CHECK: vldeb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xc4
-+
-+#CHECK: vleb    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x00
-+
-+#CHECK: vleb    %v17, 2475(%r7,%r8), 12
-+0xe7 0x17 0x89 0xab 0xc8 0x00
-+
-+#CHECK: vleb    %v31, 4095(%r15,%r15), 15
-+0xe7 0xff 0xff 0xff 0xf8 0x00
-+
-+#CHECK: vledb   %v0, %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x30 0xc5
-+
-+#CHECK: vledb   %v19, %v14, 4, 10
-+0xe7 0x3e 0x00 0xa4 0x38 0xc5
-+
-+#CHECK: vledb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xf7 0x3c 0xc5
-+
-+#CHECK: vlef    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x03
-+
-+#CHECK: vlef    %v17, 2475(%r7,%r8), 2
-+0xe7 0x17 0x89 0xab 0x28 0x03
-+
-+#CHECK: vlef    %v31, 4095(%r15,%r15), 3
-+0xe7 0xff 0xff 0xff 0x38 0x03
-+
-+#CHECK: vleg    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x02
-+
-+#CHECK: vleg    %v17, 2475(%r7,%r8), 1
-+0xe7 0x17 0x89 0xab 0x18 0x02
-+
-+#CHECK: vleg    %v31, 4095(%r15,%r15), 1
-+0xe7 0xff 0xff 0xff 0x18 0x02
-+
-+#CHECK: vleh    %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x01
-+
-+#CHECK: vleh    %v17, 2475(%r7,%r8), 5
-+0xe7 0x17 0x89 0xab 0x58 0x01
-+
-+#CHECK: vleh    %v31, 4095(%r15,%r15), 7
-+0xe7 0xff 0xff 0xff 0x78 0x01
-+
-+#CHECK: vleib   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x40
-+
-+#CHECK: vleib   %v23, -30293, 12
-+0xe7 0x70 0x89 0xab 0xc8 0x40
-+
-+#CHECK: vleib   %v31, -1, 15
-+0xe7 0xf0 0xff 0xff 0xf8 0x40
-+
-+#CHECK: vleif   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x43
-+
-+#CHECK: vleif   %v23, -30293, 2
-+0xe7 0x70 0x89 0xab 0x28 0x43
-+
-+#CHECK: vleif   %v31, -1, 3
-+0xe7 0xf0 0xff 0xff 0x38 0x43
-+
-+#CHECK: vleig   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x42
-+
-+#CHECK: vleig   %v23, -30293, 1
-+0xe7 0x70 0x89 0xab 0x18 0x42
-+
-+#CHECK: vleig   %v31, -1, 1
-+0xe7 0xf0 0xff 0xff 0x18 0x42
-+
-+#CHECK: vleih   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x41
-+
-+#CHECK: vleih   %v23, -30293, 5
-+0xe7 0x70 0x89 0xab 0x58 0x41
-+
-+#CHECK: vleih   %v31, -1, 7
-+0xe7 0xf0 0xff 0xff 0x78 0x41
-+
-+#CHECK: vflcdb  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xcc
-+
-+#CHECK: vflcdb  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xcc
-+
-+#CHECK: vflcdb  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xcc
-+
-+#CHECK: vflndb  %v0, %v0
-+0xe7 0x00 0x00 0x10 0x30 0xcc
-+
-+#CHECK: vflndb  %v19, %v14
-+0xe7 0x3e 0x00 0x10 0x38 0xcc
-+
-+#CHECK: vflndb  %v31, %v31
-+0xe7 0xff 0x00 0x10 0x3c 0xcc
-+
-+#CHECK: vflpdb  %v0, %v0
-+0xe7 0x00 0x00 0x20 0x30 0xcc
-+
-+#CHECK: vflpdb  %v19, %v14
-+0xe7 0x3e 0x00 0x20 0x38 0xcc
-+
-+#CHECK: vflpdb  %v31, %v31
-+0xe7 0xff 0x00 0x20 0x3c 0xcc
-+
-+#CHECK: vlgvb   %r0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x21
-+
-+#CHECK: vlgvb   %r2, %v19, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x04 0x21
-+
-+#CHECK: vlgvb   %r15, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x04 0x21
-+
-+#CHECK: vlgvf   %r0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x21
-+
-+#CHECK: vlgvf   %r2, %v19, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x24 0x21
-+
-+#CHECK: vlgvf   %r15, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x24 0x21
-+
-+#CHECK: vlgvg   %r0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x21
-+
-+#CHECK: vlgvg   %r2, %v19, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x34 0x21
-+
-+#CHECK: vlgvg   %r15, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x34 0x21
-+
-+#CHECK: vlgvh   %r0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x21
-+
-+#CHECK: vlgvh   %r2, %v19, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x14 0x21
-+
-+#CHECK: vlgvh   %r15, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x14 0x21
-+
-+#CHECK: vfsqdb  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xce
-+
-+#CHECK: vfsqdb  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xce
-+
-+#CHECK: vfsqdb  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xce
-+
-+#CHECK: vftcidb %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x4a
-+
-+#CHECK: vftcidb %v19, %v4, 1383
-+0xe7 0x34 0x56 0x70 0x38 0x4a
-+
-+#CHECK: vftcidb %v31, %v31, 4095
-+0xe7 0xff 0xff 0xf0 0x3c 0x4a
-+
-+#CHECK: vll     %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x37
-+
-+#CHECK: vll     %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x08 0x37
-+
-+#CHECK: vll     %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x37
-+
-+#CHECK: vllezb  %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x04
-+
-+#CHECK: vllezb  %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x08 0x04
-+
-+#CHECK: vllezb  %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x04
-+
-+#CHECK: vllezf  %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x04
-+
-+#CHECK: vllezf  %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x28 0x04
-+
-+#CHECK: vllezf  %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x28 0x04
-+
-+#CHECK: vllezg  %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x04
-+
-+#CHECK: vllezg  %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x38 0x04
-+
-+#CHECK: vllezg  %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x38 0x04
-+
-+#CHECK: vllezh  %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x04
-+
-+#CHECK: vllezh  %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x18 0x04
-+
-+#CHECK: vllezh  %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x18 0x04
-+
-+#CHECK: vlm     %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x36
-+
-+#CHECK: vlm     %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x36
-+
-+#CHECK: vlm     %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x36
-+
-+#CHECK: vlpb    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xdf
-+
-+#CHECK: vlpb    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xdf
-+
-+#CHECK: vlpb    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xdf
-+
-+#CHECK: vlpf    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xdf
-+
-+#CHECK: vlpf    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xdf
-+
-+#CHECK: vlpf    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xdf
-+
-+#CHECK: vlpg    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xdf
-+
-+#CHECK: vlpg    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x38 0xdf
-+
-+#CHECK: vlpg    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xdf
-+
-+#CHECK: vlph    %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xdf
-+
-+#CHECK: vlph    %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xdf
-+
-+#CHECK: vlph    %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xdf
-+
-+#CHECK: vlr     %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x56
-+
-+#CHECK: vlr     %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0x56
-+
-+#CHECK: vlr     %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x56
-+
-+#CHECK: vlrepb   %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x05
-+
-+#CHECK: vlrepb   %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x08 0x05
-+
-+#CHECK: vlrepb   %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x05
-+
-+#CHECK: vlrepf   %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x05
-+
-+#CHECK: vlrepf   %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x28 0x05
-+
-+#CHECK: vlrepf   %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x28 0x05
-+
-+#CHECK: vlrepg   %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x05
-+
-+#CHECK: vlrepg   %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x38 0x05
-+
-+#CHECK: vlrepg   %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x38 0x05
-+
-+#CHECK: vlreph   %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x05
-+
-+#CHECK: vlreph   %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x18 0x05
-+
-+#CHECK: vlreph   %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x18 0x05
-+
-+#CHECK: vlvgb   %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x22
-+
-+#CHECK: vlvgb   %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x08 0x22
-+
-+#CHECK: vlvgb   %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x22
-+
-+#CHECK: vlvgf   %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x22
-+
-+#CHECK: vlvgf   %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x28 0x22
-+
-+#CHECK: vlvgf   %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x28 0x22
-+
-+#CHECK: vlvgg   %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x22
-+
-+#CHECK: vlvgg   %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x38 0x22
-+
-+#CHECK: vlvgg   %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x38 0x22
-+
-+#CHECK: vlvgh   %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x22
-+
-+#CHECK: vlvgh   %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x18 0x22
-+
-+#CHECK: vlvgh   %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x18 0x22
-+
-+#CHECK: vlvgp   %v0, %r0, %r0
-+0xe7 0x00 0x00 0x00 0x00 0x62
-+
-+#CHECK: vlvgp   %v18, %r3, %r4
-+0xe7 0x23 0x40 0x00 0x08 0x62
-+
-+#CHECK: vlvgp   %v31, %r15, %r15
-+0xe7 0xff 0xf0 0x00 0x08 0x62
-+
-+#CHECK: vmaeb   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xae
-+
-+#CHECK: vmaeb   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xae
-+
-+#CHECK: vmaeb   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xae
-+
-+#CHECK: vmaef   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xae
-+
-+#CHECK: vmaef   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xae
-+
-+#CHECK: vmaef   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xae
-+
-+#CHECK: vmaeh   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xae
-+
-+#CHECK: vmaeh   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xae
-+
-+#CHECK: vmaeh   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xae
-+
-+#CHECK: vmahb   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xab
-+
-+#CHECK: vmahb   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xab
-+
-+#CHECK: vmahb   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xab
-+
-+#CHECK: vmahf   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xab
-+
-+#CHECK: vmahf   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xab
-+
-+#CHECK: vmahf   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xab
-+
-+#CHECK: vmahh   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xab
-+
-+#CHECK: vmahh   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xab
-+
-+#CHECK: vmahh   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xab
-+
-+#CHECK: vmalb   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xaa
-+
-+#CHECK: vmalb   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xaa
-+
-+#CHECK: vmalb   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xaa
-+
-+#CHECK: vmaleb  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xac
-+
-+#CHECK: vmaleb  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xac
-+
-+#CHECK: vmaleb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xac
-+
-+#CHECK: vmalef  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xac
-+
-+#CHECK: vmalef  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xac
-+
-+#CHECK: vmalef  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xac
-+
-+#CHECK: vmaleh  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xac
-+
-+#CHECK: vmaleh  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xac
-+
-+#CHECK: vmaleh  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xac
-+
-+#CHECK: vmalf   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xaa
-+
-+#CHECK: vmalf   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xaa
-+
-+#CHECK: vmalf   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xaa
-+
-+#CHECK: vmalhb  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa9
-+
-+#CHECK: vmalhb  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xa9
-+
-+#CHECK: vmalhb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xa9
-+
-+#CHECK: vmalhf  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xa9
-+
-+#CHECK: vmalhf  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xa9
-+
-+#CHECK: vmalhf  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xa9
-+
-+#CHECK: vmalhh  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xa9
-+
-+#CHECK: vmalhh  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xa9
-+
-+#CHECK: vmalhh  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xa9
-+
-+#CHECK: vmalhw  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xaa
-+
-+#CHECK: vmalhw  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xaa
-+
-+#CHECK: vmalhw  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xaa
-+
-+#CHECK: vmalob  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xad
-+
-+#CHECK: vmalob  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xad
-+
-+#CHECK: vmalob  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xad
-+
-+#CHECK: vmalof  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xad
-+
-+#CHECK: vmalof  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xad
-+
-+#CHECK: vmalof  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xad
-+
-+#CHECK: vmaloh  %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xad
-+
-+#CHECK: vmaloh  %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xad
-+
-+#CHECK: vmaloh  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xad
-+
-+#CHECK: vmaob   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xaf
-+
-+#CHECK: vmaob   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0xaf
-+
-+#CHECK: vmaob   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0xaf
-+
-+#CHECK: vmaof   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x02 0x00 0x00 0xaf
-+
-+#CHECK: vmaof   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x52 0x00 0x65 0xaf
-+
-+#CHECK: vmaof   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf2 0x00 0xff 0xaf
-+
-+#CHECK: vmaoh   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x01 0x00 0x00 0xaf
-+
-+#CHECK: vmaoh   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x51 0x00 0x65 0xaf
-+
-+#CHECK: vmaoh   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf1 0x00 0xff 0xaf
-+
-+#CHECK: vmeb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa6
-+
-+#CHECK: vmeb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa6
-+
-+#CHECK: vmeb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa6
-+
-+#CHECK: vmef    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa6
-+
-+#CHECK: vmef    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa6
-+
-+#CHECK: vmef    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa6
-+
-+#CHECK: vmeh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa6
-+
-+#CHECK: vmeh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa6
-+
-+#CHECK: vmeh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa6
-+
-+#CHECK: vmhb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa3
-+
-+#CHECK: vmhb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa3
-+
-+#CHECK: vmhb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa3
-+
-+#CHECK: vmhf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa3
-+
-+#CHECK: vmhf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa3
-+
-+#CHECK: vmhf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa3
-+
-+#CHECK: vmhh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa3
-+
-+#CHECK: vmhh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa3
-+
-+#CHECK: vmhh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa3
-+
-+#CHECK: vmlb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa2
-+
-+#CHECK: vmlb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa2
-+
-+#CHECK: vmlb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa2
-+
-+#CHECK: vmlf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa2
-+
-+#CHECK: vmlf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa2
-+
-+#CHECK: vmlf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa2
-+
-+#CHECK: vmleb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa4
-+
-+#CHECK: vmleb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa4
-+
-+#CHECK: vmleb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa4
-+
-+#CHECK: vmlef   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa4
-+
-+#CHECK: vmlef   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa4
-+
-+#CHECK: vmlef   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa4
-+
-+#CHECK: vmleh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa4
-+
-+#CHECK: vmleh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa4
-+
-+#CHECK: vmleh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa4
-+
-+#CHECK: vmlhb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa1
-+
-+#CHECK: vmlhb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa1
-+
-+#CHECK: vmlhb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa1
-+
-+#CHECK: vmlhf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa1
-+
-+#CHECK: vmlhf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa1
-+
-+#CHECK: vmlhf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa1
-+
-+#CHECK: vmlhh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa1
-+
-+#CHECK: vmlhh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa1
-+
-+#CHECK: vmlhh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa1
-+
-+#CHECK: vmlhw   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa2
-+
-+#CHECK: vmlhw   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa2
-+
-+#CHECK: vmlhw   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa2
-+
-+#CHECK: vmlob   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa5
-+
-+#CHECK: vmlob   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa5
-+
-+#CHECK: vmlob   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa5
-+
-+#CHECK: vmlof   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa5
-+
-+#CHECK: vmlof   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa5
-+
-+#CHECK: vmlof   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa5
-+
-+#CHECK: vmloh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa5
-+
-+#CHECK: vmloh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa5
-+
-+#CHECK: vmloh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa5
-+
-+#CHECK: vmnb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xfe
-+
-+#CHECK: vmnb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xfe
-+
-+#CHECK: vmnb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xfe
-+
-+#CHECK: vmnf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xfe
-+
-+#CHECK: vmnf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xfe
-+
-+#CHECK: vmnf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xfe
-+
-+#CHECK: vmng    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xfe
-+
-+#CHECK: vmng    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xfe
-+
-+#CHECK: vmng    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xfe
-+
-+#CHECK: vmnh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xfe
-+
-+#CHECK: vmnh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xfe
-+
-+#CHECK: vmnh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xfe
-+
-+#CHECK: vmnlb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xfc
-+
-+#CHECK: vmnlb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xfc
-+
-+#CHECK: vmnlb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xfc
-+
-+#CHECK: vmnlf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xfc
-+
-+#CHECK: vmnlf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xfc
-+
-+#CHECK: vmnlf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xfc
-+
-+#CHECK: vmnlg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xfc
-+
-+#CHECK: vmnlg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xfc
-+
-+#CHECK: vmnlg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xfc
-+
-+#CHECK: vmnlh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xfc
-+
-+#CHECK: vmnlh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xfc
-+
-+#CHECK: vmnlh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xfc
-+
-+#CHECK: vmob    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xa7
-+
-+#CHECK: vmob    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xa7
-+
-+#CHECK: vmob    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xa7
-+
-+#CHECK: vmof    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xa7
-+
-+#CHECK: vmof    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xa7
-+
-+#CHECK: vmof    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xa7
-+
-+#CHECK: vmoh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xa7
-+
-+#CHECK: vmoh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xa7
-+
-+#CHECK: vmoh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xa7
-+
-+#CHECK: vmrhb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x61
-+
-+#CHECK: vmrhb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x61
-+
-+#CHECK: vmrhb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x61
-+
-+#CHECK: vmrhf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x61
-+
-+#CHECK: vmrhf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x61
-+
-+#CHECK: vmrhf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x61
-+
-+#CHECK: vmrhg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x61
-+
-+#CHECK: vmrhg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x61
-+
-+#CHECK: vmrhg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x61
-+
-+#CHECK: vmrhh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x61
-+
-+#CHECK: vmrhh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x61
-+
-+#CHECK: vmrhh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x61
-+
-+#CHECK: vmrlb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x60
-+
-+#CHECK: vmrlb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x60
-+
-+#CHECK: vmrlb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x60
-+
-+#CHECK: vmrlf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x60
-+
-+#CHECK: vmrlf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x60
-+
-+#CHECK: vmrlf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x60
-+
-+#CHECK: vmrlg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x60
-+
-+#CHECK: vmrlg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x60
-+
-+#CHECK: vmrlg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x60
-+
-+#CHECK: vmrlh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x60
-+
-+#CHECK: vmrlh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x60
-+
-+#CHECK: vmrlh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x60
-+
-+#CHECK: vmxb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xff
-+
-+#CHECK: vmxb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xff
-+
-+#CHECK: vmxb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xff
-+
-+#CHECK: vmxf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xff
-+
-+#CHECK: vmxf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xff
-+
-+#CHECK: vmxf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xff
-+
-+#CHECK: vmxg    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xff
-+
-+#CHECK: vmxg    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xff
-+
-+#CHECK: vmxg    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xff
-+
-+#CHECK: vmxh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xff
-+
-+#CHECK: vmxh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xff
-+
-+#CHECK: vmxh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xff
-+
-+#CHECK: vmxlb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xfd
-+
-+#CHECK: vmxlb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xfd
-+
-+#CHECK: vmxlb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xfd
-+
-+#CHECK: vmxlf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xfd
-+
-+#CHECK: vmxlf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xfd
-+
-+#CHECK: vmxlf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xfd
-+
-+#CHECK: vmxlg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xfd
-+
-+#CHECK: vmxlg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xfd
-+
-+#CHECK: vmxlg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xfd
-+
-+#CHECK: vmxlh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xfd
-+
-+#CHECK: vmxlh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xfd
-+
-+#CHECK: vmxlh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xfd
-+
-+#CHECK: vn      %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x68
-+
-+#CHECK: vn      %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x68
-+
-+#CHECK: vn      %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x68
-+
-+#CHECK: vnc     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x69
-+
-+#CHECK: vnc     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x69
-+
-+#CHECK: vnc     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x69
-+
-+#CHECK: vno     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x6b
-+
-+#CHECK: vno     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x6b
-+
-+#CHECK: vno     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x6b
-+
-+#CHECK: vo      %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x6a
-+
-+#CHECK: vo      %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x6a
-+
-+#CHECK: vo      %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x6a
-+
-+#CHECK: vpdi    %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x84
-+
-+#CHECK: vpdi    %v3, %v20, %v5, 4
-+0xe7 0x34 0x50 0x00 0x44 0x84
-+
-+#CHECK: vpdi    %v31, %v31, %v31, 15
-+0xe7 0xff 0xf0 0x00 0xfe 0x84
-+
-+#CHECK: vperm   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x8c
-+
-+#CHECK: vperm   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0x8c
-+
-+#CHECK: vperm   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0x8c
-+
-+#CHECK: vpkf    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x94
-+
-+#CHECK: vpkf    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x94
-+
-+#CHECK: vpkf    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x94
-+
-+#CHECK: vpkg    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x94
-+
-+#CHECK: vpkg    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x94
-+
-+#CHECK: vpkg    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x94
-+
-+#CHECK: vpkh    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x94
-+
-+#CHECK: vpkh    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x94
-+
-+#CHECK: vpkh    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x94
-+
-+#CHECK: vpklsf  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x95
-+
-+#CHECK: vpklsf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x95
-+
-+#CHECK: vpklsfs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0x95
-+
-+#CHECK: vpklsf  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x95
-+
-+#CHECK: vpklsg  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x95
-+
-+#CHECK: vpklsg  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x95
-+
-+#CHECK: vpklsgs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x34 0x95
-+
-+#CHECK: vpklsg  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x95
-+
-+#CHECK: vpklsh  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x95
-+
-+#CHECK: vpklsh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x95
-+
-+#CHECK: vpklshs %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0x95
-+
-+#CHECK: vpklsh  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x95
-+
-+#CHECK: vpksf   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x97
-+
-+#CHECK: vpksf   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x97
-+
-+#CHECK: vpksfs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x24 0x97
-+
-+#CHECK: vpksf   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x97
-+
-+#CHECK: vpksg   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x97
-+
-+#CHECK: vpksg   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x97
-+
-+#CHECK: vpksgs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x34 0x97
-+
-+#CHECK: vpksg   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x97
-+
-+#CHECK: vpksh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x97
-+
-+#CHECK: vpksh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x97
-+
-+#CHECK: vpkshs  %v7, %v24, %v9
-+0xe7 0x78 0x90 0x10 0x14 0x97
-+
-+#CHECK: vpksh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x97
-+
-+#CHECK: vpopct  %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x50
-+
-+#CHECK: vpopct  %v19, %v14, 0
-+0xe7 0x3e 0x00 0x00 0x08 0x50
-+
-+#CHECK: vpopct  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x50
-+
-+#CHECK: vrepb   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x4d
-+
-+#CHECK: vrepb   %v19, %v4, 22136
-+0xe7 0x34 0x56 0x78 0x08 0x4d
-+
-+#CHECK: vrepb   %v31, %v31, 65535
-+0xe7 0xff 0xff 0xff 0x0c 0x4d
-+
-+#CHECK: vrepf   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x4d
-+
-+#CHECK: vrepf   %v19, %v4, 22136
-+0xe7 0x34 0x56 0x78 0x28 0x4d
-+
-+#CHECK: vrepf   %v31, %v31, 65535
-+0xe7 0xff 0xff 0xff 0x2c 0x4d
-+
-+#CHECK: vrepg   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x4d
-+
-+#CHECK: vrepg   %v19, %v4, 22136
-+0xe7 0x34 0x56 0x78 0x38 0x4d
-+
-+#CHECK: vrepg   %v31, %v31, 65535
-+0xe7 0xff 0xff 0xff 0x3c 0x4d
-+
-+#CHECK: vreph   %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x4d
-+
-+#CHECK: vreph   %v19, %v4, 22136
-+0xe7 0x34 0x56 0x78 0x18 0x4d
-+
-+#CHECK: vreph   %v31, %v31, 65535
-+0xe7 0xff 0xff 0xff 0x1c 0x4d
-+
-+#CHECK: vrepib  %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x45
-+
-+#CHECK: vrepib  %v23, -30293
-+0xe7 0x70 0x89 0xab 0x08 0x45
-+
-+#CHECK: vrepib  %v31, -1
-+0xe7 0xf0 0xff 0xff 0x08 0x45
-+
-+#CHECK: vrepif  %v0, 0
-+0xe7 0x00 0x00 0x00 0x20 0x45
-+
-+#CHECK: vrepif  %v23, -30293
-+0xe7 0x70 0x89 0xab 0x28 0x45
-+
-+#CHECK: vrepif  %v31, -1
-+0xe7 0xf0 0xff 0xff 0x28 0x45
-+
-+#CHECK: vrepig  %v0, 0
-+0xe7 0x00 0x00 0x00 0x30 0x45
-+
-+#CHECK: vrepig  %v23, -30293
-+0xe7 0x70 0x89 0xab 0x38 0x45
-+
-+#CHECK: vrepig  %v31, -1
-+0xe7 0xf0 0xff 0xff 0x38 0x45
-+
-+#CHECK: vrepih  %v0, 0
-+0xe7 0x00 0x00 0x00 0x10 0x45
-+
-+#CHECK: vrepih  %v23, -30293
-+0xe7 0x70 0x89 0xab 0x18 0x45
-+
-+#CHECK: vrepih  %v31, -1
-+0xe7 0xf0 0xff 0xff 0x18 0x45
-+
-+#CHECK: vsb     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf7
-+
-+#CHECK: vsb     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf7
-+
-+#CHECK: vsb     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf7
-+
-+#CHECK: vsbiq   %v0, %v0, %v0, %v0
-+0xe7 0x00 0x04 0x00 0x00 0xbf
-+
-+#CHECK: vsbiq   %v3, %v20, %v5, %v22
-+0xe7 0x34 0x54 0x00 0x65 0xbf
-+
-+#CHECK: vsbiq   %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf4 0x00 0xff 0xbf
-+
-+#CHECK: vsbcbiq %v0, %v0, %v0, %v0
-+0xe7 0x00 0x04 0x00 0x00 0xbd
-+
-+#CHECK: vsbcbiq %v3, %v20, %v5, %v22
-+0xe7 0x34 0x54 0x00 0x65 0xbd
-+
-+#CHECK: vsbcbiq %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf4 0x00 0xff 0xbd
-+
-+#CHECK: vscbib  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xf5
-+
-+#CHECK: vscbib  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0xf5
-+
-+#CHECK: vscbib  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0xf5
-+
-+#CHECK: vscbif  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf5
-+
-+#CHECK: vscbif  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf5
-+
-+#CHECK: vscbif  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf5
-+
-+#CHECK: vscbig  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf5
-+
-+#CHECK: vscbig  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf5
-+
-+#CHECK: vscbig  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf5
-+
-+#CHECK: vscbih  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf5
-+
-+#CHECK: vscbih  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf5
-+
-+#CHECK: vscbih  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf5
-+
-+#CHECK: vscbiq  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x40 0xf5
-+
-+#CHECK: vscbiq  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x4a 0xf5
-+
-+#CHECK: vscbiq  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x4e 0xf5
-+
-+#CHECK: vscef   %v0, 0(%v0), 0
-+0xe7 0x00 0x00 0x00 0x00 0x1b
-+
-+#CHECK: vscef   %v10, 1000(%v19,%r7), 2
-+0xe7 0xa3 0x73 0xe8 0x24 0x1b
-+
-+#CHECK: vscef   %v31, 4095(%v31,%r15), 3
-+0xe7 0xff 0xff 0xff 0x3c 0x1b
-+
-+#CHECK: vsceg   %v0, 0(%v0), 0
-+0xe7 0x00 0x00 0x00 0x00 0x1a
-+
-+#CHECK: vsceg   %v10, 1000(%v19,%r7), 1
-+0xe7 0xa3 0x73 0xe8 0x14 0x1a
-+
-+#CHECK: vsceg   %v31, 4095(%v31,%r15), 1
-+0xe7 0xff 0xff 0xff 0x1c 0x1a
-+
-+#CHECK: vsegb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x5f
-+
-+#CHECK: vsegb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0x5f
-+
-+#CHECK: vsegb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0x5f
-+
-+#CHECK: vsegf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x5f
-+
-+#CHECK: vsegf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0x5f
-+
-+#CHECK: vsegf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0x5f
-+
-+#CHECK: vsegh   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x5f
-+
-+#CHECK: vsegh   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0x5f
-+
-+#CHECK: vsegh   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0x5f
-+
-+#CHECK: vsel    %v0, %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x8d
-+
-+#CHECK: vsel    %v3, %v20, %v5, %v22
-+0xe7 0x34 0x50 0x00 0x65 0x8d
-+
-+#CHECK: vsel    %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0xff 0x8d
-+
-+#CHECK: vsf     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xf7
-+
-+#CHECK: vsf     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0xf7
-+
-+#CHECK: vsf     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0xf7
-+
-+#CHECK: vsg     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0xf7
-+
-+#CHECK: vsg     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0xf7
-+
-+#CHECK: vsg     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0xf7
-+
-+#CHECK: vsh     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xf7
-+
-+#CHECK: vsh     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0xf7
-+
-+#CHECK: vsh     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0xf7
-+
-+#CHECK: vsl     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x74
-+
-+#CHECK: vsl     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x74
-+
-+#CHECK: vsl     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x74
-+
-+#CHECK: vslb    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x75
-+
-+#CHECK: vslb    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x75
-+
-+#CHECK: vslb    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x75
-+
-+#CHECK: vsldb   %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x77
-+
-+#CHECK: vsldb   %v3, %v20, %v5, 103
-+0xe7 0x34 0x50 0x67 0x04 0x77
-+
-+#CHECK: vsldb   %v31, %v31, %v31, 255
-+0xe7 0xff 0xf0 0xff 0x0e 0x77
-+
-+#CHECK: vsq     %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x40 0xf7
-+
-+#CHECK: vsq     %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x4a 0xf7
-+
-+#CHECK: vsq     %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x4e 0xf7
-+
-+#CHECK: vsra    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x7e
-+
-+#CHECK: vsra    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x7e
-+
-+#CHECK: vsra    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x7e
-+
-+#CHECK: vsrab   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x7f
-+
-+#CHECK: vsrab   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x7f
-+
-+#CHECK: vsrab   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x7f
-+
-+#CHECK: vsrl    %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x7c
-+
-+#CHECK: vsrl    %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x7c
-+
-+#CHECK: vsrl    %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x7c
-+
-+#CHECK: vsrlb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x7d
-+
-+#CHECK: vsrlb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x7d
-+
-+#CHECK: vsrlb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x7d
-+
-+#CHECK: vst     %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x0E
-+
-+#CHECK: vst     %v17, 2475(%r7,%r8)
-+0xe7 0x17 0x89 0xab 0x08 0x0E
-+
-+#CHECK: vst     %v31, 4095(%r15,%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x0E
-+
-+#CHECK: vsteb   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x08
-+
-+#CHECK: vsteb   %v17, 2475(%r7,%r8), 12
-+0xe7 0x17 0x89 0xab 0xc8 0x08
-+
-+#CHECK: vsteb   %v31, 4095(%r15,%r15), 15
-+0xe7 0xff 0xff 0xff 0xf8 0x08
-+
-+#CHECK: vstef   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x0b
-+
-+#CHECK: vstef   %v17, 2475(%r7,%r8), 2
-+0xe7 0x17 0x89 0xab 0x28 0x0b
-+
-+#CHECK: vstef   %v31, 4095(%r15,%r15), 3
-+0xe7 0xff 0xff 0xff 0x38 0x0b
-+
-+#CHECK: vsteg   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x0a
-+
-+#CHECK: vsteg   %v17, 2475(%r7,%r8), 1
-+0xe7 0x17 0x89 0xab 0x18 0x0a
-+
-+#CHECK: vsteg   %v31, 4095(%r15,%r15), 1
-+0xe7 0xff 0xff 0xff 0x18 0x0a
-+
-+#CHECK: vsteh   %v0, 0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x09
-+
-+#CHECK: vsteh   %v17, 2475(%r7,%r8), 5
-+0xe7 0x17 0x89 0xab 0x58 0x09
-+
-+#CHECK: vsteh   %v31, 4095(%r15,%r15), 7
-+0xe7 0xff 0xff 0xff 0x78 0x09
-+
-+#CHECK: vstl    %v0, %r0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x3f
-+
-+#CHECK: vstl    %v18, %r3, 1383(%r4)
-+0xe7 0x23 0x45 0x67 0x08 0x3f
-+
-+#CHECK: vstl    %v31, %r15, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x08 0x3f
-+
-+#CHECK: vstm    %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x3e
-+
-+#CHECK: vstm    %v12, %v18, 1110(%r3)
-+0xe7 0xc2 0x34 0x56 0x04 0x3e
-+
-+#CHECK: vstm    %v31, %v31, 4095(%r15)
-+0xe7 0xff 0xff 0xff 0x0c 0x3e
-+
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 0
-+0xe7 0x00 0x00 0x00 0x00 0x8a
-+
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 12
-+0xe7 0x00 0x00 0xc0 0x00 0x8a
-+
-+#CHECK: vstrcb   %v18, %v3, %v20, %v5, 0
-+0xe7 0x23 0x40 0x00 0x5a 0x8a
-+
-+#CHECK: vstrcb   %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x40 0xff 0x8a
-+
-+#CHECK: vstrcbs  %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0x90 0xff 0x8a
-+
-+#CHECK: vstrczb  %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf0 0x60 0xff 0x8a
-+
-+#CHECK: vstrczbs %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf0 0xb0 0xff 0x8a
-+
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 0
-+0xe7 0x00 0x02 0x00 0x00 0x8a
-+
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 12
-+0xe7 0x00 0x02 0xc0 0x00 0x8a
-+
-+#CHECK: vstrcf   %v18, %v3, %v20, %v5, 0
-+0xe7 0x23 0x42 0x00 0x5a 0x8a
-+
-+#CHECK: vstrcf   %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf2 0x40 0xff 0x8a
-+
-+#CHECK: vstrcfs  %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf2 0x90 0xff 0x8a
-+
-+#CHECK: vstrczf  %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf2 0x60 0xff 0x8a
-+
-+#CHECK: vstrczfs %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf2 0xb0 0xff 0x8a
-+
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 0
-+0xe7 0x00 0x01 0x00 0x00 0x8a
-+
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 12
-+0xe7 0x00 0x01 0xc0 0x00 0x8a
-+
-+#CHECK: vstrch   %v18, %v3, %v20, %v5, 0
-+0xe7 0x23 0x41 0x00 0x5a 0x8a
-+
-+#CHECK: vstrch   %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf1 0x40 0xff 0x8a
-+
-+#CHECK: vstrchs  %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf1 0x90 0xff 0x8a
-+
-+#CHECK: vstrczh  %v31, %v31, %v31, %v31, 4
-+0xe7 0xff 0xf1 0x60 0xff 0x8a
-+
-+#CHECK: vstrczhs %v31, %v31, %v31, %v31, 8
-+0xe7 0xff 0xf1 0xb0 0xff 0x8a
-+
-+#CHECK: vsumgh  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x65
-+
-+#CHECK: vsumgh  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x65
-+
-+#CHECK: vsumgh  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x65
-+
-+#CHECK: vsumgf  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x65
-+
-+#CHECK: vsumgf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x65
-+
-+#CHECK: vsumgf  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x65
-+
-+#CHECK: vsumqf  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0x67
-+
-+#CHECK: vsumqf  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x2a 0x67
-+
-+#CHECK: vsumqf  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x2e 0x67
-+
-+#CHECK: vsumqg  %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x30 0x67
-+
-+#CHECK: vsumqg  %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x3a 0x67
-+
-+#CHECK: vsumqg  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x3e 0x67
-+
-+#CHECK: vsumb   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x64
-+
-+#CHECK: vsumb   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x64
-+
-+#CHECK: vsumb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x64
-+
-+#CHECK: vsumh   %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0x64
-+
-+#CHECK: vsumh   %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x1a 0x64
-+
-+#CHECK: vsumh   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x1e 0x64
-+
-+#CHECK: vtm     %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd8
-+
-+#CHECK: vtm     %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd8
-+
-+#CHECK: vtm     %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd8
-+
-+#CHECK: vuphb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd7
-+
-+#CHECK: vuphb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd7
-+
-+#CHECK: vuphb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd7
-+
-+#CHECK: vuphf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xd7
-+
-+#CHECK: vuphf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xd7
-+
-+#CHECK: vuphf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xd7
-+
-+#CHECK: vuphh   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xd7
-+
-+#CHECK: vuphh   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xd7
-+
-+#CHECK: vuphh   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xd7
-+
-+#CHECK: vuplhb  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd5
-+
-+#CHECK: vuplhb  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd5
-+
-+#CHECK: vuplhb  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd5
-+
-+#CHECK: vuplhf  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xd5
-+
-+#CHECK: vuplhf  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xd5
-+
-+#CHECK: vuplhf  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xd5
-+
-+#CHECK: vuplhh  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xd5
-+
-+#CHECK: vuplhh  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xd5
-+
-+#CHECK: vuplhh  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xd5
-+
-+#CHECK: vuplb   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd6
-+
-+#CHECK: vuplb   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd6
-+
-+#CHECK: vuplb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd6
-+
-+#CHECK: vuplf   %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xd6
-+
-+#CHECK: vuplf   %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xd6
-+
-+#CHECK: vuplf   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xd6
-+
-+#CHECK: vuplhw  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xd6
-+
-+#CHECK: vuplhw  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xd6
-+
-+#CHECK: vuplhw  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xd6
-+
-+#CHECK: vupllb  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0xd4
-+
-+#CHECK: vupllb  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x08 0xd4
-+
-+#CHECK: vupllb  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x0c 0xd4
-+
-+#CHECK: vupllf  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x20 0xd4
-+
-+#CHECK: vupllf  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x28 0xd4
-+
-+#CHECK: vupllf  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x2c 0xd4
-+
-+#CHECK: vupllh  %v0, %v0
-+0xe7 0x00 0x00 0x00 0x10 0xd4
-+
-+#CHECK: vupllh  %v19, %v14
-+0xe7 0x3e 0x00 0x00 0x18 0xd4
-+
-+#CHECK: vupllh  %v31, %v31
-+0xe7 0xff 0x00 0x00 0x1c 0xd4
-+
-+#CHECK: vx      %v0, %v0, %v0
-+0xe7 0x00 0x00 0x00 0x00 0x6d
-+
-+#CHECK: vx      %v18, %v3, %v20
-+0xe7 0x23 0x40 0x00 0x0a 0x6d
-+
-+#CHECK: vx      %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x00 0x0e 0x6d
-+
-+#CHECK: wcdgb   %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc3
-+
-+#CHECK: wcdgb   %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc3
-+
-+#CHECK: wcdgb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc3
-+
-+#CHECK: wcdlgb  %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc1
-+
-+#CHECK: wcdlgb  %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc1
-+
-+#CHECK: wcdlgb  %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc1
-+
-+#CHECK: wcgdb   %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc2
-+
-+#CHECK: wcgdb   %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc2
-+
-+#CHECK: wcgdb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc2
-+
-+#CHECK: wclgdb  %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc0
-+
-+#CHECK: wclgdb  %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc0
-+
-+#CHECK: wclgdb  %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc0
-+
-+#CHECK: wfadb   %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xe3
-+
-+#CHECK: wfadb   %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xe3
-+
-+#CHECK: wfadb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xe3
-+
-+#CHECK: wfcdb   %f0, %f0
-+0xe7 0x00 0x00 0x00 0x30 0xcb
-+
-+#CHECK: wfcdb   %v19, %f14
-+0xe7 0x3e 0x00 0x00 0x38 0xcb
-+
-+#CHECK: wfcdb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xcb
-+
-+#CHECK: wfcedb  %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xe8
-+
-+#CHECK: wfcedb  %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xe8
-+
-+#CHECK: wfcedb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xe8
-+
-+#CHECK: wfcedbs %f0, %f0, %f0
-+0xe7 0x00 0x00 0x18 0x30 0xe8
-+
-+#CHECK: wfcedbs %v18, %f3, %v20
-+0xe7 0x23 0x40 0x18 0x3a 0xe8
-+
-+#CHECK: wfcedbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x18 0x3e 0xe8
-+
-+#CHECK: wfchdb  %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xeb
-+
-+#CHECK: wfchdb  %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xeb
-+
-+#CHECK: wfchdb  %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xeb
-+
-+#CHECK: wfchdbs %f0, %f0, %f0
-+0xe7 0x00 0x00 0x18 0x30 0xeb
-+
-+#CHECK: wfchdbs %v18, %f3, %v20
-+0xe7 0x23 0x40 0x18 0x3a 0xeb
-+
-+#CHECK: wfchdbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x18 0x3e 0xeb
-+
-+#CHECK: wfchedb %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xea
-+
-+#CHECK: wfchedb %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xea
-+
-+#CHECK: wfchedb %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xea
-+
-+#CHECK: wfchedbs %f0, %f0, %f0
-+0xe7 0x00 0x00 0x18 0x30 0xea
-+
-+#CHECK: wfchedbs %v18, %f3, %v20
-+0xe7 0x23 0x40 0x18 0x3a 0xea
-+
-+#CHECK: wfchedbs %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x18 0x3e 0xea
-+
-+#CHECK: wfddb   %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xe5
-+
-+#CHECK: wfddb   %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xe5
-+
-+#CHECK: wfddb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xe5
-+
-+#CHECK: wfidb   %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc7
-+
-+#CHECK: wfidb   %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc7
-+
-+#CHECK: wfidb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc7
-+
-+#CHECK: wfkdb   %f0, %f0
-+0xe7 0x00 0x00 0x00 0x30 0xca
-+
-+#CHECK: wfkdb   %v19, %f14
-+0xe7 0x3e 0x00 0x00 0x38 0xca
-+
-+#CHECK: wfkdb   %v31, %v31
-+0xe7 0xff 0x00 0x00 0x3c 0xca
-+
-+#CHECK: wflcdb  %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xcc
-+
-+#CHECK: wflcdb  %v19, %f14
-+0xe7 0x3e 0x00 0x08 0x38 0xcc
-+
-+#CHECK: wflcdb  %v31, %v31
-+0xe7 0xff 0x00 0x08 0x3c 0xcc
-+
-+#CHECK: wflndb  %f0, %f0
-+0xe7 0x00 0x00 0x18 0x30 0xcc
-+
-+#CHECK: wflndb  %v19, %f14
-+0xe7 0x3e 0x00 0x18 0x38 0xcc
-+
-+#CHECK: wflndb  %v31, %v31
-+0xe7 0xff 0x00 0x18 0x3c 0xcc
-+
-+#CHECK: wflpdb  %f0, %f0
-+0xe7 0x00 0x00 0x28 0x30 0xcc
-+
-+#CHECK: wflpdb  %v19, %f14
-+0xe7 0x3e 0x00 0x28 0x38 0xcc
-+
-+#CHECK: wflpdb  %v31, %v31
-+0xe7 0xff 0x00 0x28 0x3c 0xcc
-+
-+#CHECK: wfmadb  %f0, %f0, %f0, %f0
-+0xe7 0x00 0x03 0x08 0x00 0x8f
-+
-+#CHECK: wfmadb  %f3, %v20, %f5, %v22
-+0xe7 0x34 0x53 0x08 0x65 0x8f
-+
-+#CHECK: wfmadb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf3 0x08 0xff 0x8f
-+
-+#CHECK: wfmdb   %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xe7
-+
-+#CHECK: wfmdb   %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xe7
-+
-+#CHECK: wfmdb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xe7
-+
-+#CHECK: wfmsdb  %f0, %f0, %f0, %f0
-+0xe7 0x00 0x03 0x08 0x00 0x8e
-+
-+#CHECK: wfmsdb  %f3, %v20, %f5, %v22
-+0xe7 0x34 0x53 0x08 0x65 0x8e
-+
-+#CHECK: wfmsdb  %v31, %v31, %v31, %v31
-+0xe7 0xff 0xf3 0x08 0xff 0x8e
-+
-+#CHECK: wfsdb   %f0, %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xe2
-+
-+#CHECK: wfsdb   %v18, %f3, %v20
-+0xe7 0x23 0x40 0x08 0x3a 0xe2
-+
-+#CHECK: wfsdb   %v31, %v31, %v31
-+0xe7 0xff 0xf0 0x08 0x3e 0xe2
-+
-+#CHECK: wfsqdb  %f0, %f0
-+0xe7 0x00 0x00 0x08 0x30 0xce
-+
-+#CHECK: wfsqdb  %v19, %f14
-+0xe7 0x3e 0x00 0x08 0x38 0xce
-+
-+#CHECK: wfsqdb  %v31, %v31
-+0xe7 0xff 0x00 0x08 0x3c 0xce
-+
-+#CHECK: wftcidb %f0, %f0, 0
-+0xe7 0x00 0x00 0x08 0x30 0x4a
-+
-+#CHECK: wftcidb %v19, %f4, 1383
-+0xe7 0x34 0x56 0x78 0x38 0x4a
-+
-+#CHECK: wftcidb %v31, %v31, 4095
-+0xe7 0xff 0xff 0xf8 0x3c 0x4a
-+
-+#CHECK: wldeb   %f0, %f0
-+0xe7 0x00 0x00 0x08 0x20 0xc4
-+
-+#CHECK: wldeb   %v19, %f14
-+0xe7 0x3e 0x00 0x08 0x28 0xc4
-+
-+#CHECK: wldeb   %v31, %v31
-+0xe7 0xff 0x00 0x08 0x2c 0xc4
-+
-+#CHECK: wledb   %f0, %f0, 0, 0
-+0xe7 0x00 0x00 0x08 0x30 0xc5
-+
-+#CHECK: wledb   %v19, %f14, 4, 10
-+0xe7 0x3e 0x00 0xac 0x38 0xc5
-+
-+#CHECK: wledb   %v31, %v31, 7, 15
-+0xe7 0xff 0x00 0xff 0x3c 0xc5
-Index: llvm-36/test/MC/Disassembler/SystemZ/insns.txt
-===================================================================
---- llvm-36.orig/test/MC/Disassembler/SystemZ/insns.txt
-+++ llvm-36/test/MC/Disassembler/SystemZ/insns.txt
-@@ -2503,6 +2503,15 @@
- # CHECK: ear %r15, %a15
- 0xb2 0x4f 0x00 0xff
- 
-+# CHECK: etnd %r0
-+0xb2 0xec 0x00 0x00
-+
-+# CHECK: etnd %r15
-+0xb2 0xec 0x00 0xf0
-+
-+# CHECK: etnd %r7
-+0xb2 0xec 0x00 0x70
-+
- # CHECK: fidbr %f0, 0, %f0
- 0xb3 0x5f 0x00 0x00
- 
-@@ -6034,6 +6043,36 @@
- # CHECK: ny %r15, 0
- 0xe3 0xf0 0x00 0x00 0x00 0x54
- 
-+# CHECK: ntstg %r0, -524288
-+0xe3 0x00 0x00 0x00 0x80 0x25
-+
-+# CHECK: ntstg %r0, -1
-+0xe3 0x00 0x0f 0xff 0xff 0x25
-+
-+# CHECK: ntstg %r0, 0
-+0xe3 0x00 0x00 0x00 0x00 0x25
-+
-+# CHECK: ntstg %r0, 1
-+0xe3 0x00 0x00 0x01 0x00 0x25
-+
-+# CHECK: ntstg %r0, 524287
-+0xe3 0x00 0x0f 0xff 0x7f 0x25
-+
-+# CHECK: ntstg %r0, 0(%r1)
-+0xe3 0x00 0x10 0x00 0x00 0x25
-+
-+# CHECK: ntstg %r0, 0(%r15)
-+0xe3 0x00 0xf0 0x00 0x00 0x25
-+
-+# CHECK: ntstg %r0, 524287(%r1,%r15)
-+0xe3 0x01 0xff 0xff 0x7f 0x25
-+
-+# CHECK: ntstg %r0, 524287(%r15,%r1)
-+0xe3 0x0f 0x1f 0xff 0x7f 0x25
-+
-+# CHECK: ntstg %r15, 0
-+0xe3 0xf0 0x00 0x00 0x00 0x25
-+
- # CHECK: oc 0(1), 0
- 0xd6 0x00 0x00 0x00 0x00 0x00
- 
-@@ -6334,6 +6373,33 @@
- # CHECK: pfd 15, 0
- 0xe3 0xf0 0x00 0x00 0x00 0x36
- 
-+# CHECK: popcnt %r0, %r0
-+0xb9 0xe1 0x00 0x00
-+
-+# CHECK: popcnt %r0, %r15
-+0xb9 0xe1 0x00 0x0f
-+
-+# CHECK: popcnt %r15, %r0
-+0xb9 0xe1 0x00 0xf0
-+
-+# CHECK: popcnt %r7, %r8
-+0xb9 0xe1 0x00 0x78
-+
-+# CHECK: ppa %r0, %r0, 0
-+0xb2 0xe8 0x00 0x00
-+
-+# CHECK: ppa %r0, %r0, 15
-+0xb2 0xe8 0xf0 0x00
-+
-+# CHECK: ppa %r0, %r15, 0
-+0xb2 0xe8 0x00 0x0f
-+
-+# CHECK: ppa %r4, %r6, 7
-+0xb2 0xe8 0x70 0x46
-+
-+# CHECK: ppa %r15, %r0, 0
-+0xb2 0xe8 0x00 0xf0
-+
- # CHECK: risbg %r0, %r0, 0, 0, 0
- 0xec 0x00 0x00 0x00 0x00 0x55
- 
-@@ -6355,6 +6421,27 @@
- # CHECK: risbg %r4, %r5, 6, 7, 8
- 0xec 0x45 0x06 0x07 0x08 0x55
- 
-+# CHECK: risbgn %r0, %r0, 0, 0, 0
-+0xec 0x00 0x00 0x00 0x00 0x59
-+
-+# CHECK: risbgn %r0, %r0, 0, 0, 63
-+0xec 0x00 0x00 0x00 0x3f 0x59
-+
-+# CHECK: risbgn %r0, %r0, 0, 255, 0
-+0xec 0x00 0x00 0xff 0x00 0x59
-+
-+# CHECK: risbgn %r0, %r0, 255, 0, 0
-+0xec 0x00 0xff 0x00 0x00 0x59
-+
-+# CHECK: risbgn %r0, %r15, 0, 0, 0
-+0xec 0x0f 0x00 0x00 0x00 0x59
-+
-+# CHECK: risbgn %r15, %r0, 0, 0, 0
-+0xec 0xf0 0x00 0x00 0x00 0x59
-+
-+# CHECK: risbgn %r4, %r5, 6, 7, 8
-+0xec 0x45 0x06 0x07 0x08 0x59
-+
- # CHECK: risbhg %r0, %r0, 0, 0, 0
- 0xec 0x00 0x00 0x00 0x00 0x5d
- 
-@@ -8029,6 +8116,93 @@
- # CHECK: sy %r15, 0
- 0xe3 0xf0 0x00 0x00 0x00 0x5b
- 
-+# CHECK: tabort 0
-+0xb2 0xfc 0x00 0x00
-+
-+# CHECK: tabort 0(%r1)
-+0xb2 0xfc 0x10 0x00
-+
-+# CHECK: tabort 0(%r15)
-+0xb2 0xfc 0xf0 0x00
-+
-+# CHECK: tabort 4095
-+0xb2 0xfc 0x0f 0xff
-+
-+# CHECK: tabort 4095(%r1)
-+0xb2 0xfc 0x1f 0xff
-+
-+# CHECK: tabort 4095(%r15)
-+0xb2 0xfc 0xff 0xff
-+
-+# CHECK: tbegin 0, 0
-+0xe5 0x60 0x00 0x00 0x00 0x00
-+
-+# CHECK: tbegin 4095, 0
-+0xe5 0x60 0x0f 0xff 0x00 0x00
-+
-+# CHECK: tbegin 0, 0
-+0xe5 0x60 0x00 0x00 0x00 0x00
-+
-+# CHECK: tbegin 0, 1
-+0xe5 0x60 0x00 0x00 0x00 0x01
-+
-+# CHECK: tbegin 0, 32767
-+0xe5 0x60 0x00 0x00 0x7f 0xff
-+
-+# CHECK: tbegin 0, 32768
-+0xe5 0x60 0x00 0x00 0x80 0x00
-+
-+# CHECK: tbegin 0, 65535
-+0xe5 0x60 0x00 0x00 0xff 0xff
-+
-+# CHECK: tbegin 0(%r1), 42
-+0xe5 0x60 0x10 0x00 0x00 0x2a
-+
-+# CHECK: tbegin 0(%r15), 42
-+0xe5 0x60 0xf0 0x00 0x00 0x2a
-+
-+# CHECK: tbegin 4095(%r1), 42
-+0xe5 0x60 0x1f 0xff 0x00 0x2a
-+
-+# CHECK: tbegin 4095(%r15), 42
-+0xe5 0x60 0xff 0xff 0x00 0x2a
-+
-+# CHECK: tbeginc 0, 0
-+0xe5 0x61 0x00 0x00 0x00 0x00
-+
-+# CHECK: tbeginc 4095, 0
-+0xe5 0x61 0x0f 0xff 0x00 0x00
-+
-+# CHECK: tbeginc 0, 0
-+0xe5 0x61 0x00 0x00 0x00 0x00
-+
-+# CHECK: tbeginc 0, 1
-+0xe5 0x61 0x00 0x00 0x00 0x01
-+
-+# CHECK: tbeginc 0, 32767
-+0xe5 0x61 0x00 0x00 0x7f 0xff
-+
-+# CHECK: tbeginc 0, 32768
-+0xe5 0x61 0x00 0x00 0x80 0x00
-+
-+# CHECK: tbeginc 0, 65535
-+0xe5 0x61 0x00 0x00 0xff 0xff
-+
-+# CHECK: tbeginc 0(%r1), 42
-+0xe5 0x61 0x10 0x00 0x00 0x2a
-+
-+# CHECK: tbeginc 0(%r15), 42
-+0xe5 0x61 0xf0 0x00 0x00 0x2a
-+
-+# CHECK: tbeginc 4095(%r1), 42
-+0xe5 0x61 0x1f 0xff 0x00 0x2a
-+
-+# CHECK: tbeginc 4095(%r15), 42
-+0xe5 0x61 0xff 0xff 0x00 0x2a
-+
-+# CHECK: tend
-+0xb2 0xf8 0x00 0x00
-+
- # CHECK: tm 0, 0
- 0x91 0x00 0x00 0x00
- 
-Index: llvm-36/test/MC/SystemZ/fixups.s
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/SystemZ/fixups.s
-@@ -0,0 +1,119 @@
-+
-+# RUN: llvm-mc -triple s390x-unknown-unknown --show-encoding %s | FileCheck %s
-+
-+# RUN: llvm-mc -triple s390x-unknown-unknown -filetype=obj %s | \
-+# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL
-+
-+# CHECK: larl %r14, target                      # encoding: [0xc0,0xe0,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2
-+	.align 16
-+	larl %r14, target
-+
-+# CHECK: larl %r14, target@GOT                  # encoding: [0xc0,0xe0,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@GOT+2, kind: FK_390_PC32DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_GOTENT target 0x2
-+	.align 16
-+	larl %r14, target@got
-+
-+# CHECK: larl %r14, target@INDNTPOFF            # encoding: [0xc0,0xe0,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@INDNTPOFF+2, kind: FK_390_PC32DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_TLS_IEENT target 0x2
-+	.align 16
-+	larl %r14, target@indntpoff
-+
-+# CHECK: brasl %r14, target                     # encoding: [0xc0,0xe5,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2
-+	.align 16
-+	brasl %r14, target
-+
-+# CHECK: brasl %r14, target@PLT                 # encoding: [0xc0,0xe5,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2
-+	.align 16
-+	brasl %r14, target@plt
-+
-+# CHECK: brasl %r14, target@PLT:tls_gdcall:sym  # encoding: [0xc0,0xe5,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL
-+# CHECK-NEXT:                                   # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL
-+# CHECK-REL:                                    0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2
-+	.align 16
-+	brasl %r14, target@plt:tls_gdcall:sym
-+
-+# CHECK: brasl %r14, target@PLT:tls_ldcall:sym  # encoding: [0xc0,0xe5,A,A,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL
-+# CHECK-NEXT:                                   # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL
-+# CHECK-REL:                                    0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2
-+	.align 16
-+	brasl %r14, target@plt:tls_ldcall:sym
-+
-+# CHECK: bras %r14, target                      # encoding: [0xa7,0xe5,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target+2, kind: FK_390_PC16DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PC16DBL target 0x2
-+	.align 16
-+	bras %r14, target
-+
-+# CHECK: bras %r14, target@PLT                  # encoding: [0xa7,0xe5,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2
-+	.align 16
-+	bras %r14, target@plt
-+
-+# CHECK: bras %r14, target@PLT:tls_gdcall:sym   # encoding: [0xa7,0xe5,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL
-+# CHECK-NEXT:                                   # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL
-+# CHECK-REL:                                    0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2
-+	.align 16
-+	bras %r14, target@plt:tls_gdcall:sym
-+
-+# CHECK: bras %r14, target@PLT:tls_ldcall:sym   # encoding: [0xa7,0xe5,A,A]
-+# CHECK-NEXT:                                   # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL
-+# CHECK-NEXT:                                   # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL
-+# CHECK-REL:                                    0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0
-+# CHECK-REL:                                    0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2
-+	.align 16
-+	bras %r14, target@plt:tls_ldcall:sym
-+
-+
-+# Data relocs
-+# llvm-mc does not show any "encoding" string for data, so we just check the relocs
-+
-+# CHECK-REL: .rela.data
-+	.data
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE64 target 0x0
-+	.align 16
-+	.quad target@ntpoff
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO64 target 0x0
-+	.align 16
-+	.quad target@dtpoff
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM64 target 0x0
-+	.align 16
-+	.quad target@tlsldm
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD64 target 0x0
-+	.align 16
-+	.quad target@tlsgd
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE32 target 0x0
-+	.align 16
-+	.long target@ntpoff
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO32 target 0x0
-+	.align 16
-+	.long target@dtpoff
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM32 target 0x0
-+	.align 16
-+	.long target@tlsldm
-+
-+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD32 target 0x0
-+	.align 16
-+	.long target@tlsgd
-+
-Index: llvm-36/test/MC/SystemZ/insn-bad-z13.s
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/SystemZ/insn-bad-z13.s
-@@ -0,0 +1,1201 @@
-+# For z13 only.
-+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z13 < %s 2> %t
-+# RUN: FileCheck < %t %s
-+
-+#CHECK: error: invalid operand
-+#CHECK: lcbb	%r0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: lcbb	%r0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: lcbb	%r0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: lcbb	%r0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: lcbb	%r0, 0(%v1,%r2), 0
-+
-+	lcbb	%r0, 0, -1
-+	lcbb	%r0, 0, 16
-+	lcbb	%r0, -1, 0
-+	lcbb	%r0, 4096, 0
-+	lcbb	%r0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vcdgb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vcdgb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vcdgb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vcdgb	%v0, %v0, 16, 0
-+
-+	vcdgb	%v0, %v0, 0, -1
-+	vcdgb	%v0, %v0, 0, 16
-+	vcdgb	%v0, %v0, -1, 0
-+	vcdgb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vcdlgb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vcdlgb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vcdlgb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vcdlgb	%v0, %v0, 16, 0
-+
-+	vcdlgb	%v0, %v0, 0, -1
-+	vcdlgb	%v0, %v0, 0, 16
-+	vcdlgb	%v0, %v0, -1, 0
-+	vcdlgb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vcgdb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vcgdb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vcgdb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vcgdb	%v0, %v0, 16, 0
-+
-+	vcgdb	%v0, %v0, 0, -1
-+	vcgdb	%v0, %v0, 0, 16
-+	vcgdb	%v0, %v0, -1, 0
-+	vcgdb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vclgdb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vclgdb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vclgdb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vclgdb	%v0, %v0, 16, 0
-+
-+	vclgdb	%v0, %v0, 0, -1
-+	vclgdb	%v0, %v0, 0, 16
-+	vclgdb	%v0, %v0, -1, 0
-+	vclgdb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: verimb	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verimb	%v0, %v0, %v0, 256
-+
-+	verimb	%v0, %v0, %v0, -1
-+	verimb	%v0, %v0, %v0, 256
-+
-+#CHECK: error: invalid operand
-+#CHECK: verimf	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verimf	%v0, %v0, %v0, 256
-+
-+	verimf	%v0, %v0, %v0, -1
-+	verimf	%v0, %v0, %v0, 256
-+
-+#CHECK: error: invalid operand
-+#CHECK: verimg	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verimg	%v0, %v0, %v0, 256
-+
-+	verimg	%v0, %v0, %v0, -1
-+	verimg	%v0, %v0, %v0, 256
-+
-+#CHECK: error: invalid operand
-+#CHECK: verimh	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verimh	%v0, %v0, %v0, 256
-+
-+	verimh	%v0, %v0, %v0, -1
-+	verimh	%v0, %v0, %v0, 256
-+
-+#CHECK: error: invalid operand
-+#CHECK: verllb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verllb	%v0, %v0, 4096
-+
-+	verllb	%v0, %v0, -1
-+	verllb	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: verllf	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verllf	%v0, %v0, 4096
-+
-+	verllf	%v0, %v0, -1
-+	verllf	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: verllg	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verllg	%v0, %v0, 4096
-+
-+	verllg	%v0, %v0, -1
-+	verllg	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: verllh	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: verllh	%v0, %v0, 4096
-+
-+	verllh	%v0, %v0, -1
-+	verllh	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: veslb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: veslb	%v0, %v0, 4096
-+
-+	veslb	%v0, %v0, -1
-+	veslb	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: veslf	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: veslf	%v0, %v0, 4096
-+
-+	veslf	%v0, %v0, -1
-+	veslf	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: veslg	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: veslg	%v0, %v0, 4096
-+
-+	veslg	%v0, %v0, -1
-+	veslg	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: veslh	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: veslh	%v0, %v0, 4096
-+
-+	veslh	%v0, %v0, -1
-+	veslh	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrab	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrab	%v0, %v0, 4096
-+
-+	vesrab	%v0, %v0, -1
-+	vesrab	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesraf	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesraf	%v0, %v0, 4096
-+
-+	vesraf	%v0, %v0, -1
-+	vesraf	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrag	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrag	%v0, %v0, 4096
-+
-+	vesrag	%v0, %v0, -1
-+	vesrag	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrah	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrah	%v0, %v0, 4096
-+
-+	vesrah	%v0, %v0, -1
-+	vesrah	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrlb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrlb	%v0, %v0, 4096
-+
-+	vesrlb	%v0, %v0, -1
-+	vesrlb	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrlf	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrlf	%v0, %v0, 4096
-+
-+	vesrlf	%v0, %v0, -1
-+	vesrlf	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrlg	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrlg	%v0, %v0, 4096
-+
-+	vesrlg	%v0, %v0, -1
-+	vesrlg	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vesrlh	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vesrlh	%v0, %v0, 4096
-+
-+	vesrlh	%v0, %v0, -1
-+	vesrlh	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaeb	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaeb	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaeb	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaeb	%v0, %v0, %v0, 0, 0
-+
-+	vfaeb	%v0, %v0, %v0, -1
-+	vfaeb	%v0, %v0, %v0, 16
-+	vfaeb	%v0, %v0
-+	vfaeb	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaebs	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaebs	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaebs	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaebs	%v0, %v0, %v0, 0, 0
-+
-+	vfaebs	%v0, %v0, %v0, -1
-+	vfaebs	%v0, %v0, %v0, 16
-+	vfaebs	%v0, %v0
-+	vfaebs	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaef	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaef	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaef	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaef	%v0, %v0, %v0, 0, 0
-+
-+	vfaef	%v0, %v0, %v0, -1
-+	vfaef	%v0, %v0, %v0, 16
-+	vfaef	%v0, %v0
-+	vfaef	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaeh	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaeh	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaeh	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaeh	%v0, %v0, %v0, 0, 0
-+
-+	vfaeh	%v0, %v0, %v0, -1
-+	vfaeh	%v0, %v0, %v0, 16
-+	vfaeh	%v0, %v0
-+	vfaeh	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaezh	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaezh	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaezh	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaezh	%v0, %v0, %v0, 0, 0
-+
-+	vfaezh	%v0, %v0, %v0, -1
-+	vfaezh	%v0, %v0, %v0, 16
-+	vfaezh	%v0, %v0
-+	vfaezh	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfaezfs	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfaezfs	%v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vfaezfs	%v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vfaezfs	%v0, %v0, %v0, 0, 0
-+
-+	vfaezfs	%v0, %v0, %v0, -1
-+	vfaezfs	%v0, %v0, %v0, 16
-+	vfaezfs	%v0, %v0
-+	vfaezfs	%v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vfidb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vfidb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vfidb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vfidb	%v0, %v0, 16, 0
-+
-+	vfidb	%v0, %v0, 0, -1
-+	vfidb	%v0, %v0, 0, 16
-+	vfidb	%v0, %v0, -1, 0
-+	vfidb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vftcidb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vftcidb	%v0, %v0, 4096
-+
-+	vftcidb	%v0, %v0, -1
-+	vftcidb	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vgbm	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgbm	%v0, 0x10000
-+
-+	vgbm	%v0, -1
-+	vgbm	%v0, 0x10000
-+
-+#CHECK: error: vector index required
-+#CHECK: vgef	%v0, 0(%r1), 0
-+#CHECK: error: vector index required
-+#CHECK: vgef	%v0, 0(%r2,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vgef	%v0, 0(%v0,%r1), -1
-+#CHECK: error: invalid operand
-+#CHECK: vgef	%v0, 0(%v0,%r1), 4
-+#CHECK: error: invalid operand
-+#CHECK: vgef	%v0, -1(%v0,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vgef	%v0, 4096(%v0,%r1), 0
-+
-+	vgef	%v0, 0(%r1), 0
-+	vgef	%v0, 0(%r2,%r1), 0
-+	vgef	%v0, 0(%v0,%r1), -1
-+	vgef	%v0, 0(%v0,%r1), 4
-+	vgef	%v0, -1(%v0,%r1), 0
-+	vgef	%v0, 4096(%v0,%r1), 0
-+
-+#CHECK: error: vector index required
-+#CHECK: vgeg	%v0, 0(%r1), 0
-+#CHECK: error: vector index required
-+#CHECK: vgeg	%v0, 0(%r2,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vgeg	%v0, 0(%v0,%r1), -1
-+#CHECK: error: invalid operand
-+#CHECK: vgeg	%v0, 0(%v0,%r1), 2
-+#CHECK: error: invalid operand
-+#CHECK: vgeg	%v0, -1(%v0,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vgeg	%v0, 4096(%v0,%r1), 0
-+
-+	vgeg	%v0, 0(%r1), 0
-+	vgeg	%v0, 0(%r2,%r1), 0
-+	vgeg	%v0, 0(%v0,%r1), -1
-+	vgeg	%v0, 0(%v0,%r1), 2
-+	vgeg	%v0, -1(%v0,%r1), 0
-+	vgeg	%v0, 4096(%v0,%r1), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vgmb	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmb	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmb	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vgmb	%v0, 256, 0
-+
-+	vgmb	%v0, 0, -1
-+	vgmb	%v0, 0, -1
-+	vgmb	%v0, -1, 0
-+	vgmb	%v0, 256, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vgmf	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmf	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmf	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vgmf	%v0, 256, 0
-+
-+	vgmf	%v0, 0, -1
-+	vgmf	%v0, 0, -1
-+	vgmf	%v0, -1, 0
-+	vgmf	%v0, 256, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vgmg	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmg	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmg	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vgmg	%v0, 256, 0
-+
-+	vgmg	%v0, 0, -1
-+	vgmg	%v0, 0, -1
-+	vgmg	%v0, -1, 0
-+	vgmg	%v0, 256, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vgmh	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmh	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vgmh	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vgmh	%v0, 256, 0
-+
-+	vgmh	%v0, 0, -1
-+	vgmh	%v0, 0, -1
-+	vgmh	%v0, -1, 0
-+	vgmh	%v0, 256, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vl	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vl	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vl	%v0, 0(%v1,%r2)
-+
-+	vl	%v0, -1
-+	vl	%v0, 4096
-+	vl	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlbb	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlbb	%v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vlbb	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vlbb	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlbb	%v0, 0(%v1,%r2), 0
-+
-+	vlbb	%v0, 0, -1
-+	vlbb	%v0, 0, 16
-+	vlbb	%v0, -1, 0
-+	vlbb	%v0, 4096, 0
-+	vlbb	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleb	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleb	%v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vleb	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleb	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vleb	%v0, 0(%v1,%r2), 0
-+
-+	vleb	%v0, 0, -1
-+	vleb	%v0, 0, 16
-+	vleb	%v0, -1, 0
-+	vleb	%v0, 4096, 0
-+	vleb	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vledb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vledb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vledb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vledb	%v0, %v0, 16, 0
-+
-+	vledb	%v0, %v0, 0, -1
-+	vledb	%v0, %v0, 0, 16
-+	vledb	%v0, %v0, -1, 0
-+	vledb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlef	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlef	%v0, 0, 4
-+#CHECK: error: invalid operand
-+#CHECK: vlef	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vlef	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlef	%v0, 0(%v1,%r2), 0
-+
-+	vlef	%v0, 0, -1
-+	vlef	%v0, 0, 4
-+	vlef	%v0, -1, 0
-+	vlef	%v0, 4096, 0
-+	vlef	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleg	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleg	%v0, 0, 2
-+#CHECK: error: invalid operand
-+#CHECK: vleg	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleg	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vleg	%v0, 0(%v1,%r2), 0
-+
-+	vleg	%v0, 0, -1
-+	vleg	%v0, 0, 2
-+	vleg	%v0, -1, 0
-+	vleg	%v0, 4096, 0
-+	vleg	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleh	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleh	%v0, 0, 8
-+#CHECK: error: invalid operand
-+#CHECK: vleh	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleh	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vleh	%v0, 0(%v1,%r2), 0
-+
-+	vleh	%v0, 0, -1
-+	vleh	%v0, 0, 8
-+	vleh	%v0, -1, 0
-+	vleh	%v0, 4096, 0
-+	vleh	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleib	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleib	%v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vleib	%v0, -32769, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleib	%v0, 32768, 0
-+
-+	vleib	%v0, 0, -1
-+	vleib	%v0, 0, 16
-+	vleib	%v0, -32769, 0
-+	vleib	%v0, 32768, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleif	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleif	%v0, 0, 4
-+#CHECK: error: invalid operand
-+#CHECK: vleif	%v0, -32769, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleif	%v0, 32768, 0
-+
-+	vleif	%v0, 0, -1
-+	vleif	%v0, 0, 4
-+	vleif	%v0, -32769, 0
-+	vleif	%v0, 32768, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleig	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleig	%v0, 0, 2
-+#CHECK: error: invalid operand
-+#CHECK: vleig	%v0, -32769, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleig	%v0, 32768, 0
-+
-+	vleig	%v0, 0, -1
-+	vleig	%v0, 0, 2
-+	vleig	%v0, -32769, 0
-+	vleig	%v0, 32768, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vleih	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vleih	%v0, 0, 8
-+#CHECK: error: invalid operand
-+#CHECK: vleih	%v0, -32769, 0
-+#CHECK: error: invalid operand
-+#CHECK: vleih	%v0, 32768, 0
-+
-+	vleih	%v0, 0, -1
-+	vleih	%v0, 0, 8
-+	vleih	%v0, -32769, 0
-+	vleih	%v0, 32768, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlgvb	%r0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlgvb	%r0, %v0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlgvb	%r0, %v0, 0(%r0)
-+
-+	vlgvb	%r0, %v0, -1
-+	vlgvb	%r0, %v0, 4096
-+	vlgvb	%r0, %v0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlgvf	%r0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlgvf	%r0, %v0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlgvf	%r0, %v0, 0(%r0)
-+
-+	vlgvf	%r0, %v0, -1
-+	vlgvf	%r0, %v0, 4096
-+	vlgvf	%r0, %v0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlgvg	%r0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlgvg	%r0, %v0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlgvg	%r0, %v0, 0(%r0)
-+
-+	vlgvg	%r0, %v0, -1
-+	vlgvg	%r0, %v0, 4096
-+	vlgvg	%r0, %v0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlgvh	%r0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlgvh	%r0, %v0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlgvh	%r0, %v0, 0(%r0)
-+
-+	vlgvh	%r0, %v0, -1
-+	vlgvh	%r0, %v0, 4096
-+	vlgvh	%r0, %v0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vll	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vll	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vll	%v0, %r0, 0(%r0)
-+
-+	vll	%v0, %r0, -1
-+	vll	%v0, %r0, 4096
-+	vll	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vllezb	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vllezb	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vllezb	%v0, 0(%v1,%r2)
-+
-+	vllezb	%v0, -1
-+	vllezb	%v0, 4096
-+	vllezb	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vllezf	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vllezf	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vllezf	%v0, 0(%v1,%r2)
-+
-+	vllezf	%v0, -1
-+	vllezf	%v0, 4096
-+	vllezf	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vllezg	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vllezg	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vllezg	%v0, 0(%v1,%r2)
-+
-+	vllezg	%v0, -1
-+	vllezg	%v0, 4096
-+	vllezg	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vllezh	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vllezh	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vllezh	%v0, 0(%v1,%r2)
-+
-+	vllezh	%v0, -1
-+	vllezh	%v0, 4096
-+	vllezh	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlm	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlm	%v0, %v0, 4096
-+
-+	vlm	%v0, %v0, -1
-+	vlm	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlrepb	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlrepb	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlrepb	%v0, 0(%v1,%r2)
-+
-+	vlrepb	%v0, -1
-+	vlrepb	%v0, 4096
-+	vlrepb	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlrepf	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlrepf	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlrepf	%v0, 0(%v1,%r2)
-+
-+	vlrepf	%v0, -1
-+	vlrepf	%v0, 4096
-+	vlrepf	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlrepg	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlrepg	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlrepg	%v0, 0(%v1,%r2)
-+
-+	vlrepg	%v0, -1
-+	vlrepg	%v0, 4096
-+	vlrepg	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlreph	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlreph	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vlreph	%v0, 0(%v1,%r2)
-+
-+	vlreph	%v0, -1
-+	vlreph	%v0, 4096
-+	vlreph	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlvgb	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlvgb	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlvgb	%v0, %r0, 0(%r0)
-+
-+	vlvgb	%v0, %r0, -1
-+	vlvgb	%v0, %r0, 4096
-+	vlvgb	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlvgf	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlvgf	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlvgf	%v0, %r0, 0(%r0)
-+
-+	vlvgf	%v0, %r0, -1
-+	vlvgf	%v0, %r0, 4096
-+	vlvgf	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlvgg	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlvgg	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlvgg	%v0, %r0, 0(%r0)
-+
-+	vlvgg	%v0, %r0, -1
-+	vlvgg	%v0, %r0, 4096
-+	vlvgg	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vlvgh	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vlvgh	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vlvgh	%v0, %r0, 0(%r0)
-+
-+	vlvgh	%v0, %r0, -1
-+	vlvgh	%v0, %r0, 4096
-+	vlvgh	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vpdi	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vpdi	%v0, %v0, %v0, 16
-+
-+	vpdi	%v0, %v0, %v0, -1
-+	vpdi	%v0, %v0, %v0, 16
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vrepb	%v0, %v0, 65536
-+
-+	vrepb	%v0, %v0, -1
-+	vrepb	%v0, %v0, 65536
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepf	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vrepf	%v0, %v0, 65536
-+
-+	vrepf	%v0, %v0, -1
-+	vrepf	%v0, %v0, 65536
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepg	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vrepg	%v0, %v0, 65536
-+
-+	vrepg	%v0, %v0, -1
-+	vrepg	%v0, %v0, 65536
-+
-+#CHECK: error: invalid operand
-+#CHECK: vreph	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vreph	%v0, %v0, 65536
-+
-+	vreph	%v0, %v0, -1
-+	vreph	%v0, %v0, 65536
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepib	%v0, -32769
-+#CHECK: error: invalid operand
-+#CHECK: vrepib	%v0, 32768
-+
-+	vrepib	%v0, -32769
-+	vrepib	%v0, 32768
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepif	%v0, -32769
-+#CHECK: error: invalid operand
-+#CHECK: vrepif	%v0, 32768
-+
-+	vrepif	%v0, -32769
-+	vrepif	%v0, 32768
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepig	%v0, -32769
-+#CHECK: error: invalid operand
-+#CHECK: vrepig	%v0, 32768
-+
-+	vrepig	%v0, -32769
-+	vrepig	%v0, 32768
-+
-+#CHECK: error: invalid operand
-+#CHECK: vrepih	%v0, -32769
-+#CHECK: error: invalid operand
-+#CHECK: vrepih	%v0, 32768
-+
-+	vrepih	%v0, -32769
-+	vrepih	%v0, 32768
-+
-+#CHECK: error: vector index required
-+#CHECK: vscef	%v0, 0(%r1), 0
-+#CHECK: error: vector index required
-+#CHECK: vscef	%v0, 0(%r2,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vscef	%v0, 0(%v0,%r1), -1
-+#CHECK: error: invalid operand
-+#CHECK: vscef	%v0, 0(%v0,%r1), 4
-+#CHECK: error: invalid operand
-+#CHECK: vscef	%v0, -1(%v0,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vscef	%v0, 4096(%v0,%r1), 0
-+
-+	vscef	%v0, 0(%r1), 0
-+	vscef	%v0, 0(%r2,%r1), 0
-+	vscef	%v0, 0(%v0,%r1), -1
-+	vscef	%v0, 0(%v0,%r1), 4
-+	vscef	%v0, -1(%v0,%r1), 0
-+	vscef	%v0, 4096(%v0,%r1), 0
-+
-+#CHECK: error: vector index required
-+#CHECK: vsceg	%v0, 0(%r1), 0
-+#CHECK: error: vector index required
-+#CHECK: vsceg	%v0, 0(%r2,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vsceg	%v0, 0(%v0,%r1), -1
-+#CHECK: error: invalid operand
-+#CHECK: vsceg	%v0, 0(%v0,%r1), 2
-+#CHECK: error: invalid operand
-+#CHECK: vsceg	%v0, -1(%v0,%r1), 0
-+#CHECK: error: invalid operand
-+#CHECK: vsceg	%v0, 4096(%v0,%r1), 0
-+
-+	vsceg	%v0, 0(%r1), 0
-+	vsceg	%v0, 0(%r2,%r1), 0
-+	vsceg	%v0, 0(%v0,%r1), -1
-+	vsceg	%v0, 0(%v0,%r1), 2
-+	vsceg	%v0, -1(%v0,%r1), 0
-+	vsceg	%v0, 4096(%v0,%r1), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vsldb	%v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vsldb	%v0, %v0, %v0, 256
-+
-+	vsldb	%v0, %v0, %v0, -1
-+	vsldb	%v0, %v0, %v0, 256
-+
-+#CHECK: error: invalid operand
-+#CHECK: vst	%v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vst	%v0, 4096
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vst	%v0, 0(%v1,%r2)
-+
-+	vst	%v0, -1
-+	vst	%v0, 4096
-+	vst	%v0, 0(%v1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vsteb	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vsteb	%v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: vsteb	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vsteb	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vsteb	%v0, 0(%v1,%r2), 0
-+
-+	vsteb	%v0, 0, -1
-+	vsteb	%v0, 0, 16
-+	vsteb	%v0, -1, 0
-+	vsteb	%v0, 4096, 0
-+	vsteb	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstef	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstef	%v0, 0, 4
-+#CHECK: error: invalid operand
-+#CHECK: vstef	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vstef	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vstef	%v0, 0(%v1,%r2), 0
-+
-+	vstef	%v0, 0, -1
-+	vstef	%v0, 0, 4
-+	vstef	%v0, -1, 0
-+	vstef	%v0, 4096, 0
-+	vstef	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vsteg	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vsteg	%v0, 0, 2
-+#CHECK: error: invalid operand
-+#CHECK: vsteg	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vsteg	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vsteg	%v0, 0(%v1,%r2), 0
-+
-+	vsteg	%v0, 0, -1
-+	vsteg	%v0, 0, 2
-+	vsteg	%v0, -1, 0
-+	vsteg	%v0, 4096, 0
-+	vsteg	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vsteh	%v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vsteh	%v0, 0, 8
-+#CHECK: error: invalid operand
-+#CHECK: vsteh	%v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: vsteh	%v0, 4096, 0
-+#CHECK: error: invalid use of vector addressing
-+#CHECK: vsteh	%v0, 0(%v1,%r2), 0
-+
-+	vsteh	%v0, 0, -1
-+	vsteh	%v0, 0, 8
-+	vsteh	%v0, -1, 0
-+	vsteh	%v0, 4096, 0
-+	vsteh	%v0, 0(%v1,%r2), 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstl	%v0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstl	%v0, %r0, 4096
-+#CHECK: error: %r0 used in an address
-+#CHECK: vstl	%v0, %r0, 0(%r0)
-+
-+	vstl	%v0, %r0, -1
-+	vstl	%v0, %r0, 4096
-+	vstl	%v0, %r0, 0(%r0)
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstm	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstm	%v0, %v0, 4096
-+
-+	vstm	%v0, %v0, -1
-+	vstm	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrcb   %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrcb   %v0, %v0, %v0, %v0, -1
-+        vstrcb   %v0, %v0, %v0, %v0, 16
-+        vstrcb   %v0, %v0, %v0
-+        vstrcb   %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrcbs  %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrcbs  %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrcbs  %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrcbs  %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrcbs  %v0, %v0, %v0, %v0, -1
-+        vstrcbs  %v0, %v0, %v0, %v0, 16
-+        vstrcbs  %v0, %v0, %v0
-+        vstrcbs  %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrcf   %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrcf   %v0, %v0, %v0, %v0, -1
-+        vstrcf   %v0, %v0, %v0, %v0, 16
-+        vstrcf   %v0, %v0, %v0
-+        vstrcf   %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrch   %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrch   %v0, %v0, %v0, %v0, -1
-+        vstrch   %v0, %v0, %v0, %v0, 16
-+        vstrch   %v0, %v0, %v0
-+        vstrch   %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrczh  %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrczh  %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrczh  %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrczh  %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrczh  %v0, %v0, %v0, %v0, -1
-+        vstrczh  %v0, %v0, %v0, %v0, 16
-+        vstrczh  %v0, %v0, %v0
-+        vstrczh  %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: vstrczfs %v0, %v0, %v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: vstrczfs %v0, %v0, %v0, %v0, 16
-+#CHECK: error: too few operands
-+#CHECK: vstrczfs %v0, %v0, %v0
-+#CHECK: error: invalid operand
-+#CHECK: vstrczfs %v0, %v0, %v0, %v0, 0, 0
-+
-+        vstrczfs %v0, %v0, %v0, %v0, -1
-+        vstrczfs %v0, %v0, %v0, %v0, 16
-+        vstrczfs %v0, %v0, %v0
-+        vstrczfs %v0, %v0, %v0, %v0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wcdgb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wcdgb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wcdgb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wcdgb	%v0, %v0, 16, 0
-+
-+	wcdgb	%v0, %v0, 0, -1
-+	wcdgb	%v0, %v0, 0, 16
-+	wcdgb	%v0, %v0, -1, 0
-+	wcdgb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wcdlgb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wcdlgb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wcdlgb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wcdlgb	%v0, %v0, 16, 0
-+
-+	wcdlgb	%v0, %v0, 0, -1
-+	wcdlgb	%v0, %v0, 0, 16
-+	wcdlgb	%v0, %v0, -1, 0
-+	wcdlgb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wcgdb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wcgdb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wcgdb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wcgdb	%v0, %v0, 16, 0
-+
-+	wcgdb	%v0, %v0, 0, -1
-+	wcgdb	%v0, %v0, 0, 16
-+	wcgdb	%v0, %v0, -1, 0
-+	wcgdb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wclgdb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wclgdb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wclgdb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wclgdb	%v0, %v0, 16, 0
-+
-+	wclgdb	%v0, %v0, 0, -1
-+	wclgdb	%v0, %v0, 0, 16
-+	wclgdb	%v0, %v0, -1, 0
-+	wclgdb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wfidb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wfidb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wfidb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wfidb	%v0, %v0, 16, 0
-+
-+	wfidb	%v0, %v0, 0, -1
-+	wfidb	%v0, %v0, 0, 16
-+	wfidb	%v0, %v0, -1, 0
-+	wfidb	%v0, %v0, 16, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: wftcidb	%v0, %v0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wftcidb	%v0, %v0, 4096
-+
-+	wftcidb	%v0, %v0, -1
-+	wftcidb	%v0, %v0, 4096
-+
-+#CHECK: error: invalid operand
-+#CHECK: wledb	%v0, %v0, 0, -1
-+#CHECK: error: invalid operand
-+#CHECK: wledb	%v0, %v0, 0, 16
-+#CHECK: error: invalid operand
-+#CHECK: wledb	%v0, %v0, -1, 0
-+#CHECK: error: invalid operand
-+#CHECK: wledb	%v0, %v0, 16, 0
-+
-+	wledb	%v0, %v0, 0, -1
-+	wledb	%v0, %v0, 0, 16
-+	wledb	%v0, %v0, -1, 0
-+	wledb	%v0, %v0, 16, 0
-Index: llvm-36/test/MC/SystemZ/insn-bad-z196.s
-===================================================================
---- llvm-36.orig/test/MC/SystemZ/insn-bad-z196.s
-+++ llvm-36/test/MC/SystemZ/insn-bad-z196.s
-@@ -244,6 +244,11 @@
- 	cxlgbr	%f0, 16, %r0, 0
- 	cxlgbr	%f2, 0, %r0, 0
- 
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: etnd	%r7
-+
-+	etnd	%r7
-+
- #CHECK: error: invalid operand
- #CHECK: fidbra	%f0, 0, %f0, -1
- #CHECK: error: invalid operand
-@@ -546,6 +551,21 @@
- 	locr	%r0,%r0,-1
- 	locr	%r0,%r0,16
- 
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: ntstg	%r0, 524287(%r1,%r15)
-+
-+	ntstg	%r0, 524287(%r1,%r15)
-+
-+#CHECK: error: {{(instruction requires: processor-assist)?}}
-+#CHECK: ppa	%r4, %r6, 7
-+
-+	ppa	%r4, %r6, 7
-+
-+#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}}
-+#CHECK: risbgn	%r1, %r2, 0, 0, 0
-+
-+	risbgn	%r1, %r2, 0, 0, 0
-+
- #CHECK: error: invalid operand
- #CHECK: risbhg	%r0,%r0,0,0,-1
- #CHECK: error: invalid operand
-@@ -685,3 +705,24 @@
- 	stocg	%r0,-524289,1
- 	stocg	%r0,524288,1
- 	stocg	%r0,0(%r1,%r2),1
-+
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: tabort	4095(%r1)
-+
-+	tabort	4095(%r1)
-+
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: tbegin	4095(%r1), 42
-+
-+	tbegin	4095(%r1), 42
-+
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: tbeginc	4095(%r1), 42
-+
-+	tbeginc	4095(%r1), 42
-+
-+#CHECK: error: {{(instruction requires: transactional-execution)?}}
-+#CHECK: tend
-+
-+	tend
-+
-Index: llvm-36/test/MC/SystemZ/insn-bad-zEC12.s
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/SystemZ/insn-bad-zEC12.s
-@@ -0,0 +1,1578 @@
-+# For zEC12 only.
-+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 < %s 2> %t
-+# RUN: FileCheck < %t %s
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: lcbb	%r0, 0, 0
-+
-+	lcbb	%r0, 0, 0
-+
-+#CHECK: error: invalid operand
-+#CHECK: ntstg	%r0, -524289
-+#CHECK: error: invalid operand
-+#CHECK: ntstg	%r0, 524288
-+
-+	ntstg	%r0, -524289
-+	ntstg	%r0, 524288
-+
-+#CHECK: error: invalid operand
-+#CHECK: ppa	%r0, %r0, -1
-+#CHECK: error: invalid operand
-+#CHECK: ppa	%r0, %r0, 16
-+
-+	ppa	%r0, %r0, -1
-+	ppa	%r0, %r0, 16
-+
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,0,0,-1
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,0,0,64
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,0,-1,0
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,0,256,0
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,-1,0,0
-+#CHECK: error: invalid operand
-+#CHECK: risbgn	%r0,%r0,256,0,0
-+
-+	risbgn	%r0,%r0,0,0,-1
-+	risbgn	%r0,%r0,0,0,64
-+	risbgn	%r0,%r0,0,-1,0
-+	risbgn	%r0,%r0,0,256,0
-+	risbgn	%r0,%r0,-1,0,0
-+	risbgn	%r0,%r0,256,0,0
-+
-+#CHECK: error: invalid operand
-+#CHECK: tabort	-1
-+#CHECK: error: invalid operand
-+#CHECK: tabort	4096
-+#CHECK: error: invalid use of indexed addressing
-+#CHECK: tabort	0(%r1,%r2)
-+
-+	tabort	-1
-+	tabort	4096
-+	tabort	0(%r1,%r2)
-+
-+#CHECK: error: invalid operand
-+#CHECK: tbegin	-1, 0
-+#CHECK: error: invalid operand
-+#CHECK: tbegin	4096, 0
-+#CHECK: error: invalid use of indexed addressing
-+#CHECK: tbegin	0(%r1,%r2), 0
-+#CHECK: error: invalid operand
-+#CHECK: tbegin	0, -1
-+#CHECK: error: invalid operand
-+#CHECK: tbegin	0, 65536
-+
-+	tbegin	-1, 0
-+	tbegin	4096, 0
-+	tbegin	0(%r1,%r2), 0
-+	tbegin	0, -1
-+	tbegin	0, 65536
-+
-+#CHECK: error: invalid operand
-+#CHECK: tbeginc	-1, 0
-+#CHECK: error: invalid operand
-+#CHECK: tbeginc	4096, 0
-+#CHECK: error: invalid use of indexed addressing
-+#CHECK: tbeginc	0(%r1,%r2), 0
-+#CHECK: error: invalid operand
-+#CHECK: tbeginc	0, -1
-+#CHECK: error: invalid operand
-+#CHECK: tbeginc	0, 65536
-+
-+	tbeginc	-1, 0
-+	tbeginc	4096, 0
-+	tbeginc	0(%r1,%r2), 0
-+	tbeginc	0, -1
-+	tbeginc	0, 65536
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vab	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vag	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vah	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaq	%v0, %v0, %v0
-+
-+	vab	%v0, %v0, %v0
-+	vaf	%v0, %v0, %v0
-+	vag	%v0, %v0, %v0
-+	vah	%v0, %v0, %v0
-+	vaq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaccb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaccf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaccg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vacch	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vaccq	%v0, %v0, %v0
-+
-+	vaccb	%v0, %v0, %v0
-+	vaccf	%v0, %v0, %v0
-+	vaccg	%v0, %v0, %v0
-+	vacch	%v0, %v0, %v0
-+	vaccq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vacccq	%v0, %v0, %v0
-+
-+	vacccq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vacq	%v0, %v0, %v0
-+
-+	vacq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavgb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavgf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavgg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavgh	%v0, %v0, %v0
-+
-+	vavgb	%v0, %v0, %v0
-+	vavgf	%v0, %v0, %v0
-+	vavgg	%v0, %v0, %v0
-+	vavgh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavglb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavglf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavglg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vavglh	%v0, %v0, %v0
-+
-+	vavglb	%v0, %v0, %v0
-+	vavglf	%v0, %v0, %v0
-+	vavglg	%v0, %v0, %v0
-+	vavglh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vcdgb	%v0, %v0, 0, 0
-+
-+	vcdgb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vcdlgb	%v0, %v0, 0, 0
-+
-+	vcdlgb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqbs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqhs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqfs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vceqgs	%v0, %v0, %v0
-+
-+	vceqb	%v0, %v0, %v0
-+	vceqf	%v0, %v0, %v0
-+	vceqg	%v0, %v0, %v0
-+	vceqh	%v0, %v0, %v0
-+	vceqbs	%v0, %v0, %v0
-+	vceqhs	%v0, %v0, %v0
-+	vceqfs	%v0, %v0, %v0
-+	vceqgs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vcgdb	%v0, %v0, 0, 0
-+
-+	vcgdb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchbs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchhs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchfs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchgs	%v0, %v0, %v0
-+
-+	vchb	%v0, %v0, %v0
-+	vchf	%v0, %v0, %v0
-+	vchg	%v0, %v0, %v0
-+	vchh	%v0, %v0, %v0
-+	vchbs	%v0, %v0, %v0
-+	vchhs	%v0, %v0, %v0
-+	vchfs	%v0, %v0, %v0
-+	vchgs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlbs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlhs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlfs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vchlgs	%v0, %v0, %v0
-+
-+	vchlb	%v0, %v0, %v0
-+	vchlf	%v0, %v0, %v0
-+	vchlg	%v0, %v0, %v0
-+	vchlh	%v0, %v0, %v0
-+	vchlbs	%v0, %v0, %v0
-+	vchlhs	%v0, %v0, %v0
-+	vchlfs	%v0, %v0, %v0
-+	vchlgs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vcksm	%v0, %v0, %v0
-+
-+	vcksm	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vclgdb	%v0, %v0, 0, 0
-+
-+	vclgdb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vclzb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vclzf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vclzg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vclzh	%v0, %v0
-+
-+	vclzb	%v0, %v0
-+	vclzf	%v0, %v0
-+	vclzg	%v0, %v0
-+	vclzh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vctzb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vctzf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vctzg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vctzh	%v0, %v0
-+
-+	vctzb	%v0, %v0
-+	vctzf	%v0, %v0
-+	vctzg	%v0, %v0
-+	vctzh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vecb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vecf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vecg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vech	%v0, %v0
-+
-+	vecb	%v0, %v0
-+	vecf	%v0, %v0
-+	vecg	%v0, %v0
-+	vech	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verimb	%v0, %v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verimf	%v0, %v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verimg	%v0, %v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verimh	%v0, %v0, %v0, 0
-+
-+	verimb	%v0, %v0, %v0, 0
-+	verimf	%v0, %v0, %v0, 0
-+	verimg	%v0, %v0, %v0, 0
-+	verimh	%v0, %v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veclb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veclf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veclg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veclh	%v0, %v0
-+
-+	veclb	%v0, %v0
-+	veclf	%v0, %v0
-+	veclg	%v0, %v0
-+	veclh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllvb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllvf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllvg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllvh	%v0, %v0, %v0
-+
-+	verllvb	%v0, %v0, %v0
-+	verllvf	%v0, %v0, %v0
-+	verllvg	%v0, %v0, %v0
-+	verllvh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllb	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllf	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllg	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: verllh	%v0, %v0, 0
-+
-+	verllb	%v0, %v0, 0
-+	verllf	%v0, %v0, 0
-+	verllg	%v0, %v0, 0
-+	verllh	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslvb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslvf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslvg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslvh	%v0, %v0, %v0
-+
-+	veslvb	%v0, %v0, %v0
-+	veslvf	%v0, %v0, %v0
-+	veslvg	%v0, %v0, %v0
-+	veslvh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslb	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslf	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslg	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: veslh	%v0, %v0, 0
-+
-+	veslb	%v0, %v0, 0
-+	veslf	%v0, %v0, 0
-+	veslg	%v0, %v0, 0
-+	veslh	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesravb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesravf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesravg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesravh	%v0, %v0, %v0
-+
-+	vesravb	%v0, %v0, %v0
-+	vesravf	%v0, %v0, %v0
-+	vesravg	%v0, %v0, %v0
-+	vesravh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrab	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesraf	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrag	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrah	%v0, %v0, 0
-+
-+	vesrab	%v0, %v0, 0
-+	vesraf	%v0, %v0, 0
-+	vesrag	%v0, %v0, 0
-+	vesrah	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlvb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlvf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlvg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlvh	%v0, %v0, %v0
-+
-+	vesrlvb	%v0, %v0, %v0
-+	vesrlvf	%v0, %v0, %v0
-+	vesrlvg	%v0, %v0, %v0
-+	vesrlvh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlb	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlf	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlg	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vesrlh	%v0, %v0, 0
-+
-+	vesrlb	%v0, %v0, 0
-+	vesrlf	%v0, %v0, 0
-+	vesrlg	%v0, %v0, 0
-+	vesrlh	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfadb	%v0, %v0, %v0
-+
-+	vfadb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfcedb	%v0, %v0, %v0
-+#CHECK: vfcedbs	%v0, %v0, %v0
-+
-+	vfcedb	%v0, %v0, %v0
-+	vfcedbs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfchdb	%v0, %v0, %v0
-+#CHECK: vfchdbs	%v0, %v0, %v0
-+
-+	vfchdb	%v0, %v0, %v0
-+	vfchdbs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfddb	%v0, %v0, %v0
-+
-+	vfddb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaeb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaebs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezbs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaeh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaehs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezhs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaef	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaefs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfaezfs	%v0, %v0, %v0
-+
-+	vfaeb	%v0, %v0, %v0
-+	vfaezb	%v0, %v0, %v0
-+	vfaebs	%v0, %v0, %v0
-+	vfaezbs	%v0, %v0, %v0
-+	vfaeh	%v0, %v0, %v0
-+	vfaezh	%v0, %v0, %v0
-+	vfaehs	%v0, %v0, %v0
-+	vfaezhs	%v0, %v0, %v0
-+	vfaef	%v0, %v0, %v0
-+	vfaezf	%v0, %v0, %v0
-+	vfaefs	%v0, %v0, %v0
-+	vfaezfs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeeb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeebs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezbs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeeh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeehs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezhs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeef	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeefs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeezfs	%v0, %v0, %v0
-+
-+	vfeeb	%v0, %v0, %v0
-+	vfeezb	%v0, %v0, %v0
-+	vfeebs	%v0, %v0, %v0
-+	vfeezbs	%v0, %v0, %v0
-+	vfeeh	%v0, %v0, %v0
-+	vfeezh	%v0, %v0, %v0
-+	vfeehs	%v0, %v0, %v0
-+	vfeezhs	%v0, %v0, %v0
-+	vfeef	%v0, %v0, %v0
-+	vfeezf	%v0, %v0, %v0
-+	vfeefs	%v0, %v0, %v0
-+	vfeezfs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeneb   %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezb  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenebs  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezbs %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfeneh   %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezh  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenehs  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezhs %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenef   %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezf  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenefs  %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfenezfs %v0, %v0, %v0
-+
-+	vfeneb   %v0, %v0, %v0
-+	vfenezb  %v0, %v0, %v0
-+	vfenebs  %v0, %v0, %v0
-+	vfenezbs %v0, %v0, %v0
-+	vfeneh   %v0, %v0, %v0
-+	vfenezh  %v0, %v0, %v0
-+	vfenehs  %v0, %v0, %v0
-+	vfenezhs %v0, %v0, %v0
-+	vfenef   %v0, %v0, %v0
-+	vfenezf  %v0, %v0, %v0
-+	vfenefs  %v0, %v0, %v0
-+	vfenezfs %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfidb	%v0, %v0, 0, 0
-+
-+	vfidb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrbs	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrh	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrhs	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vistrfs	%v0, %v0
-+
-+	vistrb	%v0, %v0
-+	vistrbs	%v0, %v0
-+	vistrh	%v0, %v0
-+	vistrhs	%v0, %v0
-+	vistrf	%v0, %v0
-+	vistrfs	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vflcdb	%v0, %v0
-+
-+	vflcdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vflndb	%v0, %v0
-+
-+	vflndb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vflpdb	%v0, %v0
-+
-+	vflpdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfmadb	%v0, %v0, %v0, %v0
-+
-+	vfmadb	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfmdb	%v0, %v0, %v0
-+
-+	vfmdb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfmsdb	%v0, %v0, %v0, %v0
-+
-+	vfmsdb	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfsdb	%v0, %v0, %v0
-+
-+	vfsdb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vfsqdb	%v0, %v0
-+
-+	vfsqdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vftcidb	%v0, %v0, 0
-+
-+	vftcidb	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgbm	%v0, 0
-+
-+	vgbm	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgef	%v0, 0(%v0, %r1), 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgeg	%v0, 0(%v0, %r1), 0
-+
-+	vgef	%v0, 0(%v0, %r1), 0
-+	vgeg	%v0, 0(%v0, %r1), 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmab	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmaf	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmag	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmah	%v0, %v0, %v0, %v0
-+
-+	vgfmab	%v0, %v0, %v0, %v0
-+	vgfmaf	%v0, %v0, %v0, %v0
-+	vgfmag	%v0, %v0, %v0, %v0
-+	vgfmah	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgfmh	%v0, %v0, %v0
-+
-+	vgfmb	%v0, %v0, %v0
-+	vgfmf	%v0, %v0, %v0
-+	vgfmg	%v0, %v0, %v0
-+	vgfmh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgmb	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgmf	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgmg	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vgmh	%v0, 0, 0
-+
-+	vgmb	%v0, 0, 0
-+	vgmf	%v0, 0, 0
-+	vgmg	%v0, 0, 0
-+	vgmh	%v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vl	%v0, 0
-+
-+	vl	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlbb	%v0, 0, 0
-+
-+	vlbb	%v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlcb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlcf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlcg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlch	%v0, %v0
-+
-+	vlcb	%v0, %v0
-+	vlcf	%v0, %v0
-+	vlcg	%v0, %v0
-+	vlch	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vldeb	%v0, %v0
-+
-+	vldeb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleb	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlef	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleg	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleh	%v0, 0, 0
-+
-+	vleb	%v0, 0, 0
-+	vlef	%v0, 0, 0
-+	vleg	%v0, 0, 0
-+	vleh	%v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vledb	%v0, %v0, 0, 0
-+
-+	vledb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleib	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleif	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleig	%v0, 0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vleih	%v0, 0, 0
-+
-+	vleib	%v0, 0, 0
-+	vleif	%v0, 0, 0
-+	vleig	%v0, 0, 0
-+	vleih	%v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlgvb	%r0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlgvf	%r0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlgvg	%r0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlgvh	%r0, %v0, 0
-+
-+	vlgvb	%r0, %v0, 0
-+	vlgvf	%r0, %v0, 0
-+	vlgvg	%r0, %v0, 0
-+	vlgvh	%r0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vll	%v0, %r0, 0
-+
-+	vll	%v0, %r0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vllezb	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vllezf	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vllezg	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vllezh	%v0, 0
-+
-+	vllezb	%v0, 0
-+	vllezf	%v0, 0
-+	vllezg	%v0, 0
-+	vllezh	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlm	%v0, %v0, 0
-+
-+	vlm	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlpb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlpf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlpg	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlph	%v0, %v0
-+
-+	vlpb	%v0, %v0
-+	vlpf	%v0, %v0
-+	vlpg	%v0, %v0
-+	vlph	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlr	%v0, %v0
-+
-+	vlr	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlrepb	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlrepf	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlrepg	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlreph	%v0, 0
-+
-+	vlrepb	%v0, 0
-+	vlrepf	%v0, 0
-+	vlrepg	%v0, 0
-+	vlreph	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlvgb	%v0, %r0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlvgf	%v0, %r0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlvgg	%v0, %r0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlvgh	%v0, %r0, 0
-+
-+	vlvgb	%v0, %r0, 0
-+	vlvgf	%v0, %r0, 0
-+	vlvgg	%v0, %r0, 0
-+	vlvgh	%v0, %r0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vlvgp	%v0, %r0, %r0
-+
-+	vlvgp	%v0, %r0, %r0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaeb	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaef	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaeh	%v0, %v0, %v0, %v0
-+
-+	vmaeb	%v0, %v0, %v0, %v0
-+	vmaef	%v0, %v0, %v0, %v0
-+	vmaeh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmahb	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmahf	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmahh	%v0, %v0, %v0, %v0
-+
-+	vmahb	%v0, %v0, %v0, %v0
-+	vmahf	%v0, %v0, %v0, %v0
-+	vmahh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalb	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalf	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalhw	%v0, %v0, %v0, %v0
-+
-+	vmalb	%v0, %v0, %v0, %v0
-+	vmalf	%v0, %v0, %v0, %v0
-+	vmalhw	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaleb	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalef	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaleh	%v0, %v0, %v0, %v0
-+
-+	vmaleb	%v0, %v0, %v0, %v0
-+	vmalef	%v0, %v0, %v0, %v0
-+	vmaleh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalhb	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalhf	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalhh	%v0, %v0, %v0, %v0
-+
-+	vmalhb	%v0, %v0, %v0, %v0
-+	vmalhf	%v0, %v0, %v0, %v0
-+	vmalhh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalob	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmalof	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaloh	%v0, %v0, %v0, %v0
-+
-+	vmalob	%v0, %v0, %v0, %v0
-+	vmalof	%v0, %v0, %v0, %v0
-+	vmaloh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaob	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaof	%v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmaoh	%v0, %v0, %v0, %v0
-+
-+	vmaob	%v0, %v0, %v0, %v0
-+	vmaof	%v0, %v0, %v0, %v0
-+	vmaoh	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmeb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmef	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmeh	%v0, %v0, %v0
-+
-+	vmeb	%v0, %v0, %v0
-+	vmef	%v0, %v0, %v0
-+	vmeh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmhb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmhf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmhh	%v0, %v0, %v0
-+
-+	vmhb	%v0, %v0, %v0
-+	vmhf	%v0, %v0, %v0
-+	vmhh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlh	%v0, %v0, %v0
-+
-+	vmlb	%v0, %v0, %v0
-+	vmlf	%v0, %v0, %v0
-+	vmlh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmleb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlef	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmleh	%v0, %v0, %v0
-+
-+	vmleb	%v0, %v0, %v0
-+	vmlef	%v0, %v0, %v0
-+	vmleh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlhb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlhf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlhh	%v0, %v0, %v0
-+
-+	vmlhb	%v0, %v0, %v0
-+	vmlhf	%v0, %v0, %v0
-+	vmlhh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlob	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmlof	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmloh	%v0, %v0, %v0
-+
-+	vmlob	%v0, %v0, %v0
-+	vmlof	%v0, %v0, %v0
-+	vmloh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmng	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnh	%v0, %v0, %v0
-+
-+	vmnb	%v0, %v0, %v0
-+	vmnf	%v0, %v0, %v0
-+	vmng	%v0, %v0, %v0
-+	vmnh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnlb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnlf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnlg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmnlh	%v0, %v0, %v0
-+
-+	vmnlb	%v0, %v0, %v0
-+	vmnlf	%v0, %v0, %v0
-+	vmnlg	%v0, %v0, %v0
-+	vmnlh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmob	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmof	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmoh	%v0, %v0, %v0
-+
-+	vmob	%v0, %v0, %v0
-+	vmof	%v0, %v0, %v0
-+	vmoh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrhb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrhf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrhg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrhh	%v0, %v0, %v0
-+
-+	vmrhb	%v0, %v0, %v0
-+	vmrhf	%v0, %v0, %v0
-+	vmrhg	%v0, %v0, %v0
-+	vmrhh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrlb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrlf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrlg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmrlh	%v0, %v0, %v0
-+
-+	vmrlb	%v0, %v0, %v0
-+	vmrlf	%v0, %v0, %v0
-+	vmrlg	%v0, %v0, %v0
-+	vmrlh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxh	%v0, %v0, %v0
-+
-+	vmxb	%v0, %v0, %v0
-+	vmxf	%v0, %v0, %v0
-+	vmxg	%v0, %v0, %v0
-+	vmxh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxlb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxlf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxlg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vmxlh	%v0, %v0, %v0
-+
-+	vmxlb	%v0, %v0, %v0
-+	vmxlf	%v0, %v0, %v0
-+	vmxlg	%v0, %v0, %v0
-+	vmxlh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vn	%v0, %v0, %v0
-+
-+	vn	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vnc	%v0, %v0, %v0
-+
-+	vnc	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vno	%v0, %v0, %v0
-+
-+	vno	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vo	%v0, %v0, %v0
-+
-+	vo	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vone	%v0
-+
-+	vone	%v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpdi	%v0, %v0, %v0, 0
-+
-+	vpdi	%v0, %v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vperm	%v0, %v0, %v0, %v0
-+
-+	vperm	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpkf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpkg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpkh	%v0, %v0, %v0
-+
-+	vpkf	%v0, %v0, %v0
-+	vpkg	%v0, %v0, %v0
-+	vpkh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpksf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpksg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpksh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpksfs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpksgs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpkshs	%v0, %v0, %v0
-+
-+	vpksf	%v0, %v0, %v0
-+	vpksg	%v0, %v0, %v0
-+	vpksh	%v0, %v0, %v0
-+	vpksfs	%v0, %v0, %v0
-+	vpksgs	%v0, %v0, %v0
-+	vpkshs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklsf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklsg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklsh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklsfs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklsgs	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpklshs	%v0, %v0, %v0
-+
-+	vpklsf	%v0, %v0, %v0
-+	vpklsg	%v0, %v0, %v0
-+	vpklsh	%v0, %v0, %v0
-+	vpklsfs	%v0, %v0, %v0
-+	vpklsgs	%v0, %v0, %v0
-+	vpklshs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vpopct	%v0, %v0
-+
-+	vpopct	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepb	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepf	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepg	%v0, %v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vreph	%v0, %v0, 0
-+
-+	vrepb	%v0, %v0, 0
-+	vrepf	%v0, %v0, 0
-+	vrepg	%v0, %v0, 0
-+	vreph	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepib	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepif	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepig	%v0, 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vrepih	%v0, 0
-+
-+	vrepib	%v0, 0
-+	vrepif	%v0, 0
-+	vrepig	%v0, 0
-+	vrepih	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsg	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsq	%v0, %v0, %v0
-+
-+	vsb	%v0, %v0, %v0
-+	vsf	%v0, %v0, %v0
-+	vsg	%v0, %v0, %v0
-+	vsh	%v0, %v0, %v0
-+	vsq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsbcbiq	%v0, %v0, %v0
-+
-+	vsbcbiq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsbiq	%v0, %v0, %v0
-+
-+	vsbiq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscbib	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscbif	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscbig	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscbih	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscbiq	%v0, %v0, %v0
-+
-+	vscbib	%v0, %v0, %v0
-+	vscbif	%v0, %v0, %v0
-+	vscbig	%v0, %v0, %v0
-+	vscbih	%v0, %v0, %v0
-+	vscbiq	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vscef	%v0, 0(%v0, %r1), 0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsceg	%v0, 0(%v0, %r1), 0
-+
-+	vscef	%v0, 0(%v0, %r1), 0
-+	vsceg	%v0, 0(%v0, %r1), 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsegb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsegf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsegh	%v0, %v0
-+
-+	vsegb	%v0, %v0
-+	vsegf	%v0, %v0
-+	vsegh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsel	%v0, %v0, %v0, %v0
-+
-+	vsel	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsl	%v0, %v0, %v0
-+
-+	vsl	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vslb	%v0, %v0, %v0
-+
-+	vslb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsldb	%v0, %v0, %v0, 0
-+
-+	vsldb	%v0, %v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsra	%v0, %v0, %v0
-+
-+	vsra	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsrab	%v0, %v0, %v0
-+
-+	vsrab	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsrl	%v0, %v0, %v0
-+
-+	vsrl	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsrlb	%v0, %v0, %v0
-+
-+	vsrlb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vst	%v0, 0
-+
-+	vst	%v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstl	%v0, %r0, 0
-+
-+	vstl	%v0, %r0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstm	%v0, %v0, 0
-+
-+	vstm	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczb  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrcbs  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczbs %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrch   %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczh  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrchs  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczhs %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczf  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrcfs  %v0, %v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vstrczfs %v0, %v0, %v0, %v0
-+
-+        vstrcb   %v0, %v0, %v0, %v0
-+        vstrczb  %v0, %v0, %v0, %v0
-+        vstrcbs  %v0, %v0, %v0, %v0
-+        vstrczbs %v0, %v0, %v0, %v0
-+        vstrch   %v0, %v0, %v0, %v0
-+        vstrczh  %v0, %v0, %v0, %v0
-+        vstrchs  %v0, %v0, %v0, %v0
-+        vstrczhs %v0, %v0, %v0, %v0
-+        vstrcf   %v0, %v0, %v0, %v0
-+        vstrczf  %v0, %v0, %v0, %v0
-+        vstrcfs  %v0, %v0, %v0, %v0
-+        vstrczfs %v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumgh	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumgf	%v0, %v0, %v0
-+
-+	vsumgh	%v0, %v0, %v0
-+	vsumgf	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumqf	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumqg	%v0, %v0, %v0
-+
-+	vsumqf	%v0, %v0, %v0
-+	vsumqg	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumb	%v0, %v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vsumh	%v0, %v0, %v0
-+
-+	vsumb	%v0, %v0, %v0
-+	vsumh	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vtm	%v0, %v0
-+
-+	vtm	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuphb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuphf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuphh	%v0, %v0
-+
-+	vuphb	%v0, %v0
-+	vuphf	%v0, %v0
-+	vuphh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplhb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplhf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplhh	%v0, %v0
-+
-+	vuplhb	%v0, %v0
-+	vuplhf	%v0, %v0
-+	vuplhh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vuplhw	%v0, %v0
-+
-+	vuplb	%v0, %v0
-+	vuplf	%v0, %v0
-+	vuplhw	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vupllb	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vupllf	%v0, %v0
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vupllh	%v0, %v0
-+
-+	vupllb	%v0, %v0
-+	vupllf	%v0, %v0
-+	vupllh	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vx	%v0, %v0, %v0
-+
-+	vx	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: vzero	%v0
-+
-+	vzero	%v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wcdgb	%v0, %v0, 0, 0
-+
-+	wcdgb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wcdlgb	%v0, %v0, 0, 0
-+
-+	wcdlgb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wcgdb	%v0, %v0, 0, 0
-+
-+	wcgdb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wclgdb	%v0, %v0, 0, 0
-+
-+	wclgdb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfadb	%v0, %v0, %v0
-+
-+	wfadb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfcdb	%v0, %v0
-+
-+	wfcdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfcedb	%v0, %v0, %v0
-+#CHECK: wfcedbs	%v0, %v0, %v0
-+
-+	wfcedb	%v0, %v0, %v0
-+	wfcedbs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfchdb	%v0, %v0, %v0
-+#CHECK: wfchdbs	%v0, %v0, %v0
-+
-+	wfchdb	%v0, %v0, %v0
-+	wfchdbs	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfchedb	%v0, %v0, %v0
-+#CHECK: wfchedbs %v0, %v0, %v0
-+
-+	wfchedb	%v0, %v0, %v0
-+	wfchedbs %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfddb	%v0, %v0, %v0
-+
-+	wfddb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfidb	%v0, %v0, 0, 0
-+
-+	wfidb	%v0, %v0, 0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfkdb	%v0, %v0
-+
-+	wfkdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wflcdb	%v0, %v0
-+
-+	wflcdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wflndb	%v0, %v0
-+
-+	wflndb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wflpdb	%v0, %v0
-+
-+	wflpdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfmadb	%v0, %v0, %v0, %v0
-+
-+	wfmadb	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfmdb	%v0, %v0, %v0
-+
-+	wfmdb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfmsdb	%v0, %v0, %v0, %v0
-+
-+	wfmsdb	%v0, %v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfsdb	%v0, %v0, %v0
-+
-+	wfsdb	%v0, %v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wfsqdb	%v0, %v0
-+
-+	wfsqdb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wftcidb	%v0, %v0, 0
-+
-+	wftcidb	%v0, %v0, 0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wldeb	%v0, %v0
-+
-+	wldeb	%v0, %v0
-+
-+#CHECK: error: {{(instruction requires: vector)?}}
-+#CHECK: wledb	%v0, %v0, 0, 0
-+
-+	wledb	%v0, %v0, 0, 0
-Index: llvm-36/test/MC/SystemZ/insn-bad.s
-===================================================================
---- llvm-36.orig/test/MC/SystemZ/insn-bad.s
-+++ llvm-36/test/MC/SystemZ/insn-bad.s
-@@ -2666,6 +2666,11 @@
- 	pfdrl	1, 1
- 	pfdrl	1, 0x100000000
- 
-+#CHECK: error: {{(instruction requires: population-count)?}}
-+#CHECK: popcnt	%r0, %r0
-+
-+	popcnt	%r0, %r0
-+
- #CHECK: error: invalid operand
- #CHECK: risbg	%r0,%r0,0,0,-1
- #CHECK: error: invalid operand
-Index: llvm-36/test/MC/SystemZ/insn-good-z13.s
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/SystemZ/insn-good-z13.s
-@@ -0,0 +1,5039 @@
-+# For z13 and above.
-+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z13 -show-encoding %s \
-+# RUN:   | FileCheck %s
-+
-+#CHECK: lcbb    %r0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x27]
-+#CHECK: lcbb    %r0, 0, 15              # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x27]
-+#CHECK: lcbb    %r0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x27]
-+#CHECK: lcbb    %r0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x27]
-+#CHECK: lcbb    %r0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x27]
-+#CHECK: lcbb    %r15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x27]
-+#CHECK: lcbb    %r2, 1383(%r3,%r4), 8   # encoding: [0xe7,0x23,0x45,0x67,0x80,0x27]
-+
-+	lcbb	%r0, 0, 0
-+	lcbb	%r0, 0, 15
-+	lcbb	%r0, 4095, 0
-+	lcbb	%r0, 0(%r15), 0
-+	lcbb	%r0, 0(%r15,%r1), 0
-+	lcbb	%r15, 0, 0
-+	lcbb	%r2, 1383(%r3,%r4), 8
-+
-+#CHECK: vab     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf3]
-+#CHECK: vab     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf3]
-+#CHECK: vab     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf3]
-+#CHECK: vab     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf3]
-+#CHECK: vab     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf3]
-+
-+	vab	%v0, %v0, %v0
-+	vab	%v0, %v0, %v31
-+	vab	%v0, %v31, %v0
-+	vab	%v31, %v0, %v0
-+	vab	%v18, %v3, %v20
-+
-+#CHECK: vaccb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf1]
-+#CHECK: vaccb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf1]
-+#CHECK: vaccb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf1]
-+#CHECK: vaccb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf1]
-+#CHECK: vaccb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf1]
-+
-+	vaccb	%v0, %v0, %v0
-+	vaccb	%v0, %v0, %v31
-+	vaccb	%v0, %v31, %v0
-+	vaccb	%v31, %v0, %v0
-+	vaccb	%v18, %v3, %v20
-+
-+#CHECK: vacccq  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x04,0x00,0x00,0xb9]
-+#CHECK: vacccq  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xb9]
-+#CHECK: vacccq  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xb9]
-+#CHECK: vacccq  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xb9]
-+#CHECK: vacccq  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xb9]
-+#CHECK: vacccq  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xb9]
-+
-+	vacccq	%v0, %v0, %v0, %v0
-+	vacccq	%v0, %v0, %v0, %v31
-+	vacccq	%v0, %v0, %v31, %v0
-+	vacccq	%v0, %v31, %v0, %v0
-+	vacccq	%v31, %v0, %v0, %v0
-+	vacccq	%v13, %v17, %v21, %v25
-+
-+#CHECK: vaccf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf1]
-+#CHECK: vaccf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf1]
-+#CHECK: vaccf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf1]
-+#CHECK: vaccf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf1]
-+#CHECK: vaccf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf1]
-+
-+	vaccf	%v0, %v0, %v0
-+	vaccf	%v0, %v0, %v31
-+	vaccf	%v0, %v31, %v0
-+	vaccf	%v31, %v0, %v0
-+	vaccf	%v18, %v3, %v20
-+
-+#CHECK: vaccg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf1]
-+#CHECK: vaccg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf1]
-+#CHECK: vaccg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf1]
-+#CHECK: vaccg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf1]
-+#CHECK: vaccg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf1]
-+
-+	vaccg	%v0, %v0, %v0
-+	vaccg	%v0, %v0, %v31
-+	vaccg	%v0, %v31, %v0
-+	vaccg	%v31, %v0, %v0
-+	vaccg	%v18, %v3, %v20
-+
-+#CHECK: vacch   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf1]
-+#CHECK: vacch   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf1]
-+#CHECK: vacch   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf1]
-+#CHECK: vacch   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf1]
-+#CHECK: vacch   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf1]
-+
-+	vacch	%v0, %v0, %v0
-+	vacch	%v0, %v0, %v31
-+	vacch	%v0, %v31, %v0
-+	vacch	%v31, %v0, %v0
-+	vacch	%v18, %v3, %v20
-+
-+#CHECK: vaccq   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf1]
-+#CHECK: vaccq   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf1]
-+#CHECK: vaccq   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf1]
-+#CHECK: vaccq   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf1]
-+#CHECK: vaccq   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf1]
-+
-+	vaccq	%v0, %v0, %v0
-+	vaccq	%v0, %v0, %v31
-+	vaccq	%v0, %v31, %v0
-+	vaccq	%v31, %v0, %v0
-+	vaccq	%v18, %v3, %v20
-+
-+#CHECK: vacq    %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbb]
-+#CHECK: vacq    %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbb]
-+#CHECK: vacq    %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbb]
-+#CHECK: vacq    %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbb]
-+#CHECK: vacq    %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbb]
-+#CHECK: vacq    %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbb]
-+
-+	vacq	%v0, %v0, %v0, %v0
-+	vacq	%v0, %v0, %v0, %v31
-+	vacq	%v0, %v0, %v31, %v0
-+	vacq	%v0, %v31, %v0, %v0
-+	vacq	%v31, %v0, %v0, %v0
-+	vacq	%v13, %v17, %v21, %v25
-+
-+#CHECK: vaf     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf3]
-+#CHECK: vaf     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf3]
-+#CHECK: vaf     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf3]
-+#CHECK: vaf     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf3]
-+#CHECK: vaf     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf3]
-+
-+	vaf	%v0, %v0, %v0
-+	vaf	%v0, %v0, %v31
-+	vaf	%v0, %v31, %v0
-+	vaf	%v31, %v0, %v0
-+	vaf	%v18, %v3, %v20
-+
-+#CHECK: vag     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf3]
-+#CHECK: vag     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf3]
-+#CHECK: vag     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf3]
-+#CHECK: vag     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf3]
-+#CHECK: vag     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf3]
-+
-+	vag	%v0, %v0, %v0
-+	vag	%v0, %v0, %v31
-+	vag	%v0, %v31, %v0
-+	vag	%v31, %v0, %v0
-+	vag	%v18, %v3, %v20
-+
-+#CHECK: vah     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf3]
-+#CHECK: vah     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf3]
-+#CHECK: vah     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf3]
-+#CHECK: vah     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf3]
-+#CHECK: vah     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf3]
-+
-+	vah	%v0, %v0, %v0
-+	vah	%v0, %v0, %v31
-+	vah	%v0, %v31, %v0
-+	vah	%v31, %v0, %v0
-+	vah	%v18, %v3, %v20
-+
-+#CHECK: vaq     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf3]
-+#CHECK: vaq     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf3]
-+#CHECK: vaq     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf3]
-+#CHECK: vaq     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf3]
-+#CHECK: vaq     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf3]
-+
-+	vaq	%v0, %v0, %v0
-+	vaq	%v0, %v0, %v31
-+	vaq	%v0, %v31, %v0
-+	vaq	%v31, %v0, %v0
-+	vaq	%v18, %v3, %v20
-+
-+#CHECK: vavgb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf2]
-+#CHECK: vavgb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf2]
-+#CHECK: vavgb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf2]
-+#CHECK: vavgb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf2]
-+#CHECK: vavgb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf2]
-+
-+	vavgb	%v0, %v0, %v0
-+	vavgb	%v0, %v0, %v31
-+	vavgb	%v0, %v31, %v0
-+	vavgb	%v31, %v0, %v0
-+	vavgb	%v18, %v3, %v20
-+
-+#CHECK: vavgf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf2]
-+#CHECK: vavgf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf2]
-+#CHECK: vavgf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf2]
-+#CHECK: vavgf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf2]
-+#CHECK: vavgf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf2]
-+
-+	vavgf	%v0, %v0, %v0
-+	vavgf	%v0, %v0, %v31
-+	vavgf	%v0, %v31, %v0
-+	vavgf	%v31, %v0, %v0
-+	vavgf	%v18, %v3, %v20
-+
-+#CHECK: vavgg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf2]
-+#CHECK: vavgg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf2]
-+#CHECK: vavgg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf2]
-+#CHECK: vavgg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf2]
-+#CHECK: vavgg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf2]
-+
-+	vavgg	%v0, %v0, %v0
-+	vavgg	%v0, %v0, %v31
-+	vavgg	%v0, %v31, %v0
-+	vavgg	%v31, %v0, %v0
-+	vavgg	%v18, %v3, %v20
-+
-+#CHECK: vavgh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf2]
-+#CHECK: vavgh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf2]
-+#CHECK: vavgh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf2]
-+#CHECK: vavgh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf2]
-+#CHECK: vavgh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf2]
-+
-+	vavgh	%v0, %v0, %v0
-+	vavgh	%v0, %v0, %v31
-+	vavgh	%v0, %v31, %v0
-+	vavgh	%v31, %v0, %v0
-+	vavgh	%v18, %v3, %v20
-+
-+#CHECK: vavglb  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf0]
-+#CHECK: vavglb  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf0]
-+#CHECK: vavglb  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf0]
-+#CHECK: vavglb  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf0]
-+#CHECK: vavglb  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf0]
-+
-+	vavglb	%v0, %v0, %v0
-+	vavglb	%v0, %v0, %v31
-+	vavglb	%v0, %v31, %v0
-+	vavglb	%v31, %v0, %v0
-+	vavglb	%v18, %v3, %v20
-+
-+#CHECK: vavglf  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf0]
-+#CHECK: vavglf  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf0]
-+#CHECK: vavglf  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf0]
-+#CHECK: vavglf  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf0]
-+#CHECK: vavglf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf0]
-+
-+	vavglf	%v0, %v0, %v0
-+	vavglf	%v0, %v0, %v31
-+	vavglf	%v0, %v31, %v0
-+	vavglf	%v31, %v0, %v0
-+	vavglf	%v18, %v3, %v20
-+
-+#CHECK: vavglg  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf0]
-+#CHECK: vavglg  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf0]
-+#CHECK: vavglg  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf0]
-+#CHECK: vavglg  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf0]
-+#CHECK: vavglg  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf0]
-+
-+	vavglg	%v0, %v0, %v0
-+	vavglg	%v0, %v0, %v31
-+	vavglg	%v0, %v31, %v0
-+	vavglg	%v31, %v0, %v0
-+	vavglg	%v18, %v3, %v20
-+
-+#CHECK: vavglh  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf0]
-+#CHECK: vavglh  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf0]
-+#CHECK: vavglh  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf0]
-+#CHECK: vavglh  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf0]
-+#CHECK: vavglh  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf0]
-+
-+	vavglh	%v0, %v0, %v0
-+	vavglh	%v0, %v0, %v31
-+	vavglh	%v0, %v31, %v0
-+	vavglh	%v31, %v0, %v0
-+	vavglh	%v18, %v3, %v20
-+
-+#CHECK: vcdgb   %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc3]
-+#CHECK: vcdgb   %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc3]
-+#CHECK: vcdgb   %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc3]
-+#CHECK: vcdgb   %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3]
-+#CHECK: vcdgb   %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc3]
-+#CHECK: vcdgb   %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc3]
-+#CHECK: vcdgb   %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc3]
-+
-+	vcdgb	%v0, %v0, 0, 0
-+	vcdgb	%v0, %v0, 0, 15
-+	vcdgb	%v0, %v0, 4, 0
-+	vcdgb	%v0, %v0, 12, 0
-+	vcdgb	%v0, %v31, 0, 0
-+	vcdgb	%v31, %v0, 0, 0
-+	vcdgb	%v14, %v17, 4, 10
-+
-+#CHECK: vcdlgb  %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc1]
-+#CHECK: vcdlgb  %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc1]
-+#CHECK: vcdlgb  %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc1]
-+#CHECK: vcdlgb  %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1]
-+#CHECK: vcdlgb  %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc1]
-+#CHECK: vcdlgb  %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc1]
-+#CHECK: vcdlgb  %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc1]
-+
-+	vcdlgb	%v0, %v0, 0, 0
-+	vcdlgb	%v0, %v0, 0, 15
-+	vcdlgb	%v0, %v0, 4, 0
-+	vcdlgb	%v0, %v0, 12, 0
-+	vcdlgb	%v0, %v31, 0, 0
-+	vcdlgb	%v31, %v0, 0, 0
-+	vcdlgb	%v14, %v17, 4, 10
-+
-+#CHECK: vcksm   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x66]
-+#CHECK: vcksm   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x66]
-+#CHECK: vcksm   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x66]
-+#CHECK: vcksm   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x66]
-+#CHECK: vcksm   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x66]
-+
-+	vcksm	%v0, %v0, %v0
-+	vcksm	%v0, %v0, %v31
-+	vcksm	%v0, %v31, %v0
-+	vcksm	%v31, %v0, %v0
-+	vcksm	%v18, %v3, %v20
-+
-+#CHECK: vceqb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf8]
-+#CHECK: vceqb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf8]
-+#CHECK: vceqb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf8]
-+#CHECK: vceqb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf8]
-+#CHECK: vceqb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf8]
-+#CHECK: vceqbs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf8]
-+
-+	vceqb	%v0, %v0, %v0
-+	vceqb	%v0, %v0, %v31
-+	vceqb	%v0, %v31, %v0
-+	vceqb	%v31, %v0, %v0
-+	vceqb	%v18, %v3, %v20
-+	vceqbs	%v5, %v22, %v7
-+
-+#CHECK: vceqf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf8]
-+#CHECK: vceqf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf8]
-+#CHECK: vceqf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf8]
-+#CHECK: vceqf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf8]
-+#CHECK: vceqf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf8]
-+#CHECK: vceqfs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf8]
-+
-+	vceqf	%v0, %v0, %v0
-+	vceqf	%v0, %v0, %v31
-+	vceqf	%v0, %v31, %v0
-+	vceqf	%v31, %v0, %v0
-+	vceqf	%v18, %v3, %v20
-+	vceqfs	%v5, %v22, %v7
-+
-+#CHECK: vceqg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf8]
-+#CHECK: vceqg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf8]
-+#CHECK: vceqg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf8]
-+#CHECK: vceqg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf8]
-+#CHECK: vceqg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf8]
-+#CHECK: vceqgs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf8]
-+
-+	vceqg	%v0, %v0, %v0
-+	vceqg	%v0, %v0, %v31
-+	vceqg	%v0, %v31, %v0
-+	vceqg	%v31, %v0, %v0
-+	vceqg	%v18, %v3, %v20
-+	vceqgs	%v5, %v22, %v7
-+
-+#CHECK: vceqh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf8]
-+#CHECK: vceqh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf8]
-+#CHECK: vceqh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf8]
-+#CHECK: vceqh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf8]
-+#CHECK: vceqh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf8]
-+#CHECK: vceqhs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf8]
-+
-+	vceqh	%v0, %v0, %v0
-+	vceqh	%v0, %v0, %v31
-+	vceqh	%v0, %v31, %v0
-+	vceqh	%v31, %v0, %v0
-+	vceqh	%v18, %v3, %v20
-+	vceqhs	%v5, %v22, %v7
-+
-+#CHECK: vcgdb   %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc2]
-+#CHECK: vcgdb   %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc2]
-+#CHECK: vcgdb   %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc2]
-+#CHECK: vcgdb   %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2]
-+#CHECK: vcgdb   %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc2]
-+#CHECK: vcgdb   %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc2]
-+#CHECK: vcgdb   %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc2]
-+
-+	vcgdb	%v0, %v0, 0, 0
-+	vcgdb	%v0, %v0, 0, 15
-+	vcgdb	%v0, %v0, 4, 0
-+	vcgdb	%v0, %v0, 12, 0
-+	vcgdb	%v0, %v31, 0, 0
-+	vcgdb	%v31, %v0, 0, 0
-+	vcgdb	%v14, %v17, 4, 10
-+
-+#CHECK: vchb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfb]
-+#CHECK: vchb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfb]
-+#CHECK: vchb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfb]
-+#CHECK: vchb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfb]
-+#CHECK: vchb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfb]
-+#CHECK: vchbs   %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x04,0xfb]
-+
-+	vchb	%v0, %v0, %v0
-+	vchb	%v0, %v0, %v31
-+	vchb	%v0, %v31, %v0
-+	vchb	%v31, %v0, %v0
-+	vchb	%v18, %v3, %v20
-+	vchbs	%v5, %v22, %v7
-+
-+#CHECK: vchf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfb]
-+#CHECK: vchf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfb]
-+#CHECK: vchf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfb]
-+#CHECK: vchf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfb]
-+#CHECK: vchf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfb]
-+#CHECK: vchfs   %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0xfb]
-+
-+	vchf	%v0, %v0, %v0
-+	vchf	%v0, %v0, %v31
-+	vchf	%v0, %v31, %v0
-+	vchf	%v31, %v0, %v0
-+	vchf	%v18, %v3, %v20
-+	vchfs	%v5, %v22, %v7
-+
-+#CHECK: vchg    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfb]
-+#CHECK: vchg    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfb]
-+#CHECK: vchg    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfb]
-+#CHECK: vchg    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfb]
-+#CHECK: vchg    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfb]
-+#CHECK: vchgs   %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x34,0xfb]
-+
-+	vchg	%v0, %v0, %v0
-+	vchg	%v0, %v0, %v31
-+	vchg	%v0, %v31, %v0
-+	vchg	%v31, %v0, %v0
-+	vchg	%v18, %v3, %v20
-+	vchgs	%v5, %v22, %v7
-+
-+#CHECK: vchh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfb]
-+#CHECK: vchh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfb]
-+#CHECK: vchh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfb]
-+#CHECK: vchh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfb]
-+#CHECK: vchh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfb]
-+#CHECK: vchhs   %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0xfb]
-+
-+	vchh	%v0, %v0, %v0
-+	vchh	%v0, %v0, %v31
-+	vchh	%v0, %v31, %v0
-+	vchh	%v31, %v0, %v0
-+	vchh	%v18, %v3, %v20
-+	vchhs	%v5, %v22, %v7
-+
-+#CHECK: vchlb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf9]
-+#CHECK: vchlb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf9]
-+#CHECK: vchlb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf9]
-+#CHECK: vchlb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf9]
-+#CHECK: vchlb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf9]
-+#CHECK: vchlbs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf9]
-+
-+	vchlb	%v0, %v0, %v0
-+	vchlb	%v0, %v0, %v31
-+	vchlb	%v0, %v31, %v0
-+	vchlb	%v31, %v0, %v0
-+	vchlb	%v18, %v3, %v20
-+	vchlbs	%v5, %v22, %v7
-+
-+#CHECK: vchlf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf9]
-+#CHECK: vchlf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf9]
-+#CHECK: vchlf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf9]
-+#CHECK: vchlf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf9]
-+#CHECK: vchlf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf9]
-+#CHECK: vchlfs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf9]
-+
-+	vchlf	%v0, %v0, %v0
-+	vchlf	%v0, %v0, %v31
-+	vchlf	%v0, %v31, %v0
-+	vchlf	%v31, %v0, %v0
-+	vchlf	%v18, %v3, %v20
-+	vchlfs	%v5, %v22, %v7
-+
-+#CHECK: vchlg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf9]
-+#CHECK: vchlg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf9]
-+#CHECK: vchlg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf9]
-+#CHECK: vchlg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf9]
-+#CHECK: vchlg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf9]
-+#CHECK: vchlgs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf9]
-+
-+	vchlg	%v0, %v0, %v0
-+	vchlg	%v0, %v0, %v31
-+	vchlg	%v0, %v31, %v0
-+	vchlg	%v31, %v0, %v0
-+	vchlg	%v18, %v3, %v20
-+	vchlgs	%v5, %v22, %v7
-+
-+#CHECK: vchlh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf9]
-+#CHECK: vchlh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf9]
-+#CHECK: vchlh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf9]
-+#CHECK: vchlh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf9]
-+#CHECK: vchlh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf9]
-+#CHECK: vchlhs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf9]
-+
-+	vchlh	%v0, %v0, %v0
-+	vchlh	%v0, %v0, %v31
-+	vchlh	%v0, %v31, %v0
-+	vchlh	%v31, %v0, %v0
-+	vchlh	%v18, %v3, %v20
-+	vchlhs	%v5, %v22, %v7
-+
-+#CHECK: vclgdb  %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc0]
-+#CHECK: vclgdb  %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc0]
-+#CHECK: vclgdb  %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc0]
-+#CHECK: vclgdb  %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0]
-+#CHECK: vclgdb  %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc0]
-+#CHECK: vclgdb  %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc0]
-+#CHECK: vclgdb  %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc0]
-+
-+	vclgdb	%v0, %v0, 0, 0
-+	vclgdb	%v0, %v0, 0, 15
-+	vclgdb	%v0, %v0, 4, 0
-+	vclgdb	%v0, %v0, 12, 0
-+	vclgdb	%v0, %v31, 0, 0
-+	vclgdb	%v31, %v0, 0, 0
-+	vclgdb	%v14, %v17, 4, 10
-+
-+#CHECK: vclzb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0x53]
-+#CHECK: vclzb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x53]
-+#CHECK: vclzb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x53]
-+#CHECK: vclzb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x53]
-+#CHECK: vclzb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x53]
-+#CHECK: vclzb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x53]
-+
-+	vclzb	%v0, %v0
-+	vclzb	%v0, %v15
-+	vclzb	%v0, %v31
-+	vclzb	%v15, %v0
-+	vclzb	%v31, %v0
-+	vclzb	%v14, %v17
-+
-+#CHECK: vclzf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0x53]
-+#CHECK: vclzf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x53]
-+#CHECK: vclzf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x53]
-+#CHECK: vclzf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x53]
-+#CHECK: vclzf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x53]
-+#CHECK: vclzf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x53]
-+
-+	vclzf	%v0, %v0
-+	vclzf	%v0, %v15
-+	vclzf	%v0, %v31
-+	vclzf	%v15, %v0
-+	vclzf	%v31, %v0
-+	vclzf	%v14, %v17
-+
-+#CHECK: vclzg   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0x53]
-+#CHECK: vclzg   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x53]
-+#CHECK: vclzg   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x53]
-+#CHECK: vclzg   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x53]
-+#CHECK: vclzg   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x53]
-+#CHECK: vclzg   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x53]
-+
-+	vclzg	%v0, %v0
-+	vclzg	%v0, %v15
-+	vclzg	%v0, %v31
-+	vclzg	%v15, %v0
-+	vclzg	%v31, %v0
-+	vclzg	%v14, %v17
-+
-+#CHECK: vclzh   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0x53]
-+#CHECK: vclzh   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x53]
-+#CHECK: vclzh   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x53]
-+#CHECK: vclzh   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x53]
-+#CHECK: vclzh   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x53]
-+#CHECK: vclzh   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x53]
-+
-+	vclzh	%v0, %v0
-+	vclzh	%v0, %v15
-+	vclzh	%v0, %v31
-+	vclzh	%v15, %v0
-+	vclzh	%v31, %v0
-+	vclzh	%v14, %v17
-+
-+#CHECK: vctzb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0x52]
-+#CHECK: vctzb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x52]
-+#CHECK: vctzb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x52]
-+#CHECK: vctzb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x52]
-+#CHECK: vctzb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x52]
-+#CHECK: vctzb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x52]
-+
-+	vctzb	%v0, %v0
-+	vctzb	%v0, %v15
-+	vctzb	%v0, %v31
-+	vctzb	%v15, %v0
-+	vctzb	%v31, %v0
-+	vctzb	%v14, %v17
-+
-+#CHECK: vctzf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0x52]
-+#CHECK: vctzf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x52]
-+#CHECK: vctzf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x52]
-+#CHECK: vctzf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x52]
-+#CHECK: vctzf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x52]
-+#CHECK: vctzf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x52]
-+
-+	vctzf	%v0, %v0
-+	vctzf	%v0, %v15
-+	vctzf	%v0, %v31
-+	vctzf	%v15, %v0
-+	vctzf	%v31, %v0
-+	vctzf	%v14, %v17
-+
-+#CHECK: vctzg   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0x52]
-+#CHECK: vctzg   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x52]
-+#CHECK: vctzg   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x52]
-+#CHECK: vctzg   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x52]
-+#CHECK: vctzg   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x52]
-+#CHECK: vctzg   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x52]
-+
-+	vctzg	%v0, %v0
-+	vctzg	%v0, %v15
-+	vctzg	%v0, %v31
-+	vctzg	%v15, %v0
-+	vctzg	%v31, %v0
-+	vctzg	%v14, %v17
-+
-+#CHECK: vctzh   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0x52]
-+#CHECK: vctzh   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x52]
-+#CHECK: vctzh   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x52]
-+#CHECK: vctzh   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x52]
-+#CHECK: vctzh   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x52]
-+#CHECK: vctzh   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x52]
-+
-+	vctzh	%v0, %v0
-+	vctzh	%v0, %v15
-+	vctzh	%v0, %v31
-+	vctzh	%v15, %v0
-+	vctzh	%v31, %v0
-+	vctzh	%v14, %v17
-+
-+#CHECK: vecb    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdb]
-+#CHECK: vecb    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdb]
-+#CHECK: vecb    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdb]
-+#CHECK: vecb    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdb]
-+#CHECK: vecb    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdb]
-+#CHECK: vecb    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdb]
-+
-+	vecb	%v0, %v0
-+	vecb	%v0, %v15
-+	vecb	%v0, %v31
-+	vecb	%v15, %v0
-+	vecb	%v31, %v0
-+	vecb	%v14, %v17
-+
-+#CHECK: vecf    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xdb]
-+#CHECK: vecf    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdb]
-+#CHECK: vecf    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdb]
-+#CHECK: vecf    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdb]
-+#CHECK: vecf    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdb]
-+#CHECK: vecf    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdb]
-+
-+	vecf	%v0, %v0
-+	vecf	%v0, %v15
-+	vecf	%v0, %v31
-+	vecf	%v15, %v0
-+	vecf	%v31, %v0
-+	vecf	%v14, %v17
-+
-+#CHECK: vecg    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdb]
-+#CHECK: vecg    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdb]
-+#CHECK: vecg    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdb]
-+#CHECK: vecg    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdb]
-+#CHECK: vecg    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdb]
-+#CHECK: vecg    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdb]
-+
-+	vecg	%v0, %v0
-+	vecg	%v0, %v15
-+	vecg	%v0, %v31
-+	vecg	%v15, %v0
-+	vecg	%v31, %v0
-+	vecg	%v14, %v17
-+
-+#CHECK: vech    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdb]
-+#CHECK: vech    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdb]
-+#CHECK: vech    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdb]
-+#CHECK: vech    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdb]
-+#CHECK: vech    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdb]
-+#CHECK: vech    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xdb]
-+
-+	vech	%v0, %v0
-+	vech	%v0, %v15
-+	vech	%v0, %v31
-+	vech	%v15, %v0
-+	vech	%v31, %v0
-+	vech	%v14, %v17
-+
-+#CHECK: veclb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd9]
-+#CHECK: veclb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd9]
-+#CHECK: veclb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd9]
-+#CHECK: veclb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd9]
-+#CHECK: veclb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd9]
-+#CHECK: veclb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd9]
-+
-+	veclb	%v0, %v0
-+	veclb	%v0, %v15
-+	veclb	%v0, %v31
-+	veclb	%v15, %v0
-+	veclb	%v31, %v0
-+	veclb	%v14, %v17
-+
-+#CHECK: veclf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd9]
-+#CHECK: veclf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd9]
-+#CHECK: veclf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd9]
-+#CHECK: veclf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd9]
-+#CHECK: veclf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd9]
-+#CHECK: veclf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd9]
-+
-+	veclf	%v0, %v0
-+	veclf	%v0, %v15
-+	veclf	%v0, %v31
-+	veclf	%v15, %v0
-+	veclf	%v31, %v0
-+	veclf	%v14, %v17
-+
-+#CHECK: veclg   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd9]
-+#CHECK: veclg   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd9]
-+#CHECK: veclg   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd9]
-+#CHECK: veclg   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd9]
-+#CHECK: veclg   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd9]
-+#CHECK: veclg   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd9]
-+
-+	veclg	%v0, %v0
-+	veclg	%v0, %v15
-+	veclg	%v0, %v31
-+	veclg	%v15, %v0
-+	veclg	%v31, %v0
-+	veclg	%v14, %v17
-+
-+#CHECK: veclh   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd9]
-+#CHECK: veclh   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd9]
-+#CHECK: veclh   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd9]
-+#CHECK: veclh   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd9]
-+#CHECK: veclh   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd9]
-+#CHECK: veclh   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd9]
-+
-+	veclh	%v0, %v0
-+	veclh	%v0, %v15
-+	veclh	%v0, %v31
-+	veclh	%v15, %v0
-+	veclh	%v31, %v0
-+	veclh	%v14, %v17
-+
-+#CHECK: verimb  %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x00,0x72]
-+#CHECK: verimb  %v0, %v0, %v0, 255      # encoding: [0xe7,0x00,0x00,0xff,0x00,0x72]
-+#CHECK: verimb  %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x72]
-+#CHECK: verimb  %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x72]
-+#CHECK: verimb  %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x72]
-+#CHECK: verimb  %v13, %v17, %v21, 121   # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x72]
-+
-+	verimb	%v0, %v0, %v0, 0
-+	verimb	%v0, %v0, %v0, 255
-+	verimb	%v0, %v0, %v31, 0
-+	verimb	%v0, %v31, %v0, 0
-+	verimb	%v31, %v0, %v0, 0
-+	verimb 	%v13, %v17, %v21, 0x79
-+
-+#CHECK: verimf  %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x20,0x72]
-+#CHECK: verimf  %v0, %v0, %v0, 255      # encoding: [0xe7,0x00,0x00,0xff,0x20,0x72]
-+#CHECK: verimf  %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x72]
-+#CHECK: verimf  %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x72]
-+#CHECK: verimf  %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x72]
-+#CHECK: verimf  %v13, %v17, %v21, 121   # encoding: [0xe7,0xd1,0x50,0x79,0x26,0x72]
-+
-+	verimf	%v0, %v0, %v0, 0
-+	verimf	%v0, %v0, %v0, 255
-+	verimf	%v0, %v0, %v31, 0
-+	verimf	%v0, %v31, %v0, 0
-+	verimf	%v31, %v0, %v0, 0
-+	verimf 	%v13, %v17, %v21, 0x79
-+
-+#CHECK: verimg  %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x30,0x72]
-+#CHECK: verimg  %v0, %v0, %v0, 255      # encoding: [0xe7,0x00,0x00,0xff,0x30,0x72]
-+#CHECK: verimg  %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x72]
-+#CHECK: verimg  %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x72]
-+#CHECK: verimg  %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x72]
-+#CHECK: verimg  %v13, %v17, %v21, 121   # encoding: [0xe7,0xd1,0x50,0x79,0x36,0x72]
-+
-+	verimg	%v0, %v0, %v0, 0
-+	verimg	%v0, %v0, %v0, 255
-+	verimg	%v0, %v0, %v31, 0
-+	verimg	%v0, %v31, %v0, 0
-+	verimg	%v31, %v0, %v0, 0
-+	verimg 	%v13, %v17, %v21, 0x79
-+
-+#CHECK: verimh  %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x10,0x72]
-+#CHECK: verimh  %v0, %v0, %v0, 255      # encoding: [0xe7,0x00,0x00,0xff,0x10,0x72]
-+#CHECK: verimh  %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x72]
-+#CHECK: verimh  %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x72]
-+#CHECK: verimh  %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x72]
-+#CHECK: verimh  %v13, %v17, %v21, 121   # encoding: [0xe7,0xd1,0x50,0x79,0x16,0x72]
-+
-+	verimh	%v0, %v0, %v0, 0
-+	verimh	%v0, %v0, %v0, 255
-+	verimh	%v0, %v0, %v31, 0
-+	verimh	%v0, %v31, %v0, 0
-+	verimh	%v31, %v0, %v0, 0
-+	verimh 	%v13, %v17, %v21, 0x79
-+
-+#CHECK: verllvb %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x73]
-+#CHECK: verllvb %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x73]
-+#CHECK: verllvb %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x73]
-+#CHECK: verllvb %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x73]
-+#CHECK: verllvb %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x73]
-+
-+	verllvb	%v0, %v0, %v0
-+	verllvb	%v0, %v0, %v31
-+	verllvb	%v0, %v31, %v0
-+	verllvb	%v31, %v0, %v0
-+	verllvb	%v18, %v3, %v20
-+
-+#CHECK: verllvf %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x73]
-+#CHECK: verllvf %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x73]
-+#CHECK: verllvf %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x73]
-+#CHECK: verllvf %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x73]
-+#CHECK: verllvf %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x73]
-+
-+	verllvf	%v0, %v0, %v0
-+	verllvf	%v0, %v0, %v31
-+	verllvf	%v0, %v31, %v0
-+	verllvf	%v31, %v0, %v0
-+	verllvf	%v18, %v3, %v20
-+
-+#CHECK: verllvg %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x73]
-+#CHECK: verllvg %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x73]
-+#CHECK: verllvg %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x73]
-+#CHECK: verllvg %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x73]
-+#CHECK: verllvg %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x73]
-+
-+	verllvg	%v0, %v0, %v0
-+	verllvg	%v0, %v0, %v31
-+	verllvg	%v0, %v31, %v0
-+	verllvg	%v31, %v0, %v0
-+	verllvg	%v18, %v3, %v20
-+
-+#CHECK: verllvh %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x73]
-+#CHECK: verllvh %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x73]
-+#CHECK: verllvh %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x73]
-+#CHECK: verllvh %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x73]
-+#CHECK: verllvh %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x73]
-+
-+	verllvh	%v0, %v0, %v0
-+	verllvh	%v0, %v0, %v31
-+	verllvh	%v0, %v31, %v0
-+	verllvh	%v31, %v0, %v0
-+	verllvh	%v18, %v3, %v20
-+
-+#CHECK: verllb  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x33]
-+#CHECK: verllb  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x33]
-+#CHECK: verllb  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x33]
-+#CHECK: verllb  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x33]
-+#CHECK: verllb  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x33]
-+#CHECK: verllb  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x33]
-+
-+	verllb	%v0, %v0, 0
-+	verllb	%v0, %v0, 4095
-+	verllb	%v0, %v0, 0(%r15)
-+	verllb	%v0, %v31, 0
-+	verllb	%v31, %v0, 0
-+	verllb	%v14, %v17, 1074(%r5)
-+
-+#CHECK: verllf  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x33]
-+#CHECK: verllf  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x33]
-+#CHECK: verllf  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x33]
-+#CHECK: verllf  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x33]
-+#CHECK: verllf  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x33]
-+#CHECK: verllf  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x33]
-+
-+	verllf	%v0, %v0, 0
-+	verllf	%v0, %v0, 4095
-+	verllf	%v0, %v0, 0(%r15)
-+	verllf	%v0, %v31, 0
-+	verllf	%v31, %v0, 0
-+	verllf	%v14, %v17, 1074(%r5)
-+
-+#CHECK: verllg  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x33]
-+#CHECK: verllg  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x33]
-+#CHECK: verllg  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x33]
-+#CHECK: verllg  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x33]
-+#CHECK: verllg  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x33]
-+#CHECK: verllg  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x33]
-+
-+	verllg	%v0, %v0, 0
-+	verllg	%v0, %v0, 4095
-+	verllg	%v0, %v0, 0(%r15)
-+	verllg	%v0, %v31, 0
-+	verllg	%v31, %v0, 0
-+	verllg	%v14, %v17, 1074(%r5)
-+
-+#CHECK: verllh  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x33]
-+#CHECK: verllh  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x33]
-+#CHECK: verllh  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x33]
-+#CHECK: verllh  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x33]
-+#CHECK: verllh  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x33]
-+#CHECK: verllh  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x33]
-+
-+	verllh	%v0, %v0, 0
-+	verllh	%v0, %v0, 4095
-+	verllh	%v0, %v0, 0(%r15)
-+	verllh	%v0, %v31, 0
-+	verllh	%v31, %v0, 0
-+	verllh	%v14, %v17, 1074(%r5)
-+
-+#CHECK: veslvb  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x70]
-+#CHECK: veslvb  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x70]
-+#CHECK: veslvb  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x70]
-+#CHECK: veslvb  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x70]
-+#CHECK: veslvb  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x70]
-+
-+	veslvb	%v0, %v0, %v0
-+	veslvb	%v0, %v0, %v31
-+	veslvb	%v0, %v31, %v0
-+	veslvb	%v31, %v0, %v0
-+	veslvb	%v18, %v3, %v20
-+
-+#CHECK: veslvf  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x70]
-+#CHECK: veslvf  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x70]
-+#CHECK: veslvf  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x70]
-+#CHECK: veslvf  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x70]
-+#CHECK: veslvf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x70]
-+
-+	veslvf	%v0, %v0, %v0
-+	veslvf	%v0, %v0, %v31
-+	veslvf	%v0, %v31, %v0
-+	veslvf	%v31, %v0, %v0
-+	veslvf	%v18, %v3, %v20
-+
-+#CHECK: veslvg  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x70]
-+#CHECK: veslvg  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x70]
-+#CHECK: veslvg  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x70]
-+#CHECK: veslvg  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x70]
-+#CHECK: veslvg  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x70]
-+
-+	veslvg	%v0, %v0, %v0
-+	veslvg	%v0, %v0, %v31
-+	veslvg	%v0, %v31, %v0
-+	veslvg	%v31, %v0, %v0
-+	veslvg	%v18, %v3, %v20
-+
-+#CHECK: veslvh  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x70]
-+#CHECK: veslvh  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x70]
-+#CHECK: veslvh  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x70]
-+#CHECK: veslvh  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x70]
-+#CHECK: veslvh  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x70]
-+
-+	veslvh	%v0, %v0, %v0
-+	veslvh	%v0, %v0, %v31
-+	veslvh	%v0, %v31, %v0
-+	veslvh	%v31, %v0, %v0
-+	veslvh	%v18, %v3, %v20
-+
-+#CHECK: veslb   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x30]
-+#CHECK: veslb   %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x30]
-+#CHECK: veslb   %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x30]
-+#CHECK: veslb   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x30]
-+#CHECK: veslb   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x30]
-+#CHECK: veslb   %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x30]
-+
-+	veslb	%v0, %v0, 0
-+	veslb	%v0, %v0, 4095
-+	veslb	%v0, %v0, 0(%r15)
-+	veslb	%v0, %v31, 0
-+	veslb	%v31, %v0, 0
-+	veslb	%v14, %v17, 1074(%r5)
-+
-+#CHECK: veslf   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x30]
-+#CHECK: veslf   %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x30]
-+#CHECK: veslf   %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x30]
-+#CHECK: veslf   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x30]
-+#CHECK: veslf   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x30]
-+#CHECK: veslf   %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x30]
-+
-+	veslf	%v0, %v0, 0
-+	veslf	%v0, %v0, 4095
-+	veslf	%v0, %v0, 0(%r15)
-+	veslf	%v0, %v31, 0
-+	veslf	%v31, %v0, 0
-+	veslf	%v14, %v17, 1074(%r5)
-+
-+#CHECK: veslg   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x30]
-+#CHECK: veslg   %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x30]
-+#CHECK: veslg   %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x30]
-+#CHECK: veslg   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x30]
-+#CHECK: veslg   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x30]
-+#CHECK: veslg   %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x30]
-+
-+	veslg	%v0, %v0, 0
-+	veslg	%v0, %v0, 4095
-+	veslg	%v0, %v0, 0(%r15)
-+	veslg	%v0, %v31, 0
-+	veslg	%v31, %v0, 0
-+	veslg	%v14, %v17, 1074(%r5)
-+
-+#CHECK: veslh   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x30]
-+#CHECK: veslh   %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x30]
-+#CHECK: veslh   %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x30]
-+#CHECK: veslh   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x30]
-+#CHECK: veslh   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x30]
-+#CHECK: veslh   %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x30]
-+
-+	veslh	%v0, %v0, 0
-+	veslh	%v0, %v0, 4095
-+	veslh	%v0, %v0, 0(%r15)
-+	veslh	%v0, %v31, 0
-+	veslh	%v31, %v0, 0
-+	veslh	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesravb %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7a]
-+#CHECK: vesravb %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7a]
-+#CHECK: vesravb %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7a]
-+#CHECK: vesravb %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7a]
-+#CHECK: vesravb %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7a]
-+
-+	vesravb	%v0, %v0, %v0
-+	vesravb	%v0, %v0, %v31
-+	vesravb	%v0, %v31, %v0
-+	vesravb	%v31, %v0, %v0
-+	vesravb	%v18, %v3, %v20
-+
-+#CHECK: vesravf %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x7a]
-+#CHECK: vesravf %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x7a]
-+#CHECK: vesravf %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x7a]
-+#CHECK: vesravf %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x7a]
-+#CHECK: vesravf %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x7a]
-+
-+	vesravf	%v0, %v0, %v0
-+	vesravf	%v0, %v0, %v31
-+	vesravf	%v0, %v31, %v0
-+	vesravf	%v31, %v0, %v0
-+	vesravf	%v18, %v3, %v20
-+
-+#CHECK: vesravg %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x7a]
-+#CHECK: vesravg %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x7a]
-+#CHECK: vesravg %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x7a]
-+#CHECK: vesravg %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x7a]
-+#CHECK: vesravg %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x7a]
-+
-+	vesravg	%v0, %v0, %v0
-+	vesravg	%v0, %v0, %v31
-+	vesravg	%v0, %v31, %v0
-+	vesravg	%v31, %v0, %v0
-+	vesravg	%v18, %v3, %v20
-+
-+#CHECK: vesravh %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x7a]
-+#CHECK: vesravh %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x7a]
-+#CHECK: vesravh %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x7a]
-+#CHECK: vesravh %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x7a]
-+#CHECK: vesravh %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x7a]
-+
-+	vesravh	%v0, %v0, %v0
-+	vesravh	%v0, %v0, %v31
-+	vesravh	%v0, %v31, %v0
-+	vesravh	%v31, %v0, %v0
-+	vesravh	%v18, %v3, %v20
-+
-+#CHECK: vesrab  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3a]
-+#CHECK: vesrab  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3a]
-+#CHECK: vesrab  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3a]
-+#CHECK: vesrab  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3a]
-+#CHECK: vesrab  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3a]
-+#CHECK: vesrab  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3a]
-+
-+	vesrab	%v0, %v0, 0
-+	vesrab	%v0, %v0, 4095
-+	vesrab	%v0, %v0, 0(%r15)
-+	vesrab	%v0, %v31, 0
-+	vesrab	%v31, %v0, 0
-+	vesrab	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesraf  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x3a]
-+#CHECK: vesraf  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x3a]
-+#CHECK: vesraf  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x3a]
-+#CHECK: vesraf  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x3a]
-+#CHECK: vesraf  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x3a]
-+#CHECK: vesraf  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x3a]
-+
-+	vesraf	%v0, %v0, 0
-+	vesraf	%v0, %v0, 4095
-+	vesraf	%v0, %v0, 0(%r15)
-+	vesraf	%v0, %v31, 0
-+	vesraf	%v31, %v0, 0
-+	vesraf	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrag  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x3a]
-+#CHECK: vesrag  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x3a]
-+#CHECK: vesrag  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x3a]
-+#CHECK: vesrag  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x3a]
-+#CHECK: vesrag  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x3a]
-+#CHECK: vesrag  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x3a]
-+
-+	vesrag	%v0, %v0, 0
-+	vesrag	%v0, %v0, 4095
-+	vesrag	%v0, %v0, 0(%r15)
-+	vesrag	%v0, %v31, 0
-+	vesrag	%v31, %v0, 0
-+	vesrag	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrah  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x3a]
-+#CHECK: vesrah  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x3a]
-+#CHECK: vesrah  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x3a]
-+#CHECK: vesrah  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x3a]
-+#CHECK: vesrah  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x3a]
-+#CHECK: vesrah  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x3a]
-+
-+	vesrah	%v0, %v0, 0
-+	vesrah	%v0, %v0, 4095
-+	vesrah	%v0, %v0, 0(%r15)
-+	vesrah	%v0, %v31, 0
-+	vesrah	%v31, %v0, 0
-+	vesrah	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrlvb %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x78]
-+#CHECK: vesrlvb %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x78]
-+#CHECK: vesrlvb %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x78]
-+#CHECK: vesrlvb %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x78]
-+#CHECK: vesrlvb %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x78]
-+
-+	vesrlvb	%v0, %v0, %v0
-+	vesrlvb	%v0, %v0, %v31
-+	vesrlvb	%v0, %v31, %v0
-+	vesrlvb	%v31, %v0, %v0
-+	vesrlvb	%v18, %v3, %v20
-+
-+#CHECK: vesrlvf %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x78]
-+#CHECK: vesrlvf %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x78]
-+#CHECK: vesrlvf %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x78]
-+#CHECK: vesrlvf %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x78]
-+#CHECK: vesrlvf %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x78]
-+
-+	vesrlvf	%v0, %v0, %v0
-+	vesrlvf	%v0, %v0, %v31
-+	vesrlvf	%v0, %v31, %v0
-+	vesrlvf	%v31, %v0, %v0
-+	vesrlvf	%v18, %v3, %v20
-+
-+#CHECK: vesrlvg %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x78]
-+#CHECK: vesrlvg %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x78]
-+#CHECK: vesrlvg %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x78]
-+#CHECK: vesrlvg %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x78]
-+#CHECK: vesrlvg %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x78]
-+
-+	vesrlvg	%v0, %v0, %v0
-+	vesrlvg	%v0, %v0, %v31
-+	vesrlvg	%v0, %v31, %v0
-+	vesrlvg	%v31, %v0, %v0
-+	vesrlvg	%v18, %v3, %v20
-+
-+#CHECK: vesrlvh %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x78]
-+#CHECK: vesrlvh %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x78]
-+#CHECK: vesrlvh %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x78]
-+#CHECK: vesrlvh %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x78]
-+#CHECK: vesrlvh %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x78]
-+
-+	vesrlvh	%v0, %v0, %v0
-+	vesrlvh	%v0, %v0, %v31
-+	vesrlvh	%v0, %v31, %v0
-+	vesrlvh	%v31, %v0, %v0
-+	vesrlvh	%v18, %v3, %v20
-+
-+#CHECK: vesrlb  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x38]
-+#CHECK: vesrlb  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x38]
-+#CHECK: vesrlb  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x38]
-+#CHECK: vesrlb  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x38]
-+#CHECK: vesrlb  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x38]
-+#CHECK: vesrlb  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x38]
-+
-+	vesrlb	%v0, %v0, 0
-+	vesrlb	%v0, %v0, 4095
-+	vesrlb	%v0, %v0, 0(%r15)
-+	vesrlb	%v0, %v31, 0
-+	vesrlb	%v31, %v0, 0
-+	vesrlb	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrlf  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x38]
-+#CHECK: vesrlf  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x38]
-+#CHECK: vesrlf  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x38]
-+#CHECK: vesrlf  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x38]
-+#CHECK: vesrlf  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x38]
-+#CHECK: vesrlf  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x38]
-+
-+	vesrlf	%v0, %v0, 0
-+	vesrlf	%v0, %v0, 4095
-+	vesrlf	%v0, %v0, 0(%r15)
-+	vesrlf	%v0, %v31, 0
-+	vesrlf	%v31, %v0, 0
-+	vesrlf	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrlg  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x38]
-+#CHECK: vesrlg  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x38]
-+#CHECK: vesrlg  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x38]
-+#CHECK: vesrlg  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x38]
-+#CHECK: vesrlg  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x38]
-+#CHECK: vesrlg  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x38]
-+
-+	vesrlg	%v0, %v0, 0
-+	vesrlg	%v0, %v0, 4095
-+	vesrlg	%v0, %v0, 0(%r15)
-+	vesrlg	%v0, %v31, 0
-+	vesrlg	%v31, %v0, 0
-+	vesrlg	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vesrlh  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x38]
-+#CHECK: vesrlh  %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x38]
-+#CHECK: vesrlh  %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x38]
-+#CHECK: vesrlh  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x38]
-+#CHECK: vesrlh  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x38]
-+#CHECK: vesrlh  %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x38]
-+
-+	vesrlh	%v0, %v0, 0
-+	vesrlh	%v0, %v0, 4095
-+	vesrlh	%v0, %v0, 0(%r15)
-+	vesrlh	%v0, %v31, 0
-+	vesrlh	%v31, %v0, 0
-+	vesrlh	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vfadb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe3]
-+#CHECK: vfadb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe3]
-+#CHECK: vfadb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe3]
-+#CHECK: vfadb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe3]
-+#CHECK: vfadb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe3]
-+
-+	vfadb	%v0, %v0, %v0
-+	vfadb	%v0, %v0, %v31
-+	vfadb	%v0, %v31, %v0
-+	vfadb	%v31, %v0, %v0
-+	vfadb	%v18, %v3, %v20
-+
-+#CHECK: vfaeb   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82]
-+#CHECK: vfaeb   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82]
-+#CHECK: vfaeb   %v0, %v0, %v0, 12       # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x82]
-+#CHECK: vfaeb   %v0, %v0, %v15, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x82]
-+#CHECK: vfaeb   %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x82]
-+#CHECK: vfaeb   %v0, %v15, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x82]
-+#CHECK: vfaeb   %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x82]
-+#CHECK: vfaeb   %v15, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x82]
-+#CHECK: vfaeb   %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x82]
-+#CHECK: vfaeb   %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x40,0x0a,0x82]
-+#CHECK: vfaeb   %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82]
-+#CHECK: vfaebs  %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0x90,0x0a,0x82]
-+#CHECK: vfaezb  %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x60,0x0a,0x82]
-+#CHECK: vfaezbs %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0xb0,0x0a,0x82]
-+#CHECK: vfaezbs %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82]
-+
-+	vfaeb	%v0, %v0, %v0
-+	vfaeb	%v0, %v0, %v0, 0
-+	vfaeb	%v0, %v0, %v0, 12
-+	vfaeb	%v0, %v0, %v15
-+	vfaeb	%v0, %v0, %v31
-+	vfaeb	%v0, %v15, %v0
-+	vfaeb	%v0, %v31, %v0
-+	vfaeb	%v15, %v0, %v0
-+	vfaeb	%v31, %v0, %v0
-+	vfaeb	%v18, %v3, %v20, 4
-+	vfaeb	%v18, %v3, %v20, 15
-+	vfaebs	%v18, %v3, %v20, 8
-+	vfaezb	%v18, %v3, %v20, 4
-+	vfaezbs	%v18, %v3, %v20, 8
-+	vfaezbs	%v18, %v3, %v20, 15
-+
-+#CHECK: vfaef   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82]
-+#CHECK: vfaef   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82]
-+#CHECK: vfaef   %v0, %v0, %v0, 12       # encoding: [0xe7,0x00,0x00,0xc0,0x20,0x82]
-+#CHECK: vfaef   %v0, %v0, %v15, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x82]
-+#CHECK: vfaef   %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x82]
-+#CHECK: vfaef   %v0, %v15, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x82]
-+#CHECK: vfaef   %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x82]
-+#CHECK: vfaef   %v15, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x82]
-+#CHECK: vfaef   %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x82]
-+#CHECK: vfaef   %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x40,0x2a,0x82]
-+#CHECK: vfaef   %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82]
-+#CHECK: vfaefs  %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0x90,0x2a,0x82]
-+#CHECK: vfaezf  %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x60,0x2a,0x82]
-+#CHECK: vfaezfs %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0xb0,0x2a,0x82]
-+#CHECK: vfaezfs %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82]
-+
-+	vfaef	%v0, %v0, %v0
-+	vfaef	%v0, %v0, %v0, 0
-+	vfaef	%v0, %v0, %v0, 12
-+	vfaef	%v0, %v0, %v15
-+	vfaef	%v0, %v0, %v31
-+	vfaef	%v0, %v15, %v0
-+	vfaef	%v0, %v31, %v0
-+	vfaef	%v15, %v0, %v0
-+	vfaef	%v31, %v0, %v0
-+	vfaef	%v18, %v3, %v20, 4
-+	vfaef	%v18, %v3, %v20, 15
-+	vfaefs	%v18, %v3, %v20, 8
-+	vfaezf	%v18, %v3, %v20, 4
-+	vfaezfs	%v18, %v3, %v20, 8
-+	vfaezfs	%v18, %v3, %v20, 15
-+
-+#CHECK: vfaeh   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82]
-+#CHECK: vfaeh   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82]
-+#CHECK: vfaeh   %v0, %v0, %v0, 12       # encoding: [0xe7,0x00,0x00,0xc0,0x10,0x82]
-+#CHECK: vfaeh   %v0, %v0, %v15, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x82]
-+#CHECK: vfaeh   %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x82]
-+#CHECK: vfaeh   %v0, %v15, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x82]
-+#CHECK: vfaeh   %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x82]
-+#CHECK: vfaeh   %v15, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x82]
-+#CHECK: vfaeh   %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x82]
-+#CHECK: vfaeh   %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x40,0x1a,0x82]
-+#CHECK: vfaeh   %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x1a,0x82]
-+#CHECK: vfaehs  %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0x90,0x1a,0x82]
-+#CHECK: vfaezh  %v18, %v3, %v20, 4      # encoding: [0xe7,0x23,0x40,0x60,0x1a,0x82]
-+#CHECK: vfaezhs %v18, %v3, %v20, 8      # encoding: [0xe7,0x23,0x40,0xb0,0x1a,0x82]
-+#CHECK: vfaezhs %v18, %v3, %v20, 15     # encoding: [0xe7,0x23,0x40,0xf0,0x1a,0x82]
-+
-+	vfaeh	%v0, %v0, %v0
-+	vfaeh	%v0, %v0, %v0, 0
-+	vfaeh	%v0, %v0, %v0, 12
-+	vfaeh	%v0, %v0, %v15
-+	vfaeh	%v0, %v0, %v31
-+	vfaeh	%v0, %v15, %v0
-+	vfaeh	%v0, %v31, %v0
-+	vfaeh	%v15, %v0, %v0
-+	vfaeh	%v31, %v0, %v0
-+	vfaeh	%v18, %v3, %v20, 4
-+	vfaeh	%v18, %v3, %v20, 15
-+	vfaehs	%v18, %v3, %v20, 8
-+	vfaezh	%v18, %v3, %v20, 4
-+	vfaezhs	%v18, %v3, %v20, 8
-+	vfaezhs	%v18, %v3, %v20, 15
-+
-+#CHECK: vfcedb  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe8]
-+#CHECK: vfcedb  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe8]
-+#CHECK: vfcedb  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe8]
-+#CHECK: vfcedb  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe8]
-+#CHECK: vfcedb  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe8]
-+
-+	vfcedb	%v0, %v0, %v0
-+	vfcedb	%v0, %v0, %v31
-+	vfcedb	%v0, %v31, %v0
-+	vfcedb	%v31, %v0, %v0
-+	vfcedb	%v18, %v3, %v20
-+
-+#CHECK: vfcedbs %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x10,0x30,0xe8]
-+#CHECK: vfcedbs %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xe8]
-+#CHECK: vfcedbs %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xe8]
-+#CHECK: vfcedbs %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xe8]
-+#CHECK: vfcedbs %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xe8]
-+
-+	vfcedbs	%v0, %v0, %v0
-+	vfcedbs	%v0, %v0, %v31
-+	vfcedbs	%v0, %v31, %v0
-+	vfcedbs	%v31, %v0, %v0
-+	vfcedbs	%v18, %v3, %v20
-+
-+#CHECK: vfchdb  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xeb]
-+#CHECK: vfchdb  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xeb]
-+#CHECK: vfchdb  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xeb]
-+#CHECK: vfchdb  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xeb]
-+#CHECK: vfchdb  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xeb]
-+
-+	vfchdb	%v0, %v0, %v0
-+	vfchdb	%v0, %v0, %v31
-+	vfchdb	%v0, %v31, %v0
-+	vfchdb	%v31, %v0, %v0
-+	vfchdb	%v18, %v3, %v20
-+
-+#CHECK: vfchdbs %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x10,0x30,0xeb]
-+#CHECK: vfchdbs %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xeb]
-+#CHECK: vfchdbs %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xeb]
-+#CHECK: vfchdbs %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xeb]
-+#CHECK: vfchdbs %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xeb]
-+
-+	vfchdbs	%v0, %v0, %v0
-+	vfchdbs	%v0, %v0, %v31
-+	vfchdbs	%v0, %v31, %v0
-+	vfchdbs	%v31, %v0, %v0
-+	vfchdbs	%v18, %v3, %v20
-+
-+#CHECK: vfchedb %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xea]
-+#CHECK: vfchedb %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xea]
-+#CHECK: vfchedb %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xea]
-+#CHECK: vfchedb %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xea]
-+#CHECK: vfchedb %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xea]
-+
-+	vfchedb	%v0, %v0, %v0
-+	vfchedb	%v0, %v0, %v31
-+	vfchedb	%v0, %v31, %v0
-+	vfchedb	%v31, %v0, %v0
-+	vfchedb	%v18, %v3, %v20
-+
-+#CHECK: vfchedbs %v0, %v0, %v0          # encoding: [0xe7,0x00,0x00,0x10,0x30,0xea]
-+#CHECK: vfchedbs %v0, %v0, %v31         # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xea]
-+#CHECK: vfchedbs %v0, %v31, %v0         # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xea]
-+#CHECK: vfchedbs %v31, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xea]
-+#CHECK: vfchedbs %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xea]
-+
-+	vfchedbs %v0, %v0, %v0
-+	vfchedbs %v0, %v0, %v31
-+	vfchedbs %v0, %v31, %v0
-+	vfchedbs %v31, %v0, %v0
-+	vfchedbs %v18, %v3, %v20
-+
-+#CHECK: vfddb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe5]
-+#CHECK: vfddb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe5]
-+#CHECK: vfddb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe5]
-+#CHECK: vfddb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe5]
-+#CHECK: vfddb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe5]
-+
-+	vfddb	%v0, %v0, %v0
-+	vfddb	%v0, %v0, %v31
-+	vfddb	%v0, %v31, %v0
-+	vfddb	%v31, %v0, %v0
-+	vfddb	%v18, %v3, %v20
-+
-+#CHECK: vfeeb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x80]
-+#CHECK: vfeeb   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x80]
-+#CHECK: vfeeb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x80]
-+#CHECK: vfeeb   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x80]
-+#CHECK: vfeeb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x80]
-+#CHECK: vfeeb   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x80]
-+#CHECK: vfeeb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x80]
-+#CHECK: vfeeb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x80]
-+#CHECK: vfeebs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x04,0x80]
-+#CHECK: vfeezb  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x20,0x0a,0x80]
-+#CHECK: vfeezbs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x30,0x04,0x80]
-+
-+	vfeeb	%v0, %v0, %v0
-+	vfeeb	%v0, %v0, %v15
-+	vfeeb	%v0, %v0, %v31
-+	vfeeb	%v0, %v15, %v0
-+	vfeeb	%v0, %v31, %v0
-+	vfeeb	%v15, %v0, %v0
-+	vfeeb	%v31, %v0, %v0
-+	vfeeb	%v18, %v3, %v20
-+	vfeebs	%v5, %v22, %v7
-+	vfeezb	%v18, %v3, %v20
-+	vfeezbs	%v5, %v22, %v7
-+
-+#CFECK: vfeef   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x80]
-+#CFECK: vfeef   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x80]
-+#CFECK: vfeef   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x80]
-+#CFECK: vfeef   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x80]
-+#CFECK: vfeef   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x80]
-+#CFECK: vfeef   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x80]
-+#CFECK: vfeef   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x80]
-+#CFECK: vfeef   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x80]
-+#CFECK: vfeefs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0x80]
-+#CFECK: vfeezf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x80]
-+#CFECK: vfeezfs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x30,0x24,0x80]
-+
-+	vfeef	%v0, %v0, %v0
-+	vfeef	%v0, %v0, %v15
-+	vfeef	%v0, %v0, %v31
-+	vfeef	%v0, %v15, %v0
-+	vfeef	%v0, %v31, %v0
-+	vfeef	%v15, %v0, %v0
-+	vfeef	%v31, %v0, %v0
-+	vfeef	%v18, %v3, %v20
-+	vfeefs	%v5, %v22, %v7
-+	vfeezf	%v18, %v3, %v20
-+	vfeezfs	%v5, %v22, %v7
-+
-+#CHECK: vfeeh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x80]
-+#CHECK: vfeeh   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x80]
-+#CHECK: vfeeh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x80]
-+#CHECK: vfeeh   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x80]
-+#CHECK: vfeeh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x80]
-+#CHECK: vfeeh   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x80]
-+#CHECK: vfeeh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x80]
-+#CHECK: vfeeh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x80]
-+#CHECK: vfeehs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0x80]
-+#CHECK: vfeezh  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x80]
-+#CHECK: vfeezhs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x30,0x14,0x80]
-+
-+	vfeeh	%v0, %v0, %v0
-+	vfeeh	%v0, %v0, %v15
-+	vfeeh	%v0, %v0, %v31
-+	vfeeh	%v0, %v15, %v0
-+	vfeeh	%v0, %v31, %v0
-+	vfeeh	%v15, %v0, %v0
-+	vfeeh	%v31, %v0, %v0
-+	vfeeh	%v18, %v3, %v20
-+	vfeehs	%v5, %v22, %v7
-+	vfeezh	%v18, %v3, %v20
-+	vfeezhs	%v5, %v22, %v7
-+
-+#CHECK: vfeneb   %v0, %v0, %v0          # encoding: [0xe7,0x00,0x00,0x00,0x00,0x81]
-+#CHECK: vfeneb   %v0, %v0, %v15         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x81]
-+#CHECK: vfeneb   %v0, %v0, %v31         # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x81]
-+#CHECK: vfeneb   %v0, %v15, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x81]
-+#CHECK: vfeneb   %v0, %v31, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x81]
-+#CHECK: vfeneb   %v15, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x81]
-+#CHECK: vfeneb   %v31, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x81]
-+#CHECK: vfeneb   %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x81]
-+#CHECK: vfenebs  %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x10,0x04,0x81]
-+#CHECK: vfenezb  %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x20,0x0a,0x81]
-+#CHECK: vfenezbs %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x30,0x04,0x81]
-+
-+	vfeneb   %v0, %v0, %v0
-+	vfeneb   %v0, %v0, %v15
-+	vfeneb   %v0, %v0, %v31
-+	vfeneb   %v0, %v15, %v0
-+	vfeneb   %v0, %v31, %v0
-+	vfeneb   %v15, %v0, %v0
-+	vfeneb   %v31, %v0, %v0
-+	vfeneb   %v18, %v3, %v20
-+	vfenebs  %v5, %v22, %v7
-+	vfenezb  %v18, %v3, %v20
-+	vfenezbs %v5, %v22, %v7
-+
-+#CFECK: vfenef   %v0, %v0, %v0          # encoding: [0xe7,0x00,0x00,0x00,0x20,0x81]
-+#CFECK: vfenef   %v0, %v0, %v15         # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x81]
-+#CFECK: vfenef   %v0, %v0, %v31         # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x81]
-+#CFECK: vfenef   %v0, %v15, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x81]
-+#CFECK: vfenef   %v0, %v31, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x81]
-+#CFECK: vfenef   %v15, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x81]
-+#CFECK: vfenef   %v31, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x81]
-+#CFECK: vfenef   %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x81]
-+#CFECK: vfenefs  %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x10,0x24,0x81]
-+#CFECK: vfenezf  %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x81]
-+#CFECK: vfenezfs %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x30,0x24,0x81]
-+
-+	vfenef   %v0, %v0, %v0
-+	vfenef   %v0, %v0, %v15
-+	vfenef   %v0, %v0, %v31
-+	vfenef   %v0, %v15, %v0
-+	vfenef   %v0, %v31, %v0
-+	vfenef   %v15, %v0, %v0
-+	vfenef   %v31, %v0, %v0
-+	vfenef   %v18, %v3, %v20
-+	vfenefs  %v5, %v22, %v7
-+	vfenezf  %v18, %v3, %v20
-+	vfenezfs %v5, %v22, %v7
-+
-+#CHECK: vfeneh   %v0, %v0, %v0          # encoding: [0xe7,0x00,0x00,0x00,0x10,0x81]
-+#CHECK: vfeneh   %v0, %v0, %v15         # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x81]
-+#CHECK: vfeneh   %v0, %v0, %v31         # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x81]
-+#CHECK: vfeneh   %v0, %v15, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x81]
-+#CHECK: vfeneh   %v0, %v31, %v0         # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x81]
-+#CHECK: vfeneh   %v15, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x81]
-+#CHECK: vfeneh   %v31, %v0, %v0         # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x81]
-+#CHECK: vfeneh   %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x81]
-+#CHECK: vfenehs  %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x10,0x14,0x81]
-+#CHECK: vfenezh  %v18, %v3, %v20        # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x81]
-+#CHECK: vfenezhs %v5, %v22, %v7         # encoding: [0xe7,0x56,0x70,0x30,0x14,0x81]
-+
-+	vfeneh   %v0, %v0, %v0
-+	vfeneh   %v0, %v0, %v15
-+	vfeneh   %v0, %v0, %v31
-+	vfeneh   %v0, %v15, %v0
-+	vfeneh   %v0, %v31, %v0
-+	vfeneh   %v15, %v0, %v0
-+	vfeneh   %v31, %v0, %v0
-+	vfeneh   %v18, %v3, %v20
-+	vfenehs  %v5, %v22, %v7
-+	vfenezh  %v18, %v3, %v20
-+	vfenezhs %v5, %v22, %v7
-+
-+#CHECK: vfidb   %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc7]
-+#CHECK: vfidb   %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc7]
-+#CHECK: vfidb   %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc7]
-+#CHECK: vfidb   %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
-+#CHECK: vfidb   %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc7]
-+#CHECK: vfidb   %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc7]
-+#CHECK: vfidb   %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc7]
-+
-+	vfidb	%v0, %v0, 0, 0
-+	vfidb	%v0, %v0, 0, 15
-+	vfidb	%v0, %v0, 4, 0
-+	vfidb	%v0, %v0, 12, 0
-+	vfidb	%v0, %v31, 0, 0
-+	vfidb	%v31, %v0, 0, 0
-+	vfidb	%v14, %v17, 4, 10
-+
-+#CHECK: vistrb   %v0, %v0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5c]
-+#CHECK: vistrb   %v0, %v15              # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5c]
-+#CHECK: vistrb   %v0, %v31              # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x5c]
-+#CHECK: vistrb   %v15, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5c]
-+#CHECK: vistrb   %v31, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5c]
-+#CHECK: vistrb   %v18, %v3              # encoding: [0xe7,0x23,0x00,0x00,0x08,0x5c]
-+#CHECK: vistrbs  %v5, %v22              # encoding: [0xe7,0x56,0x00,0x10,0x04,0x5c]
-+
-+	vistrb   %v0, %v0
-+	vistrb   %v0, %v15
-+	vistrb   %v0, %v31
-+	vistrb   %v15, %v0
-+	vistrb   %v31, %v0
-+	vistrb   %v18, %v3
-+	vistrbs  %v5, %v22
-+
-+#CBECK: vistrf   %v0, %v0               # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5c]
-+#CBECK: vistrf   %v0, %v15              # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5c]
-+#CBECK: vistrf   %v0, %v31              # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5c]
-+#CBECK: vistrf   %v15, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5c]
-+#CBECK: vistrf   %v31, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5c]
-+#CBECK: vistrf   %v18, %v3              # encoding: [0xe7,0x23,0x00,0x00,0x28,0x5c]
-+#CBECK: vistrfs  %v5, %v22              # encoding: [0xe7,0x56,0x00,0x10,0x24,0x5c]
-+
-+	vistrf   %v0, %v0
-+	vistrf   %v0, %v15
-+	vistrf   %v0, %v31
-+	vistrf   %v15, %v0
-+	vistrf   %v31, %v0
-+	vistrf   %v18, %v3
-+	vistrfs  %v5, %v22
-+
-+#CHECK: vistrh   %v0, %v0               # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5c]
-+#CHECK: vistrh   %v0, %v15              # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x5c]
-+#CHECK: vistrh   %v0, %v31              # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5c]
-+#CHECK: vistrh   %v15, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5c]
-+#CHECK: vistrh   %v31, %v0              # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5c]
-+#CHECK: vistrh   %v18, %v3              # encoding: [0xe7,0x23,0x00,0x00,0x18,0x5c]
-+#CHECK: vistrhs  %v5, %v22              # encoding: [0xe7,0x56,0x00,0x10,0x14,0x5c]
-+
-+	vistrh   %v0, %v0
-+	vistrh   %v0, %v15
-+	vistrh   %v0, %v31
-+	vistrh   %v15, %v0
-+	vistrh   %v31, %v0
-+	vistrh   %v18, %v3
-+	vistrhs  %v5, %v22
-+
-+#CHECK: vflcdb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcc]
-+#CHECK: vflcdb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcc]
-+#CHECK: vflcdb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcc]
-+#CHECK: vflcdb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcc]
-+#CHECK: vflcdb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcc]
-+#CHECK: vflcdb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcc]
-+
-+	vflcdb	%v0, %v0
-+	vflcdb	%v0, %v15
-+	vflcdb	%v0, %v31
-+	vflcdb	%v15, %v0
-+	vflcdb	%v31, %v0
-+	vflcdb	%v14, %v17
-+
-+#CHECK: vflndb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x10,0x30,0xcc]
-+#CHECK: vflndb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x10,0x30,0xcc]
-+#CHECK: vflndb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xcc]
-+#CHECK: vflndb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x10,0x30,0xcc]
-+#CHECK: vflndb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xcc]
-+#CHECK: vflndb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x10,0x34,0xcc]
-+
-+	vflndb	%v0, %v0
-+	vflndb	%v0, %v15
-+	vflndb	%v0, %v31
-+	vflndb	%v15, %v0
-+	vflndb	%v31, %v0
-+	vflndb	%v14, %v17
-+
-+#CHECK: vflpdb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x20,0x30,0xcc]
-+#CHECK: vflpdb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x20,0x30,0xcc]
-+#CHECK: vflpdb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x20,0x34,0xcc]
-+#CHECK: vflpdb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x20,0x30,0xcc]
-+#CHECK: vflpdb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x20,0x38,0xcc]
-+#CHECK: vflpdb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x20,0x34,0xcc]
-+
-+	vflpdb	%v0, %v0
-+	vflpdb	%v0, %v15
-+	vflpdb	%v0, %v31
-+	vflpdb	%v15, %v0
-+	vflpdb	%v31, %v0
-+	vflpdb	%v14, %v17
-+
-+#CHECK: vfmadb  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8f]
-+#CHECK: vfmadb  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x8f]
-+#CHECK: vfmadb  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8f]
-+#CHECK: vfmadb  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8f]
-+#CHECK: vfmadb  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8f]
-+#CHECK: vfmadb  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8f]
-+
-+	vfmadb	%v0, %v0, %v0, %v0
-+	vfmadb	%v0, %v0, %v0, %v31
-+	vfmadb	%v0, %v0, %v31, %v0
-+	vfmadb	%v0, %v31, %v0, %v0
-+	vfmadb	%v31, %v0, %v0, %v0
-+	vfmadb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vfmdb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe7]
-+#CHECK: vfmdb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe7]
-+#CHECK: vfmdb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe7]
-+#CHECK: vfmdb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe7]
-+#CHECK: vfmdb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe7]
-+
-+	vfmdb	%v0, %v0, %v0
-+	vfmdb	%v0, %v0, %v31
-+	vfmdb	%v0, %v31, %v0
-+	vfmdb	%v31, %v0, %v0
-+	vfmdb	%v18, %v3, %v20
-+
-+#CHECK: vfmsdb  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8e]
-+#CHECK: vfmsdb  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x8e]
-+#CHECK: vfmsdb  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8e]
-+#CHECK: vfmsdb  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8e]
-+#CHECK: vfmsdb  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8e]
-+#CHECK: vfmsdb  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8e]
-+
-+	vfmsdb	%v0, %v0, %v0, %v0
-+	vfmsdb	%v0, %v0, %v0, %v31
-+	vfmsdb	%v0, %v0, %v31, %v0
-+	vfmsdb	%v0, %v31, %v0, %v0
-+	vfmsdb	%v31, %v0, %v0, %v0
-+	vfmsdb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vfsdb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe2]
-+#CHECK: vfsdb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe2]
-+#CHECK: vfsdb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe2]
-+#CHECK: vfsdb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe2]
-+#CHECK: vfsdb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe2]
-+
-+	vfsdb	%v0, %v0, %v0
-+	vfsdb	%v0, %v0, %v31
-+	vfsdb	%v0, %v31, %v0
-+	vfsdb	%v31, %v0, %v0
-+	vfsdb	%v18, %v3, %v20
-+
-+#CHECK: vfsqdb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xce]
-+#CHECK: vfsqdb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xce]
-+#CHECK: vfsqdb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xce]
-+#CHECK: vfsqdb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xce]
-+#CHECK: vfsqdb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xce]
-+#CHECK: vfsqdb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xce]
-+
-+	vfsqdb	%v0, %v0
-+	vfsqdb	%v0, %v15
-+	vfsqdb	%v0, %v31
-+	vfsqdb	%v15, %v0
-+	vfsqdb	%v31, %v0
-+	vfsqdb	%v14, %v17
-+
-+#CHECK: vftcidb %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4a]
-+#CHECK: vftcidb %v0, %v0, 4095          # encoding: [0xe7,0x00,0xff,0xf0,0x30,0x4a]
-+#CHECK: vftcidb %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4a]
-+#CHECK: vftcidb %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4a]
-+#CHECK: vftcidb %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4a]
-+#CHECK: vftcidb %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4a]
-+#CHECK: vftcidb %v4, %v21, 1656         # encoding: [0xe7,0x45,0x67,0x80,0x34,0x4a]
-+
-+	vftcidb	%v0, %v0, 0
-+	vftcidb	%v0, %v0, 4095
-+	vftcidb	%v0, %v15, 0
-+	vftcidb	%v0, %v31, 0
-+	vftcidb	%v15, %v0, 0
-+	vftcidb	%v31, %v0, 0
-+	vftcidb	%v4, %v21, 0x678
-+
-+#CHECK: vgbm    %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44]
-+#CHECK: vgbm    %v0, 65535              # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44]
-+#CHECK: vgbm    %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44]
-+#CHECK: vgbm    %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44]
-+#CHECK: vgbm    %v17, 4660              # encoding: [0xe7,0x10,0x12,0x34,0x08,0x44]
-+
-+	vgbm	%v0, 0
-+	vgbm	%v0, 0xffff
-+	vgbm	%v15, 0
-+	vgbm	%v31, 0
-+	vgbm	%v17, 0x1234
-+
-+#CHECK: vgef    %v0, 0(%v0), 0          # encoding: [0xe7,0x00,0x00,0x00,0x00,0x13]
-+#CHECK: vgef    %v0, 0(%v0,%r1), 0      # encoding: [0xe7,0x00,0x10,0x00,0x00,0x13]
-+#CHECK: vgef    %v0, 0(%v0,%r1), 3      # encoding: [0xe7,0x00,0x10,0x00,0x30,0x13]
-+#CHECK: vgef    %v0, 0(%v0,%r15), 0     # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x13]
-+#CHECK: vgef    %v0, 0(%v15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x13]
-+#CHECK: vgef    %v0, 0(%v31,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x13]
-+#CHECK: vgef    %v0, 4095(%v0,%r1), 0   # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x13]
-+#CHECK: vgef    %v15, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x13]
-+#CHECK: vgef    %v31, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x13]
-+#CHECK: vgef    %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x13]
-+
-+	vgef	%v0, 0(%v0), 0
-+	vgef	%v0, 0(%v0,%r1), 0
-+	vgef	%v0, 0(%v0,%r1), 3
-+	vgef	%v0, 0(%v0,%r15), 0
-+	vgef	%v0, 0(%v15,%r1), 0
-+	vgef	%v0, 0(%v31,%r1), 0
-+	vgef	%v0, 4095(%v0, %r1), 0
-+	vgef	%v15, 0(%v0,%r1), 0
-+	vgef	%v31, 0(%v0,%r1), 0
-+	vgef	%v10, 1000(%v19,%r7), 1
-+
-+#CHECK: vgeg    %v0, 0(%v0), 0          # encoding: [0xe7,0x00,0x00,0x00,0x00,0x12]
-+#CHECK: vgeg    %v0, 0(%v0,%r1), 0      # encoding: [0xe7,0x00,0x10,0x00,0x00,0x12]
-+#CHECK: vgeg    %v0, 0(%v0,%r1), 1      # encoding: [0xe7,0x00,0x10,0x00,0x10,0x12]
-+#CHECK: vgeg    %v0, 0(%v0,%r15), 0     # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x12]
-+#CHECK: vgeg    %v0, 0(%v15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x12]
-+#CHECK: vgeg    %v0, 0(%v31,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x12]
-+#CHECK: vgeg    %v0, 4095(%v0,%r1), 0   # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x12]
-+#CHECK: vgeg    %v15, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x12]
-+#CHECK: vgeg    %v31, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x12]
-+#CHECK: vgeg    %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x12]
-+
-+	vgeg	%v0, 0(%v0), 0
-+	vgeg	%v0, 0(%v0,%r1), 0
-+	vgeg	%v0, 0(%v0,%r1), 1
-+	vgeg	%v0, 0(%v0,%r15), 0
-+	vgeg	%v0, 0(%v15,%r1), 0
-+	vgeg	%v0, 0(%v31,%r1), 0
-+	vgeg	%v0, 4095(%v0,%r1), 0
-+	vgeg	%v15, 0(%v0,%r1), 0
-+	vgeg	%v31, 0(%v0,%r1), 0
-+	vgeg	%v10, 1000(%v19,%r7), 1
-+
-+#CHECK: vgfmab  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xbc]
-+#CHECK: vgfmab  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xbc]
-+#CHECK: vgfmab  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xbc]
-+#CHECK: vgfmab  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xbc]
-+#CHECK: vgfmab  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xbc]
-+#CHECK: vgfmab  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xbc]
-+
-+	vgfmab	%v0, %v0, %v0, %v0
-+	vgfmab	%v0, %v0, %v0, %v31
-+	vgfmab	%v0, %v0, %v31, %v0
-+	vgfmab	%v0, %v31, %v0, %v0
-+	vgfmab	%v31, %v0, %v0, %v0
-+	vgfmab	%v13, %v17, %v21, %v25
-+
-+#CHECK: vgfmaf  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xbc]
-+#CHECK: vgfmaf  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xbc]
-+#CHECK: vgfmaf  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xbc]
-+#CHECK: vgfmaf  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xbc]
-+#CHECK: vgfmaf  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xbc]
-+#CHECK: vgfmaf  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xbc]
-+
-+	vgfmaf	%v0, %v0, %v0, %v0
-+	vgfmaf	%v0, %v0, %v0, %v31
-+	vgfmaf	%v0, %v0, %v31, %v0
-+	vgfmaf	%v0, %v31, %v0, %v0
-+	vgfmaf	%v31, %v0, %v0, %v0
-+	vgfmaf	%v13, %v17, %v21, %v25
-+
-+#CHECK: vgfmag  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x03,0x00,0x00,0xbc]
-+#CHECK: vgfmag  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xbc]
-+#CHECK: vgfmag  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xbc]
-+#CHECK: vgfmag  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xbc]
-+#CHECK: vgfmag  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xbc]
-+#CHECK: vgfmag  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xbc]
-+
-+	vgfmag	%v0, %v0, %v0, %v0
-+	vgfmag	%v0, %v0, %v0, %v31
-+	vgfmag	%v0, %v0, %v31, %v0
-+	vgfmag	%v0, %v31, %v0, %v0
-+	vgfmag	%v31, %v0, %v0, %v0
-+	vgfmag	%v13, %v17, %v21, %v25
-+
-+#CHECK: vgfmah  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xbc]
-+#CHECK: vgfmah  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xbc]
-+#CHECK: vgfmah  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xbc]
-+#CHECK: vgfmah  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xbc]
-+#CHECK: vgfmah  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xbc]
-+#CHECK: vgfmah  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xbc]
-+
-+	vgfmah	%v0, %v0, %v0, %v0
-+	vgfmah	%v0, %v0, %v0, %v31
-+	vgfmah	%v0, %v0, %v31, %v0
-+	vgfmah	%v0, %v31, %v0, %v0
-+	vgfmah	%v31, %v0, %v0, %v0
-+	vgfmah	%v13, %v17, %v21, %v25
-+
-+#CHECK: vgfmb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb4]
-+#CHECK: vgfmb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb4]
-+#CHECK: vgfmb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb4]
-+#CHECK: vgfmb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb4]
-+#CHECK: vgfmb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xb4]
-+
-+	vgfmb	%v0, %v0, %v0
-+	vgfmb	%v0, %v0, %v31
-+	vgfmb	%v0, %v31, %v0
-+	vgfmb	%v31, %v0, %v0
-+	vgfmb	%v18, %v3, %v20
-+
-+#CHECK: vgfmf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb4]
-+#CHECK: vgfmf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb4]
-+#CHECK: vgfmf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb4]
-+#CHECK: vgfmf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb4]
-+#CHECK: vgfmf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xb4]
-+
-+	vgfmf	%v0, %v0, %v0
-+	vgfmf	%v0, %v0, %v31
-+	vgfmf	%v0, %v31, %v0
-+	vgfmf	%v31, %v0, %v0
-+	vgfmf	%v18, %v3, %v20
-+
-+#CHECK: vgfmg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb4]
-+#CHECK: vgfmg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb4]
-+#CHECK: vgfmg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb4]
-+#CHECK: vgfmg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb4]
-+#CHECK: vgfmg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xb4]
-+
-+	vgfmg	%v0, %v0, %v0
-+	vgfmg	%v0, %v0, %v31
-+	vgfmg	%v0, %v31, %v0
-+	vgfmg	%v31, %v0, %v0
-+	vgfmg	%v18, %v3, %v20
-+
-+#CHECK: vgfmh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xb4]
-+#CHECK: vgfmh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xb4]
-+#CHECK: vgfmh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xb4]
-+#CHECK: vgfmh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xb4]
-+#CHECK: vgfmh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xb4]
-+
-+	vgfmh	%v0, %v0, %v0
-+	vgfmh	%v0, %v0, %v31
-+	vgfmh	%v0, %v31, %v0
-+	vgfmh	%v31, %v0, %v0
-+	vgfmh	%v18, %v3, %v20
-+
-+#CHECK: vgmb    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x46]
-+#CHECK: vgmb    %v0, 0, 255             # encoding: [0xe7,0x00,0x00,0xff,0x00,0x46]
-+#CHECK: vgmb    %v0, 255, 0             # encoding: [0xe7,0x00,0xff,0x00,0x00,0x46]
-+#CHECK: vgmb    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x46]
-+#CHECK: vgmb    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x46]
-+#CHECK: vgmb    %v21, 2, 3              # encoding: [0xe7,0x50,0x02,0x03,0x08,0x46]
-+
-+	vgmb	%v0, 0, 0
-+	vgmb	%v0, 0, 255
-+	vgmb	%v0, 255, 0
-+	vgmb	%v15, 0, 0
-+	vgmb	%v31, 0, 0
-+	vgmb	%v21, 2, 3
-+
-+#CHECK: vgmf    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x20,0x46]
-+#CHECK: vgmf    %v0, 0, 255             # encoding: [0xe7,0x00,0x00,0xff,0x20,0x46]
-+#CHECK: vgmf    %v0, 255, 0             # encoding: [0xe7,0x00,0xff,0x00,0x20,0x46]
-+#CHECK: vgmf    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x46]
-+#CHECK: vgmf    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x46]
-+#CHECK: vgmf    %v21, 2, 3              # encoding: [0xe7,0x50,0x02,0x03,0x28,0x46]
-+
-+	vgmf	%v0, 0, 0
-+	vgmf	%v0, 0, 255
-+	vgmf	%v0, 255, 0
-+	vgmf	%v15, 0, 0
-+	vgmf	%v31, 0, 0
-+	vgmf	%v21, 2, 3
-+
-+#CHECK: vgmg    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x30,0x46]
-+#CHECK: vgmg    %v0, 0, 255             # encoding: [0xe7,0x00,0x00,0xff,0x30,0x46]
-+#CHECK: vgmg    %v0, 255, 0             # encoding: [0xe7,0x00,0xff,0x00,0x30,0x46]
-+#CHECK: vgmg    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x46]
-+#CHECK: vgmg    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x46]
-+#CHECK: vgmg    %v21, 2, 3              # encoding: [0xe7,0x50,0x02,0x03,0x38,0x46]
-+
-+	vgmg	%v0, 0, 0
-+	vgmg	%v0, 0, 255
-+	vgmg	%v0, 255, 0
-+	vgmg	%v15, 0, 0
-+	vgmg	%v31, 0, 0
-+	vgmg	%v21, 2, 3
-+
-+#CHECK: vgmh    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x10,0x46]
-+#CHECK: vgmh    %v0, 0, 255             # encoding: [0xe7,0x00,0x00,0xff,0x10,0x46]
-+#CHECK: vgmh    %v0, 255, 0             # encoding: [0xe7,0x00,0xff,0x00,0x10,0x46]
-+#CHECK: vgmh    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x46]
-+#CHECK: vgmh    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x46]
-+#CHECK: vgmh    %v21, 2, 3              # encoding: [0xe7,0x50,0x02,0x03,0x18,0x46]
-+
-+	vgmh	%v0, 0, 0
-+	vgmh	%v0, 0, 255
-+	vgmh	%v0, 255, 0
-+	vgmh	%v15, 0, 0
-+	vgmh	%v31, 0, 0
-+	vgmh	%v21, 2, 3
-+
-+#CHECK: vl      %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x06]
-+#CHECK: vl      %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x06]
-+#CHECK: vl      %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x06]
-+#CHECK: vl      %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x06]
-+#CHECK: vl      %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x06]
-+#CHECK: vl      %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x06]
-+#CHECK: vl      %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x08,0x06]
-+
-+	vl	%v0, 0
-+	vl	%v0, 4095
-+	vl	%v0, 0(%r15)
-+	vl	%v0, 0(%r15,%r1)
-+	vl	%v15, 0
-+	vl	%v31, 0
-+	vl	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlbb    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x07]
-+#CHECK: vlbb    %v0, 0, 15              # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x07]
-+#CHECK: vlbb    %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x07]
-+#CHECK: vlbb    %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x07]
-+#CHECK: vlbb    %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x07]
-+#CHECK: vlbb    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x07]
-+#CHECK: vlbb    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x07]
-+#CHECK: vlbb    %v18, 1383(%r3,%r4), 8  # encoding: [0xe7,0x23,0x45,0x67,0x88,0x07]
-+
-+	vlbb	%v0, 0, 0
-+	vlbb	%v0, 0, 15
-+	vlbb	%v0, 4095, 0
-+	vlbb	%v0, 0(%r15), 0
-+	vlbb	%v0, 0(%r15,%r1), 0
-+	vlbb	%v15, 0, 0
-+	vlbb	%v31, 0, 0
-+	vlbb	%v18, 1383(%r3,%r4), 8
-+
-+#CHECK: vlcb    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xde]
-+#CHECK: vlcb    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xde]
-+#CHECK: vlcb    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xde]
-+#CHECK: vlcb    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xde]
-+#CHECK: vlcb    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xde]
-+#CHECK: vlcb    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xde]
-+
-+	vlcb	%v0, %v0
-+	vlcb	%v0, %v15
-+	vlcb	%v0, %v31
-+	vlcb	%v15, %v0
-+	vlcb	%v31, %v0
-+	vlcb	%v14, %v17
-+
-+#CHECK: vlcf    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xde]
-+#CHECK: vlcf    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xde]
-+#CHECK: vlcf    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xde]
-+#CHECK: vlcf    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xde]
-+#CHECK: vlcf    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xde]
-+#CHECK: vlcf    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xde]
-+
-+	vlcf	%v0, %v0
-+	vlcf	%v0, %v15
-+	vlcf	%v0, %v31
-+	vlcf	%v15, %v0
-+	vlcf	%v31, %v0
-+	vlcf	%v14, %v17
-+
-+#CHECK: vlcg    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xde]
-+#CHECK: vlcg    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xde]
-+#CHECK: vlcg    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xde]
-+#CHECK: vlcg    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xde]
-+#CHECK: vlcg    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xde]
-+#CHECK: vlcg    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xde]
-+
-+	vlcg	%v0, %v0
-+	vlcg	%v0, %v15
-+	vlcg	%v0, %v31
-+	vlcg	%v15, %v0
-+	vlcg	%v31, %v0
-+	vlcg	%v14, %v17
-+
-+#CHECK: vlch    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xde]
-+#CHECK: vlch    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xde]
-+#CHECK: vlch    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xde]
-+#CHECK: vlch    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xde]
-+#CHECK: vlch    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xde]
-+#CHECK: vlch    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xde]
-+
-+	vlch	%v0, %v0
-+	vlch	%v0, %v15
-+	vlch	%v0, %v31
-+	vlch	%v15, %v0
-+	vlch	%v31, %v0
-+	vlch	%v14, %v17
-+
-+#CHECK: vldeb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc4]
-+#CHECK: vldeb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xc4]
-+#CHECK: vldeb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc4]
-+#CHECK: vldeb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xc4]
-+#CHECK: vldeb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc4]
-+#CHECK: vldeb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xc4]
-+
-+	vldeb	%v0, %v0
-+	vldeb	%v0, %v15
-+	vldeb	%v0, %v31
-+	vldeb	%v15, %v0
-+	vldeb	%v31, %v0
-+	vldeb	%v14, %v17
-+
-+#CHECK: vleb    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x00]
-+#CHECK: vleb    %v0, 0, 15              # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x00]
-+#CHECK: vleb    %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x00]
-+#CHECK: vleb    %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x00]
-+#CHECK: vleb    %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x00]
-+#CHECK: vleb    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x00]
-+#CHECK: vleb    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x00]
-+#CHECK: vleb    %v18, 1383(%r3,%r4), 8  # encoding: [0xe7,0x23,0x45,0x67,0x88,0x00]
-+
-+	vleb	%v0, 0, 0
-+	vleb	%v0, 0, 15
-+	vleb	%v0, 4095, 0
-+	vleb	%v0, 0(%r15), 0
-+	vleb	%v0, 0(%r15,%r1), 0
-+	vleb	%v15, 0, 0
-+	vleb	%v31, 0, 0
-+	vleb	%v18, 1383(%r3,%r4), 8
-+
-+#CHECK: vledb   %v0, %v0, 0, 0          # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc5]
-+#CHECK: vledb   %v0, %v0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc5]
-+#CHECK: vledb   %v0, %v0, 4, 0          # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc5]
-+#CHECK: vledb   %v0, %v0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
-+#CHECK: vledb   %v0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc5]
-+#CHECK: vledb   %v31, %v0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc5]
-+#CHECK: vledb   %v14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc5]
-+
-+	vledb	%v0, %v0, 0, 0
-+	vledb	%v0, %v0, 0, 15
-+	vledb	%v0, %v0, 4, 0
-+	vledb	%v0, %v0, 12, 0
-+	vledb	%v0, %v31, 0, 0
-+	vledb	%v31, %v0, 0, 0
-+	vledb	%v14, %v17, 4, 10
-+
-+#CHECK: vlef    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x03]
-+#CHECK: vlef    %v0, 0, 3               # encoding: [0xe7,0x00,0x00,0x00,0x30,0x03]
-+#CHECK: vlef    %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x03]
-+#CHECK: vlef    %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x03]
-+#CHECK: vlef    %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x03]
-+#CHECK: vlef    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x03]
-+#CHECK: vlef    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x03]
-+#CHECK: vlef    %v18, 1383(%r3,%r4), 2  # encoding: [0xe7,0x23,0x45,0x67,0x28,0x03]
-+
-+	vlef	%v0, 0, 0
-+	vlef	%v0, 0, 3
-+	vlef	%v0, 4095, 0
-+	vlef	%v0, 0(%r15), 0
-+	vlef	%v0, 0(%r15,%r1), 0
-+	vlef	%v15, 0, 0
-+	vlef	%v31, 0, 0
-+	vlef	%v18, 1383(%r3,%r4), 2
-+
-+#CHECK: vleg    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x02]
-+#CHECK: vleg    %v0, 0, 1               # encoding: [0xe7,0x00,0x00,0x00,0x10,0x02]
-+#CHECK: vleg    %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x02]
-+#CHECK: vleg    %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x02]
-+#CHECK: vleg    %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x02]
-+#CHECK: vleg    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x02]
-+#CHECK: vleg    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x02]
-+#CHECK: vleg    %v18, 1383(%r3,%r4), 1  # encoding: [0xe7,0x23,0x45,0x67,0x18,0x02]
-+
-+	vleg	%v0, 0, 0
-+	vleg	%v0, 0, 1
-+	vleg	%v0, 4095, 0
-+	vleg	%v0, 0(%r15), 0
-+	vleg	%v0, 0(%r15,%r1), 0
-+	vleg	%v15, 0, 0
-+	vleg	%v31, 0, 0
-+	vleg	%v18, 1383(%r3,%r4), 1
-+
-+#CHECK: vleh    %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x01]
-+#CHECK: vleh    %v0, 0, 7               # encoding: [0xe7,0x00,0x00,0x00,0x70,0x01]
-+#CHECK: vleh    %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x01]
-+#CHECK: vleh    %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x01]
-+#CHECK: vleh    %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x01]
-+#CHECK: vleh    %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x01]
-+#CHECK: vleh    %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x01]
-+#CHECK: vleh    %v18, 1383(%r3,%r4), 4  # encoding: [0xe7,0x23,0x45,0x67,0x48,0x01]
-+
-+	vleh	%v0, 0, 0
-+	vleh	%v0, 0, 7
-+	vleh	%v0, 4095, 0
-+	vleh	%v0, 0(%r15), 0
-+	vleh	%v0, 0(%r15,%r1), 0
-+	vleh	%v15, 0, 0
-+	vleh	%v31, 0, 0
-+	vleh	%v18, 1383(%r3,%r4), 4
-+
-+#CHECK: vleib   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x40]
-+#CHECK: vleib   %v0, 0, 15              # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x40]
-+#CHECK: vleib   %v0, -32768, 0          # encoding: [0xe7,0x00,0x80,0x00,0x00,0x40]
-+#CHECK: vleib   %v0, 32767, 0           # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x40]
-+#CHECK: vleib   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x40]
-+#CHECK: vleib   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x40]
-+#CHECK: vleib   %v18, 13398, 11         # encoding: [0xe7,0x20,0x34,0x56,0xb8,0x40]
-+
-+	vleib	%v0, 0, 0
-+	vleib	%v0, 0, 15
-+	vleib	%v0, -32768, 0
-+	vleib	%v0, 32767, 0
-+	vleib	%v15, 0, 0
-+	vleib	%v31, 0, 0
-+	vleib	%v18, 0x3456, 11
-+
-+#CHECK: vleif   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x43]
-+#CHECK: vleif   %v0, 0, 3               # encoding: [0xe7,0x00,0x00,0x00,0x30,0x43]
-+#CHECK: vleif   %v0, -32768, 0          # encoding: [0xe7,0x00,0x80,0x00,0x00,0x43]
-+#CHECK: vleif   %v0, 32767, 0           # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x43]
-+#CHECK: vleif   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x43]
-+#CHECK: vleif   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x43]
-+#CHECK: vleif   %v18, 13398, 3          # encoding: [0xe7,0x20,0x34,0x56,0x38,0x43]
-+
-+	vleif	%v0, 0, 0
-+	vleif	%v0, 0, 3
-+	vleif	%v0, -32768, 0
-+	vleif	%v0, 32767, 0
-+	vleif	%v15, 0, 0
-+	vleif	%v31, 0, 0
-+	vleif	%v18, 0x3456, 3
-+
-+#CHECK: vleig   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x42]
-+#CHECK: vleig   %v0, 0, 1               # encoding: [0xe7,0x00,0x00,0x00,0x10,0x42]
-+#CHECK: vleig   %v0, -32768, 0          # encoding: [0xe7,0x00,0x80,0x00,0x00,0x42]
-+#CHECK: vleig   %v0, 32767, 0           # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x42]
-+#CHECK: vleig   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x42]
-+#CHECK: vleig   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x42]
-+#CHECK: vleig   %v18, 13398, 1          # encoding: [0xe7,0x20,0x34,0x56,0x18,0x42]
-+
-+	vleig	%v0, 0, 0
-+	vleig	%v0, 0, 1
-+	vleig	%v0, -32768, 0
-+	vleig	%v0, 32767, 0
-+	vleig	%v15, 0, 0
-+	vleig	%v31, 0, 0
-+	vleig	%v18, 0x3456, 1
-+
-+#CHECK: vleih   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x41]
-+#CHECK: vleih   %v0, 0, 7               # encoding: [0xe7,0x00,0x00,0x00,0x70,0x41]
-+#CHECK: vleih   %v0, -32768, 0          # encoding: [0xe7,0x00,0x80,0x00,0x00,0x41]
-+#CHECK: vleih   %v0, 32767, 0           # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x41]
-+#CHECK: vleih   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x41]
-+#CHECK: vleih   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x41]
-+#CHECK: vleih   %v18, 13398, 7          # encoding: [0xe7,0x20,0x34,0x56,0x78,0x41]
-+
-+	vleih	%v0, 0, 0
-+	vleih	%v0, 0, 7
-+	vleih	%v0, -32768, 0
-+	vleih	%v0, 32767, 0
-+	vleih	%v15, 0, 0
-+	vleih	%v31, 0, 0
-+	vleih	%v18, 0x3456, 7
-+
-+#CHECK: vlgvb   %r0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x21]
-+#CHECK: vlgvb   %r0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x21]
-+#CHECK: vlgvb   %r0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x21]
-+#CHECK: vlgvb   %r0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x21]
-+#CHECK: vlgvb   %r0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x21]
-+#CHECK: vlgvb   %r15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x21]
-+#CHECK: vlgvb   %r2, %v19, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x04,0x21]
-+
-+	vlgvb	%r0, %v0, 0
-+	vlgvb	%r0, %v0, 4095
-+	vlgvb	%r0, %v0, 0(%r15)
-+	vlgvb	%r0, %v15, 0
-+	vlgvb	%r0, %v31, 0
-+	vlgvb	%r15, %v0, 0
-+	vlgvb	%r2, %v19, 1383(%r4)
-+
-+#CHECK: vlgvf   %r0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x21]
-+#CHECK: vlgvf   %r0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x21]
-+#CHECK: vlgvf   %r0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x21]
-+#CHECK: vlgvf   %r0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x21]
-+#CHECK: vlgvf   %r0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x21]
-+#CHECK: vlgvf   %r15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x21]
-+#CHECK: vlgvf   %r2, %v19, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x24,0x21]
-+
-+	vlgvf	%r0, %v0, 0
-+	vlgvf	%r0, %v0, 4095
-+	vlgvf	%r0, %v0, 0(%r15)
-+	vlgvf	%r0, %v15, 0
-+	vlgvf	%r0, %v31, 0
-+	vlgvf	%r15, %v0, 0
-+	vlgvf	%r2, %v19, 1383(%r4)
-+
-+#CHECK: vlgvg   %r0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x21]
-+#CHECK: vlgvg   %r0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x21]
-+#CHECK: vlgvg   %r0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x21]
-+#CHECK: vlgvg   %r0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x21]
-+#CHECK: vlgvg   %r0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x21]
-+#CHECK: vlgvg   %r15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x21]
-+#CHECK: vlgvg   %r2, %v19, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x34,0x21]
-+
-+	vlgvg	%r0, %v0, 0
-+	vlgvg	%r0, %v0, 4095
-+	vlgvg	%r0, %v0, 0(%r15)
-+	vlgvg	%r0, %v15, 0
-+	vlgvg	%r0, %v31, 0
-+	vlgvg	%r15, %v0, 0
-+	vlgvg	%r2, %v19, 1383(%r4)
-+
-+#CHECK: vlgvh   %r0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x21]
-+#CHECK: vlgvh   %r0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x21]
-+#CHECK: vlgvh   %r0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x21]
-+#CHECK: vlgvh   %r0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x21]
-+#CHECK: vlgvh   %r0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x21]
-+#CHECK: vlgvh   %r15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x21]
-+#CHECK: vlgvh   %r2, %v19, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x14,0x21]
-+
-+	vlgvh	%r0, %v0, 0
-+	vlgvh	%r0, %v0, 4095
-+	vlgvh	%r0, %v0, 0(%r15)
-+	vlgvh	%r0, %v15, 0
-+	vlgvh	%r0, %v31, 0
-+	vlgvh	%r15, %v0, 0
-+	vlgvh	%r2, %v19, 1383(%r4)
-+
-+#CHECK: vll     %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x37]
-+#CHECK: vll     %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x37]
-+#CHECK: vll     %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x37]
-+#CHECK: vll     %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x37]
-+#CHECK: vll     %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x37]
-+#CHECK: vll     %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x37]
-+#CHECK: vll     %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x08,0x37]
-+
-+	vll	%v0, %r0, 0
-+	vll	%v0, %r0, 4095
-+	vll	%v0, %r0, 0(%r15)
-+	vll	%v0, %r15, 0
-+	vll	%v15, %r0, 0
-+	vll	%v31, %r0, 0
-+	vll	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vllezb  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x04]
-+#CHECK: vllezb  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x04]
-+#CHECK: vllezb  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x04]
-+#CHECK: vllezb  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x04]
-+#CHECK: vllezb  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x04]
-+#CHECK: vllezb  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x04]
-+#CHECK: vllezb  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x08,0x04]
-+
-+	vllezb	%v0, 0
-+	vllezb	%v0, 4095
-+	vllezb	%v0, 0(%r15)
-+	vllezb	%v0, 0(%r15,%r1)
-+	vllezb	%v15, 0
-+	vllezb	%v31, 0
-+	vllezb	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vllezf  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x20,0x04]
-+#CHECK: vllezf  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x04]
-+#CHECK: vllezf  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x04]
-+#CHECK: vllezf  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x04]
-+#CHECK: vllezf  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x04]
-+#CHECK: vllezf  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x04]
-+#CHECK: vllezf  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x28,0x04]
-+
-+	vllezf	%v0, 0
-+	vllezf	%v0, 4095
-+	vllezf	%v0, 0(%r15)
-+	vllezf	%v0, 0(%r15,%r1)
-+	vllezf	%v15, 0
-+	vllezf	%v31, 0
-+	vllezf	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vllezg  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x30,0x04]
-+#CHECK: vllezg  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x04]
-+#CHECK: vllezg  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x04]
-+#CHECK: vllezg  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x04]
-+#CHECK: vllezg  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x04]
-+#CHECK: vllezg  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x04]
-+#CHECK: vllezg  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x38,0x04]
-+
-+	vllezg	%v0, 0
-+	vllezg	%v0, 4095
-+	vllezg	%v0, 0(%r15)
-+	vllezg	%v0, 0(%r15,%r1)
-+	vllezg	%v15, 0
-+	vllezg	%v31, 0
-+	vllezg	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vllezh  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x10,0x04]
-+#CHECK: vllezh  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x04]
-+#CHECK: vllezh  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x04]
-+#CHECK: vllezh  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x04]
-+#CHECK: vllezh  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x04]
-+#CHECK: vllezh  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x04]
-+#CHECK: vllezh  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x18,0x04]
-+
-+	vllezh	%v0, 0
-+	vllezh	%v0, 4095
-+	vllezh	%v0, 0(%r15)
-+	vllezh	%v0, 0(%r15,%r1)
-+	vllezh	%v15, 0
-+	vllezh	%v31, 0
-+	vllezh	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlm     %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x36]
-+#CHECK: vlm     %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x36]
-+#CHECK: vlm     %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x36]
-+#CHECK: vlm     %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x36]
-+#CHECK: vlm     %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x36]
-+#CHECK: vlm     %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x36]
-+
-+	vlm	%v0, %v0, 0
-+	vlm	%v0, %v0, 4095
-+	vlm	%v0, %v0, 0(%r15)
-+	vlm	%v0, %v31, 0
-+	vlm	%v31, %v0, 0
-+	vlm	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vlpb    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdf]
-+#CHECK: vlpb    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdf]
-+#CHECK: vlpb    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdf]
-+#CHECK: vlpb    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdf]
-+#CHECK: vlpb    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdf]
-+#CHECK: vlpb    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdf]
-+
-+	vlpb	%v0, %v0
-+	vlpb	%v0, %v15
-+	vlpb	%v0, %v31
-+	vlpb	%v15, %v0
-+	vlpb	%v31, %v0
-+	vlpb	%v14, %v17
-+
-+#CHECK: vlpf    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xdf]
-+#CHECK: vlpf    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdf]
-+#CHECK: vlpf    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdf]
-+#CHECK: vlpf    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdf]
-+#CHECK: vlpf    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdf]
-+#CHECK: vlpf    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdf]
-+
-+	vlpf	%v0, %v0
-+	vlpf	%v0, %v15
-+	vlpf	%v0, %v31
-+	vlpf	%v15, %v0
-+	vlpf	%v31, %v0
-+	vlpf	%v14, %v17
-+
-+#CHECK: vlpg    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdf]
-+#CHECK: vlpg    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdf]
-+#CHECK: vlpg    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdf]
-+#CHECK: vlpg    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdf]
-+#CHECK: vlpg    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdf]
-+#CHECK: vlpg    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdf]
-+
-+	vlpg	%v0, %v0
-+	vlpg	%v0, %v15
-+	vlpg	%v0, %v31
-+	vlpg	%v15, %v0
-+	vlpg	%v31, %v0
-+	vlpg	%v14, %v17
-+
-+#CHECK: vlph    %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdf]
-+#CHECK: vlph    %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdf]
-+#CHECK: vlph    %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdf]
-+#CHECK: vlph    %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdf]
-+#CHECK: vlph    %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdf]
-+#CHECK: vlph    %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xdf]
-+
-+	vlph	%v0, %v0
-+	vlph	%v0, %v15
-+	vlph	%v0, %v31
-+	vlph	%v15, %v0
-+	vlph	%v31, %v0
-+	vlph	%v14, %v17
-+
-+#CHECK: vlr     %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0x56]
-+#CHECK: vlr     %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x56]
-+#CHECK: vlr     %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x56]
-+#CHECK: vlr     %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x56]
-+#CHECK: vlr     %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x56]
-+#CHECK: vlr     %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x56]
-+
-+	vlr	%v0, %v0
-+	vlr	%v0, %v15
-+	vlr	%v0, %v31
-+	vlr	%v15, %v0
-+	vlr	%v31, %v0
-+	vlr	%v14, %v17
-+
-+#CHECK: vlrepb  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x05]
-+#CHECK: vlrepb  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x05]
-+#CHECK: vlrepb  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x05]
-+#CHECK: vlrepb  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x05]
-+#CHECK: vlrepb  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x05]
-+#CHECK: vlrepb  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x05]
-+#CHECK: vlrepb  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x08,0x05]
-+
-+	vlrepb	%v0, 0
-+	vlrepb	%v0, 4095
-+	vlrepb	%v0, 0(%r15)
-+	vlrepb	%v0, 0(%r15,%r1)
-+	vlrepb	%v15, 0
-+	vlrepb	%v31, 0
-+	vlrepb	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlrepf  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x20,0x05]
-+#CHECK: vlrepf  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x05]
-+#CHECK: vlrepf  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x05]
-+#CHECK: vlrepf  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x05]
-+#CHECK: vlrepf  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x05]
-+#CHECK: vlrepf  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x05]
-+#CHECK: vlrepf  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x28,0x05]
-+
-+	vlrepf	%v0, 0
-+	vlrepf	%v0, 4095
-+	vlrepf	%v0, 0(%r15)
-+	vlrepf	%v0, 0(%r15,%r1)
-+	vlrepf	%v15, 0
-+	vlrepf	%v31, 0
-+	vlrepf	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlrepg  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x30,0x05]
-+#CHECK: vlrepg  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x05]
-+#CHECK: vlrepg  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x05]
-+#CHECK: vlrepg  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x05]
-+#CHECK: vlrepg  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x05]
-+#CHECK: vlrepg  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x05]
-+#CHECK: vlrepg  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x38,0x05]
-+
-+	vlrepg	%v0, 0
-+	vlrepg	%v0, 4095
-+	vlrepg	%v0, 0(%r15)
-+	vlrepg	%v0, 0(%r15,%r1)
-+	vlrepg	%v15, 0
-+	vlrepg	%v31, 0
-+	vlrepg	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlreph  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x10,0x05]
-+#CHECK: vlreph  %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x05]
-+#CHECK: vlreph  %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x05]
-+#CHECK: vlreph  %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x05]
-+#CHECK: vlreph  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x05]
-+#CHECK: vlreph  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x05]
-+#CHECK: vlreph  %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x18,0x05]
-+
-+	vlreph	%v0, 0
-+	vlreph	%v0, 4095
-+	vlreph	%v0, 0(%r15)
-+	vlreph	%v0, 0(%r15,%r1)
-+	vlreph	%v15, 0
-+	vlreph	%v31, 0
-+	vlreph	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vlvgb   %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x22]
-+#CHECK: vlvgb   %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x22]
-+#CHECK: vlvgb   %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x22]
-+#CHECK: vlvgb   %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x22]
-+#CHECK: vlvgb   %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x22]
-+#CHECK: vlvgb   %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x22]
-+#CHECK: vlvgb   %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x08,0x22]
-+
-+	vlvgb	%v0, %r0, 0
-+	vlvgb	%v0, %r0, 4095
-+	vlvgb	%v0, %r0, 0(%r15)
-+	vlvgb	%v0, %r15, 0
-+	vlvgb	%v15, %r0, 0
-+	vlvgb	%v31, %r0, 0
-+	vlvgb	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vlvgf   %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x22]
-+#CHECK: vlvgf   %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x22]
-+#CHECK: vlvgf   %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x22]
-+#CHECK: vlvgf   %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x22]
-+#CHECK: vlvgf   %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x22]
-+#CHECK: vlvgf   %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x22]
-+#CHECK: vlvgf   %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x28,0x22]
-+
-+	vlvgf	%v0, %r0, 0
-+	vlvgf	%v0, %r0, 4095
-+	vlvgf	%v0, %r0, 0(%r15)
-+	vlvgf	%v0, %r15, 0
-+	vlvgf	%v15, %r0, 0
-+	vlvgf	%v31, %r0, 0
-+	vlvgf	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vlvgg   %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x22]
-+#CHECK: vlvgg   %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x22]
-+#CHECK: vlvgg   %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x22]
-+#CHECK: vlvgg   %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x22]
-+#CHECK: vlvgg   %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x22]
-+#CHECK: vlvgg   %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x22]
-+#CHECK: vlvgg   %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x38,0x22]
-+
-+	vlvgg	%v0, %r0, 0
-+	vlvgg	%v0, %r0, 4095
-+	vlvgg	%v0, %r0, 0(%r15)
-+	vlvgg	%v0, %r15, 0
-+	vlvgg	%v15, %r0, 0
-+	vlvgg	%v31, %r0, 0
-+	vlvgg	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vlvgh   %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x22]
-+#CHECK: vlvgh   %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x22]
-+#CHECK: vlvgh   %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x22]
-+#CHECK: vlvgh   %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x22]
-+#CHECK: vlvgh   %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x22]
-+#CHECK: vlvgh   %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x22]
-+#CHECK: vlvgh   %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x18,0x22]
-+
-+	vlvgh	%v0, %r0, 0
-+	vlvgh	%v0, %r0, 4095
-+	vlvgh	%v0, %r0, 0(%r15)
-+	vlvgh	%v0, %r15, 0
-+	vlvgh	%v15, %r0, 0
-+	vlvgh	%v31, %r0, 0
-+	vlvgh	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vlvgp   %v0, %r0, %r0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x62]
-+#CHECK: vlvgp   %v0, %r0, %r15          # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x62]
-+#CHECK: vlvgp   %v0, %r15, %r0          # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x62]
-+#CHECK: vlvgp   %v15, %r0, %r0          # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x62]
-+#CHECK: vlvgp   %v31, %r0, %r0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x62]
-+#CHECK: vlvgp   %v18, %r3, %r4          # encoding: [0xe7,0x23,0x40,0x00,0x08,0x62]
-+
-+	vlvgp	%v0, %r0, %r0
-+	vlvgp	%v0, %r0, %r15
-+	vlvgp	%v0, %r15, %r0
-+	vlvgp	%v15, %r0, %r0
-+	vlvgp	%v31, %r0, %r0
-+	vlvgp	%v18, %r3, %r4
-+
-+#CHECK: vmaeb   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xae]
-+#CHECK: vmaeb   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xae]
-+#CHECK: vmaeb   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xae]
-+#CHECK: vmaeb   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xae]
-+#CHECK: vmaeb   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xae]
-+#CHECK: vmaeb   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xae]
-+
-+	vmaeb	%v0, %v0, %v0, %v0
-+	vmaeb	%v0, %v0, %v0, %v31
-+	vmaeb	%v0, %v0, %v31, %v0
-+	vmaeb	%v0, %v31, %v0, %v0
-+	vmaeb	%v31, %v0, %v0, %v0
-+	vmaeb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaef   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xae]
-+#CHECK: vmaef   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xae]
-+#CHECK: vmaef   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xae]
-+#CHECK: vmaef   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xae]
-+#CHECK: vmaef   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xae]
-+#CHECK: vmaef   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xae]
-+
-+	vmaef	%v0, %v0, %v0, %v0
-+	vmaef	%v0, %v0, %v0, %v31
-+	vmaef	%v0, %v0, %v31, %v0
-+	vmaef	%v0, %v31, %v0, %v0
-+	vmaef	%v31, %v0, %v0, %v0
-+	vmaef	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaeh   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xae]
-+#CHECK: vmaeh   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xae]
-+#CHECK: vmaeh   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xae]
-+#CHECK: vmaeh   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xae]
-+#CHECK: vmaeh   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xae]
-+#CHECK: vmaeh   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xae]
-+
-+	vmaeh	%v0, %v0, %v0, %v0
-+	vmaeh	%v0, %v0, %v0, %v31
-+	vmaeh	%v0, %v0, %v31, %v0
-+	vmaeh	%v0, %v31, %v0, %v0
-+	vmaeh	%v31, %v0, %v0, %v0
-+	vmaeh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmahb   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xab]
-+#CHECK: vmahb   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xab]
-+#CHECK: vmahb   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xab]
-+#CHECK: vmahb   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xab]
-+#CHECK: vmahb   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xab]
-+#CHECK: vmahb   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xab]
-+
-+	vmahb	%v0, %v0, %v0, %v0
-+	vmahb	%v0, %v0, %v0, %v31
-+	vmahb	%v0, %v0, %v31, %v0
-+	vmahb	%v0, %v31, %v0, %v0
-+	vmahb	%v31, %v0, %v0, %v0
-+	vmahb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmahf   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xab]
-+#CHECK: vmahf   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xab]
-+#CHECK: vmahf   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xab]
-+#CHECK: vmahf   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xab]
-+#CHECK: vmahf   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xab]
-+#CHECK: vmahf   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xab]
-+
-+	vmahf	%v0, %v0, %v0, %v0
-+	vmahf	%v0, %v0, %v0, %v31
-+	vmahf	%v0, %v0, %v31, %v0
-+	vmahf	%v0, %v31, %v0, %v0
-+	vmahf	%v31, %v0, %v0, %v0
-+	vmahf	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmahh   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xab]
-+#CHECK: vmahh   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xab]
-+#CHECK: vmahh   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xab]
-+#CHECK: vmahh   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xab]
-+#CHECK: vmahh   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xab]
-+#CHECK: vmahh   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xab]
-+
-+	vmahh	%v0, %v0, %v0, %v0
-+	vmahh	%v0, %v0, %v0, %v31
-+	vmahh	%v0, %v0, %v31, %v0
-+	vmahh	%v0, %v31, %v0, %v0
-+	vmahh	%v31, %v0, %v0, %v0
-+	vmahh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalb   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaa]
-+#CHECK: vmalb   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaa]
-+#CHECK: vmalb   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaa]
-+#CHECK: vmalb   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaa]
-+#CHECK: vmalb   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaa]
-+#CHECK: vmalb   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaa]
-+
-+	vmalb	%v0, %v0, %v0, %v0
-+	vmalb	%v0, %v0, %v0, %v31
-+	vmalb	%v0, %v0, %v31, %v0
-+	vmalb	%v0, %v31, %v0, %v0
-+	vmalb	%v31, %v0, %v0, %v0
-+	vmalb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaleb  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xac]
-+#CHECK: vmaleb  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xac]
-+#CHECK: vmaleb  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xac]
-+#CHECK: vmaleb  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xac]
-+#CHECK: vmaleb  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xac]
-+#CHECK: vmaleb  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xac]
-+
-+	vmaleb	%v0, %v0, %v0, %v0
-+	vmaleb	%v0, %v0, %v0, %v31
-+	vmaleb	%v0, %v0, %v31, %v0
-+	vmaleb	%v0, %v31, %v0, %v0
-+	vmaleb	%v31, %v0, %v0, %v0
-+	vmaleb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalef  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xac]
-+#CHECK: vmalef  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xac]
-+#CHECK: vmalef  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xac]
-+#CHECK: vmalef  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xac]
-+#CHECK: vmalef  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xac]
-+#CHECK: vmalef  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xac]
-+
-+	vmalef	%v0, %v0, %v0, %v0
-+	vmalef	%v0, %v0, %v0, %v31
-+	vmalef	%v0, %v0, %v31, %v0
-+	vmalef	%v0, %v31, %v0, %v0
-+	vmalef	%v31, %v0, %v0, %v0
-+	vmalef	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaleh  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xac]
-+#CHECK: vmaleh  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xac]
-+#CHECK: vmaleh  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xac]
-+#CHECK: vmaleh  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xac]
-+#CHECK: vmaleh  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xac]
-+#CHECK: vmaleh  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xac]
-+
-+	vmaleh	%v0, %v0, %v0, %v0
-+	vmaleh	%v0, %v0, %v0, %v31
-+	vmaleh	%v0, %v0, %v31, %v0
-+	vmaleh	%v0, %v31, %v0, %v0
-+	vmaleh	%v31, %v0, %v0, %v0
-+	vmaleh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalf   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaa]
-+#CHECK: vmalf   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaa]
-+#CHECK: vmalf   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaa]
-+#CHECK: vmalf   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaa]
-+#CHECK: vmalf   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaa]
-+#CHECK: vmalf   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaa]
-+
-+	vmalf	%v0, %v0, %v0, %v0
-+	vmalf	%v0, %v0, %v0, %v31
-+	vmalf	%v0, %v0, %v31, %v0
-+	vmalf	%v0, %v31, %v0, %v0
-+	vmalf	%v31, %v0, %v0, %v0
-+	vmalf	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalhb  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa9]
-+#CHECK: vmalhb  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xa9]
-+#CHECK: vmalhb  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa9]
-+#CHECK: vmalhb  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa9]
-+#CHECK: vmalhb  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa9]
-+#CHECK: vmalhb  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xa9]
-+
-+	vmalhb	%v0, %v0, %v0, %v0
-+	vmalhb	%v0, %v0, %v0, %v31
-+	vmalhb	%v0, %v0, %v31, %v0
-+	vmalhb	%v0, %v31, %v0, %v0
-+	vmalhb	%v31, %v0, %v0, %v0
-+	vmalhb	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalhf  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xa9]
-+#CHECK: vmalhf  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xa9]
-+#CHECK: vmalhf  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xa9]
-+#CHECK: vmalhf  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xa9]
-+#CHECK: vmalhf  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xa9]
-+#CHECK: vmalhf  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xa9]
-+
-+	vmalhf	%v0, %v0, %v0, %v0
-+	vmalhf	%v0, %v0, %v0, %v31
-+	vmalhf	%v0, %v0, %v31, %v0
-+	vmalhf	%v0, %v31, %v0, %v0
-+	vmalhf	%v31, %v0, %v0, %v0
-+	vmalhf	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalhh  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xa9]
-+#CHECK: vmalhh  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xa9]
-+#CHECK: vmalhh  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xa9]
-+#CHECK: vmalhh  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xa9]
-+#CHECK: vmalhh  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xa9]
-+#CHECK: vmalhh  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xa9]
-+
-+	vmalhh	%v0, %v0, %v0, %v0
-+	vmalhh	%v0, %v0, %v0, %v31
-+	vmalhh	%v0, %v0, %v31, %v0
-+	vmalhh	%v0, %v31, %v0, %v0
-+	vmalhh	%v31, %v0, %v0, %v0
-+	vmalhh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalhw  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaa]
-+#CHECK: vmalhw  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaa]
-+#CHECK: vmalhw  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaa]
-+#CHECK: vmalhw  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaa]
-+#CHECK: vmalhw  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaa]
-+#CHECK: vmalhw  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xaa]
-+
-+	vmalhw	%v0, %v0, %v0, %v0
-+	vmalhw	%v0, %v0, %v0, %v31
-+	vmalhw	%v0, %v0, %v31, %v0
-+	vmalhw	%v0, %v31, %v0, %v0
-+	vmalhw	%v31, %v0, %v0, %v0
-+	vmalhw	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalob  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xad]
-+#CHECK: vmalob  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xad]
-+#CHECK: vmalob  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xad]
-+#CHECK: vmalob  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xad]
-+#CHECK: vmalob  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xad]
-+#CHECK: vmalob  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xad]
-+
-+	vmalob	%v0, %v0, %v0, %v0
-+	vmalob	%v0, %v0, %v0, %v31
-+	vmalob	%v0, %v0, %v31, %v0
-+	vmalob	%v0, %v31, %v0, %v0
-+	vmalob	%v31, %v0, %v0, %v0
-+	vmalob	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmalof  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xad]
-+#CHECK: vmalof  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xad]
-+#CHECK: vmalof  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xad]
-+#CHECK: vmalof  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xad]
-+#CHECK: vmalof  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xad]
-+#CHECK: vmalof  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xad]
-+
-+	vmalof	%v0, %v0, %v0, %v0
-+	vmalof	%v0, %v0, %v0, %v31
-+	vmalof	%v0, %v0, %v31, %v0
-+	vmalof	%v0, %v31, %v0, %v0
-+	vmalof	%v31, %v0, %v0, %v0
-+	vmalof	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaloh  %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xad]
-+#CHECK: vmaloh  %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xad]
-+#CHECK: vmaloh  %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xad]
-+#CHECK: vmaloh  %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xad]
-+#CHECK: vmaloh  %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xad]
-+#CHECK: vmaloh  %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xad]
-+
-+	vmaloh	%v0, %v0, %v0, %v0
-+	vmaloh	%v0, %v0, %v0, %v31
-+	vmaloh	%v0, %v0, %v31, %v0
-+	vmaloh	%v0, %v31, %v0, %v0
-+	vmaloh	%v31, %v0, %v0, %v0
-+	vmaloh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaob   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaf]
-+#CHECK: vmaob   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaf]
-+#CHECK: vmaob   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaf]
-+#CHECK: vmaob   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaf]
-+#CHECK: vmaob   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaf]
-+#CHECK: vmaob   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaf]
-+
-+	vmaob	%v0, %v0, %v0, %v0
-+	vmaob	%v0, %v0, %v0, %v31
-+	vmaob	%v0, %v0, %v31, %v0
-+	vmaob	%v0, %v31, %v0, %v0
-+	vmaob	%v31, %v0, %v0, %v0
-+	vmaob	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaof   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaf]
-+#CHECK: vmaof   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaf]
-+#CHECK: vmaof   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaf]
-+#CHECK: vmaof   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaf]
-+#CHECK: vmaof   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaf]
-+#CHECK: vmaof   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaf]
-+
-+	vmaof	%v0, %v0, %v0, %v0
-+	vmaof	%v0, %v0, %v0, %v31
-+	vmaof	%v0, %v0, %v31, %v0
-+	vmaof	%v0, %v31, %v0, %v0
-+	vmaof	%v31, %v0, %v0, %v0
-+	vmaof	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmaoh   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaf]
-+#CHECK: vmaoh   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaf]
-+#CHECK: vmaoh   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaf]
-+#CHECK: vmaoh   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaf]
-+#CHECK: vmaoh   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaf]
-+#CHECK: vmaoh   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xaf]
-+
-+	vmaoh	%v0, %v0, %v0, %v0
-+	vmaoh	%v0, %v0, %v0, %v31
-+	vmaoh	%v0, %v0, %v31, %v0
-+	vmaoh	%v0, %v31, %v0, %v0
-+	vmaoh	%v31, %v0, %v0, %v0
-+	vmaoh	%v13, %v17, %v21, %v25
-+
-+#CHECK: vmeb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa6]
-+#CHECK: vmeb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa6]
-+#CHECK: vmeb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa6]
-+#CHECK: vmeb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa6]
-+#CHECK: vmeb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa6]
-+
-+	vmeb	%v0, %v0, %v0
-+	vmeb	%v0, %v0, %v31
-+	vmeb	%v0, %v31, %v0
-+	vmeb	%v31, %v0, %v0
-+	vmeb	%v18, %v3, %v20
-+
-+#CHECK: vmef    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa6]
-+#CHECK: vmef    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa6]
-+#CHECK: vmef    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa6]
-+#CHECK: vmef    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa6]
-+#CHECK: vmef    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa6]
-+
-+	vmef	%v0, %v0, %v0
-+	vmef	%v0, %v0, %v31
-+	vmef	%v0, %v31, %v0
-+	vmef	%v31, %v0, %v0
-+	vmef	%v18, %v3, %v20
-+
-+#CHECK: vmeh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa6]
-+#CHECK: vmeh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa6]
-+#CHECK: vmeh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa6]
-+#CHECK: vmeh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa6]
-+#CHECK: vmeh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa6]
-+
-+	vmeh	%v0, %v0, %v0
-+	vmeh	%v0, %v0, %v31
-+	vmeh	%v0, %v31, %v0
-+	vmeh	%v31, %v0, %v0
-+	vmeh	%v18, %v3, %v20
-+
-+#CHECK: vmhb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa3]
-+#CHECK: vmhb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa3]
-+#CHECK: vmhb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa3]
-+#CHECK: vmhb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa3]
-+#CHECK: vmhb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa3]
-+
-+	vmhb	%v0, %v0, %v0
-+	vmhb	%v0, %v0, %v31
-+	vmhb	%v0, %v31, %v0
-+	vmhb	%v31, %v0, %v0
-+	vmhb	%v18, %v3, %v20
-+
-+#CHECK: vmhf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa3]
-+#CHECK: vmhf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa3]
-+#CHECK: vmhf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa3]
-+#CHECK: vmhf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa3]
-+#CHECK: vmhf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa3]
-+
-+	vmhf	%v0, %v0, %v0
-+	vmhf	%v0, %v0, %v31
-+	vmhf	%v0, %v31, %v0
-+	vmhf	%v31, %v0, %v0
-+	vmhf	%v18, %v3, %v20
-+
-+#CHECK: vmhh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa3]
-+#CHECK: vmhh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa3]
-+#CHECK: vmhh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa3]
-+#CHECK: vmhh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa3]
-+#CHECK: vmhh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa3]
-+
-+	vmhh	%v0, %v0, %v0
-+	vmhh	%v0, %v0, %v31
-+	vmhh	%v0, %v31, %v0
-+	vmhh	%v31, %v0, %v0
-+	vmhh	%v18, %v3, %v20
-+
-+#CHECK: vmlb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa2]
-+#CHECK: vmlb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa2]
-+#CHECK: vmlb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa2]
-+#CHECK: vmlb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa2]
-+#CHECK: vmlb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa2]
-+
-+	vmlb	%v0, %v0, %v0
-+	vmlb	%v0, %v0, %v31
-+	vmlb	%v0, %v31, %v0
-+	vmlb	%v31, %v0, %v0
-+	vmlb	%v18, %v3, %v20
-+
-+#CHECK: vmleb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa4]
-+#CHECK: vmleb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa4]
-+#CHECK: vmleb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa4]
-+#CHECK: vmleb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa4]
-+#CHECK: vmleb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa4]
-+
-+	vmleb	%v0, %v0, %v0
-+	vmleb	%v0, %v0, %v31
-+	vmleb	%v0, %v31, %v0
-+	vmleb	%v31, %v0, %v0
-+	vmleb	%v18, %v3, %v20
-+
-+#CHECK: vmlef   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa4]
-+#CHECK: vmlef   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa4]
-+#CHECK: vmlef   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa4]
-+#CHECK: vmlef   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa4]
-+#CHECK: vmlef   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa4]
-+
-+	vmlef	%v0, %v0, %v0
-+	vmlef	%v0, %v0, %v31
-+	vmlef	%v0, %v31, %v0
-+	vmlef	%v31, %v0, %v0
-+	vmlef	%v18, %v3, %v20
-+
-+#CHECK: vmleh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa4]
-+#CHECK: vmleh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa4]
-+#CHECK: vmleh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa4]
-+#CHECK: vmleh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa4]
-+#CHECK: vmleh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa4]
-+
-+	vmleh	%v0, %v0, %v0
-+	vmleh	%v0, %v0, %v31
-+	vmleh	%v0, %v31, %v0
-+	vmleh	%v31, %v0, %v0
-+	vmleh	%v18, %v3, %v20
-+
-+#CHECK: vmlf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa2]
-+#CHECK: vmlf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa2]
-+#CHECK: vmlf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa2]
-+#CHECK: vmlf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa2]
-+#CHECK: vmlf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa2]
-+
-+	vmlf	%v0, %v0, %v0
-+	vmlf	%v0, %v0, %v31
-+	vmlf	%v0, %v31, %v0
-+	vmlf	%v31, %v0, %v0
-+	vmlf	%v18, %v3, %v20
-+
-+#CHECK: vmlhb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa1]
-+#CHECK: vmlhb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa1]
-+#CHECK: vmlhb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa1]
-+#CHECK: vmlhb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa1]
-+#CHECK: vmlhb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa1]
-+
-+	vmlhb	%v0, %v0, %v0
-+	vmlhb	%v0, %v0, %v31
-+	vmlhb	%v0, %v31, %v0
-+	vmlhb	%v31, %v0, %v0
-+	vmlhb	%v18, %v3, %v20
-+
-+#CHECK: vmlhf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa1]
-+#CHECK: vmlhf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa1]
-+#CHECK: vmlhf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa1]
-+#CHECK: vmlhf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa1]
-+#CHECK: vmlhf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa1]
-+
-+	vmlhf	%v0, %v0, %v0
-+	vmlhf	%v0, %v0, %v31
-+	vmlhf	%v0, %v31, %v0
-+	vmlhf	%v31, %v0, %v0
-+	vmlhf	%v18, %v3, %v20
-+
-+#CHECK: vmlhh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa1]
-+#CHECK: vmlhh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa1]
-+#CHECK: vmlhh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa1]
-+#CHECK: vmlhh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa1]
-+#CHECK: vmlhh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa1]
-+
-+	vmlhh	%v0, %v0, %v0
-+	vmlhh	%v0, %v0, %v31
-+	vmlhh	%v0, %v31, %v0
-+	vmlhh	%v31, %v0, %v0
-+	vmlhh	%v18, %v3, %v20
-+
-+#CHECK: vmlhw   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa2]
-+#CHECK: vmlhw   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa2]
-+#CHECK: vmlhw   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa2]
-+#CHECK: vmlhw   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa2]
-+#CHECK: vmlhw   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa2]
-+
-+	vmlhw	%v0, %v0, %v0
-+	vmlhw	%v0, %v0, %v31
-+	vmlhw	%v0, %v31, %v0
-+	vmlhw	%v31, %v0, %v0
-+	vmlhw	%v18, %v3, %v20
-+
-+#CHECK: vmlob   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa5]
-+#CHECK: vmlob   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa5]
-+#CHECK: vmlob   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa5]
-+#CHECK: vmlob   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa5]
-+#CHECK: vmlob   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa5]
-+
-+	vmlob	%v0, %v0, %v0
-+	vmlob	%v0, %v0, %v31
-+	vmlob	%v0, %v31, %v0
-+	vmlob	%v31, %v0, %v0
-+	vmlob	%v18, %v3, %v20
-+
-+#CHECK: vmlof   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa5]
-+#CHECK: vmlof   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa5]
-+#CHECK: vmlof   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa5]
-+#CHECK: vmlof   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa5]
-+#CHECK: vmlof   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa5]
-+
-+	vmlof	%v0, %v0, %v0
-+	vmlof	%v0, %v0, %v31
-+	vmlof	%v0, %v31, %v0
-+	vmlof	%v31, %v0, %v0
-+	vmlof	%v18, %v3, %v20
-+
-+#CHECK: vmloh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa5]
-+#CHECK: vmloh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa5]
-+#CHECK: vmloh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa5]
-+#CHECK: vmloh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa5]
-+#CHECK: vmloh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa5]
-+
-+	vmloh	%v0, %v0, %v0
-+	vmloh	%v0, %v0, %v31
-+	vmloh	%v0, %v31, %v0
-+	vmloh	%v31, %v0, %v0
-+	vmloh	%v18, %v3, %v20
-+
-+#CHECK: vmnb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfe]
-+#CHECK: vmnb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfe]
-+#CHECK: vmnb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfe]
-+#CHECK: vmnb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfe]
-+#CHECK: vmnb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfe]
-+
-+	vmnb	%v0, %v0, %v0
-+	vmnb	%v0, %v0, %v31
-+	vmnb	%v0, %v31, %v0
-+	vmnb	%v31, %v0, %v0
-+	vmnb	%v18, %v3, %v20
-+
-+#CHECK: vmnf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfe]
-+#CHECK: vmnf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfe]
-+#CHECK: vmnf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfe]
-+#CHECK: vmnf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfe]
-+#CHECK: vmnf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfe]
-+
-+	vmnf	%v0, %v0, %v0
-+	vmnf	%v0, %v0, %v31
-+	vmnf	%v0, %v31, %v0
-+	vmnf	%v31, %v0, %v0
-+	vmnf	%v18, %v3, %v20
-+
-+#CHECK: vmng    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfe]
-+#CHECK: vmng    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfe]
-+#CHECK: vmng    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfe]
-+#CHECK: vmng    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfe]
-+#CHECK: vmng    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfe]
-+
-+	vmng	%v0, %v0, %v0
-+	vmng	%v0, %v0, %v31
-+	vmng	%v0, %v31, %v0
-+	vmng	%v31, %v0, %v0
-+	vmng	%v18, %v3, %v20
-+
-+#CHECK: vmnh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfe]
-+#CHECK: vmnh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfe]
-+#CHECK: vmnh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfe]
-+#CHECK: vmnh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfe]
-+#CHECK: vmnh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfe]
-+
-+	vmnh	%v0, %v0, %v0
-+	vmnh	%v0, %v0, %v31
-+	vmnh	%v0, %v31, %v0
-+	vmnh	%v31, %v0, %v0
-+	vmnh	%v18, %v3, %v20
-+
-+#CHECK: vmnlb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfc]
-+#CHECK: vmnlb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfc]
-+#CHECK: vmnlb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfc]
-+#CHECK: vmnlb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfc]
-+#CHECK: vmnlb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfc]
-+
-+	vmnlb	%v0, %v0, %v0
-+	vmnlb	%v0, %v0, %v31
-+	vmnlb	%v0, %v31, %v0
-+	vmnlb	%v31, %v0, %v0
-+	vmnlb	%v18, %v3, %v20
-+
-+#CHECK: vmnlf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfc]
-+#CHECK: vmnlf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfc]
-+#CHECK: vmnlf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfc]
-+#CHECK: vmnlf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfc]
-+#CHECK: vmnlf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfc]
-+
-+	vmnlf	%v0, %v0, %v0
-+	vmnlf	%v0, %v0, %v31
-+	vmnlf	%v0, %v31, %v0
-+	vmnlf	%v31, %v0, %v0
-+	vmnlf	%v18, %v3, %v20
-+
-+#CHECK: vmnlg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfc]
-+#CHECK: vmnlg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfc]
-+#CHECK: vmnlg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfc]
-+#CHECK: vmnlg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfc]
-+#CHECK: vmnlg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfc]
-+
-+	vmnlg	%v0, %v0, %v0
-+	vmnlg	%v0, %v0, %v31
-+	vmnlg	%v0, %v31, %v0
-+	vmnlg	%v31, %v0, %v0
-+	vmnlg	%v18, %v3, %v20
-+
-+#CHECK: vmnlh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfc]
-+#CHECK: vmnlh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfc]
-+#CHECK: vmnlh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfc]
-+#CHECK: vmnlh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfc]
-+#CHECK: vmnlh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfc]
-+
-+	vmnlh	%v0, %v0, %v0
-+	vmnlh	%v0, %v0, %v31
-+	vmnlh	%v0, %v31, %v0
-+	vmnlh	%v31, %v0, %v0
-+	vmnlh	%v18, %v3, %v20
-+
-+#CHECK: vmob    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa7]
-+#CHECK: vmob    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa7]
-+#CHECK: vmob    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa7]
-+#CHECK: vmob    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa7]
-+#CHECK: vmob    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa7]
-+
-+	vmob	%v0, %v0, %v0
-+	vmob	%v0, %v0, %v31
-+	vmob	%v0, %v31, %v0
-+	vmob	%v31, %v0, %v0
-+	vmob	%v18, %v3, %v20
-+
-+#CHECK: vmof    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa7]
-+#CHECK: vmof    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa7]
-+#CHECK: vmof    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa7]
-+#CHECK: vmof    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa7]
-+#CHECK: vmof    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa7]
-+
-+	vmof	%v0, %v0, %v0
-+	vmof	%v0, %v0, %v31
-+	vmof	%v0, %v31, %v0
-+	vmof	%v31, %v0, %v0
-+	vmof	%v18, %v3, %v20
-+
-+#CHECK: vmoh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa7]
-+#CHECK: vmoh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa7]
-+#CHECK: vmoh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa7]
-+#CHECK: vmoh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa7]
-+#CHECK: vmoh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa7]
-+
-+	vmoh	%v0, %v0, %v0
-+	vmoh	%v0, %v0, %v31
-+	vmoh	%v0, %v31, %v0
-+	vmoh	%v31, %v0, %v0
-+	vmoh	%v18, %v3, %v20
-+
-+#CHECK: vmrhb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x61]
-+#CHECK: vmrhb   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x61]
-+#CHECK: vmrhb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x61]
-+#CHECK: vmrhb   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x61]
-+#CHECK: vmrhb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x61]
-+#CHECK: vmrhb   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x61]
-+#CHECK: vmrhb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x61]
-+#CHECK: vmrhb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x61]
-+
-+	vmrhb	%v0, %v0, %v0
-+	vmrhb	%v0, %v0, %v15
-+	vmrhb	%v0, %v0, %v31
-+	vmrhb	%v0, %v15, %v0
-+	vmrhb	%v0, %v31, %v0
-+	vmrhb	%v15, %v0, %v0
-+	vmrhb	%v31, %v0, %v0
-+	vmrhb	%v18, %v3, %v20
-+
-+#CHECK: vmrhf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x61]
-+#CHECK: vmrhf   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x61]
-+#CHECK: vmrhf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x61]
-+#CHECK: vmrhf   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x61]
-+#CHECK: vmrhf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x61]
-+#CHECK: vmrhf   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x61]
-+#CHECK: vmrhf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x61]
-+#CHECK: vmrhf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x61]
-+
-+	vmrhf	%v0, %v0, %v0
-+	vmrhf	%v0, %v0, %v15
-+	vmrhf	%v0, %v0, %v31
-+	vmrhf	%v0, %v15, %v0
-+	vmrhf	%v0, %v31, %v0
-+	vmrhf	%v15, %v0, %v0
-+	vmrhf	%v31, %v0, %v0
-+	vmrhf	%v18, %v3, %v20
-+
-+#CHECK: vmrhg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x61]
-+#CHECK: vmrhg   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x61]
-+#CHECK: vmrhg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x61]
-+#CHECK: vmrhg   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x61]
-+#CHECK: vmrhg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x61]
-+#CHECK: vmrhg   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x61]
-+#CHECK: vmrhg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x61]
-+#CHECK: vmrhg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x61]
-+
-+	vmrhg	%v0, %v0, %v0
-+	vmrhg	%v0, %v0, %v15
-+	vmrhg	%v0, %v0, %v31
-+	vmrhg	%v0, %v15, %v0
-+	vmrhg	%v0, %v31, %v0
-+	vmrhg	%v15, %v0, %v0
-+	vmrhg	%v31, %v0, %v0
-+	vmrhg	%v18, %v3, %v20
-+
-+#CHECK: vmrhh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x61]
-+#CHECK: vmrhh   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x61]
-+#CHECK: vmrhh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x61]
-+#CHECK: vmrhh   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x61]
-+#CHECK: vmrhh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x61]
-+#CHECK: vmrhh   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x61]
-+#CHECK: vmrhh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x61]
-+#CHECK: vmrhh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x61]
-+
-+	vmrhh	%v0, %v0, %v0
-+	vmrhh	%v0, %v0, %v15
-+	vmrhh	%v0, %v0, %v31
-+	vmrhh	%v0, %v15, %v0
-+	vmrhh	%v0, %v31, %v0
-+	vmrhh	%v15, %v0, %v0
-+	vmrhh	%v31, %v0, %v0
-+	vmrhh	%v18, %v3, %v20
-+
-+#CHECK: vmrlb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x60]
-+#CHECK: vmrlb   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x60]
-+#CHECK: vmrlb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x60]
-+#CHECK: vmrlb   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x60]
-+#CHECK: vmrlb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x60]
-+#CHECK: vmrlb   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x60]
-+#CHECK: vmrlb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x60]
-+#CHECK: vmrlb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x60]
-+
-+	vmrlb	%v0, %v0, %v0
-+	vmrlb	%v0, %v0, %v15
-+	vmrlb	%v0, %v0, %v31
-+	vmrlb	%v0, %v15, %v0
-+	vmrlb	%v0, %v31, %v0
-+	vmrlb	%v15, %v0, %v0
-+	vmrlb	%v31, %v0, %v0
-+	vmrlb	%v18, %v3, %v20
-+
-+#CHECK: vmrlf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x60]
-+#CHECK: vmrlf   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x60]
-+#CHECK: vmrlf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x60]
-+#CHECK: vmrlf   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x60]
-+#CHECK: vmrlf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x60]
-+#CHECK: vmrlf   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x60]
-+#CHECK: vmrlf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x60]
-+#CHECK: vmrlf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x60]
-+
-+	vmrlf	%v0, %v0, %v0
-+	vmrlf	%v0, %v0, %v15
-+	vmrlf	%v0, %v0, %v31
-+	vmrlf	%v0, %v15, %v0
-+	vmrlf	%v0, %v31, %v0
-+	vmrlf	%v15, %v0, %v0
-+	vmrlf	%v31, %v0, %v0
-+	vmrlf	%v18, %v3, %v20
-+
-+#CHECK: vmrlg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x60]
-+#CHECK: vmrlg   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x60]
-+#CHECK: vmrlg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x60]
-+#CHECK: vmrlg   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x60]
-+#CHECK: vmrlg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x60]
-+#CHECK: vmrlg   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x60]
-+#CHECK: vmrlg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x60]
-+#CHECK: vmrlg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x60]
-+
-+	vmrlg	%v0, %v0, %v0
-+	vmrlg	%v0, %v0, %v15
-+	vmrlg	%v0, %v0, %v31
-+	vmrlg	%v0, %v15, %v0
-+	vmrlg	%v0, %v31, %v0
-+	vmrlg	%v15, %v0, %v0
-+	vmrlg	%v31, %v0, %v0
-+	vmrlg	%v18, %v3, %v20
-+
-+#CHECK: vmrlh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x60]
-+#CHECK: vmrlh   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x60]
-+#CHECK: vmrlh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x60]
-+#CHECK: vmrlh   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x60]
-+#CHECK: vmrlh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x60]
-+#CHECK: vmrlh   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x60]
-+#CHECK: vmrlh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x60]
-+#CHECK: vmrlh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x60]
-+
-+	vmrlh	%v0, %v0, %v0
-+	vmrlh	%v0, %v0, %v15
-+	vmrlh	%v0, %v0, %v31
-+	vmrlh	%v0, %v15, %v0
-+	vmrlh	%v0, %v31, %v0
-+	vmrlh	%v15, %v0, %v0
-+	vmrlh	%v31, %v0, %v0
-+	vmrlh	%v18, %v3, %v20
-+
-+#CHECK: vmxb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xff]
-+#CHECK: vmxb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xff]
-+#CHECK: vmxb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xff]
-+#CHECK: vmxb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xff]
-+#CHECK: vmxb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xff]
-+
-+	vmxb	%v0, %v0, %v0
-+	vmxb	%v0, %v0, %v31
-+	vmxb	%v0, %v31, %v0
-+	vmxb	%v31, %v0, %v0
-+	vmxb	%v18, %v3, %v20
-+
-+#CHECK: vmxf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xff]
-+#CHECK: vmxf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xff]
-+#CHECK: vmxf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xff]
-+#CHECK: vmxf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xff]
-+#CHECK: vmxf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xff]
-+
-+	vmxf	%v0, %v0, %v0
-+	vmxf	%v0, %v0, %v31
-+	vmxf	%v0, %v31, %v0
-+	vmxf	%v31, %v0, %v0
-+	vmxf	%v18, %v3, %v20
-+
-+#CHECK: vmxg    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xff]
-+#CHECK: vmxg    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xff]
-+#CHECK: vmxg    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xff]
-+#CHECK: vmxg    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xff]
-+#CHECK: vmxg    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xff]
-+
-+	vmxg	%v0, %v0, %v0
-+	vmxg	%v0, %v0, %v31
-+	vmxg	%v0, %v31, %v0
-+	vmxg	%v31, %v0, %v0
-+	vmxg	%v18, %v3, %v20
-+
-+#CHECK: vmxh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xff]
-+#CHECK: vmxh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xff]
-+#CHECK: vmxh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xff]
-+#CHECK: vmxh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xff]
-+#CHECK: vmxh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xff]
-+
-+	vmxh	%v0, %v0, %v0
-+	vmxh	%v0, %v0, %v31
-+	vmxh	%v0, %v31, %v0
-+	vmxh	%v31, %v0, %v0
-+	vmxh	%v18, %v3, %v20
-+
-+#CHECK: vmxlb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfd]
-+#CHECK: vmxlb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfd]
-+#CHECK: vmxlb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfd]
-+#CHECK: vmxlb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfd]
-+#CHECK: vmxlb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfd]
-+
-+	vmxlb	%v0, %v0, %v0
-+	vmxlb	%v0, %v0, %v31
-+	vmxlb	%v0, %v31, %v0
-+	vmxlb	%v31, %v0, %v0
-+	vmxlb	%v18, %v3, %v20
-+
-+#CHECK: vmxlf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfd]
-+#CHECK: vmxlf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfd]
-+#CHECK: vmxlf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfd]
-+#CHECK: vmxlf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfd]
-+#CHECK: vmxlf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfd]
-+
-+	vmxlf	%v0, %v0, %v0
-+	vmxlf	%v0, %v0, %v31
-+	vmxlf	%v0, %v31, %v0
-+	vmxlf	%v31, %v0, %v0
-+	vmxlf	%v18, %v3, %v20
-+
-+#CHECK: vmxlg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfd]
-+#CHECK: vmxlg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfd]
-+#CHECK: vmxlg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfd]
-+#CHECK: vmxlg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfd]
-+#CHECK: vmxlg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfd]
-+
-+	vmxlg	%v0, %v0, %v0
-+	vmxlg	%v0, %v0, %v31
-+	vmxlg	%v0, %v31, %v0
-+	vmxlg	%v31, %v0, %v0
-+	vmxlg	%v18, %v3, %v20
-+
-+#CHECK: vmxlh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfd]
-+#CHECK: vmxlh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfd]
-+#CHECK: vmxlh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfd]
-+#CHECK: vmxlh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfd]
-+#CHECK: vmxlh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfd]
-+
-+	vmxlh	%v0, %v0, %v0
-+	vmxlh	%v0, %v0, %v31
-+	vmxlh	%v0, %v31, %v0
-+	vmxlh	%v31, %v0, %v0
-+	vmxlh	%v18, %v3, %v20
-+
-+#CHECK: vn      %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x68]
-+#CHECK: vn      %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x68]
-+#CHECK: vn      %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x68]
-+#CHECK: vn      %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x68]
-+#CHECK: vn      %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x68]
-+
-+	vn	%v0, %v0, %v0
-+	vn	%v0, %v0, %v31
-+	vn	%v0, %v31, %v0
-+	vn	%v31, %v0, %v0
-+	vn	%v18, %v3, %v20
-+
-+#CHECK: vnc     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x69]
-+#CHECK: vnc     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x69]
-+#CHECK: vnc     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x69]
-+#CHECK: vnc     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x69]
-+#CHECK: vnc     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x69]
-+
-+	vnc	%v0, %v0, %v0
-+	vnc	%v0, %v0, %v31
-+	vnc	%v0, %v31, %v0
-+	vnc	%v31, %v0, %v0
-+	vnc	%v18, %v3, %v20
-+
-+#CHECK: vno     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6b]
-+#CHECK: vno     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6b]
-+#CHECK: vno     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6b]
-+#CHECK: vno     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6b]
-+#CHECK: vno     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6b]
-+
-+	vno	%v0, %v0, %v0
-+	vno	%v0, %v0, %v31
-+	vno	%v0, %v31, %v0
-+	vno	%v31, %v0, %v0
-+	vno	%v18, %v3, %v20
-+
-+#CHECK: vo      %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6a]
-+#CHECK: vo      %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6a]
-+#CHECK: vo      %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6a]
-+#CHECK: vo      %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6a]
-+#CHECK: vo      %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6a]
-+
-+	vo	%v0, %v0, %v0
-+	vo	%v0, %v0, %v31
-+	vo	%v0, %v31, %v0
-+	vo	%v31, %v0, %v0
-+	vo	%v18, %v3, %v20
-+
-+#CHECK: vone    %v0                     # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44]
-+#CHECK: vone    %v15                    # encoding: [0xe7,0xf0,0xff,0xff,0x00,0x44]
-+#CHECK: vone    %v22                    # encoding: [0xe7,0x60,0xff,0xff,0x08,0x44]
-+#CHECK: vone    %v31                    # encoding: [0xe7,0xf0,0xff,0xff,0x08,0x44]
-+
-+	vone	%v0
-+	vone	%v15
-+	vone	%v22
-+	vone	%v31
-+
-+#CHECK: vpdi    %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x00,0x84]
-+#CHECK: vpdi    %v0, %v0, %v0, 5        # encoding: [0xe7,0x00,0x00,0x00,0x50,0x84]
-+#CHECK: vpdi    %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x84]
-+#CHECK: vpdi    %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x84]
-+#CHECK: vpdi    %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x84]
-+#CHECK: vpdi    %v13, %v17, %v21, 4     # encoding: [0xe7,0xd1,0x50,0x00,0x46,0x84]
-+
-+	vpdi	%v0, %v0, %v0, 0
-+	vpdi	%v0, %v0, %v0, 5
-+	vpdi	%v0, %v0, %v31, 0
-+	vpdi	%v0, %v31, %v0, 0
-+	vpdi	%v31, %v0, %v0, 0
-+	vpdi	%v13, %v17, %v21, 4
-+
-+#CHECK: vperm   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8c]
-+#CHECK: vperm   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8c]
-+#CHECK: vperm   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8c]
-+#CHECK: vperm   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8c]
-+#CHECK: vperm   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8c]
-+#CHECK: vperm   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8c]
-+
-+	vperm	%v0, %v0, %v0, %v0
-+	vperm	%v0, %v0, %v0, %v31
-+	vperm	%v0, %v0, %v31, %v0
-+	vperm	%v0, %v31, %v0, %v0
-+	vperm	%v31, %v0, %v0, %v0
-+	vperm	%v13, %v17, %v21, %v25
-+
-+#CHECK: vpkf    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x94]
-+#CHECK: vpkf    %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x94]
-+#CHECK: vpkf    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x94]
-+#CHECK: vpkf    %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x94]
-+#CHECK: vpkf    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x94]
-+#CHECK: vpkf    %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x94]
-+#CHECK: vpkf    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x94]
-+#CHECK: vpkf    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x94]
-+
-+	vpkf	%v0, %v0, %v0
-+	vpkf	%v0, %v0, %v15
-+	vpkf	%v0, %v0, %v31
-+	vpkf	%v0, %v15, %v0
-+	vpkf	%v0, %v31, %v0
-+	vpkf	%v15, %v0, %v0
-+	vpkf	%v31, %v0, %v0
-+	vpkf	%v18, %v3, %v20
-+
-+#CHECK: vpkg    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x94]
-+#CHECK: vpkg    %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x94]
-+#CHECK: vpkg    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x94]
-+#CHECK: vpkg    %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x94]
-+#CHECK: vpkg    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x94]
-+#CHECK: vpkg    %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x94]
-+#CHECK: vpkg    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x94]
-+#CHECK: vpkg    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x94]
-+
-+	vpkg	%v0, %v0, %v0
-+	vpkg	%v0, %v0, %v15
-+	vpkg	%v0, %v0, %v31
-+	vpkg	%v0, %v15, %v0
-+	vpkg	%v0, %v31, %v0
-+	vpkg	%v15, %v0, %v0
-+	vpkg	%v31, %v0, %v0
-+	vpkg	%v18, %v3, %v20
-+
-+#CHECK: vpkh    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x94]
-+#CHECK: vpkh    %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x94]
-+#CHECK: vpkh    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x94]
-+#CHECK: vpkh    %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x94]
-+#CHECK: vpkh    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x94]
-+#CHECK: vpkh    %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x94]
-+#CHECK: vpkh    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x94]
-+#CHECK: vpkh    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x94]
-+
-+	vpkh	%v0, %v0, %v0
-+	vpkh	%v0, %v0, %v15
-+	vpkh	%v0, %v0, %v31
-+	vpkh	%v0, %v15, %v0
-+	vpkh	%v0, %v31, %v0
-+	vpkh	%v15, %v0, %v0
-+	vpkh	%v31, %v0, %v0
-+	vpkh	%v18, %v3, %v20
-+
-+#CHECK: vpklsf  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x95]
-+#CHECK: vpklsf  %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x95]
-+#CHECK: vpklsf  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x95]
-+#CHECK: vpklsf  %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x95]
-+#CHECK: vpklsf  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x95]
-+#CHECK: vpklsf  %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x95]
-+#CHECK: vpklsf  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x95]
-+#CHECK: vpklsf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x95]
-+#CHECK: vpklsfs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0x95]
-+
-+	vpklsf	%v0, %v0, %v0
-+	vpklsf	%v0, %v0, %v15
-+	vpklsf	%v0, %v0, %v31
-+	vpklsf	%v0, %v15, %v0
-+	vpklsf	%v0, %v31, %v0
-+	vpklsf	%v15, %v0, %v0
-+	vpklsf	%v31, %v0, %v0
-+	vpklsf	%v18, %v3, %v20
-+	vpklsfs	%v5, %v22, %v7
-+
-+#CHECK: vpklsg  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x95]
-+#CHECK: vpklsg  %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x95]
-+#CHECK: vpklsg  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x95]
-+#CHECK: vpklsg  %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x95]
-+#CHECK: vpklsg  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x95]
-+#CHECK: vpklsg  %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x95]
-+#CHECK: vpklsg  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x95]
-+#CHECK: vpklsg  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x95]
-+#CHECK: vpklsgs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x34,0x95]
-+
-+	vpklsg	%v0, %v0, %v0
-+	vpklsg	%v0, %v0, %v15
-+	vpklsg	%v0, %v0, %v31
-+	vpklsg	%v0, %v15, %v0
-+	vpklsg	%v0, %v31, %v0
-+	vpklsg	%v15, %v0, %v0
-+	vpklsg	%v31, %v0, %v0
-+	vpklsg	%v18, %v3, %v20
-+	vpklsgs	%v5, %v22, %v7
-+
-+#CHECK: vpklsh  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x95]
-+#CHECK: vpklsh  %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x95]
-+#CHECK: vpklsh  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x95]
-+#CHECK: vpklsh  %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x95]
-+#CHECK: vpklsh  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x95]
-+#CHECK: vpklsh  %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x95]
-+#CHECK: vpklsh  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x95]
-+#CHECK: vpklsh  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x95]
-+#CHECK: vpklshs %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0x95]
-+
-+	vpklsh	%v0, %v0, %v0
-+	vpklsh	%v0, %v0, %v15
-+	vpklsh	%v0, %v0, %v31
-+	vpklsh	%v0, %v15, %v0
-+	vpklsh	%v0, %v31, %v0
-+	vpklsh	%v15, %v0, %v0
-+	vpklsh	%v31, %v0, %v0
-+	vpklsh	%v18, %v3, %v20
-+	vpklshs	%v5, %v22, %v7
-+
-+#CHECK: vpksf   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x97]
-+#CHECK: vpksf   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x97]
-+#CHECK: vpksf   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x97]
-+#CHECK: vpksf   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x97]
-+#CHECK: vpksf   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x97]
-+#CHECK: vpksf   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x97]
-+#CHECK: vpksf   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x97]
-+#CHECK: vpksf   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x97]
-+#CHECK: vpksfs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x24,0x97]
-+
-+	vpksf	%v0, %v0, %v0
-+	vpksf	%v0, %v0, %v15
-+	vpksf	%v0, %v0, %v31
-+	vpksf	%v0, %v15, %v0
-+	vpksf	%v0, %v31, %v0
-+	vpksf	%v15, %v0, %v0
-+	vpksf	%v31, %v0, %v0
-+	vpksf	%v18, %v3, %v20
-+	vpksfs	%v5, %v22, %v7
-+
-+#CHECK: vpksg   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x97]
-+#CHECK: vpksg   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x97]
-+#CHECK: vpksg   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x97]
-+#CHECK: vpksg   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x97]
-+#CHECK: vpksg   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x97]
-+#CHECK: vpksg   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x97]
-+#CHECK: vpksg   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x97]
-+#CHECK: vpksg   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x97]
-+#CHECK: vpksgs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x34,0x97]
-+
-+	vpksg	%v0, %v0, %v0
-+	vpksg	%v0, %v0, %v15
-+	vpksg	%v0, %v0, %v31
-+	vpksg	%v0, %v15, %v0
-+	vpksg	%v0, %v31, %v0
-+	vpksg	%v15, %v0, %v0
-+	vpksg	%v31, %v0, %v0
-+	vpksg	%v18, %v3, %v20
-+	vpksgs	%v5, %v22, %v7
-+
-+#CHECK: vpksh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x97]
-+#CHECK: vpksh   %v0, %v0, %v15          # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x97]
-+#CHECK: vpksh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x97]
-+#CHECK: vpksh   %v0, %v15, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x97]
-+#CHECK: vpksh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x97]
-+#CHECK: vpksh   %v15, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x97]
-+#CHECK: vpksh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x97]
-+#CHECK: vpksh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x97]
-+#CHECK: vpkshs  %v5, %v22, %v7          # encoding: [0xe7,0x56,0x70,0x10,0x14,0x97]
-+
-+	vpksh	%v0, %v0, %v0
-+	vpksh	%v0, %v0, %v15
-+	vpksh	%v0, %v0, %v31
-+	vpksh	%v0, %v15, %v0
-+	vpksh	%v0, %v31, %v0
-+	vpksh	%v15, %v0, %v0
-+	vpksh	%v31, %v0, %v0
-+	vpksh	%v18, %v3, %v20
-+	vpkshs	%v5, %v22, %v7
-+
-+#CHECK: vpopct  %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x50]
-+#CHECK: vpopct  %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x50]
-+#CHECK: vpopct  %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x50]
-+#CHECK: vpopct  %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x50]
-+#CHECK: vpopct  %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x50]
-+#CHECK: vpopct  %v14, %v17, 0           # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x50]
-+
-+	vpopct	%v0, %v0, 0
-+	vpopct	%v0, %v15, 0
-+	vpopct	%v0, %v31, 0
-+	vpopct	%v15, %v0, 0
-+	vpopct	%v31, %v0, 0
-+	vpopct	%v14, %v17, 0
-+
-+#CHECK: vrepb   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x4d]
-+#CHECK: vrepb   %v0, %v0, 65535         # encoding: [0xe7,0x00,0xff,0xff,0x00,0x4d]
-+#CHECK: vrepb   %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x4d]
-+#CHECK: vrepb   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x4d]
-+#CHECK: vrepb   %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x4d]
-+#CHECK: vrepb   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x4d]
-+#CHECK: vrepb   %v4, %v21, 26505        # encoding: [0xe7,0x45,0x67,0x89,0x04,0x4d]
-+
-+	vrepb	%v0, %v0, 0
-+	vrepb	%v0, %v0, 65535
-+	vrepb	%v0, %v15, 0
-+	vrepb	%v0, %v31, 0
-+	vrepb	%v15, %v0, 0
-+	vrepb	%v31, %v0, 0
-+	vrepb	%v4, %v21, 0x6789
-+
-+#CHECK: vrepf   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x20,0x4d]
-+#CHECK: vrepf   %v0, %v0, 65535         # encoding: [0xe7,0x00,0xff,0xff,0x20,0x4d]
-+#CHECK: vrepf   %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x4d]
-+#CHECK: vrepf   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x4d]
-+#CHECK: vrepf   %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x4d]
-+#CHECK: vrepf   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x4d]
-+#CHECK: vrepf   %v4, %v21, 26505        # encoding: [0xe7,0x45,0x67,0x89,0x24,0x4d]
-+
-+	vrepf	%v0, %v0, 0
-+	vrepf	%v0, %v0, 65535
-+	vrepf	%v0, %v15, 0
-+	vrepf	%v0, %v31, 0
-+	vrepf	%v15, %v0, 0
-+	vrepf	%v31, %v0, 0
-+	vrepf	%v4, %v21, 0x6789
-+
-+#CHECK: vrepg   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4d]
-+#CHECK: vrepg   %v0, %v0, 65535         # encoding: [0xe7,0x00,0xff,0xff,0x30,0x4d]
-+#CHECK: vrepg   %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4d]
-+#CHECK: vrepg   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4d]
-+#CHECK: vrepg   %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4d]
-+#CHECK: vrepg   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4d]
-+#CHECK: vrepg   %v4, %v21, 26505        # encoding: [0xe7,0x45,0x67,0x89,0x34,0x4d]
-+
-+	vrepg	%v0, %v0, 0
-+	vrepg	%v0, %v0, 65535
-+	vrepg	%v0, %v15, 0
-+	vrepg	%v0, %v31, 0
-+	vrepg	%v15, %v0, 0
-+	vrepg	%v31, %v0, 0
-+	vrepg	%v4, %v21, 0x6789
-+
-+#CHECK: vreph   %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x10,0x4d]
-+#CHECK: vreph   %v0, %v0, 65535         # encoding: [0xe7,0x00,0xff,0xff,0x10,0x4d]
-+#CHECK: vreph   %v0, %v15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x4d]
-+#CHECK: vreph   %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x4d]
-+#CHECK: vreph   %v15, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x4d]
-+#CHECK: vreph   %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x4d]
-+#CHECK: vreph   %v4, %v21, 26505        # encoding: [0xe7,0x45,0x67,0x89,0x14,0x4d]
-+
-+	vreph	%v0, %v0, 0
-+	vreph	%v0, %v0, 65535
-+	vreph	%v0, %v15, 0
-+	vreph	%v0, %v31, 0
-+	vreph	%v15, %v0, 0
-+	vreph	%v31, %v0, 0
-+	vreph	%v4, %v21, 0x6789
-+
-+#CHECK: vrepib  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x45]
-+#CHECK: vrepib  %v0, -32768             # encoding: [0xe7,0x00,0x80,0x00,0x00,0x45]
-+#CHECK: vrepib  %v0, 32767              # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x45]
-+#CHECK: vrepib  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x45]
-+#CHECK: vrepib  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x45]
-+#CHECK: vrepib  %v18, 13398             # encoding: [0xe7,0x20,0x34,0x56,0x08,0x45]
-+
-+	vrepib	%v0, 0
-+	vrepib	%v0, -32768
-+	vrepib	%v0, 32767
-+	vrepib	%v15, 0
-+	vrepib	%v31, 0
-+	vrepib	%v18, 0x3456
-+
-+#CHECK: vrepif  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x20,0x45]
-+#CHECK: vrepif  %v0, -32768             # encoding: [0xe7,0x00,0x80,0x00,0x20,0x45]
-+#CHECK: vrepif  %v0, 32767              # encoding: [0xe7,0x00,0x7f,0xff,0x20,0x45]
-+#CHECK: vrepif  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x45]
-+#CHECK: vrepif  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x45]
-+#CHECK: vrepif  %v18, 13398             # encoding: [0xe7,0x20,0x34,0x56,0x28,0x45]
-+
-+	vrepif	%v0, 0
-+	vrepif	%v0, -32768
-+	vrepif	%v0, 32767
-+	vrepif	%v15, 0
-+	vrepif	%v31, 0
-+	vrepif	%v18, 0x3456
-+
-+#CHECK: vrepig  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x30,0x45]
-+#CHECK: vrepig  %v0, -32768             # encoding: [0xe7,0x00,0x80,0x00,0x30,0x45]
-+#CHECK: vrepig  %v0, 32767              # encoding: [0xe7,0x00,0x7f,0xff,0x30,0x45]
-+#CHECK: vrepig  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x45]
-+#CHECK: vrepig  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x45]
-+#CHECK: vrepig  %v18, 13398             # encoding: [0xe7,0x20,0x34,0x56,0x38,0x45]
-+
-+	vrepig	%v0, 0
-+	vrepig	%v0, -32768
-+	vrepig	%v0, 32767
-+	vrepig	%v15, 0
-+	vrepig	%v31, 0
-+	vrepig	%v18, 0x3456
-+
-+#CHECK: vrepih  %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x10,0x45]
-+#CHECK: vrepih  %v0, -32768             # encoding: [0xe7,0x00,0x80,0x00,0x10,0x45]
-+#CHECK: vrepih  %v0, 32767              # encoding: [0xe7,0x00,0x7f,0xff,0x10,0x45]
-+#CHECK: vrepih  %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x45]
-+#CHECK: vrepih  %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x45]
-+#CHECK: vrepih  %v18, 13398             # encoding: [0xe7,0x20,0x34,0x56,0x18,0x45]
-+
-+	vrepih	%v0, 0
-+	vrepih	%v0, -32768
-+	vrepih	%v0, 32767
-+	vrepih	%v15, 0
-+	vrepih	%v31, 0
-+	vrepih	%v18, 0x3456
-+
-+#CHECK: vsb     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf7]
-+#CHECK: vsb     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf7]
-+#CHECK: vsb     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf7]
-+#CHECK: vsb     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf7]
-+#CHECK: vsb     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf7]
-+
-+	vsb	%v0, %v0, %v0
-+	vsb	%v0, %v0, %v31
-+	vsb	%v0, %v31, %v0
-+	vsb	%v31, %v0, %v0
-+	vsb	%v18, %v3, %v20
-+
-+#CHECK: vsbcbiq %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbd]
-+#CHECK: vsbcbiq %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbd]
-+#CHECK: vsbcbiq %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbd]
-+#CHECK: vsbcbiq %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbd]
-+#CHECK: vsbcbiq %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbd]
-+#CHECK: vsbcbiq %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbd]
-+
-+	vsbcbiq	%v0, %v0, %v0, %v0
-+	vsbcbiq	%v0, %v0, %v0, %v31
-+	vsbcbiq	%v0, %v0, %v31, %v0
-+	vsbcbiq	%v0, %v31, %v0, %v0
-+	vsbcbiq	%v31, %v0, %v0, %v0
-+	vsbcbiq	%v13, %v17, %v21, %v25
-+
-+#CHECK: vsbiq   %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbf]
-+#CHECK: vsbiq   %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbf]
-+#CHECK: vsbiq   %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbf]
-+#CHECK: vsbiq   %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbf]
-+#CHECK: vsbiq   %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbf]
-+#CHECK: vsbiq   %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbf]
-+
-+	vsbiq	%v0, %v0, %v0, %v0
-+	vsbiq	%v0, %v0, %v0, %v31
-+	vsbiq	%v0, %v0, %v31, %v0
-+	vsbiq	%v0, %v31, %v0, %v0
-+	vsbiq	%v31, %v0, %v0, %v0
-+	vsbiq	%v13, %v17, %v21, %v25
-+
-+#CHECK: vscbib  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf5]
-+#CHECK: vscbib  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf5]
-+#CHECK: vscbib  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf5]
-+#CHECK: vscbib  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf5]
-+#CHECK: vscbib  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf5]
-+
-+	vscbib	%v0, %v0, %v0
-+	vscbib	%v0, %v0, %v31
-+	vscbib	%v0, %v31, %v0
-+	vscbib	%v31, %v0, %v0
-+	vscbib	%v18, %v3, %v20
-+
-+#CHECK: vscbif  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf5]
-+#CHECK: vscbif  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf5]
-+#CHECK: vscbif  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf5]
-+#CHECK: vscbif  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf5]
-+#CHECK: vscbif  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf5]
-+
-+	vscbif	%v0, %v0, %v0
-+	vscbif	%v0, %v0, %v31
-+	vscbif	%v0, %v31, %v0
-+	vscbif	%v31, %v0, %v0
-+	vscbif	%v18, %v3, %v20
-+
-+#CHECK: vscbig  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf5]
-+#CHECK: vscbig  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf5]
-+#CHECK: vscbig  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf5]
-+#CHECK: vscbig  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf5]
-+#CHECK: vscbig  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf5]
-+
-+	vscbig	%v0, %v0, %v0
-+	vscbig	%v0, %v0, %v31
-+	vscbig	%v0, %v31, %v0
-+	vscbig	%v31, %v0, %v0
-+	vscbig	%v18, %v3, %v20
-+
-+#CHECK: vscbih  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf5]
-+#CHECK: vscbih  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf5]
-+#CHECK: vscbih  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf5]
-+#CHECK: vscbih  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf5]
-+#CHECK: vscbih  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf5]
-+
-+	vscbih	%v0, %v0, %v0
-+	vscbih	%v0, %v0, %v31
-+	vscbih	%v0, %v31, %v0
-+	vscbih	%v31, %v0, %v0
-+	vscbih	%v18, %v3, %v20
-+
-+#CHECK: vscbiq  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf5]
-+#CHECK: vscbiq  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf5]
-+#CHECK: vscbiq  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf5]
-+#CHECK: vscbiq  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf5]
-+#CHECK: vscbiq  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf5]
-+
-+	vscbiq	%v0, %v0, %v0
-+	vscbiq	%v0, %v0, %v31
-+	vscbiq	%v0, %v31, %v0
-+	vscbiq	%v31, %v0, %v0
-+	vscbiq	%v18, %v3, %v20
-+
-+#CHECK: vscef   %v0, 0(%v0), 0          # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1b]
-+#CHECK: vscef   %v0, 0(%v0,%r1), 0      # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1b]
-+#CHECK: vscef   %v0, 0(%v0,%r1), 3      # encoding: [0xe7,0x00,0x10,0x00,0x30,0x1b]
-+#CHECK: vscef   %v0, 0(%v0,%r15), 0     # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1b]
-+#CHECK: vscef   %v0, 0(%v15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1b]
-+#CHECK: vscef   %v0, 0(%v31,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1b]
-+#CHECK: vscef   %v0, 4095(%v0,%r1), 0   # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x1b]
-+#CHECK: vscef   %v15, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1b]
-+#CHECK: vscef   %v31, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1b]
-+#CHECK: vscef   %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1b]
-+
-+	vscef	%v0, 0(%v0), 0
-+	vscef	%v0, 0(%v0,%r1), 0
-+	vscef	%v0, 0(%v0,%r1), 3
-+	vscef	%v0, 0(%v0,%r15), 0
-+	vscef	%v0, 0(%v15,%r1), 0
-+	vscef	%v0, 0(%v31,%r1), 0
-+	vscef	%v0, 4095(%v0, %r1), 0
-+	vscef	%v15, 0(%v0,%r1), 0
-+	vscef	%v31, 0(%v0,%r1), 0
-+	vscef	%v10, 1000(%v19,%r7), 1
-+
-+#CHECK: vsceg   %v0, 0(%v0), 0          # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1a]
-+#CHECK: vsceg   %v0, 0(%v0,%r1), 0      # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1a]
-+#CHECK: vsceg   %v0, 0(%v0,%r1), 1      # encoding: [0xe7,0x00,0x10,0x00,0x10,0x1a]
-+#CHECK: vsceg   %v0, 0(%v0,%r15), 0     # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1a]
-+#CHECK: vsceg   %v0, 0(%v15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1a]
-+#CHECK: vsceg   %v0, 0(%v31,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1a]
-+#CHECK: vsceg   %v0, 4095(%v0,%r1), 0   # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x1a]
-+#CHECK: vsceg   %v15, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1a]
-+#CHECK: vsceg   %v31, 0(%v0,%r1), 0     # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1a]
-+#CHECK: vsceg   %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1a]
-+
-+	vsceg	%v0, 0(%v0), 0
-+	vsceg	%v0, 0(%v0,%r1), 0
-+	vsceg	%v0, 0(%v0,%r1), 1
-+	vsceg	%v0, 0(%v0,%r15), 0
-+	vsceg	%v0, 0(%v15,%r1), 0
-+	vsceg	%v0, 0(%v31,%r1), 0
-+	vsceg	%v0, 4095(%v0,%r1), 0
-+	vsceg	%v15, 0(%v0,%r1), 0
-+	vsceg	%v31, 0(%v0,%r1), 0
-+	vsceg	%v10, 1000(%v19,%r7), 1
-+
-+#CHECK: vsel    %v0, %v0, %v0, %v0      # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8d]
-+#CHECK: vsel    %v0, %v0, %v0, %v31     # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8d]
-+#CHECK: vsel    %v0, %v0, %v31, %v0     # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8d]
-+#CHECK: vsel    %v0, %v31, %v0, %v0     # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8d]
-+#CHECK: vsel    %v31, %v0, %v0, %v0     # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8d]
-+#CHECK: vsel    %v13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8d]
-+
-+	vsel	%v0, %v0, %v0, %v0
-+	vsel	%v0, %v0, %v0, %v31
-+	vsel	%v0, %v0, %v31, %v0
-+	vsel	%v0, %v31, %v0, %v0
-+	vsel	%v31, %v0, %v0, %v0
-+	vsel 	%v13, %v17, %v21, %v25
-+
-+#CHECK: vsegb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5f]
-+#CHECK: vsegb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5f]
-+#CHECK: vsegb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x5f]
-+#CHECK: vsegb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5f]
-+#CHECK: vsegb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5f]
-+#CHECK: vsegb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x5f]
-+
-+	vsegb	%v0, %v0
-+	vsegb	%v0, %v15
-+	vsegb	%v0, %v31
-+	vsegb	%v15, %v0
-+	vsegb	%v31, %v0
-+	vsegb	%v14, %v17
-+
-+#CHECK: vsegf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5f]
-+#CHECK: vsegf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5f]
-+#CHECK: vsegf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5f]
-+#CHECK: vsegf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5f]
-+#CHECK: vsegf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5f]
-+#CHECK: vsegf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x5f]
-+
-+	vsegf	%v0, %v0
-+	vsegf	%v0, %v15
-+	vsegf	%v0, %v31
-+	vsegf	%v15, %v0
-+	vsegf	%v31, %v0
-+	vsegf	%v14, %v17
-+
-+#CHECK: vsegh   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5f]
-+#CHECK: vsegh   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x5f]
-+#CHECK: vsegh   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5f]
-+#CHECK: vsegh   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5f]
-+#CHECK: vsegh   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5f]
-+#CHECK: vsegh   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x5f]
-+
-+	vsegh	%v0, %v0
-+	vsegh	%v0, %v15
-+	vsegh	%v0, %v31
-+	vsegh	%v15, %v0
-+	vsegh	%v31, %v0
-+	vsegh	%v14, %v17
-+
-+#CHECK: vsf     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf7]
-+#CHECK: vsf     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf7]
-+#CHECK: vsf     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf7]
-+#CHECK: vsf     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf7]
-+#CHECK: vsf     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf7]
-+
-+	vsf	%v0, %v0, %v0
-+	vsf	%v0, %v0, %v31
-+	vsf	%v0, %v31, %v0
-+	vsf	%v31, %v0, %v0
-+	vsf	%v18, %v3, %v20
-+
-+#CHECK: vsg     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf7]
-+#CHECK: vsg     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf7]
-+#CHECK: vsg     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf7]
-+#CHECK: vsg     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf7]
-+#CHECK: vsg     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf7]
-+
-+	vsg	%v0, %v0, %v0
-+	vsg	%v0, %v0, %v31
-+	vsg	%v0, %v31, %v0
-+	vsg	%v31, %v0, %v0
-+	vsg	%v18, %v3, %v20
-+
-+#CHECK: vsh     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf7]
-+#CHECK: vsh     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf7]
-+#CHECK: vsh     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf7]
-+#CHECK: vsh     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf7]
-+#CHECK: vsh     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf7]
-+
-+	vsh	%v0, %v0, %v0
-+	vsh	%v0, %v0, %v31
-+	vsh	%v0, %v31, %v0
-+	vsh	%v31, %v0, %v0
-+	vsh	%v18, %v3, %v20
-+
-+#CHECK: vsl     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x74]
-+#CHECK: vsl     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x74]
-+#CHECK: vsl     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x74]
-+#CHECK: vsl     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x74]
-+#CHECK: vsl     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x74]
-+
-+	vsl	%v0, %v0, %v0
-+	vsl	%v0, %v0, %v31
-+	vsl	%v0, %v31, %v0
-+	vsl	%v31, %v0, %v0
-+	vsl	%v18, %v3, %v20
-+
-+#CHECK: vslb    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x75]
-+#CHECK: vslb    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x75]
-+#CHECK: vslb    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x75]
-+#CHECK: vslb    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x75]
-+#CHECK: vslb    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x75]
-+
-+	vslb	%v0, %v0, %v0
-+	vslb	%v0, %v0, %v31
-+	vslb	%v0, %v31, %v0
-+	vslb	%v31, %v0, %v0
-+	vslb	%v18, %v3, %v20
-+
-+#CHECK: vsldb   %v0, %v0, %v0, 0        # encoding: [0xe7,0x00,0x00,0x00,0x00,0x77]
-+#CHECK: vsldb   %v0, %v0, %v0, 255      # encoding: [0xe7,0x00,0x00,0xff,0x00,0x77]
-+#CHECK: vsldb   %v0, %v0, %v31, 0       # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x77]
-+#CHECK: vsldb   %v0, %v31, %v0, 0       # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x77]
-+#CHECK: vsldb   %v31, %v0, %v0, 0       # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x77]
-+#CHECK: vsldb   %v13, %v17, %v21, 121   # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x77]
-+
-+	vsldb	%v0, %v0, %v0, 0
-+	vsldb	%v0, %v0, %v0, 255
-+	vsldb	%v0, %v0, %v31, 0
-+	vsldb	%v0, %v31, %v0, 0
-+	vsldb	%v31, %v0, %v0, 0
-+	vsldb 	%v13, %v17, %v21, 0x79
-+
-+#CHECK: vsq     %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf7]
-+#CHECK: vsq     %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf7]
-+#CHECK: vsq     %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf7]
-+#CHECK: vsq     %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf7]
-+#CHECK: vsq     %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf7]
-+
-+	vsq	%v0, %v0, %v0
-+	vsq	%v0, %v0, %v31
-+	vsq	%v0, %v31, %v0
-+	vsq	%v31, %v0, %v0
-+	vsq	%v18, %v3, %v20
-+
-+#CHECK: vsra    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7e]
-+#CHECK: vsra    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7e]
-+#CHECK: vsra    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7e]
-+#CHECK: vsra    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7e]
-+#CHECK: vsra    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7e]
-+
-+	vsra	%v0, %v0, %v0
-+	vsra	%v0, %v0, %v31
-+	vsra	%v0, %v31, %v0
-+	vsra	%v31, %v0, %v0
-+	vsra	%v18, %v3, %v20
-+
-+#CHECK: vsrab   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7f]
-+#CHECK: vsrab   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7f]
-+#CHECK: vsrab   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7f]
-+#CHECK: vsrab   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7f]
-+#CHECK: vsrab   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7f]
-+
-+	vsrab	%v0, %v0, %v0
-+	vsrab	%v0, %v0, %v31
-+	vsrab	%v0, %v31, %v0
-+	vsrab	%v31, %v0, %v0
-+	vsrab	%v18, %v3, %v20
-+
-+#CHECK: vsrl    %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7c]
-+#CHECK: vsrl    %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7c]
-+#CHECK: vsrl    %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7c]
-+#CHECK: vsrl    %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7c]
-+#CHECK: vsrl    %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7c]
-+
-+	vsrl	%v0, %v0, %v0
-+	vsrl	%v0, %v0, %v31
-+	vsrl	%v0, %v31, %v0
-+	vsrl	%v31, %v0, %v0
-+	vsrl	%v18, %v3, %v20
-+
-+#CHECK: vsrlb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7d]
-+#CHECK: vsrlb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7d]
-+#CHECK: vsrlb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7d]
-+#CHECK: vsrlb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7d]
-+#CHECK: vsrlb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7d]
-+
-+	vsrlb	%v0, %v0, %v0
-+	vsrlb	%v0, %v0, %v31
-+	vsrlb	%v0, %v31, %v0
-+	vsrlb	%v31, %v0, %v0
-+	vsrlb	%v18, %v3, %v20
-+
-+#CHECK: vst     %v0, 0                  # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0e]
-+#CHECK: vst     %v0, 4095               # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0e]
-+#CHECK: vst     %v0, 0(%r15)            # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0e]
-+#CHECK: vst     %v0, 0(%r15,%r1)        # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0e]
-+#CHECK: vst     %v15, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0e]
-+#CHECK: vst     %v31, 0                 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0e]
-+#CHECK: vst     %v18, 1383(%r3,%r4)     # encoding: [0xe7,0x23,0x45,0x67,0x08,0x0e]
-+
-+	vst	%v0, 0
-+	vst	%v0, 4095
-+	vst	%v0, 0(%r15)
-+	vst	%v0, 0(%r15,%r1)
-+	vst	%v15, 0
-+	vst	%v31, 0
-+	vst	%v18, 0x567(%r3,%r4)
-+
-+#CHECK: vsteb   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x08]
-+#CHECK: vsteb   %v0, 0, 15              # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x08]
-+#CHECK: vsteb   %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x08]
-+#CHECK: vsteb   %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x08]
-+#CHECK: vsteb   %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x08]
-+#CHECK: vsteb   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x08]
-+#CHECK: vsteb   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x08]
-+#CHECK: vsteb   %v18, 1383(%r3,%r4), 8  # encoding: [0xe7,0x23,0x45,0x67,0x88,0x08]
-+
-+	vsteb	%v0, 0, 0
-+	vsteb	%v0, 0, 15
-+	vsteb	%v0, 4095, 0
-+	vsteb	%v0, 0(%r15), 0
-+	vsteb	%v0, 0(%r15,%r1), 0
-+	vsteb	%v15, 0, 0
-+	vsteb	%v31, 0, 0
-+	vsteb	%v18, 1383(%r3,%r4), 8
-+
-+#CHECK: vstef   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0b]
-+#CHECK: vstef   %v0, 0, 3               # encoding: [0xe7,0x00,0x00,0x00,0x30,0x0b]
-+#CHECK: vstef   %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0b]
-+#CHECK: vstef   %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0b]
-+#CHECK: vstef   %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0b]
-+#CHECK: vstef   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0b]
-+#CHECK: vstef   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0b]
-+#CHECK: vstef   %v18, 1383(%r3,%r4), 2  # encoding: [0xe7,0x23,0x45,0x67,0x28,0x0b]
-+
-+	vstef	%v0, 0, 0
-+	vstef	%v0, 0, 3
-+	vstef	%v0, 4095, 0
-+	vstef	%v0, 0(%r15), 0
-+	vstef	%v0, 0(%r15,%r1), 0
-+	vstef	%v15, 0, 0
-+	vstef	%v31, 0, 0
-+	vstef	%v18, 1383(%r3,%r4), 2
-+
-+#CHECK: vsteg   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0a]
-+#CHECK: vsteg   %v0, 0, 1               # encoding: [0xe7,0x00,0x00,0x00,0x10,0x0a]
-+#CHECK: vsteg   %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0a]
-+#CHECK: vsteg   %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0a]
-+#CHECK: vsteg   %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0a]
-+#CHECK: vsteg   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0a]
-+#CHECK: vsteg   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0a]
-+#CHECK: vsteg   %v18, 1383(%r3,%r4), 1  # encoding: [0xe7,0x23,0x45,0x67,0x18,0x0a]
-+
-+	vsteg	%v0, 0, 0
-+	vsteg	%v0, 0, 1
-+	vsteg	%v0, 4095, 0
-+	vsteg	%v0, 0(%r15), 0
-+	vsteg	%v0, 0(%r15,%r1), 0
-+	vsteg	%v15, 0, 0
-+	vsteg	%v31, 0, 0
-+	vsteg	%v18, 1383(%r3,%r4), 1
-+
-+#CHECK: vsteh   %v0, 0, 0               # encoding: [0xe7,0x00,0x00,0x00,0x00,0x09]
-+#CHECK: vsteh   %v0, 0, 7               # encoding: [0xe7,0x00,0x00,0x00,0x70,0x09]
-+#CHECK: vsteh   %v0, 4095, 0            # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x09]
-+#CHECK: vsteh   %v0, 0(%r15), 0         # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x09]
-+#CHECK: vsteh   %v0, 0(%r15,%r1), 0     # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x09]
-+#CHECK: vsteh   %v15, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x09]
-+#CHECK: vsteh   %v31, 0, 0              # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x09]
-+#CHECK: vsteh   %v18, 1383(%r3,%r4), 4  # encoding: [0xe7,0x23,0x45,0x67,0x48,0x09]
-+
-+	vsteh	%v0, 0, 0
-+	vsteh	%v0, 0, 7
-+	vsteh	%v0, 4095, 0
-+	vsteh	%v0, 0(%r15), 0
-+	vsteh	%v0, 0(%r15,%r1), 0
-+	vsteh	%v15, 0, 0
-+	vsteh	%v31, 0, 0
-+	vsteh	%v18, 1383(%r3,%r4), 4
-+
-+#CHECK: vstl    %v0, %r0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3f]
-+#CHECK: vstl    %v0, %r0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3f]
-+#CHECK: vstl    %v0, %r0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3f]
-+#CHECK: vstl    %v0, %r15, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x3f]
-+#CHECK: vstl    %v15, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x3f]
-+#CHECK: vstl    %v31, %r0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3f]
-+#CHECK: vstl    %v18, %r3, 1383(%r4)    # encoding: [0xe7,0x23,0x45,0x67,0x08,0x3f]
-+
-+	vstl	%v0, %r0, 0
-+	vstl	%v0, %r0, 4095
-+	vstl	%v0, %r0, 0(%r15)
-+	vstl	%v0, %r15, 0
-+	vstl	%v15, %r0, 0
-+	vstl	%v31, %r0, 0
-+	vstl	%v18, %r3, 1383(%r4)
-+
-+#CHECK: vstm    %v0, %v0, 0             # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3e]
-+#CHECK: vstm    %v0, %v0, 4095          # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3e]
-+#CHECK: vstm    %v0, %v0, 0(%r15)       # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3e]
-+#CHECK: vstm    %v0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3e]
-+#CHECK: vstm    %v31, %v0, 0            # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3e]
-+#CHECK: vstm    %v14, %v17, 1074(%r5)   # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3e]
-+
-+	vstm	%v0, %v0, 0
-+	vstm	%v0, %v0, 4095
-+	vstm	%v0, %v0, 0(%r15)
-+	vstm	%v0, %v31, 0
-+	vstm	%v31, %v0, 0
-+	vstm	%v14, %v17, 1074(%r5)
-+
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v0, %v0, 12  # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v0, %v15, 0  # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v0, %v31, 0  # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v15, %v0, 0  # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8a]
-+#CHECK: vstrcb   %v0, %v0, %v31, %v0, 0  # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8a]
-+#CHECK: vstrcb   %v0, %v15, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8a]
-+#CHECK: vstrcb   %v0, %v31, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8a]
-+#CHECK: vstrcb   %v15, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8a]
-+#CHECK: vstrcb   %v31, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8a]
-+#CHECK: vstrcb   %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x40,0x5a,0x8a]
-+#CHECK: vstrcb   %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a]
-+#CHECK: vstrcbs  %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0x90,0x5a,0x8a]
-+#CHECK: vstrczb  %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x60,0x5a,0x8a]
-+#CHECK: vstrczbs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x5a,0x8a]
-+#CHECK: vstrczbs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a]
-+
-+        vstrcb   %v0, %v0, %v0, %v0
-+        vstrcb   %v0, %v0, %v0, %v0, 0
-+        vstrcb   %v0, %v0, %v0, %v0, 12
-+        vstrcb   %v0, %v0, %v0, %v15
-+        vstrcb   %v0, %v0, %v0, %v31
-+        vstrcb   %v0, %v0, %v15, %v0
-+        vstrcb   %v0, %v0, %v31, %v0
-+        vstrcb   %v0, %v15, %v0, %v0
-+        vstrcb   %v0, %v31, %v0, %v0
-+        vstrcb   %v15, %v0, %v0, %v0
-+        vstrcb   %v31, %v0, %v0, %v0
-+        vstrcb   %v18, %v3, %v20, %v5, 4
-+        vstrcb   %v18, %v3, %v20, %v5, 15
-+        vstrcbs  %v18, %v3, %v20, %v5, 8
-+        vstrczb  %v18, %v3, %v20, %v5, 4
-+        vstrczbs %v18, %v3, %v20, %v5, 8
-+        vstrczbs %v18, %v3, %v20, %v5, 15
-+
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v0, %v0, 12  # encoding: [0xe7,0x00,0x02,0xc0,0x00,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v0, %v15, 0  # encoding: [0xe7,0x00,0x02,0x00,0xf0,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v0, %v31, 0  # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v15, %v0, 0  # encoding: [0xe7,0x00,0xf2,0x00,0x00,0x8a]
-+#CHECK: vstrcf   %v0, %v0, %v31, %v0, 0  # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8a]
-+#CHECK: vstrcf   %v0, %v15, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x02,0x00,0x00,0x8a]
-+#CHECK: vstrcf   %v0, %v31, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8a]
-+#CHECK: vstrcf   %v15, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x02,0x00,0x00,0x8a]
-+#CHECK: vstrcf   %v31, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8a]
-+#CHECK: vstrcf   %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x40,0x5a,0x8a]
-+#CHECK: vstrcf   %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a]
-+#CHECK: vstrcfs  %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x42,0x90,0x5a,0x8a]
-+#CHECK: vstrczf  %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x60,0x5a,0x8a]
-+#CHECK: vstrczfs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x42,0xb0,0x5a,0x8a]
-+#CHECK: vstrczfs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a]
-+
-+        vstrcf   %v0, %v0, %v0, %v0
-+        vstrcf   %v0, %v0, %v0, %v0, 0
-+        vstrcf   %v0, %v0, %v0, %v0, 12
-+        vstrcf   %v0, %v0, %v0, %v15
-+        vstrcf   %v0, %v0, %v0, %v31
-+        vstrcf   %v0, %v0, %v15, %v0
-+        vstrcf   %v0, %v0, %v31, %v0
-+        vstrcf   %v0, %v15, %v0, %v0
-+        vstrcf   %v0, %v31, %v0, %v0
-+        vstrcf   %v15, %v0, %v0, %v0
-+        vstrcf   %v31, %v0, %v0, %v0
-+        vstrcf   %v18, %v3, %v20, %v5, 4
-+        vstrcf   %v18, %v3, %v20, %v5, 15
-+        vstrcfs  %v18, %v3, %v20, %v5, 8
-+        vstrczf  %v18, %v3, %v20, %v5, 4
-+        vstrczfs %v18, %v3, %v20, %v5, 8
-+        vstrczfs %v18, %v3, %v20, %v5, 15
-+
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 0   # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v0, %v0, 12  # encoding: [0xe7,0x00,0x01,0xc0,0x00,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v0, %v15, 0  # encoding: [0xe7,0x00,0x01,0x00,0xf0,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v0, %v31, 0  # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v15, %v0, 0  # encoding: [0xe7,0x00,0xf1,0x00,0x00,0x8a]
-+#CHECK: vstrch   %v0, %v0, %v31, %v0, 0  # encoding: [0xe7,0x00,0xf1,0x00,0x02,0x8a]
-+#CHECK: vstrch   %v0, %v15, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x01,0x00,0x00,0x8a]
-+#CHECK: vstrch   %v0, %v31, %v0, %v0, 0  # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x8a]
-+#CHECK: vstrch   %v15, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x01,0x00,0x00,0x8a]
-+#CHECK: vstrch   %v31, %v0, %v0, %v0, 0  # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x8a]
-+#CHECK: vstrch   %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x40,0x5a,0x8a]
-+#CHECK: vstrch   %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a]
-+#CHECK: vstrchs  %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0x90,0x5a,0x8a]
-+#CHECK: vstrczh  %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x60,0x5a,0x8a]
-+#CHECK: vstrczhs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0xb0,0x5a,0x8a]
-+#CHECK: vstrczhs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a]
-+
-+        vstrch   %v0, %v0, %v0, %v0
-+        vstrch   %v0, %v0, %v0, %v0, 0
-+        vstrch   %v0, %v0, %v0, %v0, 12
-+        vstrch   %v0, %v0, %v0, %v15
-+        vstrch   %v0, %v0, %v0, %v31
-+        vstrch   %v0, %v0, %v15, %v0
-+        vstrch   %v0, %v0, %v31, %v0
-+        vstrch   %v0, %v15, %v0, %v0
-+        vstrch   %v0, %v31, %v0, %v0
-+        vstrch   %v15, %v0, %v0, %v0
-+        vstrch   %v31, %v0, %v0, %v0
-+        vstrch   %v18, %v3, %v20, %v5, 4
-+        vstrch   %v18, %v3, %v20, %v5, 15
-+        vstrchs  %v18, %v3, %v20, %v5, 8
-+        vstrczh  %v18, %v3, %v20, %v5, 4
-+        vstrczhs %v18, %v3, %v20, %v5, 8
-+        vstrczhs %v18, %v3, %v20, %v5, 15
-+
-+#CHECK: vsumgh  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x65]
-+#CHECK: vsumgh  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x65]
-+#CHECK: vsumgh  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x65]
-+#CHECK: vsumgh  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x65]
-+#CHECK: vsumgh  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x65]
-+
-+	vsumgh	%v0, %v0, %v0
-+	vsumgh	%v0, %v0, %v31
-+	vsumgh	%v0, %v31, %v0
-+	vsumgh	%v31, %v0, %v0
-+	vsumgh	%v18, %v3, %v20
-+
-+#CHECK: vsumgf  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x65]
-+#CHECK: vsumgf  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x65]
-+#CHECK: vsumgf  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x65]
-+#CHECK: vsumgf  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x65]
-+#CHECK: vsumgf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x65]
-+
-+	vsumgf	%v0, %v0, %v0
-+	vsumgf	%v0, %v0, %v31
-+	vsumgf	%v0, %v31, %v0
-+	vsumgf	%v31, %v0, %v0
-+	vsumgf	%v18, %v3, %v20
-+
-+#CHECK: vsumqf  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x20,0x67]
-+#CHECK: vsumqf  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x67]
-+#CHECK: vsumqf  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x67]
-+#CHECK: vsumqf  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x67]
-+#CHECK: vsumqf  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x67]
-+
-+	vsumqf	%v0, %v0, %v0
-+	vsumqf	%v0, %v0, %v31
-+	vsumqf	%v0, %v31, %v0
-+	vsumqf	%v31, %v0, %v0
-+	vsumqf	%v18, %v3, %v20
-+
-+#CHECK: vsumqg  %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x30,0x67]
-+#CHECK: vsumqg  %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x67]
-+#CHECK: vsumqg  %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x67]
-+#CHECK: vsumqg  %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x67]
-+#CHECK: vsumqg  %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x67]
-+
-+	vsumqg	%v0, %v0, %v0
-+	vsumqg	%v0, %v0, %v31
-+	vsumqg	%v0, %v31, %v0
-+	vsumqg	%v31, %v0, %v0
-+	vsumqg	%v18, %v3, %v20
-+
-+#CHECK: vsumb   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x64]
-+#CHECK: vsumb   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x64]
-+#CHECK: vsumb   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x64]
-+#CHECK: vsumb   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x64]
-+#CHECK: vsumb   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x64]
-+
-+	vsumb	%v0, %v0, %v0
-+	vsumb	%v0, %v0, %v31
-+	vsumb	%v0, %v31, %v0
-+	vsumb	%v31, %v0, %v0
-+	vsumb	%v18, %v3, %v20
-+
-+#CHECK: vsumh   %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x10,0x64]
-+#CHECK: vsumh   %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x64]
-+#CHECK: vsumh   %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x64]
-+#CHECK: vsumh   %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x64]
-+#CHECK: vsumh   %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x64]
-+
-+	vsumh	%v0, %v0, %v0
-+	vsumh	%v0, %v0, %v31
-+	vsumh	%v0, %v31, %v0
-+	vsumh	%v31, %v0, %v0
-+	vsumh	%v18, %v3, %v20
-+
-+#CHECK: vtm     %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd8]
-+#CHECK: vtm     %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd8]
-+#CHECK: vtm     %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd8]
-+#CHECK: vtm     %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd8]
-+#CHECK: vtm     %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd8]
-+#CHECK: vtm     %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd8]
-+
-+	vtm	%v0, %v0
-+	vtm	%v0, %v15
-+	vtm	%v0, %v31
-+	vtm	%v15, %v0
-+	vtm	%v31, %v0
-+	vtm	%v14, %v17
-+
-+#CHECK: vuphb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd7]
-+#CHECK: vuphb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd7]
-+#CHECK: vuphb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd7]
-+#CHECK: vuphb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd7]
-+#CHECK: vuphb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd7]
-+#CHECK: vuphb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd7]
-+
-+	vuphb	%v0, %v0
-+	vuphb	%v0, %v15
-+	vuphb	%v0, %v31
-+	vuphb	%v15, %v0
-+	vuphb	%v31, %v0
-+	vuphb	%v14, %v17
-+
-+#CHECK: vuphf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd7]
-+#CHECK: vuphf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd7]
-+#CHECK: vuphf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd7]
-+#CHECK: vuphf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd7]
-+#CHECK: vuphf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd7]
-+#CHECK: vuphf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd7]
-+
-+	vuphf	%v0, %v0
-+	vuphf	%v0, %v15
-+	vuphf	%v0, %v31
-+	vuphf	%v15, %v0
-+	vuphf	%v31, %v0
-+	vuphf	%v14, %v17
-+
-+#CHECK: vuphh   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd7]
-+#CHECK: vuphh   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd7]
-+#CHECK: vuphh   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd7]
-+#CHECK: vuphh   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd7]
-+#CHECK: vuphh   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd7]
-+#CHECK: vuphh   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd7]
-+
-+	vuphh	%v0, %v0
-+	vuphh	%v0, %v15
-+	vuphh	%v0, %v31
-+	vuphh	%v15, %v0
-+	vuphh	%v31, %v0
-+	vuphh	%v14, %v17
-+
-+#CHECK: vuplhb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd5]
-+#CHECK: vuplhb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd5]
-+#CHECK: vuplhb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd5]
-+#CHECK: vuplhb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd5]
-+#CHECK: vuplhb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd5]
-+#CHECK: vuplhb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd5]
-+
-+	vuplhb	%v0, %v0
-+	vuplhb	%v0, %v15
-+	vuplhb	%v0, %v31
-+	vuplhb	%v15, %v0
-+	vuplhb	%v31, %v0
-+	vuplhb	%v14, %v17
-+
-+#CHECK: vuplhf  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd5]
-+#CHECK: vuplhf  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd5]
-+#CHECK: vuplhf  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd5]
-+#CHECK: vuplhf  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd5]
-+#CHECK: vuplhf  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd5]
-+#CHECK: vuplhf  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd5]
-+
-+	vuplhf	%v0, %v0
-+	vuplhf	%v0, %v15
-+	vuplhf	%v0, %v31
-+	vuplhf	%v15, %v0
-+	vuplhf	%v31, %v0
-+	vuplhf	%v14, %v17
-+
-+#CHECK: vuplhh  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd5]
-+#CHECK: vuplhh  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd5]
-+#CHECK: vuplhh  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd5]
-+#CHECK: vuplhh  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd5]
-+#CHECK: vuplhh  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd5]
-+#CHECK: vuplhh  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd5]
-+
-+	vuplhh	%v0, %v0
-+	vuplhh	%v0, %v15
-+	vuplhh	%v0, %v31
-+	vuplhh	%v15, %v0
-+	vuplhh	%v31, %v0
-+	vuplhh	%v14, %v17
-+
-+#CHECK: vuplb   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd6]
-+#CHECK: vuplb   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd6]
-+#CHECK: vuplb   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd6]
-+#CHECK: vuplb   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd6]
-+#CHECK: vuplb   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd6]
-+#CHECK: vuplb   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd6]
-+
-+	vuplb	%v0, %v0
-+	vuplb	%v0, %v15
-+	vuplb	%v0, %v31
-+	vuplb	%v15, %v0
-+	vuplb	%v31, %v0
-+	vuplb	%v14, %v17
-+
-+#CHECK: vuplf   %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd6]
-+#CHECK: vuplf   %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd6]
-+#CHECK: vuplf   %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd6]
-+#CHECK: vuplf   %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd6]
-+#CHECK: vuplf   %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd6]
-+#CHECK: vuplf   %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd6]
-+
-+	vuplf	%v0, %v0
-+	vuplf	%v0, %v15
-+	vuplf	%v0, %v31
-+	vuplf	%v15, %v0
-+	vuplf	%v31, %v0
-+	vuplf	%v14, %v17
-+
-+#CHECK: vuplhw  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd6]
-+#CHECK: vuplhw  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd6]
-+#CHECK: vuplhw  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd6]
-+#CHECK: vuplhw  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd6]
-+#CHECK: vuplhw  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd6]
-+#CHECK: vuplhw  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd6]
-+
-+	vuplhw	%v0, %v0
-+	vuplhw	%v0, %v15
-+	vuplhw	%v0, %v31
-+	vuplhw	%v15, %v0
-+	vuplhw	%v31, %v0
-+	vuplhw	%v14, %v17
-+
-+#CHECK: vupllb  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd4]
-+#CHECK: vupllb  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd4]
-+#CHECK: vupllb  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd4]
-+#CHECK: vupllb  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd4]
-+#CHECK: vupllb  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd4]
-+#CHECK: vupllb  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd4]
-+
-+	vupllb	%v0, %v0
-+	vupllb	%v0, %v15
-+	vupllb	%v0, %v31
-+	vupllb	%v15, %v0
-+	vupllb	%v31, %v0
-+	vupllb	%v14, %v17
-+
-+#CHECK: vupllf  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd4]
-+#CHECK: vupllf  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd4]
-+#CHECK: vupllf  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd4]
-+#CHECK: vupllf  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd4]
-+#CHECK: vupllf  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd4]
-+#CHECK: vupllf  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd4]
-+
-+	vupllf	%v0, %v0
-+	vupllf	%v0, %v15
-+	vupllf	%v0, %v31
-+	vupllf	%v15, %v0
-+	vupllf	%v31, %v0
-+	vupllf	%v14, %v17
-+
-+#CHECK: vupllh  %v0, %v0                # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd4]
-+#CHECK: vupllh  %v0, %v15               # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd4]
-+#CHECK: vupllh  %v0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd4]
-+#CHECK: vupllh  %v15, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd4]
-+#CHECK: vupllh  %v31, %v0               # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd4]
-+#CHECK: vupllh  %v14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd4]
-+
-+	vupllh	%v0, %v0
-+	vupllh	%v0, %v15
-+	vupllh	%v0, %v31
-+	vupllh	%v15, %v0
-+	vupllh	%v31, %v0
-+	vupllh	%v14, %v17
-+
-+#CHECK: vx      %v0, %v0, %v0           # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6d]
-+#CHECK: vx      %v0, %v0, %v31          # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6d]
-+#CHECK: vx      %v0, %v31, %v0          # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6d]
-+#CHECK: vx      %v31, %v0, %v0          # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6d]
-+#CHECK: vx      %v18, %v3, %v20         # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6d]
-+
-+	vx	%v0, %v0, %v0
-+	vx	%v0, %v0, %v31
-+	vx	%v0, %v31, %v0
-+	vx	%v31, %v0, %v0
-+	vx	%v18, %v3, %v20
-+
-+#CHECK: vzero   %v0                     # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44]
-+#CHECK: vzero   %v11                    # encoding: [0xe7,0xb0,0x00,0x00,0x00,0x44]
-+#CHECK: vzero   %v15                    # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44]
-+#CHECK: vzero   %v31                    # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44]
-+
-+	vzero	%v0
-+	vzero	%v11
-+	vzero	%v15
-+	vzero	%v31
-+
-+#CHECK: wcdgb   %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc3]
-+#CHECK: wcdgb   %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc3]
-+#CHECK: wcdgb   %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3]
-+#CHECK: wcdgb   %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3]
-+#CHECK: wcdgb   %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc3]
-+#CHECK: wcdgb   %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc3]
-+#CHECK: wcdgb   %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc3]
-+
-+	wcdgb	%v0, %v0, 0, 0
-+ 	wcdgb	%v0, %v0, 0, 15
-+	wcdgb	%v0, %v0, 4, 0
-+	wcdgb	%v0, %v0, 12, 0
-+	wcdgb	%v0, %v31, 0, 0
-+	wcdgb	%v31, %v0, 0, 0
-+	wcdgb	%v14, %v17, 4, 10
-+
-+#CHECK: wcdlgb  %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc1]
-+#CHECK: wcdlgb  %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc1]
-+#CHECK: wcdlgb  %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1]
-+#CHECK: wcdlgb  %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1]
-+#CHECK: wcdlgb  %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc1]
-+#CHECK: wcdlgb  %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc1]
-+#CHECK: wcdlgb  %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc1]
-+
-+	wcdlgb	%v0, %v0, 0, 0
-+ 	wcdlgb	%v0, %v0, 0, 15
-+	wcdlgb	%v0, %v0, 4, 0
-+	wcdlgb	%v0, %v0, 12, 0
-+	wcdlgb	%v0, %v31, 0, 0
-+	wcdlgb	%v31, %v0, 0, 0
-+	wcdlgb	%v14, %v17, 4, 10
-+
-+#CHECK: wcgdb   %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc2]
-+#CHECK: wcgdb   %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc2]
-+#CHECK: wcgdb   %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2]
-+#CHECK: wcgdb   %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2]
-+#CHECK: wcgdb   %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc2]
-+#CHECK: wcgdb   %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc2]
-+#CHECK: wcgdb   %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc2]
-+
-+	wcgdb	%v0, %v0, 0, 0
-+ 	wcgdb	%v0, %v0, 0, 15
-+	wcgdb	%v0, %v0, 4, 0
-+	wcgdb	%v0, %v0, 12, 0
-+	wcgdb	%v0, %v31, 0, 0
-+	wcgdb	%v31, %v0, 0, 0
-+	wcgdb	%v14, %v17, 4, 10
-+
-+#CHECK: wclgdb  %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc0]
-+#CHECK: wclgdb  %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc0]
-+#CHECK: wclgdb  %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0]
-+#CHECK: wclgdb  %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0]
-+#CHECK: wclgdb  %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc0]
-+#CHECK: wclgdb  %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc0]
-+#CHECK: wclgdb  %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc0]
-+
-+	wclgdb	%v0, %v0, 0, 0
-+ 	wclgdb	%v0, %v0, 0, 15
-+	wclgdb	%v0, %v0, 4, 0
-+	wclgdb	%v0, %v0, 12, 0
-+	wclgdb	%v0, %v31, 0, 0
-+	wclgdb	%v31, %v0, 0, 0
-+	wclgdb	%v14, %v17, 4, 10
-+
-+#CHECK: wfadb   %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe3]
-+#CHECK: wfadb   %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe3]
-+#CHECK: wfadb   %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe3]
-+#CHECK: wfadb   %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe3]
-+#CHECK: wfadb   %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe3]
-+
-+	wfadb	%v0, %v0, %v0
-+	wfadb	%v0, %v0, %v31
-+	wfadb	%v0, %v31, %v0
-+	wfadb	%v31, %v0, %v0
-+	wfadb	%v18, %v3, %v20
-+
-+#CHECK: wfcdb   %f0, %f0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcb]
-+#CHECK: wfcdb   %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcb]
-+#CHECK: wfcdb   %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcb]
-+#CHECK: wfcdb   %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcb]
-+#CHECK: wfcdb   %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcb]
-+#CHECK: wfcdb   %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcb]
-+
-+	wfcdb	%v0, %v0
-+	wfcdb	%v0, %v15
-+	wfcdb	%v0, %v31
-+	wfcdb	%v15, %v0
-+	wfcdb	%v31, %v0
-+	wfcdb	%v14, %v17
-+
-+#CHECK: wfcedb  %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe8]
-+#CHECK: wfcedb  %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe8]
-+#CHECK: wfcedb  %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe8]
-+#CHECK: wfcedb  %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe8]
-+#CHECK: wfcedb  %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe8]
-+
-+	wfcedb	%v0, %v0, %v0
-+	wfcedb	%v0, %v0, %v31
-+	wfcedb	%v0, %v31, %v0
-+	wfcedb	%v31, %v0, %v0
-+	wfcedb	%v18, %v3, %v20
-+
-+#CHECK: wfcedbs %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x18,0x30,0xe8]
-+#CHECK: wfcedbs %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xe8]
-+#CHECK: wfcedbs %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xe8]
-+#CHECK: wfcedbs %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xe8]
-+#CHECK: wfcedbs %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xe8]
-+
-+	wfcedbs	%v0, %v0, %v0
-+	wfcedbs	%v0, %v0, %v31
-+	wfcedbs	%v0, %v31, %v0
-+	wfcedbs	%v31, %v0, %v0
-+	wfcedbs	%v18, %v3, %v20
-+
-+#CHECK: wfchdb  %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xeb]
-+#CHECK: wfchdb  %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xeb]
-+#CHECK: wfchdb  %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xeb]
-+#CHECK: wfchdb  %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xeb]
-+#CHECK: wfchdb  %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xeb]
-+
-+	wfchdb	%v0, %v0, %v0
-+	wfchdb	%v0, %v0, %v31
-+	wfchdb	%v0, %v31, %v0
-+	wfchdb	%v31, %v0, %v0
-+	wfchdb	%v18, %v3, %v20
-+
-+#CHECK: wfchdbs %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x18,0x30,0xeb]
-+#CHECK: wfchdbs %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xeb]
-+#CHECK: wfchdbs %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xeb]
-+#CHECK: wfchdbs %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xeb]
-+#CHECK: wfchdbs %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xeb]
-+
-+	wfchdbs	%v0, %v0, %v0
-+	wfchdbs	%v0, %v0, %v31
-+	wfchdbs	%v0, %v31, %v0
-+	wfchdbs	%v31, %v0, %v0
-+	wfchdbs	%v18, %v3, %v20
-+
-+#CHECK: wfchedb %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xea]
-+#CHECK: wfchedb %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xea]
-+#CHECK: wfchedb %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xea]
-+#CHECK: wfchedb %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xea]
-+#CHECK: wfchedb %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xea]
-+
-+	wfchedb	%v0, %v0, %v0
-+	wfchedb	%v0, %v0, %v31
-+	wfchedb	%v0, %v31, %v0
-+	wfchedb	%v31, %v0, %v0
-+	wfchedb	%v18, %v3, %v20
-+
-+#CHECK: wfchedbs %f0, %f0, %f0          # encoding: [0xe7,0x00,0x00,0x18,0x30,0xea]
-+#CHECK: wfchedbs %f0, %f0, %v31         # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xea]
-+#CHECK: wfchedbs %f0, %v31, %f0         # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xea]
-+#CHECK: wfchedbs %v31, %f0, %f0         # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xea]
-+#CHECK: wfchedbs %v18, %f3, %v20        # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xea]
-+
-+	wfchedbs %v0, %v0, %v0
-+	wfchedbs %v0, %v0, %v31
-+	wfchedbs %v0, %v31, %v0
-+	wfchedbs %v31, %v0, %v0
-+	wfchedbs %v18, %v3, %v20
-+
-+#CHECK: wfddb   %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe5]
-+#CHECK: wfddb   %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe5]
-+#CHECK: wfddb   %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe5]
-+#CHECK: wfddb   %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe5]
-+#CHECK: wfddb   %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe5]
-+
-+	wfddb	%v0, %v0, %v0
-+	wfddb	%v0, %v0, %v31
-+	wfddb	%v0, %v31, %v0
-+	wfddb	%v31, %v0, %v0
-+	wfddb	%v18, %v3, %v20
-+
-+#CHECK: wfidb   %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc7]
-+#CHECK: wfidb   %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc7]
-+#CHECK: wfidb   %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
-+#CHECK: wfidb   %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
-+#CHECK: wfidb   %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc7]
-+#CHECK: wfidb   %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc7]
-+#CHECK: wfidb   %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc7]
-+
-+	wfidb	%v0, %v0, 0, 0
-+ 	wfidb	%v0, %v0, 0, 15
-+	wfidb	%v0, %v0, 4, 0
-+	wfidb	%v0, %v0, 12, 0
-+	wfidb	%v0, %v31, 0, 0
-+	wfidb	%v31, %v0, 0, 0
-+	wfidb	%v14, %v17, 4, 10
-+
-+#CHECK: wfkdb   %f0, %f0                # encoding: [0xe7,0x00,0x00,0x00,0x30,0xca]
-+#CHECK: wfkdb   %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xca]
-+#CHECK: wfkdb   %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xca]
-+#CHECK: wfkdb   %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xca]
-+#CHECK: wfkdb   %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xca]
-+#CHECK: wfkdb   %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xca]
-+
-+	wfkdb	%v0, %v0
-+	wfkdb	%v0, %v15
-+	wfkdb	%v0, %v31
-+	wfkdb	%v15, %v0
-+	wfkdb	%v31, %v0
-+	wfkdb	%v14, %v17
-+
-+#CHECK: wflcdb  %f0, %f0                # encoding: [0xe7,0x00,0x00,0x08,0x30,0xcc]
-+#CHECK: wflcdb  %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xcc]
-+#CHECK: wflcdb  %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xcc]
-+#CHECK: wflcdb  %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xcc]
-+#CHECK: wflcdb  %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xcc]
-+#CHECK: wflcdb  %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xcc]
-+
-+	wflcdb	%v0, %v0
-+	wflcdb	%v0, %v15
-+	wflcdb	%v0, %v31
-+	wflcdb	%v15, %v0
-+	wflcdb	%v31, %v0
-+	wflcdb	%v14, %v17
-+
-+#CHECK: wflndb  %f0, %f0                # encoding: [0xe7,0x00,0x00,0x18,0x30,0xcc]
-+#CHECK: wflndb  %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x18,0x30,0xcc]
-+#CHECK: wflndb  %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xcc]
-+#CHECK: wflndb  %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x18,0x30,0xcc]
-+#CHECK: wflndb  %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xcc]
-+#CHECK: wflndb  %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x18,0x34,0xcc]
-+
-+	wflndb	%v0, %v0
-+	wflndb	%v0, %v15
-+	wflndb	%v0, %v31
-+	wflndb	%v15, %v0
-+	wflndb	%v31, %v0
-+	wflndb	%v14, %v17
-+
-+#CHECK: wflpdb  %f0, %f0                # encoding: [0xe7,0x00,0x00,0x28,0x30,0xcc]
-+#CHECK: wflpdb  %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x28,0x30,0xcc]
-+#CHECK: wflpdb  %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x28,0x34,0xcc]
-+#CHECK: wflpdb  %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x28,0x30,0xcc]
-+#CHECK: wflpdb  %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x28,0x38,0xcc]
-+#CHECK: wflpdb  %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x28,0x34,0xcc]
-+
-+	wflpdb	%v0, %v0
-+	wflpdb	%v0, %v15
-+	wflpdb	%v0, %v31
-+	wflpdb	%v15, %v0
-+	wflpdb	%v31, %v0
-+	wflpdb	%v14, %v17
-+
-+#CHECK: wfmadb  %f0, %f0, %f0, %f0      # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8f]
-+#CHECK: wfmadb  %f0, %f0, %f0, %v31     # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8f]
-+#CHECK: wfmadb  %f0, %f0, %v31, %f0     # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8f]
-+#CHECK: wfmadb  %f0, %v31, %f0, %f0     # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8f]
-+#CHECK: wfmadb  %v31, %f0, %f0, %f0     # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8f]
-+#CHECK: wfmadb  %f13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8f]
-+
-+	wfmadb	%v0, %v0, %v0, %v0
-+	wfmadb	%v0, %v0, %v0, %v31
-+	wfmadb	%v0, %v0, %v31, %v0
-+	wfmadb	%v0, %v31, %v0, %v0
-+	wfmadb	%v31, %v0, %v0, %v0
-+	wfmadb	%v13, %v17, %v21, %v25
-+
-+#CHECK: wfmdb   %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe7]
-+#CHECK: wfmdb   %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe7]
-+#CHECK: wfmdb   %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe7]
-+#CHECK: wfmdb   %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe7]
-+#CHECK: wfmdb   %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe7]
-+
-+	wfmdb	%v0, %v0, %v0
-+	wfmdb	%v0, %v0, %v31
-+	wfmdb	%v0, %v31, %v0
-+	wfmdb	%v31, %v0, %v0
-+	wfmdb	%v18, %v3, %v20
-+
-+#CHECK: wfmsdb  %f0, %f0, %f0, %f0      # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8e]
-+#CHECK: wfmsdb  %f0, %f0, %f0, %v31     # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8e]
-+#CHECK: wfmsdb  %f0, %f0, %v31, %f0     # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8e]
-+#CHECK: wfmsdb  %f0, %v31, %f0, %f0     # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8e]
-+#CHECK: wfmsdb  %v31, %f0, %f0, %f0     # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8e]
-+#CHECK: wfmsdb  %f13, %v17, %v21, %v25  # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8e]
-+
-+	wfmsdb	%v0, %v0, %v0, %v0
-+	wfmsdb	%v0, %v0, %v0, %v31
-+	wfmsdb	%v0, %v0, %v31, %v0
-+	wfmsdb	%v0, %v31, %v0, %v0
-+	wfmsdb	%v31, %v0, %v0, %v0
-+	wfmsdb	%v13, %v17, %v21, %v25
-+
-+#CHECK: wfsdb   %f0, %f0, %f0           # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe2]
-+#CHECK: wfsdb   %f0, %f0, %v31          # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe2]
-+#CHECK: wfsdb   %f0, %v31, %f0          # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe2]
-+#CHECK: wfsdb   %v31, %f0, %f0          # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe2]
-+#CHECK: wfsdb   %v18, %f3, %v20         # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe2]
-+
-+	wfsdb	%v0, %v0, %v0
-+	wfsdb	%v0, %v0, %v31
-+	wfsdb	%v0, %v31, %v0
-+	wfsdb	%v31, %v0, %v0
-+	wfsdb	%v18, %v3, %v20
-+
-+#CHECK: wfsqdb  %f0, %f0                # encoding: [0xe7,0x00,0x00,0x08,0x30,0xce]
-+#CHECK: wfsqdb  %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xce]
-+#CHECK: wfsqdb  %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xce]
-+#CHECK: wfsqdb  %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xce]
-+#CHECK: wfsqdb  %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xce]
-+#CHECK: wfsqdb  %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xce]
-+
-+	wfsqdb	%v0, %v0
-+	wfsqdb	%v0, %v15
-+	wfsqdb	%v0, %v31
-+	wfsqdb	%v15, %v0
-+	wfsqdb	%v31, %v0
-+	wfsqdb	%v14, %v17
-+
-+#CHECK: wftcidb %f0, %f0, 0             # encoding: [0xe7,0x00,0x00,0x08,0x30,0x4a]
-+#CHECK: wftcidb %f0, %f0, 4095          # encoding: [0xe7,0x00,0xff,0xf8,0x30,0x4a]
-+#CHECK: wftcidb %f0, %f15, 0            # encoding: [0xe7,0x0f,0x00,0x08,0x30,0x4a]
-+#CHECK: wftcidb %f0, %v31, 0            # encoding: [0xe7,0x0f,0x00,0x08,0x34,0x4a]
-+#CHECK: wftcidb %f15, %f0, 0            # encoding: [0xe7,0xf0,0x00,0x08,0x30,0x4a]
-+#CHECK: wftcidb %v31, %f0, 0            # encoding: [0xe7,0xf0,0x00,0x08,0x38,0x4a]
-+#CHECK: wftcidb %f4, %v21, 1656         # encoding: [0xe7,0x45,0x67,0x88,0x34,0x4a]
-+
-+	wftcidb	%v0, %v0, 0
-+	wftcidb	%v0, %v0, 4095
-+	wftcidb	%v0, %v15, 0
-+	wftcidb	%v0, %v31, 0
-+	wftcidb	%v15, %v0, 0
-+	wftcidb	%v31, %v0, 0
-+	wftcidb	%v4, %v21, 0x678
-+
-+#CHECK: wldeb   %f0, %f0                # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4]
-+#CHECK: wldeb   %f0, %f15               # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4]
-+#CHECK: wldeb   %f0, %v31               # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc4]
-+#CHECK: wldeb   %f15, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xc4]
-+#CHECK: wldeb   %v31, %f0               # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc4]
-+#CHECK: wldeb   %f14, %v17              # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xc4]
-+
-+	wldeb	%v0, %v0
-+	wldeb	%v0, %v15
-+	wldeb	%v0, %v31
-+	wldeb	%v15, %v0
-+	wldeb	%v31, %v0
-+	wldeb	%v14, %v17
-+
-+#CHECK: wledb   %f0, %f0, 0, 0          # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5]
-+#CHECK: wledb   %f0, %f0, 0, 15         # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5]
-+#CHECK: wledb   %f0, %f0, 4, 0          # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
-+#CHECK: wledb   %f0, %f0, 12, 0         # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
-+#CHECK: wledb   %f0, %v31, 0, 0         # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc5]
-+#CHECK: wledb   %v31, %f0, 0, 0         # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc5]
-+#CHECK: wledb   %f14, %v17, 4, 10       # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc5]
-+
-+	wledb	%v0, %v0, 0, 0
-+ 	wledb	%v0, %v0, 0, 15
-+	wledb	%v0, %v0, 4, 0
-+	wledb	%v0, %v0, 12, 0
-+	wledb	%v0, %v31, 0, 0
-+	wledb	%v31, %v0, 0, 0
-+	wledb	%v14, %v17, 4, 10
-Index: llvm-36/test/MC/SystemZ/insn-good-z196.s
-===================================================================
---- llvm-36.orig/test/MC/SystemZ/insn-good-z196.s
-+++ llvm-36/test/MC/SystemZ/insn-good-z196.s
-@@ -1021,6 +1021,16 @@
- 	ork	%r15,%r0,%r0
- 	ork	%r7,%r8,%r9
- 
-+#CHECK: popcnt	%r0, %r0                # encoding: [0xb9,0xe1,0x00,0x00]
-+#CHECK: popcnt	%r0, %r15               # encoding: [0xb9,0xe1,0x00,0x0f]
-+#CHECK: popcnt	%r15, %r0               # encoding: [0xb9,0xe1,0x00,0xf0]
-+#CHECK: popcnt	%r7, %r8                # encoding: [0xb9,0xe1,0x00,0x78]
-+
-+	popcnt	%r0,%r0
-+	popcnt	%r0,%r15
-+	popcnt	%r15,%r0
-+	popcnt	%r7,%r8
-+
- #CHECK: risbhg	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
- #CHECK: risbhg	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
- #CHECK: risbhg	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
-Index: llvm-36/test/MC/SystemZ/insn-good-zEC12.s
-===================================================================
---- /dev/null
-+++ llvm-36/test/MC/SystemZ/insn-good-zEC12.s
-@@ -0,0 +1,126 @@
-+# For zEC12 and above.
-+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s
-+
-+#CHECK: etnd	%r0                     # encoding: [0xb2,0xec,0x00,0x00]
-+#CHECK: etnd	%r15                    # encoding: [0xb2,0xec,0x00,0xf0]
-+#CHECK: etnd	%r7                     # encoding: [0xb2,0xec,0x00,0x70]
-+
-+	etnd	%r0
-+	etnd	%r15
-+	etnd	%r7
-+
-+#CHECK: ntstg	%r0, -524288            # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25]
-+#CHECK: ntstg	%r0, -1                 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25]
-+#CHECK: ntstg	%r0, 0                  # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25]
-+#CHECK: ntstg	%r0, 1                  # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25]
-+#CHECK: ntstg	%r0, 524287             # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25]
-+#CHECK: ntstg	%r0, 0(%r1)             # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25]
-+#CHECK: ntstg	%r0, 0(%r15)            # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25]
-+#CHECK: ntstg	%r0, 524287(%r1,%r15)   # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25]
-+#CHECK: ntstg	%r0, 524287(%r15,%r1)   # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25]
-+#CHECK: ntstg	%r15, 0                 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25]
-+
-+	ntstg	%r0, -524288
-+	ntstg	%r0, -1
-+	ntstg	%r0, 0
-+	ntstg	%r0, 1
-+	ntstg	%r0, 524287
-+	ntstg	%r0, 0(%r1)
-+	ntstg	%r0, 0(%r15)
-+	ntstg	%r0, 524287(%r1,%r15)
-+	ntstg	%r0, 524287(%r15,%r1)
-+	ntstg	%r15, 0
-+
-+#CHECK: ppa	%r0, %r0, 0             # encoding: [0xb2,0xe8,0x00,0x00]
-+#CHECK: ppa	%r0, %r0, 15            # encoding: [0xb2,0xe8,0xf0,0x00]
-+#CHECK: ppa	%r0, %r15, 0            # encoding: [0xb2,0xe8,0x00,0x0f]
-+#CHECK: ppa	%r4, %r6, 7             # encoding: [0xb2,0xe8,0x70,0x46]
-+#CHECK: ppa	%r15, %r0, 0            # encoding: [0xb2,0xe8,0x00,0xf0]
-+
-+	ppa	%r0, %r0, 0
-+	ppa	%r0, %r0, 15
-+	ppa	%r0, %r15, 0
-+	ppa	%r4, %r6, 7
-+	ppa	%r15, %r0, 0
-+
-+#CHECK: risbgn	%r0, %r0, 0, 0, 0       # encoding: [0xec,0x00,0x00,0x00,0x00,0x59]
-+#CHECK: risbgn	%r0, %r0, 0, 0, 63      # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59]
-+#CHECK: risbgn	%r0, %r0, 0, 255, 0     # encoding: [0xec,0x00,0x00,0xff,0x00,0x59]
-+#CHECK: risbgn	%r0, %r0, 255, 0, 0     # encoding: [0xec,0x00,0xff,0x00,0x00,0x59]
-+#CHECK: risbgn	%r0, %r15, 0, 0, 0      # encoding: [0xec,0x0f,0x00,0x00,0x00,0x59]
-+#CHECK: risbgn	%r15, %r0, 0, 0, 0      # encoding: [0xec,0xf0,0x00,0x00,0x00,0x59]
-+#CHECK: risbgn	%r4, %r5, 6, 7, 8       # encoding: [0xec,0x45,0x06,0x07,0x08,0x59]
-+
-+	risbgn	%r0,%r0,0,0,0
-+	risbgn	%r0,%r0,0,0,63
-+	risbgn	%r0,%r0,0,255,0
-+	risbgn	%r0,%r0,255,0,0
-+	risbgn	%r0,%r15,0,0,0
-+	risbgn	%r15,%r0,0,0,0
-+	risbgn	%r4,%r5,6,7,8
-+
-+#CHECK: tabort	0                       # encoding: [0xb2,0xfc,0x00,0x00]
-+#CHECK: tabort	0(%r1)                  # encoding: [0xb2,0xfc,0x10,0x00]
-+#CHECK: tabort	0(%r15)                 # encoding: [0xb2,0xfc,0xf0,0x00]
-+#CHECK: tabort	4095                    # encoding: [0xb2,0xfc,0x0f,0xff]
-+#CHECK: tabort	4095(%r1)               # encoding: [0xb2,0xfc,0x1f,0xff]
-+#CHECK: tabort	4095(%r15)              # encoding: [0xb2,0xfc,0xff,0xff]
-+
-+	tabort	0
-+	tabort	0(%r1)
-+	tabort	0(%r15)
-+	tabort	4095
-+	tabort	4095(%r1)
-+	tabort	4095(%r15)
-+
-+#CHECK: tbegin	0, 0                    # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
-+#CHECK: tbegin	4095, 0                 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00]
-+#CHECK: tbegin	0, 0                    # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
-+#CHECK: tbegin	0, 1                    # encoding: [0xe5,0x60,0x00,0x00,0x00,0x01]
-+#CHECK: tbegin	0, 32767                # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff]
-+#CHECK: tbegin	0, 32768                # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00]
-+#CHECK: tbegin	0, 65535                # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff]
-+#CHECK: tbegin	0(%r1), 42              # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a]
-+#CHECK: tbegin	0(%r15), 42             # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a]
-+#CHECK: tbegin	4095(%r1), 42           # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a]
-+#CHECK: tbegin	4095(%r15), 42          # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a]
-+
-+	tbegin	0, 0
-+	tbegin	4095, 0
-+	tbegin	0, 0
-+	tbegin	0, 1
-+	tbegin	0, 32767
-+	tbegin	0, 32768
-+	tbegin	0, 65535
-+	tbegin	0(%r1), 42
-+	tbegin	0(%r15), 42
-+	tbegin	4095(%r1), 42
-+	tbegin	4095(%r15), 42
-+
-+#CHECK: tbeginc	0, 0                    # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
-+#CHECK: tbeginc	4095, 0                 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00]
-+#CHECK: tbeginc	0, 0                    # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
-+#CHECK: tbeginc	0, 1                    # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01]
-+#CHECK: tbeginc	0, 32767                # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff]
-+#CHECK: tbeginc	0, 32768                # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00]
-+#CHECK: tbeginc	0, 65535                # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff]
-+#CHECK: tbeginc	0(%r1), 42              # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a]
-+#CHECK: tbeginc	0(%r15), 42             # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a]
-+#CHECK: tbeginc	4095(%r1), 42           # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a]
-+#CHECK: tbeginc	4095(%r15), 42          # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a]
-+
-+	tbeginc	0, 0
-+	tbeginc	4095, 0
-+	tbeginc	0, 0
-+	tbeginc	0, 1
-+	tbeginc	0, 32767
-+	tbeginc	0, 32768
-+	tbeginc	0, 65535
-+	tbeginc	0(%r1), 42
-+	tbeginc	0(%r15), 42
-+	tbeginc	4095(%r1), 42
-+	tbeginc	4095(%r15), 42
-+
-+#CHECK: tend                            # encoding: [0xb2,0xf8,0x00,0x00]
-+
-+	tend
-Index: llvm-36/test/MC/SystemZ/tokens.s
-===================================================================
---- llvm-36.orig/test/MC/SystemZ/tokens.s
-+++ llvm-36/test/MC/SystemZ/tokens.s
-@@ -13,10 +13,16 @@
- #CHECK: foo	100(200,%r0), 300
- #CHECK: error: invalid instruction
- #CHECK: foo	100(200,%r1), 300
--#CHECK: error: invalid operand
-+#CHECK: error: invalid address register
- #CHECK: foo	100(%a0), 200
- #CHECK: error: %r0 used in an address
- #CHECK: foo	100(%r0), 200
-+#CHECK: error: %r0 used in an address
-+#CHECK: foo	100(%v1,%r0), 200
-+#CHECK: error: invalid instruction
-+#CHECK: foo	100(%v0,%r1), 200
-+#CHECK: error: invalid instruction
-+#CHECK: foo	100(%v31), 200
- #CHECK: error: invalid operand
- #CHECK: foo	100(%r1,%a0), 200
- #CHECK: error: %r0 used in an address
-@@ -45,6 +51,12 @@
- #CHECK: foo	%a15, 200
- #CHECK: error: invalid register
- #CHECK: foo	%a16, 200
-+#CHECK: error: invalid instruction
-+#CHECK: foo	%v0, 200
-+#CHECK: error: invalid instruction
-+#CHECK: foo	%v31, 200
-+#CHECK: error: invalid register
-+#CHECK: foo	%v32, 200
- #CHECK: error: invalid register
- #CHECK: foo	%c, 200
- #CHECK: error: invalid register
-@@ -60,6 +72,9 @@
- 	foo	100(200,%r1), 300
- 	foo	100(%a0), 200
- 	foo	100(%r0), 200
-+	foo	100(%v1,%r0), 200
-+	foo	100(%v0,%r1), 200
-+	foo	100(%v31), 200
- 	foo	100(%r1,%a0), 200
- 	foo	100(%r1,%r0), 200
- 	foo	100(%r1,%r2, 200
-@@ -74,6 +89,9 @@
- 	foo	%a0, 200
- 	foo	%a15, 200
- 	foo	%a16, 200
-+	foo	%v0, 200
-+	foo	%v31, 200
-+	foo	%v32, 200
- 	foo	%c, 200
- 	foo	%, 200
- 	foo	{, 200
diff --git a/SOURCES/make-llvm-snapshot.sh b/SOURCES/make-llvm-snapshot.sh
deleted file mode 100755
index 40aa6c9..0000000
--- a/SOURCES/make-llvm-snapshot.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-#!/bin/sh
-
-DIRNAME=llvm-$( date +%Y%m%d )
-URL=http://llvm.org/svn/llvm-project/llvm/branches/release_33/
-#URL=http://llvm.org/svn/llvm-project/llvm/trunk/
-
-rm -rf $DIRNAME
-svn co $URL $DIRNAME |& tail -1 > revision
-mv revision $DIRNAME
-rm -rf $DIRNAME/.svn
-
-tar Jcf $DIRNAME.tar.xz $DIRNAME
-rm -rf $DIRNAME
diff --git a/SPECS/llvm.spec b/SPECS/llvm.spec
index 450f2b8..6952d0f 100644
--- a/SPECS/llvm.spec
+++ b/SPECS/llvm.spec
@@ -1,3 +1,10 @@
+# Components enabled if supported by target architecture:
+%ifarch %ix86 x86_64
+  %bcond_without gold
+%else
+  %bcond_with gold
+%endif
+
 %if 0%{?rhel} == 6
 %define rhel6 1
 %endif
@@ -6,45 +13,50 @@
 # consequently we build swrast on them instead of llvmpipe.
 ExcludeArch: ppc s390 %{?rhel6:s390x}
 
-#global svndate 20131023
-#global prerel rc4
-
-Name:           mesa-private-llvm
-Version:        3.6.2
-Release:        2%{?prerel:.%prerel}%{?dist}
-Summary:        llvm engine for Mesa
-
-Group:		System Environment/Libraries
-License:        NCSA
-URL:            http://llvm.org/
-Source0:	http://llvm.org/releases/%{version}/%{?prerel}/llvm-%{version}%{?prerel}.src.tar.xz
-#Source0:	llvm-%{svndate}.tar.xz
-Source1:	make-llvm-snapshot.sh
-# multilib fixes
-Source2:        llvm-Config-config.h
-Source3:        llvm-Config-llvm-config.h
-
-# Data files should be installed with timestamps preserved
-Patch0:         llvm-2.6-timestamp.patch
-
-# llvm Z13 backports (#1182150)
-Patch1: llvm-z13-backports.patch
-Patch2: llvm-3.6-large-struct-return.patch
-
-# llvm aarch64 bug fix (#1254386)
-Patch10: 0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch
-# add model detection for skylake and broadwell
-Patch11: llvm-3.6.2-nerf-skylake.patch
-
-BuildRequires:  bison
-BuildRequires:  chrpath
-BuildRequires:  flex
-BuildRequires:  gcc-c++ >= 3.4
-BuildRequires:  groff
-BuildRequires:  libtool-ltdl-devel
-BuildRequires:  zip
-# for DejaGNU test suite
-BuildRequires:  dejagnu tcl-devel python
+%ifarch s390x
+%global host_target SystemZ
+%endif
+%ifarch ppc64 ppc64le
+%global host_target PowerPC
+%endif
+%ifarch %ix86 x86_64
+%global host_target X86
+%endif
+%ifarch aarch64
+%global host_target AArch64
+%endif
+%ifarch %{arm}
+%global host_target ARM
+%endif
+
+%ifnarch s390x
+%global amdgpu ;AMDGPU
+%endif
+
+Name:		mesa-private-llvm
+Version:	3.8.1
+Release:	1%{?dist}
+Summary:	llvm engine for Mesa
+
+Group:          System Environment/Libraries
+License:	NCSA
+URL:		http://llvm.org
+Source0:	http://llvm.org/releases/%{version}/llvm-%{version}.src.tar.xz
+Source100:	llvm-config.h
+
+# recognize s390 as SystemZ when configuring build
+#Patch0:		llvm-3.7.1-cmake-s390.patch
+
+Patch1: fix-cmake-include.patch
+Patch2: llvm-3.8.1-rhel-7.3.patch
+
+BuildRequires:	cmake
+BuildRequires:	zlib-devel
+%if %{with gold}
+BuildRequires:  binutils-devel
+%endif
+BuildRequires:  libstdc++-static
+BuildRequires:  python
 
 %description
 This package contains the LLVM-based runtime support for Mesa.  It is not a
@@ -52,98 +64,87 @@ fully-featured build of LLVM, and use by any package other than Mesa is not
 supported.
 
 %package devel
-Summary:        Libraries and header files for Mesa's llvm engine
-Group:          Development/Libraries
-Requires:       %{name}%{?_isa} = %{version}-%{release}
-Requires:       libstdc++-devel >= 3.4
+Summary:	Libraries and header files for LLVM
+Requires:	%{name}%{?_isa} = %{version}-%{release}
 
 %description devel
 This package contains library and header files needed to build the LLVM
 support in Mesa.
 
 %prep
-%setup -q -n llvm-%{version}%{?prerel}.src
-rm -r -f tools/clang
-
-# llvm patches
-%patch0 -p1 -b .timestamp
-%patch1 -p1 -b .z13
-%patch2 -p1 -b .large-struct
-%patch10 -p1 -b .aarch64-fix
-%patch11 -p1 -b .skl-fix
-
-# fix ld search path
-sed -i 's|/lib /usr/lib $lt_ld_extra|%{_libdir} $lt_ld_extra|' \
-    ./configure
-
-# mangle the library name
-sed -i 's|^LLVM_VERSION_SUFFIX=|&-mesa|' ./configure
-
-%ifnarch s390x
-%define r600 ,r600
-%endif
+%setup -q -n llvm-%{version}.src
+#patch0 -p1 -b .s390
+%patch1 -p1 -b .fixinc
+%patch2 -p1
 
 %build
-export CC=gcc
-export CXX=g++
-%configure \
-  --prefix=%{_prefix} \
-  --libdir=%{_libdir} \
-  --includedir=%{_includedir}/mesa-private \
-  --with-extra-ld-options=-Wl,-Bsymbolic,--default-symver \
-  --enable-targets=host%{?r600} \
-  --enable-bindings=none \
-  --enable-debug-runtime \
-  --enable-jit \
-  --enable-shared \
-  --enable-optimized \
-  --disable-clang-arcmt \
-  --disable-clang-static-analyzer \
-  --disable-clang-rewriter \
-  --disable-assertions \
-  --disable-docs \
-  --disable-libffi \
-  --disable-terminfo \
-  --disable-timestamps \
-  %{nil}
-
-# FIXME file this
-# configure does not properly specify libdir or includedir
-sed -i 's|(PROJ_prefix)/lib|(PROJ_prefix)/%{_lib}|g' Makefile.config
-sed -i 's|(PROJ_prefix)/include|&/mesa-private|g' Makefile.config
-#sed -i 's|LLVM_VERSION_SUFFIX := |& -mesa|g' Makefile.config
-
-# FIXME upstream need to fix this
-# llvm-config.cpp hardcodes lib in it
-sed -i 's|ActiveLibDir = ActivePrefix + "/lib"|ActiveLibDir = ActivePrefix + "/%{_lib}"|g' tools/llvm-config/llvm-config.cpp
+
 sed -i 's|ActiveIncludeDir = ActivePrefix + "/include|&/mesa-private|g' tools/llvm-config/llvm-config.cpp
 
-make %{_smp_mflags} VERBOSE=1 OPTIMIZE_OPTION="%{optflags} -fno-strict-aliasing"
+mkdir -p _build
+cd _build
+
+# force off shared libs as cmake macros turns it on.
+%cmake .. \
+	-DINCLUDE_INSTALL_DIR=%{_includedir}/mesa-private \
+	-DLLVM_VERSION_SUFFIX="-mesa" \
+	-DBUILD_SHARED_LIBS:BOOL=OFF \
+	-DCMAKE_BUILD_TYPE=RelWithDebInfo \
+	-DCMAKE_SHARED_LINKER_FLAGS="-Wl,-Bsymbolic -static-libstdc++" \
+%if 0%{?__isa_bits} == 64
+	-DLLVM_LIBDIR_SUFFIX=64 \
+%else
+	-DLLVM_LIBDIR_SUFFIX= \
+%endif
+	\
+	-DLLVM_TARGETS_TO_BUILD="%{host_target}%{?amdgpu}" \
+	-DLLVM_ENABLE_LIBCXX:BOOL=OFF \
+	-DLLVM_ENABLE_ZLIB:BOOL=ON \
+	-DLLVM_ENABLE_FFI:BOOL=OFF \
+	-DLLVM_ENABLE_RTTI:BOOL=OFF \
+%if %{with gold}
+	-DLLVM_BINUTILS_INCDIR=%{_includedir} \
+%endif
+	\
+	-DLLVM_BUILD_RUNTIME:BOOL=ON \
+	\
+	-DLLVM_INCLUDE_TOOLS:BOOL=ON \
+	-DLLVM_BUILD_TOOLS:BOOL=ON \
+	\
+	-DLLVM_INCLUDE_TESTS:BOOL=ON \
+	-DLLVM_BUILD_TESTS:BOOL=ON \
+	\
+	-DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \
+	-DLLVM_BUILD_EXAMPLES:BOOL=OFF \
+	\
+	-DLLVM_INCLUDE_UTILS:BOOL=ON \
+	-DLLVM_INSTALL_UTILS:BOOL=OFF \
+	\
+	-DLLVM_INCLUDE_DOCS:BOOL=OFF \
+	-DLLVM_BUILD_DOCS:BOOL=OFF \
+	-DLLVM_ENABLE_SPHINX:BOOL=OFF \
+	-DLLVM_ENABLE_DOXYGEN:BOOL=OFF \
+	\
+	-DLLVM_BUILD_LLVM_DYLIB:BOOL=ON \
+	-DLLVM_DYLIB_EXPORT_ALL:BOOL=ON \
+	-DLLVM_LINK_LLVM_DYLIB:BOOL=ON \
+	-DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \
+	-DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF
+
+make %{?_smp_mflags} VERBOSE=1
 
 %install
+cd _build
 make install DESTDIR=%{buildroot}
 
-# rename the few binaries we're keeping
-mv %{buildroot}%{_bindir}/llvm-config %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits}
-
-pushd %{buildroot}%{_includedir}/mesa-private/llvm/Config
-mv config.h config-%{__isa_bits}.h
-cp -p %{SOURCE2} config.h
-mv llvm-config.h llvm-config-%{__isa_bits}.h
-cp -p %{SOURCE3} llvm-config.h
-popd
-
-file %{buildroot}/%{_bindir}/* %{buildroot}/%{bindir}/*.so | \
-    awk -F: '$2~/ELF/{print $1}' | \
-    xargs -r chrpath -d
-
-# FIXME file this bug
-sed -i 's,ABS_RUN_DIR/lib",ABS_RUN_DIR/%{_lib}/%{name}",' \
-  %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits}
+# fix multi-lib
+mv -v %{buildroot}%{_bindir}/llvm-config %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits}
+mv -v %{buildroot}%{_includedir}/mesa-private/llvm/Config/llvm-config{,-%{__isa_bits}}.h
+install -m 0644 %{SOURCE100} %{buildroot}%{_includedir}/mesa-private/llvm/Config/llvm-config.h
 
 rm -f %{buildroot}%{_libdir}/*.a
 
-rm -f %{buildroot}%{_libdir}/libLLVM-%{version}.so
+rm -f %{buildroot}%{_libdir}/libLLVM.so
 
 # remove documentation makefiles:
 # they require the build directory to work
@@ -155,103 +156,74 @@ ls %{buildroot}%{_libdir}/* | grep -v libLLVM | xargs rm -f
 rm -rf %{buildroot}%{_mandir}/man1
 
 # RHEL: Strip out some headers Mesa doesn't need
-rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{Analysis,Assembly}
-rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{DebugInfo,Option}
+rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{Assembly}
+rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/Option
 rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/TableGen
+rm -rf %{buildroot}%{_includedir}/llvm-c/lto.h
 
 # RHEL: Strip out cmake build foo
 rm -rf %{buildroot}%{_datadir}/llvm/cmake
 
 %check
-# the Koji build server does not seem to have enough RAM
-# for the default 16 threads
-
-# just log the results, don't fail the build
-make check LIT_ARGS="-v -j4" | tee llvm-testlog-%{_arch}.txt
+cd _build
+# 3.8.1 note: skx failures are XFAIL. the skylake backport does not wire
+# up AVX512 for skylake, but the tests are from code that expects that.
+# safe to ignore.
+make check-all || :
 
 %post -p /sbin/ldconfig
 %postun -p /sbin/ldconfig
 
 %files
-%defattr(-,root,root,-)
 %doc LICENSE.TXT
-%{_libdir}/libLLVM-3.6-mesa.so
+%{_libdir}/libLLVM-3.8*-mesa.so
 
 %files devel
-%defattr(-,root,root,-)
 %{_bindir}/%{name}-config-%{__isa_bits}
 %{_includedir}/mesa-private/llvm
 %{_includedir}/mesa-private/llvm-c
 
 %changelog
-* Wed Oct 14 2015 Adam Jackson <ajax@redhat.com> 3.6.2-2
-- Teach CPU detection about Skylake/Broadwell, treat them like Haswell
-
-* Mon Aug 24 2015 Dave Airlie <airlied@redhat.com> 3.6.2-1
-- fix aarch64 bugs via 3.6.2 + patch
-
-* Tue Aug 18 2015 Adam Jackson <ajax@redhat.com> 3.6.1-2
-- Fix large struct return on s390
-
-* Tue May 26 2015 Dave Airlie <airlied@redhat.com> 3.6.1-1
-- rebase to llvm 3.6.1
-
-* Thu May 21 2015 Dave Airlie <airlied@redhat.com> 3.6.0-3
-- backport llvm z13 support from IBM
-
-* Wed May 13 2015 Dave Airlie <airlied@redhat.com> 3.6.0-2
-- mesa needs Object headers now.
-
-* Wed May 13 2015 Dave Airlie <airlied@redhat.com> 3.6.0-1
-- llvm 3.6.0 final
+* Wed Jul 13 2016 Adam Jackson <ajax@redhat.com> - 3.8.1-1
+- Update to 3.8.1
+- Sync some x86 getHostCPUName updates from trunk
 
-* Mon Feb 23 2015 Adam Jackson <ajax@redhat.com> 3.6.0-0.1
-- llvm 3.6.0 rc4
+* Tue Jun 14 2016 Dave Airlie <airlied@redhat.com> - 3.8.0-2
+- drop private cmake build
 
-* Tue Sep 09 2014 Dave Airlie <airlied@redhat.com> 3.5.0-1
-- llvm 3.5.0 final
+* Thu Mar 10 2016 Dave Airlie <airlied@redhat.com> 3.8.0-1
+- llvm 3.8.0 final release
 
-* Wed Aug 27 2014 Adam Jackson <ajax@redhat.com> 3.5.0-0.1.rc3
-- llvm 3.5.0 RC3
+* Thu Mar 03 2016 Dave Airlie <airlied@redhat.com> 3.8.0-0.2
+- llvm 3.8.0 rc3 release
 
-* Wed Aug 27 2014 Dave Airlie <airlied@redhat.com> 3.4.2-1
-- llvm 3.4.2 for RHEL 7.1
+* Fri Feb 19 2016 Dave Airlie <airlied@redhat.com> 3.8.0-0.1
+- llvm 3.8.0 rc2 release
 
-* Tue Jan 28 2014 Adam Jackson <ajax@redhat.com> 3.3-0.8.20131023
-- Disable %%check, only fails in places that don't matter to Mesa (#1028575)
+* Tue Feb 16 2016 Dan Horák <dan[at][danny.cz> 3.7.1-7
+- recognize s390 as SystemZ when configuring build
 
-* Fri Jan 24 2014 Daniel Mach <dmach@redhat.com> - 3.3-0.7.20131023
-- Mass rebuild 2014-01-24
+* Sat Feb 13 2016 Dave Airlie <airlied@redhat.com> 3.7.1-6
+- export C++ API for mesa.
 
-* Fri Dec 27 2013 Daniel Mach <dmach@redhat.com> - 3.3-0.6.20131023
-- Mass rebuild 2013-12-27
+* Sat Feb 13 2016 Dave Airlie <airlied@redhat.com> 3.7.1-5
+- reintroduce llvm-static, clang needs it currently.
 
-* Wed Oct 23 2013 Jerome Glisse <jglisse@redhat.com> 3.3-0.5.20131023
-- 3.3.1 snapshot
+* Fri Feb 12 2016 Dave Airlie <airlied@redhat.com> 3.7.1-4
+- jump back to single llvm library, the split libs aren't working very well.
 
-* Tue Aug 20 2013 Adam Jackson <ajax@redhat.com> 3.3-0.4.rc3
-- Build with -fno-strict-aliasing
+* Fri Feb 05 2016 Dave Airlie <airlied@redhat.com> 3.7.1-3
+- add missing obsoletes (#1303497)
 
-* Tue Jun 18 2013 Adam Jackson <ajax@redhat.com> 3.3-0.3.rc3
-- Port to RHEL6
-- Don't bother building R600 on s390x
+* Thu Feb 04 2016 Fedora Release Engineering <releng@fedoraproject.org> - 3.7.1-2
+- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild
 
-* Tue Jun 11 2013 Adam Jackson <ajax@redhat.com> 3.3-0.2.rc3
-- 3.3 rc3
-- Drop tblgen
-- Strip out some headers
+* Thu Jan 07 2016 Jan Vcelak <jvcelak@fedoraproject.org> 3.7.1-1
+- new upstream release
+- enable gold linker
 
-* Tue May 14 2013 Adam Jackson <ajax@redhat.com> 3.3-0.1.rc1
-- Update to 3.3 rc1
-- Move library to %%{_libdir} to avoid rpath headaches
-- Link with -Bsymbolic and --default-symver
-- --disable-libffi
-- Misc spec cleanup
+* Wed Nov 04 2015 Jan Vcelak <jvcelak@fedoraproject.org> 3.7.0-100
+- fix Requires for subpackages on the main package
 
-* Wed Dec 05 2012 Adam Jackson <ajax@redhat.com> 3.1-13
-- Forked spec for RHEL7 Mesa's private use
-  - no ocaml support
-  - no doxygen build
-  - no clang support
-  - no static archives
-  - no libraries, binaries, or manpages not needed by Mesa
+* Tue Oct 06 2015 Jan Vcelak <jvcelak@fedoraproject.org> 3.7.0-100
+- initial version using cmake build system