From 94c1037926c45da300d0b9ed62e0cc30d7a475a6 Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 03 2016 06:09:43 +0000 Subject: import mesa-private-llvm-3.8.1-1.el7 --- diff --git a/.gitignore b/.gitignore index 7c6c75c..5105db5 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/llvm-3.6.2.src.tar.xz +SOURCES/llvm-3.8.1.src.tar.xz diff --git a/.mesa-private-llvm.metadata b/.mesa-private-llvm.metadata index fb549fa..049decf 100644 --- a/.mesa-private-llvm.metadata +++ b/.mesa-private-llvm.metadata @@ -1 +1 @@ -7a00257eb2bc9431e4c77c3a36b033072c54bc7e SOURCES/llvm-3.6.2.src.tar.xz +e0c48c4c182424b99999367d688cd8ce7876827b SOURCES/llvm-3.8.1.src.tar.xz diff --git a/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch b/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch deleted file mode 100644 index e4ea42c..0000000 --- a/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 5717e28019e7348a04f63dcf965121171da15c62 Mon Sep 17 00:00:00 2001 -From: James Molloy -Date: Thu, 16 Apr 2015 11:37:40 +0000 -Subject: [PATCH] [AArch64] Fix invalid use of references to BuildMI. - -This was found in GCC PR65773 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65773). - -We shouldn't be taking a reference to the temporary that BuildMI returns, we must copy it. 
- -git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235088 91177308-0d34-0410-b5e6-96231b3b80d8 ---- - lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +++--- - 1 file changed, 3 insertions(+), 3 deletions(-) - -diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp -index 8e0af2d..db231c4 100644 ---- a/lib/Target/AArch64/AArch64InstrInfo.cpp -+++ b/lib/Target/AArch64/AArch64InstrInfo.cpp -@@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple( - } - - for (; SubReg != End; SubReg += Incr) { -- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); -+ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); - AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); - AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); -@@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot( - } - assert(Opc && "Unknown register class"); - -- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) -+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(SrcReg, getKillRegState(isKill)) - .addFrameIndex(FI); - -@@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( - } - assert(Opc && "Unknown register class"); - -- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) -+ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) - .addReg(DestReg, getDefRegState(true)) - .addFrameIndex(FI); - if (Offset) --- -2.4.3 - diff --git a/SOURCES/fix-cmake-include.patch b/SOURCES/fix-cmake-include.patch new file mode 100644 index 0000000..842b5c1 --- /dev/null +++ b/SOURCES/fix-cmake-include.patch @@ -0,0 +1,41 @@ +diff -up llvm-3.8.0rc2.src/CMakeLists.txt.fixinc llvm-3.8.0rc2.src/CMakeLists.txt +--- llvm-3.8.0rc2.src/CMakeLists.txt.fixinc 2016-01-14 05:03:44.000000000 +1000 ++++ llvm-3.8.0rc2.src/CMakeLists.txt 2016-02-26 10:21:44.477295728 +1000 +@@ -192,6 +192,7 @@ else() + 
endif() + + # Each of them corresponds to llvm-config's. ++# + set(LLVM_TOOLS_BINARY_DIR ${LLVM_RUNTIME_OUTPUT_INTDIR}) # --bindir + set(LLVM_LIBRARY_DIR ${LLVM_LIBRARY_OUTPUT_INTDIR}) # --libdir + set(LLVM_MAIN_SRC_DIR ${CMAKE_CURRENT_SOURCE_DIR} ) # --src-root +@@ -558,6 +559,11 @@ set( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${LL + set( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) + set( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${LLVM_BINARY_DIR}/lib${LLVM_LIBDIR_SUFFIX} ) + ++if(INCLUDE_INSTALL_DIR) ++else() ++set(INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_PREFIX}/include) ++endif() ++ + set(CMAKE_BUILD_WITH_INSTALL_RPATH ON) + if (APPLE) + set(CMAKE_INSTALL_NAME_DIR "@rpath") +@@ -728,7 +734,7 @@ add_subdirectory(cmake/modules) + + if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + install(DIRECTORY include/llvm include/llvm-c +- DESTINATION include ++ DESTINATION "${INCLUDE_INSTALL_DIR}" + COMPONENT llvm-headers + FILES_MATCHING + PATTERN "*.def" +@@ -740,7 +746,7 @@ if (NOT LLVM_INSTALL_TOOLCHAIN_ONLY) + ) + + install(DIRECTORY ${LLVM_INCLUDE_DIR}/llvm +- DESTINATION include ++ DESTINATION "${INCLUDE_INSTALL_DIR}" + COMPONENT llvm-headers + FILES_MATCHING + PATTERN "*.def" diff --git a/SOURCES/llvm-2.6-timestamp.patch b/SOURCES/llvm-2.6-timestamp.patch deleted file mode 100644 index ab0979e..0000000 --- a/SOURCES/llvm-2.6-timestamp.patch +++ /dev/null @@ -1,11 +0,0 @@ ---- llvm-2.6/Makefile.rules.timestamp 2009-08-19 18:04:44.000000000 -0400 -+++ llvm-2.6/Makefile.rules 2009-09-09 02:10:38.287389725 -0400 -@@ -672,7 +672,7 @@ - - ProgInstall = $(INSTALL) $(Install.StripFlag) -m 0755 - ScriptInstall = $(INSTALL) -m 0755 --DataInstall = $(INSTALL) -m 0644 -+DataInstall = $(INSTALL) -p -m 0644 - - # When compiling under Mingw/Cygwin, the tblgen tool expects Windows - # paths. 
In this case, the SYSPATH function (defined in diff --git a/SOURCES/llvm-3.6-large-struct-return.patch b/SOURCES/llvm-3.6-large-struct-return.patch deleted file mode 100644 index d387539..0000000 --- a/SOURCES/llvm-3.6-large-struct-return.patch +++ /dev/null @@ -1,368 +0,0 @@ ------------------------------------------------------------------------- -r244889 | uweigand | 2015-08-13 15:37:06 +0200 (Thu, 13 Aug 2015) | 22 lines - -[SystemZ] Support large LLVM IR struct return values - -Recent mesa/llvmpipe crashes on SystemZ due to a failed assertion when -attempting to compile a routine with a return type of - { <4 x float>, <4 x float>, <4 x float>, <4 x float> } -on a system without vector instruction support. - -This is because after legalizing the vector type, we get a return value -consisting of 16 floats, which cannot all be returned in registers. - -Usually, what should happen in this case is that the target's CanLowerReturn -routine rejects the return type, in which case SelectionDAG falls back to -implementing a structure return in memory via implicit reference. - -However, the SystemZ target never actually implemented any CanLowerReturn -routine, and thus would accept any struct return type. - -This patch fixes the crash by implementing CanLowerReturn. As a side effect, -this also handles fp128 return values, fixing a todo that was noted in -SystemZCallingConv.td. - -Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td -+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td -@@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[ - CCIfSubtarget<"hasVector()", - CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], - CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> -- -- // ABI-compliant code returns long double by reference, but that conversion -- // is left to higher-level code. 
Perhaps we could add an f128 definition -- // here for code that doesn't care about the ABI? - ]>; - - //===----------------------------------------------------------------------===// -Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp -@@ -1169,6 +1169,20 @@ SystemZTargetLowering::LowerCall(CallLow - return Chain; - } - -+bool SystemZTargetLowering:: -+CanLowerReturn(CallingConv::ID CallConv, -+ MachineFunction &MF, bool isVarArg, -+ const SmallVectorImpl<ISD::OutputArg> &Outs, -+ LLVMContext &Context) const { -+ // Detect unsupported vector return types. -+ if (Subtarget.hasVector()) -+ VerifyVectorTypes(Outs); -+ -+ SmallVector<CCValAssign, 16> RetLocs; -+ CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); -+ return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); -+} -+ - SDValue - SystemZTargetLowering::LowerReturn(SDValue Chain, - CallingConv::ID CallConv, bool IsVarArg, -Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h -+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h -@@ -401,6 +401,10 @@ public: - SDValue LowerCall(CallLoweringInfo &CLI, - SmallVectorImpl<SDValue> &InVals) const override; - -+ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, -+ bool isVarArg, -+ const SmallVectorImpl<ISD::OutputArg> &Outs, -+ LLVMContext &Context) const override; - SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, - const SmallVectorImpl<ISD::OutputArg> &Outs, - const SmallVectorImpl<SDValue> &OutVals, -Index: llvm-36/test/CodeGen/SystemZ/args-04.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/args-04.ll -+++ llvm-36/test/CodeGen/SystemZ/args-04.ll -@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i3 - store
fp128 %y, fp128 *%r2 - ret void - } -+ -+; Explicit fp128 return values are likewise passed indirectly. -+define fp128 @f14(fp128 %r3) { -+; CHECK-LABEL: f14: -+; CHECK: ld %f0, 0(%r3) -+; CHECK: ld %f2, 8(%r3) -+; CHECK: axbr %f0, %f0 -+; CHECK: std %f0, 0(%r2) -+; CHECK: std %f2, 8(%r2) -+; CHECK: br %r14 -+ %y = fadd fp128 %r3, %r3 -+ ret fp128 %y -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/args-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/args-07.ll -@@ -0,0 +1,60 @@ -+; Test multiple return values (LLVM ABI extension) -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+ -+; Up to four integer return values fit into GPRs. -+define { i64, i64, i64, i64 } @f1() { -+; CHECK-LABEL: f1: -+; CHECK: lghi %r2, 0 -+; CHECK: lghi %r3, 1 -+; CHECK: lghi %r4, 2 -+; CHECK: lghi %r5, 3 -+; CHECK: br %r14 -+ ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 } -+} -+ -+; More than four integer return values use sret. -+define { i64, i64, i64, i64, i64 } @f2() { -+; CHECK-LABEL: f2: -+; CHECK: mvghi 32(%r2), 4 -+; CHECK: mvghi 24(%r2), 3 -+; CHECK: mvghi 16(%r2), 2 -+; CHECK: mvghi 8(%r2), 1 -+; CHECK: mvghi 0(%r2), 0 -+; CHECK: br %r14 -+ ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 } -+} -+ -+; Up to four floating-point return values fit into FPRs. -+define { double, double, double, double } @f3() { -+; CHECK-LABEL: f3: -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: ldeb %f0, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: ldeb %f2, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: ldeb %f4, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: ldeb %f6, 0([[TMP]]) -+; CHECK: br %r14 -+ ret { double, double, double, double } -+ { double 1.0, double 2.0, double 3.0, double 4.0 } -+} -+ -+; More than four floating-point return values use sret. 
-+define { double, double, double, double, double } @f4() { -+; CHECK-LABEL: f4: -+; CHECK: llihh [[TMP:%r[0-5]]], 16404 -+; CHECK: stg [[TMP]], 32(%r2) -+; CHECK: llihh [[TMP:%r[0-5]]], 16400 -+; CHECK: stg [[TMP]], 24(%r2) -+; CHECK: llihh [[TMP:%r[0-5]]], 16392 -+; CHECK: stg [[TMP]], 16(%r2) -+; CHECK: llihh [[TMP:%r[0-5]]], 16384 -+; CHECK: stg [[TMP]], 8(%r2) -+; CHECK: llihh [[TMP:%r[0-5]]], 16368 -+; CHECK: stg [[TMP]], 0(%r2) -+; CHECK: br %r14 -+ ret { double, double, double, double, double } -+ { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 } -+} -Index: llvm-36/test/CodeGen/SystemZ/args-08.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/args-08.ll -@@ -0,0 +1,57 @@ -+; Test calling functions with multiple return values (LLVM ABI extension) -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+ -+; Up to four integer return values fit into GPRs. -+declare { i64, i64, i64, i64 } @bar1() -+ -+define i64 @f1() { -+; CHECK-LABEL: f1: -+; CHECK: brasl %r14, bar1 -+; CHECK: lgr %r2, %r5 -+; CHECK: br %r14 -+ %mret = call { i64, i64, i64, i64 } @bar1() -+ %ret = extractvalue { i64, i64, i64, i64 } %mret, 3 -+ ret i64 %ret -+} -+ -+; More than four integer return values use sret. -+declare { i64, i64, i64, i64, i64 } @bar2() -+ -+define i64 @f2() { -+; CHECK-LABEL: f2: -+; CHECK: la %r2, 160(%r15) -+; CHECK: brasl %r14, bar2 -+; CHECK: lg %r2, 192(%r15) -+; CHECK: br %r14 -+ %mret = call { i64, i64, i64, i64, i64 } @bar2() -+ %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4 -+ ret i64 %ret -+} -+ -+; Up to four floating-point return values fit into GPRs. 
-+declare { double, double, double, double } @bar3() -+ -+define double @f3() { -+; CHECK-LABEL: f3: -+; CHECK: brasl %r14, bar3 -+; CHECK: ldr %f0, %f6 -+; CHECK: br %r14 -+ %mret = call { double, double, double, double } @bar3() -+ %ret = extractvalue { double, double, double, double } %mret, 3 -+ ret double %ret -+} -+ -+; More than four integer return values use sret. -+declare { double, double, double, double, double } @bar4() -+ -+define double @f4() { -+; CHECK-LABEL: f4: -+; CHECK: la %r2, 160(%r15) -+; CHECK: brasl %r14, bar4 -+; CHECK: ld %f0, 192(%r15) -+; CHECK: br %r14 -+ %mret = call { double, double, double, double, double } @bar4() -+ %ret = extractvalue { double, double, double, double, double } %mret, 4 -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-06.ll -@@ -0,0 +1,83 @@ -+; Test multiple return values (LLVM ABI extension) -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Up to eight vector return values fit into VRs. 
-+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() { -+; CHECK-LABEL: f1: -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v24, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v26, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v28, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v30, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v25, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v27, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v29, 0([[TMP]]) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl %v31, 0([[TMP]]) -+; CHECK: br %r14 -+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double> } -+ { <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> } -+} -+ -+; More than eight vector return values use sret. 
-+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double> } @f2() { -+; CHECK-LABEL: f2: -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 128(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 112(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 96(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 80(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 64(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 48(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 32(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 16(%r2) -+; CHECK: larl [[TMP:%r[0-5]]], .LCPI -+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) -+; CHECK: vst [[VTMP]], 0(%r2) -+; CHECK: br %r14 -+ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double> } -+ { <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> , -+ <2 x double> } -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-07.ll -@@ -0,0 +1,47 @@ -+; Test calling functions with multiple return values (LLVM ABI extension) -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Up to eight vector return values fit into VRs. 
-+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1() -+ -+define <2 x double> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: brasl %r14, bar1 -+; CHECK: vlr %v24, %v31 -+; CHECK: br %r14 -+ %mret = call { <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double> } @bar1() -+ %ret = extractvalue { <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double> } %mret, 7 -+ ret <2 x double> %ret -+} -+ -+; More than eight vector return values use sret. -+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, <2 x double>, <2 x double>, -+ <2 x double> } @bar2() -+ -+define <2 x double> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: la %r2, 160(%r15) -+; CHECK: brasl %r14, bar2 -+; CHECK: vl %v24, 288(%r15) -+; CHECK: br %r14 -+ %mret = call { <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double> } @bar2() -+ %ret = extractvalue { <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double>, <2 x double>, -+ <2 x double> } %mret, 8 -+ ret <2 x double> %ret -+} diff --git a/SOURCES/llvm-3.6.2-nerf-skylake.patch b/SOURCES/llvm-3.6.2-nerf-skylake.patch deleted file mode 100644 index 5f8c3c4..0000000 --- a/SOURCES/llvm-3.6.2-nerf-skylake.patch +++ /dev/null @@ -1,28 +0,0 @@ -Skylake Pentium has the charming property of not supporting AVX, and -getHostCPUName will return 'x86-64' since it doesn't know about skl at -all in 3.6.x. This confuses llvmpipe quite badly, as we'll emit SSE4.1 -intrinsics but llvm will think they're not valid, and we'll cough and -die with a "Cannot select" message. - -Fix this by treating Skylake (and Broadwell, which also isn't present -in 3.6) as if they were Haswell. 
This isn't quite what upstream does, -but upstream has changed this API a bit and introduced a getHostCPUFeatures -to complement it, and while it looks like a much better approach it's -quite a bit more invasive. - -diff -up llvm-3.6.2.src/lib/Support/Host.cpp.jx llvm-3.6.2.src/lib/Support/Host.cpp ---- llvm-3.6.2.src/lib/Support/Host.cpp.jx 2015-10-01 12:08:39.000000000 -0400 -+++ llvm-3.6.2.src/lib/Support/Host.cpp 2015-10-13 10:51:03.736425351 -0400 -@@ -362,6 +362,12 @@ StringRef sys::getHostCPUName() { - case 63: - case 69: - case 70: -+ // Broadwell: -+ case 61: -+ case 71: -+ // Skylake: -+ case 78: -+ case 94: - // Not all Haswell processors support AVX too (such as the Pentium - // versions instead of the i7 versions). - return HasAVX2 ? "core-avx2" : "corei7"; diff --git a/SOURCES/llvm-3.8.1-rhel-7.3.patch b/SOURCES/llvm-3.8.1-rhel-7.3.patch new file mode 100644 index 0000000..4caea83 --- /dev/null +++ b/SOURCES/llvm-3.8.1-rhel-7.3.patch @@ -0,0 +1,344 @@ +diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp +index c0f9e07..94bf580 100644 +--- a/lib/Support/Host.cpp ++++ b/lib/Support/Host.cpp +@@ -290,107 +290,112 @@ StringRef sys::getHostCPUName() { + } + case 6: + switch (Model) { +- case 1: // Pentium Pro processor ++ case 0x01: // Pentium Pro processor + return "pentiumpro"; + +- case 3: // Intel Pentium II OverDrive processor, Pentium II processor, +- // model 03 +- case 5: // Pentium II processor, model 05, Pentium II Xeon processor, +- // model 05, and Intel Celeron processor, model 05 +- case 6: // Celeron processor, model 06 ++ case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, ++ // model 03 ++ case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, ++ // model 05, and Intel Celeron processor, model 05 ++ case 0x06: // Celeron processor, model 06 + return "pentium2"; + +- case 7: // Pentium III processor, model 07, and Pentium III Xeon +- // processor, model 07 +- case 8: // Pentium III processor, 
model 08, Pentium III Xeon processor, +- // model 08, and Celeron processor, model 08 +- case 10: // Pentium III Xeon processor, model 0Ah +- case 11: // Pentium III processor, model 0Bh ++ case 0x07: // Pentium III processor, model 07, and Pentium III Xeon ++ // processor, model 07 ++ case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, ++ // model 08, and Celeron processor, model 08 ++ case 0x0a: // Pentium III Xeon processor, model 0Ah ++ case 0x0b: // Pentium III processor, model 0Bh + return "pentium3"; + +- case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. +- case 13: // Intel Pentium M processor, Intel Celeron M processor, model +- // 0Dh. All processors are manufactured using the 90 nm process. +- case 21: // Intel EP80579 Integrated Processor and Intel EP80579 +- // Integrated Processor with Intel QuickAssist Technology ++ case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. ++ case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model ++ // 0Dh. All processors are manufactured using the 90 nm process. ++ case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 ++ // Integrated Processor with Intel QuickAssist Technology + return "pentium-m"; + +- case 14: // Intel Core Duo processor, Intel Core Solo processor, model +- // 0Eh. All processors are manufactured using the 65 nm process. ++ case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model ++ // 0Eh. All processors are manufactured using the 65 nm process. + return "yonah"; + +- case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile +- // processor, Intel Core 2 Quad processor, Intel Core 2 Quad +- // mobile processor, Intel Core 2 Extreme processor, Intel +- // Pentium Dual-Core processor, Intel Xeon processor, model +- // 0Fh. All processors are manufactured using the 65 nm process. +- case 22: // Intel Celeron processor model 16h. 
All processors are +- // manufactured using the 65 nm process ++ case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile ++ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad ++ // mobile processor, Intel Core 2 Extreme processor, Intel ++ // Pentium Dual-Core processor, Intel Xeon processor, model ++ // 0Fh. All processors are manufactured using the 65 nm process. ++ case 0x16: // Intel Celeron processor model 16h. All processors are ++ // manufactured using the 65 nm process + return "core2"; + +- case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model +- // 17h. All processors are manufactured using the 45 nm process. +- // +- // 45nm: Penryn , Wolfdale, Yorkfield (XE) +- case 29: // Intel Xeon processor MP. All processors are manufactured using +- // the 45 nm process. ++ case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model ++ // 17h. All processors are manufactured using the 45 nm process. ++ // ++ // 45nm: Penryn , Wolfdale, Yorkfield (XE) ++ case 0x1d: // Intel Xeon processor MP. All processors are manufactured using ++ // the 45 nm process. + return "penryn"; + +- case 26: // Intel Core i7 processor and Intel Xeon processor. All +- // processors are manufactured using the 45 nm process. +- case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. +- // As found in a Summer 2010 model iMac. +- case 46: // Nehalem EX ++ case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All ++ // processors are manufactured using the 45 nm process. ++ case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. ++ // As found in a Summer 2010 model iMac. ++ case 0x2e: // Nehalem EX + return "nehalem"; +- case 37: // Intel Core i7, laptop version. +- case 44: // Intel Core i7 processor and Intel Xeon processor. All +- // processors are manufactured using the 32 nm process. +- case 47: // Westmere EX ++ case 0x25: // Intel Core i7, laptop version. ++ case 0x2c: // Intel Core i7 processor and Intel Xeon processor. 
All ++ // processors are manufactured using the 32 nm process. ++ case 0x2f: // Westmere EX + return "westmere"; + +- // SandyBridge: +- case 42: // Intel Core i7 processor. All processors are manufactured +- // using the 32 nm process. +- case 45: ++ case 0x2a: // Intel Core i7 processor. All processors are manufactured ++ // using the 32 nm process. ++ case 0x2d: + return "sandybridge"; + +- // Ivy Bridge: +- case 58: +- case 62: // Ivy Bridge EP ++ case 0x3a: ++ case 0x3e: // Ivy Bridge EP + return "ivybridge"; + + // Haswell: +- case 60: +- case 63: +- case 69: +- case 70: ++ case 0x3c: ++ case 0x3f: ++ case 0x45: ++ case 0x46: + return "haswell"; + + // Broadwell: +- case 61: +- case 71: ++ case 0x3d: ++ case 0x47: ++ case 0x4f: ++ case 0x56: + return "broadwell"; + + // Skylake: +- case 78: +- case 94: ++ case 0x4e: ++ // return "skylake-avx512"; ++ case 0x5e: + return "skylake"; + +- case 28: // Most 45 nm Intel Atom processors +- case 38: // 45 nm Atom Lincroft +- case 39: // 32 nm Atom Medfield +- case 53: // 32 nm Atom Midview +- case 54: // 32 nm Atom Midview ++ case 0x1c: // Most 45 nm Intel Atom processors ++ case 0x26: // 45 nm Atom Lincroft ++ case 0x27: // 32 nm Atom Medfield ++ case 0x35: // 32 nm Atom Midview ++ case 0x36: // 32 nm Atom Midview + return "bonnell"; + + // Atom Silvermont codes from the Intel software optimization guide. +- case 55: +- case 74: +- case 77: +- case 90: +- case 93: ++ case 0x37: ++ case 0x4a: ++ case 0x4d: ++ case 0x5a: ++ case 0x5d: ++ case 0x4c: // really airmont + return "silvermont"; + ++ case 0x57: ++ return "knl"; ++ + default: // Unknown family 6 CPU, try to guess. 
+ if (HasAVX512) + return "knl"; +@@ -823,6 +828,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { + Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; + Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; + Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; ++ Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; + + bool HasLeafD = MaxLevel >= 0xd && + !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); +diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td +index 8902a85..9b1bf43 100644 +--- a/lib/Target/X86/X86.td ++++ b/lib/Target/X86/X86.td +@@ -134,6 +134,9 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", + def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", + "Enable AVX-512 Vector Length eXtensions", + [FeatureAVX512]>; ++def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", ++ "Enable AVX-512 Vector Bit Manipulation Instructions", ++ [FeatureAVX512]>; + def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", + "Enable protection keys">; + def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", +@@ -454,6 +457,9 @@ class BroadwellProc : ProcessorModel; + def : BroadwellProc<"broadwell">; + ++def : HaswellProc<"skylake">; // RHEL mustard ++def : HaswellProc<"skx">; // RHEL mustard ++ + // FIXME: define KNL model + class KnightsLandingProc : ProcessorModel : ProcessorModel; + def : KnightsLandingProc<"knl">; + +-// FIXME: define SKX model +-class SkylakeProc : ProcessorModel; +-def : SkylakeProc<"skylake">; +-def : SkylakeProc<"skx">; // Legacy alias. +- +- + // AMD CPUs. 
+ + def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>; +diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td +index 9c8339a..eed4319 100644 +--- a/lib/Target/X86/X86InstrInfo.td ++++ b/lib/Target/X86/X86InstrInfo.td +@@ -773,7 +773,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">, + def NoVLX : Predicate<"!Subtarget->hasVLX()">; + def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; + def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; +-def PKU : Predicate<"!Subtarget->hasPKU()">; ++def PKU : Predicate<"Subtarget->hasPKU()">; + + def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; + def HasAES : Predicate<"Subtarget->hasAES()">; +@@ -795,6 +795,7 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; + def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; + def HasBMI : Predicate<"Subtarget->hasBMI()">; + def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; ++def HasVBMI : Predicate<"Subtarget->hasVBMI()">; + def HasRTM : Predicate<"Subtarget->hasRTM()">; + def HasHLE : Predicate<"Subtarget->hasHLE()">; + def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; +diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp +index 8ef08c9..739de59 100644 +--- a/lib/Target/X86/X86Subtarget.cpp ++++ b/lib/Target/X86/X86Subtarget.cpp +@@ -261,6 +261,7 @@ void X86Subtarget::initializeEnvironment() { + HasLZCNT = false; + HasBMI = false; + HasBMI2 = false; ++ HasVBMI = false; + HasRTM = false; + HasHLE = false; + HasERI = false; +diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h +index 13d1026..c1adb44 100644 +--- a/lib/Target/X86/X86Subtarget.h ++++ b/lib/Target/X86/X86Subtarget.h +@@ -134,6 +134,9 @@ protected: + /// Processor has BMI2 instructions. + bool HasBMI2; + ++ /// Processor has VBMI instructions. ++ bool HasVBMI; ++ + /// Processor has RTM instructions. 
+ bool HasRTM; + +@@ -374,6 +377,7 @@ public: + bool hasLZCNT() const { return HasLZCNT; } + bool hasBMI() const { return HasBMI; } + bool hasBMI2() const { return HasBMI2; } ++ bool hasVBMI() const { return HasVBMI; } + bool hasRTM() const { return HasRTM; } + bool hasHLE() const { return HasHLE; } + bool hasADX() const { return HasADX; } +diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll +index 27cbef6..c25435b 100644 +--- a/test/CodeGen/X86/slow-unaligned-mem.ll ++++ b/test/CodeGen/X86/slow-unaligned-mem.ll +@@ -14,15 +14,14 @@ + + ; Intel chips with fast unaligned memory accesses + +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST +-; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s 
-mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST ++; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST + + ; AMD chips with slow unaligned memory accesses + diff --git a/SOURCES/llvm-Config-config.h b/SOURCES/llvm-Config-config.h deleted file mode 100644 index c369b45..0000000 --- a/SOURCES/llvm-Config-config.h +++ /dev/null @@ -1,9 +0,0 @@ -#include <bits/wordsize.h> - -#if __WORDSIZE == 32 -#include "config-32.h" -#elif __WORDSIZE == 64 -#include "config-64.h" -#else -#error "Unknown word size" -#endif diff --git a/SOURCES/llvm-Config-llvm-config.h b/SOURCES/llvm-Config-llvm-config.h deleted file mode 100644 index 2fa08c9..0000000 --- a/SOURCES/llvm-Config-llvm-config.h +++ /dev/null @@ -1,9 +0,0 @@ -#include <bits/wordsize.h> - -#if __WORDSIZE == 32 -#include "llvm-config-32.h" -#elif __WORDSIZE == 64 -#include "llvm-config-64.h" -#else -#error "Unknown word size" -#endif diff --git a/SOURCES/llvm-config.h b/SOURCES/llvm-config.h new file mode 100644 index 0000000..2fa08c9 --- /dev/null +++ b/SOURCES/llvm-config.h @@ -0,0 +1,9 @@ +#include <bits/wordsize.h> + +#if __WORDSIZE == 32 +#include "llvm-config-32.h" +#elif __WORDSIZE == 64 +#include "llvm-config-64.h" +#else +#error "Unknown word size" +#endif diff --git a/SOURCES/llvm-z13-backports.patch b/SOURCES/llvm-z13-backports.patch deleted file mode 100644 index c6aebb4..0000000 --- a/SOURCES/llvm-z13-backports.patch +++ /dev/null @@ -1,39981 +0,0 @@ -This patch backports z13 support and a number of other SystemZ -enhancements to the LLVM 3.6 release branch.
- -The patch consists of backports of the following mainline revisions: -229652, 229654, 229658, 233540, 233541, 233688, 233689, 233690, 233700, -233736, 233803, 236430, 236432, 236433, 236520, 236521, 236522, 236523, -236524, 236525, 236526, 236527, 236528, 236529, 236530 - -Index: llvm-36/include/llvm/IR/Intrinsics.td -=================================================================== ---- llvm-36.orig/include/llvm/IR/Intrinsics.td -+++ llvm-36/include/llvm/IR/Intrinsics.td -@@ -594,3 +594,4 @@ include "llvm/IR/IntrinsicsHexagon.td" - include "llvm/IR/IntrinsicsNVVM.td" - include "llvm/IR/IntrinsicsMips.td" - include "llvm/IR/IntrinsicsR600.td" -+include "llvm/IR/IntrinsicsSystemZ.td" -Index: llvm-36/include/llvm/IR/IntrinsicsSystemZ.td -=================================================================== ---- /dev/null -+++ llvm-36/include/llvm/IR/IntrinsicsSystemZ.td -@@ -0,0 +1,378 @@ -+//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file defines all of the SystemZ-specific intrinsics. 
-+// -+//===----------------------------------------------------------------------===// -+ -+class SystemZUnaryConv -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[result], [arg], [IntrNoMem]>; -+ -+class SystemZUnary -+ : SystemZUnaryConv; -+ -+class SystemZUnaryConvCC -+ : Intrinsic<[result, llvm_i32_ty], [arg], [IntrNoMem]>; -+ -+class SystemZUnaryCC -+ : SystemZUnaryConvCC; -+ -+class SystemZBinaryConv -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[result], [arg, arg], [IntrNoMem]>; -+ -+class SystemZBinary -+ : SystemZBinaryConv; -+ -+class SystemZBinaryInt -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>; -+ -+class SystemZBinaryConvCC -+ : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>; -+ -+class SystemZBinaryConvIntCC -+ : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>; -+ -+class SystemZBinaryCC -+ : SystemZBinaryConvCC; -+ -+class SystemZTernaryConv -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[result], [arg, arg, result], [IntrNoMem]>; -+ -+class SystemZTernary -+ : SystemZTernaryConv; -+ -+class SystemZTernaryInt -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>; -+ -+class SystemZTernaryIntCC -+ : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>; -+ -+class SystemZQuaternaryInt -+ : GCCBuiltin<"__builtin_s390_" ## name>, -+ Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>; -+ -+class SystemZQuaternaryIntCC -+ : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty], -+ [IntrNoMem]>; -+ -+multiclass SystemZUnaryExtBHF { -+ def b : SystemZUnaryConv; -+ def h : SystemZUnaryConv; -+ def f : SystemZUnaryConv; -+} -+ -+multiclass SystemZUnaryExtBHWF { -+ def b : SystemZUnaryConv; -+ def hw : SystemZUnaryConv; -+ def f : SystemZUnaryConv; -+} -+ -+multiclass SystemZUnaryBHF { -+ def b : SystemZUnary; -+ def h : SystemZUnary; -+ def f : SystemZUnary; 
-+} -+ -+multiclass SystemZUnaryBHFG : SystemZUnaryBHF { -+ def g : SystemZUnary; -+} -+ -+multiclass SystemZUnaryCCBHF { -+ def bs : SystemZUnaryCC; -+ def hs : SystemZUnaryCC; -+ def fs : SystemZUnaryCC; -+} -+ -+multiclass SystemZBinaryTruncHFG { -+ def h : SystemZBinaryConv; -+ def f : SystemZBinaryConv; -+ def g : SystemZBinaryConv; -+} -+ -+multiclass SystemZBinaryTruncCCHFG { -+ def hs : SystemZBinaryConvCC; -+ def fs : SystemZBinaryConvCC; -+ def gs : SystemZBinaryConvCC; -+} -+ -+multiclass SystemZBinaryExtBHF { -+ def b : SystemZBinaryConv; -+ def h : SystemZBinaryConv; -+ def f : SystemZBinaryConv; -+} -+ -+multiclass SystemZBinaryExtBHFG : SystemZBinaryExtBHF { -+ def g : SystemZBinaryConv; -+} -+ -+multiclass SystemZBinaryBHF { -+ def b : SystemZBinary; -+ def h : SystemZBinary; -+ def f : SystemZBinary; -+} -+ -+multiclass SystemZBinaryBHFG : SystemZBinaryBHF { -+ def g : SystemZBinary; -+} -+ -+multiclass SystemZBinaryIntBHFG { -+ def b : SystemZBinaryInt; -+ def h : SystemZBinaryInt; -+ def f : SystemZBinaryInt; -+ def g : SystemZBinaryInt; -+} -+ -+multiclass SystemZBinaryCCBHF { -+ def bs : SystemZBinaryCC; -+ def hs : SystemZBinaryCC; -+ def fs : SystemZBinaryCC; -+} -+ -+multiclass SystemZCompareBHFG { -+ def bs : SystemZBinaryCC; -+ def hs : SystemZBinaryCC; -+ def fs : SystemZBinaryCC; -+ def gs : SystemZBinaryCC; -+} -+ -+multiclass SystemZTernaryExtBHF { -+ def b : SystemZTernaryConv; -+ def h : SystemZTernaryConv; -+ def f : SystemZTernaryConv; -+} -+ -+multiclass SystemZTernaryExtBHFG : SystemZTernaryExtBHF { -+ def g : SystemZTernaryConv; -+} -+ -+multiclass SystemZTernaryBHF { -+ def b : SystemZTernary; -+ def h : SystemZTernary; -+ def f : SystemZTernary; -+} -+ -+multiclass SystemZTernaryIntBHF { -+ def b : SystemZTernaryInt; -+ def h : SystemZTernaryInt; -+ def f : SystemZTernaryInt; -+} -+ -+multiclass SystemZTernaryIntCCBHF { -+ def bs : SystemZTernaryIntCC; -+ def hs : SystemZTernaryIntCC; -+ def fs : SystemZTernaryIntCC; -+} -+ 
-+multiclass SystemZQuaternaryIntBHF { -+ def b : SystemZQuaternaryInt; -+ def h : SystemZQuaternaryInt; -+ def f : SystemZQuaternaryInt; -+} -+ -+multiclass SystemZQuaternaryIntBHFG : SystemZQuaternaryIntBHF { -+ def g : SystemZQuaternaryInt; -+} -+ -+multiclass SystemZQuaternaryIntCCBHF { -+ def bs : SystemZQuaternaryIntCC; -+ def hs : SystemZQuaternaryIntCC; -+ def fs : SystemZQuaternaryIntCC; -+} -+ -+//===----------------------------------------------------------------------===// -+// -+// Transactional-execution intrinsics -+// -+//===----------------------------------------------------------------------===// -+ -+def llvm_ptr64_ty : LLVMPointerType; -+ -+let TargetPrefix = "s390" in { -+ def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrNoDuplicate]>; -+ -+ def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty], -+ [llvm_ptr_ty, llvm_i32_ty], -+ [IntrNoDuplicate]>; -+ -+ def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrNoDuplicate]>; -+ -+ def int_s390_tabort : Intrinsic<[], [llvm_i64_ty], -+ [IntrNoReturn, Throws]>; -+ -+ def int_s390_tend : GCCBuiltin<"__builtin_tend">, -+ Intrinsic<[llvm_i32_ty], []>; -+ -+ def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">, -+ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; -+ -+ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty], -+ [IntrReadWriteArgMem]>; -+ -+ def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">, -+ Intrinsic<[], [llvm_i32_ty]>; -+} -+ -+//===----------------------------------------------------------------------===// -+// -+// Vector intrinsics -+// -+//===----------------------------------------------------------------------===// -+ -+let TargetPrefix = "s390" in { -+ def int_s390_lcbb : GCCBuiltin<"__builtin_s390_lcbb">, -+ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], -+ [IntrNoMem]>; -+ -+ def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">, -+ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], -+ 
[IntrReadArgMem]>; -+ -+ def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">, -+ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], -+ [IntrReadArgMem]>; -+ -+ def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">, -+ Intrinsic<[llvm_v2i64_ty], -+ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], -+ [IntrNoMem]>; -+ -+ def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">, -+ Intrinsic<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], -+ [IntrNoMem]>; -+ -+ defm int_s390_vpks : SystemZBinaryTruncHFG<"vpks">; -+ defm int_s390_vpks : SystemZBinaryTruncCCHFG; -+ -+ defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">; -+ defm int_s390_vpkls : SystemZBinaryTruncCCHFG; -+ -+ def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, -+ Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], -+ // In fact write-only but there's no property -+ // for that. -+ [IntrReadWriteArgMem]>; -+ -+ defm int_s390_vupl : SystemZUnaryExtBHWF<"vupl">; -+ defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">; -+ -+ defm int_s390_vuph : SystemZUnaryExtBHF<"vuph">; -+ defm int_s390_vuplh : SystemZUnaryExtBHF<"vuplh">; -+ -+ defm int_s390_vacc : SystemZBinaryBHFG<"vacc">; -+ -+ def int_s390_vaq : SystemZBinary<"vaq", llvm_v16i8_ty>; -+ def int_s390_vacq : SystemZTernary<"vacq", llvm_v16i8_ty>; -+ def int_s390_vaccq : SystemZBinary<"vaccq", llvm_v16i8_ty>; -+ def int_s390_vacccq : SystemZTernary<"vacccq", llvm_v16i8_ty>; -+ -+ defm int_s390_vavg : SystemZBinaryBHFG<"vavg">; -+ defm int_s390_vavgl : SystemZBinaryBHFG<"vavgl">; -+ -+ def int_s390_vcksm : SystemZBinary<"vcksm", llvm_v4i32_ty>; -+ -+ defm int_s390_vgfm : SystemZBinaryExtBHFG<"vgfm">; -+ defm int_s390_vgfma : SystemZTernaryExtBHFG<"vgfma">; -+ -+ defm int_s390_vmah : SystemZTernaryBHF<"vmah">; -+ defm int_s390_vmalh : SystemZTernaryBHF<"vmalh">; -+ defm int_s390_vmae : SystemZTernaryExtBHF<"vmae">; -+ defm int_s390_vmale : SystemZTernaryExtBHF<"vmale">; -+ defm int_s390_vmao : SystemZTernaryExtBHF<"vmao">; -+ defm 
int_s390_vmalo : SystemZTernaryExtBHF<"vmalo">; -+ -+ defm int_s390_vmh : SystemZBinaryBHF<"vmh">; -+ defm int_s390_vmlh : SystemZBinaryBHF<"vmlh">; -+ defm int_s390_vme : SystemZBinaryExtBHF<"vme">; -+ defm int_s390_vmle : SystemZBinaryExtBHF<"vmle">; -+ defm int_s390_vmo : SystemZBinaryExtBHF<"vmo">; -+ defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">; -+ -+ defm int_s390_verllv : SystemZBinaryBHFG<"verllv">; -+ defm int_s390_verll : SystemZBinaryIntBHFG<"verll">; -+ defm int_s390_verim : SystemZQuaternaryIntBHFG<"verim">; -+ -+ def int_s390_vsl : SystemZBinary<"vsl", llvm_v16i8_ty>; -+ def int_s390_vslb : SystemZBinary<"vslb", llvm_v16i8_ty>; -+ def int_s390_vsra : SystemZBinary<"vsra", llvm_v16i8_ty>; -+ def int_s390_vsrab : SystemZBinary<"vsrab", llvm_v16i8_ty>; -+ def int_s390_vsrl : SystemZBinary<"vsrl", llvm_v16i8_ty>; -+ def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>; -+ -+ def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">, -+ Intrinsic<[llvm_v16i8_ty], -+ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], -+ [IntrNoMem]>; -+ -+ defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">; -+ -+ def int_s390_vsq : SystemZBinary<"vsq", llvm_v16i8_ty>; -+ def int_s390_vsbiq : SystemZTernary<"vsbiq", llvm_v16i8_ty>; -+ def int_s390_vscbiq : SystemZBinary<"vscbiq", llvm_v16i8_ty>; -+ def int_s390_vsbcbiq : SystemZTernary<"vsbcbiq", llvm_v16i8_ty>; -+ -+ def int_s390_vsumb : SystemZBinaryConv<"vsumb", llvm_v4i32_ty, llvm_v16i8_ty>; -+ def int_s390_vsumh : SystemZBinaryConv<"vsumh", llvm_v4i32_ty, llvm_v8i16_ty>; -+ -+ def int_s390_vsumgh : SystemZBinaryConv<"vsumgh", llvm_v2i64_ty, -+ llvm_v8i16_ty>; -+ def int_s390_vsumgf : SystemZBinaryConv<"vsumgf", llvm_v2i64_ty, -+ llvm_v4i32_ty>; -+ -+ def int_s390_vsumqf : SystemZBinaryConv<"vsumqf", llvm_v16i8_ty, -+ llvm_v4i32_ty>; -+ def int_s390_vsumqg : SystemZBinaryConv<"vsumqg", llvm_v16i8_ty, -+ llvm_v2i64_ty>; -+ -+ def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>; -+ -+ defm int_s390_vceq 
: SystemZCompareBHFG<"vceq">; -+ defm int_s390_vch : SystemZCompareBHFG<"vch">; -+ defm int_s390_vchl : SystemZCompareBHFG<"vchl">; -+ -+ defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">; -+ defm int_s390_vfae : SystemZTernaryIntCCBHF; -+ defm int_s390_vfaez : SystemZTernaryIntBHF<"vfaez">; -+ defm int_s390_vfaez : SystemZTernaryIntCCBHF; -+ -+ defm int_s390_vfee : SystemZBinaryBHF<"vfee">; -+ defm int_s390_vfee : SystemZBinaryCCBHF; -+ defm int_s390_vfeez : SystemZBinaryBHF<"vfeez">; -+ defm int_s390_vfeez : SystemZBinaryCCBHF; -+ -+ defm int_s390_vfene : SystemZBinaryBHF<"vfene">; -+ defm int_s390_vfene : SystemZBinaryCCBHF; -+ defm int_s390_vfenez : SystemZBinaryBHF<"vfenez">; -+ defm int_s390_vfenez : SystemZBinaryCCBHF; -+ -+ defm int_s390_vistr : SystemZUnaryBHF<"vistr">; -+ defm int_s390_vistr : SystemZUnaryCCBHF; -+ -+ defm int_s390_vstrc : SystemZQuaternaryIntBHF<"vstrc">; -+ defm int_s390_vstrc : SystemZQuaternaryIntCCBHF; -+ defm int_s390_vstrcz : SystemZQuaternaryIntBHF<"vstrcz">; -+ defm int_s390_vstrcz : SystemZQuaternaryIntCCBHF; -+ -+ def int_s390_vfcedbs : SystemZBinaryConvCC; -+ def int_s390_vfchdbs : SystemZBinaryConvCC; -+ def int_s390_vfchedbs : SystemZBinaryConvCC; -+ -+ def int_s390_vftcidb : SystemZBinaryConvIntCC; -+ -+ def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], -+ [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], -+ [IntrNoMem]>; -+} -Index: llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -=================================================================== ---- llvm-36.orig/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -+++ llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp -@@ -10496,18 +10496,13 @@ SDValue DAGCombiner::ReplaceExtractVecto - if (auto *ConstEltNo = dyn_cast(EltNo)) { - int Elt = ConstEltNo->getZExtValue(); - unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; -- if (TLI.isBigEndian()) -- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; - Offset = DAG.getConstant(PtrOff, PtrType); - MPI = 
OriginalLoad->getPointerInfo().getWithOffset(PtrOff); - } else { -+ Offset = DAG.getZExtOrTrunc(EltNo, SDLoc(EVE), PtrType); - Offset = DAG.getNode( -- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, -- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); -- if (TLI.isBigEndian()) -- Offset = DAG.getNode( -- ISD::SUB, SDLoc(EVE), EltNo.getValueType(), -- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); -+ ISD::MUL, SDLoc(EVE), PtrType, Offset, -+ DAG.getConstant(VecEltVT.getStoreSize(), PtrType)); - MPI = OriginalLoad->getPointerInfo(); - } - NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); -Index: llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp -=================================================================== ---- llvm-36.orig/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp -+++ llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp -@@ -2888,7 +2888,10 @@ static EVT FindMemType(SelectionDAG& DAG - unsigned MemVTWidth = MemVT.getSizeInBits(); - if (MemVT.getSizeInBits() <= WidenEltWidth) - break; -- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && -+ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); -+ if ((Action == TargetLowering::TypeLegal || -+ Action == TargetLowering::TypePromoteInteger) && -+ (WidenWidth % MemVTWidth) == 0 && - isPowerOf2_32(WidenWidth / MemVTWidth) && - (MemVTWidth <= Width || - (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { -Index: llvm-36/lib/Support/Host.cpp -=================================================================== ---- llvm-36.orig/lib/Support/Host.cpp -+++ llvm-36/lib/Support/Host.cpp -@@ -655,6 +655,28 @@ StringRef sys::getHostCPUName() { - StringRef Str(buffer, CPUInfoSize); - SmallVector Lines; - Str.split(Lines, "\n"); -+ -+ // Look for the CPU features. 
-+ SmallVector CPUFeatures; -+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) -+ if (Lines[I].startswith("features")) { -+ size_t Pos = Lines[I].find(":"); -+ if (Pos != StringRef::npos) { -+ Lines[I].drop_front(Pos + 1).split(CPUFeatures, " "); -+ break; -+ } -+ } -+ -+ // We need to check for the presence of vector support independently of -+ // the machine type, since we may only use the vector register set when -+ // supported by the kernel (and hypervisor). -+ bool HaveVectorSupport = false; -+ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { -+ if (CPUFeatures[I] == "vx") -+ HaveVectorSupport = true; -+ } -+ -+ // Now check the processor machine type. - for (unsigned I = 0, E = Lines.size(); I != E; ++I) { - if (Lines[I].startswith("processor ")) { - size_t Pos = Lines[I].find("machine = "); -@@ -662,6 +684,8 @@ StringRef sys::getHostCPUName() { - Pos += sizeof("machine = ") - 1; - unsigned int Id; - if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { -+ if (Id >= 2964 && HaveVectorSupport) -+ return "z13"; - if (Id >= 2827) - return "zEC12"; - if (Id >= 2817) -Index: llvm-36/lib/Support/Triple.cpp -=================================================================== ---- llvm-36.orig/lib/Support/Triple.cpp -+++ llvm-36/lib/Support/Triple.cpp -@@ -89,7 +89,7 @@ const char *Triple::getArchTypePrefix(Ar - case sparcv9: - case sparc: return "sparc"; - -- case systemz: return "systemz"; -+ case systemz: return "s390"; - - case x86: - case x86_64: return "x86"; -Index: llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp -+++ llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp -@@ -39,13 +39,17 @@ enum RegisterKind { - ADDR64Reg, - FP32Reg, - FP64Reg, -- FP128Reg -+ FP128Reg, -+ VR32Reg, -+ VR64Reg, -+ VR128Reg - }; - - enum MemoryKind { - BDMem, - BDXMem, -- BDLMem -+ BDLMem, -+ BDVMem - }; - - class 
SystemZOperand : public MCParsedAsmOperand { -@@ -57,6 +61,7 @@ private: - KindReg, - KindAccessReg, - KindImm, -+ KindImmTLS, - KindMem - }; - -@@ -84,23 +89,31 @@ private: - }; - - // Base + Disp + Index, where Base and Index are LLVM registers or 0. -- // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). -- // Length is the operand length for D(L,B)-style operands, otherwise -- // it is null. -+ // MemKind says what type of memory this is and RegKind says what type -+ // the base register has (ADDR32Reg or ADDR64Reg). Length is the operand -+ // length for D(L,B)-style operands, otherwise it is null. - struct MemOp { -- unsigned Base : 8; -- unsigned Index : 8; -- unsigned RegKind : 8; -- unsigned Unused : 8; -+ unsigned Base : 12; -+ unsigned Index : 12; -+ unsigned MemKind : 4; -+ unsigned RegKind : 4; - const MCExpr *Disp; - const MCExpr *Length; - }; - -+ // Imm is an immediate operand, and Sym is an optional TLS symbol -+ // for use with a __tls_get_offset marker relocation. 
-+ struct ImmTLSOp { -+ const MCExpr *Imm; -+ const MCExpr *Sym; -+ }; -+ - union { - TokenOp Token; - RegOp Reg; - unsigned AccessReg; - const MCExpr *Imm; -+ ImmTLSOp ImmTLS; - MemOp Mem; - }; - -@@ -149,10 +162,11 @@ public: - return Op; - } - static std::unique_ptr -- createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, -- unsigned Index, const MCExpr *Length, SMLoc StartLoc, -- SMLoc EndLoc) { -+ createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, -+ const MCExpr *Disp, unsigned Index, const MCExpr *Length, -+ SMLoc StartLoc, SMLoc EndLoc) { - auto Op = make_unique(KindMem, StartLoc, EndLoc); -+ Op->Mem.MemKind = MemKind; - Op->Mem.RegKind = RegKind; - Op->Mem.Base = Base; - Op->Mem.Index = Index; -@@ -160,6 +174,14 @@ public: - Op->Mem.Length = Length; - return Op; - } -+ static std::unique_ptr -+ createImmTLS(const MCExpr *Imm, const MCExpr *Sym, -+ SMLoc StartLoc, SMLoc EndLoc) { -+ auto Op = make_unique(KindImmTLS, StartLoc, EndLoc); -+ Op->ImmTLS.Imm = Imm; -+ Op->ImmTLS.Sym = Sym; -+ return Op; -+ } - - // Token operands - bool isToken() const override { -@@ -200,24 +222,40 @@ public: - return Imm; - } - -+ // Immediate operands with optional TLS symbol. -+ bool isImmTLS() const { -+ return Kind == KindImmTLS; -+ } -+ - // Memory operands. - bool isMem() const override { - return Kind == KindMem; - } -- bool isMem(RegisterKind RegKind, MemoryKind MemKind) const { -+ bool isMem(MemoryKind MemKind) const { - return (Kind == KindMem && -- Mem.RegKind == RegKind && -- (MemKind == BDXMem || !Mem.Index) && -- (MemKind == BDLMem) == (Mem.Length != nullptr)); -+ (Mem.MemKind == MemKind || -+ // A BDMem can be treated as a BDXMem in which the index -+ // register field is 0. 
-+ (Mem.MemKind == BDMem && MemKind == BDXMem))); -+ } -+ bool isMem(MemoryKind MemKind, RegisterKind RegKind) const { -+ return isMem(MemKind) && Mem.RegKind == RegKind; - } -- bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { -- return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); -+ bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const { -+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff); - } -- bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const { -- return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287); -+ bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { -+ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287); - } - bool isMemDisp12Len8(RegisterKind RegKind) const { -- return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100); -+ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100); -+ } -+ void addBDVAddrOperands(MCInst &Inst, unsigned N) const { -+ assert(N == 3 && "Invalid number of operands"); -+ assert(isMem(BDVMem) && "Invalid operand type"); -+ Inst.addOperand(MCOperand::CreateReg(Mem.Base)); -+ addExpr(Inst, Mem.Disp); -+ Inst.addOperand(MCOperand::CreateReg(Mem.Index)); - } - - // Override MCParsedAsmOperand. 
-@@ -242,24 +280,31 @@ public: - } - void addBDAddrOperands(MCInst &Inst, unsigned N) const { - assert(N == 2 && "Invalid number of operands"); -- assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type"); -+ assert(isMem(BDMem) && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); - addExpr(Inst, Mem.Disp); - } - void addBDXAddrOperands(MCInst &Inst, unsigned N) const { - assert(N == 3 && "Invalid number of operands"); -- assert(Kind == KindMem && "Invalid operand type"); -+ assert(isMem(BDXMem) && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); - addExpr(Inst, Mem.Disp); - Inst.addOperand(MCOperand::CreateReg(Mem.Index)); - } - void addBDLAddrOperands(MCInst &Inst, unsigned N) const { - assert(N == 3 && "Invalid number of operands"); -- assert(Kind == KindMem && "Invalid operand type"); -+ assert(isMem(BDLMem) && "Invalid operand type"); - Inst.addOperand(MCOperand::CreateReg(Mem.Base)); - addExpr(Inst, Mem.Disp); - addExpr(Inst, Mem.Length); - } -+ void addImmTLSOperands(MCInst &Inst, unsigned N) const { -+ assert(N == 2 && "Invalid number of operands"); -+ assert(Kind == KindImmTLS && "Invalid operand type"); -+ addExpr(Inst, ImmTLS.Imm); -+ if (ImmTLS.Sym) -+ addExpr(Inst, ImmTLS.Sym); -+ } - - // Used by the TableGen code to check for particular operand types. 
- bool isGR32() const { return isReg(GR32Reg); } -@@ -273,17 +318,26 @@ public: - bool isFP32() const { return isReg(FP32Reg); } - bool isFP64() const { return isReg(FP64Reg); } - bool isFP128() const { return isReg(FP128Reg); } -- bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); } -- bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); } -- bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); } -- bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); } -- bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); } -- bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); } -+ bool isVR32() const { return isReg(VR32Reg); } -+ bool isVR64() const { return isReg(VR64Reg); } -+ bool isVF128() const { return false; } -+ bool isVR128() const { return isReg(VR128Reg); } -+ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } -+ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } -+ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); } -+ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } -+ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } -+ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } - bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } -+ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } -+ bool isU1Imm() const { return isImm(0, 1); } -+ bool isU2Imm() const { return isImm(0, 3); } -+ bool isU3Imm() const { return isImm(0, 7); } - bool isU4Imm() const { return isImm(0, 15); } - bool isU6Imm() const { return isImm(0, 63); } - bool isU8Imm() const { return isImm(0, 255); } - bool isS8Imm() const { return isImm(-128, 127); } -+ bool isU12Imm() const { return isImm(0, 4095); } - bool isU16Imm() const { return isImm(0, 65535); } - bool isS16Imm() const { return isImm(-32768, 32767); } - bool 
isU32Imm() const { return isImm(0, (1LL << 32) - 1); } -@@ -300,6 +354,7 @@ private: - enum RegisterGroup { - RegGR, - RegFP, -+ RegV, - RegAccess - }; - struct Register { -@@ -318,12 +373,15 @@ private: - RegisterKind Kind); - - bool parseAddress(unsigned &Base, const MCExpr *&Disp, -- unsigned &Index, const MCExpr *&Length, -+ unsigned &Index, bool &IsVector, const MCExpr *&Length, - const unsigned *Regs, RegisterKind RegKind); - - OperandMatchResultTy parseAddress(OperandVector &Operands, -- const unsigned *Regs, RegisterKind RegKind, -- MemoryKind MemKind); -+ MemoryKind MemKind, const unsigned *Regs, -+ RegisterKind RegKind); -+ -+ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, -+ int64_t MaxVal, bool AllowTLS); - - bool parseOperand(OperandVector &Operands, StringRef Mnemonic); - -@@ -382,26 +440,45 @@ public: - OperandMatchResultTy parseFP128(OperandVector &Operands) { - return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); - } -+ OperandMatchResultTy parseVR32(OperandVector &Operands) { -+ return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); -+ } -+ OperandMatchResultTy parseVR64(OperandVector &Operands) { -+ return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); -+ } -+ OperandMatchResultTy parseVF128(OperandVector &Operands) { -+ llvm_unreachable("Shouldn't be used as an operand"); -+ } -+ OperandMatchResultTy parseVR128(OperandVector &Operands) { -+ return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); -+ } - OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { -- return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); -+ return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg); - } - OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { -- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); -+ return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg); - } - OperandMatchResultTy 
parseBDXAddr64(OperandVector &Operands) { -- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); -+ return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg); - } - OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { -- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); -+ return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg); -+ } -+ OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { -+ return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg); - } - OperandMatchResultTy parseAccessReg(OperandVector &Operands); -- OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, -- int64_t MaxVal); - OperandMatchResultTy parsePCRel16(OperandVector &Operands) { -- return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); -+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false); - } - OperandMatchResultTy parsePCRel32(OperandVector &Operands) { -- return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); -+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false); -+ } -+ OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) { -+ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true); -+ } -+ OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) { -+ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); - } - }; - } // end anonymous namespace -@@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Reg - Reg.Group = RegGR; - else if (Prefix == 'f' && Reg.Num < 16) - Reg.Group = RegFP; -+ else if (Prefix == 'v' && Reg.Num < 32) -+ Reg.Group = RegV; - else if (Prefix == 'a' && Reg.Num < 16) - Reg.Group = RegAccess; - else -@@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandV - // Regs maps asm register numbers to LLVM register numbers and RegKind - // says what kind of address register we're using (ADDR32Reg or ADDR64Reg). 
- bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, -- unsigned &Index, const MCExpr *&Length, -- const unsigned *Regs, -+ unsigned &Index, bool &IsVector, -+ const MCExpr *&Length, const unsigned *Regs, - RegisterKind RegKind) { - // Parse the displacement, which must always be present. - if (getParser().parseExpression(Disp)) -@@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsi - // Parse the optional base and index. - Index = 0; - Base = 0; -+ IsVector = false; - Length = nullptr; - if (getLexer().is(AsmToken::LParen)) { - Parser.Lex(); -@@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsi - if (getLexer().is(AsmToken::Percent)) { - // Parse the first register and decide whether it's a base or an index. - Register Reg; -- if (parseRegister(Reg, RegGR, Regs, RegKind)) -+ if (parseRegister(Reg)) - return true; -- if (getLexer().is(AsmToken::Comma)) -- Index = Reg.Num; -- else -- Base = Reg.Num; -+ if (Reg.Group == RegV) { -+ // A vector index register. The base register is optional. -+ IsVector = true; -+ Index = SystemZMC::VR128Regs[Reg.Num]; -+ } else if (Reg.Group == RegGR) { -+ if (Reg.Num == 0) -+ return Error(Reg.StartLoc, "%r0 used in an address"); -+ // If the are two registers, the first one is the index and the -+ // second is the base. -+ if (getLexer().is(AsmToken::Comma)) -+ Index = Regs[Reg.Num]; -+ else -+ Base = Regs[Reg.Num]; -+ } else -+ return Error(Reg.StartLoc, "invalid address register"); - } else { - // Parse the length. - if (getParser().parseExpression(Length)) -@@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsi - // Parse a memory operand and add it to Operands. The other arguments - // are as above. 
- SystemZAsmParser::OperandMatchResultTy --SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs, -- RegisterKind RegKind, MemoryKind MemKind) { -+SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, -+ const unsigned *Regs, RegisterKind RegKind) { - SMLoc StartLoc = Parser.getTok().getLoc(); - unsigned Base, Index; -+ bool IsVector; - const MCExpr *Disp; - const MCExpr *Length; -- if (parseAddress(Base, Disp, Index, Length, Regs, RegKind)) -+ if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind)) - return MatchOperand_ParseFail; - -- if (Index && MemKind != BDXMem) -- { -- Error(StartLoc, "invalid use of indexed addressing"); -- return MatchOperand_ParseFail; -- } -+ if (IsVector && MemKind != BDVMem) { -+ Error(StartLoc, "invalid use of vector addressing"); -+ return MatchOperand_ParseFail; -+ } - -- if (Length && MemKind != BDLMem) -- { -- Error(StartLoc, "invalid use of length addressing"); -- return MatchOperand_ParseFail; -- } -+ if (!IsVector && MemKind == BDVMem) { -+ Error(StartLoc, "vector index required in address"); -+ return MatchOperand_ParseFail; -+ } - -- if (!Length && MemKind == BDLMem) -- { -- Error(StartLoc, "missing length in address"); -- return MatchOperand_ParseFail; -- } -+ if (Index && MemKind != BDXMem && MemKind != BDVMem) { -+ Error(StartLoc, "invalid use of indexed addressing"); -+ return MatchOperand_ParseFail; -+ } -+ -+ if (Length && MemKind != BDLMem) { -+ Error(StartLoc, "invalid use of length addressing"); -+ return MatchOperand_ParseFail; -+ } -+ -+ if (!Length && MemKind == BDLMem) { -+ Error(StartLoc, "missing length in address"); -+ return MatchOperand_ParseFail; -+ } - - SMLoc EndLoc = - SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); -- Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, -- Length, StartLoc, EndLoc)); -+ Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp, -+ Index, Length, StartLoc, -+ 
EndLoc)); - return MatchOperand_Success; - } - -@@ -589,6 +689,8 @@ bool SystemZAsmParser::ParseRegister(uns - RegNo = SystemZMC::GR64Regs[Reg.Num]; - else if (Reg.Group == RegFP) - RegNo = SystemZMC::FP64Regs[Reg.Num]; -+ else if (Reg.Group == RegV) -+ RegNo = SystemZMC::VR128Regs[Reg.Num]; - else - // FIXME: Access registers aren't modelled as LLVM registers yet. - return Error(Reg.StartLoc, "invalid operand for instruction"); -@@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(Oper - // so we treat any plain expression as an immediate. - SMLoc StartLoc = Parser.getTok().getLoc(); - unsigned Base, Index; -+ bool IsVector; - const MCExpr *Expr, *Length; -- if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg)) -+ if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs, -+ ADDR64Reg)) - return true; - - SMLoc EndLoc = -@@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(Operand - - SystemZAsmParser::OperandMatchResultTy - SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, -- int64_t MaxVal) { -+ int64_t MaxVal, bool AllowTLS) { - MCContext &Ctx = getContext(); - MCStreamer &Out = getStreamer(); - const MCExpr *Expr; -@@ -766,9 +870,54 @@ SystemZAsmParser::parsePCRel(OperandVect - Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); - } - -+ // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. 
-+ const MCExpr *Sym = nullptr; -+ if (AllowTLS && getLexer().is(AsmToken::Colon)) { -+ Parser.Lex(); -+ -+ if (Parser.getTok().isNot(AsmToken::Identifier)) { -+ Error(Parser.getTok().getLoc(), "unexpected token"); -+ return MatchOperand_ParseFail; -+ } -+ -+ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; -+ StringRef Name = Parser.getTok().getString(); -+ if (Name == "tls_gdcall") -+ Kind = MCSymbolRefExpr::VK_TLSGD; -+ else if (Name == "tls_ldcall") -+ Kind = MCSymbolRefExpr::VK_TLSLDM; -+ else { -+ Error(Parser.getTok().getLoc(), "unknown TLS tag"); -+ return MatchOperand_ParseFail; -+ } -+ Parser.Lex(); -+ -+ if (Parser.getTok().isNot(AsmToken::Colon)) { -+ Error(Parser.getTok().getLoc(), "unexpected token"); -+ return MatchOperand_ParseFail; -+ } -+ Parser.Lex(); -+ -+ if (Parser.getTok().isNot(AsmToken::Identifier)) { -+ Error(Parser.getTok().getLoc(), "unexpected token"); -+ return MatchOperand_ParseFail; -+ } -+ -+ StringRef Identifier = Parser.getTok().getString(); -+ Sym = MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(Identifier), -+ Kind, Ctx); -+ Parser.Lex(); -+ } -+ - SMLoc EndLoc = - SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); -- Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); -+ -+ if (AllowTLS) -+ Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym, -+ StartLoc, EndLoc)); -+ else -+ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); -+ - return MatchOperand_Success; - } - -Index: llvm-36/lib/Target/SystemZ/CMakeLists.txt -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/CMakeLists.txt -+++ llvm-36/lib/Target/SystemZ/CMakeLists.txt -@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen - SystemZISelDAGToDAG.cpp - SystemZISelLowering.cpp - SystemZInstrInfo.cpp -+ SystemZLDCleanup.cpp - SystemZLongBranch.cpp - SystemZMachineFunctionInfo.cpp - SystemZMCInstLower.cpp -@@ -28,6 +29,7 @@ add_llvm_target(SystemZCodeGen - 
SystemZShortenInst.cpp - SystemZSubtarget.cpp - SystemZTargetMachine.cpp -+ SystemZTargetTransformInfo.cpp - ) - - add_subdirectory(AsmParser) -Index: llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp -+++ llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp -@@ -47,8 +47,8 @@ extern "C" void LLVMInitializeSystemZDis - } - - static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, -- const unsigned *Regs) { -- assert(RegNo < 16 && "Invalid register"); -+ const unsigned *Regs, unsigned Size) { -+ assert(RegNo < Size && "Invalid register"); - RegNo = Regs[RegNo]; - if (RegNo == 0) - return MCDisassembler::Fail; -@@ -59,61 +59,81 @@ static DecodeStatus decodeRegisterClass( - static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); - } - - static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); - } - - static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); - } - - static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); - } - - static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t 
RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); - } - - static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); - } - - static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); - } - - static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, - uint64_t Address, - const void *Decoder) { -- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16); -+} -+ -+static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, -+ uint64_t Address, -+ const void *Decoder) { -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); -+} -+ -+static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, -+ uint64_t Address, -+ const void *Decoder) { -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); -+} -+ -+static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, -+ uint64_t Address, -+ const void *Decoder) { -+ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); - } - - template - static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { -- assert(isUInt(Imm) && "Invalid immediate"); -+ if (!isUInt(Imm)) -+ return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateImm(Imm)); - return MCDisassembler::Success; - } - - template - static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { -- assert(isUInt(Imm) && "Invalid immediate"); -+ if 
(!isUInt(Imm)) -+ return MCDisassembler::Fail; - Inst.addOperand(MCOperand::CreateImm(SignExtend64(Imm))); - return MCDisassembler::Success; - } -@@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOpera - return decodeUImmOperand<4>(Inst, Imm); - } - -+static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, -+ uint64_t Address, const void *Decoder) { -+ return decodeUImmOperand<1>(Inst, Imm); -+} -+ -+static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, -+ uint64_t Address, const void *Decoder) { -+ return decodeUImmOperand<2>(Inst, Imm); -+} -+ -+static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, -+ uint64_t Address, const void *Decoder) { -+ return decodeUImmOperand<3>(Inst, Imm); -+} -+ - static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { - return decodeUImmOperand<4>(Inst, Imm); -@@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(M - return decodeUImmOperand<8>(Inst, Imm); - } - -+static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, -+ uint64_t Address, const void *Decoder) { -+ return decodeUImmOperand<12>(Inst, Imm); -+} -+ - static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, - uint64_t Address, const void *Decoder) { - return decodeUImmOperand<16>(Inst, Imm); -@@ -240,6 +280,18 @@ static DecodeStatus decodeBDLAddr12Len8O - return MCDisassembler::Success; - } - -+static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field, -+ const unsigned *Regs) { -+ uint64_t Index = Field >> 16; -+ uint64_t Base = (Field >> 12) & 0xf; -+ uint64_t Disp = Field & 0xfff; -+ assert(Index < 32 && "Invalid BDVAddr12"); -+ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 
0 : Regs[Base])); -+ Inst.addOperand(MCOperand::CreateImm(Disp)); -+ Inst.addOperand(MCOperand::CreateReg(SystemZMC::VR128Regs[Index])); -+ return MCDisassembler::Success; -+} -+ - static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, - uint64_t Address, - const void *Decoder) { -@@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp1 - return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); - } - -+static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, -+ uint64_t Address, -+ const void *Decoder) { -+ return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs); -+} -+ - #include "SystemZGenDisassemblerTables.inc" - - DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, -Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp -+++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp -@@ -10,6 +10,7 @@ - #include "SystemZInstPrinter.h" - #include "llvm/MC/MCExpr.h" - #include "llvm/MC/MCInstrInfo.h" -+#include "llvm/MC/MCSymbol.h" - #include "llvm/Support/raw_ostream.h" - - using namespace llvm; -@@ -21,13 +22,17 @@ using namespace llvm; - void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, - unsigned Index, raw_ostream &O) { - O << Disp; -- if (Base) { -+ if (Base || Index) { - O << '('; -- if (Index) -- O << '%' << getRegisterName(Index) << ','; -- O << '%' << getRegisterName(Base) << ')'; -- } else -- assert(!Index && "Shouldn't have an index without a base"); -+ if (Index) { -+ O << '%' << getRegisterName(Index); -+ if (Base) -+ O << ','; -+ } -+ if (Base) -+ O << '%' << getRegisterName(Base); -+ O << ')'; -+ } - } - - void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { -@@ -51,60 +56,78 @@ void SystemZInstPrinter::printRegName(ra - O << '%' << getRegisterName(RegNo); - 
} - --void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, -- raw_ostream &O) { -+template -+void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isUInt<4>(Value) && "Invalid u4imm argument"); -+ assert(isUInt(Value) && "Invalid uimm argument"); - O << Value; - } - --void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, -- raw_ostream &O) { -+template -+void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { - int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isUInt<6>(Value) && "Invalid u6imm argument"); -+ assert(isInt(Value) && "Invalid simm argument"); - O << Value; - } - -+void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<1>(MI, OpNum, O); -+} -+ -+void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<2>(MI, OpNum, O); -+} -+ -+void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<3>(MI, OpNum, O); -+} -+ -+void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<4>(MI, OpNum, O); -+} -+ -+void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<6>(MI, OpNum, O); -+} -+ - void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isInt<8>(Value) && "Invalid s8imm argument"); -- O << Value; -+ printSImmOperand<8>(MI, OpNum, O); - } - - void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isUInt<8>(Value) && "Invalid u8imm argument"); -- O << Value; -+ printUImmOperand<8>(MI, OpNum, O); -+} -+ -+void SystemZInstPrinter::printU12ImmOperand(const MCInst 
*MI, int OpNum, -+ raw_ostream &O) { -+ printUImmOperand<12>(MI, OpNum, O); - } - - void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isInt<16>(Value) && "Invalid s16imm argument"); -- O << Value; -+ printSImmOperand<16>(MI, OpNum, O); - } - - void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isUInt<16>(Value) && "Invalid u16imm argument"); -- O << Value; -+ printUImmOperand<16>(MI, OpNum, O); - } - - void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isInt<32>(Value) && "Invalid s32imm argument"); -- O << Value; -+ printSImmOperand<32>(MI, OpNum, O); - } - - void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { -- int64_t Value = MI->getOperand(OpNum).getImm(); -- assert(isUInt<32>(Value) && "Invalid u32imm argument"); -- O << Value; -+ printUImmOperand<32>(MI, OpNum, O); - } - - void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, -@@ -124,6 +147,29 @@ void SystemZInstPrinter::printPCRelOpera - O << *MO.getExpr(); - } - -+void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ // Output the PC-relative operand. -+ printPCRelOperand(MI, OpNum, O); -+ -+ // Output the TLS marker if present. 
-+ if ((unsigned)OpNum + 1 < MI->getNumOperands()) { -+ const MCOperand &MO = MI->getOperand(OpNum + 1); -+ const MCSymbolRefExpr &refExp = cast(*MO.getExpr()); -+ switch (refExp.getKind()) { -+ case MCSymbolRefExpr::VK_TLSGD: -+ O << ":tls_gdcall:"; -+ break; -+ case MCSymbolRefExpr::VK_TLSLDM: -+ O << ":tls_ldcall:"; -+ break; -+ default: -+ llvm_unreachable("Unexpected symbol kind"); -+ } -+ O << refExp.getSymbol().getName(); -+ } -+} -+ - void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, - raw_ostream &O) { - printOperand(MI->getOperand(OpNum), O); -@@ -153,6 +199,13 @@ void SystemZInstPrinter::printBDLAddrOpe - O << ')'; - } - -+void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum, -+ raw_ostream &O) { -+ printAddress(MI->getOperand(OpNum).getReg(), -+ MI->getOperand(OpNum + 1).getImm(), -+ MI->getOperand(OpNum + 2).getReg(), O); -+} -+ - void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, - raw_ostream &O) { - static const char *const CondNames[] = { -Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h -+++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h -@@ -47,15 +47,21 @@ private: - void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void 
printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); -+ void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O); - void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); - - // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) -Index: llvm-36/lib/Target/SystemZ/LLVMBuild.txt -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/LLVMBuild.txt -+++ llvm-36/lib/Target/SystemZ/LLVMBuild.txt -@@ -31,5 +31,5 @@ has_jit = 1 - type = Library - name = SystemZCodeGen - parent = SystemZ --required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target -+required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target - add_to_library_groups = SystemZ -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp -@@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFi - switch (unsigned(Kind)) { - case SystemZ::FK_390_PC16DBL: - case SystemZ::FK_390_PC32DBL: -- case SystemZ::FK_390_PLT16DBL: -- case SystemZ::FK_390_PLT32DBL: - return (int64_t)Value / 2; -+ -+ case SystemZ::FK_390_TLS_CALL: -+ return 0; - } - - llvm_unreachable("Unknown 
fixup kind!"); -@@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MC - const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { - { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, - { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, -- { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, -- { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel } -+ { "FK_390_TLS_CALL", 0, 0, 0 } - }; - - if (Kind < FirstTargetFixupKind) -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp -@@ -70,24 +70,43 @@ private: - uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const; -+ uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, -+ SmallVectorImpl &Fixups, -+ const MCSubtargetInfo &STI) const; - - // Operand OpNum of MI needs a PC-relative fixup of kind Kind at - // Offset bytes from the start of MI. Add the fixup to Fixups - // and return the in-place addend, which since we're a RELA target -- // is always 0. -+ // is always 0. If AllowTLS is true and optional operand OpNum + 1 -+ // is present, also emit a TLS call fixup for it. 
- uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups, -- unsigned Kind, int64_t Offset) const; -+ unsigned Kind, int64_t Offset, -+ bool AllowTLS) const; - - uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { -- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); -+ return getPCRelEncoding(MI, OpNum, Fixups, -+ SystemZ::FK_390_PC16DBL, 2, false); - } - uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl &Fixups, - const MCSubtargetInfo &STI) const { -- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); -+ return getPCRelEncoding(MI, OpNum, Fixups, -+ SystemZ::FK_390_PC32DBL, 2, false); -+ } -+ uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum, -+ SmallVectorImpl &Fixups, -+ const MCSubtargetInfo &STI) const { -+ return getPCRelEncoding(MI, OpNum, Fixups, -+ SystemZ::FK_390_PC16DBL, 2, true); -+ } -+ uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum, -+ SmallVectorImpl &Fixups, -+ const MCSubtargetInfo &STI) const { -+ return getPCRelEncoding(MI, OpNum, Fixups, -+ SystemZ::FK_390_PC32DBL, 2, true); - } - }; - } // end anonymous namespace -@@ -178,10 +197,22 @@ getBDLAddr12Len8Encoding(const MCInst &M - return (Len << 16) | (Base << 12) | Disp; - } - -+uint64_t SystemZMCCodeEmitter:: -+getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, -+ SmallVectorImpl &Fixups, -+ const MCSubtargetInfo &STI) const { -+ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); -+ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); -+ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); -+ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index)); -+ return (Index << 16) | (Base << 12) | Disp; -+} -+ - uint64_t - SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, - SmallVectorImpl 
&Fixups, -- unsigned Kind, int64_t Offset) const { -+ unsigned Kind, int64_t Offset, -+ bool AllowTLS) const { - const MCOperand &MO = MI.getOperand(OpNum); - const MCExpr *Expr; - if (MO.isImm()) -@@ -198,6 +229,13 @@ SystemZMCCodeEmitter::getPCRelEncoding(c - } - } - Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); -+ -+ // Output the fixup for the TLS marker if present. -+ if (AllowTLS && OpNum + 1 < MI.getNumOperands()) { -+ const MCOperand &MOTLS = MI.getOperand(OpNum + 1); -+ Fixups.push_back(MCFixup::Create(0, MOTLS.getExpr(), -+ (MCFixupKind)SystemZ::FK_390_TLS_CALL)); -+ } - return 0; - } - -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h -@@ -18,8 +18,7 @@ enum FixupKind { - // These correspond directly to R_390_* relocations. - FK_390_PC16DBL = FirstTargetFixupKind, - FK_390_PC32DBL, -- FK_390_PLT16DBL, -- FK_390_PLT32DBL, -+ FK_390_TLS_CALL, - - // Marker - LastTargetFixupKind, -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp -@@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned K - case FK_Data_8: return ELF::R_390_PC64; - case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; - case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; -- case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL; -- case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL; - } - llvm_unreachable("Unsupported PC-relative address"); - } -@@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned K - llvm_unreachable("Unsupported absolute address"); - } - -+// Return the R_390_TLS_LDO* relocation 
type for MCFixupKind Kind. -+static unsigned getTLSLDOReloc(unsigned Kind) { -+ switch (Kind) { -+ case FK_Data_4: return ELF::R_390_TLS_LDO32; -+ case FK_Data_8: return ELF::R_390_TLS_LDO64; -+ } -+ llvm_unreachable("Unsupported absolute address"); -+} -+ -+// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind. -+static unsigned getTLSLDMReloc(unsigned Kind) { -+ switch (Kind) { -+ case FK_Data_4: return ELF::R_390_TLS_LDM32; -+ case FK_Data_8: return ELF::R_390_TLS_LDM64; -+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL; -+ } -+ llvm_unreachable("Unsupported absolute address"); -+} -+ -+// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind. -+static unsigned getTLSGDReloc(unsigned Kind) { -+ switch (Kind) { -+ case FK_Data_4: return ELF::R_390_TLS_GD32; -+ case FK_Data_8: return ELF::R_390_TLS_GD64; -+ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL; -+ } -+ llvm_unreachable("Unsupported absolute address"); -+} -+ - // Return the PLT relocation counterpart of MCFixupKind Kind. 
- static unsigned getPLTReloc(unsigned Kind) { - switch (Kind) { -@@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocTy - assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); - return getTLSLEReloc(Kind); - -+ case MCSymbolRefExpr::VK_INDNTPOFF: -+ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) -+ return ELF::R_390_TLS_IEENT; -+ llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now"); -+ -+ case MCSymbolRefExpr::VK_DTPOFF: -+ assert(!IsPCRel && "DTPOFF shouldn't be PC-relative"); -+ return getTLSLDOReloc(Kind); -+ -+ case MCSymbolRefExpr::VK_TLSLDM: -+ assert(!IsPCRel && "TLSLDM shouldn't be PC-relative"); -+ return getTLSLDMReloc(Kind); -+ -+ case MCSymbolRefExpr::VK_TLSGD: -+ assert(!IsPCRel && "TLSGD shouldn't be PC-relative"); -+ return getTLSGDReloc(Kind); -+ - case MCSymbolRefExpr::VK_GOT: - if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) - return ELF::R_390_GOTENT; -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp -@@ -76,6 +76,39 @@ const unsigned SystemZMC::FP128Regs[16] - SystemZ::F12Q, SystemZ::F13Q, 0, 0 - }; - -+const unsigned SystemZMC::VR32Regs[32] = { -+ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, -+ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, -+ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, -+ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, -+ SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S, -+ SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S, -+ SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S, -+ SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S -+}; -+ -+const unsigned SystemZMC::VR64Regs[32] = { -+ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, -+ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, 
SystemZ::F7D, -+ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, -+ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, -+ SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D, -+ SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D, -+ SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D, -+ SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D -+}; -+ -+const unsigned SystemZMC::VR128Regs[32] = { -+ SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3, -+ SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7, -+ SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11, -+ SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15, -+ SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19, -+ SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23, -+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, -+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31 -+}; -+ - unsigned SystemZMC::getFirstReg(unsigned Reg) { - static unsigned Map[SystemZ::NUM_TARGET_REGS]; - static bool Initialized = false; -@@ -85,10 +118,13 @@ unsigned SystemZMC::getFirstReg(unsigned - Map[GRH32Regs[I]] = I; - Map[GR64Regs[I]] = I; - Map[GR128Regs[I]] = I; -- Map[FP32Regs[I]] = I; -- Map[FP64Regs[I]] = I; - Map[FP128Regs[I]] = I; - } -+ for (unsigned I = 0; I < 32; ++I) { -+ Map[VR32Regs[I]] = I; -+ Map[VR64Regs[I]] = I; -+ Map[VR128Regs[I]] = I; -+ } - } - assert(Reg < SystemZ::NUM_TARGET_REGS); - return Map[Reg]; -Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h -+++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h -@@ -48,6 +48,9 @@ extern const unsigned GR128Regs[16]; - extern const unsigned FP32Regs[16]; - extern const unsigned FP64Regs[16]; - extern const unsigned FP128Regs[16]; -+extern const unsigned VR32Regs[32]; -+extern const unsigned VR64Regs[32]; -+extern const unsigned 
VR128Regs[32]; - - // Return the 0-based number of the first architectural register that - // contains the given LLVM register. E.g. R1D -> 1. -@@ -67,6 +70,11 @@ inline unsigned getRegAsGR32(unsigned Re - inline unsigned getRegAsGRH32(unsigned Reg) { - return GRH32Regs[getFirstReg(Reg)]; - } -+ -+// Return the given register as a VR128. -+inline unsigned getRegAsVR128(unsigned Reg) { -+ return VR128Regs[getFirstReg(Reg)]; -+} - } // end namespace SystemZMC - - MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, -Index: llvm-36/lib/Target/SystemZ/SystemZ.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZ.h -+++ llvm-36/lib/Target/SystemZ/SystemZ.h -@@ -21,6 +21,7 @@ - namespace llvm { - class SystemZTargetMachine; - class FunctionPass; -+class ImmutablePass; - - namespace SystemZ { - // Condition-code mask values. -@@ -68,6 +69,25 @@ const unsigned CCMASK_TM_MSB_0 = C - const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; - const unsigned CCMASK_TM = CCMASK_ANY; - -+// Condition-code mask assignments for TRANSACTION_BEGIN. -+const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; -+const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; -+const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; -+const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; -+const unsigned CCMASK_TBEGIN = CCMASK_ANY; -+ -+// Condition-code mask assignments for TRANSACTION_END. -+const unsigned CCMASK_TEND_TX = CCMASK_0; -+const unsigned CCMASK_TEND_NOTX = CCMASK_2; -+const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; -+ -+// Condition-code mask assignments for vector comparisons (and similar -+// operations). -+const unsigned CCMASK_VCMP_ALL = CCMASK_0; -+const unsigned CCMASK_VCMP_MIXED = CCMASK_1; -+const unsigned CCMASK_VCMP_NONE = CCMASK_3; -+const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; -+ - // The position of the low CC bit in an IPM result. 
- const unsigned IPM_CC = 28; - -@@ -75,6 +95,13 @@ const unsigned IPM_CC = 28; - const unsigned PFD_READ = 1; - const unsigned PFD_WRITE = 2; - -+// Number of bits in a vector register. -+const unsigned VectorBits = 128; -+ -+// Number of bytes in a vector register (and consequently the number of -+// bytes in a general permute vector). -+const unsigned VectorBytes = VectorBits / 8; -+ - // Return true if Val fits an LLILL operand. - static inline bool isImmLL(uint64_t Val) { - return (Val & ~0x000000000000ffffULL) == 0; -@@ -111,6 +138,9 @@ FunctionPass *createSystemZISelDag(Syste - FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); - FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); - FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); -+FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); -+ImmutablePass *createSystemZTargetTransformInfoPass( -+ const SystemZTargetMachine *TM); - } // end namespace llvm - - #endif -Index: llvm-36/lib/Target/SystemZ/SystemZ.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZ.td -+++ llvm-36/lib/Target/SystemZ/SystemZ.td -@@ -40,6 +40,7 @@ include "SystemZOperands.td" - include "SystemZPatterns.td" - include "SystemZInstrFormats.td" - include "SystemZInstrInfo.td" -+include "SystemZInstrVector.td" - include "SystemZInstrFP.td" - - def SystemZInstrInfo : InstrInfo {} -Index: llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZAsmPrinter.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp -@@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const Machine - .addImm(MI->getOperand(5).getImm()); - } - -+static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { -+ StringRef Name = "__tls_get_offset"; -+ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), -+ 
MCSymbolRefExpr::VK_PLT, -+ Context); -+} -+ -+static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { -+ StringRef Name = "_GLOBAL_OFFSET_TABLE_"; -+ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), -+ MCSymbolRefExpr::VK_None, -+ Context); -+} -+ -+// MI loads the high part of a vector from memory. Return an instruction -+// that uses replicating vector load Opcode to do the same thing. -+static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { -+ return MCInstBuilder(Opcode) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) -+ .addReg(MI->getOperand(1).getReg()) -+ .addImm(MI->getOperand(2).getImm()) -+ .addReg(MI->getOperand(3).getReg()); -+} -+ -+// MI stores the high part of a vector to memory. Return an instruction -+// that uses elemental vector store Opcode to do the same thing. -+static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { -+ return MCInstBuilder(Opcode) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) -+ .addReg(MI->getOperand(1).getReg()) -+ .addImm(MI->getOperand(2).getImm()) -+ .addReg(MI->getOperand(3).getReg()) -+ .addImm(0); -+} -+ - void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { - SystemZMCInstLower Lower(MF->getContext(), *this); - MCInst LoweredMI; -@@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction( - LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); - break; - -+ case SystemZ::TLS_GDCALL: -+ LoweredMI = MCInstBuilder(SystemZ::BRASL) -+ .addReg(SystemZ::R14D) -+ .addExpr(getTLSGetOffset(MF->getContext())) -+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD)); -+ break; -+ -+ case SystemZ::TLS_LDCALL: -+ LoweredMI = MCInstBuilder(SystemZ::BRASL) -+ .addReg(SystemZ::R14D) -+ .addExpr(getTLSGetOffset(MF->getContext())) -+ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); -+ break; -+ -+ case SystemZ::GOT: -+ LoweredMI = MCInstBuilder(SystemZ::LARL) -+ 
.addReg(MI->getOperand(0).getReg()) -+ .addExpr(getGlobalOffsetTable(MF->getContext())); -+ break; -+ - case SystemZ::IILF64: - LoweredMI = MCInstBuilder(SystemZ::IILF) - .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) -@@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction( - LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); - break; - -+ case SystemZ::VLVGP32: -+ LoweredMI = MCInstBuilder(SystemZ::VLVGP) -+ .addReg(MI->getOperand(0).getReg()) -+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg())) -+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())); -+ break; -+ -+ case SystemZ::VLR32: -+ case SystemZ::VLR64: -+ LoweredMI = MCInstBuilder(SystemZ::VLR) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); -+ break; -+ -+ case SystemZ::VL32: -+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); -+ break; -+ -+ case SystemZ::VL64: -+ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); -+ break; -+ -+ case SystemZ::VST32: -+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); -+ break; -+ -+ case SystemZ::VST64: -+ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG); -+ break; -+ -+ case SystemZ::LFER: -+ LoweredMI = MCInstBuilder(SystemZ::VLGVF) -+ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) -+ .addReg(0).addImm(0); -+ break; -+ -+ case SystemZ::LEFR: -+ LoweredMI = MCInstBuilder(SystemZ::VLVGF) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) -+ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) -+ .addReg(MI->getOperand(1).getReg()) -+ .addReg(0).addImm(0); -+ break; -+ - #define LOWER_LOW(NAME) \ - case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break - -@@ -172,6 +272,9 @@ void SystemZAsmPrinter::EmitInstruction( - static MCSymbolRefExpr::VariantKind - getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { - 
switch (Modifier) { -+ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; -+ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; -+ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; - case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; - } - llvm_unreachable("Invalid SystemCPModifier!"); -Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.h -+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.h -@@ -10,6 +10,9 @@ - #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H - #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H - -+#include "llvm/ADT/SmallVector.h" -+#include "llvm/CodeGen/CallingConvLower.h" -+ - namespace llvm { - namespace SystemZ { - const unsigned NumArgGPRs = 5; -@@ -18,6 +21,64 @@ namespace SystemZ { - const unsigned NumArgFPRs = 4; - extern const unsigned ArgFPRs[NumArgFPRs]; - } // end namespace SystemZ -+ -+class SystemZCCState : public CCState { -+private: -+ /// Records whether the value was a fixed argument. -+ /// See ISD::OutputArg::IsFixed. -+ SmallVector ArgIsFixed; -+ -+ /// Records whether the value was widened from a short vector type. -+ SmallVector ArgIsShortVector; -+ -+ // Check whether ArgVT is a short vector type. -+ bool IsShortVectorType(EVT ArgVT) { -+ return ArgVT.isVector() && ArgVT.getStoreSize() <= 8; -+ } -+ -+public: -+ SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, -+ SmallVectorImpl &locs, LLVMContext &C) -+ : CCState(CC, isVarArg, MF, locs, C) {} -+ -+ void AnalyzeFormalArguments(const SmallVectorImpl &Ins, -+ CCAssignFn Fn) { -+ // Formal arguments are always fixed. -+ ArgIsFixed.clear(); -+ for (unsigned i = 0; i < Ins.size(); ++i) -+ ArgIsFixed.push_back(true); -+ // Record whether the call operand was a short vector. 
-+ ArgIsShortVector.clear(); -+ for (unsigned i = 0; i < Ins.size(); ++i) -+ ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT)); -+ -+ CCState::AnalyzeFormalArguments(Ins, Fn); -+ } -+ -+ void AnalyzeCallOperands(const SmallVectorImpl &Outs, -+ CCAssignFn Fn) { -+ // Record whether the call operand was a fixed argument. -+ ArgIsFixed.clear(); -+ for (unsigned i = 0; i < Outs.size(); ++i) -+ ArgIsFixed.push_back(Outs[i].IsFixed); -+ // Record whether the call operand was a short vector. -+ ArgIsShortVector.clear(); -+ for (unsigned i = 0; i < Outs.size(); ++i) -+ ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT)); -+ -+ CCState::AnalyzeCallOperands(Outs, Fn); -+ } -+ -+ // This version of AnalyzeCallOperands in the base class is not usable -+ // since we must provide a means of accessing ISD::OutputArg::IsFixed. -+ void AnalyzeCallOperands(const SmallVectorImpl &Outs, -+ SmallVectorImpl &Flags, -+ CCAssignFn Fn) = delete; -+ -+ bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; } -+ bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; } -+}; -+ - } // end namespace llvm - - #endif -Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td -+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td -@@ -12,6 +12,20 @@ - class CCIfExtend - : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>; - -+class CCIfSubtarget -+ : CCIf" -+ "(State.getMachineFunction().getSubtarget()).", F), -+ A>; -+ -+// Match if this specific argument is a fixed (i.e. named) argument. -+class CCIfFixed -+ : CCIf<"static_cast(&State)->IsFixed(ValNo)", A>; -+ -+// Match if this specific argument was widened from a short vector type. 
-+class CCIfShortVector -+ : CCIf<"static_cast(&State)->IsShortVector(ValNo)", A>; -+ -+ - //===----------------------------------------------------------------------===// - // z/Linux return value calling convention - //===----------------------------------------------------------------------===// -@@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[ - // doesn't care about the ABI. All floating-point argument registers - // are call-clobbered, so we can use all of them here. - CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, -- CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>> -+ CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, -+ -+ // Similarly for vectors, with V24 being the ABI-compliant choice. -+ // Sub-128 vectors are returned in the same way, but they're widened -+ // to one of these types during type legalization. -+ CCIfSubtarget<"hasVector()", -+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], -+ CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> - - // ABI-compliant code returns long double by reference, but that conversion - // is left to higher-level code. Perhaps we could add an f128 definition -@@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[ - CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>, - CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>, - -+ // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors -+ // are passed in the same way, but they're widened to one of these types -+ // during type legalization. -+ CCIfSubtarget<"hasVector()", -+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], -+ CCIfFixed>>>, -+ -+ // However, sub-128 vectors which need to go on the stack occupy just a -+ // single 8-byte-aligned 8-byte stack slot. Pass as i64. -+ CCIfSubtarget<"hasVector()", -+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], -+ CCIfShortVector>>>, -+ -+ // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots. 
-+ CCIfSubtarget<"hasVector()", -+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], -+ CCAssignToStack<16, 8>>>, -+ - // Other arguments are passed in 8-byte-aligned 8-byte stack slots. - CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> - ]>; -Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp -@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const G - - unsigned SystemZConstantPoolValue::getRelocationInfo() const { - switch (Modifier) { -+ case SystemZCP::TLSGD: -+ case SystemZCP::TLSLDM: -+ case SystemZCP::DTPOFF: -+ // May require a dynamic relocation. -+ return 2; - case SystemZCP::NTPOFF: - // May require a relocation, but the relocations are always resolved - // by the static linker. -Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.h -+++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h -@@ -19,13 +19,17 @@ class GlobalValue; - - namespace SystemZCP { - enum SystemZCPModifier { -+ TLSGD, -+ TLSLDM, -+ DTPOFF, - NTPOFF - }; - } // end namespace SystemZCP - - /// A SystemZ-specific constant pool value. At present, the only --/// defined constant pool values are offsets of thread-local variables --/// (written x@NTPOFF). -+/// defined constant pool values are module IDs or offsets of -+/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, -+/// or x@NTPOFF). 
- class SystemZConstantPoolValue : public MachineConstantPoolValue { - const GlobalValue *GV; - SystemZCP::SystemZCPModifier Modifier; -Index: llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp -@@ -256,6 +256,13 @@ class SystemZDAGToDAGISel : public Selec - Addr, Base, Disp, Index); - } - -+ // Try to match Addr as an address with a base, 12-bit displacement -+ // and index, where the index is element Elem of a vector. -+ // Return true on success, storing the base, displacement and vector -+ // in Base, Disp and Index respectively. -+ bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, -+ SDValue &Disp, SDValue &Index) const; -+ - // Check whether (or Op (and X InsertMask)) is effectively an insertion - // of X into bits InsertMask of some Y != Op. Return true if so and - // set Op to that Y. -@@ -293,6 +300,12 @@ class SystemZDAGToDAGISel : public Selec - SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, - uint64_t UpperVal, uint64_t LowerVal); - -+ // Try to use gather instruction Opcode to implement vector insertion N. -+ SDNode *tryGather(SDNode *N, unsigned Opcode); -+ -+ // Try to use scatter instruction Opcode to implement store Store. -+ SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode); -+ - // Return true if Load and Store are loads and stores of the same size - // and are guaranteed not to overlap. Such operations can be implemented - // using block (SS-format) instructions. 
-@@ -643,6 +656,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr( - return true; - } - -+bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, -+ SDValue &Base, -+ SDValue &Disp, -+ SDValue &Index) const { -+ SDValue Regs[2]; -+ if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) && -+ Regs[0].getNode() && Regs[1].getNode()) { -+ for (unsigned int I = 0; I < 2; ++I) { -+ Base = Regs[I]; -+ Index = Regs[1 - I]; -+ // We can't tell here whether the index vector has the right type -+ // for the access; the caller needs to do that instead. -+ if (Index.getOpcode() == ISD::ZERO_EXTEND) -+ Index = Index.getOperand(0); -+ if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && -+ Index.getOperand(1) == Elem) { -+ Index = Index.getOperand(0); -+ return true; -+ } -+ } -+ } -+ return false; -+} -+ - bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, - uint64_t InsertMask) const { - // We're only interested in cases where the insertion is into some operand -@@ -896,6 +933,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZer - } - - unsigned Opcode = SystemZ::RISBG; -+ // Prefer RISBGN if available, since it does not clobber CC. -+ if (Subtarget.hasMiscellaneousExtensions()) -+ Opcode = SystemZ::RISBGN; - EVT OpcodeVT = MVT::i64; - if (VT == MVT::i32 && Subtarget.hasHighWord()) { - Opcode = SystemZ::RISBMux; -@@ -943,9 +983,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SD - - // See whether we can avoid an AND in the first operand by converting - // ROSBG to RISBG. -- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) -+ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { - Opcode = SystemZ::RISBG; -- -+ // Prefer RISBGN if available, since it does not clobber CC. 
-+ if (Subtarget.hasMiscellaneousExtensions()) -+ Opcode = SystemZ::RISBGN; -+ } -+ - EVT VT = N->getValueType(0); - SDValue Ops[5] = { - convertTo(SDLoc(N), MVT::i64, Op0), -@@ -973,6 +1017,71 @@ SDNode *SystemZDAGToDAGISel::splitLargeI - return Or.getNode(); - } - -+SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { -+ SDValue ElemV = N->getOperand(2); -+ auto *ElemN = dyn_cast(ElemV); -+ if (!ElemN) -+ return 0; -+ -+ unsigned Elem = ElemN->getZExtValue(); -+ EVT VT = N->getValueType(0); -+ if (Elem >= VT.getVectorNumElements()) -+ return 0; -+ -+ auto *Load = dyn_cast(N->getOperand(1)); -+ if (!Load || !Load->hasOneUse()) -+ return 0; -+ if (Load->getMemoryVT().getSizeInBits() != -+ Load->getValueType(0).getSizeInBits()) -+ return 0; -+ -+ SDValue Base, Disp, Index; -+ if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || -+ Index.getValueType() != VT.changeVectorElementTypeToInteger()) -+ return 0; -+ -+ SDLoc DL(Load); -+ SDValue Ops[] = { -+ N->getOperand(0), Base, Disp, Index, -+ CurDAG->getTargetConstant(Elem, MVT::i32), Load->getChain() -+ }; -+ SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); -+ ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); -+ return Res; -+} -+ -+SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { -+ SDValue Value = Store->getValue(); -+ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) -+ return 0; -+ if (Store->getMemoryVT().getSizeInBits() != -+ Value.getValueType().getSizeInBits()) -+ return 0; -+ -+ SDValue ElemV = Value.getOperand(1); -+ auto *ElemN = dyn_cast(ElemV); -+ if (!ElemN) -+ return 0; -+ -+ SDValue Vec = Value.getOperand(0); -+ EVT VT = Vec.getValueType(); -+ unsigned Elem = ElemN->getZExtValue(); -+ if (Elem >= VT.getVectorNumElements()) -+ return 0; -+ -+ SDValue Base, Disp, Index; -+ if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || -+ Index.getValueType() != VT.changeVectorElementTypeToInteger()) -+ 
return 0; -+ -+ SDLoc DL(Store); -+ SDValue Ops[] = { -+ Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, MVT::i32), -+ Store->getChain() -+ }; -+ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); -+} -+ - bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, - LoadSDNode *Load) const { - // Check that the two memory operands have the same size. -@@ -1109,6 +1218,26 @@ SDNode *SystemZDAGToDAGISel::Select(SDNo - } - break; - } -+ -+ case ISD::INSERT_VECTOR_ELT: { -+ EVT VT = Node->getValueType(0); -+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); -+ if (ElemBitSize == 32) -+ ResNode = tryGather(Node, SystemZ::VGEF); -+ else if (ElemBitSize == 64) -+ ResNode = tryGather(Node, SystemZ::VGEG); -+ break; -+ } -+ -+ case ISD::STORE: { -+ auto *Store = cast(Node); -+ unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits(); -+ if (ElemBitSize == 32) -+ ResNode = tryScatter(Store, SystemZ::VSCEF); -+ else if (ElemBitSize == 64) -+ ResNode = tryScatter(Store, SystemZ::VSCEG); -+ break; -+ } - } - - // Select the default instruction -Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp -@@ -20,6 +20,7 @@ - #include "llvm/CodeGen/MachineInstrBuilder.h" - #include "llvm/CodeGen/MachineRegisterInfo.h" - #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -+#include "llvm/IR/Intrinsics.h" - #include - - using namespace llvm; -@@ -90,11 +91,25 @@ SystemZTargetLowering::SystemZTargetLowe - addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); - else - addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); -- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); -- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); -- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); -+ addRegisterClass(MVT::i64, 
&SystemZ::GR64BitRegClass); -+ if (Subtarget.hasVector()) { -+ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); -+ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); -+ } else { -+ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); -+ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); -+ } - addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); - -+ if (Subtarget.hasVector()) { -+ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); -+ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); -+ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); -+ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); -+ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); -+ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); -+ } -+ - // Compute derived properties from the register classes - computeRegisterProperties(); - -@@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowe - setSchedulingPreference(Sched::RegPressure); - - setBooleanContents(ZeroOrOneBooleanContent); -- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? -+ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - - // Instructions are strings of 2-byte aligned 2-byte values. - setMinFunctionAlignment(2); -@@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowe - // available, or if the operand is constant. - setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); - -+ // Use POPCNT on z196 and above. -+ if (Subtarget.hasPopulationCount()) -+ setOperationAction(ISD::CTPOP, VT, Custom); -+ else -+ setOperationAction(ISD::CTPOP, VT, Expand); -+ - // No special instructions for these. -- setOperationAction(ISD::CTPOP, VT, Expand); - setOperationAction(ISD::CTTZ, VT, Expand); - setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); - setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); -@@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowe - // Handle prefetches with PFD or PFDRL. 
- setOperationAction(ISD::PREFETCH, MVT::Other, Custom); - -+ for (MVT VT : MVT::vector_valuetypes()) { -+ // Assume by default that all vector operations need to be expanded. -+ for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) -+ if (getOperationAction(Opcode, VT) == Legal) -+ setOperationAction(Opcode, VT, Expand); -+ -+ // Likewise all truncating stores and extending loads. -+ for (MVT InnerVT : MVT::vector_valuetypes()) { -+ setTruncStoreAction(VT, InnerVT, Expand); -+ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); -+ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); -+ } -+ -+ if (isTypeLegal(VT)) { -+ // These operations are legal for anything that can be stored in a -+ // vector register, even if there is no native support for the format -+ // as such. In particular, we can do these for v4f32 even though there -+ // are no specific instructions for that format. -+ setOperationAction(ISD::LOAD, VT, Legal); -+ setOperationAction(ISD::STORE, VT, Legal); -+ setOperationAction(ISD::VSELECT, VT, Legal); -+ setOperationAction(ISD::BITCAST, VT, Legal); -+ setOperationAction(ISD::UNDEF, VT, Legal); -+ -+ // Likewise, except that we need to replace the nodes with something -+ // more specific. -+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); -+ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); -+ } -+ } -+ -+ // Handle integer vector types. -+ for (MVT VT : MVT::integer_vector_valuetypes()) { -+ if (isTypeLegal(VT)) { -+ // These operations have direct equivalents. 
-+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); -+ setOperationAction(ISD::ADD, VT, Legal); -+ setOperationAction(ISD::SUB, VT, Legal); -+ if (VT != MVT::v2i64) -+ setOperationAction(ISD::MUL, VT, Legal); -+ setOperationAction(ISD::AND, VT, Legal); -+ setOperationAction(ISD::OR, VT, Legal); -+ setOperationAction(ISD::XOR, VT, Legal); -+ setOperationAction(ISD::CTPOP, VT, Custom); -+ setOperationAction(ISD::CTTZ, VT, Legal); -+ setOperationAction(ISD::CTLZ, VT, Legal); -+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); -+ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); -+ -+ // Convert a GPR scalar to a vector by inserting it into element 0. -+ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); -+ -+ // Use a series of unpacks for extensions. -+ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); -+ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); -+ -+ // Detect shifts by a scalar amount and convert them into -+ // V*_BY_SCALAR. -+ setOperationAction(ISD::SHL, VT, Custom); -+ setOperationAction(ISD::SRA, VT, Custom); -+ setOperationAction(ISD::SRL, VT, Custom); -+ -+ // At present ROTL isn't matched by DAGCombiner. ROTR should be -+ // converted into ROTL. -+ setOperationAction(ISD::ROTL, VT, Expand); -+ setOperationAction(ISD::ROTR, VT, Expand); -+ -+ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands -+ // and inverting the result as necessary. -+ setOperationAction(ISD::SETCC, VT, Custom); -+ } -+ } -+ -+ if (Subtarget.hasVector()) { -+ // There should be no need to check for float types other than v2f64 -+ // since <2 x f32> isn't a legal type. -+ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); -+ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); -+ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); -+ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); -+ } -+ - // Handle floating-point types. 
- for (unsigned I = MVT::FIRST_FP_VALUETYPE; - I <= MVT::LAST_FP_VALUETYPE; -@@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowe - } - } - -+ // Handle floating-point vector types. -+ if (Subtarget.hasVector()) { -+ // Scalar-to-vector conversion is just a subreg. -+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); -+ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); -+ -+ // Some insertions and extractions can be done directly but others -+ // need to go via integers. -+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); -+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); -+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); -+ -+ // These operations have direct equivalents. -+ setOperationAction(ISD::FADD, MVT::v2f64, Legal); -+ setOperationAction(ISD::FNEG, MVT::v2f64, Legal); -+ setOperationAction(ISD::FSUB, MVT::v2f64, Legal); -+ setOperationAction(ISD::FMUL, MVT::v2f64, Legal); -+ setOperationAction(ISD::FMA, MVT::v2f64, Legal); -+ setOperationAction(ISD::FDIV, MVT::v2f64, Legal); -+ setOperationAction(ISD::FABS, MVT::v2f64, Legal); -+ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); -+ setOperationAction(ISD::FRINT, MVT::v2f64, Legal); -+ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); -+ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); -+ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); -+ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); -+ setOperationAction(ISD::FROUND, MVT::v2f64, Legal); -+ } -+ - // We have fused multiply-addition for f32 and f64 but not f128. - setOperationAction(ISD::FMA, MVT::f32, Legal); - setOperationAction(ISD::FMA, MVT::f64, Legal); -@@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowe - - // We have 64-bit FPR<->GPR moves, but need special handling for - // 32-bit forms. 
-- setOperationAction(ISD::BITCAST, MVT::i32, Custom); -- setOperationAction(ISD::BITCAST, MVT::f32, Custom); -+ if (!Subtarget.hasVector()) { -+ setOperationAction(ISD::BITCAST, MVT::i32, Custom); -+ setOperationAction(ISD::BITCAST, MVT::f32, Custom); -+ } - - // VASTART and VACOPY need to deal with the SystemZ-specific varargs - // structure, but VAEND is a no-op. -@@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowe - - // Codes for which we want to perform some z-specific combinations. - setTargetDAGCombine(ISD::SIGN_EXTEND); -+ setTargetDAGCombine(ISD::STORE); -+ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); -+ setTargetDAGCombine(ISD::FP_ROUND); -+ -+ // Handle intrinsics. -+ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); -+ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); - - // We want to use MVC in preference to even a single load/store pair. - MaxStoresPerMemcpy = 0; -@@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal - return Imm.isZero() || Imm.isNegZero(); - } - -+bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { -+ // We can use CGFI or CLGFI. -+ return isInt<32>(Imm) || isUInt<32>(Imm); -+} -+ -+bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { -+ // We can use ALGFI or SLGFI. -+ return isUInt<32>(Imm) || isUInt<32>(-Imm); -+} -+ - bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, - unsigned, - unsigned, -@@ -623,6 +776,24 @@ bool SystemZTargetLowering::mayBeEmitted - return true; - } - -+// We do not yet support 128-bit single-element vector types. If the user -+// attempts to use such types as function argument or return type, prefer -+// to error out instead of emitting code violating the ABI. 
-+static void VerifyVectorType(MVT VT, EVT ArgVT) { -+ if (ArgVT.isVector() && !VT.isVector()) -+ report_fatal_error("Unsupported vector argument or return type"); -+} -+ -+static void VerifyVectorTypes(const SmallVectorImpl &Ins) { -+ for (unsigned i = 0; i < Ins.size(); ++i) -+ VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); -+} -+ -+static void VerifyVectorTypes(const SmallVectorImpl &Outs) { -+ for (unsigned i = 0; i < Outs.size(); ++i) -+ VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); -+} -+ - // Value is a value that has been passed to us in the location described by VA - // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining - // any loads onto Chain. -@@ -643,7 +814,15 @@ static SDValue convertLocVTToValVT(Selec - else if (VA.getLocInfo() == CCValAssign::Indirect) - Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, - MachinePointerInfo(), false, false, false, 0); -- else -+ else if (VA.getLocInfo() == CCValAssign::BCvt) { -+ // If this is a short vector argument loaded from the stack, -+ // extend from i64 to full vector size and then bitcast. -+ assert(VA.getLocVT() == MVT::i64); -+ assert(VA.getValVT().isVector()); -+ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, -+ Value, DAG.getUNDEF(MVT::i64)); -+ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); -+ } else - assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); - return Value; - } -@@ -660,6 +839,14 @@ static SDValue convertValVTToLocVT(Selec - return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); - case CCValAssign::AExt: - return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); -+ case CCValAssign::BCvt: -+ // If this is a short vector argument to be stored to the stack, -+ // bitcast to v2i64 and then extract first element. 
-+ assert(VA.getLocVT() == MVT::i64); -+ assert(VA.getValVT().isVector()); -+ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); -+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, -+ DAG.getConstant(0, MVT::i32)); - case CCValAssign::Full: - return Value; - default: -@@ -680,9 +867,13 @@ LowerFormalArguments(SDValue Chain, Call - auto *TFL = static_cast( - DAG.getSubtarget().getFrameLowering()); - -+ // Detect unsupported vector argument types. -+ if (Subtarget.hasVector()) -+ VerifyVectorTypes(Ins); -+ - // Assign locations to all of the incoming arguments. - SmallVector ArgLocs; -- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); -+ SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); - - unsigned NumFixedGPRs = 0; -@@ -714,6 +905,14 @@ LowerFormalArguments(SDValue Chain, Call - NumFixedFPRs += 1; - RC = &SystemZ::FP64BitRegClass; - break; -+ case MVT::v16i8: -+ case MVT::v8i16: -+ case MVT::v4i32: -+ case MVT::v2i64: -+ case MVT::v4f32: -+ case MVT::v2f64: -+ RC = &SystemZ::VR128BitRegClass; -+ break; - } - - unsigned VReg = MRI.createVirtualRegister(RC); -@@ -818,9 +1017,15 @@ SystemZTargetLowering::LowerCall(CallLow - MachineFunction &MF = DAG.getMachineFunction(); - EVT PtrVT = getPointerTy(); - -+ // Detect unsupported vector argument and return types. -+ if (Subtarget.hasVector()) { -+ VerifyVectorTypes(Outs); -+ VerifyVectorTypes(Ins); -+ } -+ - // Analyze the operands of the call, assigning locations to each operand. 
- SmallVector ArgLocs; -- CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); -+ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); - - // We don't support GuaranteedTailCallOpt, only automatically-detected -@@ -972,6 +1177,10 @@ SystemZTargetLowering::LowerReturn(SDVal - SDLoc DL, SelectionDAG &DAG) const { - MachineFunction &MF = DAG.getMachineFunction(); - -+ // Detect unsupported vector return types. -+ if (Subtarget.hasVector()) -+ VerifyVectorTypes(Outs); -+ - // Assign locations to each returned value. - SmallVector RetLocs; - CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); -@@ -1015,6 +1224,207 @@ prepareVolatileOrAtomicLoad(SDValue Chai - return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); - } - -+// Return true if Op is an intrinsic node with chain that returns the CC value -+// as its only (other) argument. Provide the associated SystemZISD opcode and -+// the mask of valid CC values if so. -+static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, -+ unsigned &CCValid) { -+ unsigned Id = cast(Op.getOperand(1))->getZExtValue(); -+ switch (Id) { -+ case Intrinsic::s390_tbegin: -+ Opcode = SystemZISD::TBEGIN; -+ CCValid = SystemZ::CCMASK_TBEGIN; -+ return true; -+ -+ case Intrinsic::s390_tbegin_nofloat: -+ Opcode = SystemZISD::TBEGIN_NOFLOAT; -+ CCValid = SystemZ::CCMASK_TBEGIN; -+ return true; -+ -+ case Intrinsic::s390_tend: -+ Opcode = SystemZISD::TEND; -+ CCValid = SystemZ::CCMASK_TEND; -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+// Return true if Op is an intrinsic node without chain that returns the -+// CC value as its final argument. Provide the associated SystemZISD -+// opcode and the mask of valid CC values if so. 
-+static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { -+ unsigned Id = cast(Op.getOperand(0))->getZExtValue(); -+ switch (Id) { -+ case Intrinsic::s390_vpkshs: -+ case Intrinsic::s390_vpksfs: -+ case Intrinsic::s390_vpksgs: -+ Opcode = SystemZISD::PACKS_CC; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vpklshs: -+ case Intrinsic::s390_vpklsfs: -+ case Intrinsic::s390_vpklsgs: -+ Opcode = SystemZISD::PACKLS_CC; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vceqbs: -+ case Intrinsic::s390_vceqhs: -+ case Intrinsic::s390_vceqfs: -+ case Intrinsic::s390_vceqgs: -+ Opcode = SystemZISD::VICMPES; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vchbs: -+ case Intrinsic::s390_vchhs: -+ case Intrinsic::s390_vchfs: -+ case Intrinsic::s390_vchgs: -+ Opcode = SystemZISD::VICMPHS; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vchlbs: -+ case Intrinsic::s390_vchlhs: -+ case Intrinsic::s390_vchlfs: -+ case Intrinsic::s390_vchlgs: -+ Opcode = SystemZISD::VICMPHLS; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vtm: -+ Opcode = SystemZISD::VTM; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vfaebs: -+ case Intrinsic::s390_vfaehs: -+ case Intrinsic::s390_vfaefs: -+ Opcode = SystemZISD::VFAE_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vfaezbs: -+ case Intrinsic::s390_vfaezhs: -+ case Intrinsic::s390_vfaezfs: -+ Opcode = SystemZISD::VFAEZ_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vfeebs: -+ case Intrinsic::s390_vfeehs: -+ case Intrinsic::s390_vfeefs: -+ Opcode = SystemZISD::VFEE_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vfeezbs: -+ case Intrinsic::s390_vfeezhs: -+ case Intrinsic::s390_vfeezfs: -+ Opcode = SystemZISD::VFEEZ_CC; -+ CCValid = SystemZ::CCMASK_ANY; 
-+ return true; -+ -+ case Intrinsic::s390_vfenebs: -+ case Intrinsic::s390_vfenehs: -+ case Intrinsic::s390_vfenefs: -+ Opcode = SystemZISD::VFENE_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vfenezbs: -+ case Intrinsic::s390_vfenezhs: -+ case Intrinsic::s390_vfenezfs: -+ Opcode = SystemZISD::VFENEZ_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vistrbs: -+ case Intrinsic::s390_vistrhs: -+ case Intrinsic::s390_vistrfs: -+ Opcode = SystemZISD::VISTR_CC; -+ CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; -+ return true; -+ -+ case Intrinsic::s390_vstrcbs: -+ case Intrinsic::s390_vstrchs: -+ case Intrinsic::s390_vstrcfs: -+ Opcode = SystemZISD::VSTRC_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vstrczbs: -+ case Intrinsic::s390_vstrczhs: -+ case Intrinsic::s390_vstrczfs: -+ Opcode = SystemZISD::VSTRCZ_CC; -+ CCValid = SystemZ::CCMASK_ANY; -+ return true; -+ -+ case Intrinsic::s390_vfcedbs: -+ Opcode = SystemZISD::VFCMPES; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vfchdbs: -+ Opcode = SystemZISD::VFCMPHS; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vfchedbs: -+ Opcode = SystemZISD::VFCMPHES; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ case Intrinsic::s390_vftcidb: -+ Opcode = SystemZISD::VFTCI; -+ CCValid = SystemZ::CCMASK_VCMP; -+ return true; -+ -+ default: -+ return false; -+ } -+} -+ -+// Emit an intrinsic with chain with a glued value instead of its CC result. -+static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, -+ unsigned Opcode) { -+ // Copy all operands except the intrinsic ID. 
-+ unsigned NumOps = Op.getNumOperands(); -+ SmallVector Ops; -+ Ops.reserve(NumOps - 1); -+ Ops.push_back(Op.getOperand(0)); -+ for (unsigned I = 2; I < NumOps; ++I) -+ Ops.push_back(Op.getOperand(I)); -+ -+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); -+ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); -+ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); -+ SDValue OldChain = SDValue(Op.getNode(), 1); -+ SDValue NewChain = SDValue(Intr.getNode(), 0); -+ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); -+ return Intr; -+} -+ -+// Emit an intrinsic with a glued value instead of its CC result. -+static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, -+ unsigned Opcode) { -+ // Copy all operands except the intrinsic ID. -+ unsigned NumOps = Op.getNumOperands(); -+ SmallVector Ops; -+ Ops.reserve(NumOps - 1); -+ for (unsigned I = 1; I < NumOps; ++I) -+ Ops.push_back(Op.getOperand(I)); -+ -+ if (Op->getNumValues() == 1) -+ return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); -+ assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); -+ SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); -+ return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); -+} -+ - // CC is a comparison that will be implemented using an integer or - // floating-point comparison. Return the condition code mask for - // a branch on true. In the integer case, CCMASK_CMP_UO is set for -@@ -1529,6 +1939,8 @@ static void adjustForTestUnderMask(Selec - MaskVal = -(CmpVal & -CmpVal); - NewC.ICmpType = SystemZICMP::UnsignedOnly; - } -+ if (!MaskVal) -+ return; - - // Check whether the combination of mask, comparison value and comparison - // type are suitable. -@@ -1570,9 +1982,57 @@ static void adjustForTestUnderMask(Selec - C.CCMask = NewCCMask; - } - -+// Return a Comparison that tests the condition-code result of intrinsic -+// node Call against constant integer CC using comparison code Cond. 
-+// Opcode is the opcode of the SystemZISD operation for the intrinsic -+// and CCValid is the set of possible condition-code results. -+static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, -+ SDValue Call, unsigned CCValid, uint64_t CC, -+ ISD::CondCode Cond) { -+ Comparison C(Call, SDValue()); -+ C.Opcode = Opcode; -+ C.CCValid = CCValid; -+ if (Cond == ISD::SETEQ) -+ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. -+ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; -+ else if (Cond == ISD::SETNE) -+ // ...and the inverse of that. -+ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; -+ else if (Cond == ISD::SETLT || Cond == ISD::SETULT) -+ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, -+ // always true for CC>3. -+ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; -+ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) -+ // ...and the inverse of that. -+ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; -+ else if (Cond == ISD::SETLE || Cond == ISD::SETULE) -+ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), -+ // always true for CC>3. -+ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; -+ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) -+ // ...and the inverse of that. -+ C.CCMask = CC < 4 ? ~(-1 << (3 - CC)) : 0; -+ else -+ llvm_unreachable("Unexpected integer comparison type"); -+ C.CCMask &= CCValid; -+ return C; -+} -+ - // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1. 
- static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1, - ISD::CondCode Cond) { -+ if (CmpOp1.getOpcode() == ISD::Constant) { -+ uint64_t Constant = cast(CmpOp1)->getZExtValue(); -+ unsigned Opcode, CCValid; -+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN && -+ CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) && -+ isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid)) -+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); -+ if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN && -+ CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 && -+ isIntrinsicWithCC(CmpOp0, Opcode, CCValid)) -+ return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond); -+ } - Comparison C(CmpOp0, CmpOp1); - C.CCMask = CCMaskForCondCode(Cond); - if (C.Op0.getValueType().isFloatingPoint()) { -@@ -1614,6 +2074,20 @@ static Comparison getCmp(SelectionDAG &D - - // Emit the comparison instruction described by C. - static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) { -+ if (!C.Op1.getNode()) { -+ SDValue Op; -+ switch (C.Op0.getOpcode()) { -+ case ISD::INTRINSIC_W_CHAIN: -+ Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode); -+ break; -+ case ISD::INTRINSIC_WO_CHAIN: -+ Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode); -+ break; -+ default: -+ llvm_unreachable("Invalid comparison operands"); -+ } -+ return SDValue(Op.getNode(), Op->getNumValues() - 1); -+ } - if (C.Opcode == SystemZISD::ICMP) - return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1, - DAG.getConstant(C.ICmpType, MVT::i32)); -@@ -1682,12 +2156,142 @@ static SDValue emitSETCC(SelectionDAG &D - return Result; - } - -+// Return the SystemISD vector comparison operation for CC, or 0 if it cannot -+// be done directly. IsFP is true if CC is for a floating-point rather than -+// integer comparison. -+static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) { -+ switch (CC) { -+ case ISD::SETOEQ: -+ case ISD::SETEQ: -+ return IsFP ? 
SystemZISD::VFCMPE : SystemZISD::VICMPE; -+ -+ case ISD::SETOGE: -+ case ISD::SETGE: -+ return IsFP ? SystemZISD::VFCMPHE : 0; -+ -+ case ISD::SETOGT: -+ case ISD::SETGT: -+ return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH; -+ -+ case ISD::SETUGT: -+ return IsFP ? 0 : SystemZISD::VICMPHL; -+ -+ default: -+ return 0; -+ } -+} -+ -+// Return the SystemZISD vector comparison operation for CC or its inverse, -+// or 0 if neither can be done directly. Indicate in Invert whether the -+// result is for the inverse of CC. IsFP is true if CC is for a -+// floating-point rather than integer comparison. -+static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP, -+ bool &Invert) { -+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) { -+ Invert = false; -+ return Opcode; -+ } -+ -+ CC = ISD::getSetCCInverse(CC, !IsFP); -+ if (unsigned Opcode = getVectorComparison(CC, IsFP)) { -+ Invert = true; -+ return Opcode; -+ } -+ -+ return 0; -+} -+ -+// Return a v2f64 that contains the extended form of elements Start and Start+1 -+// of v4f32 value Op. -+static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, -+ SDValue Op) { -+ int Mask[] = { Start, -1, Start + 1, -1 }; -+ Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); -+ return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); -+} -+ -+// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, -+// producing a result of type VT. -+static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, -+ EVT VT, SDValue CmpOp0, SDValue CmpOp1) { -+ // There is no hardware support for v4f32, so extend the vector into -+ // two v2f64s and compare those. 
-+ if (CmpOp0.getValueType() == MVT::v4f32) { -+ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); -+ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); -+ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); -+ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); -+ SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); -+ SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); -+ return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); -+ } -+ return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); -+} -+ -+// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing -+// an integer mask of type VT. -+static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, -+ ISD::CondCode CC, SDValue CmpOp0, -+ SDValue CmpOp1) { -+ bool IsFP = CmpOp0.getValueType().isFloatingPoint(); -+ bool Invert = false; -+ SDValue Cmp; -+ switch (CC) { -+ // Handle tests for order using (or (ogt y x) (oge x y)). -+ case ISD::SETUO: -+ Invert = true; -+ case ISD::SETO: { -+ assert(IsFP && "Unexpected integer comparison"); -+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); -+ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); -+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); -+ break; -+ } -+ -+ // Handle <> tests using (or (ogt y x) (ogt x y)). -+ case ISD::SETUEQ: -+ Invert = true; -+ case ISD::SETONE: { -+ assert(IsFP && "Unexpected integer comparison"); -+ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); -+ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); -+ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); -+ break; -+ } -+ -+ // Otherwise a single comparison is enough. It doesn't really -+ // matter whether we try the inversion or the swap first, since -+ // there are no cases where both work. 
-+ default: -+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) -+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); -+ else { -+ CC = ISD::getSetCCSwappedOperands(CC); -+ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) -+ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); -+ else -+ llvm_unreachable("Unhandled comparison"); -+ } -+ break; -+ } -+ if (Invert) { -+ SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, -+ DAG.getConstant(65535, MVT::i32)); -+ Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); -+ Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); -+ } -+ return Cmp; -+} -+ - SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, - SelectionDAG &DAG) const { - SDValue CmpOp0 = Op.getOperand(0); - SDValue CmpOp1 = Op.getOperand(1); - ISD::CondCode CC = cast(Op.getOperand(2))->get(); - SDLoc DL(Op); -+ EVT VT = Op.getValueType(); -+ if (VT.isVector()) -+ return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); - - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); - SDValue Glue = emitCmp(DAG, DL, C); -@@ -1695,7 +2299,6 @@ SDValue SystemZTargetLowering::lowerSETC - } - - SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { -- SDValue Chain = Op.getOperand(0); - ISD::CondCode CC = cast(Op.getOperand(1))->get(); - SDValue CmpOp0 = Op.getOperand(2); - SDValue CmpOp1 = Op.getOperand(3); -@@ -1705,7 +2308,7 @@ SDValue SystemZTargetLowering::lowerBR_C - Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); - SDValue Glue = emitCmp(DAG, DL, C); - return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), -- Chain, DAG.getConstant(C.CCValid, MVT::i32), -+ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32), - DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); - } - -@@ -1831,6 +2434,52 @@ SDValue SystemZTargetLowering::lowerGlob - return Result; - } - -+SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, -+ SelectionDAG &DAG, -+ unsigned Opcode, -+ 
SDValue GOTOffset) const { -+ SDLoc DL(Node); -+ EVT PtrVT = getPointerTy(); -+ SDValue Chain = DAG.getEntryNode(); -+ SDValue Glue; -+ -+ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. -+ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); -+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); -+ Glue = Chain.getValue(1); -+ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); -+ Glue = Chain.getValue(1); -+ -+ // The first call operand is the chain and the second is the TLS symbol. -+ SmallVector Ops; -+ Ops.push_back(Chain); -+ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, -+ Node->getValueType(0), -+ 0, 0)); -+ -+ // Add argument registers to the end of the list so that they are -+ // known live into the call. -+ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); -+ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); -+ -+ // Add a register mask operand representing the call-preserved registers. -+ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); -+ const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C); -+ assert(Mask && "Missing call preserved mask for calling convention"); -+ Ops.push_back(DAG.getRegisterMask(Mask)); -+ -+ // Glue the call to the argument copies. -+ Ops.push_back(Glue); -+ -+ // Emit the call. -+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); -+ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); -+ Glue = Chain.getValue(1); -+ -+ // Copy the return value from %r2. 
-+ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); -+} -+ - SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const { - SDLoc DL(Node); -@@ -1838,9 +2487,6 @@ SDValue SystemZTargetLowering::lowerGlob - EVT PtrVT = getPointerTy(); - TLSModel::Model model = DAG.getTarget().getTLSModel(GV); - -- if (model != TLSModel::LocalExec) -- llvm_unreachable("only local-exec TLS mode supported"); -- - // The high part of the thread pointer is in access register 0. - SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, - DAG.getConstant(0, MVT::i32)); -@@ -1856,15 +2502,79 @@ SDValue SystemZTargetLowering::lowerGlob - DAG.getConstant(32, PtrVT)); - SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); - -- // Get the offset of GA from the thread pointer. -- SystemZConstantPoolValue *CPV = -- SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); -- -- // Force the offset into the constant pool and load it from there. -- SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); -- SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -- CPAddr, MachinePointerInfo::getConstantPool(), -- false, false, false, 0); -+ // Get the offset of GA from the thread pointer, based on the TLS model. -+ SDValue Offset; -+ switch (model) { -+ case TLSModel::GeneralDynamic: { -+ // Load the GOT offset of the tls_index (module ID / per-symbol offset). -+ SystemZConstantPoolValue *CPV = -+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); -+ -+ Offset = DAG.getConstantPool(CPV, PtrVT, 8); -+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -+ Offset, MachinePointerInfo::getConstantPool(), -+ false, false, false, 0); -+ -+ // Call __tls_get_offset to retrieve the offset. -+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); -+ break; -+ } -+ -+ case TLSModel::LocalDynamic: { -+ // Load the GOT offset of the module ID. 
-+ SystemZConstantPoolValue *CPV = -+ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); -+ -+ Offset = DAG.getConstantPool(CPV, PtrVT, 8); -+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -+ Offset, MachinePointerInfo::getConstantPool(), -+ false, false, false, 0); -+ -+ // Call __tls_get_offset to retrieve the module base offset. -+ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); -+ -+ // Note: The SystemZLDCleanupPass will remove redundant computations -+ // of the module base offset. Count total number of local-dynamic -+ // accesses to trigger execution of that pass. -+ SystemZMachineFunctionInfo* MFI = -+ DAG.getMachineFunction().getInfo(); -+ MFI->incNumLocalDynamicTLSAccesses(); -+ -+ // Add the per-symbol offset. -+ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); -+ -+ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); -+ DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -+ DTPOffset, MachinePointerInfo::getConstantPool(), -+ false, false, false, 0); -+ -+ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); -+ break; -+ } -+ -+ case TLSModel::InitialExec: { -+ // Load the offset from the GOT. -+ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, -+ SystemZII::MO_INDNTPOFF); -+ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); -+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -+ Offset, MachinePointerInfo::getGOT(), -+ false, false, false, 0); -+ break; -+ } -+ -+ case TLSModel::LocalExec: { -+ // Force the offset into the constant pool and load it from there. -+ SystemZConstantPoolValue *CPV = -+ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); -+ -+ Offset = DAG.getConstantPool(CPV, PtrVT, 8); -+ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), -+ Offset, MachinePointerInfo::getConstantPool(), -+ false, false, false, 0); -+ break; -+ } -+ } - - // Add the base and offset together. 
- return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); -@@ -1916,6 +2626,13 @@ SDValue SystemZTargetLowering::lowerBITC - EVT InVT = In.getValueType(); - EVT ResVT = Op.getValueType(); - -+ // Convert loads directly. This is normally done by DAGCombiner, -+ // but we need this case for bitcasts that are created during lowering -+ // and which are then lowered themselves. -+ if (auto *LoadN = dyn_cast(In)) -+ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), -+ LoadN->getMemOperand()); -+ - if (InVT == MVT::i32 && ResVT == MVT::f32) { - SDValue In64; - if (Subtarget.hasHighWord()) { -@@ -1929,12 +2646,12 @@ SDValue SystemZTargetLowering::lowerBITC - DAG.getConstant(32, MVT::i64)); - } - SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); -- return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, -+ return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, - DL, MVT::f32, Out64); - } - if (InVT == MVT::f32 && ResVT == MVT::i32) { - SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); -- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, -+ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, - MVT::f64, SDValue(U64, 0), In); - SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); - if (Subtarget.hasHighWord()) -@@ -2187,6 +2904,80 @@ SDValue SystemZTargetLowering::lowerOR(S - MVT::i64, HighOp, Low32); - } - -+SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, -+ SelectionDAG &DAG) const { -+ EVT VT = Op.getValueType(); -+ SDLoc DL(Op); -+ Op = Op.getOperand(0); -+ -+ // Handle vector types via VPOPCT. 
-+ if (VT.isVector()) { -+ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); -+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); -+ switch (VT.getVectorElementType().getSizeInBits()) { -+ case 8: -+ break; -+ case 16: { -+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ SDValue Shift = DAG.getConstant(8, MVT::i32); -+ SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); -+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); -+ Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); -+ break; -+ } -+ case 32: { -+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, -+ DAG.getConstant(0, MVT::i32)); -+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); -+ break; -+ } -+ case 64: { -+ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, -+ DAG.getConstant(0, MVT::i32)); -+ Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); -+ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); -+ break; -+ } -+ default: -+ llvm_unreachable("Unexpected type"); -+ } -+ return Op; -+ } -+ -+ // Get the known-zero mask for the operand. -+ APInt KnownZero, KnownOne; -+ DAG.computeKnownBits(Op, KnownZero, KnownOne); -+ unsigned NumSignificantBits = (~KnownZero).getActiveBits(); -+ if (NumSignificantBits == 0) -+ return DAG.getConstant(0, VT); -+ -+ // Skip known-zero high parts of the operand. -+ int64_t OrigBitSize = VT.getSizeInBits(); -+ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); -+ BitSize = std::min(BitSize, OrigBitSize); -+ -+ // The POPCNT instruction counts the number of bits in each byte. -+ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); -+ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); -+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); -+ -+ // Add up per-byte counts in a binary tree. All bits of Op at -+ // position larger than BitSize remain zero throughout. 
-+ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { -+ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT)); -+ if (BitSize != OrigBitSize) -+ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, -+ DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT)); -+ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); -+ } -+ -+ // Extract overall result from high byte. -+ if (BitSize > 8) -+ Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT)); -+ -+ return Op; -+} -+ - // Op is an atomic load. Lower it into a normal volatile load. - SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, - SelectionDAG &DAG) const { -@@ -2400,6 +3191,1069 @@ SDValue SystemZTargetLowering::lowerPREF - Node->getMemoryVT(), Node->getMemOperand()); - } - -+// Return an i32 that contains the value of CC immediately after After, -+// whose final operand must be MVT::Glue. -+static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { -+ SDValue Glue = SDValue(After, After->getNumValues() - 1); -+ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue); -+ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, -+ DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); -+} -+ -+SDValue -+SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, -+ SelectionDAG &DAG) const { -+ unsigned Opcode, CCValid; -+ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { -+ assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); -+ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); -+ SDValue CC = getCCResult(DAG, Glued.getNode()); -+ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); -+ return SDValue(); -+ } -+ -+ return SDValue(); -+} -+ -+SDValue -+SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, -+ SelectionDAG &DAG) const { -+ unsigned Opcode, CCValid; -+ if (isIntrinsicWithCC(Op, Opcode, CCValid)) { -+ SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); -+ SDValue CC = getCCResult(DAG, Glued.getNode()); -+ if 
(Op->getNumValues() == 1) -+ return CC; -+ assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); -+ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), -+ Glued, CC); -+ } -+ -+ unsigned Id = cast(Op.getOperand(0))->getZExtValue(); -+ switch (Id) { -+ case Intrinsic::s390_vpdi: -+ return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); -+ -+ case Intrinsic::s390_vperm: -+ return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); -+ -+ case Intrinsic::s390_vuphb: -+ case Intrinsic::s390_vuphh: -+ case Intrinsic::s390_vuphf: -+ return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1)); -+ -+ case Intrinsic::s390_vuplhb: -+ case Intrinsic::s390_vuplhh: -+ case Intrinsic::s390_vuplhf: -+ return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1)); -+ -+ case Intrinsic::s390_vuplb: -+ case Intrinsic::s390_vuplhw: -+ case Intrinsic::s390_vuplf: -+ return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1)); -+ -+ case Intrinsic::s390_vupllb: -+ case Intrinsic::s390_vupllh: -+ case Intrinsic::s390_vupllf: -+ return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1)); -+ -+ case Intrinsic::s390_vsumb: -+ case Intrinsic::s390_vsumh: -+ case Intrinsic::s390_vsumgh: -+ case Intrinsic::s390_vsumgf: -+ case Intrinsic::s390_vsumqf: -+ case Intrinsic::s390_vsumqg: -+ return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), -+ Op.getOperand(1), Op.getOperand(2)); -+ } -+ -+ return SDValue(); -+} -+ -+namespace { -+// Says that SystemZISD operation Opcode can be used to perform the equivalent -+// of a VPERM with permute vector Bytes. 
If Opcode takes three operands, -+// Operand is the constant third operand, otherwise it is the number of -+// bytes in each element of the result. -+struct Permute { -+ unsigned Opcode; -+ unsigned Operand; -+ unsigned char Bytes[SystemZ::VectorBytes]; -+}; -+} -+ -+static const Permute PermuteForms[] = { -+ // VMRHG -+ { SystemZISD::MERGE_HIGH, 8, -+ { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, -+ // VMRHF -+ { SystemZISD::MERGE_HIGH, 4, -+ { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, -+ // VMRHH -+ { SystemZISD::MERGE_HIGH, 2, -+ { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, -+ // VMRHB -+ { SystemZISD::MERGE_HIGH, 1, -+ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, -+ // VMRLG -+ { SystemZISD::MERGE_LOW, 8, -+ { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, -+ // VMRLF -+ { SystemZISD::MERGE_LOW, 4, -+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, -+ // VMRLH -+ { SystemZISD::MERGE_LOW, 2, -+ { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, -+ // VMRLB -+ { SystemZISD::MERGE_LOW, 1, -+ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, -+ // VPKG -+ { SystemZISD::PACK, 4, -+ { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, -+ // VPKF -+ { SystemZISD::PACK, 2, -+ { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, -+ // VPKH -+ { SystemZISD::PACK, 1, -+ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, -+ // VPDI V1, V2, 4 (low half of V1, high half of V2) -+ { SystemZISD::PERMUTE_DWORDS, 4, -+ { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, -+ // VPDI V1, V2, 1 (high half of V1, low half of V2) -+ { SystemZISD::PERMUTE_DWORDS, 1, -+ { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } -+}; -+ -+// Called after matching a vector shuffle against a particular pattern. -+// Both the original shuffle and the pattern have two vector operands. 
-+// OpNos[0] is the operand of the original shuffle that should be used for -+// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. -+// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and -+// set OpNo0 and OpNo1 to the shuffle operands that should actually be used -+// for operands 0 and 1 of the pattern. -+static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { -+ if (OpNos[0] < 0) { -+ if (OpNos[1] < 0) -+ return false; -+ OpNo0 = OpNo1 = OpNos[1]; -+ } else if (OpNos[1] < 0) { -+ OpNo0 = OpNo1 = OpNos[0]; -+ } else { -+ OpNo0 = OpNos[0]; -+ OpNo1 = OpNos[1]; -+ } -+ return true; -+} -+ -+// Bytes is a VPERM-like permute vector, except that -1 is used for -+// undefined bytes. Return true if the VPERM can be implemented using P. -+// When returning true set OpNo0 to the VPERM operand that should be -+// used for operand 0 of P and likewise OpNo1 for operand 1 of P. -+// -+// For example, if swapping the VPERM operands allows P to match, OpNo0 -+// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one -+// operand, but rewriting it to use two duplicated operands allows it to -+// match P, then OpNo0 and OpNo1 will be the same. -+static bool matchPermute(const SmallVectorImpl &Bytes, const Permute &P, -+ unsigned &OpNo0, unsigned &OpNo1) { -+ int OpNos[] = { -1, -1 }; -+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { -+ int Elt = Bytes[I]; -+ if (Elt >= 0) { -+ // Make sure that the two permute vectors use the same suboperand -+ // byte number. Only the operand numbers (the high bits) are -+ // allowed to differ. -+ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) -+ return false; -+ int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; -+ int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; -+ // Make sure that the operand mappings are consistent with previous -+ // elements. 
-+ if (OpNos[ModelOpNo] == 1 - RealOpNo) -+ return false; -+ OpNos[ModelOpNo] = RealOpNo; -+ } -+ } -+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); -+} -+ -+// As above, but search for a matching permute. -+static const Permute *matchPermute(const SmallVectorImpl &Bytes, -+ unsigned &OpNo0, unsigned &OpNo1) { -+ for (auto &P : PermuteForms) -+ if (matchPermute(Bytes, P, OpNo0, OpNo1)) -+ return &P; -+ return nullptr; -+} -+ -+// Bytes is a VPERM-like permute vector, except that -1 is used for -+// undefined bytes. This permute is an operand of an outer permute. -+// See whether redistributing the -1 bytes gives a shuffle that can be -+// implemented using P. If so, set Transform to a VPERM-like permute vector -+// that, when applied to the result of P, gives the original permute in Bytes. -+static bool matchDoublePermute(const SmallVectorImpl &Bytes, -+ const Permute &P, -+ SmallVectorImpl &Transform) { -+ unsigned To = 0; -+ for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { -+ int Elt = Bytes[From]; -+ if (Elt < 0) -+ // Byte number From of the result is undefined. -+ Transform[From] = -1; -+ else { -+ while (P.Bytes[To] != Elt) { -+ To += 1; -+ if (To == SystemZ::VectorBytes) -+ return false; -+ } -+ Transform[From] = To; -+ } -+ } -+ return true; -+} -+ -+// As above, but search for a matching permute. -+static const Permute *matchDoublePermute(const SmallVectorImpl &Bytes, -+ SmallVectorImpl &Transform) { -+ for (auto &P : PermuteForms) -+ if (matchDoublePermute(Bytes, P, Transform)) -+ return &P; -+ return nullptr; -+} -+ -+// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, -+// as if it had type vNi8. 
-+static void getVPermMask(ShuffleVectorSDNode *VSN, -+ SmallVectorImpl &Bytes) { -+ EVT VT = VSN->getValueType(0); -+ unsigned NumElements = VT.getVectorNumElements(); -+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); -+ Bytes.resize(NumElements * BytesPerElement, -1); -+ for (unsigned I = 0; I < NumElements; ++I) { -+ int Index = VSN->getMaskElt(I); -+ if (Index >= 0) -+ for (unsigned J = 0; J < BytesPerElement; ++J) -+ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; -+ } -+} -+ -+// Bytes is a VPERM-like permute vector, except that -1 is used for -+// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of -+// the result come from a contiguous sequence of bytes from one input. -+// Set Base to the selector for the first byte if so. -+static bool getShuffleInput(const SmallVectorImpl &Bytes, unsigned Start, -+ unsigned BytesPerElement, int &Base) { -+ Base = -1; -+ for (unsigned I = 0; I < BytesPerElement; ++I) { -+ if (Bytes[Start + I] >= 0) { -+ unsigned Elem = Bytes[Start + I]; -+ if (Base < 0) { -+ Base = Elem - I; -+ // Make sure the bytes would come from one input operand. -+ if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) -+ return false; -+ } else if (unsigned(Base) != Elem - I) -+ return false; -+ } -+ } -+ return true; -+} -+ -+// Bytes is a VPERM-like permute vector, except that -1 is used for -+// undefined bytes. Return true if it can be performed using VSLDI. -+// When returning true, set StartIndex to the shift amount and OpNo0 -+// and OpNo1 to the VPERM operands that should be used as the first -+// and second shift operand respectively. 
-+static bool isShlDoublePermute(const SmallVectorImpl &Bytes, -+ unsigned &StartIndex, unsigned &OpNo0, -+ unsigned &OpNo1) { -+ int OpNos[] = { -1, -1 }; -+ int Shift = -1; -+ for (unsigned I = 0; I < 16; ++I) { -+ int Index = Bytes[I]; -+ if (Index >= 0) { -+ int ExpectedShift = (Index - I) % SystemZ::VectorBytes; -+ int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; -+ int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; -+ if (Shift < 0) -+ Shift = ExpectedShift; -+ else if (Shift != ExpectedShift) -+ return false; -+ // Make sure that the operand mappings are consistent with previous -+ // elements. -+ if (OpNos[ModelOpNo] == 1 - RealOpNo) -+ return false; -+ OpNos[ModelOpNo] = RealOpNo; -+ } -+ } -+ StartIndex = Shift; -+ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); -+} -+ -+// Create a node that performs P on operands Op0 and Op1, casting the -+// operands to the appropriate type. The type of the result is determined by P. -+static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, -+ const Permute &P, SDValue Op0, SDValue Op1) { -+ // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input -+ // elements of a PACK are twice as wide as the outputs. -+ unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : -+ P.Opcode == SystemZISD::PACK ? P.Operand * 2 : -+ P.Operand); -+ // Cast both operands to the appropriate type. 
-+ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), -+ SystemZ::VectorBytes / InBytes); -+ Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); -+ Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); -+ SDValue Op; -+ if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { -+ SDValue Op2 = DAG.getConstant(P.Operand, MVT::i32); -+ Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); -+ } else if (P.Opcode == SystemZISD::PACK) { -+ MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), -+ SystemZ::VectorBytes / P.Operand); -+ Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); -+ } else { -+ Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); -+ } -+ return Op; -+} -+ -+// Bytes is a VPERM-like permute vector, except that -1 is used for -+// undefined bytes. Implement it on operands Ops[0] and Ops[1] using -+// VSLDI or VPERM. -+static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, -+ const SmallVectorImpl &Bytes) { -+ for (unsigned I = 0; I < 2; ++I) -+ Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); -+ -+ // First see whether VSLDI can be used. -+ unsigned StartIndex, OpNo0, OpNo1; -+ if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) -+ return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], -+ Ops[OpNo1], DAG.getConstant(StartIndex, MVT::i32)); -+ -+ // Fall back on VPERM. Construct an SDNode for the permute vector. -+ SDValue IndexNodes[SystemZ::VectorBytes]; -+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) -+ if (Bytes[I] >= 0) -+ IndexNodes[I] = DAG.getConstant(Bytes[I], MVT::i32); -+ else -+ IndexNodes[I] = DAG.getUNDEF(MVT::i32); -+ SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes); -+ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); -+} -+ -+namespace { -+// Describes a general N-operand vector shuffle. 
-+struct GeneralShuffle { -+ GeneralShuffle(EVT vt) : VT(vt) {} -+ void addUndef(); -+ void add(SDValue, unsigned); -+ SDValue getNode(SelectionDAG &, SDLoc); -+ -+ // The operands of the shuffle. -+ SmallVector Ops; -+ -+ // Index I is -1 if byte I of the result is undefined. Otherwise the -+ // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand -+ // Bytes[I] / SystemZ::VectorBytes. -+ SmallVector Bytes; -+ -+ // The type of the shuffle result. -+ EVT VT; -+}; -+} -+ -+// Add an extra undefined element to the shuffle. -+void GeneralShuffle::addUndef() { -+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); -+ for (unsigned I = 0; I < BytesPerElement; ++I) -+ Bytes.push_back(-1); -+} -+ -+// Add an extra element to the shuffle, taking it from element Elem of Op. -+// A null Op indicates a vector input whose value will be calculated later; -+// there is at most one such input per shuffle and it always has the same -+// type as the result. -+void GeneralShuffle::add(SDValue Op, unsigned Elem) { -+ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); -+ -+ // The source vector can have wider elements than the result, -+ // either through an explicit TRUNCATE or because of type legalization. -+ // We want the least significant part. -+ EVT FromVT = Op.getNode() ? Op.getValueType() : VT; -+ unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); -+ assert(FromBytesPerElement >= BytesPerElement && -+ "Invalid EXTRACT_VECTOR_ELT"); -+ unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + -+ (FromBytesPerElement - BytesPerElement)); -+ -+ // Look through things like shuffles and bitcasts. -+ while (Op.getNode()) { -+ if (Op.getOpcode() == ISD::BITCAST) -+ Op = Op.getOperand(0); -+ else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { -+ // See whether the bytes we need come from a contiguous part of one -+ // operand. 
-+ SmallVector OpBytes; -+ getVPermMask(cast(Op), OpBytes); -+ int NewByte; -+ if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) -+ break; -+ if (NewByte < 0) { -+ addUndef(); -+ return; -+ } -+ Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); -+ Byte = unsigned(NewByte) % SystemZ::VectorBytes; -+ } else if (Op.getOpcode() == ISD::UNDEF) { -+ addUndef(); -+ return; -+ } else -+ break; -+ } -+ -+ // Make sure that the source of the extraction is in Ops. -+ unsigned OpNo = 0; -+ for (; OpNo < Ops.size(); ++OpNo) -+ if (Ops[OpNo] == Op) -+ break; -+ if (OpNo == Ops.size()) -+ Ops.push_back(Op); -+ -+ // Add the element to Bytes. -+ unsigned Base = OpNo * SystemZ::VectorBytes + Byte; -+ for (unsigned I = 0; I < BytesPerElement; ++I) -+ Bytes.push_back(Base + I); -+} -+ -+// Return SDNodes for the completed shuffle. -+SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { -+ assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); -+ -+ if (Ops.size() == 0) -+ return DAG.getUNDEF(VT); -+ -+ // Make sure that there are at least two shuffle operands. -+ if (Ops.size() == 1) -+ Ops.push_back(DAG.getUNDEF(MVT::v16i8)); -+ -+ // Create a tree of shuffles, deferring root node until after the loop. -+ // Try to redistribute the undefined elements of non-root nodes so that -+ // the non-root shuffles match something like a pack or merge, then adjust -+ // the parent node's permute vector to compensate for the new order. -+ // Among other things, this copes with vectors like <2 x i16> that were -+ // padded with undefined elements during type legalization. -+ // -+ // In the best case this redistribution will lead to the whole tree -+ // using packs and merges. It should rarely be a loss in other cases. 
-+ unsigned Stride = 1; -+ for (; Stride * 2 < Ops.size(); Stride *= 2) { -+ for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { -+ SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; -+ -+ // Create a mask for just these two operands. -+ SmallVector NewBytes(SystemZ::VectorBytes); -+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { -+ unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; -+ unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; -+ if (OpNo == I) -+ NewBytes[J] = Byte; -+ else if (OpNo == I + Stride) -+ NewBytes[J] = SystemZ::VectorBytes + Byte; -+ else -+ NewBytes[J] = -1; -+ } -+ // See if it would be better to reorganize NewMask to avoid using VPERM. -+ SmallVector NewBytesMap(SystemZ::VectorBytes); -+ if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { -+ Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); -+ // Applying NewBytesMap to Ops[I] gets back to NewBytes. -+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { -+ if (NewBytes[J] >= 0) { -+ assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && -+ "Invalid double permute"); -+ Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; -+ } else -+ assert(NewBytesMap[J] < 0 && "Invalid double permute"); -+ } -+ } else { -+ // Just use NewBytes on the operands. -+ Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); -+ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) -+ if (NewBytes[J] >= 0) -+ Bytes[J] = I * SystemZ::VectorBytes + J; -+ } -+ } -+ } -+ -+ // Now we just have 2 inputs. Put the second operand in Ops[1]. -+ if (Stride > 1) { -+ Ops[1] = Ops[Stride]; -+ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) -+ if (Bytes[I] >= int(SystemZ::VectorBytes)) -+ Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; -+ } -+ -+ // Look for an instruction that can do the permute without resorting -+ // to VPERM. 
-+ unsigned OpNo0, OpNo1; -+ SDValue Op; -+ if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) -+ Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); -+ else -+ Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Op); -+} -+ -+// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. -+static bool isScalarToVector(SDValue Op) { -+ for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) -+ if (Op.getOperand(I).getOpcode() != ISD::UNDEF) -+ return false; -+ return true; -+} -+ -+// Return a vector of type VT that contains Value in the first element. -+// The other elements don't matter. -+static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, -+ SDValue Value) { -+ // If we have a constant, replicate it to all elements and let the -+ // BUILD_VECTOR lowering take care of it. -+ if (Value.getOpcode() == ISD::Constant || -+ Value.getOpcode() == ISD::ConstantFP) { -+ SmallVector Ops(VT.getVectorNumElements(), Value); -+ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); -+ } -+ if (Value.getOpcode() == ISD::UNDEF) -+ return DAG.getUNDEF(VT); -+ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); -+} -+ -+// Return a vector of type VT in which Op0 is in element 0 and Op1 is in -+// element 1. Used for cases in which replication is cheap. -+static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, -+ SDValue Op0, SDValue Op1) { -+ if (Op0.getOpcode() == ISD::UNDEF) { -+ if (Op1.getOpcode() == ISD::UNDEF) -+ return DAG.getUNDEF(VT); -+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); -+ } -+ if (Op1.getOpcode() == ISD::UNDEF) -+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); -+ return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, -+ buildScalarToVector(DAG, DL, VT, Op0), -+ buildScalarToVector(DAG, DL, VT, Op1)); -+} -+ -+// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 -+// vector for them. 
-+static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, -+ SDValue Op1) { -+ if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF) -+ return DAG.getUNDEF(MVT::v2i64); -+ // If one of the two inputs is undefined then replicate the other one, -+ // in order to avoid using another register unnecessarily. -+ if (Op0.getOpcode() == ISD::UNDEF) -+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); -+ else if (Op1.getOpcode() == ISD::UNDEF) -+ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); -+ else { -+ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); -+ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); -+ } -+ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); -+} -+ -+// Try to represent constant BUILD_VECTOR node BVN using a -+// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask -+// on success. -+static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { -+ EVT ElemVT = BVN->getValueType(0).getVectorElementType(); -+ unsigned BytesPerElement = ElemVT.getStoreSize(); -+ for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { -+ SDValue Op = BVN->getOperand(I); -+ if (Op.getOpcode() != ISD::UNDEF) { -+ uint64_t Value; -+ if (Op.getOpcode() == ISD::Constant) -+ Value = dyn_cast(Op)->getZExtValue(); -+ else if (Op.getOpcode() == ISD::ConstantFP) -+ Value = (dyn_cast(Op)->getValueAPF().bitcastToAPInt() -+ .getZExtValue()); -+ else -+ return false; -+ for (unsigned J = 0; J < BytesPerElement; ++J) { -+ uint64_t Byte = (Value >> (J * 8)) & 0xff; -+ if (Byte == 0xff) -+ Mask |= 1 << ((E - I - 1) * BytesPerElement + J); -+ else if (Byte != 0) -+ return false; -+ } -+ } -+ } -+ return true; -+} -+ -+// Try to load a vector constant in which BitsPerElement-bit value Value -+// is replicated to fill the vector. VT is the type of the resulting -+// constant, which may have elements of a different size from BitsPerElement. 
-+// Return the SDValue of the constant on success, otherwise return -+// an empty value. -+static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, -+ const SystemZInstrInfo *TII, -+ SDLoc DL, EVT VT, uint64_t Value, -+ unsigned BitsPerElement) { -+ // Signed 16-bit values can be replicated using VREPI. -+ int64_t SignedValue = SignExtend64(Value, BitsPerElement); -+ if (isInt<16>(SignedValue)) { -+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), -+ SystemZ::VectorBits / BitsPerElement); -+ SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, -+ DAG.getConstant(SignedValue, MVT::i32)); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ } -+ // See whether rotating the constant left some N places gives a value that -+ // is one less than a power of 2 (i.e. all zeros followed by all ones). -+ // If so we can use VGM. -+ unsigned Start, End; -+ if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { -+ // isRxSBGMask returns the bit numbers for a full 64-bit value, -+ // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to -+ // bit numbers for an BitsPerElement value, so that 0 denotes -+ // 1 << (BitsPerElement-1). -+ Start -= 64 - BitsPerElement; -+ End -= 64 - BitsPerElement; -+ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), -+ SystemZ::VectorBits / BitsPerElement); -+ SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, -+ DAG.getConstant(Start, MVT::i32), -+ DAG.getConstant(End, MVT::i32)); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ } -+ return SDValue(); -+} -+ -+// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually -+// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for -+// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR -+// would benefit from this representation and return it if so. 
-+static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, -+ BuildVectorSDNode *BVN) { -+ EVT VT = BVN->getValueType(0); -+ unsigned NumElements = VT.getVectorNumElements(); -+ -+ // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation -+ // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still -+ // need a BUILD_VECTOR, add an additional placeholder operand for that -+ // BUILD_VECTOR and store its operands in ResidueOps. -+ GeneralShuffle GS(VT); -+ SmallVector ResidueOps; -+ bool FoundOne = false; -+ for (unsigned I = 0; I < NumElements; ++I) { -+ SDValue Op = BVN->getOperand(I); -+ if (Op.getOpcode() == ISD::TRUNCATE) -+ Op = Op.getOperand(0); -+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && -+ Op.getOperand(1).getOpcode() == ISD::Constant) { -+ unsigned Elem = cast(Op.getOperand(1))->getZExtValue(); -+ GS.add(Op.getOperand(0), Elem); -+ FoundOne = true; -+ } else if (Op.getOpcode() == ISD::UNDEF) { -+ GS.addUndef(); -+ } else { -+ GS.add(SDValue(), ResidueOps.size()); -+ ResidueOps.push_back(Op); -+ } -+ } -+ -+ // Nothing to do if there are no EXTRACT_VECTOR_ELTs. -+ if (!FoundOne) -+ return SDValue(); -+ -+ // Create the BUILD_VECTOR for the remaining elements, if any. -+ if (!ResidueOps.empty()) { -+ while (ResidueOps.size() < NumElements) -+ ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType())); -+ for (auto &Op : GS.Ops) { -+ if (!Op.getNode()) { -+ Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); -+ break; -+ } -+ } -+ } -+ return GS.getNode(DAG, SDLoc(BVN)); -+} -+ -+// Combine GPR scalar values Elems into a vector of type VT. -+static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, -+ SmallVectorImpl &Elems) { -+ // See whether there is a single replicated value. 
-+ SDValue Single; -+ unsigned int NumElements = Elems.size(); -+ unsigned int Count = 0; -+ for (auto Elem : Elems) { -+ if (Elem.getOpcode() != ISD::UNDEF) { -+ if (!Single.getNode()) -+ Single = Elem; -+ else if (Elem != Single) { -+ Single = SDValue(); -+ break; -+ } -+ Count += 1; -+ } -+ } -+ // There are three cases here: -+ // -+ // - if the only defined element is a loaded one, the best sequence -+ // is a replicating load. -+ // -+ // - otherwise, if the only defined element is an i64 value, we will -+ // end up with the same VLVGP sequence regardless of whether we short-cut -+ // for replication or fall through to the later code. -+ // -+ // - otherwise, if the only defined element is an i32 or smaller value, -+ // we would need 2 instructions to replicate it: VLVGP followed by VREPx. -+ // This is only a win if the single defined element is used more than once. -+ // In other cases we're better off using a single VLVGx. -+ if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) -+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); -+ -+ // The best way of building a v2i64 from two i64s is to use VLVGP. -+ if (VT == MVT::v2i64) -+ return joinDwords(DAG, DL, Elems[0], Elems[1]); -+ -+ // Use a 64-bit merge high to combine two doubles. -+ if (VT == MVT::v2f64) -+ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); -+ -+ // Build v4f32 values directly from the FPRs: -+ // -+ // -+ // V V VMRHF -+ // -+ // V VMRHG -+ // -+ if (VT == MVT::v4f32) { -+ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); -+ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); -+ // Avoid unnecessary undefs by reusing the other operand. -+ if (Op01.getOpcode() == ISD::UNDEF) -+ Op01 = Op23; -+ else if (Op23.getOpcode() == ISD::UNDEF) -+ Op23 = Op01; -+ // Merging identical replications is a no-op. 
-+ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) -+ return Op01; -+ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); -+ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); -+ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, -+ DL, MVT::v2i64, Op01, Op23); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ } -+ -+ // Collect the constant terms. -+ SmallVector Constants(NumElements, SDValue()); -+ SmallVector Done(NumElements, false); -+ -+ unsigned NumConstants = 0; -+ for (unsigned I = 0; I < NumElements; ++I) { -+ SDValue Elem = Elems[I]; -+ if (Elem.getOpcode() == ISD::Constant || -+ Elem.getOpcode() == ISD::ConstantFP) { -+ NumConstants += 1; -+ Constants[I] = Elem; -+ Done[I] = true; -+ } -+ } -+ // If there was at least one constant, fill in the other elements of -+ // Constants with undefs to get a full vector constant and use that -+ // as the starting point. -+ SDValue Result; -+ if (NumConstants > 0) { -+ for (unsigned I = 0; I < NumElements; ++I) -+ if (!Constants[I].getNode()) -+ Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); -+ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants); -+ } else { -+ // Otherwise try to use VLVGP to start the sequence in order to -+ // avoid a false dependency on any previous contents of the vector -+ // register. This only makes sense if one of the associated elements -+ // is defined. -+ unsigned I1 = NumElements / 2 - 1; -+ unsigned I2 = NumElements - 1; -+ bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF); -+ bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF); -+ if (Def1 || Def2) { -+ SDValue Elem1 = Elems[Def1 ? I1 : I2]; -+ SDValue Elem2 = Elems[Def2 ? I2 : I1]; -+ Result = DAG.getNode(ISD::BITCAST, DL, VT, -+ joinDwords(DAG, DL, Elem1, Elem2)); -+ Done[I1] = true; -+ Done[I2] = true; -+ } else -+ Result = DAG.getUNDEF(VT); -+ } -+ -+ // Use VLVGx to insert the other elements. 
-+ for (unsigned I = 0; I < NumElements; ++I) -+ if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF) -+ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], -+ DAG.getConstant(I, MVT::i32)); -+ return Result; -+} -+ -+SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, -+ SelectionDAG &DAG) const { -+ const SystemZInstrInfo *TII = -+ static_cast(Subtarget.getInstrInfo()); -+ auto *BVN = cast(Op.getNode()); -+ SDLoc DL(Op); -+ EVT VT = Op.getValueType(); -+ -+ if (BVN->isConstant()) { -+ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- -+ // preferred way of creating all-zero and all-one vectors so give it -+ // priority over other methods below. -+ uint64_t Mask = 0; -+ if (tryBuildVectorByteMask(BVN, Mask)) { -+ SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, -+ DAG.getConstant(Mask, MVT::i32)); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ } -+ -+ // Try using some form of replication. -+ APInt SplatBits, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, -+ 8, true) && -+ SplatBitSize <= 64) { -+ // First try assuming that any undefined bits above the highest set bit -+ // and below the lowest set bit are 1s. This increases the likelihood of -+ // being able to use a sign-extended element value in VECTOR REPLICATE -+ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. 
-+ uint64_t SplatBitsZ = SplatBits.getZExtValue(); -+ uint64_t SplatUndefZ = SplatUndef.getZExtValue(); -+ uint64_t Lower = (SplatUndefZ -+ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); -+ uint64_t Upper = (SplatUndefZ -+ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); -+ uint64_t Value = SplatBitsZ | Upper | Lower; -+ SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, -+ SplatBitSize); -+ if (Op.getNode()) -+ return Op; -+ -+ // Now try assuming that any undefined bits between the first and -+ // last defined set bits are set. This increases the chances of -+ // using a non-wraparound mask. -+ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; -+ Value = SplatBitsZ | Middle; -+ Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); -+ if (Op.getNode()) -+ return Op; -+ } -+ -+ // Fall back to loading it from memory. -+ return SDValue(); -+ } -+ -+ // See if we should use shuffles to construct the vector from other vectors. -+ SDValue Res = tryBuildVectorShuffle(DAG, BVN); -+ if (Res.getNode()) -+ return Res; -+ -+ // Detect SCALAR_TO_VECTOR conversions. -+ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) -+ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); -+ -+ // Otherwise use buildVector to build the vector up from GPRs. 
-+ unsigned NumElements = Op.getNumOperands(); -+ SmallVector Ops(NumElements); -+ for (unsigned I = 0; I < NumElements; ++I) -+ Ops[I] = Op.getOperand(I); -+ return buildVector(DAG, DL, VT, Ops); -+} -+ -+SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op, -+ SelectionDAG &DAG) const { -+ auto *VSN = cast(Op.getNode()); -+ SDLoc DL(Op); -+ EVT VT = Op.getValueType(); -+ unsigned NumElements = VT.getVectorNumElements(); -+ -+ if (VSN->isSplat()) { -+ SDValue Op0 = Op.getOperand(0); -+ unsigned Index = VSN->getSplatIndex(); -+ assert(Index < VT.getVectorNumElements() && -+ "Splat index should be defined and in first operand"); -+ // See whether the value we're splatting is directly available as a scalar. -+ if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) || -+ Op0.getOpcode() == ISD::BUILD_VECTOR) -+ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index)); -+ // Otherwise keep it as a vector-to-vector operation. -+ return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0), -+ DAG.getConstant(Index, MVT::i32)); -+ } -+ -+ GeneralShuffle GS(VT); -+ for (unsigned I = 0; I < NumElements; ++I) { -+ int Elt = VSN->getMaskElt(I); -+ if (Elt < 0) -+ GS.addUndef(); -+ else -+ GS.add(Op.getOperand(unsigned(Elt) / NumElements), -+ unsigned(Elt) % NumElements); -+ } -+ return GS.getNode(DAG, SDLoc(VSN)); -+} -+ -+SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op, -+ SelectionDAG &DAG) const { -+ SDLoc DL(Op); -+ // Just insert the scalar into element 0 of an undefined vector. -+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, -+ Op.getValueType(), DAG.getUNDEF(Op.getValueType()), -+ Op.getOperand(0), DAG.getConstant(0, MVT::i32)); -+} -+ -+SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ // Handle insertions of floating-point values. 
-+ SDLoc DL(Op); -+ SDValue Op0 = Op.getOperand(0); -+ SDValue Op1 = Op.getOperand(1); -+ SDValue Op2 = Op.getOperand(2); -+ EVT VT = Op.getValueType(); -+ -+ // Insertions into constant indices of a v2f64 can be done using VPDI. -+ // However, if the inserted value is a bitcast or a constant then it's -+ // better to use GPRs, as below. -+ if (VT == MVT::v2f64 && -+ Op1.getOpcode() != ISD::BITCAST && -+ Op1.getOpcode() != ISD::ConstantFP && -+ Op2.getOpcode() == ISD::Constant) { -+ uint64_t Index = dyn_cast(Op2)->getZExtValue(); -+ unsigned Mask = VT.getVectorNumElements() - 1; -+ if (Index <= Mask) -+ return Op; -+ } -+ -+ // Otherwise bitcast to the equivalent integer form and insert via a GPR. -+ MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits()); -+ MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements()); -+ SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT, -+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), -+ DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Res); -+} -+ -+SDValue -+SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op, -+ SelectionDAG &DAG) const { -+ // Handle extractions of floating-point values. -+ SDLoc DL(Op); -+ SDValue Op0 = Op.getOperand(0); -+ SDValue Op1 = Op.getOperand(1); -+ EVT VT = Op.getValueType(); -+ EVT VecVT = Op0.getValueType(); -+ -+ // Extractions of constant indices can be done directly. -+ if (auto *CIndexN = dyn_cast(Op1)) { -+ uint64_t Index = CIndexN->getZExtValue(); -+ unsigned Mask = VecVT.getVectorNumElements() - 1; -+ if (Index <= Mask) -+ return Op; -+ } -+ -+ // Otherwise bitcast to the equivalent integer form and extract via a GPR. 
-+ MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits()); -+ MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements()); -+ SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT, -+ DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1); -+ return DAG.getNode(ISD::BITCAST, DL, VT, Res); -+} -+ -+SDValue -+SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, -+ unsigned UnpackHigh) const { -+ SDValue PackedOp = Op.getOperand(0); -+ EVT OutVT = Op.getValueType(); -+ EVT InVT = PackedOp.getValueType(); -+ unsigned ToBits = OutVT.getVectorElementType().getSizeInBits(); -+ unsigned FromBits = InVT.getVectorElementType().getSizeInBits(); -+ do { -+ FromBits *= 2; -+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits), -+ SystemZ::VectorBits / FromBits); -+ PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp); -+ } while (FromBits != ToBits); -+ return PackedOp; -+} -+ -+SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG, -+ unsigned ByScalar) const { -+ // Look for cases where a vector shift can use the *_BY_SCALAR form. -+ SDValue Op0 = Op.getOperand(0); -+ SDValue Op1 = Op.getOperand(1); -+ SDLoc DL(Op); -+ EVT VT = Op.getValueType(); -+ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); -+ -+ // See whether the shift vector is a splat represented as BUILD_VECTOR. -+ if (auto *BVN = dyn_cast(Op1)) { -+ APInt SplatBits, SplatUndef; -+ unsigned SplatBitSize; -+ bool HasAnyUndefs; -+ // Check for constant splats. Use ElemBitSize as the minimum element -+ // width and reject splats that need wider elements. -+ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, -+ ElemBitSize, true) && -+ SplatBitSize == ElemBitSize) { -+ SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff, -+ MVT::i32); -+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift); -+ } -+ // Check for variable splats. 
-+ BitVector UndefElements; -+ SDValue Splat = BVN->getSplatValue(&UndefElements); -+ if (Splat) { -+ // Since i32 is the smallest legal type, we either need a no-op -+ // or a truncation. -+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat); -+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift); -+ } -+ } -+ -+ // See whether the shift vector is a splat represented as SHUFFLE_VECTOR, -+ // and the shift amount is directly available in a GPR. -+ if (auto *VSN = dyn_cast(Op1)) { -+ if (VSN->isSplat()) { -+ SDValue VSNOp0 = VSN->getOperand(0); -+ unsigned Index = VSN->getSplatIndex(); -+ assert(Index < VT.getVectorNumElements() && -+ "Splat index should be defined and in first operand"); -+ if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) || -+ VSNOp0.getOpcode() == ISD::BUILD_VECTOR) { -+ // Since i32 is the smallest legal type, we either need a no-op -+ // or a truncation. -+ SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, -+ VSNOp0.getOperand(Index)); -+ return DAG.getNode(ByScalar, DL, VT, Op0, Shift); -+ } -+ } -+ } -+ -+ // Otherwise just treat the current form as legal. 
-+ return Op; -+} -+ - SDValue SystemZTargetLowering::LowerOperation(SDValue Op, - SelectionDAG &DAG) const { - switch (Op.getOpcode()) { -@@ -2437,6 +4291,14 @@ SDValue SystemZTargetLowering::LowerOper - return lowerUDIVREM(Op, DAG); - case ISD::OR: - return lowerOR(Op, DAG); -+ case ISD::CTPOP: -+ return lowerCTPOP(Op, DAG); -+ case ISD::CTLZ_ZERO_UNDEF: -+ return DAG.getNode(ISD::CTLZ, SDLoc(Op), -+ Op.getValueType(), Op.getOperand(0)); -+ case ISD::CTTZ_ZERO_UNDEF: -+ return DAG.getNode(ISD::CTTZ, SDLoc(Op), -+ Op.getValueType(), Op.getOperand(0)); - case ISD::ATOMIC_SWAP: - return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); - case ISD::ATOMIC_STORE: -@@ -2471,6 +4333,30 @@ SDValue SystemZTargetLowering::LowerOper - return lowerSTACKRESTORE(Op, DAG); - case ISD::PREFETCH: - return lowerPREFETCH(Op, DAG); -+ case ISD::INTRINSIC_W_CHAIN: -+ return lowerINTRINSIC_W_CHAIN(Op, DAG); -+ case ISD::INTRINSIC_WO_CHAIN: -+ return lowerINTRINSIC_WO_CHAIN(Op, DAG); -+ case ISD::BUILD_VECTOR: -+ return lowerBUILD_VECTOR(Op, DAG); -+ case ISD::VECTOR_SHUFFLE: -+ return lowerVECTOR_SHUFFLE(Op, DAG); -+ case ISD::SCALAR_TO_VECTOR: -+ return lowerSCALAR_TO_VECTOR(Op, DAG); -+ case ISD::INSERT_VECTOR_ELT: -+ return lowerINSERT_VECTOR_ELT(Op, DAG); -+ case ISD::EXTRACT_VECTOR_ELT: -+ return lowerEXTRACT_VECTOR_ELT(Op, DAG); -+ case ISD::SIGN_EXTEND_VECTOR_INREG: -+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); -+ case ISD::ZERO_EXTEND_VECTOR_INREG: -+ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); -+ case ISD::SHL: -+ return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); -+ case ISD::SRL: -+ return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); -+ case ISD::SRA: -+ return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); - default: - llvm_unreachable("Unexpected node to lower"); - } -@@ -2482,6 +4368,8 @@ const char *SystemZTargetLowering::getTa - OPCODE(RET_FLAG); - OPCODE(CALL); - OPCODE(SIBCALL); -+ OPCODE(TLS_GDCALL); -+ 
OPCODE(TLS_LDCALL); - OPCODE(PCREL_WRAPPER); - OPCODE(PCREL_OFFSET); - OPCODE(IABS); -@@ -2492,7 +4380,9 @@ const char *SystemZTargetLowering::getTa - OPCODE(SELECT_CCMASK); - OPCODE(ADJDYNALLOC); - OPCODE(EXTRACT_ACCESS); -+ OPCODE(POPCNT); - OPCODE(UMUL_LOHI64); -+ OPCODE(SDIVREM32); - OPCODE(SDIVREM64); - OPCODE(UDIVREM32); - OPCODE(UDIVREM64); -@@ -2506,11 +4396,60 @@ const char *SystemZTargetLowering::getTa - OPCODE(XC_LOOP); - OPCODE(CLC); - OPCODE(CLC_LOOP); -- OPCODE(STRCMP); - OPCODE(STPCPY); -+ OPCODE(STRCMP); - OPCODE(SEARCH_STRING); - OPCODE(IPM); - OPCODE(SERIALIZE); -+ OPCODE(TBEGIN); -+ OPCODE(TBEGIN_NOFLOAT); -+ OPCODE(TEND); -+ OPCODE(BYTE_MASK); -+ OPCODE(ROTATE_MASK); -+ OPCODE(REPLICATE); -+ OPCODE(JOIN_DWORDS); -+ OPCODE(SPLAT); -+ OPCODE(MERGE_HIGH); -+ OPCODE(MERGE_LOW); -+ OPCODE(SHL_DOUBLE); -+ OPCODE(PERMUTE_DWORDS); -+ OPCODE(PERMUTE); -+ OPCODE(PACK); -+ OPCODE(PACKS_CC); -+ OPCODE(PACKLS_CC); -+ OPCODE(UNPACK_HIGH); -+ OPCODE(UNPACKL_HIGH); -+ OPCODE(UNPACK_LOW); -+ OPCODE(UNPACKL_LOW); -+ OPCODE(VSHL_BY_SCALAR); -+ OPCODE(VSRL_BY_SCALAR); -+ OPCODE(VSRA_BY_SCALAR); -+ OPCODE(VSUM); -+ OPCODE(VICMPE); -+ OPCODE(VICMPH); -+ OPCODE(VICMPHL); -+ OPCODE(VICMPES); -+ OPCODE(VICMPHS); -+ OPCODE(VICMPHLS); -+ OPCODE(VFCMPE); -+ OPCODE(VFCMPH); -+ OPCODE(VFCMPHE); -+ OPCODE(VFCMPES); -+ OPCODE(VFCMPHS); -+ OPCODE(VFCMPHES); -+ OPCODE(VFTCI); -+ OPCODE(VEXTEND); -+ OPCODE(VROUND); -+ OPCODE(VTM); -+ OPCODE(VFAE_CC); -+ OPCODE(VFAEZ_CC); -+ OPCODE(VFEE_CC); -+ OPCODE(VFEEZ_CC); -+ OPCODE(VFENE_CC); -+ OPCODE(VFENEZ_CC); -+ OPCODE(VISTR_CC); -+ OPCODE(VSTRC_CC); -+ OPCODE(VSTRCZ_CC); - OPCODE(ATOMIC_SWAPW); - OPCODE(ATOMIC_LOADW_ADD); - OPCODE(ATOMIC_LOADW_SUB); -@@ -2529,6 +4468,157 @@ const char *SystemZTargetLowering::getTa - #undef OPCODE - } - -+// Return true if VT is a vector whose elements are a whole number of bytes -+// in width. 
-+static bool canTreatAsByteVector(EVT VT) { -+ return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0; -+} -+ -+// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT -+// producing a result of type ResVT. Op is a possibly bitcast version -+// of the input vector and Index is the index (based on type VecVT) that -+// should be extracted. Return the new extraction if a simplification -+// was possible or if Force is true. -+SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT, -+ SDValue Op, unsigned Index, -+ DAGCombinerInfo &DCI, -+ bool Force) const { -+ SelectionDAG &DAG = DCI.DAG; -+ -+ // The number of bytes being extracted. -+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); -+ -+ for (;;) { -+ unsigned Opcode = Op.getOpcode(); -+ if (Opcode == ISD::BITCAST) -+ // Look through bitcasts. -+ Op = Op.getOperand(0); -+ else if (Opcode == ISD::VECTOR_SHUFFLE && -+ canTreatAsByteVector(Op.getValueType())) { -+ // Get a VPERM-like permute mask and see whether the bytes covered -+ // by the extracted element are a contiguous sequence from one -+ // source operand. -+ SmallVector Bytes; -+ getVPermMask(cast(Op), Bytes); -+ int First; -+ if (!getShuffleInput(Bytes, Index * BytesPerElement, -+ BytesPerElement, First)) -+ break; -+ if (First < 0) -+ return DAG.getUNDEF(ResVT); -+ // Make sure the contiguous sequence starts at a multiple of the -+ // original element size. -+ unsigned Byte = unsigned(First) % Bytes.size(); -+ if (Byte % BytesPerElement != 0) -+ break; -+ // We can get the extracted value directly from an input. -+ Index = Byte / BytesPerElement; -+ Op = Op.getOperand(unsigned(First) / Bytes.size()); -+ Force = true; -+ } else if (Opcode == ISD::BUILD_VECTOR && -+ canTreatAsByteVector(Op.getValueType())) { -+ // We can only optimize this case if the BUILD_VECTOR elements are -+ // at least as wide as the extracted value. 
-+ EVT OpVT = Op.getValueType(); -+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); -+ if (OpBytesPerElement < BytesPerElement) -+ break; -+ // Make sure that the least-significant bit of the extracted value -+ // is the least significant bit of an input. -+ unsigned End = (Index + 1) * BytesPerElement; -+ if (End % OpBytesPerElement != 0) -+ break; -+ // We're extracting the low part of one operand of the BUILD_VECTOR. -+ Op = Op.getOperand(End / OpBytesPerElement - 1); -+ if (!Op.getValueType().isInteger()) { -+ EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits()); -+ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); -+ DCI.AddToWorklist(Op.getNode()); -+ } -+ EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits()); -+ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); -+ if (VT != ResVT) { -+ DCI.AddToWorklist(Op.getNode()); -+ Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op); -+ } -+ return Op; -+ } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG || -+ Opcode == ISD::ZERO_EXTEND_VECTOR_INREG || -+ Opcode == ISD::ANY_EXTEND_VECTOR_INREG) && -+ canTreatAsByteVector(Op.getValueType()) && -+ canTreatAsByteVector(Op.getOperand(0).getValueType())) { -+ // Make sure that only the unextended bits are significant. -+ EVT ExtVT = Op.getValueType(); -+ EVT OpVT = Op.getOperand(0).getValueType(); -+ unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize(); -+ unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize(); -+ unsigned Byte = Index * BytesPerElement; -+ unsigned SubByte = Byte % ExtBytesPerElement; -+ unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement; -+ if (SubByte < MinSubByte || -+ SubByte + BytesPerElement > ExtBytesPerElement) -+ break; -+ // Get the byte offset of the unextended element -+ Byte = Byte / ExtBytesPerElement * OpBytesPerElement; -+ // ...then add the byte offset relative to that element. 
-+ Byte += SubByte - MinSubByte; -+ if (Byte % BytesPerElement != 0) -+ break; -+ Op = Op.getOperand(0); -+ Index = Byte / BytesPerElement; -+ Force = true; -+ } else -+ break; -+ } -+ if (Force) { -+ if (Op.getValueType() != VecVT) { -+ Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op); -+ DCI.AddToWorklist(Op.getNode()); -+ } -+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op, -+ DAG.getConstant(Index, MVT::i32)); -+ } -+ return SDValue(); -+} -+ -+// Optimize vector operations in scalar value Op on the basis that Op -+// is truncated to TruncVT. -+SDValue -+SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, -+ DAGCombinerInfo &DCI) const { -+ // If we have (trunc (extract_vector_elt X, Y)), try to turn it into -+ // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements -+ // of type TruncVT. -+ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && -+ TruncVT.getSizeInBits() % 8 == 0) { -+ SDValue Vec = Op.getOperand(0); -+ EVT VecVT = Vec.getValueType(); -+ if (canTreatAsByteVector(VecVT)) { -+ if (auto *IndexN = dyn_cast(Op.getOperand(1))) { -+ unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize(); -+ unsigned TruncBytes = TruncVT.getStoreSize(); -+ if (BytesPerElement % TruncBytes == 0) { -+ // Calculate the value of Y' in the above description. We are -+ // splitting the original elements into Scale equal-sized pieces -+ // and for truncation purposes want the last (least-significant) -+ // of these pieces for IndexN. This is easiest to do by calculating -+ // the start index of the following element and then subtracting 1. -+ unsigned Scale = BytesPerElement / TruncBytes; -+ unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1; -+ -+ // Defer the creation of the bitcast from X to combineExtract, -+ // which might be able to optimize the extraction. -+ VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8), -+ VecVT.getStoreSize() / TruncBytes); -+ EVT ResVT = (TruncBytes < 4 ? 
MVT::i32 : TruncVT); -+ return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true); -+ } -+ } -+ } -+ } -+ return SDValue(); -+} -+ - SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N, - DAGCombinerInfo &DCI) const { - SelectionDAG &DAG = DCI.DAG; -@@ -2559,6 +4649,114 @@ SDValue SystemZTargetLowering::PerformDA - } - } - } -+ if (Opcode == SystemZISD::MERGE_HIGH || -+ Opcode == SystemZISD::MERGE_LOW) { -+ SDValue Op0 = N->getOperand(0); -+ SDValue Op1 = N->getOperand(1); -+ if (Op0.getOpcode() == ISD::BITCAST) -+ Op0 = Op0.getOperand(0); -+ if (Op0.getOpcode() == SystemZISD::BYTE_MASK && -+ cast(Op0.getOperand(0))->getZExtValue() == 0) { -+ // (z_merge_* 0, 0) -> 0. This is mostly useful for using VLLEZF -+ // for v4f32. -+ if (Op1 == N->getOperand(0)) -+ return Op1; -+ // (z_merge_? 0, X) -> (z_unpackl_? 0, X). -+ EVT VT = Op1.getValueType(); -+ unsigned ElemBytes = VT.getVectorElementType().getStoreSize(); -+ if (ElemBytes <= 4) { -+ Opcode = (Opcode == SystemZISD::MERGE_HIGH ? -+ SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW); -+ EVT InVT = VT.changeVectorElementTypeToInteger(); -+ EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16), -+ SystemZ::VectorBytes / ElemBytes / 2); -+ if (VT != InVT) { -+ Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1); -+ DCI.AddToWorklist(Op1.getNode()); -+ } -+ SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1); -+ DCI.AddToWorklist(Op.getNode()); -+ return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); -+ } -+ } -+ } -+ // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better -+ // for the extraction to be done on a vMiN value, so that we can use VSTE. -+ // If X has wider elements then convert it to: -+ // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z). 
-+ if (Opcode == ISD::STORE) { -+ auto *SN = cast(N); -+ EVT MemVT = SN->getMemoryVT(); -+ if (MemVT.isInteger()) { -+ SDValue Value = combineTruncateExtract(SDLoc(N), MemVT, -+ SN->getValue(), DCI); -+ if (Value.getNode()) { -+ DCI.AddToWorklist(Value.getNode()); -+ -+ // Rewrite the store with the new form of stored value. -+ return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value, -+ SN->getBasePtr(), SN->getMemoryVT(), -+ SN->getMemOperand()); -+ } -+ } -+ } -+ // Try to simplify a vector extraction. -+ if (Opcode == ISD::EXTRACT_VECTOR_ELT) { -+ if (auto *IndexN = dyn_cast(N->getOperand(1))) { -+ SDValue Op0 = N->getOperand(0); -+ EVT VecVT = Op0.getValueType(); -+ return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0, -+ IndexN->getZExtValue(), DCI, false); -+ } -+ } -+ // (join_dwords X, X) == (replicate X) -+ if (Opcode == SystemZISD::JOIN_DWORDS && -+ N->getOperand(0) == N->getOperand(1)) -+ return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0), -+ N->getOperand(0)); -+ // (fround (extract_vector_elt X 0)) -+ // (fround (extract_vector_elt X 1)) -> -+ // (extract_vector_elt (VROUND X) 0) -+ // (extract_vector_elt (VROUND X) 1) -+ // -+ // This is a special case since the target doesn't really support v2f32s. 
-+ if (Opcode == ISD::FP_ROUND) { -+ SDValue Op0 = N->getOperand(0); -+ if (N->getValueType(0) == MVT::f32 && -+ Op0.hasOneUse() && -+ Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && -+ Op0.getOperand(0).getValueType() == MVT::v2f64 && -+ Op0.getOperand(1).getOpcode() == ISD::Constant && -+ cast(Op0.getOperand(1))->getZExtValue() == 0) { -+ SDValue Vec = Op0.getOperand(0); -+ for (auto *U : Vec->uses()) { -+ if (U != Op0.getNode() && -+ U->hasOneUse() && -+ U->getOpcode() == ISD::EXTRACT_VECTOR_ELT && -+ U->getOperand(0) == Vec && -+ U->getOperand(1).getOpcode() == ISD::Constant && -+ cast(U->getOperand(1))->getZExtValue() == 1) { -+ SDValue OtherRound = SDValue(*U->use_begin(), 0); -+ if (OtherRound.getOpcode() == ISD::FP_ROUND && -+ OtherRound.getOperand(0) == SDValue(U, 0) && -+ OtherRound.getValueType() == MVT::f32) { -+ SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N), -+ MVT::v4f32, Vec); -+ DCI.AddToWorklist(VRound.getNode()); -+ SDValue Extract1 = -+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32, -+ VRound, DAG.getConstant(2, MVT::i32)); -+ DCI.AddToWorklist(Extract1.getNode()); -+ DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1); -+ SDValue Extract0 = -+ DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32, -+ VRound, DAG.getConstant(0, MVT::i32)); -+ return Extract0; -+ } -+ } -+ } -+ } -+ } - return SDValue(); - } - -@@ -3338,6 +5536,57 @@ SystemZTargetLowering::emitStringWrapper - return DoneMBB; - } - -+// Update TBEGIN instruction with final opcode and register clobbers. -+MachineBasicBlock * -+SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI, -+ MachineBasicBlock *MBB, -+ unsigned Opcode, -+ bool NoFloat) const { -+ MachineFunction &MF = *MBB->getParent(); -+ const TargetFrameLowering *TFI = Subtarget.getFrameLowering(); -+ const SystemZInstrInfo *TII = Subtarget.getInstrInfo(); -+ -+ // Update opcode. -+ MI->setDesc(TII->get(Opcode)); -+ -+ // We cannot handle a TBEGIN that clobbers the stack or frame pointer. 
-+ // Make sure to add the corresponding GRSM bits if they are missing. -+ uint64_t Control = MI->getOperand(2).getImm(); -+ static const unsigned GPRControlBit[16] = { -+ 0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000, -+ 0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100 -+ }; -+ Control |= GPRControlBit[15]; -+ if (TFI->hasFP(MF)) -+ Control |= GPRControlBit[11]; -+ MI->getOperand(2).setImm(Control); -+ -+ // Add GPR clobbers. -+ for (int I = 0; I < 16; I++) { -+ if ((Control & GPRControlBit[I]) == 0) { -+ unsigned Reg = SystemZMC::GR64Regs[I]; -+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); -+ } -+ } -+ -+ // Add FPR/VR clobbers. -+ if (!NoFloat && (Control & 4) != 0) { -+ if (Subtarget.hasVector()) { -+ for (int I = 0; I < 32; I++) { -+ unsigned Reg = SystemZMC::VR128Regs[I]; -+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); -+ } -+ } else { -+ for (int I = 0; I < 16; I++) { -+ unsigned Reg = SystemZMC::FP64Regs[I]; -+ MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); -+ } -+ } -+ } -+ -+ return MBB; -+} -+ - MachineBasicBlock *SystemZTargetLowering:: - EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { - switch (MI->getOpcode()) { -@@ -3579,6 +5828,12 @@ EmitInstrWithCustomInserter(MachineInstr - return emitStringWrapper(MI, MBB, SystemZ::MVST); - case SystemZ::SRSTLoop: - return emitStringWrapper(MI, MBB, SystemZ::SRST); -+ case SystemZ::TBEGIN: -+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); -+ case SystemZ::TBEGIN_nofloat: -+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); -+ case SystemZ::TBEGINC: -+ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); - default: - llvm_unreachable("Unexpected instr type to insert"); - } -Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h -+++ 
llvm-36/lib/Target/SystemZ/SystemZISelLowering.h -@@ -34,6 +34,11 @@ enum { - CALL, - SIBCALL, - -+ // TLS calls. Like regular calls, except operand 1 is the TLS symbol. -+ // (The call target is implicitly __tls_get_offset.) -+ TLS_GDCALL, -+ TLS_LDCALL, -+ - // Wraps a TargetGlobalAddress that should be loaded using PC-relative - // accesses (LARL). Operand 0 is the address. - PCREL_WRAPPER, -@@ -82,6 +87,9 @@ enum { - // the number of the register. - EXTRACT_ACCESS, - -+ // Count number of bits set in operand 0 per byte. -+ POPCNT, -+ - // Wrappers around the ISD opcodes of the same name. The output and - // first input operands are GR128s. The trailing numbers are the - // widths of the second operand in bits. -@@ -138,6 +146,135 @@ enum { - // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) - SERIALIZE, - -+ // Transaction begin. The first operand is the chain, the second -+ // the TDB pointer, and the third the immediate control field. -+ // Returns chain and glue. -+ TBEGIN, -+ TBEGIN_NOFLOAT, -+ -+ // Transaction end. Just the chain operand. Returns chain and glue. -+ TEND, -+ -+ // Create a vector constant by filling byte N of the result with bit -+ // 15-N of the single operand. -+ BYTE_MASK, -+ -+ // Create a vector constant by replicating an element-sized RISBG-style mask. -+ // The first operand specifies the starting set bit and the second operand -+ // specifies the ending set bit. Both operands count from the MSB of the -+ // element. -+ ROTATE_MASK, -+ -+ // Replicate a GPR scalar value into all elements of a vector. -+ REPLICATE, -+ -+ // Create a vector from two i64 GPRs. -+ JOIN_DWORDS, -+ -+ // Replicate one element of a vector into all elements. The first operand -+ // is the vector and the second is the index of the element to replicate. -+ SPLAT, -+ -+ // Interleave elements from the high half of operand 0 and the high half -+ // of operand 1. -+ MERGE_HIGH, -+ -+ // Likewise for the low halves. 
-+ MERGE_LOW, -+ -+ // Concatenate the vectors in the first two operands, shift them left -+ // by the third operand, and take the first half of the result. -+ SHL_DOUBLE, -+ -+ // Take one element of the first v2i64 operand and the one element of -+ // the second v2i64 operand and concatenate them to form a v2i64 result. -+ // The third operand is a 4-bit value of the form 0A0B, where A and B -+ // are the element selectors for the first operand and second operands -+ // respectively. -+ PERMUTE_DWORDS, -+ -+ // Perform a general vector permute on vector operands 0 and 1. -+ // Each byte of operand 2 controls the corresponding byte of the result, -+ // in the same way as a byte-level VECTOR_SHUFFLE mask. -+ PERMUTE, -+ -+ // Pack vector operands 0 and 1 into a single vector with half-sized elements. -+ PACK, -+ -+ // Likewise, but saturate the result and set CC. PACKS_CC does signed -+ // saturation and PACKLS_CC does unsigned saturation. -+ PACKS_CC, -+ PACKLS_CC, -+ -+ // Unpack the first half of vector operand 0 into double-sized elements. -+ // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends. -+ UNPACK_HIGH, -+ UNPACKL_HIGH, -+ -+ // Likewise for the second half. -+ UNPACK_LOW, -+ UNPACKL_LOW, -+ -+ // Shift each element of vector operand 0 by the number of bits specified -+ // by scalar operand 1. -+ VSHL_BY_SCALAR, -+ VSRL_BY_SCALAR, -+ VSRA_BY_SCALAR, -+ -+ // For each element of the output type, sum across all sub-elements of -+ // operand 0 belonging to the corresponding element, and add in the -+ // rightmost sub-element of the corresponding element of operand 1. -+ VSUM, -+ -+ // Compare integer vector operands 0 and 1 to produce the usual 0/-1 -+ // vector result. VICMPE is for equality, VICMPH for "signed greater than" -+ // and VICMPHL for "unsigned greater than". -+ VICMPE, -+ VICMPH, -+ VICMPHL, -+ -+ // Likewise, but also set the condition codes on the result. 
-+ VICMPES, -+ VICMPHS, -+ VICMPHLS, -+ -+ // Compare floating-point vector operands 0 and 1 to preoduce the usual 0/-1 -+ // vector result. VFCMPE is for "ordered and equal", VFCMPH for "ordered and -+ // greater than" and VFCMPHE for "ordered and greater than or equal to". -+ VFCMPE, -+ VFCMPH, -+ VFCMPHE, -+ -+ // Likewise, but also set the condition codes on the result. -+ VFCMPES, -+ VFCMPHS, -+ VFCMPHES, -+ -+ // Test floating-point data class for vectors. -+ VFTCI, -+ -+ // Extend the even f32 elements of vector operand 0 to produce a vector -+ // of f64 elements. -+ VEXTEND, -+ -+ // Round the f64 elements of vector operand 0 to f32s and store them in the -+ // even elements of the result. -+ VROUND, -+ -+ // AND the two vector operands together and set CC based on the result. -+ VTM, -+ -+ // String operations that set CC as a side-effect. -+ VFAE_CC, -+ VFAEZ_CC, -+ VFEE_CC, -+ VFEEZ_CC, -+ VFENE_CC, -+ VFENEZ_CC, -+ VISTR_CC, -+ VSTRC_CC, -+ VSTRCZ_CC, -+ - // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or - // ATOMIC_LOAD_. - // -@@ -204,9 +341,33 @@ public: - MVT getScalarShiftAmountTy(EVT LHSTy) const override { - return MVT::i32; - } -+ MVT getVectorIdxTy() const override { -+ // Only the lower 12 bits of an element index are used, so we don't -+ // want to clobber the upper 32 bits of a GPR unnecessarily. -+ return MVT::i32; -+ } -+ TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT) -+ const override { -+ // Widen subvectors to the full width rather than promoting integer -+ // elements. This is better because: -+ // -+ // (a) it means that we can handle the ABI for passing and returning -+ // sub-128 vectors without having to handle them as legal types. -+ // -+ // (b) we don't have instructions to extend on load and truncate on store, -+ // so promoting the integers is less efficient. -+ // -+ // (c) there are no multiplication instructions for the widest integer -+ // type (v2i64). 
-+ if (VT.getVectorElementType().getSizeInBits() % 8 == 0) -+ return TypeWidenVector; -+ return TargetLoweringBase::getPreferredVectorAction(VT); -+ } - EVT getSetCCResultType(LLVMContext &, EVT) const override; - bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; - bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; -+ bool isLegalICmpImmediate(int64_t Imm) const override; -+ bool isLegalAddImmediate(int64_t Imm) const override; - bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; - bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, - unsigned Align, -@@ -257,6 +418,9 @@ private: - SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const; -+ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, -+ SelectionDAG &DAG, unsigned Opcode, -+ SDValue GOTOffset) const; - SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, - SelectionDAG &DAG) const; - SDValue lowerBlockAddress(BlockAddressSDNode *Node, -@@ -272,6 +436,7 @@ private: - SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, -@@ -282,6 +447,22 @@ private: - SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; - SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) 
const; -+ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; -+ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, -+ unsigned UnpackHigh) const; -+ SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; -+ -+ SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, -+ unsigned Index, DAGCombinerInfo &DCI, -+ bool Force) const; -+ SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, -+ DAGCombinerInfo &DCI) const; - - // If the last instruction before MBBI in MBB was some form of COMPARE, - // try to replace it with a COMPARE AND BRANCH just before MBBI. -@@ -319,6 +500,10 @@ private: - MachineBasicBlock *emitStringWrapper(MachineInstr *MI, - MachineBasicBlock *BB, - unsigned Opcode) const; -+ MachineBasicBlock *emitTransactionBegin(MachineInstr *MI, -+ MachineBasicBlock *MBB, -+ unsigned Opcode, -+ bool NoFloat) const; - }; - } // end namespace llvm - -Index: llvm-36/lib/Target/SystemZ/SystemZInstrFP.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFP.td -+++ llvm-36/lib/Target/SystemZ/SystemZInstrFP.td -@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, Compare - defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>; - defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>; - } --defm : CompareZeroFP; --defm : CompareZeroFP; --defm : CompareZeroFP; -+// Note that the comparison against zero operation is not available if we -+// have vector support, since load-and-test instructions will partially -+// clobber the target (vector) register. -+let Predicates = [FeatureNoVector] in { -+ defm : CompareZeroFP; -+ defm : CompareZeroFP; -+ defm : CompareZeroFP; -+} - - // Moves between 64-bit integer and floating-point registers. 
- def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>; -@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1 - defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>; - defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>; - -+ // For z13 we prefer LDE over LE to avoid partial register dependencies. -+ def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>; -+ - // These instructions are split after register allocation, so we don't - // want a custom inserter. - let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in { -@@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345, - Requires<[FeatureFPExtension]>; - - def : Pat<(f32 (fround FP128:$src)), -- (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>; -+ (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>; - def : Pat<(f64 (fround FP128:$src)), - (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>; - -@@ -345,13 +353,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmu - def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>; - def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))), - (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)), -- FP32:$src1, subreg_h32), FP32:$src2)>; -+ FP32:$src1, subreg_r32), FP32:$src2)>; - - // f64 multiplication of an FP32 register and an f32 memory. - def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>; - def : Pat<(fmul (f64 (fextend FP32:$src1)), - (f64 (extloadf32 bdxaddr12only:$addr))), -- (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32), -+ (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32), - bdxaddr12only:$addr)>; - - // f128 multiplication of two FP64 registers. 
-Index: llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFormats.td -+++ llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td -@@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping - // Formats are specified using operand field declarations of the form: - // - // bits<4> Rn : register input or output for operand n -+// bits<5> Vn : vector register input or output for operand n - // bits In : immediate value of width m for operand n - // bits<4> BDn : address operand n, which has a base and a displacement - // bits XBDn : address operand n, which has an index, a base and a - // displacement -+// bits VBDn : address operand n, which has a vector index, a base and a -+// displacement - // bits<4> Xn : index register for address operand n - // bits<4> Mn : mode value for operand n - // -@@ -339,11 +342,13 @@ class InstRXE op, dag outs, dag - - bits<4> R1; - bits<20> XBD2; -+ bits<4> M3; - - let Inst{47-40} = op{15-8}; - let Inst{39-36} = R1; - let Inst{35-16} = XBD2; -- let Inst{15-8} = 0; -+ let Inst{15-12} = M3; -+ let Inst{11-8} = 0; - let Inst{7-0} = op{7-0}; - - let HasIndex = 1; -@@ -473,6 +478,393 @@ class InstSS op, dag outs, dag i - let Inst{15-0} = BD2; - } - -+class InstS op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<4, outs, ins, asmstr, pattern> { -+ field bits<32> Inst; -+ field bits<32> SoftFail = 0; -+ -+ bits<16> BD2; -+ -+ let Inst{31-16} = op; -+ let Inst{15-0} = BD2; -+} -+ -+class InstVRIa op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<16> I2; -+ bits<4> M3; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = 0; -+ let Inst{31-16} = I2; -+ let Inst{15-12} = M3; -+ let Inst{11} = V1{4}; -+ let Inst{10-8} = 0; -+ let Inst{7-0} = op{7-0}; 
-+} -+ -+class InstVRIb op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<8> I2; -+ bits<8> I3; -+ bits<4> M4; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = 0; -+ let Inst{31-24} = I2; -+ let Inst{23-16} = I3; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRIc op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V3; -+ bits<16> I2; -+ bits<4> M4; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V3{3-0}; -+ let Inst{31-16} = I2; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V3{4}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRId op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<5> V3; -+ bits<8> I4; -+ bits<4> M5; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-28} = V3{3-0}; -+ let Inst{27-24} = 0; -+ let Inst{23-16} = I4; -+ let Inst{15-12} = M5; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9} = V3{4}; -+ let Inst{8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRIe op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<12> I3; -+ bits<4> M4; -+ bits<4> M5; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-20} = I3; -+ let Inst{19-16} = M5; -+ let Inst{15-12} = M4; -+ let Inst{11} 
= V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+// Depending on the instruction mnemonic, certain bits may be or-ed into -+// the M4 value provided as explicit operand. These are passed as m4or. -+class InstVRRa op, dag outs, dag ins, string asmstr, list pattern, -+ bits<4> m4or = 0> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<4> M3; -+ bits<4> M4; -+ bits<4> M5; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-24} = 0; -+ let Inst{23-20} = M5; -+ let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3}); -+ let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2}); -+ let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1}); -+ let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0}); -+ let Inst{15-12} = M3; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+// Depending on the instruction mnemonic, certain bits may be or-ed into -+// the M5 value provided as explicit operand. These are passed as m5or. 
-+class InstVRRb op, dag outs, dag ins, string asmstr, list pattern, -+ bits<4> m5or = 0> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<5> V3; -+ bits<4> M4; -+ bits<4> M5; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-28} = V3{3-0}; -+ let Inst{27-24} = 0; -+ let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3}); -+ let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2}); -+ let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1}); -+ let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0}); -+ let Inst{19-16} = 0; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9} = V3{4}; -+ let Inst{8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRRc op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<5> V3; -+ bits<4> M4; -+ bits<4> M5; -+ bits<4> M6; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-28} = V3{3-0}; -+ let Inst{27-24} = 0; -+ let Inst{23-20} = M6; -+ let Inst{19-16} = M5; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9} = V3{4}; -+ let Inst{8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+// Depending on the instruction mnemonic, certain bits may be or-ed into -+// the M6 value provided as explicit operand. These are passed as m6or. 
-+class InstVRRd op, dag outs, dag ins, string asmstr, list pattern, -+ bits<4> m6or = 0> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<5> V3; -+ bits<5> V4; -+ bits<4> M5; -+ bits<4> M6; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-28} = V3{3-0}; -+ let Inst{27-24} = M5; -+ let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3}); -+ let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2}); -+ let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1}); -+ let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0}); -+ let Inst{19-16} = 0; -+ let Inst{15-12} = V4{3-0}; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9} = V3{4}; -+ let Inst{8} = V4{4}; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRRe op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<5> V2; -+ bits<5> V3; -+ bits<5> V4; -+ bits<4> M5; -+ bits<4> M6; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V2{3-0}; -+ let Inst{31-28} = V3{3-0}; -+ let Inst{27-24} = M6; -+ let Inst{23-20} = 0; -+ let Inst{19-16} = M5; -+ let Inst{15-12} = V4{3-0}; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V2{4}; -+ let Inst{9} = V3{4}; -+ let Inst{8} = V4{4}; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRRf op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<4> R2; -+ bits<4> R3; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = R2; -+ let Inst{31-28} = R3; -+ let Inst{27-12} = 0; -+ let Inst{11} = V1{4}; -+ let Inst{10-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRSa op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, 
outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<16> BD2; -+ bits<5> V3; -+ bits<4> M4; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = V3{3-0}; -+ let Inst{31-16} = BD2; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10} = V3{4}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRSb op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<16> BD2; -+ bits<4> R3; -+ bits<4> M4; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-32} = R3; -+ let Inst{31-16} = BD2; -+ let Inst{15-12} = M4; -+ let Inst{11} = V1{4}; -+ let Inst{10-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRSc op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<4> R1; -+ bits<16> BD2; -+ bits<5> V3; -+ bits<4> M4; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = R1; -+ let Inst{35-32} = V3{3-0}; -+ let Inst{31-16} = BD2; -+ let Inst{15-12} = M4; -+ let Inst{11} = 0; -+ let Inst{10} = V3{4}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRV op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; -+ bits<21> VBD2; -+ bits<4> M3; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-16} = VBD2{19-0}; -+ let Inst{15-12} = M3; -+ let Inst{11} = V1{4}; -+ let Inst{10} = VBD2{20}; -+ let Inst{9-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ -+class InstVRX op, dag outs, dag ins, string asmstr, list pattern> -+ : InstSystemZ<6, outs, ins, asmstr, pattern> { -+ field bits<48> Inst; -+ field bits<48> SoftFail = 0; -+ -+ bits<5> V1; 
-+ bits<20> XBD2; -+ bits<4> M3; -+ -+ let Inst{47-40} = op{15-8}; -+ let Inst{39-36} = V1{3-0}; -+ let Inst{35-16} = XBD2; -+ let Inst{15-12} = M3; -+ let Inst{11} = V1{4}; -+ let Inst{10-8} = 0; -+ let Inst{7-0} = op{7-0}; -+} -+ - //===----------------------------------------------------------------------===// - // Instruction definitions with semantics - //===----------------------------------------------------------------------===// -@@ -492,12 +884,6 @@ class InstSS op, dag outs, dag i - // form of the source register in the destination register and - // branches on the result. - // --// Store: --// One register or immediate input operand and one address input operand. --// The instruction stores the first operand to the address. --// --// This category is used for both pure and truncating stores. --// - // LoadMultiple: - // One address input operand and two explicit output operands. - // The instruction loads a range of registers from the address, -@@ -510,18 +896,35 @@ class InstSS op, dag outs, dag i - // with the explicit operands giving the first and last register - // to store. Other stored registers are added as implicit uses. - // -+// StoreLength: -+// One value operand, one length operand and one address operand. -+// The instruction stores the value operand to the address but -+// doesn't write more than the number of bytes specified by the -+// length operand. -+// - // Unary: - // One register output operand and one input operand. - // -+// Store: -+// One address operand and one other input operand. The instruction -+// stores to the address. -+// - // Binary: - // One register output operand and two input operands. - // -+// StoreBinary: -+// One address operand and two other input operands. The instruction -+// stores to the address. -+// - // Compare: - // Two input operands and an implicit CC output operand. - // - // Ternary: - // One register output operand and three input operands. 
- // -+// Quaternary: -+// One register output operand and four input operands. -+// - // LoadAndOp: - // One output operand and two input operands, one of which is an address. - // The instruction both reads from and writes to the address. -@@ -556,6 +959,12 @@ class InherentRRE opcode, bits<16> value> -+ : InstVRIa { -+ let I2 = value; -+ let M3 = 0; -+} -+ - class BranchUnaryRI opcode, RegisterOperand cls> - : InstRI { -@@ -571,6 +980,13 @@ class LoadMultipleRSY opcode> -+ : InstVRSa { -+ let M4 = 0; -+ let mayLoad = 1; -+} -+ - class StoreRILPC opcode, SDPatternOperator operator, - RegisterOperand cls> - : InstRIL opcode, SDPatternOperator operator, -+ TypedReg tr, bits<5> bytes, bits<4> type = 0> -+ : InstVRX { -+ let M3 = type; -+ let mayStore = 1; -+ let AccessBytes = bytes; -+} -+ -+class StoreLengthVRSb opcode, -+ SDPatternOperator operator, bits<5> bytes> -+ : InstVRSb { -+ let M4 = 0; -+ let mayStore = 1; -+ let AccessBytes = bytes; -+} -+ - class StoreMultipleRSY opcode, RegisterOperand cls> - : InstRSY { - let mayStore = 1; - } - -+class StoreMultipleVRSa opcode> -+ : InstVRSa { -+ let M4 = 0; -+ let mayStore = 1; -+} -+ - // StoreSI* instructions are used to store an integer to memory, but the - // addresses are more restricted than for normal stores. 
If we are in the - // situation of having to force either the address into a register or the -@@ -857,6 +1300,7 @@ class UnaryRXE - let OpType = "mem"; - let mayLoad = 1; - let AccessBytes = bytes; -+ let M3 = 0; - } - - class UnaryRXY opcode, SDPatternOperator operator, -@@ -883,6 +1327,46 @@ multiclass UnaryRXPair opcode, SDPatternOperator operator, -+ TypedReg tr, Immediate imm, bits<4> type = 0> -+ : InstVRIa { -+ let M3 = type; -+} -+ -+class UnaryVRRa opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, -+ bits<4> m5 = 0> -+ : InstVRRa { -+ let M3 = type; -+ let M4 = m4; -+ let M5 = m5; -+} -+ -+multiclass UnaryVRRaSPair opcode, -+ SDPatternOperator operator, -+ SDPatternOperator operator_cc, TypedReg tr1, -+ TypedReg tr2, bits<4> type, bits<4> modifier = 0, -+ bits<4> modifier_cc = 1> { -+ def "" : UnaryVRRa; -+ let Defs = [CC] in -+ def S : UnaryVRRa; -+} -+ -+class UnaryVRX opcode, SDPatternOperator operator, -+ TypedReg tr, bits<5> bytes, bits<4> type = 0> -+ : InstVRX { -+ let M3 = type; -+ let mayLoad = 1; -+ let AccessBytes = bytes; -+} -+ - class BinaryRR opcode, SDPatternOperator operator, - RegisterOperand cls1, RegisterOperand cls2> - : InstRR opcode, SDPatternOperator operator, -@@ -1094,6 +1579,148 @@ multiclass BinarySIPair opcode, SDPatternOperator operator, -+ TypedReg tr, bits<4> type> -+ : InstVRIb { -+ let M4 = type; -+} -+ -+class BinaryVRIc opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type> -+ : InstVRIc { -+ let M4 = type; -+} -+ -+class BinaryVRIe opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5> -+ : InstVRIe { -+ let M4 = type; -+ let M5 = m5; -+} -+ -+class BinaryVRRa opcode> -+ : InstVRRa { -+ let M4 = 0; -+ let M5 = 0; -+} -+ -+class BinaryVRRb opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, -+ bits<4> modifier = 0> -+ : InstVRRb { -+ let M4 = type; -+ let M5 = modifier; -+} 
-+ -+// Declare a pair of instructions, one which sets CC and one which doesn't. -+// The CC-setting form ends with "S" and sets the low bit of M5. -+multiclass BinaryVRRbSPair opcode, -+ SDPatternOperator operator, -+ SDPatternOperator operator_cc, TypedReg tr1, -+ TypedReg tr2, bits<4> type, -+ bits<4> modifier = 0, bits<4> modifier_cc = 1> { -+ def "" : BinaryVRRb; -+ let Defs = [CC] in -+ def S : BinaryVRRb; -+} -+ -+class BinaryVRRc opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, -+ bits<4> m6 = 0> -+ : InstVRRc { -+ let M4 = type; -+ let M5 = m5; -+ let M6 = m6; -+} -+ -+multiclass BinaryVRRcSPair opcode, -+ SDPatternOperator operator, -+ SDPatternOperator operator_cc, TypedReg tr1, -+ TypedReg tr2, bits<4> type, bits<4> m5, -+ bits<4> modifier = 0, bits<4> modifier_cc = 1> { -+ def "" : BinaryVRRc; -+ let Defs = [CC] in -+ def S : BinaryVRRc; -+} -+ -+class BinaryVRRf opcode, SDPatternOperator operator, -+ TypedReg tr> -+ : InstVRRf; -+ -+class BinaryVRSa opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type> -+ : InstVRSa { -+ let M4 = type; -+} -+ -+class BinaryVRSb opcode, SDPatternOperator operator, -+ bits<5> bytes> -+ : InstVRSb { -+ let M4 = 0; -+ let mayLoad = 1; -+ let AccessBytes = bytes; -+} -+ -+class BinaryVRSc opcode, SDPatternOperator operator, -+ TypedReg tr, bits<4> type> -+ : InstVRSc { -+ let M4 = type; -+} -+ -+class BinaryVRX opcode, SDPatternOperator operator, -+ TypedReg tr, bits<5> bytes> -+ : InstVRX { -+ let mayLoad = 1; -+ let AccessBytes = bytes; -+} -+ -+class StoreBinaryVRV opcode, bits<5> bytes, -+ Immediate index> -+ : InstVRV { -+ let mayStore = 1; -+ let AccessBytes = bytes; -+} -+ -+class StoreBinaryVRX opcode, -+ SDPatternOperator operator, TypedReg tr, bits<5> bytes, -+ Immediate index> -+ : InstVRX { -+ let mayStore = 1; -+ let AccessBytes = bytes; -+} -+ - class CompareRR opcode, SDPatternOperator operator, - RegisterOperand cls1, 
RegisterOperand cls2> - : InstRR opcode, SDPatternOperator operator, -@@ -1235,6 +1863,17 @@ multiclass CompareSIPair opcode, SDPatternOperator operator, -+ TypedReg tr, bits<4> type> -+ : InstVRRa { -+ let isCompare = 1; -+ let M3 = type; -+ let M4 = 0; -+ let M5 = 0; -+} -+ - class TernaryRRD opcode, - SDPatternOperator operator, RegisterOperand cls> - : InstRRD opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> -+ : InstVRIa { -+ let Constraints = "$V1 = $V1src"; -+ let DisableEncoding = "$V1src"; -+} -+ -+class TernaryVRId opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type> -+ : InstVRId { -+ let M5 = type; -+} -+ -+class TernaryVRRa opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> -+ : InstVRRa { -+ let M3 = type; -+} -+ -+class TernaryVRRb opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type, -+ SDPatternOperator m5mask, bits<4> m5or> -+ : InstVRRb { -+ let M4 = type; -+} -+ -+multiclass TernaryVRRbSPair opcode, -+ SDPatternOperator operator, -+ SDPatternOperator operator_cc, TypedReg tr1, -+ TypedReg tr2, bits<4> type, bits<4> m5or> { -+ def "" : TernaryVRRb; -+ def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, -+ tr2.op:$V3, 0)>; -+ let Defs = [CC] in -+ def S : TernaryVRRb; -+ def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, -+ tr2.op:$V3, 0)>; -+} -+ -+class TernaryVRRc opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2> -+ : InstVRRc { -+ let M5 = 0; -+ let M6 = 0; -+} -+ -+class TernaryVRRd opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type = 0> -+ : InstVRRd { -+ let M5 = type; -+ let M6 = 0; -+} -+ -+class TernaryVRRe opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> -+ : InstVRRe { -+ let M5 = m5; -+ let M6 = type; -+} -+ -+class TernaryVRSb opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, 
RegisterOperand cls, bits<4> type> -+ : InstVRSb { -+ let Constraints = "$V1 = $V1src"; -+ let DisableEncoding = "$V1src"; -+ let M4 = type; -+} -+ -+class TernaryVRV opcode, bits<5> bytes, -+ Immediate index> -+ : InstVRV { -+ let Constraints = "$V1 = $V1src"; -+ let DisableEncoding = "$V1src"; -+ let mayLoad = 1; -+ let AccessBytes = bytes; -+} -+ -+class TernaryVRX opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> -+ : InstVRX { -+ let Constraints = "$V1 = $V1src"; -+ let DisableEncoding = "$V1src"; -+ let mayLoad = 1; -+ let AccessBytes = bytes; -+} -+ -+class QuaternaryVRId opcode, SDPatternOperator operator, -+ TypedReg tr1, TypedReg tr2, bits<4> type> -+ : InstVRId { -+ let Constraints = "$V1 = $V1src"; -+ let DisableEncoding = "$V1src"; -+ let M5 = type; -+} -+ -+class QuaternaryVRRd opcode, -+ SDPatternOperator operator, TypedReg tr1, TypedReg tr2, -+ bits<4> type, SDPatternOperator m6mask, bits<4> m6or> -+ : InstVRRd { -+ let M5 = type; -+} -+ -+multiclass QuaternaryVRRdSPair opcode, -+ SDPatternOperator operator, -+ SDPatternOperator operator_cc, TypedReg tr1, -+ TypedReg tr2, bits<4> type, bits<4> m6or> { -+ def "" : QuaternaryVRRd; -+ def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, -+ tr2.op:$V3, tr2.op:$V4, 0)>; -+ let Defs = [CC] in -+ def S : QuaternaryVRRd; -+ def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, -+ tr2.op:$V3, tr2.op:$V4, 0)>; -+} -+ - class LoadAndOpRSY opcode, SDPatternOperator operator, - RegisterOperand cls, AddressingMode mode = bdaddr20only> - : InstRSY opcode, - RegisterOperand cls> { - def "" : UnaryRRE; -- let isCodeGenOnly = 1 in -+ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in - def Compare : CompareRRE; - } - -@@ -1577,6 +2401,26 @@ class Alias -+ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>; -+ -+// An alias of a UnaryVRR*, but with different register sizes. 
-+class UnaryAliasVRR -+ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2), -+ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>; -+ -+// An alias of a UnaryVRX, but with different register sizes. -+class UnaryAliasVRX -+ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2), -+ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>; -+ -+// An alias of a StoreVRX, but with different register sizes. -+class StoreAliasVRX -+ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2), -+ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>; -+ - // An alias of a BinaryRI, but with different register sizes. - class BinaryAliasRI -@@ -1593,6 +2437,10 @@ class BinaryAliasRIL -+ : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>; -+ - // An alias of a CompareRI, but with different register sizes. - class CompareAliasRI -Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp -@@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBas - Opcode = SystemZ::LDR; - else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) - Opcode = SystemZ::LXR; -+ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg)) -+ Opcode = SystemZ::VLR32; -+ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg)) -+ Opcode = SystemZ::VLR64; -+ else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg)) -+ Opcode = SystemZ::VLR; - else - llvm_unreachable("Impossible reg-to-reg copy"); - -@@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress( - unsigned Start, End; - if (isRxSBGMask(Imm, And.RegSize, Start, End)) { - unsigned NewOpcode; -- if (And.RegSize == 64) -+ if (And.RegSize == 64) { - NewOpcode = SystemZ::RISBG; -- else { -+ // Prefer RISBGN if available, since it does not clobber CC. 
-+ if (STI.hasMiscellaneousExtensions()) -+ NewOpcode = SystemZ::RISBGN; -+ } else { - NewOpcode = SystemZ::RISBMux; - Start &= 31; - End &= 31; -@@ -1114,6 +1123,16 @@ void SystemZInstrInfo::getLoadStoreOpcod - } else if (RC == &SystemZ::FP128BitRegClass) { - LoadOpcode = SystemZ::LX; - StoreOpcode = SystemZ::STX; -+ } else if (RC == &SystemZ::VR32BitRegClass) { -+ LoadOpcode = SystemZ::VL32; -+ StoreOpcode = SystemZ::VST32; -+ } else if (RC == &SystemZ::VR64BitRegClass) { -+ LoadOpcode = SystemZ::VL64; -+ StoreOpcode = SystemZ::VST64; -+ } else if (RC == &SystemZ::VF128BitRegClass || -+ RC == &SystemZ::VR128BitRegClass) { -+ LoadOpcode = SystemZ::VL; -+ StoreOpcode = SystemZ::VST; - } else - llvm_unreachable("Unsupported regclass to load or store"); - } -@@ -1147,17 +1166,22 @@ unsigned SystemZInstrInfo::getOpcodeForO - - unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { - switch (Opcode) { -- case SystemZ::L: return SystemZ::LT; -- case SystemZ::LY: return SystemZ::LT; -- case SystemZ::LG: return SystemZ::LTG; -- case SystemZ::LGF: return SystemZ::LTGF; -- case SystemZ::LR: return SystemZ::LTR; -- case SystemZ::LGFR: return SystemZ::LTGFR; -- case SystemZ::LGR: return SystemZ::LTGR; -- case SystemZ::LER: return SystemZ::LTEBR; -- case SystemZ::LDR: return SystemZ::LTDBR; -- case SystemZ::LXR: return SystemZ::LTXBR; -- default: return 0; -+ case SystemZ::L: return SystemZ::LT; -+ case SystemZ::LY: return SystemZ::LT; -+ case SystemZ::LG: return SystemZ::LTG; -+ case SystemZ::LGF: return SystemZ::LTGF; -+ case SystemZ::LR: return SystemZ::LTR; -+ case SystemZ::LGFR: return SystemZ::LTGFR; -+ case SystemZ::LGR: return SystemZ::LTGR; -+ case SystemZ::LER: return SystemZ::LTEBR; -+ case SystemZ::LDR: return SystemZ::LTDBR; -+ case SystemZ::LXR: return SystemZ::LTXBR; -+ // On zEC12 we prefer to use RISBGN. But if there is a chance to -+ // actually use the condition code, we may turn it back into RISGB. 
-+ // Note that RISBG is not really a "load-and-test" instruction, -+ // but sets the same condition code values, so is OK to use here. -+ case SystemZ::RISBGN: return SystemZ::RISBG; -+ default: return 0; - } - } - -@@ -1178,6 +1202,7 @@ static bool isStringOfOnes(uint64_t Mask - bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, - unsigned &Start, unsigned &End) const { - // Reject trivial all-zero masks. -+ Mask &= allOnes(BitSize); - if (Mask == 0) - return false; - -Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.h -+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h -@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCM - // SystemZ MachineOperand target flags. - enum { - // Masks out the bits for the access model. -- MO_SYMBOL_MODIFIER = (1 << 0), -+ MO_SYMBOL_MODIFIER = (3 << 0), - - // @GOT (aka @GOTENT) -- MO_GOT = (1 << 0) -+ MO_GOT = (1 << 0), -+ -+ // @INDNTPOFF -+ MO_INDNTPOFF = (2 << 0) - }; - // Classifies a branch. - enum BranchType { -Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.td -+++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td -@@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isRetu - def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; - } - -+// TLS calls. These will be lowered into a call to __tls_get_offset, -+// with an extra relocation specifying the TLS symbol. -+let isCall = 1, Defs = [R14D, CC] in { -+ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), -+ [(z_tls_gdcall tglobaltlsaddr:$I2)]>; -+ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), -+ [(z_tls_ldcall tglobaltlsaddr:$I2)]>; -+} -+ - // Define the general form of the call instructions for the asm parser. 
- // These instructions don't hard-code %r14 as the return address register. --def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), -+// Allow an optional TLS marker symbol to generate TLS call relocations. -+def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), - "bras\t$R1, $I2", []>; --def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), -+def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), - "brasl\t$R1, $I2", []>; - def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), - "basr\t$R1, $R2", []>; -@@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove - [(set GR64:$R1, pcrel32:$I2)]>; - } - -+// Load the Global Offset Table address. This will be lowered into a -+// larl $R1, _GLOBAL_OFFSET_TABLE_ -+// instruction. -+def GOT : Alias<6, (outs GR64:$R1), (ins), -+ [(set GR64:$R1, (global_offset_table))]>; -+ - //===----------------------------------------------------------------------===// - // Absolute and Negation - //===----------------------------------------------------------------------===// -@@ -1045,6 +1061,10 @@ let Defs = [CC] in { - def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; - } - -+// On zEC12 we have a variant of RISBG that does not set CC. -+let Predicates = [FeatureMiscellaneousExtensions] in -+ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; -+ - // Forms of RISBG that only affect one word of the destination register. - // They do not set CC. 
- let Predicates = [FeatureHighWord] in { -@@ -1342,6 +1362,60 @@ let Defs = [CC] in { - } - - //===----------------------------------------------------------------------===// -+// Transactional execution -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureTransactionalExecution] in { -+ // Transaction Begin -+ let hasSideEffects = 1, mayStore = 1, -+ usesCustomInserter = 1, Defs = [CC] in { -+ def TBEGIN : InstSIL<0xE560, -+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), -+ "tbegin\t$BD1, $I2", -+ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; -+ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), -+ [(z_tbegin_nofloat bdaddr12only:$BD1, -+ imm32zx16:$I2)]>; -+ def TBEGINC : InstSIL<0xE561, -+ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), -+ "tbeginc\t$BD1, $I2", -+ [(int_s390_tbeginc bdaddr12only:$BD1, -+ imm32zx16:$I2)]>; -+ } -+ -+ // Transaction End -+ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in -+ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; -+ -+ // Transaction Abort -+ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in -+ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), -+ "tabort\t$BD2", -+ [(int_s390_tabort bdaddr12only:$BD2)]>; -+ -+ // Nontransactional Store -+ let hasSideEffects = 1 in -+ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; -+ -+ // Extract Transaction Nesting Depth -+ let hasSideEffects = 1 in -+ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Processor assist -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureProcessorAssist] in { -+ let hasSideEffects = 1, R4 = 0 in -+ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), -+ "ppa\t$R1, $R2, $R3", []>; -+ def : Pat<(int_s390_ppa_txassist GR32:$src), -+ (PPA 
(INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), -+ 0, 1)>; -+} -+ -+//===----------------------------------------------------------------------===// - // Miscellaneous Instructions. - //===----------------------------------------------------------------------===// - -@@ -1366,6 +1440,13 @@ let Defs = [CC] in { - def : Pat<(ctlz GR64:$src), - (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; - -+// Population count. Counts bits set per byte. -+let Predicates = [FeaturePopulationCount], Defs = [CC] in { -+ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), -+ "popcnt\t$R1, $R2", -+ [(set GR64:$R1, (z_popcnt GR64:$R2))]>; -+} -+ - // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. - def : Pat<(i64 (anyext GR32:$src)), - (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; -Index: llvm-36/lib/Target/SystemZ/SystemZInstrVector.td -=================================================================== ---- /dev/null -+++ llvm-36/lib/Target/SystemZ/SystemZInstrVector.td -@@ -0,0 +1,1097 @@ -+//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+ -+//===----------------------------------------------------------------------===// -+// Move instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Register move. -+ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; -+ def VLR32 : UnaryAliasVRR; -+ def VLR64 : UnaryAliasVRR; -+ -+ // Load GR from VR element. 
-+ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>; -+ def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>; -+ def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>; -+ def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>; -+ -+ // Load VR element from GR. -+ def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert, -+ v128b, v128b, GR32, 0>; -+ def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert, -+ v128h, v128h, GR32, 1>; -+ def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert, -+ v128f, v128f, GR32, 2>; -+ def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert, -+ v128g, v128g, GR64, 3>; -+ -+ // Load VR from GRs disjoint. -+ def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>; -+ def VLVGP32 : BinaryAliasVRRf; -+} -+ -+// Extractions always assign to the full GR64, even if the element would -+// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a -+// subreg of the result. -+class VectorExtractSubreg -+ : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)), -+ (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>; -+ -+def : VectorExtractSubreg; -+def : VectorExtractSubreg; -+def : VectorExtractSubreg; -+ -+//===----------------------------------------------------------------------===// -+// Immediate instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Generate byte mask. -+ def VZERO : InherentVRIa<"vzero", 0xE744, 0>; -+ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; -+ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; -+ -+ // Generate mask. -+ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; -+ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; -+ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; -+ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; -+ -+ // Load element immediate. 
-+ // -+ // We want these instructions to be used ahead of VLVG* where possible. -+ // However, VLVG* takes a variable BD-format index whereas VLEI takes -+ // a plain immediate index. This means that VLVG* has an extra "base" -+ // register operand and is 3 units more complex. Bumping the complexity -+ // of the VLEI* instructions by 4 means that they are strictly better -+ // than VLVG* in cases where both forms match. -+ let AddedComplexity = 4 in { -+ def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, -+ v128b, v128b, imm32sx16trunc, imm32zx4>; -+ def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, -+ v128h, v128h, imm32sx16trunc, imm32zx3>; -+ def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, -+ v128f, v128f, imm32sx16, imm32zx2>; -+ def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, -+ v128g, v128g, imm64sx16, imm32zx1>; -+ } -+ -+ // Replicate immediate. -+ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; -+ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; -+ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; -+ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Loads -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Load. -+ def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; -+ -+ // Load to block boundary. The number of loaded bytes is only known -+ // at run time. The instruction is really polymorphic, but v128b matches -+ // the return type of the associated intrinsic. -+ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; -+ -+ // Load count to block boundary. 
-+ let Defs = [CC] in -+ def LCBB : InstRXE<0xE727, (outs GR32:$R1), -+ (ins bdxaddr12only:$XBD2, imm32zx4:$M3), -+ "lcbb\t$R1, $XBD2, $M3", -+ [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, -+ imm32zx4:$M3))]>; -+ -+ // Load with length. The number of loaded bytes is only known at run time. -+ def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; -+ -+ // Load multiple. -+ def VLM : LoadMultipleVRSa<"vlm", 0xE736>; -+ -+ // Load and replicate -+ def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>; -+ def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; -+ def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; -+ def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; -+ def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)), -+ (VLREPF bdxaddr12only:$addr)>; -+ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), -+ (VLREPG bdxaddr12only:$addr)>; -+ -+ // Use VLREP to load subvectors. These patterns use "12pair" because -+ // LEY and LDY offer full 20-bit displacement fields. It's often better -+ // to use those instructions rather than force a 20-bit displacement -+ // into a GPR temporary. -+ def VL32 : UnaryAliasVRX; -+ def VL64 : UnaryAliasVRX; -+ -+ // Load logical element and zero. -+ def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>; -+ def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>; -+ def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>; -+ def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>; -+ def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)), -+ (VLLEZF bdxaddr12only:$addr)>; -+ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), -+ (VLLEZG bdxaddr12only:$addr)>; -+ -+ // Load element. 
-+ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; -+ def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>; -+ def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>; -+ def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>; -+ def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index), -+ (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; -+ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index), -+ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; -+ -+ // Gather element. -+ def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>; -+ def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>; -+} -+ -+// Use replicating loads if we're inserting a single element into an -+// undefined vector. This avoids a false dependency on the previous -+// register contents. -+multiclass ReplicatePeephole { -+ def : Pat<(vectype (z_vector_insert -+ (undef), (scalartype (load bdxaddr12only:$addr)), 0)), -+ (vlrep bdxaddr12only:$addr)>; -+ def : Pat<(vectype (scalar_to_vector -+ (scalartype (load bdxaddr12only:$addr)))), -+ (vlrep bdxaddr12only:$addr)>; -+} -+defm : ReplicatePeephole; -+defm : ReplicatePeephole; -+defm : ReplicatePeephole; -+defm : ReplicatePeephole; -+defm : ReplicatePeephole; -+defm : ReplicatePeephole; -+ -+//===----------------------------------------------------------------------===// -+// Stores -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Store. -+ def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; -+ -+ // Store with length. The number of stored bytes is only known at run time. -+ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; -+ -+ // Store multiple. -+ def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; -+ -+ // Store element. 
-+ def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>; -+ def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>; -+ def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>; -+ def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>; -+ def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr, -+ imm32zx2:$index), -+ (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; -+ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr, -+ imm32zx1:$index), -+ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; -+ -+ // Use VSTE to store subvectors. These patterns use "12pair" because -+ // STEY and STDY offer full 20-bit displacement fields. It's often better -+ // to use those instructions rather than force a 20-bit displacement -+ // into a GPR temporary. -+ def VST32 : StoreAliasVRX; -+ def VST64 : StoreAliasVRX; -+ -+ // Scatter element. -+ def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; -+ def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Selects and permutes -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Merge high. -+ def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>; -+ def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; -+ def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; -+ def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; -+ def : BinaryRRWithType; -+ def : BinaryRRWithType; -+ -+ // Merge low. 
-+ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; -+ def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; -+ def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; -+ def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; -+ def : BinaryRRWithType; -+ def : BinaryRRWithType; -+ -+ // Permute. -+ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; -+ -+ // Permute doubleword immediate. -+ def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; -+ -+ // Replicate. -+ def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; -+ def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; -+ def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; -+ def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; -+ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), -+ (VREPF VR128:$vec, imm32zx16:$index)>; -+ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), -+ (VREPG VR128:$vec, imm32zx16:$index)>; -+ -+ // Select. -+ def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Widening and narrowing -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Pack -+ def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>; -+ def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>; -+ def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; -+ -+ // Pack saturate. -+ defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, -+ v128b, v128h, 1>; -+ defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, -+ v128h, v128f, 2>; -+ defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, -+ v128f, v128g, 3>; -+ -+ // Pack saturate logical. 
-+ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, -+ v128b, v128h, 1>; -+ defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, -+ v128h, v128f, 2>; -+ defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, -+ v128f, v128g, 3>; -+ -+ // Sign-extend to doubleword. -+ def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>; -+ def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>; -+ def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>; -+ def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>; -+ def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>; -+ def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>; -+ -+ // Unpack high. -+ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>; -+ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>; -+ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>; -+ -+ // Unpack logical high. -+ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>; -+ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>; -+ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>; -+ -+ // Unpack low. -+ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>; -+ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>; -+ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>; -+ -+ // Unpack logical low. -+ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>; -+ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>; -+ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Instantiating generic operations for specific types. 
-+//===----------------------------------------------------------------------===// -+ -+multiclass GenericVectorOps { -+ let Predicates = [FeatureVector] in { -+ def : Pat<(type (load bdxaddr12only:$addr)), -+ (VL bdxaddr12only:$addr)>; -+ def : Pat<(store (type VR128:$src), bdxaddr12only:$addr), -+ (VST VR128:$src, bdxaddr12only:$addr)>; -+ def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)), -+ (VSEL VR128:$y, VR128:$z, VR128:$x)>; -+ def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)), -+ (VSEL VR128:$z, VR128:$y, VR128:$x)>; -+ } -+} -+ -+defm : GenericVectorOps; -+defm : GenericVectorOps; -+defm : GenericVectorOps; -+defm : GenericVectorOps; -+defm : GenericVectorOps; -+defm : GenericVectorOps; -+ -+//===----------------------------------------------------------------------===// -+// Integer arithmetic -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Add. -+ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; -+ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; -+ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; -+ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; -+ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; -+ -+ // Add compute carry. -+ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; -+ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; -+ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; -+ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; -+ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; -+ -+ // Add with carry. -+ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; -+ -+ // Add with carry compute carry. -+ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; -+ -+ // And. 
-+ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; -+ -+ // And with complement. -+ def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; -+ -+ // Average. -+ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; -+ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; -+ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; -+ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; -+ -+ // Average logical. -+ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; -+ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; -+ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; -+ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; -+ -+ // Checksum. -+ def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; -+ -+ // Count leading zeros. -+ def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; -+ def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>; -+ def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>; -+ def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>; -+ -+ // Count trailing zeros. -+ def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>; -+ def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>; -+ def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; -+ def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; -+ -+ // Exclusive or. -+ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; -+ -+ // Galois field multiply sum. 
-+ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; -+ def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; -+ def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; -+ def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; -+ -+ // Galois field multiply sum and accumulate. -+ def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; -+ def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; -+ def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; -+ def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; -+ -+ // Load complement. -+ def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; -+ def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>; -+ def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>; -+ def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>; -+ -+ // Load positive. -+ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>; -+ def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>; -+ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>; -+ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; -+ -+ // Maximum. -+ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; -+ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; -+ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; -+ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; -+ -+ // Maximum logical. -+ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; -+ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; -+ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; -+ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; -+ -+ // Minimum. 
-+ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; -+ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; -+ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; -+ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; -+ -+ // Minimum logical. -+ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; -+ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; -+ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; -+ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; -+ -+ // Multiply and add low. -+ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; -+ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; -+ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; -+ -+ // Multiply and add high. -+ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; -+ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; -+ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; -+ -+ // Multiply and add logical high. -+ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; -+ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; -+ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; -+ -+ // Multiply and add even. -+ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; -+ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; -+ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; -+ -+ // Multiply and add logical even. -+ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; -+ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; -+ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; -+ -+ // Multiply and add odd. 
-+ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; -+ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; -+ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; -+ -+ // Multiply and add logical odd. -+ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; -+ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; -+ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; -+ -+ // Multiply high. -+ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; -+ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; -+ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; -+ -+ // Multiply logical high. -+ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; -+ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; -+ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; -+ -+ // Multiply low. -+ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; -+ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; -+ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; -+ -+ // Multiply even. -+ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; -+ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; -+ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; -+ -+ // Multiply logical even. -+ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; -+ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; -+ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; -+ -+ // Multiply odd. 
-+ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; -+ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; -+ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; -+ -+ // Multiply logical odd. -+ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; -+ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; -+ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; -+ -+ // Nor. -+ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; -+ -+ // Or. -+ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; -+ -+ // Population count. -+ def VPOPCT : BinaryVRRa<"vpopct", 0xE750>; -+ def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; -+ -+ // Element rotate left logical (with vector shift amount). -+ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb, -+ v128b, v128b, 0>; -+ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh, -+ v128h, v128h, 1>; -+ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf, -+ v128f, v128f, 2>; -+ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg, -+ v128g, v128g, 3>; -+ -+ // Element rotate left logical (with scalar shift amount). -+ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>; -+ def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>; -+ def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>; -+ def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>; -+ -+ // Element rotate and insert under mask. 
-+ def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; -+ def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; -+ def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; -+ def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; -+ -+ // Element shift left (with vector shift amount). -+ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; -+ def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>; -+ def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>; -+ def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>; -+ -+ // Element shift left (with scalar shift amount). -+ def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>; -+ def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>; -+ def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>; -+ def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>; -+ -+ // Element shift right arithmetic (with vector shift amount). -+ def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>; -+ def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>; -+ def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>; -+ def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>; -+ -+ // Element shift right arithmetic (with scalar shift amount). -+ def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>; -+ def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>; -+ def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>; -+ def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>; -+ -+ // Element shift right logical (with vector shift amount). 
-+ def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>; -+ def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>; -+ def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>; -+ def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>; -+ -+ // Element shift right logical (with scalar shift amount). -+ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>; -+ def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>; -+ def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>; -+ def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; -+ -+ // Shift left. -+ def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; -+ -+ // Shift left by byte. -+ def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; -+ -+ // Shift left double by byte. -+ def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; -+ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), -+ (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; -+ -+ // Shift right arithmetic. -+ def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; -+ -+ // Shift right arithmetic by byte. -+ def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; -+ -+ // Shift right logical. -+ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; -+ -+ // Shift right logical by byte. -+ def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; -+ -+ // Subtract. -+ def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; -+ def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; -+ def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; -+ def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; -+ def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>; -+ -+ // Subtract compute borrow indication. 
-+ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>; -+ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>; -+ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>; -+ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>; -+ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>; -+ -+ // Subtract with borrow indication. -+ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>; -+ -+ // Subtract with borrow compute borrow indication. -+ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq, -+ v128q, v128q, 4>; -+ -+ // Sum across doubleword. -+ def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; -+ def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>; -+ -+ // Sum across quadword. -+ def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>; -+ def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>; -+ -+ // Sum across word. -+ def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>; -+ def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>; -+} -+ -+// Instantiate the bitwise ops for type TYPE. 
-+multiclass BitwiseVectorOps { -+ let Predicates = [FeatureVector] in { -+ def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>; -+ def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))), -+ (VNC VR128:$x, VR128:$y)>; -+ def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>; -+ def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>; -+ def : Pat<(type (or (and VR128:$x, VR128:$z), -+ (and VR128:$y, (z_vnot VR128:$z)))), -+ (VSEL VR128:$x, VR128:$y, VR128:$z)>; -+ def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))), -+ (VNO VR128:$x, VR128:$y)>; -+ def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>; -+ } -+} -+ -+defm : BitwiseVectorOps; -+defm : BitwiseVectorOps; -+defm : BitwiseVectorOps; -+defm : BitwiseVectorOps; -+ -+// Instantiate additional patterns for absolute-related expressions on -+// type TYPE. LC is the negate instruction for TYPE and LP is the absolute -+// instruction. -+multiclass IntegerAbsoluteVectorOps { -+ let Predicates = [FeatureVector] in { -+ def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)), -+ (z_vneg VR128:$x), VR128:$x)), -+ (lc (lp VR128:$x))>; -+ def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))), -+ VR128:$x, (z_vneg VR128:$x))), -+ (lc (lp VR128:$x))>; -+ def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)), -+ VR128:$x, (z_vneg VR128:$x))), -+ (lc (lp VR128:$x))>; -+ def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))), -+ (z_vneg VR128:$x), VR128:$x)), -+ (lc (lp VR128:$x))>; -+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), -+ (z_vneg VR128:$x)), -+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), -+ VR128:$x))), -+ (lp VR128:$x)>; -+ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), -+ VR128:$x), -+ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), -+ (z_vneg VR128:$x)))), -+ (lc (lp VR128:$x))>; -+ } -+} -+ -+defm : IntegerAbsoluteVectorOps; -+defm : IntegerAbsoluteVectorOps; -+defm : 
IntegerAbsoluteVectorOps; -+defm : IntegerAbsoluteVectorOps; -+ -+// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the -+// signed or unsigned "set if greater than" comparison instruction and -+// MIN and MAX are the associated minimum and maximum instructions. -+multiclass IntegerMinMaxVectorOps { -+ let Predicates = [FeatureVector] in { -+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), -+ (max VR128:$x, VR128:$y)>; -+ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), -+ (min VR128:$x, VR128:$y)>; -+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), -+ VR128:$x, VR128:$y)), -+ (min VR128:$x, VR128:$y)>; -+ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), -+ VR128:$y, VR128:$x)), -+ (max VR128:$x, VR128:$y)>; -+ } -+} -+ -+// Signed min/max. -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+ -+// Unsigned min/max. -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+defm : IntegerMinMaxVectorOps; -+ -+//===----------------------------------------------------------------------===// -+// Integer comparison -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Element compare. -+ let Defs = [CC] in { -+ def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>; -+ def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>; -+ def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>; -+ def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>; -+ } -+ -+ // Element compare logical. 
-+ let Defs = [CC] in { -+ def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>; -+ def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>; -+ def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>; -+ def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>; -+ } -+ -+ // Compare equal. -+ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, -+ v128b, v128b, 0>; -+ defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, -+ v128h, v128h, 1>; -+ defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, -+ v128f, v128f, 2>; -+ defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, -+ v128g, v128g, 3>; -+ -+ // Compare high. -+ defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, -+ v128b, v128b, 0>; -+ defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, -+ v128h, v128h, 1>; -+ defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, -+ v128f, v128f, 2>; -+ defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, -+ v128g, v128g, 3>; -+ -+ // Compare high logical. -+ defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, -+ v128b, v128b, 0>; -+ defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, -+ v128h, v128h, 1>; -+ defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, -+ v128f, v128f, 2>; -+ defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, -+ v128g, v128g, 3>; -+ -+ // Test under mask. -+ let Defs = [CC] in -+ def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Floating-point arithmetic -+//===----------------------------------------------------------------------===// -+ -+// See comments in SystemZInstrFP.td for the suppression flags and -+// rounding modes. 
-+multiclass VectorRounding { -+ def : FPConversion; -+ def : FPConversion; -+ def : FPConversion; -+ def : FPConversion; -+ def : FPConversion; -+ def : FPConversion; -+} -+ -+let Predicates = [FeatureVector] in { -+ // Add. -+ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; -+ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; -+ -+ // Convert from fixed 64-bit. -+ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; -+ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; -+ def : FPConversion; -+ -+ // Convert from logical 64-bit. -+ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; -+ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; -+ def : FPConversion; -+ -+ // Convert to fixed 64-bit. -+ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; -+ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; -+ // Rounding mode should agree with SystemZInstrFP.td. -+ def : FPConversion; -+ -+ // Convert to logical 64-bit. -+ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; -+ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; -+ // Rounding mode should agree with SystemZInstrFP.td. -+ def : FPConversion; -+ -+ // Divide. -+ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; -+ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; -+ -+ // Load FP integer. -+ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; -+ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; -+ defm : VectorRounding; -+ defm : VectorRounding; -+ -+ // Load lengthened. 
-+ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; -+ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>; -+ -+ // Load rounded, -+ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; -+ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; -+ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; -+ def : FPConversion; -+ -+ // Multiply. -+ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; -+ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; -+ -+ // Multiply and add. -+ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; -+ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; -+ -+ // Multiply and subtract. -+ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; -+ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; -+ -+ // Load complement, -+ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; -+ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; -+ -+ // Load negative. -+ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; -+ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; -+ -+ // Load positive. -+ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; -+ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; -+ -+ // Square root. -+ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; -+ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; -+ -+ // Subtract. -+ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; -+ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; -+ -+ // Test data class immediate. 
-+ let Defs = [CC] in { -+ def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; -+ def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; -+ } -+} -+ -+//===----------------------------------------------------------------------===// -+// Floating-point comparison -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ // Compare scalar. -+ let Defs = [CC] in -+ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; -+ -+ // Compare and signal scalar. -+ let Defs = [CC] in -+ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; -+ -+ // Compare equal. -+ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, -+ v128g, v128db, 3, 0>; -+ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, -+ v64g, v64db, 3, 8>; -+ -+ // Compare high. -+ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, -+ v128g, v128db, 3, 0>; -+ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, -+ v64g, v64db, 3, 8>; -+ -+ // Compare high or equal. 
-+ defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes, -+ v128g, v128db, 3, 0>; -+ defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag, -+ v64g, v64db, 3, 8>; -+} -+ -+//===----------------------------------------------------------------------===// -+// Conversions -+//===----------------------------------------------------------------------===// -+ -+def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>; -+def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>; -+def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>; -+def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>; -+def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>; -+ -+def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>; -+def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>; -+def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>; -+def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>; -+def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>; -+ -+def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>; -+def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>; -+def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>; -+def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>; -+def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>; -+ -+def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>; -+def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>; -+def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>; -+def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>; -+def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>; -+ -+def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>; -+def : Pat<(v4f32 (bitconvert (v8i16 
VR128:$src))), (v4f32 VR128:$src)>; -+def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>; -+def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>; -+def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>; -+ -+def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>; -+def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>; -+def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>; -+def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>; -+def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>; -+ -+//===----------------------------------------------------------------------===// -+// Replicating scalars -+//===----------------------------------------------------------------------===// -+ -+// Define patterns for replicating a scalar GR32 into a vector of type TYPE. -+// INDEX is 8 minus the element size in bytes. -+class VectorReplicateScalar index> -+ : Pat<(type (z_replicate GR32:$scalar)), -+ (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>; -+ -+def : VectorReplicateScalar; -+def : VectorReplicateScalar; -+def : VectorReplicateScalar; -+ -+// i64 replications are just a single isntruction. -+def : Pat<(v2i64 (z_replicate GR64:$scalar)), -+ (VLVGP GR64:$scalar, GR64:$scalar)>; -+ -+//===----------------------------------------------------------------------===// -+// Floating-point insertion and extraction -+//===----------------------------------------------------------------------===// -+ -+// Moving 32-bit values between GPRs and FPRs can be done using VLVGF -+// and VLGVF. -+def LEFR : UnaryAliasVRS; -+def LFER : UnaryAliasVRS; -+def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; -+def : Pat<(i32 (bitconvert (f32 VR32:$src))), -+ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; -+ -+// Floating-point values are stored in element 0 of the corresponding -+// vector register. 
Scalar to vector conversion is just a subreg and -+// scalar replication can just replicate element 0 of the vector register. -+multiclass ScalarToVectorFP { -+ def : Pat<(vt (scalar_to_vector cls:$scalar)), -+ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; -+ def : Pat<(vt (z_replicate cls:$scalar)), -+ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, -+ subreg), 0)>; -+} -+defm : ScalarToVectorFP; -+defm : ScalarToVectorFP; -+ -+// Match v2f64 insertions. The AddedComplexity counters the 3 added by -+// TableGen for the base register operand in VLVG-based integer insertions -+// and ensures that this version is strictly better. -+let AddedComplexity = 4 in { -+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), -+ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, -+ subreg_r64), VR128:$vec, 1)>; -+ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), -+ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, -+ subreg_r64), 0)>; -+} -+ -+// We extract floating-point element X by replicating (for elements other -+// than 0) and then taking a high subreg. The AddedComplexity counters the -+// 3 added by TableGen for the base register operand in VLGV-based integer -+// extractions and ensures that this version is strictly better. 
-+let AddedComplexity = 4 in { -+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)), -+ (EXTRACT_SUBREG VR128:$vec, subreg_r32)>; -+ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)), -+ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>; -+ -+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), -+ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>; -+ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), -+ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; -+} -+ -+//===----------------------------------------------------------------------===// -+// String instructions -+//===----------------------------------------------------------------------===// -+ -+let Predicates = [FeatureVector] in { -+ defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc, -+ v128b, v128b, 0, 0>; -+ defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc, -+ v128h, v128h, 1, 0>; -+ defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc, -+ v128f, v128f, 2, 0>; -+ defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc, -+ v128b, v128b, 0, 2>; -+ defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc, -+ v128h, v128h, 1, 2>; -+ defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc, -+ v128f, v128f, 2, 2>; -+ -+ defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc, -+ v128b, v128b, 0, 0, 1>; -+ defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc, -+ v128h, v128h, 1, 0, 1>; -+ defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc, -+ v128f, v128f, 2, 0, 1>; -+ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc, -+ v128b, v128b, 0, 2, 3>; -+ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc, -+ v128h, v128h, 1, 2, 3>; -+ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, 
z_vfeez_cc, -+ v128f, v128f, 2, 2, 3>; -+ -+ defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc, -+ v128b, v128b, 0, 0, 1>; -+ defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc, -+ v128h, v128h, 1, 0, 1>; -+ defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc, -+ v128f, v128f, 2, 0, 1>; -+ defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, -+ z_vfenez_cc, v128b, v128b, 0, 2, 3>; -+ defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, -+ z_vfenez_cc, v128h, v128h, 1, 2, 3>; -+ defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, -+ z_vfenez_cc, v128f, v128f, 2, 2, 3>; -+ -+ defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc, -+ v128b, v128b, 0>; -+ defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc, -+ v128h, v128h, 1>; -+ defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc, -+ v128f, v128f, 2>; -+ -+ defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, -+ z_vstrc_cc, v128b, v128b, 0, 0>; -+ defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, -+ z_vstrc_cc, v128h, v128h, 1, 0>; -+ defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, -+ z_vstrc_cc, v128f, v128f, 2, 0>; -+ defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, -+ z_vstrcz_cc, v128b, v128b, 0, 2>; -+ defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, -+ z_vstrcz_cc, v128h, v128h, 1, 2>; -+ defm VSTRCZF : QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf, -+ z_vstrcz_cc, v128f, v128f, 2, 2>; -+} -Index: llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp -=================================================================== ---- /dev/null -+++ llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp -@@ -0,0 +1,143 @@ -+//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===// -+// -+// The LLVM Compiler 
Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This pass combines multiple accesses to local-dynamic TLS variables so that -+// the TLS base address for the module is only fetched once per execution path -+// through the function. -+// -+//===----------------------------------------------------------------------===// -+ -+#include "SystemZTargetMachine.h" -+#include "SystemZMachineFunctionInfo.h" -+#include "llvm/CodeGen/MachineDominators.h" -+#include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" -+#include "llvm/CodeGen/MachineRegisterInfo.h" -+#include "llvm/Target/TargetInstrInfo.h" -+#include "llvm/Target/TargetMachine.h" -+#include "llvm/Target/TargetRegisterInfo.h" -+ -+using namespace llvm; -+ -+namespace { -+ -+class SystemZLDCleanup : public MachineFunctionPass { -+public: -+ static char ID; -+ SystemZLDCleanup(const SystemZTargetMachine &tm) -+ : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {} -+ -+ const char *getPassName() const override { -+ return "SystemZ Local Dynamic TLS Access Clean-up"; -+ } -+ -+ bool runOnMachineFunction(MachineFunction &MF) override; -+ void getAnalysisUsage(AnalysisUsage &AU) const override; -+ -+private: -+ bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg); -+ MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg); -+ MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg); -+ -+ const SystemZInstrInfo *TII; -+ MachineFunction *MF; -+}; -+ -+char SystemZLDCleanup::ID = 0; -+ -+} // end anonymous namespace -+ -+FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) { -+ return new SystemZLDCleanup(TM); -+} -+ -+void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const { -+ AU.setPreservesCFG(); -+ AU.addRequired(); -+ 
MachineFunctionPass::getAnalysisUsage(AU); -+} -+ -+bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) { -+ TII = static_cast(F.getSubtarget().getInstrInfo()); -+ MF = &F; -+ -+ SystemZMachineFunctionInfo* MFI = F.getInfo(); -+ if (MFI->getNumLocalDynamicTLSAccesses() < 2) { -+ // No point folding accesses if there isn't at least two. -+ return false; -+ } -+ -+ MachineDominatorTree *DT = &getAnalysis(); -+ return VisitNode(DT->getRootNode(), 0); -+} -+ -+// Visit the dominator subtree rooted at Node in pre-order. -+// If TLSBaseAddrReg is non-null, then use that to replace any -+// TLS_LDCALL instructions. Otherwise, create the register -+// when the first such instruction is seen, and then use it -+// as we encounter more instructions. -+bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node, -+ unsigned TLSBaseAddrReg) { -+ MachineBasicBlock *BB = Node->getBlock(); -+ bool Changed = false; -+ -+ // Traverse the current block. -+ for (auto I = BB->begin(), E = BB->end(); I != E; ++I) { -+ switch (I->getOpcode()) { -+ case SystemZ::TLS_LDCALL: -+ if (TLSBaseAddrReg) -+ I = ReplaceTLSCall(I, TLSBaseAddrReg); -+ else -+ I = SetRegister(I, &TLSBaseAddrReg); -+ Changed = true; -+ break; -+ default: -+ break; -+ } -+ } -+ -+ // Visit the children of this block in the dominator tree. -+ for (auto I = Node->begin(), E = Node->end(); I != E; ++I) -+ Changed |= VisitNode(*I, TLSBaseAddrReg); -+ -+ return Changed; -+} -+ -+// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg, -+// returning the new instruction. -+MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I, -+ unsigned TLSBaseAddrReg) { -+ // Insert a Copy from TLSBaseAddrReg to R2. -+ MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(), -+ TII->get(TargetOpcode::COPY), SystemZ::R2D) -+ .addReg(TLSBaseAddrReg); -+ -+ // Erase the TLS_LDCALL instruction. 
-+ I->eraseFromParent(); -+ -+ return Copy; -+} -+ -+// Create a virtal register in *TLSBaseAddrReg, and populate it by -+// inserting a copy instruction after I. Returns the new instruction. -+MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I, -+ unsigned *TLSBaseAddrReg) { -+ // Create a virtual register for the TLS base address. -+ MachineRegisterInfo &RegInfo = MF->getRegInfo(); -+ *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass); -+ -+ // Insert a copy from R2 to TLSBaseAddrReg. -+ MachineInstr *Next = I->getNextNode(); -+ MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(), -+ TII->get(TargetOpcode::COPY), *TLSBaseAddrReg) -+ .addReg(SystemZ::R2D); -+ -+ return Copy; -+} -+ -Index: llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZMCInstLower.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp -@@ -22,6 +22,8 @@ static MCSymbolRefExpr::VariantKind getV - return MCSymbolRefExpr::VK_None; - case SystemZII::MO_GOT: - return MCSymbolRefExpr::VK_GOT; -+ case SystemZII::MO_INDNTPOFF: -+ return MCSymbolRefExpr::VK_INDNTPOFF; - } - llvm_unreachable("Unrecognised MO_ACCESS_MODEL"); - } -Index: llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZMachineFunctionInfo.h -+++ llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h -@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : publi - unsigned VarArgsFrameIndex; - unsigned RegSaveFrameIndex; - bool ManipulatesSP; -+ unsigned NumLocalDynamics; - - public: - explicit SystemZMachineFunctionInfo(MachineFunction &MF) - : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0), -- VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {} -+ VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false), -+ 
NumLocalDynamics(0) {} - - // Get and set the first call-saved GPR that should be saved and restored - // by this function. This is 0 if no GPRs need to be saved or restored. -@@ -61,6 +63,10 @@ public: - // e.g. through STACKSAVE or STACKRESTORE. - bool getManipulatesSP() const { return ManipulatesSP; } - void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; } -+ -+ // Count number of local-dynamic TLS symbols used. -+ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } -+ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } - }; - - } // end namespace llvm -Index: llvm-36/lib/Target/SystemZ/SystemZOperands.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZOperands.td -+++ llvm-36/lib/Target/SystemZ/SystemZOperands.td -@@ -16,6 +16,11 @@ class ImmediateAsmOperand - let Name = name; - let RenderMethod = "addImmOperands"; - } -+class ImmediateTLSAsmOperand -+ : AsmOperandClass { -+ let Name = name; -+ let RenderMethod = "addImmTLSOperands"; -+} - - // Constructs both a DAG pattern and instruction operand for an immediate - // of type VT. PRED returns true if a node is acceptable and XFORM returns -@@ -34,6 +39,11 @@ class PCRelAsmOperand : Imm - let PredicateMethod = "isImm"; - let ParserMethod = "parsePCRel"##size; - } -+class PCRelTLSAsmOperand -+ : ImmediateTLSAsmOperand<"PCRelTLS"##size> { -+ let PredicateMethod = "isImmTLS"; -+ let ParserMethod = "parsePCRelTLS"##size; -+} - - // Constructs an operand for a PC-relative address with address type VT. - // ASMOP is the associated asm operand. -@@ -41,6 +51,10 @@ class PCRelOperand : Operand { -+ let PrintMethod = "printPCRelTLSOperand"; -+ let ParserMatchClass = asmop; -+} - - // Constructs both a DAG pattern and instruction operand for a PC-relative - // address with address size VT. 
SELF is the name of the operand and -@@ -64,6 +78,22 @@ class AddressAsmOperand -+ : Operand("i"##bitsize)> { -+ let PrintMethod = "print"##format##"Operand"; -+ let EncoderMethod = "get"##format##dispsize##length##"Encoding"; -+ let DecoderMethod = -+ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; -+ let MIOperandInfo = operands; -+ let ParserMatchClass = -+ !cast(format##bitsize##"Disp"##dispsize##length); -+} -+ - // Constructs both a DAG pattern and instruction operand for an addressing mode. - // FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated - // AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands -@@ -79,15 +109,7 @@ class AddressingMode("i"##bitsize), numops, - "select"##seltype##dispsize##suffix##length, - [add, sub, or, frameindex, z_adjdynalloc]>, -- Operand("i"##bitsize)> { -- let PrintMethod = "print"##format##"Operand"; -- let EncoderMethod = "get"##format##dispsize##length##"Encoding"; -- let DecoderMethod = -- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; -- let MIOperandInfo = operands; -- let ParserMatchClass = -- !cast(format##bitsize##"Disp"##dispsize##length); --} -+ AddressOperand; - - // An addressing mode with a base and displacement but no index. - class BDMode -@@ -111,6 +133,13 @@ class BDLMode("disp"##dispsize##"imm"##bitsize), - !cast("imm"##bitsize))>; - -+// An addressing mode with a base, displacement and a vector index. -+class BDVMode -+ : AddressOperand("ADDR"##bitsize), -+ !cast("disp"##dispsize##"imm"##bitsize), -+ !cast("VR128"))>; -+ - //===----------------------------------------------------------------------===// - // Extracting immediate operands from nodes - // These all create MVT::i64 nodes to ensure the value is not sign-extended -@@ -163,6 +192,16 @@ def UIMM8 : SDNodeXFormgetTargetConstant(uint8_t(N->getZExtValue()), MVT::i64); - }]>; - -+// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit. 
-+def UIMM8EVEN : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xfe, MVT::i64); -+}]>; -+ -+// Truncate an immediate to a 12-bit unsigned quantity. -+def UIMM12 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xfff, MVT::i64); -+}]>; -+ - // Truncate an immediate to a 16-bit signed quantity. - def SIMM16 : SDNodeXFormgetTargetConstant(int16_t(N->getZExtValue()), MVT::i64); -@@ -192,10 +231,14 @@ def NEGIMM32 : SDNodeXForm; -+def U2Imm : ImmediateAsmOperand<"U2Imm">; -+def U3Imm : ImmediateAsmOperand<"U3Imm">; - def U4Imm : ImmediateAsmOperand<"U4Imm">; - def U6Imm : ImmediateAsmOperand<"U6Imm">; - def S8Imm : ImmediateAsmOperand<"S8Imm">; - def U8Imm : ImmediateAsmOperand<"U8Imm">; -+def U12Imm : ImmediateAsmOperand<"U12Imm">; - def S16Imm : ImmediateAsmOperand<"S16Imm">; - def U16Imm : ImmediateAsmOperand<"U16Imm">; - def S32Imm : ImmediateAsmOperand<"S32Imm">; -@@ -226,10 +269,28 @@ def imm32lh16c : Immediate; - - // Short immediates -+def imm32zx1 : Immediate(N->getZExtValue()); -+}], NOOP_SDNodeXForm, "U1Imm">; -+ -+def imm32zx2 : Immediate(N->getZExtValue()); -+}], NOOP_SDNodeXForm, "U2Imm">; -+ -+def imm32zx3 : Immediate(N->getZExtValue()); -+}], NOOP_SDNodeXForm, "U3Imm">; -+ - def imm32zx4 : Immediate(N->getZExtValue()); - }], NOOP_SDNodeXForm, "U4Imm">; - -+// Note: this enforces an even value during code generation only. -+// When used from the assembler, any 4-bit value is allowed. -+def imm32zx4even : Immediate(N->getZExtValue()); -+}], UIMM8EVEN, "U4Imm">; -+ - def imm32zx6 : Immediate(N->getZExtValue()); - }], NOOP_SDNodeXForm, "U6Imm">; -@@ -244,6 +305,10 @@ def imm32zx8 : Immediate; - -+def imm32zx12 : Immediate(N->getZExtValue()); -+}], UIMM12, "U12Imm">; -+ - def imm32sx16 : Immediate(N->getSExtValue()); - }], SIMM16, "S16Imm">; -@@ -370,6 +435,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ retu - // PC-relative asm operands. 
- def PCRel16 : PCRelAsmOperand<"16">; - def PCRel32 : PCRelAsmOperand<"32">; -+def PCRelTLS16 : PCRelTLSAsmOperand<"16">; -+def PCRelTLS32 : PCRelTLSAsmOperand<"32">; - - // PC-relative offsets of a basic block. The offset is sign-extended - // and multiplied by 2. -@@ -382,6 +449,20 @@ def brtarget32 : PCRelOperand { } -+def brtarget16tls : PCRelTLSOperand { -+ let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); -+ let EncoderMethod = "getPC16DBLTLSEncoding"; -+ let DecoderMethod = "decodePC16DBLOperand"; -+} -+def brtarget32tls : PCRelTLSOperand { -+ let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); -+ let EncoderMethod = "getPC32DBLTLSEncoding"; -+ let DecoderMethod = "decodePC32DBLOperand"; -+} -+ - // A PC-relative offset of a global value. The offset is sign-extended - // and multiplied by 2. - def pcrel32 : PCRelAddress { -@@ -408,6 +489,7 @@ def BDAddr64Disp20 : AddressAsmOper - def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; - def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; - def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; -+def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; - - // DAG patterns and operands for addressing modes. 
Each mode has - // the form [] where: -@@ -420,6 +502,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOper - // laaddr : like bdxaddr, but used for Load Address operations - // dynalloc : base + displacement + index + ADJDYNALLOC - // bdladdr : base + displacement with a length field -+// bdvaddr : base + displacement with a vector index - // - // is one of: - // 12 : the displacement is an unsigned 12-bit value -@@ -452,6 +535,7 @@ def dynalloc12only : BDXMode<"DynAllo - def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; - def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; - def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; -+def bdvaddr12only : BDVMode< "64", "12">; - - //===----------------------------------------------------------------------===// - // Miscellaneous -Index: llvm-36/lib/Target/SystemZ/SystemZOperators.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZOperators.td -+++ llvm-36/lib/Target/SystemZ/SystemZOperators.td -@@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic : SDTypeProf - def SDT_ZPrefetch : SDTypeProfile<0, 2, - [SDTCisVT<0, i32>, - SDTCisPtrTy<1>]>; -+def SDT_ZTBegin : SDTypeProfile<0, 2, -+ [SDTCisPtrTy<0>, -+ SDTCisVT<1, i32>]>; -+def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisVT<3, i32>]>; -+def SDT_ZExtractVectorElt : SDTypeProfile<1, 2, -+ [SDTCisVec<1>, -+ SDTCisVT<2, i32>]>; -+def SDT_ZReplicate : SDTypeProfile<1, 1, -+ [SDTCisVec<0>]>; -+def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, -+ [SDTCisVec<0>, -+ SDTCisVec<1>]>; -+def SDT_ZVecUnary : SDTypeProfile<1, 1, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>]>; -+def SDT_ZVecBinary : SDTypeProfile<1, 2, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisSameAs<0, 2>]>; -+def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisVT<2, i32>]>; -+def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, -+ [SDTCisVec<0>, -+ SDTCisVec<1>, -+ 
SDTCisSameAs<1, 2>]>; -+def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, -+ [SDTCisVec<0>, -+ SDTCisVec<1>, -+ SDTCisVT<2, i32>]>; -+def SDT_ZRotateMask : SDTypeProfile<1, 2, -+ [SDTCisVec<0>, -+ SDTCisVT<1, i32>, -+ SDTCisVT<2, i32>]>; -+def SDT_ZJoinDwords : SDTypeProfile<1, 2, -+ [SDTCisVT<0, v2i64>, -+ SDTCisVT<1, i64>, -+ SDTCisVT<2, i64>]>; -+def SDT_ZVecTernary : SDTypeProfile<1, 3, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisSameAs<0, 2>, -+ SDTCisSameAs<0, 3>]>; -+def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisSameAs<0, 2>, -+ SDTCisVT<3, i32>]>; -+def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, -+ [SDTCisVec<0>, -+ SDTCisSameAs<0, 1>, -+ SDTCisSameAs<0, 2>, -+ SDTCisSameAs<0, 3>, -+ SDTCisVT<4, i32>]>; - - //===----------------------------------------------------------------------===// - // Node definitions -@@ -90,6 +148,7 @@ def callseq_start : SDNode<"ISD::C - def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, - [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, - SDNPOutGlue]>; -+def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; - - // Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
- def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, -@@ -100,6 +159,12 @@ def z_call : SDNode<"System - def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, - [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, - SDNPVariadic]>; -+def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, -+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, -+ SDNPVariadic]>; -+def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, -+ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, -+ SDNPVariadic]>; - def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; - def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", - SDT_ZWrapOffset, []>; -@@ -114,6 +179,7 @@ def z_select_ccmask : SDNode<"System - def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; - def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", - SDT_ZExtractAccess>; -+def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; - def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; - def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; - def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; -@@ -123,6 +189,80 @@ def z_udivrem64 : SDNode<"System - def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, - [SDNPHasChain, SDNPMayStore]>; - -+// Defined because the index is an i32 rather than a pointer. 
-+def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", -+ SDT_ZInsertVectorElt>; -+def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", -+ SDT_ZExtractVectorElt>; -+def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; -+def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; -+def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; -+def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; -+def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; -+def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; -+def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; -+def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; -+def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", -+ SDT_ZVecTernaryInt>; -+def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; -+def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; -+def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, -+ [SDNPOutGlue]>; -+def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, -+ [SDNPOutGlue]>; -+def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; -+def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; -+def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; -+def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>; -+def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", -+ SDT_ZVecBinaryInt>; -+def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", -+ SDT_ZVecBinaryInt>; -+def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", -+ SDT_ZVecBinaryInt>; -+def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; -+def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; -+def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; -+def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; -+def z_vicmpes : SDNode<"SystemZISD::VICMPES", 
SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; -+def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; -+def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; -+def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv, -+ [SDNPOutGlue]>; -+def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, -+ [SDNPOutGlue]>; -+def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, -+ [SDNPOutGlue]>; -+def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; -+def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; -+def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; -+def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, -+ [SDNPOutGlue]>; -+def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, -+ [SDNPOutGlue]>; -+def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, -+ [SDNPOutGlue]>; -+def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, -+ [SDNPOutGlue]>; -+def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, -+ [SDNPOutGlue]>; -+def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", -+ SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; -+def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, -+ [SDNPOutGlue]>; -+ - class AtomicWOp - : SDNode<"SystemZISD::"##name, profile, - [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; -@@ -172,6 +312,19 @@ def z_prefetch : SDNode<"System - [SDNPHasChain, SDNPMayLoad, SDNPMayStore, - SDNPMemOperand]>; - -+def 
z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, -+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore, -+ SDNPSideEffect]>; -+def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, -+ [SDNPHasChain, SDNPOutGlue, SDNPMayStore, -+ SDNPSideEffect]>; -+def z_tend : SDNode<"SystemZISD::TEND", SDTNone, -+ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; -+ -+def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; -+def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; -+def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; -+ - //===----------------------------------------------------------------------===// - // Pattern fragments - //===----------------------------------------------------------------------===// -@@ -195,11 +348,21 @@ def sext8 : PatFrag<(ops node:$src), (s - def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; - def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; - -+// Match extensions of an i32 to an i64, followed by an in-register sign -+// extension from a sub-i32 value. -+def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>; -+def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>; -+ - // Register zero-extend operations. Sub-32-bit values are represented as i32s. - def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; - def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; - def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; - -+// Match extensions of an i32 to an i64, followed by an AND of the low -+// i8 or i16 part. -+def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>; -+def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>; -+ - // Typed floating-point loads. 
- def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; - def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; -@@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src), - def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; - def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; - -+// Integer multiply-and-add -+def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), -+ (add (mul node:$src1, node:$src2), node:$src3)>; -+ -+// Fused multiply-subtract, using the natural operand order. -+def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), -+ (fma node:$src1, node:$src2, (fneg node:$src3))>; -+ - // Fused multiply-add and multiply-subtract, but with the order of the - // operands matching SystemZ's MA and MS instructions. - def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), -@@ -383,3 +554,110 @@ class loadu - : PatFrag<(ops node:$value, node:$addr), - (store (operator node:$value), node:$addr)>; -+ -+// Vector representation of all-zeros and all-ones. -+def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; -+def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; -+ -+// Load a scalar and replicate it in all elements of a vector. -+class z_replicate_load -+ : PatFrag<(ops node:$addr), -+ (z_replicate (scalartype (load node:$addr)))>; -+def z_replicate_loadi8 : z_replicate_load; -+def z_replicate_loadi16 : z_replicate_load; -+def z_replicate_loadi32 : z_replicate_load; -+def z_replicate_loadi64 : z_replicate_load; -+def z_replicate_loadf32 : z_replicate_load; -+def z_replicate_loadf64 : z_replicate_load; -+ -+// Load a scalar and insert it into a single element of a vector. 
-+class z_vle -+ : PatFrag<(ops node:$vec, node:$addr, node:$index), -+ (z_vector_insert node:$vec, (scalartype (load node:$addr)), -+ node:$index)>; -+def z_vlei8 : z_vle; -+def z_vlei16 : z_vle; -+def z_vlei32 : z_vle; -+def z_vlei64 : z_vle; -+def z_vlef32 : z_vle; -+def z_vlef64 : z_vle; -+ -+// Load a scalar and insert it into the low element of the high i64 of a -+// zeroed vector. -+class z_vllez -+ : PatFrag<(ops node:$addr), -+ (z_vector_insert (z_vzero), -+ (scalartype (load node:$addr)), (i32 index))>; -+def z_vllezi8 : z_vllez; -+def z_vllezi16 : z_vllez; -+def z_vllezi32 : z_vllez; -+def z_vllezi64 : PatFrag<(ops node:$addr), -+ (z_join_dwords (i64 (load node:$addr)), (i64 0))>; -+// We use high merges to form a v4f32 from four f32s. Propagating zero -+// into all elements but index 1 gives this expression. -+def z_vllezf32 : PatFrag<(ops node:$addr), -+ (bitconvert -+ (z_merge_high -+ (v2i64 -+ (z_unpackl_high -+ (v4i32 -+ (bitconvert -+ (v4f32 (scalar_to_vector -+ (f32 (load node:$addr)))))))), -+ (v2i64 (z_vzero))))>; -+def z_vllezf64 : PatFrag<(ops node:$addr), -+ (z_merge_high -+ (scalar_to_vector (f64 (load node:$addr))), -+ (z_vzero))>; -+ -+// Store one element of a vector. -+class z_vste -+ : PatFrag<(ops node:$vec, node:$addr, node:$index), -+ (store (scalartype (z_vector_extract node:$vec, node:$index)), -+ node:$addr)>; -+def z_vstei8 : z_vste; -+def z_vstei16 : z_vste; -+def z_vstei32 : z_vste; -+def z_vstei64 : z_vste; -+def z_vstef32 : z_vste; -+def z_vstef64 : z_vste; -+ -+// Arithmetic negation on vectors. -+def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; -+ -+// Bitwise negation on vectors. -+def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; -+ -+// Signed "integer greater than zero" on vectors. -+def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; -+ -+// Signed "integer less than zero" on vectors. 
-+def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; -+ -+// Integer absolute on vectors. -+class z_viabs -+ : PatFrag<(ops node:$src), -+ (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))), -+ (z_vsra_by_scalar node:$src, (i32 shift)))>; -+def z_viabs8 : z_viabs<7>; -+def z_viabs16 : z_viabs<15>; -+def z_viabs32 : z_viabs<31>; -+def z_viabs64 : z_viabs<63>; -+ -+// Sign-extend the i64 elements of a vector. -+class z_vse -+ : PatFrag<(ops node:$src), -+ (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>; -+def z_vsei8 : z_vse<56>; -+def z_vsei16 : z_vse<48>; -+def z_vsei32 : z_vse<32>; -+ -+// ...and again with the extensions being done on individual i64 scalars. -+class z_vse_by_parts -+ : PatFrag<(ops node:$src), -+ (z_join_dwords -+ (operator (z_vector_extract node:$src, index1)), -+ (operator (z_vector_extract node:$src, index2)))>; -+def z_vsei8_by_parts : z_vse_by_parts; -+def z_vsei16_by_parts : z_vse_by_parts; -+def z_vsei32_by_parts : z_vse_by_parts; -Index: llvm-36/lib/Target/SystemZ/SystemZPatterns.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZPatterns.td -+++ llvm-36/lib/Target/SystemZ/SystemZPatterns.td -@@ -153,3 +153,17 @@ multiclass CompareZeroFP; - } -+ -+// Use INSN for performing binary operation OPERATION of type VT -+// on registers of class CLS. -+class BinaryRRWithType -+ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; -+ -+// Use INSN to perform conversion operation OPERATOR, with the input being -+// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions -+// and 0 to allow them. MODE is the rounding mode to use. 
-+class FPConversion suppress, bits<4> mode> -+ : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))), -+ (insn tr2.op:$vec, suppress, mode)>; -Index: llvm-36/lib/Target/SystemZ/SystemZProcessors.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZProcessors.td -+++ llvm-36/lib/Target/SystemZ/SystemZProcessors.td -@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature< - "Assume that the floating-point extension facility is installed" - >; - -+def FeaturePopulationCount : SystemZFeature< -+ "population-count", "PopulationCount", -+ "Assume that the population-count facility is installed" -+>; -+ - def FeatureFastSerialization : SystemZFeature< - "fast-serialization", "FastSerialization", - "Assume that the fast-serialization facility is installed" -@@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZF - >; - def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">; - -+def FeatureMiscellaneousExtensions : SystemZFeature< -+ "miscellaneous-extensions", "MiscellaneousExtensions", -+ "Assume that the miscellaneous-extensions facility is installed" -+>; -+ -+def FeatureTransactionalExecution : SystemZFeature< -+ "transactional-execution", "TransactionalExecution", -+ "Assume that the transactional-execution facility is installed" -+>; -+ -+def FeatureProcessorAssist : SystemZFeature< -+ "processor-assist", "ProcessorAssist", -+ "Assume that the processor-assist facility is installed" -+>; -+ -+def FeatureVector : SystemZFeature< -+ "vector", "Vector", -+ "Assume that the vectory facility is installed" -+>; -+def FeatureNoVector : SystemZMissingFeature<"Vector">; -+ - def : Processor<"generic", NoItineraries, []>; - def : Processor<"z10", NoItineraries, []>; - def : Processor<"z196", NoItineraries, - [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, -- FeatureFPExtension, FeatureFastSerialization, -- FeatureInterlockedAccess1]>; -+ FeatureFPExtension, 
FeaturePopulationCount, -+ FeatureFastSerialization, FeatureInterlockedAccess1]>; - def : Processor<"zEC12", NoItineraries, - [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, -- FeatureFPExtension, FeatureFastSerialization, -- FeatureInterlockedAccess1]>; -+ FeatureFPExtension, FeaturePopulationCount, -+ FeatureFastSerialization, FeatureInterlockedAccess1, -+ FeatureMiscellaneousExtensions, -+ FeatureTransactionalExecution, FeatureProcessorAssist]>; -+def : Processor<"z13", NoItineraries, -+ [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord, -+ FeatureFPExtension, FeaturePopulationCount, -+ FeatureFastSerialization, FeatureInterlockedAccess1, -+ FeatureTransactionalExecution, FeatureProcessorAssist, -+ FeatureVector]>; -Index: llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZRegisterInfo.td -+++ llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td -@@ -25,20 +25,24 @@ def subreg_l32 : SubRegIndex<32, 0>; - def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32. - def subreg_l64 : SubRegIndex<64, 0>; - def subreg_h64 : SubRegIndex<64, 64>; -+def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits. -+def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits. - def subreg_hh32 : ComposedSubRegIndex; - def subreg_hl32 : ComposedSubRegIndex; -+def subreg_hr32 : ComposedSubRegIndex; - } - --// Define a register class that contains values of type TYPE and an -+// Define a register class that contains values of types TYPES and an - // associated operand called NAME. SIZE is the size and alignment - // of the registers and REGLIST is the list of individual registers. 
--multiclass SystemZRegClass { -+multiclass SystemZRegClass types, int size, -+ dag regList> { - def AsmOperand : AsmOperandClass { - let Name = name; - let ParserMethod = "parse"##name; - let RenderMethod = "addRegOperands"; - } -- def Bit : RegisterClass<"SystemZ", [type], size, regList> { -+ def Bit : RegisterClass<"SystemZ", types, size, regList> { - let Size = size; - } - def "" : RegisterOperand(name##"Bit")> { -@@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14] - - /// Allocate the callee-saved R6-R13 backwards. That way they can be saved - /// together with R14 and R15 in one prolog instruction. --defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5), -- (sequence "R%uL", 15, 6))>; --defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5), -- (sequence "R%uH", 15, 6))>; --defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), -- (sequence "R%uD", 15, 6))>; -+defm GR32 : SystemZRegClass<"GR32", [i32], 32, -+ (add (sequence "R%uL", 0, 5), -+ (sequence "R%uL", 15, 6))>; -+defm GRH32 : SystemZRegClass<"GRH32", [i32], 32, -+ (add (sequence "R%uH", 0, 5), -+ (sequence "R%uH", 15, 6))>; -+defm GR64 : SystemZRegClass<"GR64", [i64], 64, -+ (add (sequence "R%uD", 0, 5), -+ (sequence "R%uD", 15, 6))>; - - // Combine the low and high GR32s into a single class. This can only be - // used for virtual registers if the high-word facility is available. --defm GRX32 : SystemZRegClass<"GRX32", i32, 32, -+defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, - (add (sequence "R%uL", 0, 5), - (sequence "R%uH", 0, 5), - R15L, R15H, R14L, R14H, R13L, R13H, -@@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i3 - - // The architecture doesn't really have any i128 support, so model the - // register pairs as untyped instead. 
--defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, -- R12Q, R10Q, R8Q, R6Q, -- R14Q)>; -+defm GR128 : SystemZRegClass<"GR128", [untyped], 128, -+ (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>; - - // Base and index registers. Everything except R0, which in an address - // context evaluates as 0. --defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>; --defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; -+defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>; -+defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>; - - // Not used directly, but needs to exist for ADDR32 and ADDR64 subregs - // of a GR128. --defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; -+defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>; - - //===----------------------------------------------------------------------===// - // Floating-point registers -@@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>; - def F13Dwarf : DwarfMapping<30>; - def F15Dwarf : DwarfMapping<31>; - --// Lower 32 bits of one of the 16 64-bit floating-point registers -+def F16Dwarf : DwarfMapping<68>; -+def F18Dwarf : DwarfMapping<69>; -+def F20Dwarf : DwarfMapping<70>; -+def F22Dwarf : DwarfMapping<71>; -+ -+def F17Dwarf : DwarfMapping<72>; -+def F19Dwarf : DwarfMapping<73>; -+def F21Dwarf : DwarfMapping<74>; -+def F23Dwarf : DwarfMapping<75>; -+ -+def F24Dwarf : DwarfMapping<76>; -+def F26Dwarf : DwarfMapping<77>; -+def F28Dwarf : DwarfMapping<78>; -+def F30Dwarf : DwarfMapping<79>; -+ -+def F25Dwarf : DwarfMapping<80>; -+def F27Dwarf : DwarfMapping<81>; -+def F29Dwarf : DwarfMapping<82>; -+def F31Dwarf : DwarfMapping<83>; -+ -+// Upper 32 bits of one of the floating-point registers - class FPR32 num, string n> : SystemZReg { - let HWEncoding = num; - } - --// One of the 16 64-bit floating-point registers --class FPR64 num, string n, FPR32 low> -- : 
SystemZRegWithSubregs { -+// One of the floating-point registers. -+class FPR64 num, string n, FPR32 high> -+ : SystemZRegWithSubregs { - let HWEncoding = num; -- let SubRegIndices = [subreg_h32]; -+ let SubRegIndices = [subreg_r32]; - } - - // 8 pairs of FPR64s, with a one-register gap inbetween. -@@ -161,12 +187,17 @@ class FPR128 num, string n, FPR - let SubRegIndices = [subreg_l64, subreg_h64]; - } - --// Floating-point registers -+// Floating-point registers. Registers 16-31 require the vector facility. - foreach I = 0-15 in { - def F#I#S : FPR32; - def F#I#D : FPR64("F"#I#"S")>, - DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; - } -+foreach I = 16-31 in { -+ def F#I#S : FPR32; -+ def F#I#D : FPR64("F"#I#"S")>, -+ DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; -+} - - foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { - def F#I#Q : FPR128("F"#!add(I, 2)#"D"), -@@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] i - - // There's no store-multiple instruction for FPRs, so we're not fussy - // about the order in which call-saved registers are allocated. --defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>; --defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>; --defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, -- F8Q, F9Q, F12Q, F13Q)>; -+defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; -+defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; -+defm FP128 : SystemZRegClass<"FP128", [f128], 128, -+ (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>; -+ -+//===----------------------------------------------------------------------===// -+// Vector registers -+//===----------------------------------------------------------------------===// -+ -+// A full 128-bit vector register, with an FPR64 as its high part. -+class VR128 num, string n, FPR64 high> -+ : SystemZRegWithSubregs { -+ let HWEncoding = num; -+ let SubRegIndices = [subreg_r64]; -+} -+ -+// Full vector registers. 
-+foreach I = 0-31 in { -+ def V#I : VR128("F"#I#"D")>, -+ DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; -+} -+ -+// Class used to store 32-bit values in the first element of a vector -+// register. f32 scalars are used for the WLEDB and WLDEB instructions. -+defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, -+ (add (sequence "F%uS", 0, 7), -+ (sequence "F%uS", 16, 31), -+ (sequence "F%uS", 8, 15))>; -+ -+// Class used to store 64-bit values in the upper half of a vector register. -+// The vector facility also includes scalar f64 instructions that operate -+// on the full vector register set. -+defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64, -+ (add (sequence "F%uD", 0, 7), -+ (sequence "F%uD", 16, 31), -+ (sequence "F%uD", 8, 15))>; -+ -+// The subset of vector registers that can be used for floating-point -+// operations too. -+defm VF128 : SystemZRegClass<"VF128", -+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, -+ (sequence "V%u", 0, 15)>; -+ -+// All vector registers. -+defm VR128 : SystemZRegClass<"VR128", -+ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, -+ (add (sequence "V%u", 0, 7), -+ (sequence "V%u", 16, 31), -+ (sequence "V%u", 8, 15))>; -+ -+// Attaches a ValueType to a register operand, to make the instruction -+// definitions easier. 
-+class TypedReg { -+ ValueType vt = vtin; -+ RegisterOperand op = opin; -+} -+ -+def v32eb : TypedReg; -+def v64g : TypedReg; -+def v64db : TypedReg; -+def v128b : TypedReg; -+def v128h : TypedReg; -+def v128f : TypedReg; -+def v128g : TypedReg; -+def v128q : TypedReg; -+def v128eb : TypedReg; -+def v128db : TypedReg; -+def v128any : TypedReg; - - //===----------------------------------------------------------------------===// - // Other registers -Index: llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZShortenInst.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp -@@ -15,6 +15,7 @@ - - #include "SystemZTargetMachine.h" - #include "llvm/CodeGen/MachineFunctionPass.h" -+#include "llvm/CodeGen/MachineInstrBuilder.h" - - using namespace llvm; - -@@ -36,6 +37,10 @@ public: - private: - bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, - unsigned LLIxL, unsigned LLIxH); -+ bool shortenOn0(MachineInstr &MI, unsigned Opcode); -+ bool shortenOn01(MachineInstr &MI, unsigned Opcode); -+ bool shortenOn001(MachineInstr &MI, unsigned Opcode); -+ bool shortenFPConv(MachineInstr &MI, unsigned Opcode); - - const SystemZInstrInfo *TII; - -@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(Mach - return false; - } - -+// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding. -+bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) { -+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) { -+ MI.setDesc(TII->get(Opcode)); -+ return true; -+ } -+ return false; -+} -+ -+// Change MI's opcode to Opcode if register operands 0 and 1 have a -+// 4-bit encoding. 
-+bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { -+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && -+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { -+ MI.setDesc(TII->get(Opcode)); -+ return true; -+ } -+ return false; -+} -+ -+// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a -+// 4-bit encoding and if operands 0 and 1 are tied. -+bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) { -+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && -+ MI.getOperand(1).getReg() == MI.getOperand(0).getReg() && -+ SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) { -+ MI.setDesc(TII->get(Opcode)); -+ return true; -+ } -+ return false; -+} -+ -+// MI is a vector-style conversion instruction with the operand order: -+// destination, source, exact-suppress, rounding-mode. If both registers -+// have a 4-bit encoding then change it to Opcode, which has operand order: -+// destination, rouding-mode, source, exact-suppress. -+bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) { -+ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && -+ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { -+ MachineOperand Dest(MI.getOperand(0)); -+ MachineOperand Src(MI.getOperand(1)); -+ MachineOperand Suppress(MI.getOperand(2)); -+ MachineOperand Mode(MI.getOperand(3)); -+ MI.RemoveOperand(3); -+ MI.RemoveOperand(2); -+ MI.RemoveOperand(1); -+ MI.RemoveOperand(0); -+ MI.setDesc(TII->get(Opcode)); -+ MachineInstrBuilder(*MI.getParent()->getParent(), &MI) -+ .addOperand(Dest) -+ .addOperand(Mode) -+ .addOperand(Src) -+ .addOperand(Suppress); -+ return true; -+ } -+ return false; -+} -+ - // Process all instructions in MBB. Return true if something changed. 
- bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) { - bool Changed = false; -@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(Ma - // Iterate backwards through the block looking for instructions to change. - for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) { - MachineInstr &MI = *MBBI; -- unsigned Opcode = MI.getOpcode(); -- if (Opcode == SystemZ::IILF) -+ switch (MI.getOpcode()) { -+ case SystemZ::IILF: - Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL, - SystemZ::LLILH); -- else if (Opcode == SystemZ::IIHF) -+ break; -+ -+ case SystemZ::IIHF: - Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL, - SystemZ::LLIHH); -+ break; -+ -+ case SystemZ::WFADB: -+ Changed |= shortenOn001(MI, SystemZ::ADBR); -+ break; -+ -+ case SystemZ::WFDDB: -+ Changed |= shortenOn001(MI, SystemZ::DDBR); -+ break; -+ -+ case SystemZ::WFIDB: -+ Changed |= shortenFPConv(MI, SystemZ::FIDBRA); -+ break; -+ -+ case SystemZ::WLDEB: -+ Changed |= shortenOn01(MI, SystemZ::LDEBR); -+ break; -+ -+ case SystemZ::WLEDB: -+ Changed |= shortenFPConv(MI, SystemZ::LEDBRA); -+ break; -+ -+ case SystemZ::WFMDB: -+ Changed |= shortenOn001(MI, SystemZ::MDBR); -+ break; -+ -+ case SystemZ::WFLCDB: -+ Changed |= shortenOn01(MI, SystemZ::LCDBR); -+ break; -+ -+ case SystemZ::WFLNDB: -+ Changed |= shortenOn01(MI, SystemZ::LNDBR); -+ break; -+ -+ case SystemZ::WFLPDB: -+ Changed |= shortenOn01(MI, SystemZ::LPDBR); -+ break; -+ -+ case SystemZ::WFSQDB: -+ Changed |= shortenOn01(MI, SystemZ::SQDBR); -+ break; -+ -+ case SystemZ::WFSDB: -+ Changed |= shortenOn001(MI, SystemZ::SDBR); -+ break; -+ -+ case SystemZ::WFCDB: -+ Changed |= shortenOn01(MI, SystemZ::CDBR); -+ break; -+ -+ case SystemZ::VL32: -+ // For z13 we prefer LDE over LE to avoid partial register dependencies. 
-+ Changed |= shortenOn0(MI, SystemZ::LDE32); -+ break; -+ -+ case SystemZ::VST32: -+ Changed |= shortenOn0(MI, SystemZ::STE); -+ break; -+ -+ case SystemZ::VL64: -+ Changed |= shortenOn0(MI, SystemZ::LD); -+ break; -+ -+ case SystemZ::VST64: -+ Changed |= shortenOn0(MI, SystemZ::STD); -+ break; -+ } -+ - unsigned UsedLow = 0; - unsigned UsedHigh = 0; - for (auto MOI = MI.operands_begin(), MOE = MI.operands_end(); -Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp -@@ -10,7 +10,6 @@ - #include "SystemZSubtarget.h" - #include "MCTargetDesc/SystemZMCTargetDesc.h" - #include "llvm/IR/GlobalValue.h" --#include "llvm/Support/Host.h" - - using namespace llvm; - -@@ -23,15 +22,69 @@ using namespace llvm; - // Pin the vtable to this file. - void SystemZSubtarget::anchor() {} - -+// Determine whether we use the vector ABI. -+static bool UsesVectorABI(StringRef CPU, StringRef FS) { -+ // We use the vector ABI whenever the vector facility is avaiable. -+ // This is the case by default if CPU is z13 or later, and can be -+ // overridden via "[+-]vector" feature string elements. -+ bool VectorABI = true; -+ if (CPU.empty() || CPU == "generic" || -+ CPU == "z10" || CPU == "z196" || CPU == "zEC12") -+ VectorABI = false; -+ -+ SmallVector Features; -+ FS.split(Features, ",", -1, false /* KeepEmpty */); -+ for (auto &Feature : Features) { -+ if (Feature == "vector" || Feature == "+vector") -+ VectorABI = true; -+ if (Feature == "-vector") -+ VectorABI = false; -+ } -+ -+ return VectorABI; -+} -+ -+static std::string computeDataLayout(StringRef TT, StringRef CPU, -+ StringRef FS) { -+ const Triple Triple(TT); -+ bool VectorABI = UsesVectorABI(CPU, FS); -+ std::string Ret = ""; -+ -+ // Big endian. -+ Ret += "E"; -+ -+ // Data mangling. 
-+ Ret += DataLayout::getManglingComponent(Triple); -+ -+ // Make sure that global data has at least 16 bits of alignment by -+ // default, so that we can refer to it using LARL. We don't have any -+ // special requirements for stack variables though. -+ Ret += "-i1:8:16-i8:8:16"; -+ -+ // 64-bit integers are naturally aligned. -+ Ret += "-i64:64"; -+ -+ // 128-bit floats are aligned only to 64 bits. -+ Ret += "-f128:64"; -+ -+ // When using the vector ABI, 128-bit vectors are also aligned to 64 bits. -+ if (VectorABI) -+ Ret += "-v128:64"; -+ -+ // We prefer 16 bits of aligned for all globals; see above. -+ Ret += "-a:8:16"; -+ -+ // Integer registers are 32 or 64 bits. -+ Ret += "-n32:64"; -+ -+ return Ret; -+} -+ - SystemZSubtarget & - SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) { - std::string CPUName = CPU; - if (CPUName.empty()) - CPUName = "generic"; --#if defined(__linux__) && defined(__s390x__) -- if (CPUName == "generic") -- CPUName = sys::getHostCPUName(); --#endif - // Parse features string. - ParseSubtargetFeatures(CPUName, FS); - return *this; -@@ -43,12 +96,12 @@ SystemZSubtarget::SystemZSubtarget(const - const TargetMachine &TM) - : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), - HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), -- HasFastSerialization(false), HasInterlockedAccess1(false), -+ HasPopulationCount(false), HasFastSerialization(false), -+ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), -+ HasTransactionalExecution(false), HasProcessorAssist(false), -+ HasVector(false), - TargetTriple(TT), -- // Make sure that global data has at least 16 bits of alignment by -- // default, so that we can refer to it using LARL. We don't have any -- // special requirements for stack variables though. 
-- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), -+ DL(computeDataLayout(TT, CPU, FS)), - InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), - TSInfo(DL), FrameLowering() {} - -Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.h -+++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.h -@@ -38,8 +38,13 @@ protected: - bool HasLoadStoreOnCond; - bool HasHighWord; - bool HasFPExtension; -+ bool HasPopulationCount; - bool HasFastSerialization; - bool HasInterlockedAccess1; -+ bool HasMiscellaneousExtensions; -+ bool HasTransactionalExecution; -+ bool HasProcessorAssist; -+ bool HasVector; - - private: - Triple TargetTriple; -@@ -88,12 +93,29 @@ public: - // Return true if the target has the floating-point extension facility. - bool hasFPExtension() const { return HasFPExtension; } - -+ // Return true if the target has the population-count facility. -+ bool hasPopulationCount() const { return HasPopulationCount; } -+ - // Return true if the target has the fast-serialization facility. - bool hasFastSerialization() const { return HasFastSerialization; } - - // Return true if the target has interlocked-access facility 1. - bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } - -+ // Return true if the target has the miscellaneous-extensions facility. -+ bool hasMiscellaneousExtensions() const { -+ return HasMiscellaneousExtensions; -+ } -+ -+ // Return true if the target has the transactional-execution facility. -+ bool hasTransactionalExecution() const { return HasTransactionalExecution; } -+ -+ // Return true if the target has the processor-assist facility. -+ bool hasProcessorAssist() const { return HasProcessorAssist; } -+ -+ // Return true if the target has the vector facility. 
-+ bool hasVector() const { return HasVector; } -+ - // Return true if GV can be accessed using LARL for reloc model RM - // and code model CM. - bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, -Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.cpp -+++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp -@@ -9,6 +9,7 @@ - - #include "SystemZTargetMachine.h" - #include "llvm/CodeGen/Passes.h" -+#include "llvm/PassManager.h" - #include "llvm/Support/TargetRegistry.h" - #include "llvm/Transforms/Scalar.h" - #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -@@ -57,6 +58,10 @@ void SystemZPassConfig::addIRPasses() { - - bool SystemZPassConfig::addInstSelector() { - addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); -+ -+ if (getOptLevel() != CodeGenOpt::None) -+ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); -+ - return false; - } - -@@ -100,3 +105,12 @@ void SystemZPassConfig::addPreEmitPass() - TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { - return new SystemZPassConfig(this, PM); - } -+ -+void SystemZTargetMachine::addAnalysisPasses(PassManagerBase &PM) { -+ // Add first the target-independent BasicTTI pass, then our SystemZ pass. -+ // This allows the SystemZ pass to delegate to the target independent layer -+ // when appropriate. 
-+ PM.add(createBasicTargetTransformInfoPass(this)); -+ PM.add(createSystemZTargetTransformInfoPass(this)); -+} -+ -Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h -=================================================================== ---- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.h -+++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h -@@ -39,6 +39,7 @@ public: - } - // Override LLVMTargetMachine - TargetPassConfig *createPassConfig(PassManagerBase &PM) override; -+ void addAnalysisPasses(PassManagerBase &PM) override; - TargetLoweringObjectFile *getObjFileLowering() const override { - return TLOF.get(); - } -Index: llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp -=================================================================== ---- /dev/null -+++ llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp -@@ -0,0 +1,334 @@ -+//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// -+// -+// The LLVM Compiler Infrastructure -+// -+// This file is distributed under the University of Illinois Open Source -+// License. See LICENSE.TXT for details. -+// -+//===----------------------------------------------------------------------===// -+// -+// This file implements a TargetTransformInfo analysis pass specific to the -+// SystemZ target machine. It uses the target's detailed information to provide -+// more precise answers to certain TTI queries, while letting the target -+// independent and default TTI implementations handle the rest. 
-+// -+//===----------------------------------------------------------------------===// -+ -+#include "SystemZTargetMachine.h" -+#include "llvm/Analysis/TargetTransformInfo.h" -+#include "llvm/IR/IntrinsicInst.h" -+#include "llvm/Support/Debug.h" -+#include "llvm/Target/CostTable.h" -+#include "llvm/Target/TargetLowering.h" -+using namespace llvm; -+ -+#define DEBUG_TYPE "systemztti" -+ -+// Declare the pass initialization routine locally as target-specific passes -+// don't have a target-wide initialization entry point, and so we rely on the -+// pass constructor initialization. -+namespace llvm { -+void initializeSystemZTTIPass(PassRegistry &); -+} -+ -+namespace { -+ -+class SystemZTTI : public ImmutablePass, public TargetTransformInfo { -+ const SystemZSubtarget *ST; -+ const SystemZTargetLowering *TLI; -+ -+public: -+ SystemZTTI() : ImmutablePass(ID), ST(0), TLI(0) { -+ llvm_unreachable("This pass cannot be directly constructed"); -+ } -+ -+ SystemZTTI(const SystemZTargetMachine *TM) -+ : ImmutablePass(ID), ST(TM->getSubtargetImpl()), -+ TLI(TM->getSubtargetImpl()->getTargetLowering()) { -+ initializeSystemZTTIPass(*PassRegistry::getPassRegistry()); -+ } -+ -+ void initializePass() override { -+ pushTTIStack(this); -+ } -+ -+ void getAnalysisUsage(AnalysisUsage &AU) const override { -+ TargetTransformInfo::getAnalysisUsage(AU); -+ } -+ -+ // Pass identification. -+ static char ID; -+ -+ // Provide necessary pointer adjustments for the two base classes. 
-+ void *getAdjustedAnalysisPointer(const void *ID) override { -+ if (ID == &TargetTransformInfo::ID) -+ return (TargetTransformInfo*)this; -+ return this; -+ } -+ -+ /// \name Scalar TTI Implementations -+ /// @{ -+ -+ unsigned getIntImmCost(const APInt &Imm, Type *Ty); -+ -+ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, -+ Type *Ty); -+ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, -+ Type *Ty); -+ -+ PopcntSupportKind getPopcntSupport(unsigned TyWidth); -+ -+ /// @} -+ -+ /// \name Vector TTI Implementations -+ /// @{ -+ -+ unsigned getNumberOfRegisters(bool Vector); -+ unsigned getRegisterBitWidth(bool Vector); -+ -+ /// @} -+}; -+ -+} // end anonymous namespace -+ -+INITIALIZE_AG_PASS(SystemZTTI, TargetTransformInfo, "systemztti", -+ "SystemZ Target Transform Info", true, true, false) -+char SystemZTTI::ID = 0; -+ -+ImmutablePass * -+llvm::createSystemZTargetTransformInfoPass(const SystemZTargetMachine *TM) { -+ return new SystemZTTI(TM); -+} -+ -+ -+//===----------------------------------------------------------------------===// -+// -+// SystemZ cost model. -+// -+//===----------------------------------------------------------------------===// -+ -+unsigned SystemZTTI::getIntImmCost(const APInt &Imm, Type *Ty) { -+ assert(Ty->isIntegerTy()); -+ -+ unsigned BitSize = Ty->getPrimitiveSizeInBits(); -+ // There is no cost model for constants with a bit size of 0. Return TCC_Free -+ // here, so that constant hoisting will ignore this constant. -+ if (BitSize == 0) -+ return TCC_Free; -+ // No cost model for operations on integers larger than 64 bit implemented yet. -+ if (BitSize > 64) -+ return TCC_Free; -+ -+ if (Imm == 0) -+ return TCC_Free; -+ -+ if (Imm.getBitWidth() <= 64) { -+ // Constants loaded via lgfi. -+ if (isInt<32>(Imm.getSExtValue())) -+ return TCC_Basic; -+ // Constants loaded via llilf. 
-+ if (isUInt<32>(Imm.getZExtValue())) -+ return TCC_Basic; -+ // Constants loaded via llihf: -+ if ((Imm.getZExtValue() & 0xffffffff) == 0) -+ return TCC_Basic; -+ -+ return 2 * TCC_Basic; -+ } -+ -+ return 4 * TCC_Basic; -+} -+ -+unsigned SystemZTTI::getIntImmCost(unsigned Opcode, unsigned Idx, -+ const APInt &Imm, Type *Ty) { -+ assert(Ty->isIntegerTy()); -+ -+ unsigned BitSize = Ty->getPrimitiveSizeInBits(); -+ // There is no cost model for constants with a bit size of 0. Return TCC_Free -+ // here, so that constant hoisting will ignore this constant. -+ if (BitSize == 0) -+ return TCC_Free; -+ // No cost model for operations on integers larger than 64 bit implemented yet. -+ if (BitSize > 64) -+ return TCC_Free; -+ -+ switch (Opcode) { -+ default: -+ return TCC_Free; -+ case Instruction::GetElementPtr: -+ // Always hoist the base address of a GetElementPtr. This prevents the -+ // creation of new constants for every base constant that gets constant -+ // folded with the offset. -+ if (Idx == 0) -+ return 2 * TCC_Basic; -+ return TCC_Free; -+ case Instruction::Store: -+ if (Idx == 0 && Imm.getBitWidth() <= 64) { -+ // Any 8-bit immediate store can by implemented via mvi. -+ if (BitSize == 8) -+ return TCC_Free; -+ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. -+ if (isInt<16>(Imm.getSExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::ICmp: -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ // Comparisons against signed 32-bit immediates implemented via cgfi. -+ if (isInt<32>(Imm.getSExtValue())) -+ return TCC_Free; -+ // Comparisons against unsigned 32-bit immediates implemented via clgfi. -+ if (isUInt<32>(Imm.getZExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::Add: -+ case Instruction::Sub: -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. 
-+ if (isUInt<32>(Imm.getZExtValue())) -+ return TCC_Free; -+ // Or their negation, by swapping addition vs. subtraction. -+ if (isUInt<32>(-Imm.getSExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::Mul: -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ // We use msgfi to multiply by 32-bit signed immediates. -+ if (isInt<32>(Imm.getSExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::Or: -+ case Instruction::Xor: -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ // Masks supported by oilf/xilf. -+ if (isUInt<32>(Imm.getZExtValue())) -+ return TCC_Free; -+ // Masks supported by oihf/xihf. -+ if ((Imm.getZExtValue() & 0xffffffff) == 0) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::And: -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ // Any 32-bit AND operation can by implemented via nilf. -+ if (BitSize <= 32) -+ return TCC_Free; -+ // 64-bit masks supported by nilf. -+ if (isUInt<32>(~Imm.getZExtValue())) -+ return TCC_Free; -+ // 64-bit masks supported by nilh. -+ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) -+ return TCC_Free; -+ // Some 64-bit AND operations can be implemented via risbg. -+ const SystemZInstrInfo *TII = ST->getInstrInfo(); -+ unsigned Start, End; -+ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) -+ return TCC_Free; -+ } -+ break; -+ case Instruction::Shl: -+ case Instruction::LShr: -+ case Instruction::AShr: -+ // Always return TCC_Free for the shift value of a shift instruction. 
-+ if (Idx == 1) -+ return TCC_Free; -+ break; -+ case Instruction::UDiv: -+ case Instruction::SDiv: -+ case Instruction::URem: -+ case Instruction::SRem: -+ case Instruction::Trunc: -+ case Instruction::ZExt: -+ case Instruction::SExt: -+ case Instruction::IntToPtr: -+ case Instruction::PtrToInt: -+ case Instruction::BitCast: -+ case Instruction::PHI: -+ case Instruction::Call: -+ case Instruction::Select: -+ case Instruction::Ret: -+ case Instruction::Load: -+ break; -+ } -+ -+ return SystemZTTI::getIntImmCost(Imm, Ty); -+} -+ -+unsigned SystemZTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, -+ const APInt &Imm, Type *Ty) { -+ assert(Ty->isIntegerTy()); -+ -+ unsigned BitSize = Ty->getPrimitiveSizeInBits(); -+ // There is no cost model for constants with a bit size of 0. Return TCC_Free -+ // here, so that constant hoisting will ignore this constant. -+ if (BitSize == 0) -+ return TCC_Free; -+ // No cost model for operations on integers larger than 64 bit implemented yet. -+ if (BitSize > 64) -+ return TCC_Free; -+ -+ switch (IID) { -+ default: -+ return TCC_Free; -+ case Intrinsic::sadd_with_overflow: -+ case Intrinsic::uadd_with_overflow: -+ case Intrinsic::ssub_with_overflow: -+ case Intrinsic::usub_with_overflow: -+ // These get expanded to include a normal addition/subtraction. -+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ if (isUInt<32>(Imm.getZExtValue())) -+ return TCC_Free; -+ if (isUInt<32>(-Imm.getSExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Intrinsic::smul_with_overflow: -+ case Intrinsic::umul_with_overflow: -+ // These get expanded to include a normal multiplication. 
-+ if (Idx == 1 && Imm.getBitWidth() <= 64) { -+ if (isInt<32>(Imm.getSExtValue())) -+ return TCC_Free; -+ } -+ break; -+ case Intrinsic::experimental_stackmap: -+ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) -+ return TCC_Free; -+ break; -+ case Intrinsic::experimental_patchpoint_void: -+ case Intrinsic::experimental_patchpoint_i64: -+ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) -+ return TCC_Free; -+ break; -+ } -+ return SystemZTTI::getIntImmCost(Imm, Ty); -+} -+ -+SystemZTTI::PopcntSupportKind -+SystemZTTI::getPopcntSupport(unsigned TyWidth) { -+ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); -+ if (ST->hasPopulationCount() && TyWidth <= 64) -+ return PSK_FastHardware; -+ return PSK_Software; -+} -+ -+unsigned SystemZTTI::getNumberOfRegisters(bool Vector) { -+ if (!Vector) -+ // Discount the stack pointer. Also leave out %r0, since it can't -+ // be used in an address. -+ return 14; -+ if (ST->hasVector()) -+ return 32; -+ return 0; -+} -+ -+unsigned SystemZTTI::getRegisterBitWidth(bool Vector) { -+ if (!Vector) -+ return 64; -+ if (ST->hasVector()) -+ return 128; -+ return 0; -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/ctpop-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/ctpop-01.ll -@@ -0,0 +1,96 @@ -+; Test population-count instruction -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s -+ -+declare i32 @llvm.ctpop.i32(i32 %a) -+declare i64 @llvm.ctpop.i64(i64 %a) -+ -+define i32 @f1(i32 %a) { -+; CHECK-LABEL: f1: -+; CHECK: popcnt %r0, %r2 -+; CHECK: sllk %r1, %r0, 16 -+; CHECK: ar %r1, %r0 -+; CHECK: sllk %r2, %r1, 8 -+; CHECK: ar %r2, %r1 -+; CHECK: srl %r2, 24 -+; CHECK: br %r14 -+ -+ %popcnt = call i32 @llvm.ctpop.i32(i32 %a) -+ ret i32 %popcnt -+} -+ -+define i32 @f2(i32 %a) { -+; CHECK-LABEL: f2: -+; CHECK: llhr %r0, %r2 -+; CHECK: popcnt %r0, %r0 -+; CHECK: risblg %r2, 
%r0, 16, 151, 8 -+; CHECK: ar %r2, %r0 -+; CHECK: srl %r2, 8 -+; CHECK: br %r14 -+ %and = and i32 %a, 65535 -+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and) -+ ret i32 %popcnt -+} -+ -+define i32 @f3(i32 %a) { -+; CHECK-LABEL: f3: -+; CHECK: llcr %r0, %r2 -+; CHECK: popcnt %r2, %r0 -+; CHECK: br %r14 -+ %and = and i32 %a, 255 -+ %popcnt = call i32 @llvm.ctpop.i32(i32 %and) -+ ret i32 %popcnt -+} -+ -+define i64 @f4(i64 %a) { -+; CHECK-LABEL: f4: -+; CHECK: popcnt %r0, %r2 -+; CHECK: sllg %r1, %r0, 32 -+; CHECK: agr %r1, %r0 -+; CHECK: sllg %r0, %r1, 16 -+; CHECK: agr %r0, %r1 -+; CHECK: sllg %r1, %r0, 8 -+; CHECK: agr %r1, %r0 -+; CHECK: srlg %r2, %r1, 56 -+; CHECK: br %r14 -+ %popcnt = call i64 @llvm.ctpop.i64(i64 %a) -+ ret i64 %popcnt -+} -+ -+define i64 @f5(i64 %a) { -+; CHECK-LABEL: f5: -+; CHECK: llgfr %r0, %r2 -+; CHECK: popcnt %r0, %r0 -+; CHECK: sllg %r1, %r0, 16 -+; CHECK: algfr %r0, %r1 -+; CHECK: sllg %r1, %r0, 8 -+; CHECK: algfr %r0, %r1 -+; CHECK: srlg %r2, %r0, 24 -+ %and = and i64 %a, 4294967295 -+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) -+ ret i64 %popcnt -+} -+ -+define i64 @f6(i64 %a) { -+; CHECK-LABEL: f6: -+; CHECK: llghr %r0, %r2 -+; CHECK: popcnt %r0, %r0 -+; CHECK: risbg %r1, %r0, 48, 183, 8 -+; CHECK: agr %r1, %r0 -+; CHECK: srlg %r2, %r1, 8 -+; CHECK: br %r14 -+ %and = and i64 %a, 65535 -+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) -+ ret i64 %popcnt -+} -+ -+define i64 @f7(i64 %a) { -+; CHECK-LABEL: f7: -+; CHECK: llgcr %r0, %r2 -+; CHECK: popcnt %r2, %r0 -+; CHECK: br %r14 -+ %and = and i64 %a, 255 -+ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) -+ ret i64 %popcnt -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-01.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll -@@ -1,6 +1,7 @@ - ; Test floating-point absolute. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test f32. - declare float @llvm.fabs.f32(float %f) -Index: llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll -@@ -1,6 +1,7 @@ - ; Test negated floating-point absolute. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test f32. - declare float @llvm.fabs.f32(float %f) -Index: llvm-36/test/CodeGen/SystemZ/fp-add-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-add-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-add-02.ll -@@ -1,7 +1,8 @@ - ; Test 64-bit floating-point addition. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -- -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - declare double @foo() - - ; Check register addition. -@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%b - define double @f7(double *%ptr0) { - ; CHECK-LABEL: f7: - ; CHECK: brasl %r14, foo@PLT --; CHECK: adb %f0, 160(%r15) -+; CHECK-SCALAR: adb %f0, 160(%r15) - ; CHECK: br %r14 - %ptr1 = getelementptr double *%ptr0, i64 2 - %ptr2 = getelementptr double *%ptr0, i64 4 -Index: llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-cmp-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll -@@ -1,7 +1,10 @@ - ; Test 64-bit floating-point comparison. 
The tests assume a z10 implementation - ; of select, using conditional branches rather than LOCGR. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s - - declare double @foo() - -@@ -9,8 +12,9 @@ declare double @foo() - define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { - ; CHECK-LABEL: f1: - ; CHECK: cdbr %f0, %f2 --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %cond = fcmp oeq double %f1, %f2 - %res = select i1 %cond, i64 %a, i64 %b -@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f - define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { - ; CHECK-LABEL: f2: - ; CHECK: cdb %f0, 0(%r4) --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %f2 = load double *%ptr - %cond = fcmp oeq double %f1, %f2 -@@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f - define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { - ; CHECK-LABEL: f3: - ; CHECK: cdb %f0, 4088(%r4) --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %ptr = getelementptr double *%base, i64 511 - %f2 = load double *%ptr -@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f - ; CHECK-LABEL: f4: - ; CHECK: aghi %r4, 4096 - ; CHECK: cdb %f0, 0(%r4) --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %ptr = getelementptr double *%base, i64 512 - %f2 = load double *%ptr -@@ -65,8 
+72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f - ; CHECK-LABEL: f5: - ; CHECK: aghi %r4, -8 - ; CHECK: cdb %f0, 0(%r4) --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %ptr = getelementptr double *%base, i64 -1 - %f2 = load double *%ptr -@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f - ; CHECK-LABEL: f6: - ; CHECK: sllg %r1, %r5, 3 - ; CHECK: cdb %f0, 800(%r1,%r4) --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %ptr1 = getelementptr double *%base, i64 %index - %ptr2 = getelementptr double *%ptr1, i64 100 -@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f - define double @f7(double *%ptr0) { - ; CHECK-LABEL: f7: - ; CHECK: brasl %r14, foo@PLT --; CHECK: cdb {{%f[0-9]+}}, 160(%r15) -+; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15) - ; CHECK: br %r14 - %ptr1 = getelementptr double *%ptr0, i64 2 - %ptr2 = getelementptr double *%ptr0, i64 4 -@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) { - ; Check comparison with zero. 
- define i64 @f8(i64 %a, i64 %b, double %f) { - ; CHECK-LABEL: f8: --; CHECK: ltdbr %f0, %f0 --; CHECK-NEXT: je --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR: ltdbr %f0, %f0 -+; CHECK-SCALAR-NEXT: je -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR: lzdr %f1 -+; CHECK-VECTOR-NEXT: cdbr %f0, %f1 -+; CHECK-VECTOR-NEXT: locgrne %r2, %r3 - ; CHECK: br %r14 - %cond = fcmp oeq double %f, 0.0 - %res = select i1 %cond, i64 %a, i64 %b -@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f - define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) { - ; CHECK-LABEL: f9: - ; CHECK: cdb %f0, 0(%r4) --; CHECK-NEXT: jl {{\.L.*}} --; CHECK: lgr %r2, %r3 -+; CHECK-SCALAR-NEXT: jl -+; CHECK-SCALAR: lgr %r2, %r3 -+; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 - ; CHECK: br %r14 - %f1 = load double *%ptr - %cond = fcmp ogt double %f1, %f2 -Index: llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-01.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll -@@ -1,11 +1,15 @@ - ; Test floating-point truncations. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s - - ; Test f64->f32. 
- define float @f1(double %d1, double %d2) { - ; CHECK-LABEL: f1: --; CHECK: ledbr %f0, %f2 -+; CHECK-SCALAR: ledbr %f0, %f2 -+; CHECK-VECTOR: ledbra %f0, 0, %f2, 0 - ; CHECK: br %r14 - %res = fptrunc double %d2 to float - ret float %res -@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) { - define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) { - ; CHECK-LABEL: f5: - ; CHECK: ldxbr %f1, %f1 --; CHECK: adbr %f1, %f2 --; CHECK: std %f1, 0(%r2) -+; CHECK-SCALAR: adbr %f1, %f2 -+; CHECK-SCALAR: std %f1, 0(%r2) -+; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2 -+; CHECK-VECTOR: std [[REG]], 0(%r2) - ; CHECK: br %r14 - %val = load fp128 *%ptr - %conv = fptrunc fp128 %val to double -Index: llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll -@@ -1,6 +1,8 @@ - ; Test extensions of f32 to f64. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Check register extension. - define double @f1(float %val) { -@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %ind - ; to use LDEB if possible. - define void @f7(double *%ptr1, float *%ptr2) { - ; CHECK-LABEL: f7: --; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) -+; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) - ; CHECK: br %r14 - %val0 = load volatile float *%ptr2 - %val1 = load volatile float *%ptr2 -Index: llvm-36/test/CodeGen/SystemZ/fp-div-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-div-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-div-02.ll -@@ -1,6 +1,8 @@ - ; Test 64-bit floating-point division. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - declare double @foo() - -@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b - define double @f7(double *%ptr0) { - ; CHECK-LABEL: f7: - ; CHECK: brasl %r14, foo@PLT --; CHECK: ddb %f0, 160(%r15) -+; CHECK-SCALAR: ddb %f0, 160(%r15) - ; CHECK: br %r14 - %ptr1 = getelementptr double *%ptr0, i64 2 - %ptr2 = getelementptr double *%ptr0, i64 4 -Index: llvm-36/test/CodeGen/SystemZ/fp-move-01.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-01.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-move-01.ll -@@ -1,11 +1,13 @@ - ; Test moves between FPRs. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test f32 moves. - define float @f1(float %a, float %b) { - ; CHECK-LABEL: f1: - ; CHECK: ler %f0, %f2 -+; CHECK: br %r14 - ret float %b - } - -@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) { - define double @f2(double %a, double %b) { - ; CHECK-LABEL: f2: - ; CHECK: ldr %f0, %f2 -+; CHECK: br %r14 - ret double %b - } - -@@ -22,6 +25,7 @@ define void @f3(fp128 *%x) { - ; CHECK-LABEL: f3: - ; CHECK: lxr - ; CHECK: axbr -+; CHECK: br %r14 - %val = load volatile fp128 *%x - %sum = fadd fp128 %val, %val - store volatile fp128 %sum, fp128 *%x -Index: llvm-36/test/CodeGen/SystemZ/fp-move-04.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-04.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-move-04.ll -@@ -1,6 +1,7 @@ - ; Test 64-bit floating-point loads. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test the low end of the LD range. - define double @f1(double *%src) { -Index: llvm-36/test/CodeGen/SystemZ/fp-move-07.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-07.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-move-07.ll -@@ -1,6 +1,7 @@ - ; Test 64-bit floating-point stores. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test the low end of the STD range. - define void @f1(double *%src, double %val) { -Index: llvm-36/test/CodeGen/SystemZ/fp-move-09.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-move-09.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-move-09.ll -@@ -1,4 +1,4 @@ --; Test moves between FPRs and GPRs for z196 and above. -+; Test moves between FPRs and GPRs for z196 and zEC12. - ; - ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s - -Index: llvm-36/test/CodeGen/SystemZ/fp-move-10.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/fp-move-10.ll -@@ -0,0 +1,61 @@ -+; Test moves between FPRs and GPRs for z13 and above. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Check that moves from i32s to floats use a low GR32 and vector operation. 
-+define float @f1(i16 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: llh [[REG:%r[0-5]]], 0(%r2) -+; CHECK: oilh [[REG]], 16256 -+; CHECK: vlvgf %v0, [[REG]], 0 -+; CHECK: br %r14 -+ %base = load i16 *%ptr -+ %ext = zext i16 %base to i32 -+ %full = or i32 %ext, 1065353216 -+ %res = bitcast i32 %full to float -+ ret float %res -+} -+ -+; Check that moves from floats to i32s use a low GR32 and vector operation. -+define void @f2(float %val, i8 *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: stc [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %res = bitcast float %val to i32 -+ %trunc = trunc i32 %res to i8 -+ store i8 %trunc, i8 *%ptr -+ ret void -+} -+ -+; Like f2, but with a conditional store. -+define void @f3(float %val, i8 *%ptr, i32 %which) { -+; CHECK-LABEL: f3: -+; CHECK-DAG: cijlh %r3, 0, -+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: stc [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %int = bitcast float %val to i32 -+ %trunc = trunc i32 %int to i8 -+ %old = load i8 *%ptr -+ %cmp = icmp eq i32 %which, 0 -+ %res = select i1 %cmp, i8 %trunc, i8 %old -+ store i8 %res, i8 *%ptr -+ ret void -+} -+ -+; ...and again with 16-bit memory. -+define void @f4(float %val, i16 *%ptr, i32 %which) { -+; CHECK-LABEL: f4: -+; CHECK-DAG: cijlh %r3, 0, -+; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: sth [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %int = bitcast float %val to i32 -+ %trunc = trunc i32 %int to i16 -+ %old = load i16 *%ptr -+ %cmp = icmp eq i32 %which, 0 -+ %res = select i1 %cmp, i16 %trunc, i16 %old -+ store i16 %res, i16 *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/fp-move-11.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/fp-move-11.ll -@@ -0,0 +1,110 @@ -+; Test 32-bit floating-point loads for z13. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test that we use LDE instead of LE - low end of the LE range. 
-+define float @f1(float *%src) { -+; CHECK-LABEL: f1: -+; CHECK: lde %f0, 0(%r2) -+; CHECK: br %r14 -+ %val = load float *%src -+ ret float %val -+} -+ -+; Test that we use LDE instead of LE - high end of the LE range. -+define float @f2(float *%src) { -+; CHECK-LABEL: f2: -+; CHECK: lde %f0, 4092(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 1023 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the next word up, which should use LEY instead of LDE. -+define float @f3(float *%src) { -+; CHECK-LABEL: f3: -+; CHECK: ley %f0, 4096(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 1024 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the high end of the aligned LEY range. -+define float @f4(float *%src) { -+; CHECK-LABEL: f4: -+; CHECK: ley %f0, 524284(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 131071 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the next word up, which needs separate address logic. -+; Other sequences besides this one would be OK. -+define float @f5(float *%src) { -+; CHECK-LABEL: f5: -+; CHECK: agfi %r2, 524288 -+; CHECK: lde %f0, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 131072 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the high end of the negative aligned LEY range. -+define float @f6(float *%src) { -+; CHECK-LABEL: f6: -+; CHECK: ley %f0, -4(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 -1 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the low end of the LEY range. -+define float @f7(float *%src) { -+; CHECK-LABEL: f7: -+; CHECK: ley %f0, -524288(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 -131072 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check the next word down, which needs separate address logic. -+; Other sequences besides this one would be OK. 
-+define float @f8(float *%src) { -+; CHECK-LABEL: f8: -+; CHECK: agfi %r2, -524292 -+; CHECK: lde %f0, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%src, i64 -131073 -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check that LDE allows an index. -+define float @f9(i64 %src, i64 %index) { -+; CHECK-LABEL: f9: -+; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}}) -+; CHECK: br %r14 -+ %add1 = add i64 %src, %index -+ %add2 = add i64 %add1, 4092 -+ %ptr = inttoptr i64 %add2 to float * -+ %val = load float *%ptr -+ ret float %val -+} -+ -+; Check that LEY allows an index. -+define float @f10(i64 %src, i64 %index) { -+; CHECK-LABEL: f10: -+; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}}) -+; CHECK: br %r14 -+ %add1 = add i64 %src, %index -+ %add2 = add i64 %add1, 4096 -+ %ptr = inttoptr i64 %add2 to float * -+ %val = load float *%ptr -+ ret float %val -+} -Index: llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-03.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll -@@ -1,6 +1,8 @@ - ; Test multiplication of two f64s, producing an f64 result. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - declare double @foo() - -@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b - define double @f7(double *%ptr0) { - ; CHECK-LABEL: f7: - ; CHECK: brasl %r14, foo@PLT --; CHECK: mdb %f0, 160(%r15) -+; CHECK-SCALAR: mdb %f0, 160(%r15) - ; CHECK: br %r14 - %ptr1 = getelementptr double *%ptr0, i64 2 - %ptr2 = getelementptr double *%ptr0, i64 4 -Index: llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-07.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll -@@ -1,11 +1,15 @@ --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s - - declare double @llvm.fma.f64(double %f1, double %f2, double %f3) - - define double @f1(double %f1, double %f2, double %acc) { - ; CHECK-LABEL: f1: --; CHECK: madbr %f4, %f0, %f2 --; CHECK: ldr %f0, %f4 -+; CHECK-SCALAR: madbr %f4, %f0, %f2 -+; CHECK-SCALAR: ldr %f0, %f4 -+; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4 - ; CHECK: br %r14 - %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) - ret double %res -Index: llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-09.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll -@@ -1,11 +1,15 @@ --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK 
-check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s - - declare double @llvm.fma.f64(double %f1, double %f2, double %f3) - - define double @f1(double %f1, double %f2, double %acc) { - ; CHECK-LABEL: f1: --; CHECK: msdbr %f4, %f0, %f2 --; CHECK: ldr %f0, %f4 -+; CHECK-SCALAR: msdbr %f4, %f0, %f2 -+; CHECK-SCALAR: ldr %f0, %f4 -+; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4 - ; CHECK: br %r14 - %negacc = fsub double -0.0, %acc - %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) -Index: llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-neg-01.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll -@@ -1,6 +1,7 @@ - ; Test floating-point negation. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - ; Test f32. - define float @f1(float %f) { -Index: llvm-36/test/CodeGen/SystemZ/fp-round-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-round-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-round-02.ll -@@ -1,6 +1,9 @@ - ; Test rounding functions for z196 and above. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s - - ; Test rint for f32. 
- declare float @llvm.rint.f32(float %f) -@@ -16,7 +19,8 @@ define float @f1(float %f) { - declare double @llvm.rint.f64(double %f) - define double @f2(double %f) { - ; CHECK-LABEL: f2: --; CHECK: fidbr %f0, 0, %f0 -+; CHECK-SCALAR: fidbr %f0, 0, %f0 -+; CHECK-VECTOR: fidbra %f0, 0, %f0, 0 - ; CHECK: br %r14 - %res = call double @llvm.rint.f64(double %f) - ret double %res -Index: llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-sqrt-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll -@@ -1,6 +1,8 @@ - ; Test 64-bit square root. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - declare double @llvm.sqrt.f64(double %f) - declare double @sqrt(double) -@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %in - ; to use SQDB if possible. - define void @f7(double *%ptr) { - ; CHECK-LABEL: f7: --; CHECK: sqdb {{%f[0-9]+}}, 160(%r15) -+; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15) - ; CHECK: br %r14 - %val0 = load volatile double *%ptr - %val1 = load volatile double *%ptr -Index: llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/fp-sub-02.ll -+++ llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll -@@ -1,6 +1,8 @@ - ; Test 64-bit floating-point subtraction. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ -+; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s - - declare double @foo() - -@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b - define double @f7(double *%ptr0) { - ; CHECK-LABEL: f7: - ; CHECK: brasl %r14, foo@PLT --; CHECK: sdb %f0, 16{{[04]}}(%r15) -+; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15) - ; CHECK: br %r14 - %ptr1 = getelementptr double *%ptr0, i64 2 - %ptr2 = getelementptr double *%ptr0, i64 4 -Index: llvm-36/test/CodeGen/SystemZ/frame-03.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/frame-03.ll -+++ llvm-36/test/CodeGen/SystemZ/frame-03.ll -@@ -2,7 +2,7 @@ - ; uses a different register class, but the set of saved and restored - ; registers should be the same. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s - - ; This function should require all FPRs, but no other spill slots. - ; We need to save and restore 8 of the 16 FPRs, so the frame size -Index: llvm-36/test/CodeGen/SystemZ/frame-07.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/frame-07.ll -+++ llvm-36/test/CodeGen/SystemZ/frame-07.ll -@@ -1,7 +1,7 @@ - ; Test the saving and restoring of FPRs in large frames. 
- ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s --; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s - - ; Test a frame size that requires some FPRs to be saved and loaded using - ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD. -Index: llvm-36/test/CodeGen/SystemZ/frame-17.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/frame-17.ll -+++ llvm-36/test/CodeGen/SystemZ/frame-17.ll -@@ -1,6 +1,6 @@ - ; Test spilling of FPRs. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s - - ; We need to save and restore 8 of the 16 FPRs and allocate an additional - ; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly -Index: llvm-36/test/CodeGen/SystemZ/frame-19.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/frame-19.ll -@@ -0,0 +1,314 @@ -+; Test spilling of vector registers. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; We need to allocate a 16-byte spill slot and save the 8 call-saved FPRs. -+; The frame size should be exactly 160 + 16 + 8 * 8 = 240. 
-+define void @f1(<16 x i8> *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: aghi %r15, -240 -+; CHECK-DAG: std %f8, -+; CHECK-DAG: std %f9, -+; CHECK-DAG: std %f10, -+; CHECK-DAG: std %f11, -+; CHECK-DAG: std %f12, -+; CHECK-DAG: std %f13, -+; CHECK-DAG: std %f14, -+; CHECK-DAG: std %f15, -+; CHECK: vst {{%v[0-9]+}}, 160(%r15) -+; CHECK: vl {{%v[0-9]+}}, 160(%r15) -+; CHECK-DAG: ld %f8, -+; CHECK-DAG: ld %f9, -+; CHECK-DAG: ld %f10, -+; CHECK-DAG: ld %f11, -+; CHECK-DAG: ld %f12, -+; CHECK-DAG: ld %f13, -+; CHECK-DAG: ld %f14, -+; CHECK-DAG: ld %f15, -+; CHECK: aghi %r15, 240 -+; CHECK: br %r14 -+ %v0 = load volatile <16 x i8> *%ptr -+ %v1 = load volatile <16 x i8> *%ptr -+ %v2 = load volatile <16 x i8> *%ptr -+ %v3 = load volatile <16 x i8> *%ptr -+ %v4 = load volatile <16 x i8> *%ptr -+ %v5 = load volatile <16 x i8> *%ptr -+ %v6 = load volatile <16 x i8> *%ptr -+ %v7 = load volatile <16 x i8> *%ptr -+ %v8 = load volatile <16 x i8> *%ptr -+ %v9 = load volatile <16 x i8> *%ptr -+ %v10 = load volatile <16 x i8> *%ptr -+ %v11 = load volatile <16 x i8> *%ptr -+ %v12 = load volatile <16 x i8> *%ptr -+ %v13 = load volatile <16 x i8> *%ptr -+ %v14 = load volatile <16 x i8> *%ptr -+ %v15 = load volatile <16 x i8> *%ptr -+ %v16 = load volatile <16 x i8> *%ptr -+ %v17 = load volatile <16 x i8> *%ptr -+ %v18 = load volatile <16 x i8> *%ptr -+ %v19 = load volatile <16 x i8> *%ptr -+ %v20 = load volatile <16 x i8> *%ptr -+ %v21 = load volatile <16 x i8> *%ptr -+ %v22 = load volatile <16 x i8> *%ptr -+ %v23 = load volatile <16 x i8> *%ptr -+ %v24 = load volatile <16 x i8> *%ptr -+ %v25 = load volatile <16 x i8> *%ptr -+ %v26 = load volatile <16 x i8> *%ptr -+ %v27 = load volatile <16 x i8> *%ptr -+ %v28 = load volatile <16 x i8> *%ptr -+ %v29 = load volatile <16 x i8> *%ptr -+ %v30 = load volatile <16 x i8> *%ptr -+ %v31 = load volatile <16 x i8> *%ptr -+ %vx = load volatile <16 x i8> *%ptr -+ store volatile <16 x i8> %vx, <16 x i8> *%ptr -+ store volatile <16 x i8> %v31, <16 x i8> 
*%ptr -+ store volatile <16 x i8> %v30, <16 x i8> *%ptr -+ store volatile <16 x i8> %v29, <16 x i8> *%ptr -+ store volatile <16 x i8> %v28, <16 x i8> *%ptr -+ store volatile <16 x i8> %v27, <16 x i8> *%ptr -+ store volatile <16 x i8> %v26, <16 x i8> *%ptr -+ store volatile <16 x i8> %v25, <16 x i8> *%ptr -+ store volatile <16 x i8> %v24, <16 x i8> *%ptr -+ store volatile <16 x i8> %v23, <16 x i8> *%ptr -+ store volatile <16 x i8> %v22, <16 x i8> *%ptr -+ store volatile <16 x i8> %v21, <16 x i8> *%ptr -+ store volatile <16 x i8> %v20, <16 x i8> *%ptr -+ store volatile <16 x i8> %v19, <16 x i8> *%ptr -+ store volatile <16 x i8> %v18, <16 x i8> *%ptr -+ store volatile <16 x i8> %v17, <16 x i8> *%ptr -+ store volatile <16 x i8> %v16, <16 x i8> *%ptr -+ store volatile <16 x i8> %v15, <16 x i8> *%ptr -+ store volatile <16 x i8> %v14, <16 x i8> *%ptr -+ store volatile <16 x i8> %v13, <16 x i8> *%ptr -+ store volatile <16 x i8> %v12, <16 x i8> *%ptr -+ store volatile <16 x i8> %v11, <16 x i8> *%ptr -+ store volatile <16 x i8> %v10, <16 x i8> *%ptr -+ store volatile <16 x i8> %v9, <16 x i8> *%ptr -+ store volatile <16 x i8> %v8, <16 x i8> *%ptr -+ store volatile <16 x i8> %v7, <16 x i8> *%ptr -+ store volatile <16 x i8> %v6, <16 x i8> *%ptr -+ store volatile <16 x i8> %v5, <16 x i8> *%ptr -+ store volatile <16 x i8> %v4, <16 x i8> *%ptr -+ store volatile <16 x i8> %v3, <16 x i8> *%ptr -+ store volatile <16 x i8> %v2, <16 x i8> *%ptr -+ store volatile <16 x i8> %v1, <16 x i8> *%ptr -+ store volatile <16 x i8> %v0, <16 x i8> *%ptr -+ ret void -+} -+ -+; Like f1, but no 16-byte slot should be needed. 
-+define void @f2(<16 x i8> *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: aghi %r15, -224 -+; CHECK-DAG: std %f8, -+; CHECK-DAG: std %f9, -+; CHECK-DAG: std %f10, -+; CHECK-DAG: std %f11, -+; CHECK-DAG: std %f12, -+; CHECK-DAG: std %f13, -+; CHECK-DAG: std %f14, -+; CHECK-DAG: std %f15, -+; CHECK-NOT: vst {{.*}}(%r15) -+; CHECK-NOT: vl {{.*}}(%r15) -+; CHECK-DAG: ld %f8, -+; CHECK-DAG: ld %f9, -+; CHECK-DAG: ld %f10, -+; CHECK-DAG: ld %f11, -+; CHECK-DAG: ld %f12, -+; CHECK-DAG: ld %f13, -+; CHECK-DAG: ld %f14, -+; CHECK-DAG: ld %f15, -+; CHECK: aghi %r15, 224 -+; CHECK: br %r14 -+ %v0 = load volatile <16 x i8> *%ptr -+ %v1 = load volatile <16 x i8> *%ptr -+ %v2 = load volatile <16 x i8> *%ptr -+ %v3 = load volatile <16 x i8> *%ptr -+ %v4 = load volatile <16 x i8> *%ptr -+ %v5 = load volatile <16 x i8> *%ptr -+ %v6 = load volatile <16 x i8> *%ptr -+ %v7 = load volatile <16 x i8> *%ptr -+ %v8 = load volatile <16 x i8> *%ptr -+ %v9 = load volatile <16 x i8> *%ptr -+ %v10 = load volatile <16 x i8> *%ptr -+ %v11 = load volatile <16 x i8> *%ptr -+ %v12 = load volatile <16 x i8> *%ptr -+ %v13 = load volatile <16 x i8> *%ptr -+ %v14 = load volatile <16 x i8> *%ptr -+ %v15 = load volatile <16 x i8> *%ptr -+ %v16 = load volatile <16 x i8> *%ptr -+ %v17 = load volatile <16 x i8> *%ptr -+ %v18 = load volatile <16 x i8> *%ptr -+ %v19 = load volatile <16 x i8> *%ptr -+ %v20 = load volatile <16 x i8> *%ptr -+ %v21 = load volatile <16 x i8> *%ptr -+ %v22 = load volatile <16 x i8> *%ptr -+ %v23 = load volatile <16 x i8> *%ptr -+ %v24 = load volatile <16 x i8> *%ptr -+ %v25 = load volatile <16 x i8> *%ptr -+ %v26 = load volatile <16 x i8> *%ptr -+ %v27 = load volatile <16 x i8> *%ptr -+ %v28 = load volatile <16 x i8> *%ptr -+ %v29 = load volatile <16 x i8> *%ptr -+ %v30 = load volatile <16 x i8> *%ptr -+ %v31 = load volatile <16 x i8> *%ptr -+ store volatile <16 x i8> %v31, <16 x i8> *%ptr -+ store volatile <16 x i8> %v30, <16 x i8> *%ptr -+ store volatile <16 x i8> %v29, <16 x i8> 
*%ptr -+ store volatile <16 x i8> %v28, <16 x i8> *%ptr -+ store volatile <16 x i8> %v27, <16 x i8> *%ptr -+ store volatile <16 x i8> %v26, <16 x i8> *%ptr -+ store volatile <16 x i8> %v25, <16 x i8> *%ptr -+ store volatile <16 x i8> %v24, <16 x i8> *%ptr -+ store volatile <16 x i8> %v23, <16 x i8> *%ptr -+ store volatile <16 x i8> %v22, <16 x i8> *%ptr -+ store volatile <16 x i8> %v21, <16 x i8> *%ptr -+ store volatile <16 x i8> %v20, <16 x i8> *%ptr -+ store volatile <16 x i8> %v19, <16 x i8> *%ptr -+ store volatile <16 x i8> %v18, <16 x i8> *%ptr -+ store volatile <16 x i8> %v17, <16 x i8> *%ptr -+ store volatile <16 x i8> %v16, <16 x i8> *%ptr -+ store volatile <16 x i8> %v15, <16 x i8> *%ptr -+ store volatile <16 x i8> %v14, <16 x i8> *%ptr -+ store volatile <16 x i8> %v13, <16 x i8> *%ptr -+ store volatile <16 x i8> %v12, <16 x i8> *%ptr -+ store volatile <16 x i8> %v11, <16 x i8> *%ptr -+ store volatile <16 x i8> %v10, <16 x i8> *%ptr -+ store volatile <16 x i8> %v9, <16 x i8> *%ptr -+ store volatile <16 x i8> %v8, <16 x i8> *%ptr -+ store volatile <16 x i8> %v7, <16 x i8> *%ptr -+ store volatile <16 x i8> %v6, <16 x i8> *%ptr -+ store volatile <16 x i8> %v5, <16 x i8> *%ptr -+ store volatile <16 x i8> %v4, <16 x i8> *%ptr -+ store volatile <16 x i8> %v3, <16 x i8> *%ptr -+ store volatile <16 x i8> %v2, <16 x i8> *%ptr -+ store volatile <16 x i8> %v1, <16 x i8> *%ptr -+ store volatile <16 x i8> %v0, <16 x i8> *%ptr -+ ret void -+} -+ -+; Like f2, but only %f8 should be saved. 
-+define void @f3(<16 x i8> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: aghi %r15, -168 -+; CHECK-DAG: std %f8, -+; CHECK-NOT: vst {{.*}}(%r15) -+; CHECK-NOT: vl {{.*}}(%r15) -+; CHECK-NOT: %v9 -+; CHECK-NOT: %v10 -+; CHECK-NOT: %v11 -+; CHECK-NOT: %v12 -+; CHECK-NOT: %v13 -+; CHECK-NOT: %v14 -+; CHECK-NOT: %v15 -+; CHECK-DAG: ld %f8, -+; CHECK: aghi %r15, 168 -+; CHECK: br %r14 -+ %v0 = load volatile <16 x i8> *%ptr -+ %v1 = load volatile <16 x i8> *%ptr -+ %v2 = load volatile <16 x i8> *%ptr -+ %v3 = load volatile <16 x i8> *%ptr -+ %v4 = load volatile <16 x i8> *%ptr -+ %v5 = load volatile <16 x i8> *%ptr -+ %v6 = load volatile <16 x i8> *%ptr -+ %v7 = load volatile <16 x i8> *%ptr -+ %v8 = load volatile <16 x i8> *%ptr -+ %v16 = load volatile <16 x i8> *%ptr -+ %v17 = load volatile <16 x i8> *%ptr -+ %v18 = load volatile <16 x i8> *%ptr -+ %v19 = load volatile <16 x i8> *%ptr -+ %v20 = load volatile <16 x i8> *%ptr -+ %v21 = load volatile <16 x i8> *%ptr -+ %v22 = load volatile <16 x i8> *%ptr -+ %v23 = load volatile <16 x i8> *%ptr -+ %v24 = load volatile <16 x i8> *%ptr -+ %v25 = load volatile <16 x i8> *%ptr -+ %v26 = load volatile <16 x i8> *%ptr -+ %v27 = load volatile <16 x i8> *%ptr -+ %v28 = load volatile <16 x i8> *%ptr -+ %v29 = load volatile <16 x i8> *%ptr -+ %v30 = load volatile <16 x i8> *%ptr -+ %v31 = load volatile <16 x i8> *%ptr -+ store volatile <16 x i8> %v31, <16 x i8> *%ptr -+ store volatile <16 x i8> %v30, <16 x i8> *%ptr -+ store volatile <16 x i8> %v29, <16 x i8> *%ptr -+ store volatile <16 x i8> %v28, <16 x i8> *%ptr -+ store volatile <16 x i8> %v27, <16 x i8> *%ptr -+ store volatile <16 x i8> %v26, <16 x i8> *%ptr -+ store volatile <16 x i8> %v25, <16 x i8> *%ptr -+ store volatile <16 x i8> %v24, <16 x i8> *%ptr -+ store volatile <16 x i8> %v23, <16 x i8> *%ptr -+ store volatile <16 x i8> %v22, <16 x i8> *%ptr -+ store volatile <16 x i8> %v21, <16 x i8> *%ptr -+ store volatile <16 x i8> %v20, <16 x i8> *%ptr -+ store volatile <16 x i8> 
%v19, <16 x i8> *%ptr -+ store volatile <16 x i8> %v18, <16 x i8> *%ptr -+ store volatile <16 x i8> %v17, <16 x i8> *%ptr -+ store volatile <16 x i8> %v16, <16 x i8> *%ptr -+ store volatile <16 x i8> %v8, <16 x i8> *%ptr -+ store volatile <16 x i8> %v7, <16 x i8> *%ptr -+ store volatile <16 x i8> %v6, <16 x i8> *%ptr -+ store volatile <16 x i8> %v5, <16 x i8> *%ptr -+ store volatile <16 x i8> %v4, <16 x i8> *%ptr -+ store volatile <16 x i8> %v3, <16 x i8> *%ptr -+ store volatile <16 x i8> %v2, <16 x i8> *%ptr -+ store volatile <16 x i8> %v1, <16 x i8> *%ptr -+ store volatile <16 x i8> %v0, <16 x i8> *%ptr -+ ret void -+} -+ -+; Like f2, but no registers should be saved. -+define void @f4(<16 x i8> *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK-NOT: %r15 -+; CHECK: br %r14 -+ %v0 = load volatile <16 x i8> *%ptr -+ %v1 = load volatile <16 x i8> *%ptr -+ %v2 = load volatile <16 x i8> *%ptr -+ %v3 = load volatile <16 x i8> *%ptr -+ %v4 = load volatile <16 x i8> *%ptr -+ %v5 = load volatile <16 x i8> *%ptr -+ %v6 = load volatile <16 x i8> *%ptr -+ %v7 = load volatile <16 x i8> *%ptr -+ %v16 = load volatile <16 x i8> *%ptr -+ %v17 = load volatile <16 x i8> *%ptr -+ %v18 = load volatile <16 x i8> *%ptr -+ %v19 = load volatile <16 x i8> *%ptr -+ %v20 = load volatile <16 x i8> *%ptr -+ %v21 = load volatile <16 x i8> *%ptr -+ %v22 = load volatile <16 x i8> *%ptr -+ %v23 = load volatile <16 x i8> *%ptr -+ %v24 = load volatile <16 x i8> *%ptr -+ %v25 = load volatile <16 x i8> *%ptr -+ %v26 = load volatile <16 x i8> *%ptr -+ %v27 = load volatile <16 x i8> *%ptr -+ %v28 = load volatile <16 x i8> *%ptr -+ %v29 = load volatile <16 x i8> *%ptr -+ %v30 = load volatile <16 x i8> *%ptr -+ %v31 = load volatile <16 x i8> *%ptr -+ store volatile <16 x i8> %v31, <16 x i8> *%ptr -+ store volatile <16 x i8> %v30, <16 x i8> *%ptr -+ store volatile <16 x i8> %v29, <16 x i8> *%ptr -+ store volatile <16 x i8> %v28, <16 x i8> *%ptr -+ store volatile <16 x i8> %v27, <16 x i8> *%ptr -+ store volatile <16 
x i8> %v26, <16 x i8> *%ptr -+ store volatile <16 x i8> %v25, <16 x i8> *%ptr -+ store volatile <16 x i8> %v24, <16 x i8> *%ptr -+ store volatile <16 x i8> %v23, <16 x i8> *%ptr -+ store volatile <16 x i8> %v22, <16 x i8> *%ptr -+ store volatile <16 x i8> %v21, <16 x i8> *%ptr -+ store volatile <16 x i8> %v20, <16 x i8> *%ptr -+ store volatile <16 x i8> %v19, <16 x i8> *%ptr -+ store volatile <16 x i8> %v18, <16 x i8> *%ptr -+ store volatile <16 x i8> %v17, <16 x i8> *%ptr -+ store volatile <16 x i8> %v16, <16 x i8> *%ptr -+ store volatile <16 x i8> %v7, <16 x i8> *%ptr -+ store volatile <16 x i8> %v6, <16 x i8> *%ptr -+ store volatile <16 x i8> %v5, <16 x i8> *%ptr -+ store volatile <16 x i8> %v4, <16 x i8> *%ptr -+ store volatile <16 x i8> %v3, <16 x i8> *%ptr -+ store volatile <16 x i8> %v2, <16 x i8> *%ptr -+ store volatile <16 x i8> %v1, <16 x i8> *%ptr -+ store volatile <16 x i8> %v0, <16 x i8> *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/frame-20.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/frame-20.ll -@@ -0,0 +1,445 @@ -+; Like frame-03.ll, but for z13. In this case we have 16 more registers -+; available. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; This function should require all FPRs, but no other spill slots. -+; We need to save and restore 8 of the 16 FPRs, so the frame size -+; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 -+; (the caller-allocated part of the frame) + 224. 
-+define void @f1(double *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: aghi %r15, -224 -+; CHECK: .cfi_def_cfa_offset 384 -+; CHECK: std %f8, 216(%r15) -+; CHECK: std %f9, 208(%r15) -+; CHECK: std %f10, 200(%r15) -+; CHECK: std %f11, 192(%r15) -+; CHECK: std %f12, 184(%r15) -+; CHECK: std %f13, 176(%r15) -+; CHECK: std %f14, 168(%r15) -+; CHECK: std %f15, 160(%r15) -+; CHECK: .cfi_offset %f8, -168 -+; CHECK: .cfi_offset %f9, -176 -+; CHECK: .cfi_offset %f10, -184 -+; CHECK: .cfi_offset %f11, -192 -+; CHECK: .cfi_offset %f12, -200 -+; CHECK: .cfi_offset %f13, -208 -+; CHECK: .cfi_offset %f14, -216 -+; CHECK: .cfi_offset %f15, -224 -+; CHECK-DAG: ld %f0, 0(%r2) -+; CHECK-DAG: ld %f7, 0(%r2) -+; CHECK-DAG: ld %f8, 0(%r2) -+; CHECK-DAG: ld %f15, 0(%r2) -+; CHECK-DAG: vlrepg %v16, 0(%r2) -+; CHECK-DAG: vlrepg %v23, 0(%r2) -+; CHECK-DAG: vlrepg %v24, 0(%r2) -+; CHECK-DAG: vlrepg %v31, 0(%r2) -+; CHECK: ld %f8, 216(%r15) -+; CHECK: ld %f9, 208(%r15) -+; CHECK: ld %f10, 200(%r15) -+; CHECK: ld %f11, 192(%r15) -+; CHECK: ld %f12, 184(%r15) -+; CHECK: ld %f13, 176(%r15) -+; CHECK: ld %f14, 168(%r15) -+; CHECK: ld %f15, 160(%r15) -+; CHECK: aghi %r15, 224 -+; CHECK: br %r14 -+ %l0 = load volatile double *%ptr -+ %l1 = load volatile double *%ptr -+ %l2 = load volatile double *%ptr -+ %l3 = load volatile double *%ptr -+ %l4 = load volatile double *%ptr -+ %l5 = load volatile double *%ptr -+ %l6 = load volatile double *%ptr -+ %l7 = load volatile double *%ptr -+ %l8 = load volatile double *%ptr -+ %l9 = load volatile double *%ptr -+ %l10 = load volatile double *%ptr -+ %l11 = load volatile double *%ptr -+ %l12 = load volatile double *%ptr -+ %l13 = load volatile double *%ptr -+ %l14 = load volatile double *%ptr -+ %l15 = load volatile double *%ptr -+ %l16 = load volatile double *%ptr -+ %l17 = load volatile double *%ptr -+ %l18 = load volatile double *%ptr -+ %l19 = load volatile double *%ptr -+ %l20 = load volatile double *%ptr -+ %l21 = load volatile double *%ptr -+ %l22 = load 
volatile double *%ptr -+ %l23 = load volatile double *%ptr -+ %l24 = load volatile double *%ptr -+ %l25 = load volatile double *%ptr -+ %l26 = load volatile double *%ptr -+ %l27 = load volatile double *%ptr -+ %l28 = load volatile double *%ptr -+ %l29 = load volatile double *%ptr -+ %l30 = load volatile double *%ptr -+ %l31 = load volatile double *%ptr -+ %acc0 = fsub double %l0, %l0 -+ %acc1 = fsub double %l1, %acc0 -+ %acc2 = fsub double %l2, %acc1 -+ %acc3 = fsub double %l3, %acc2 -+ %acc4 = fsub double %l4, %acc3 -+ %acc5 = fsub double %l5, %acc4 -+ %acc6 = fsub double %l6, %acc5 -+ %acc7 = fsub double %l7, %acc6 -+ %acc8 = fsub double %l8, %acc7 -+ %acc9 = fsub double %l9, %acc8 -+ %acc10 = fsub double %l10, %acc9 -+ %acc11 = fsub double %l11, %acc10 -+ %acc12 = fsub double %l12, %acc11 -+ %acc13 = fsub double %l13, %acc12 -+ %acc14 = fsub double %l14, %acc13 -+ %acc15 = fsub double %l15, %acc14 -+ %acc16 = fsub double %l16, %acc15 -+ %acc17 = fsub double %l17, %acc16 -+ %acc18 = fsub double %l18, %acc17 -+ %acc19 = fsub double %l19, %acc18 -+ %acc20 = fsub double %l20, %acc19 -+ %acc21 = fsub double %l21, %acc20 -+ %acc22 = fsub double %l22, %acc21 -+ %acc23 = fsub double %l23, %acc22 -+ %acc24 = fsub double %l24, %acc23 -+ %acc25 = fsub double %l25, %acc24 -+ %acc26 = fsub double %l26, %acc25 -+ %acc27 = fsub double %l27, %acc26 -+ %acc28 = fsub double %l28, %acc27 -+ %acc29 = fsub double %l29, %acc28 -+ %acc30 = fsub double %l30, %acc29 -+ %acc31 = fsub double %l31, %acc30 -+ store volatile double %acc0, double *%ptr -+ store volatile double %acc1, double *%ptr -+ store volatile double %acc2, double *%ptr -+ store volatile double %acc3, double *%ptr -+ store volatile double %acc4, double *%ptr -+ store volatile double %acc5, double *%ptr -+ store volatile double %acc6, double *%ptr -+ store volatile double %acc7, double *%ptr -+ store volatile double %acc8, double *%ptr -+ store volatile double %acc9, double *%ptr -+ store volatile double %acc10, double 
*%ptr -+ store volatile double %acc11, double *%ptr -+ store volatile double %acc12, double *%ptr -+ store volatile double %acc13, double *%ptr -+ store volatile double %acc14, double *%ptr -+ store volatile double %acc15, double *%ptr -+ store volatile double %acc16, double *%ptr -+ store volatile double %acc17, double *%ptr -+ store volatile double %acc18, double *%ptr -+ store volatile double %acc19, double *%ptr -+ store volatile double %acc20, double *%ptr -+ store volatile double %acc21, double *%ptr -+ store volatile double %acc22, double *%ptr -+ store volatile double %acc23, double *%ptr -+ store volatile double %acc24, double *%ptr -+ store volatile double %acc25, double *%ptr -+ store volatile double %acc26, double *%ptr -+ store volatile double %acc27, double *%ptr -+ store volatile double %acc28, double *%ptr -+ store volatile double %acc29, double *%ptr -+ store volatile double %acc30, double *%ptr -+ store volatile double %acc31, double *%ptr -+ ret void -+} -+ -+; Like f1, but requires one fewer FPR. We allocate in numerical order, -+; so %f15 is the one that gets dropped. 
-+define void @f2(double *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: aghi %r15, -216 -+; CHECK: .cfi_def_cfa_offset 376 -+; CHECK: std %f8, 208(%r15) -+; CHECK: std %f9, 200(%r15) -+; CHECK: std %f10, 192(%r15) -+; CHECK: std %f11, 184(%r15) -+; CHECK: std %f12, 176(%r15) -+; CHECK: std %f13, 168(%r15) -+; CHECK: std %f14, 160(%r15) -+; CHECK: .cfi_offset %f8, -168 -+; CHECK: .cfi_offset %f9, -176 -+; CHECK: .cfi_offset %f10, -184 -+; CHECK: .cfi_offset %f11, -192 -+; CHECK: .cfi_offset %f12, -200 -+; CHECK: .cfi_offset %f13, -208 -+; CHECK: .cfi_offset %f14, -216 -+; CHECK-NOT: %v15 -+; CHECK-NOT: %f15 -+; CHECK: ld %f8, 208(%r15) -+; CHECK: ld %f9, 200(%r15) -+; CHECK: ld %f10, 192(%r15) -+; CHECK: ld %f11, 184(%r15) -+; CHECK: ld %f12, 176(%r15) -+; CHECK: ld %f13, 168(%r15) -+; CHECK: ld %f14, 160(%r15) -+; CHECK: aghi %r15, 216 -+; CHECK: br %r14 -+ %l0 = load volatile double *%ptr -+ %l1 = load volatile double *%ptr -+ %l2 = load volatile double *%ptr -+ %l3 = load volatile double *%ptr -+ %l4 = load volatile double *%ptr -+ %l5 = load volatile double *%ptr -+ %l6 = load volatile double *%ptr -+ %l7 = load volatile double *%ptr -+ %l8 = load volatile double *%ptr -+ %l9 = load volatile double *%ptr -+ %l10 = load volatile double *%ptr -+ %l11 = load volatile double *%ptr -+ %l12 = load volatile double *%ptr -+ %l13 = load volatile double *%ptr -+ %l14 = load volatile double *%ptr -+ %l16 = load volatile double *%ptr -+ %l17 = load volatile double *%ptr -+ %l18 = load volatile double *%ptr -+ %l19 = load volatile double *%ptr -+ %l20 = load volatile double *%ptr -+ %l21 = load volatile double *%ptr -+ %l22 = load volatile double *%ptr -+ %l23 = load volatile double *%ptr -+ %l24 = load volatile double *%ptr -+ %l25 = load volatile double *%ptr -+ %l26 = load volatile double *%ptr -+ %l27 = load volatile double *%ptr -+ %l28 = load volatile double *%ptr -+ %l29 = load volatile double *%ptr -+ %l30 = load volatile double *%ptr -+ %l31 = load volatile double *%ptr 
-+ %acc0 = fsub double %l0, %l0 -+ %acc1 = fsub double %l1, %acc0 -+ %acc2 = fsub double %l2, %acc1 -+ %acc3 = fsub double %l3, %acc2 -+ %acc4 = fsub double %l4, %acc3 -+ %acc5 = fsub double %l5, %acc4 -+ %acc6 = fsub double %l6, %acc5 -+ %acc7 = fsub double %l7, %acc6 -+ %acc8 = fsub double %l8, %acc7 -+ %acc9 = fsub double %l9, %acc8 -+ %acc10 = fsub double %l10, %acc9 -+ %acc11 = fsub double %l11, %acc10 -+ %acc12 = fsub double %l12, %acc11 -+ %acc13 = fsub double %l13, %acc12 -+ %acc14 = fsub double %l14, %acc13 -+ %acc16 = fsub double %l16, %acc14 -+ %acc17 = fsub double %l17, %acc16 -+ %acc18 = fsub double %l18, %acc17 -+ %acc19 = fsub double %l19, %acc18 -+ %acc20 = fsub double %l20, %acc19 -+ %acc21 = fsub double %l21, %acc20 -+ %acc22 = fsub double %l22, %acc21 -+ %acc23 = fsub double %l23, %acc22 -+ %acc24 = fsub double %l24, %acc23 -+ %acc25 = fsub double %l25, %acc24 -+ %acc26 = fsub double %l26, %acc25 -+ %acc27 = fsub double %l27, %acc26 -+ %acc28 = fsub double %l28, %acc27 -+ %acc29 = fsub double %l29, %acc28 -+ %acc30 = fsub double %l30, %acc29 -+ %acc31 = fsub double %l31, %acc30 -+ store volatile double %acc0, double *%ptr -+ store volatile double %acc1, double *%ptr -+ store volatile double %acc2, double *%ptr -+ store volatile double %acc3, double *%ptr -+ store volatile double %acc4, double *%ptr -+ store volatile double %acc5, double *%ptr -+ store volatile double %acc6, double *%ptr -+ store volatile double %acc7, double *%ptr -+ store volatile double %acc8, double *%ptr -+ store volatile double %acc9, double *%ptr -+ store volatile double %acc10, double *%ptr -+ store volatile double %acc11, double *%ptr -+ store volatile double %acc12, double *%ptr -+ store volatile double %acc13, double *%ptr -+ store volatile double %acc14, double *%ptr -+ store volatile double %acc16, double *%ptr -+ store volatile double %acc17, double *%ptr -+ store volatile double %acc18, double *%ptr -+ store volatile double %acc19, double *%ptr -+ store volatile 
double %acc20, double *%ptr -+ store volatile double %acc21, double *%ptr -+ store volatile double %acc22, double *%ptr -+ store volatile double %acc23, double *%ptr -+ store volatile double %acc24, double *%ptr -+ store volatile double %acc25, double *%ptr -+ store volatile double %acc26, double *%ptr -+ store volatile double %acc27, double *%ptr -+ store volatile double %acc28, double *%ptr -+ store volatile double %acc29, double *%ptr -+ store volatile double %acc30, double *%ptr -+ store volatile double %acc31, double *%ptr -+ ret void -+} -+ -+; Like f1, but should require only one call-saved FPR. -+define void @f3(double *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: aghi %r15, -168 -+; CHECK: .cfi_def_cfa_offset 328 -+; CHECK: std %f8, 160(%r15) -+; CHECK: .cfi_offset %f8, -168 -+; CHECK-NOT: {{%[fv]9}} -+; CHECK-NOT: {{%[fv]1[0-5]}} -+; CHECK: ld %f8, 160(%r15) -+; CHECK: aghi %r15, 168 -+; CHECK: br %r14 -+ %l0 = load volatile double *%ptr -+ %l1 = load volatile double *%ptr -+ %l2 = load volatile double *%ptr -+ %l3 = load volatile double *%ptr -+ %l4 = load volatile double *%ptr -+ %l5 = load volatile double *%ptr -+ %l6 = load volatile double *%ptr -+ %l7 = load volatile double *%ptr -+ %l8 = load volatile double *%ptr -+ %l16 = load volatile double *%ptr -+ %l17 = load volatile double *%ptr -+ %l18 = load volatile double *%ptr -+ %l19 = load volatile double *%ptr -+ %l20 = load volatile double *%ptr -+ %l21 = load volatile double *%ptr -+ %l22 = load volatile double *%ptr -+ %l23 = load volatile double *%ptr -+ %l24 = load volatile double *%ptr -+ %l25 = load volatile double *%ptr -+ %l26 = load volatile double *%ptr -+ %l27 = load volatile double *%ptr -+ %l28 = load volatile double *%ptr -+ %l29 = load volatile double *%ptr -+ %l30 = load volatile double *%ptr -+ %l31 = load volatile double *%ptr -+ %acc0 = fsub double %l0, %l0 -+ %acc1 = fsub double %l1, %acc0 -+ %acc2 = fsub double %l2, %acc1 -+ %acc3 = fsub double %l3, %acc2 -+ %acc4 = fsub double %l4, 
%acc3 -+ %acc5 = fsub double %l5, %acc4 -+ %acc6 = fsub double %l6, %acc5 -+ %acc7 = fsub double %l7, %acc6 -+ %acc8 = fsub double %l8, %acc7 -+ %acc16 = fsub double %l16, %acc8 -+ %acc17 = fsub double %l17, %acc16 -+ %acc18 = fsub double %l18, %acc17 -+ %acc19 = fsub double %l19, %acc18 -+ %acc20 = fsub double %l20, %acc19 -+ %acc21 = fsub double %l21, %acc20 -+ %acc22 = fsub double %l22, %acc21 -+ %acc23 = fsub double %l23, %acc22 -+ %acc24 = fsub double %l24, %acc23 -+ %acc25 = fsub double %l25, %acc24 -+ %acc26 = fsub double %l26, %acc25 -+ %acc27 = fsub double %l27, %acc26 -+ %acc28 = fsub double %l28, %acc27 -+ %acc29 = fsub double %l29, %acc28 -+ %acc30 = fsub double %l30, %acc29 -+ %acc31 = fsub double %l31, %acc30 -+ store volatile double %acc0, double *%ptr -+ store volatile double %acc1, double *%ptr -+ store volatile double %acc2, double *%ptr -+ store volatile double %acc3, double *%ptr -+ store volatile double %acc4, double *%ptr -+ store volatile double %acc5, double *%ptr -+ store volatile double %acc6, double *%ptr -+ store volatile double %acc7, double *%ptr -+ store volatile double %acc8, double *%ptr -+ store volatile double %acc16, double *%ptr -+ store volatile double %acc17, double *%ptr -+ store volatile double %acc18, double *%ptr -+ store volatile double %acc19, double *%ptr -+ store volatile double %acc20, double *%ptr -+ store volatile double %acc21, double *%ptr -+ store volatile double %acc22, double *%ptr -+ store volatile double %acc23, double *%ptr -+ store volatile double %acc24, double *%ptr -+ store volatile double %acc25, double *%ptr -+ store volatile double %acc26, double *%ptr -+ store volatile double %acc27, double *%ptr -+ store volatile double %acc28, double *%ptr -+ store volatile double %acc29, double *%ptr -+ store volatile double %acc30, double *%ptr -+ store volatile double %acc31, double *%ptr -+ ret void -+} -+ -+; This function should use all call-clobbered FPRs and vector registers -+; but no call-saved ones. 
It shouldn't need to create a frame. -+define void @f4(double *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK-NOT: %r15 -+; CHECK-NOT: {{%[fv][89]}} -+; CHECK-NOT: {{%[fv]1[0-5]}} -+; CHECK: br %r14 -+ %l0 = load volatile double *%ptr -+ %l1 = load volatile double *%ptr -+ %l2 = load volatile double *%ptr -+ %l3 = load volatile double *%ptr -+ %l4 = load volatile double *%ptr -+ %l5 = load volatile double *%ptr -+ %l6 = load volatile double *%ptr -+ %l7 = load volatile double *%ptr -+ %l16 = load volatile double *%ptr -+ %l17 = load volatile double *%ptr -+ %l18 = load volatile double *%ptr -+ %l19 = load volatile double *%ptr -+ %l20 = load volatile double *%ptr -+ %l21 = load volatile double *%ptr -+ %l22 = load volatile double *%ptr -+ %l23 = load volatile double *%ptr -+ %l24 = load volatile double *%ptr -+ %l25 = load volatile double *%ptr -+ %l26 = load volatile double *%ptr -+ %l27 = load volatile double *%ptr -+ %l28 = load volatile double *%ptr -+ %l29 = load volatile double *%ptr -+ %l30 = load volatile double *%ptr -+ %l31 = load volatile double *%ptr -+ %acc0 = fsub double %l0, %l0 -+ %acc1 = fsub double %l1, %acc0 -+ %acc2 = fsub double %l2, %acc1 -+ %acc3 = fsub double %l3, %acc2 -+ %acc4 = fsub double %l4, %acc3 -+ %acc5 = fsub double %l5, %acc4 -+ %acc6 = fsub double %l6, %acc5 -+ %acc7 = fsub double %l7, %acc6 -+ %acc16 = fsub double %l16, %acc7 -+ %acc17 = fsub double %l17, %acc16 -+ %acc18 = fsub double %l18, %acc17 -+ %acc19 = fsub double %l19, %acc18 -+ %acc20 = fsub double %l20, %acc19 -+ %acc21 = fsub double %l21, %acc20 -+ %acc22 = fsub double %l22, %acc21 -+ %acc23 = fsub double %l23, %acc22 -+ %acc24 = fsub double %l24, %acc23 -+ %acc25 = fsub double %l25, %acc24 -+ %acc26 = fsub double %l26, %acc25 -+ %acc27 = fsub double %l27, %acc26 -+ %acc28 = fsub double %l28, %acc27 -+ %acc29 = fsub double %l29, %acc28 -+ %acc30 = fsub double %l30, %acc29 -+ %acc31 = fsub double %l31, %acc30 -+ store volatile double %acc0, double *%ptr -+ store volatile 
double %acc1, double *%ptr -+ store volatile double %acc2, double *%ptr -+ store volatile double %acc3, double *%ptr -+ store volatile double %acc4, double *%ptr -+ store volatile double %acc5, double *%ptr -+ store volatile double %acc6, double *%ptr -+ store volatile double %acc7, double *%ptr -+ store volatile double %acc16, double *%ptr -+ store volatile double %acc17, double *%ptr -+ store volatile double %acc18, double *%ptr -+ store volatile double %acc19, double *%ptr -+ store volatile double %acc20, double *%ptr -+ store volatile double %acc21, double *%ptr -+ store volatile double %acc22, double *%ptr -+ store volatile double %acc23, double *%ptr -+ store volatile double %acc24, double *%ptr -+ store volatile double %acc25, double *%ptr -+ store volatile double %acc26, double *%ptr -+ store volatile double %acc27, double *%ptr -+ store volatile double %acc28, double *%ptr -+ store volatile double %acc29, double *%ptr -+ store volatile double %acc30, double *%ptr -+ store volatile double %acc31, double *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll -@@ -0,0 +1,352 @@ -+; Test transactional-execution intrinsics. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s -+ -+declare i32 @llvm.s390.tbegin(i8 *, i32) -+declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32) -+declare void @llvm.s390.tbeginc(i8 *, i32) -+declare i32 @llvm.s390.tend() -+declare void @llvm.s390.tabort(i64) -+declare void @llvm.s390.ntstg(i64, i64 *) -+declare i32 @llvm.s390.etnd() -+declare void @llvm.s390.ppa.txassist(i32) -+ -+; TBEGIN. 
-+define void @test_tbegin() { -+; CHECK-LABEL: test_tbegin: -+; CHECK-NOT: stmg -+; CHECK: std %f8, -+; CHECK: std %f9, -+; CHECK: std %f10, -+; CHECK: std %f11, -+; CHECK: std %f12, -+; CHECK: std %f13, -+; CHECK: std %f14, -+; CHECK: std %f15, -+; CHECK: tbegin 0, 65292 -+; CHECK: ld %f8, -+; CHECK: ld %f9, -+; CHECK: ld %f10, -+; CHECK: ld %f11, -+; CHECK: ld %f12, -+; CHECK: ld %f13, -+; CHECK: ld %f14, -+; CHECK: ld %f15, -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin(i8 *null, i32 65292) -+ ret void -+} -+ -+; TBEGIN (nofloat). -+define void @test_tbegin_nofloat1() { -+; CHECK-LABEL: test_tbegin_nofloat1: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65292 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) -+ ret void -+} -+ -+; TBEGIN (nofloat) with integer CC return value. -+define i32 @test_tbegin_nofloat2() { -+; CHECK-LABEL: test_tbegin_nofloat2: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65292 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) -+ ret i32 %res -+} -+ -+; TBEGIN (nofloat) with implicit CC check. -+define void @test_tbegin_nofloat3(i32 *%ptr) { -+; CHECK-LABEL: test_tbegin_nofloat3: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65292 -+; CHECK: jnh {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) -+ %cmp = icmp eq i32 %res, 2 -+ br i1 %cmp, label %if.then, label %if.end -+ -+if.then: ; preds = %entry -+ store i32 0, i32* %ptr, align 4 -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ ret void -+} -+ -+; TBEGIN (nofloat) with dual CC use. 
-+define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) { -+; CHECK-LABEL: test_tbegin_nofloat4: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65292 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: cijlh %r2, 2, {{\.L*}} -+; CHECK: mvhi 0(%r3), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) -+ %cmp = icmp eq i32 %res, 2 -+ br i1 %cmp, label %if.then, label %if.end -+ -+if.then: ; preds = %entry -+ store i32 0, i32* %ptr, align 4 -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ ret i32 %res -+} -+ -+; TBEGIN (nofloat) with register. -+define void @test_tbegin_nofloat5(i8 *%ptr) { -+; CHECK-LABEL: test_tbegin_nofloat5: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0(%r2), 65292 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292) -+ ret void -+} -+ -+; TBEGIN (nofloat) with GRSM 0x0f00. -+define void @test_tbegin_nofloat6() { -+; CHECK-LABEL: test_tbegin_nofloat6: -+; CHECK: stmg %r6, %r15, -+; CHECK-NOT: std -+; CHECK: tbegin 0, 3840 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840) -+ ret void -+} -+ -+; TBEGIN (nofloat) with GRSM 0xf100. -+define void @test_tbegin_nofloat7() { -+; CHECK-LABEL: test_tbegin_nofloat7: -+; CHECK: stmg %r8, %r15, -+; CHECK-NOT: std -+; CHECK: tbegin 0, 61696 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696) -+ ret void -+} -+ -+; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically. -+define void @test_tbegin_nofloat8() { -+; CHECK-LABEL: test_tbegin_nofloat8: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65280 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024) -+ ret void -+} -+ -+; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed. 
-+define void @test_tbegin_nofloat9() { -+; CHECK-LABEL: test_tbegin_nofloat9: -+; CHECK: stmg %r10, %r15, -+; CHECK-NOT: std -+; CHECK: tbegin 0, 64256 -+; CHECK: br %r14 -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256) -+ ret void -+} -+ -+; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically. -+define void @test_tbegin_nofloat10(i64 %n) { -+; CHECK-LABEL: test_tbegin_nofloat10: -+; CHECK: stmg %r11, %r15, -+; CHECK-NOT: std -+; CHECK: tbegin 0, 65280 -+; CHECK: br %r14 -+ %buf = alloca i8, i64 %n -+ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256) -+ ret void -+} -+ -+; TBEGINC. -+define void @test_tbeginc() { -+; CHECK-LABEL: test_tbeginc: -+; CHECK-NOT: stmg -+; CHECK-NOT: std -+; CHECK: tbeginc 0, 65288 -+; CHECK: br %r14 -+ call void @llvm.s390.tbeginc(i8 *null, i32 65288) -+ ret void -+} -+ -+; TEND with integer CC return value. -+define i32 @test_tend1() { -+; CHECK-LABEL: test_tend1: -+; CHECK: tend -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tend() -+ ret i32 %res -+} -+ -+; TEND with implicit CC check. -+define void @test_tend3(i32 *%ptr) { -+; CHECK-LABEL: test_tend3: -+; CHECK: tend -+; CHECK: je {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tend() -+ %cmp = icmp eq i32 %res, 2 -+ br i1 %cmp, label %if.then, label %if.end -+ -+if.then: ; preds = %entry -+ store i32 0, i32* %ptr, align 4 -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ ret void -+} -+ -+; TEND with dual CC use. 
-+define i32 @test_tend2(i32 %pad, i32 *%ptr) { -+; CHECK-LABEL: test_tend2: -+; CHECK: tend -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: cijlh %r2, 2, {{\.L*}} -+; CHECK: mvhi 0(%r3), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.tend() -+ %cmp = icmp eq i32 %res, 2 -+ br i1 %cmp, label %if.then, label %if.end -+ -+if.then: ; preds = %entry -+ store i32 0, i32* %ptr, align 4 -+ br label %if.end -+ -+if.end: ; preds = %if.then, %entry -+ ret i32 %res -+} -+ -+; TABORT with register only. -+define void @test_tabort1(i64 %val) { -+; CHECK-LABEL: test_tabort1: -+; CHECK: tabort 0(%r2) -+; CHECK: br %r14 -+ call void @llvm.s390.tabort(i64 %val) -+ ret void -+} -+ -+; TABORT with immediate only. -+define void @test_tabort2(i64 %val) { -+; CHECK-LABEL: test_tabort2: -+; CHECK: tabort 1234 -+; CHECK: br %r14 -+ call void @llvm.s390.tabort(i64 1234) -+ ret void -+} -+ -+; TABORT with register + immediate. -+define void @test_tabort3(i64 %val) { -+; CHECK-LABEL: test_tabort3: -+; CHECK: tabort 1234(%r2) -+; CHECK: br %r14 -+ %sum = add i64 %val, 1234 -+ call void @llvm.s390.tabort(i64 %sum) -+ ret void -+} -+ -+; TABORT with out-of-range immediate. -+define void @test_tabort4(i64 %val) { -+; CHECK-LABEL: test_tabort4: -+; CHECK: tabort 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ call void @llvm.s390.tabort(i64 4096) -+ ret void -+} -+ -+; NTSTG with base pointer only. -+define void @test_ntstg1(i64 *%ptr, i64 %val) { -+; CHECK-LABEL: test_ntstg1: -+; CHECK: ntstg %r3, 0(%r2) -+; CHECK: br %r14 -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; NTSTG with base and index. -+; Check that VSTL doesn't allow an index. 
-+define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) { -+; CHECK-LABEL: test_ntstg2: -+; CHECK: sllg [[REG:%r[1-5]]], %r3, 3 -+; CHECK: ntstg %r4, 0([[REG]],%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i64 %index -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; NTSTG with the highest in-range displacement. -+define void @test_ntstg3(i64 *%base, i64 %val) { -+; CHECK-LABEL: test_ntstg3: -+; CHECK: ntstg %r3, 524280(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i64 65535 -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; NTSTG with an out-of-range positive displacement. -+define void @test_ntstg4(i64 *%base, i64 %val) { -+; CHECK-LABEL: test_ntstg4: -+; CHECK: ntstg %r3, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i64 65536 -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; NTSTG with the lowest in-range displacement. -+define void @test_ntstg5(i64 *%base, i64 %val) { -+; CHECK-LABEL: test_ntstg5: -+; CHECK: ntstg %r3, -524288(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i64 -65536 -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; NTSTG with an out-of-range negative displacement. -+define void @test_ntstg6(i64 *%base, i64 %val) { -+; CHECK-LABEL: test_ntstg6: -+; CHECK: ntstg %r3, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i64 -65537 -+ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr) -+ ret void -+} -+ -+; ETND. 
-+define i32 @test_etnd() { -+; CHECK-LABEL: test_etnd: -+; CHECK: etnd %r2 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.etnd() -+ ret i32 %res -+} -+ -+; PPA (Transaction-Abort Assist) -+define void @test_ppa_txassist(i32 %val) { -+; CHECK-LABEL: test_ppa_txassist: -+; CHECK: ppa %r2, 0, 1 -+; CHECK: br %r14 -+ call void @llvm.s390.ppa.txassist(i32 %val) -+ ret void -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-12.ll -+++ llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll -@@ -49,13 +49,24 @@ define double @f4(double %a, double %b, - ret double %res - } - --; Check the next value up, which must use a register comparison. -+; Check the next value up, which can use a shifted comparison - define double @f5(double %a, double %b, i64 %i1) { - ; CHECK-LABEL: f5: --; CHECK: clgrjl %r2, -+; CHECK: srlg [[REG:%r[0-5]]], %r2, 32 -+; CHECK: cgije [[REG]], 0 - ; CHECK: ldr %f0, %f2 - ; CHECK: br %r14 - %cond = icmp ult i64 %i1, 4294967296 - %res = select i1 %cond, double %a, double %b - ret double %res - } -+; Check the next value up, which must use a register comparison. 
-+define double @f6(double %a, double %b, i64 %i1) { -+; CHECK-LABEL: f6: -+; CHECK: clgrjl %r2, -+; CHECK: ldr %f0, %f2 -+; CHECK: br %r14 -+ %cond = icmp ult i64 %i1, 4294967297 -+ %res = select i1 %cond, double %a, double %b -+ ret double %res -+} -Index: llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-47.ll -+++ llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll -@@ -309,7 +309,8 @@ exit: - define void @f17(i64 %a) { - ; CHECK-LABEL: f17: - ; CHECK-NOT: tmhh --; CHECK: llihh {{%r[0-5]}}, 49151 -+; CHECK: srlg [[REG:%r[0-5]]], %r2, 48 -+; CHECK: cgfi [[REG]], 49151 - ; CHECK-NOT: tmhh - ; CHECK: br %r14 - entry: -Index: llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll -@@ -0,0 +1,30 @@ -+; Verify that we do not crash on always-true conditions -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0 -+; -+; This test was compiled using clang -O0 from the following source code: -+; -+; int test(unsigned long x) -+; { -+; return x >= 0 && x <= 15; -+; } -+ -+define signext i32 @test(i64 %x) { -+entry: -+ %x.addr = alloca i64, align 8 -+ store i64 %x, i64* %x.addr, align 8 -+ %0 = load i64 *%x.addr, align 8 -+ %cmp = icmp uge i64 %0, 0 -+ br i1 %cmp, label %land.rhs, label %land.end -+ -+land.rhs: ; preds = %entry -+ %1 = load i64 *%x.addr, align 8 -+ %cmp1 = icmp ule i64 %1, 15 -+ br label %land.end -+ -+land.end: ; preds = %land.rhs, %entry -+ %2 = phi i1 [ false, %entry ], [ %cmp1, %land.rhs ] -+ %land.ext = zext i1 %2 to i32 -+ ret i32 %land.ext -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/risbg-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/risbg-03.ll -@@ -0,0 +1,30 @@ -+; Test use of RISBG vs RISBGN on zEC12. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s -+ -+; On zEC12, we generally prefer RISBGN. -+define i64 @f1(i64 %a, i64 %b) { -+; CHECK-LABEL: f1: -+; CHECK: risbgn %r2, %r3, 60, 62, 0 -+; CHECK: br %r14 -+ %anda = and i64 %a, -15 -+ %andb = and i64 %b, 14 -+ %or = or i64 %anda, %andb -+ ret i64 %or -+} -+ -+; But we may fall back to RISBG if we can use the condition code. -+define i64 @f2(i64 %a, i64 %b, i32* %c) { -+; CHECK-LABEL: f2: -+; CHECK: risbg %r2, %r3, 60, 62, 0 -+; CHECK-NEXT: ipm -+; CHECK: br %r14 -+ %anda = and i64 %a, -15 -+ %andb = and i64 %b, 14 -+ %or = or i64 %anda, %andb -+ %cmp = icmp sgt i64 %or, 0 -+ %conv = zext i1 %cmp to i32 -+ store i32 %conv, i32* %c, align 4 -+ ret i64 %or -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/tls-01.ll -=================================================================== ---- llvm-36.orig/test/CodeGen/SystemZ/tls-01.ll -+++ llvm-36/test/CodeGen/SystemZ/tls-01.ll -@@ -1,7 +1,7 @@ --; Test initial-exec TLS accesses. -+; Test local-exec TLS accesses. - ; --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN --; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP - - @x = thread_local global i32 0 - -Index: llvm-36/test/CodeGen/SystemZ/tls-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-02.ll -@@ -0,0 +1,18 @@ -+; Test initial-exec TLS accesses. -+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN -+ -+@x = thread_local(initialexec) global i32 0 -+ -+; The offset must be loaded from the GOT. This TLS access model does -+; not use literal pool constants. 
-+define i32 *@foo() { -+; CHECK-MAIN-LABEL: foo: -+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 -+; CHECK-MAIN: sllg %r2, [[HIGH]], 32 -+; CHECK-MAIN: ear %r2, %a1 -+; CHECK-MAIN: larl %r1, x@INDNTPOFF -+; CHECK-MAIN: ag %r2, 0(%r1) -+; CHECK-MAIN: br %r14 -+ ret i32 *@x -+} -Index: llvm-36/test/CodeGen/SystemZ/tls-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-03.ll -@@ -0,0 +1,23 @@ -+; Test general-dynamic TLS accesses. -+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP -+ -+@x = thread_local global i32 0 -+ -+; Call __tls_get_offset to retrieve the symbol's TLS offset. -+define i32 *@foo() { -+; CHECK-CP: .LCP{{.*}}: -+; CHECK-CP: .quad x@TLSGD -+; -+; CHECK-MAIN-LABEL: foo: -+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_ -+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}} -+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x -+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 -+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32 -+; CHECK-MAIN: ear [[TP]], %a1 -+; CHECK-MAIN: agr %r2, [[TP]] -+; CHECK-MAIN: br %r14 -+ ret i32 *@x -+} -Index: llvm-36/test/CodeGen/SystemZ/tls-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-04.ll -@@ -0,0 +1,28 @@ -+; Test local-dynamic TLS accesses. -+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP -+ -+@x = thread_local(localdynamic) global i32 0 -+ -+; Call __tls_get_offset to retrieve the module's TLS base offset. -+; Add the per-symbol offset and the thread pointer. 
-+define i32 *@foo() { -+; CHECK-CP: .LCP{{.*}}_0: -+; CHECK-CP: .quad x@TLSLDM -+; CHECK-CP: .LCP{{.*}}_1: -+; CHECK-CP: .quad x@DTPOFF -+; -+; CHECK-MAIN-LABEL: foo: -+; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_ -+; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0 -+; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x -+; CHECK-MAIN: larl %r1, .LCP{{.*}}_1 -+; CHECK-MAIN: ag %r2, 0(%r1) -+; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 -+; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32 -+; CHECK-MAIN: ear [[TP]], %a1 -+; CHECK-MAIN: agr %r2, [[TP]] -+; CHECK-MAIN: br %r14 -+ ret i32 *@x -+} -Index: llvm-36/test/CodeGen/SystemZ/tls-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-05.ll -@@ -0,0 +1,15 @@ -+; Test general-dynamic TLS access optimizations. -+; -+; If we access the same TLS variable twice, there should only be -+; a single call to __tls_get_offset. -+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1 -+ -+@x = thread_local global i32 0 -+ -+define i32 @foo() { -+ %val = load i32* @x -+ %inc = add nsw i32 %val, 1 -+ store i32 %inc, i32* @x -+ ret i32 %val -+} -Index: llvm-36/test/CodeGen/SystemZ/tls-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-06.ll -@@ -0,0 +1,17 @@ -+; Test general-dynamic TLS access optimizations. -+; -+; If we access two different TLS variables, we need two calls to -+; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE only once. 
-+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2 -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1 -+ -+@x = thread_local global i32 0 -+@y = thread_local global i32 0 -+ -+define i32 @foo() { -+ %valx = load i32* @x -+ %valy = load i32* @y -+ %add = add nsw i32 %valx, %valy -+ ret i32 %add -+} -Index: llvm-36/test/CodeGen/SystemZ/tls-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/tls-07.ll -@@ -0,0 +1,16 @@ -+; Test local-dynamic TLS access optimizations. -+; -+; If we access two different local-dynamic TLS variables, we only -+; need a single call to __tls_get_offset. -+; -+; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1 -+ -+@x = thread_local(localdynamic) global i32 0 -+@y = thread_local(localdynamic) global i32 0 -+ -+define i32 @foo() { -+ %valx = load i32* @x -+ %valy = load i32* @y -+ %add = add nsw i32 %valx, %valy -+ ret i32 %add -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll -@@ -0,0 +1,49 @@ -+; Verify that we use the vector ABI datalayout if and only if -+; the vector facility is present. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+ -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+ -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \ -+; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s -+ -+%struct.S = type { i8, <2 x i64> } -+ -+define void @test(%struct.S* %s) nounwind { -+; CHECK-VECTOR-LABEL: @test -+; CHECK-VECTOR: vl %v0, 8(%r2) -+; CHECK-NOVECTOR-LABEL: @test -+; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1 -+; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1 -+ %ptr = getelementptr %struct.S* %s, i64 0, i32 1 -+ %vec = load <2 x i64>* %ptr -+ %add = add <2 x i64> %vec, -+ store <2 x i64> %add, <2 x 
i64>* %ptr -+ ret void -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll -@@ -0,0 +1,146 @@ -+; Test v16i8 absolute. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <16 x i8> @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlpb %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp slt <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val -+ ret <16 x i8> %ret -+} -+ -+; Test with sle. -+define <16 x i8> @f2(<16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlpb %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sle <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val -+ ret <16 x i8> %ret -+} -+ -+; Test with sgt. -+define <16 x i8> @f3(<16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlpb %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sgt <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg -+ ret <16 x i8> %ret -+} -+ -+; Test with sge. -+define <16 x i8> @f4(<16 x i8> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlpb %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sge <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg -+ ret <16 x i8> %ret -+} -+ -+; Test that negative absolute uses VLPB too. There is no vector equivalent -+; of LOAD NEGATIVE. 
-+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val -+ %ret = sub <16 x i8> zeroinitializer, %abs -+ ret <16 x i8> %ret -+} -+ -+; Try another form of negative absolute (slt version). -+define <16 x i8> @f6(<16 x i8> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg -+ ret <16 x i8> %ret -+} -+ -+; Test with sle. -+define <16 x i8> @f7(<16 x i8> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sle <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg -+ ret <16 x i8> %ret -+} -+ -+; Test with sgt. -+define <16 x i8> @f8(<16 x i8> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sgt <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val -+ ret <16 x i8> %ret -+} -+ -+; Test with sge. -+define <16 x i8> @f9(<16 x i8> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sge <16 x i8> %val, zeroinitializer -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val -+ ret <16 x i8> %ret -+} -+ -+; Test with an SRA-based boolean vector. 
-+define <16 x i8> @f10(<16 x i8> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlpb %v24, %v24 -+; CHECK: br %r14 -+ %shr = ashr <16 x i8> %val, -+ -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %and1 = and <16 x i8> %shr, %neg -+ %not = xor <16 x i8> %shr, -+ -+ %and2 = and <16 x i8> %not, %val -+ %ret = or <16 x i8> %and1, %and2 -+ ret <16 x i8> %ret -+} -+ -+; ...and again in reverse -+define <16 x i8> @f11(<16 x i8> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vlpb [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcb %v24, [[REG]] -+; CHECK: br %r14 -+ %shr = ashr <16 x i8> %val, -+ -+ %and1 = and <16 x i8> %shr, %val -+ %not = xor <16 x i8> %shr, -+ -+ %neg = sub <16 x i8> zeroinitializer, %val -+ %and2 = and <16 x i8> %not, %neg -+ %ret = or <16 x i8> %and1, %and2 -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll -@@ -0,0 +1,142 @@ -+; Test v8i16 absolute. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <8 x i16> @f1(<8 x i16> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlph %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp slt <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val -+ ret <8 x i16> %ret -+} -+ -+; Test with sle. -+define <8 x i16> @f2(<8 x i16> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlph %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sle <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val -+ ret <8 x i16> %ret -+} -+ -+; Test with sgt. 
-+define <8 x i16> @f3(<8 x i16> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlph %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sgt <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg -+ ret <8 x i16> %ret -+} -+ -+; Test with sge. -+define <8 x i16> @f4(<8 x i16> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlph %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sge <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg -+ ret <8 x i16> %ret -+} -+ -+; Test that negative absolute uses VLPH too. There is no vector equivalent -+; of LOAD NEGATIVE. -+define <8 x i16> @f5(<8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %abs = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val -+ %ret = sub <8 x i16> zeroinitializer, %abs -+ ret <8 x i16> %ret -+} -+ -+; Try another form of negative absolute (slt version). -+define <8 x i16> @f6(<8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg -+ ret <8 x i16> %ret -+} -+ -+; Test with sle. -+define <8 x i16> @f7(<8 x i16> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sle <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg -+ ret <8 x i16> %ret -+} -+ -+; Test with sgt. 
-+define <8 x i16> @f8(<8 x i16> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sgt <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val -+ ret <8 x i16> %ret -+} -+ -+; Test with sge. -+define <8 x i16> @f9(<8 x i16> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sge <8 x i16> %val, zeroinitializer -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val -+ ret <8 x i16> %ret -+} -+ -+; Test with an SRA-based boolean vector. -+define <8 x i16> @f10(<8 x i16> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlph %v24, %v24 -+; CHECK: br %r14 -+ %shr = ashr <8 x i16> %val, -+ -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %and1 = and <8 x i16> %shr, %neg -+ %not = xor <8 x i16> %shr, -+ -+ %and2 = and <8 x i16> %not, %val -+ %ret = or <8 x i16> %and1, %and2 -+ ret <8 x i16> %ret -+} -+ -+; ...and again in reverse -+define <8 x i16> @f11(<8 x i16> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vlph [[REG:%v[0-9]+]], %v24 -+; CHECK: vlch %v24, [[REG]] -+; CHECK: br %r14 -+ %shr = ashr <8 x i16> %val, -+ -+ %and1 = and <8 x i16> %shr, %val -+ %not = xor <8 x i16> %shr, -+ -+ %neg = sub <8 x i16> zeroinitializer, %val -+ %and2 = and <8 x i16> %not, %neg -+ %ret = or <8 x i16> %and1, %and2 -+ ret <8 x i16> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll -@@ -0,0 +1,138 @@ -+; Test v4i32 absolute. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. 
-+define <4 x i32> @f1(<4 x i32> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlpf %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp slt <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val -+ ret <4 x i32> %ret -+} -+ -+; Test with sle. -+define <4 x i32> @f2(<4 x i32> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlpf %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sle <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val -+ ret <4 x i32> %ret -+} -+ -+; Test with sgt. -+define <4 x i32> @f3(<4 x i32> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlpf %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sgt <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg -+ ret <4 x i32> %ret -+} -+ -+; Test with sge. -+define <4 x i32> @f4(<4 x i32> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlpf %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sge <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg -+ ret <4 x i32> %ret -+} -+ -+; Test that negative absolute uses VLPF too. There is no vector equivalent -+; of LOAD NEGATIVE. -+define <4 x i32> @f5(<4 x i32> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %abs = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val -+ %ret = sub <4 x i32> zeroinitializer, %abs -+ ret <4 x i32> %ret -+} -+ -+; Try another form of negative absolute (slt version). 
-+define <4 x i32> @f6(<4 x i32> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg -+ ret <4 x i32> %ret -+} -+ -+; Test with sle. -+define <4 x i32> @f7(<4 x i32> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sle <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg -+ ret <4 x i32> %ret -+} -+ -+; Test with sgt. -+define <4 x i32> @f8(<4 x i32> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sgt <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val -+ ret <4 x i32> %ret -+} -+ -+; Test with sge. -+define <4 x i32> @f9(<4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sge <4 x i32> %val, zeroinitializer -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val -+ ret <4 x i32> %ret -+} -+ -+; Test with an SRA-based boolean vector. 
-+define <4 x i32> @f10(<4 x i32> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlpf %v24, %v24 -+; CHECK: br %r14 -+ %shr = ashr <4 x i32> %val, -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %and1 = and <4 x i32> %shr, %neg -+ %not = xor <4 x i32> %shr, -+ %and2 = and <4 x i32> %not, %val -+ %ret = or <4 x i32> %and1, %and2 -+ ret <4 x i32> %ret -+} -+ -+; ...and again in reverse -+define <4 x i32> @f11(<4 x i32> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vlpf [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcf %v24, [[REG]] -+; CHECK: br %r14 -+ %shr = ashr <4 x i32> %val, -+ %and1 = and <4 x i32> %shr, %val -+ %not = xor <4 x i32> %shr, -+ %neg = sub <4 x i32> zeroinitializer, %val -+ %and2 = and <4 x i32> %not, %neg -+ %ret = or <4 x i32> %and1, %and2 -+ ret <4 x i32> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll -@@ -0,0 +1,138 @@ -+; Test v2i64 absolute. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <2 x i64> @f1(<2 x i64> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlpg %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp slt <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val -+ ret <2 x i64> %ret -+} -+ -+; Test with sle. -+define <2 x i64> @f2(<2 x i64> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlpg %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sle <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val -+ ret <2 x i64> %ret -+} -+ -+; Test with sgt. 
-+define <2 x i64> @f3(<2 x i64> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlpg %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sgt <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg -+ ret <2 x i64> %ret -+} -+ -+; Test with sge. -+define <2 x i64> @f4(<2 x i64> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlpg %v24, %v24 -+; CHECK: br %r14 -+ %cmp = icmp sge <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg -+ ret <2 x i64> %ret -+} -+ -+; Test that negative absolute uses VLPG too. There is no vector equivalent -+; of LOAD NEGATIVE. -+define <2 x i64> @f5(<2 x i64> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %abs = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val -+ %ret = sub <2 x i64> zeroinitializer, %abs -+ ret <2 x i64> %ret -+} -+ -+; Try another form of negative absolute (slt version). -+define <2 x i64> @f6(<2 x i64> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp slt <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg -+ ret <2 x i64> %ret -+} -+ -+; Test with sle. -+define <2 x i64> @f7(<2 x i64> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sle <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg -+ ret <2 x i64> %ret -+} -+ -+; Test with sgt. 
-+define <2 x i64> @f8(<2 x i64> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sgt <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val -+ ret <2 x i64> %ret -+} -+ -+; Test with sge. -+define <2 x i64> @f9(<2 x i64> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %cmp = icmp sge <2 x i64> %val, zeroinitializer -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val -+ ret <2 x i64> %ret -+} -+ -+; Test with an SRA-based boolean vector. -+define <2 x i64> @f10(<2 x i64> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlpg %v24, %v24 -+; CHECK: br %r14 -+ %shr = ashr <2 x i64> %val, -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %and1 = and <2 x i64> %shr, %neg -+ %not = xor <2 x i64> %shr, -+ %and2 = and <2 x i64> %not, %val -+ %ret = or <2 x i64> %and1, %and2 -+ ret <2 x i64> %ret -+} -+ -+; ...and again in reverse -+define <2 x i64> @f11(<2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vlpg [[REG:%v[0-9]+]], %v24 -+; CHECK: vlcg %v24, [[REG]] -+; CHECK: br %r14 -+ %shr = ashr <2 x i64> %val, -+ %and1 = and <2 x i64> %shr, %val -+ %not = xor <2 x i64> %shr, -+ %neg = sub <2 x i64> zeroinitializer, %val -+ %and2 = and <2 x i64> %not, %neg -+ %ret = or <2 x i64> %and1, %and2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll -@@ -0,0 +1,46 @@ -+; Test f64 and v2f64 absolute. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare double @llvm.fabs.f64(double) -+declare <2 x double> @llvm.fabs.v2f64(<2 x double>) -+ -+; Test a plain absolute. 
-+define <2 x double> @f1(<2 x double> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vflpdb %v24, %v24 -+; CHECK: br %r14 -+ %ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val) -+ ret <2 x double> %ret -+} -+ -+; Test a negative absolute. -+define <2 x double> @f2(<2 x double> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vflndb %v24, %v24 -+; CHECK: br %r14 -+ %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val) -+ %ret = fsub <2 x double> , %abs -+ ret <2 x double> %ret -+} -+ -+; Test an f64 absolute that uses vector registers. -+define double @f3(<2 x double> %val) { -+; CHECK-LABEL: f3: -+; CHECK: wflpdb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %ret = call double @llvm.fabs.f64(double %scalar) -+ ret double %ret -+} -+ -+; Test an f64 negative absolute that uses vector registers. -+define double @f4(<2 x double> %val) { -+; CHECK-LABEL: f4: -+; CHECK: wflndb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %abs = call double @llvm.fabs.f64(double %scalar) -+ %ret = fsub double -0.0, %abs -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-add-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-add-01.ll -@@ -0,0 +1,60 @@ -+; Test vector addition. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 addition. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vab %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = add <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 addition. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vah %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = add <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 addition. 
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vaf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = add <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 addition. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vag %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = add <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2f64 addition. -+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vfadb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = fadd <2 x double> %val1, %val2 -+ ret <2 x double> %ret -+} -+ -+; Test an f64 addition that uses vector registers. -+define double @f6(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: wfadb %f0, %v24, %v26 -+; CHECK: br %r14 -+ %scalar1 = extractelement <2 x double> %val1, i32 0 -+ %scalar2 = extractelement <2 x double> %val2, i32 0 -+ %ret = fadd double %scalar1, %scalar2 -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-and-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-and-01.ll -@@ -0,0 +1,39 @@ -+; Test vector AND. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 AND. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vn %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = and <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 AND. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vn %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = and <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 AND. 
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vn %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = and <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 AND. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vn %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = and <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-and-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-and-02.ll -@@ -0,0 +1,91 @@ -+; Test vector AND-NOT. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 AND-NOT. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vnc %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <16 x i8> %val2, -+ %ret = and <16 x i8> %val1, %not -+ ret <16 x i8> %ret -+} -+ -+; ...and again with the reverse. -+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vnc %v24, %v28, %v26 -+; CHECK: br %r14 -+ %not = xor <16 x i8> %val1, -+ %ret = and <16 x i8> %not, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 AND-NOT. -+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vnc %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <8 x i16> %val2, -+ %ret = and <8 x i16> %val1, %not -+ ret <8 x i16> %ret -+} -+ -+; ...and again with the reverse. -+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vnc %v24, %v28, %v26 -+; CHECK: br %r14 -+ %not = xor <8 x i16> %val1, -+ %ret = and <8 x i16> %not, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 AND-NOT. 
-+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vnc %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <4 x i32> %val2, -+ %ret = and <4 x i32> %val1, %not -+ ret <4 x i32> %ret -+} -+ -+; ...and again with the reverse. -+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vnc %v24, %v28, %v26 -+; CHECK: br %r14 -+ %not = xor <4 x i32> %val1, -+ %ret = and <4 x i32> %not, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 AND-NOT. -+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vnc %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <2 x i64> %val2, -+ %ret = and <2 x i64> %val1, %not -+ ret <2 x i64> %ret -+} -+ -+; ...and again with the reverse. -+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vnc %v24, %v28, %v26 -+; CHECK: br %r14 -+ %not = xor <2 x i64> %val1, -+ %ret = and <2 x i64> %not, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-and-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-and-03.ll -@@ -0,0 +1,113 @@ -+; Test vector zero extensions, which need to be implemented as ANDs. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i1->v16i8 extension. -+define <16 x i8> @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vrepib [[REG:%v[0-9]+]], 1 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <16 x i8> %val to <16 x i1> -+ %ret = zext <16 x i1> %trunc to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i1->v8i16 extension. 
-+define <8 x i16> @f2(<8 x i16> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vrepih [[REG:%v[0-9]+]], 1 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <8 x i16> %val to <8 x i1> -+ %ret = zext <8 x i1> %trunc to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i8->v8i16 extension. -+define <8 x i16> @f3(<8 x i16> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vgbm [[REG:%v[0-9]+]], 21845 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <8 x i16> %val to <8 x i8> -+ %ret = zext <8 x i8> %trunc to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i1->v4i32 extension. -+define <4 x i32> @f4(<4 x i32> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vrepif [[REG:%v[0-9]+]], 1 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i1> -+ %ret = zext <4 x i1> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i8->v4i32 extension. -+define <4 x i32> @f5(<4 x i32> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vgbm [[REG:%v[0-9]+]], 4369 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i8> -+ %ret = zext <4 x i8> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i16->v4i32 extension. -+define <4 x i32> @f6(<4 x i32> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vgbm [[REG:%v[0-9]+]], 13107 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i16> -+ %ret = zext <4 x i16> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i1->v2i64 extension. -+define <2 x i64> @f7(<2 x i64> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vrepig [[REG:%v[0-9]+]], 1 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i1> -+ %ret = zext <2 x i1> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i8->v2i64 extension. 
-+define <2 x i64> @f8(<2 x i64> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vgbm [[REG:%v[0-9]+]], 257 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i8> -+ %ret = zext <2 x i8> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i16->v2i64 extension. -+define <2 x i64> @f9(<2 x i64> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vgbm [[REG:%v[0-9]+]], 771 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i16> -+ %ret = zext <2 x i16> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i32->v2i64 extension. -+define <2 x i64> @f10(<2 x i64> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vgbm [[REG:%v[0-9]+]], 3855 -+; CHECK: vn %v24, %v24, [[REG]] -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i32> -+ %ret = zext <2 x i32> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-01.ll -@@ -0,0 +1,48 @@ -+; Test the handling of named vector arguments. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK -+ -+; This routine has 6 integer arguments, which fill up r2-r5 and -+; the stack slot at offset 160, and 10 vector arguments, which -+; fill up v24-v31 and the two double-wide stack slots at 168 -+; and 184. 
-+declare void @bar(i64, i64, i64, i64, i64, i64, -+ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, -+ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>, -+ <4 x i32>, <4 x i32>) -+ -+define void @foo() { -+; CHECK-VEC-LABEL: foo: -+; CHECK-VEC-DAG: vrepif %v24, 1 -+; CHECK-VEC-DAG: vrepif %v26, 2 -+; CHECK-VEC-DAG: vrepif %v28, 3 -+; CHECK-VEC-DAG: vrepif %v30, 4 -+; CHECK-VEC-DAG: vrepif %v25, 5 -+; CHECK-VEC-DAG: vrepif %v27, 6 -+; CHECK-VEC-DAG: vrepif %v29, 7 -+; CHECK-VEC-DAG: vrepif %v31, 8 -+; CHECK-VEC: brasl %r14, bar@PLT -+; -+; CHECK-STACK-LABEL: foo: -+; CHECK-STACK: aghi %r15, -200 -+; CHECK-STACK-DAG: mvghi 160(%r15), 6 -+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 9 -+; CHECK-STACK-DAG: vst [[REG1]], 168(%r15) -+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 10 -+; CHECK-STACK-DAG: vst [[REG2]], 184(%r15) -+; CHECK-STACK: brasl %r14, bar@PLT -+ -+ call void @bar (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> ) -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-02.ll -@@ -0,0 +1,31 @@ -+; Test the handling of unnamed vector arguments. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK -+ -+; This routine is called with two named vector argument (passed -+; in %v24 and %v26) and two unnamed vector arguments (passed -+; in the double-wide stack slots at 160 and 176). -+declare void @bar(<4 x i32>, <4 x i32>, ...) 
-+ -+define void @foo() { -+; CHECK-VEC-LABEL: foo: -+; CHECK-VEC-DAG: vrepif %v24, 1 -+; CHECK-VEC-DAG: vrepif %v26, 2 -+; CHECK-VEC: brasl %r14, bar@PLT -+; -+; CHECK-STACK-LABEL: foo: -+; CHECK-STACK: aghi %r15, -192 -+; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 3 -+; CHECK-STACK-DAG: vst [[REG1]], 160(%r15) -+; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 4 -+; CHECK-STACK-DAG: vst [[REG2]], 176(%r15) -+; CHECK-STACK: brasl %r14, bar@PLT -+ -+ call void (<4 x i32>, <4 x i32>, ...)* @bar -+ (<4 x i32> , -+ <4 x i32> , -+ <4 x i32> , -+ <4 x i32> ) -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-03.ll -@@ -0,0 +1,30 @@ -+; Test the handling of incoming vector arguments. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; This routine has 10 vector arguments, which fill up %v24-%v31 and -+; the two double-wide stack slots at 160 and 176. -+define <4 x i32> @foo(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, -+ <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, -+ <4 x i32> %v9, <4 x i32> %v10) { -+; CHECK-LABEL: foo: -+; CHECK: vl [[REG1:%v[0-9]+]], 176(%r15) -+; CHECK: vsf %v24, %v26, [[REG1]] -+; CHECK: br %r14 -+ %y = sub <4 x i32> %v2, %v10 -+ ret <4 x i32> %y -+} -+ -+; This routine has 10 vector arguments, which fill up %v24-%v31 and -+; the two single-wide stack slots at 160 and 168. 
-+define <4 x i8> @bar(<4 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3, <4 x i8> %v4, -+ <4 x i8> %v5, <4 x i8> %v6, <4 x i8> %v7, <4 x i8> %v8, -+ <4 x i8> %v9, <4 x i8> %v10) { -+; CHECK-LABEL: bar: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 168(%r15) -+; CHECK: vsb %v24, %v26, [[REG1]] -+; CHECK: br %r14 -+ %y = sub <4 x i8> %v2, %v10 -+ ret <4 x i8> %y -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-args-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-04.ll -@@ -0,0 +1,50 @@ -+; Test the handling of named short vector arguments. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK -+ -+; This routine has 12 vector arguments, which fill up %v24-%v31 -+; and the four single-wide stack slots starting at 160. -+declare void @bar(<1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>, -+ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>, -+ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>) -+ -+define void @foo() { -+; CHECK-VEC-LABEL: foo: -+; CHECK-VEC-DAG: vrepib %v24, 1 -+; CHECK-VEC-DAG: vrepib %v26, 2 -+; CHECK-VEC-DAG: vrepib %v28, 3 -+; CHECK-VEC-DAG: vrepib %v30, 4 -+; CHECK-VEC-DAG: vrepib %v25, 5 -+; CHECK-VEC-DAG: vrepib %v27, 6 -+; CHECK-VEC-DAG: vrepib %v29, 7 -+; CHECK-VEC-DAG: vrepib %v31, 8 -+; CHECK-VEC: brasl %r14, bar@PLT -+; -+; CHECK-STACK-LABEL: foo: -+; CHECK-STACK: aghi %r15, -192 -+; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304 -+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15) -+; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570 -+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15) -+; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099 -+; CHECK-STACK-DAG: stg [[REG3]], 176(%r15) -+; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108 -+; CHECK-STACK-DAG: oilf [[REG4]], 202116108 -+; CHECK-STACK-DAG: stg [[REG4]], 176(%r15) -+; CHECK-STACK: brasl %r14, bar@PLT -+ -+ call void @bar (<1 
x i8> , -+ <2 x i8> , -+ <4 x i8> , -+ <8 x i8> , -+ <1 x i8> , -+ <2 x i8> , -+ <4 x i8> , -+ <8 x i8> , -+ <1 x i8> , -+ <2 x i8> , -+ <4 x i8> , -+ <8 x i8> ) -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-args-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-05.ll -@@ -0,0 +1,32 @@ -+; Test the handling of unnamed short vector arguments. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK -+ -+; This routine is called with two named vector argument (passed -+; in %v24 and %v26) and two unnamed vector arguments (passed -+; in the single-wide stack slots at 160 and 168). -+declare void @bar(<4 x i8>, <4 x i8>, ...) -+ -+define void @foo() { -+; CHECK-VEC-LABEL: foo: -+; CHECK-VEC-DAG: vrepib %v24, 1 -+; CHECK-VEC-DAG: vrepib %v26, 2 -+; CHECK-VEC: brasl %r14, bar@PLT -+; -+; CHECK-STACK-LABEL: foo: -+; CHECK-STACK: aghi %r15, -176 -+; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027 -+; CHECK-STACK-DAG: stg [[REG1]], 160(%r15) -+; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036 -+; CHECK-STACK-DAG: stg [[REG2]], 168(%r15) -+; CHECK-STACK: brasl %r14, bar@PLT -+ -+ call void (<4 x i8>, <4 x i8>, ...)* @bar -+ (<4 x i8> , -+ <4 x i8> , -+ <4 x i8> , -+ <4 x i8> ) -+ ret void -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll -@@ -0,0 +1,9 @@ -+; Verify that we detect unsupported single-element vector types. 
-+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+define void @foo(<1 x i128>) { -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll -@@ -0,0 +1,9 @@ -+; Verify that we detect unsupported single-element vector types. -+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+define <1 x i128> @foo() { -+ ret <1 x i128> -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll -@@ -0,0 +1,12 @@ -+; Verify that we detect unsupported single-element vector types. -+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+declare void @bar(<1 x i128>) -+ -+define void @foo() { -+ call void @bar (<1 x i128> ) -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll -@@ -0,0 +1,12 @@ -+; Verify that we detect unsupported single-element vector types. 
-+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+declare <1 x i128> @bar() -+ -+define void @foo() { -+ %res = call <1 x i128> @bar () -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll -@@ -0,0 +1,9 @@ -+; Verify that we detect unsupported single-element vector types. -+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+define void @foo(<1 x fp128>) { -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll -@@ -0,0 +1,9 @@ -+; Verify that we detect unsupported single-element vector types. -+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+define <1 x fp128> @foo() { -+ ret <1 x fp128> -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll -@@ -0,0 +1,12 @@ -+; Verify that we detect unsupported single-element vector types. 
-+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+declare void @bar(<1 x fp128>) -+ -+define void @foo() { -+ call void @bar (<1 x fp128> ) -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll -@@ -0,0 +1,12 @@ -+; Verify that we detect unsupported single-element vector types. -+ -+; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s -+ -+declare <1 x fp128> @bar() -+ -+define void @foo() { -+ %res = call <1 x fp128> @bar () -+ ret void -+} -+ -+; CHECK: LLVM ERROR: Unsupported vector argument or return type -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll -@@ -0,0 +1,228 @@ -+; Test v16i8 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test eq. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vceqb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test ne. -+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vceqb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test sgt. 
-+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vchb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test sge. -+define <16 x i8> @f4(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vchb [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test sle. -+define <16 x i8> @f5(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test slt. -+define <16 x i8> @f6(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vchb %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test ugt. -+define <16 x i8> @f7(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vchlb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test uge. -+define <16 x i8> @f8(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test ule. 
-+define <16 x i8> @f9(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test ult. -+define <16 x i8> @f10(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vchlb %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <16 x i8> %val1, %val2 -+ %ret = sext <16 x i1> %cmp to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test eq selects. -+define <16 x i8> @f11(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f11: -+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test ne selects. -+define <16 x i8> @f12(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f12: -+; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test sgt selects. -+define <16 x i8> @f13(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f13: -+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test sge selects. 
-+define <16 x i8> @f14(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f14: -+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test sle selects. -+define <16 x i8> @f15(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test slt selects. -+define <16 x i8> @f16(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f16: -+; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test ugt selects. -+define <16 x i8> @f17(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test uge selects. -+define <16 x i8> @f18(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test ule selects. 
-+define <16 x i8> @f19(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -+ -+; Test ult selects. -+define <16 x i8> @f20(<16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3, <16 x i8> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll -@@ -0,0 +1,228 @@ -+; Test v8i16 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test eq. -+define <8 x i16> @f1(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vceqh %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test ne. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vceqh [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test sgt. -+define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vchh %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test sge. 
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vchh [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test sle. -+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test slt. -+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vchh %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test ugt. -+define <8 x i16> @f7(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vchlh %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test uge. -+define <8 x i16> @f8(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test ule. -+define <8 x i16> @f9(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test ult. 
-+define <8 x i16> @f10(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vchlh %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <8 x i16> %val1, %val2 -+ %ret = sext <8 x i1> %cmp to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test eq selects. -+define <8 x i16> @f11(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f11: -+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test ne selects. -+define <8 x i16> @f12(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f12: -+; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test sgt selects. -+define <8 x i16> @f13(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f13: -+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test sge selects. -+define <8 x i16> @f14(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f14: -+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test sle selects. 
-+define <8 x i16> @f15(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test slt selects. -+define <8 x i16> @f16(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f16: -+; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test ugt selects. -+define <8 x i16> @f17(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test uge selects. -+define <8 x i16> @f18(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test ule selects. -+define <8 x i16> @f19(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -+ -+; Test ult selects. 
-+define <8 x i16> @f20(<8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3, <8 x i16> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 -+ ret <8 x i16> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll -@@ -0,0 +1,228 @@ -+; Test v4i32 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test eq. -+define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vceqf %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ne. -+define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vceqf [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test sgt. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vchf %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test sge. -+define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vchf [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test sle. 
-+define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test slt. -+define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vchf %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ugt. -+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vchlf %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test uge. -+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ule. -+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ult. -+define <4 x i32> @f10(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vchlf %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <4 x i32> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test eq selects. 
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f11: -+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test ne selects. -+define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f12: -+; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test sgt selects. -+define <4 x i32> @f13(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f13: -+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test sge selects. -+define <4 x i32> @f14(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f14: -+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test sle selects. -+define <4 x i32> @f15(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test slt selects. 
-+define <4 x i32> @f16(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f16: -+; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test ugt selects. -+define <4 x i32> @f17(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test uge selects. -+define <4 x i32> @f18(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test ule selects. -+define <4 x i32> @f19(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -+ -+; Test ult selects. 
-+define <4 x i32> @f20(<4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3, <4 x i32> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 -+ ret <4 x i32> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll -@@ -0,0 +1,228 @@ -+; Test v2i64 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test eq. -+define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vceqg %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ne. -+define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vceqg [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test sgt. -+define <2 x i64> @f3(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vchg %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test sge. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vchg [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test sle. 
-+define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test slt. -+define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vchg %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ugt. -+define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vchlg %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test uge. -+define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ule. -+define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ult. -+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vchlg %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <2 x i64> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test eq selects. 
-+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f11: -+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp eq <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test ne selects. -+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f12: -+; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ne <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test sgt selects. -+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f13: -+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sgt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test sge selects. -+define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f14: -+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sge <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test sle selects. -+define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp sle <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test slt selects. 
-+define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f16: -+; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp slt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test ugt selects. -+define <2 x i64> @f17(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ugt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test uge selects. -+define <2 x i64> @f18(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp uge <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test ule selects. -+define <2 x i64> @f19(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ule <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -+ -+; Test ult selects. 
-+define <2 x i64> @f20(<2 x i64> %val1, <2 x i64> %val2, -+ <2 x i64> %val3, <2 x i64> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = icmp ult <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll -@@ -0,0 +1,472 @@ -+; Test v4f32 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test oeq. -+define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f1: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oeq <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test one. 
-+define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f2: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] -+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] -+; CHECK: vo %v24, [[RES1]], [[RES0]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp one <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ogt. -+define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f3: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ogt <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test oge. 
-+define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f4: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oge <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ole. -+define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f5: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ole <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test olt. 
-+define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f6: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp olt <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ueq. -+define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f7: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] -+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] -+; CHECK: vno %v24, [[RES1]], [[RES0]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ueq <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test une. 
-+define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f8: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: vno %v24, [[RES]], [[RES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp une <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ugt. -+define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f9: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: vno %v24, [[RES]], [[RES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ugt <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test uge. 
-+define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f10: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: vno %v24, [[RES]], [[RES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uge <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ule. -+define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f11: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: vno %v24, [[RES]], [[RES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ule <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ult. 
-+define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f12: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] -+; CHECK-NEXT: vno %v24, [[RES]], [[RES]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ult <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test ord. -+define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f13: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] -+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] -+; CHECK: vo %v24, [[RES1]], [[RES0]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ord <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test uno. 
-+define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f14: -+; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 -+; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 -+; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] -+; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] -+; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] -+; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] -+; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] -+; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] -+; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] -+; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] -+; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] -+; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] -+; CHECK: vno %v24, [[RES1]], [[RES0]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uno <4 x float> %val1, %val2 -+ %ret = sext <4 x i1> %cmp to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test oeq selects. -+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oeq <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test one selects. -+define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f16: -+; CHECK: vo [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp one <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ogt selects. 
-+define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ogt <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test oge selects. -+define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oge <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ole selects. -+define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ole <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test olt selects. -+define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp olt <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ueq selects. -+define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f21: -+; CHECK: vo [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ueq <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test une selects. 
-+define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f22: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp une <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ugt selects. -+define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f23: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ugt <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test uge selects. -+define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f24: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uge <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ule selects. -+define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f25: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ule <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ult selects. -+define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f26: -+; CHECK: vpkg [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ult <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test ord selects. 
-+define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f27: -+; CHECK: vo [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ord <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -+ -+; Test uno selects. -+define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, -+ <4 x float> %val3, <4 x float> %val4) { -+; CHECK-LABEL: f28: -+; CHECK: vo [[REG:%v[0-9]+]], -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uno <4 x float> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 -+ ret <4 x float> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll -@@ -0,0 +1,349 @@ -+; Test f64 and v2f64 comparisons. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test oeq. -+define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vfcedb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oeq <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test one. -+define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f2: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 -+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 -+; CHECK: vo %v24, [[REG1]], [[REG2]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp one <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ogt. 
-+define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vfchdb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ogt <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test oge. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vfchedb %v24, %v26, %v28 -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oge <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ole. -+define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vfchedb %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ole <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test olt. -+define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vfchdb %v24, %v28, %v26 -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp olt <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ueq. -+define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f7: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 -+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 -+; CHECK: vno %v24, [[REG1]], [[REG2]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ueq <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test une. -+define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp une <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ugt. 
-+define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ugt <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test uge. -+define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uge <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ule. -+define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ule <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ult. -+define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28 -+; CHECK-NEXT: vno %v24, [[REG]], [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ult <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test ord. -+define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f13: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 -+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 -+; CHECK: vo %v24, [[REG1]], [[REG2]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ord <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test uno. 
-+define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f14: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 -+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 -+; CHECK: vno %v24, [[REG1]], [[REG2]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uno <2 x double> %val1, %val2 -+ %ret = sext <2 x i1> %cmp to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test oeq selects. -+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f15: -+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oeq <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test one selects. -+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f16: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 -+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 -+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp one <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ogt selects. -+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f17: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ogt <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test oge selects. 
-+define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f18: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp oge <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ole selects. -+define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f19: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ole <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test olt selects. -+define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f20: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp olt <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ueq selects. -+define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f21: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 -+; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 -+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ueq <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test une selects. 
-+define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f22: -+; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp une <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ugt selects. -+define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f23: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ugt <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test uge selects. -+define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f24: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uge <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ule selects. -+define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f25: -+; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ule <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ult selects. 
-+define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f26: -+; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ult <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test ord selects. -+define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f27: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 -+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 -+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] -+; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp ord <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test uno selects. -+define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, -+ <2 x double> %val3, <2 x double> %val4) { -+; CHECK-LABEL: f28: -+; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 -+; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 -+; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] -+; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] -+; CHECK-NEXT: br %r14 -+ %cmp = fcmp uno <2 x double> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 -+ ret <2 x double> %ret -+} -+ -+; Test an f64 comparison that uses vector registers. 
-+define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) { -+; CHECK-LABEL: f29: -+; CHECK: wfcdb %f0, %v24 -+; CHECK-NEXT: locgrne %r2, %r3 -+; CHECK: br %r14 -+ %f2 = extractelement <2 x double> %vec, i32 0 -+ %cond = fcmp oeq double %f1, %f2 -+ %res = select i1 %cond, i64 %a, i64 %b -+ ret i64 %res -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll -@@ -0,0 +1,155 @@ -+; Test various target-specific DAG combiner patterns. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Check that an extraction followed by a truncation is effectively treated -+; as a bitcast. -+define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) { -+; CHECK-LABEL: f1: -+; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vsteb [[REG]], 0(%r2), 3 -+; CHECK-DAG: vsteb [[REG]], 0(%r3), 15 -+; CHECK: br %r14 -+ %add = add <4 x i32> %v1, %v2 -+ %elem1 = extractelement <4 x i32> %add, i32 0 -+ %elem2 = extractelement <4 x i32> %add, i32 3 -+ %trunc1 = trunc i32 %elem1 to i8 -+ %trunc2 = trunc i32 %elem2 to i8 -+ store i8 %trunc1, i8 *%ptr1 -+ store i8 %trunc2, i8 *%ptr2 -+ ret void -+} -+ -+; Test a case where a pack-type shuffle can be eliminated. 
-+define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) { -+; CHECK-LABEL: f2: -+; CHECK-NOT: vpk -+; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7 -+; CHECK: br %r14 -+ %add1 = add <4 x i32> %v1, %v2 -+ %add2 = add <4 x i32> %v2, %v3 -+ %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2, -+ <4 x i32> -+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16> -+ %elem1 = extractelement <8 x i16> %bitcast, i32 1 -+ %elem2 = extractelement <8 x i16> %bitcast, i32 7 -+ %res = add i16 %elem1, %elem2 -+ ret i16 %res -+} -+ -+; ...and again in a case where there's also a splat and a bitcast. -+define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) { -+; CHECK-LABEL: f3: -+; CHECK-NOT: vrepg -+; CHECK-NOT: vpk -+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3 -+; CHECK: br %r14 -+ %add = add <4 x i32> %v1, %v2 -+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef, -+ <2 x i32> -+ %splatcast = bitcast <2 x i64> %splat to <4 x i32> -+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast, -+ <4 x i32> -+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16> -+ %elem1 = extractelement <8 x i16> %bitcast, i32 2 -+ %elem2 = extractelement <8 x i16> %bitcast, i32 7 -+ %res = add i16 %elem1, %elem2 -+ ret i16 %res -+} -+ -+; ...and again with a merge low instead of a pack. 
-+define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) { -+; CHECK-LABEL: f4: -+; CHECK-NOT: vrepg -+; CHECK-NOT: vmr -+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3 -+; CHECK: br %r14 -+ %add = add <4 x i32> %v1, %v2 -+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef, -+ <2 x i32> -+ %splatcast = bitcast <2 x i64> %splat to <4 x i32> -+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast, -+ <4 x i32> -+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16> -+ %elem1 = extractelement <8 x i16> %bitcast, i32 4 -+ %elem2 = extractelement <8 x i16> %bitcast, i32 7 -+ %res = add i16 %elem1, %elem2 -+ ret i16 %res -+} -+ -+; ...and again with a merge high. -+define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vrepg -+; CHECK-NOT: vmr -+; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2 -+; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3 -+; CHECK: br %r14 -+ %add = add <4 x i32> %v1, %v2 -+ %splat = shufflevector <2 x i64> %v3, <2 x i64> undef, -+ <2 x i32> -+ %splatcast = bitcast <2 x i64> %splat to <4 x i32> -+ %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast, -+ <4 x i32> -+ %bitcast = bitcast <4 x i32> %shuffle to <8 x i16> -+ %elem1 = extractelement <8 x i16> %bitcast, i32 4 -+ %elem2 = extractelement <8 x i16> %bitcast, i32 7 -+ %res = add i16 %elem1, %elem2 -+ ret i16 %res -+} -+ -+; Test a case where an unpack high can be eliminated from the usual -+; load-extend sequence. 
-+define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) { -+; CHECK-LABEL: f6: -+; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2) -+; CHECK-NOT: vup -+; CHECK-DAG: vsteb [[REG]], 0(%r3), 1 -+; CHECK-DAG: vsteb [[REG]], 0(%r4), 2 -+; CHECK-DAG: vsteb [[REG]], 0(%r5), 7 -+; CHECK: br %r14 -+ %vec = load <8 x i8> *%ptr1 -+ %ext = sext <8 x i8> %vec to <8 x i16> -+ %elem1 = extractelement <8 x i16> %ext, i32 1 -+ %elem2 = extractelement <8 x i16> %ext, i32 2 -+ %elem3 = extractelement <8 x i16> %ext, i32 7 -+ %trunc1 = trunc i16 %elem1 to i8 -+ %trunc2 = trunc i16 %elem2 to i8 -+ %trunc3 = trunc i16 %elem3 to i8 -+ store i8 %trunc1, i8 *%ptr2 -+ store i8 %trunc2, i8 *%ptr3 -+ store i8 %trunc3, i8 *%ptr4 -+ ret void -+} -+ -+; ...and again with a bitcast inbetween. -+define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) { -+; CHECK-LABEL: f7: -+; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2) -+; CHECK-NOT: vup -+; CHECK-DAG: vsteb [[REG]], 0(%r3), 0 -+; CHECK-DAG: vsteb [[REG]], 0(%r4), 1 -+; CHECK-DAG: vsteb [[REG]], 0(%r5), 3 -+; CHECK: br %r14 -+ %vec = load <4 x i8> *%ptr1 -+ %ext = sext <4 x i8> %vec to <4 x i32> -+ %bitcast = bitcast <4 x i32> %ext to <8 x i16> -+ %elem1 = extractelement <8 x i16> %bitcast, i32 1 -+ %elem2 = extractelement <8 x i16> %bitcast, i32 3 -+ %elem3 = extractelement <8 x i16> %bitcast, i32 7 -+ %trunc1 = trunc i16 %elem1 to i8 -+ %trunc2 = trunc i16 %elem2 to i8 -+ %trunc3 = trunc i16 %elem3 to i8 -+ store i8 %trunc1, i8 *%ptr2 -+ store i8 %trunc2, i8 *%ptr3 -+ store i8 %trunc3, i8 *%ptr4 -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll -@@ -0,0 +1,433 @@ -+; Test various representations of pack-like operations. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; One way of writing a <4 x i32> -> <8 x i16> pack. 
-+define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) { -+; CHECK-LABEL: f1: -+; CHECK: vpkf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %elem0 = extractelement <4 x i32> %val0, i32 0 -+ %elem1 = extractelement <4 x i32> %val0, i32 1 -+ %elem2 = extractelement <4 x i32> %val0, i32 2 -+ %elem3 = extractelement <4 x i32> %val0, i32 3 -+ %elem4 = extractelement <4 x i32> %val1, i32 0 -+ %elem5 = extractelement <4 x i32> %val1, i32 1 -+ %elem6 = extractelement <4 x i32> %val1, i32 2 -+ %elem7 = extractelement <4 x i32> %val1, i32 3 -+ %hboth0 = bitcast i32 %elem0 to <2 x i16> -+ %hboth1 = bitcast i32 %elem1 to <2 x i16> -+ %hboth2 = bitcast i32 %elem2 to <2 x i16> -+ %hboth3 = bitcast i32 %elem3 to <2 x i16> -+ %hboth4 = bitcast i32 %elem4 to <2 x i16> -+ %hboth5 = bitcast i32 %elem5 to <2 x i16> -+ %hboth6 = bitcast i32 %elem6 to <2 x i16> -+ %hboth7 = bitcast i32 %elem7 to <2 x i16> -+ %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1, -+ <2 x i32> -+ %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3, -+ <2 x i32> -+ %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5, -+ <2 x i32> -+ %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7, -+ <2 x i32> -+ %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1, -+ <4 x i32> -+ %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3, -+ <4 x i32> -+ %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; A different way of writing a <4 x i32> -> <8 x i16> pack. 
-+define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) { -+; CHECK-LABEL: f2: -+; CHECK: vpkf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %elem0 = extractelement <4 x i32> %val0, i32 0 -+ %elem1 = extractelement <4 x i32> %val0, i32 1 -+ %elem2 = extractelement <4 x i32> %val0, i32 2 -+ %elem3 = extractelement <4 x i32> %val0, i32 3 -+ %elem4 = extractelement <4 x i32> %val1, i32 0 -+ %elem5 = extractelement <4 x i32> %val1, i32 1 -+ %elem6 = extractelement <4 x i32> %val1, i32 2 -+ %elem7 = extractelement <4 x i32> %val1, i32 3 -+ %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0 -+ %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0 -+ %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0 -+ %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0 -+ %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0 -+ %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0 -+ %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0 -+ %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0 -+ %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16> -+ %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16> -+ %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16> -+ %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16> -+ %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16> -+ %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16> -+ %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16> -+ %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16> -+ %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1, -+ <8 x i32> -+ %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3, -+ <8 x i32> -+ %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5, -+ <8 x i32> -+ %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7, -+ <8 x i32> -+ %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1, -+ <8 x i32> -+ %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3, -+ <8 x i32> -+ %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; A direct pack 
operation. -+define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) { -+; CHECK-LABEL: f3: -+; CHECK: vpkf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> -+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> -+ %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; One way of writing a <4 x i32> -> <16 x i8> pack. It doesn't matter -+; whether the first pack is VPKF or VPKH since the even bytes of the -+; result are discarded. -+define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1, -+ <4 x i32> %val2, <4 x i32> %val3) { -+; CHECK-LABEL: f4: -+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30 -+; CHECK: vpkh %v24, [[REG1]], [[REG2]] -+; CHECK: br %r14 -+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> -+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> -+ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16> -+ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16> -+ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, -+ <8 x i32> -+ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3, -+ <8 x i32> -+ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8> -+ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8> -+ %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Check the same operation, but with elements being extracted from the result. 
-+define void @f5(<4 x i32> %val0, <4 x i32> %val1, -+ <4 x i32> %val2, <4 x i32> %val3, -+ i8 *%base) { -+; CHECK-LABEL: f5: -+; CHECK-DAG: vsteb %v24, 0(%r2), 11 -+; CHECK-DAG: vsteb %v26, 1(%r2), 15 -+; CHECK-DAG: vsteb %v28, 2(%r2), 3 -+; CHECK-DAG: vsteb %v30, 3(%r2), 7 -+; CHECK: br %r14 -+ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> -+ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> -+ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16> -+ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16> -+ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, -+ <8 x i32> -+ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3, -+ <8 x i32> -+ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8> -+ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8> -+ %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5, -+ <16 x i32> -+ -+ %ptr0 = getelementptr i8 *%base, i64 0 -+ %ptr1 = getelementptr i8 *%base, i64 1 -+ %ptr2 = getelementptr i8 *%base, i64 2 -+ %ptr3 = getelementptr i8 *%base, i64 3 -+ -+ %byte0 = extractelement <16 x i8> %vec, i32 2 -+ %byte1 = extractelement <16 x i8> %vec, i32 7 -+ %byte2 = extractelement <16 x i8> %vec, i32 8 -+ %byte3 = extractelement <16 x i8> %vec, i32 13 -+ -+ store i8 %byte0, i8 *%ptr0 -+ store i8 %byte1, i8 *%ptr1 -+ store i8 %byte2, i8 *%ptr2 -+ store i8 %byte3, i8 *%ptr3 -+ -+ ret void -+} -+ -+; A different way of writing a <4 x i32> -> <16 x i8> pack. 
-+define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1, -+ <4 x i32> %val2, <4 x i32> %val3) { -+; CHECK-LABEL: f6: -+; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30 -+; CHECK: vpkh %v24, [[REG1]], [[REG2]] -+; CHECK: br %r14 -+ %elem0 = extractelement <4 x i32> %val0, i32 0 -+ %elem1 = extractelement <4 x i32> %val0, i32 1 -+ %elem2 = extractelement <4 x i32> %val0, i32 2 -+ %elem3 = extractelement <4 x i32> %val0, i32 3 -+ %elem4 = extractelement <4 x i32> %val1, i32 0 -+ %elem5 = extractelement <4 x i32> %val1, i32 1 -+ %elem6 = extractelement <4 x i32> %val1, i32 2 -+ %elem7 = extractelement <4 x i32> %val1, i32 3 -+ %elem8 = extractelement <4 x i32> %val2, i32 0 -+ %elem9 = extractelement <4 x i32> %val2, i32 1 -+ %elem10 = extractelement <4 x i32> %val2, i32 2 -+ %elem11 = extractelement <4 x i32> %val2, i32 3 -+ %elem12 = extractelement <4 x i32> %val3, i32 0 -+ %elem13 = extractelement <4 x i32> %val3, i32 1 -+ %elem14 = extractelement <4 x i32> %val3, i32 2 -+ %elem15 = extractelement <4 x i32> %val3, i32 3 -+ %bitcast0 = bitcast i32 %elem0 to <2 x i16> -+ %bitcast1 = bitcast i32 %elem1 to <2 x i16> -+ %bitcast2 = bitcast i32 %elem2 to <2 x i16> -+ %bitcast3 = bitcast i32 %elem3 to <2 x i16> -+ %bitcast4 = bitcast i32 %elem4 to <2 x i16> -+ %bitcast5 = bitcast i32 %elem5 to <2 x i16> -+ %bitcast6 = bitcast i32 %elem6 to <2 x i16> -+ %bitcast7 = bitcast i32 %elem7 to <2 x i16> -+ %bitcast8 = bitcast i32 %elem8 to <2 x i16> -+ %bitcast9 = bitcast i32 %elem9 to <2 x i16> -+ %bitcast10 = bitcast i32 %elem10 to <2 x i16> -+ %bitcast11 = bitcast i32 %elem11 to <2 x i16> -+ %bitcast12 = bitcast i32 %elem12 to <2 x i16> -+ %bitcast13 = bitcast i32 %elem13 to <2 x i16> -+ %bitcast14 = bitcast i32 %elem14 to <2 x i16> -+ %bitcast15 = bitcast i32 %elem15 to <2 x i16> -+ %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1, -+ <2 x i32> -+ %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> 
%bitcast3, -+ <2 x i32> -+ %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5, -+ <2 x i32> -+ %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7, -+ <2 x i32> -+ %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9, -+ <2 x i32> -+ %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11, -+ <2 x i32> -+ %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13, -+ <2 x i32> -+ %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15, -+ <2 x i32> -+ %bytes0 = bitcast <2 x i16> %low0 to <4 x i8> -+ %bytes1 = bitcast <2 x i16> %low1 to <4 x i8> -+ %bytes2 = bitcast <2 x i16> %low2 to <4 x i8> -+ %bytes3 = bitcast <2 x i16> %low3 to <4 x i8> -+ %bytes4 = bitcast <2 x i16> %low4 to <4 x i8> -+ %bytes5 = bitcast <2 x i16> %low5 to <4 x i8> -+ %bytes6 = bitcast <2 x i16> %low6 to <4 x i8> -+ %bytes7 = bitcast <2 x i16> %low7 to <4 x i8> -+ %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1, -+ <4 x i32> -+ %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3, -+ <4 x i32> -+ %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5, -+ <4 x i32> -+ %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7, -+ <4 x i32> -+ %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1, -+ <8 x i32> -+ %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3, -+ <8 x i32> -+ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; One way of writing a <2 x i64> -> <16 x i8> pack. 
-+define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1, -+ <2 x i64> %val2, <2 x i64> %val3, -+ <2 x i64> %val4, <2 x i64> %val5, -+ <2 x i64> %val6, <2 x i64> %val7) { -+; CHECK-LABEL: f7: -+; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26 -+; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30 -+; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27 -+; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31 -+; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]] -+; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]] -+; CHECK: vpkh %v24, [[REG5]], [[REG6]] -+; CHECK: br %r14 -+ %elem0 = extractelement <2 x i64> %val0, i32 0 -+ %elem1 = extractelement <2 x i64> %val0, i32 1 -+ %elem2 = extractelement <2 x i64> %val1, i32 0 -+ %elem3 = extractelement <2 x i64> %val1, i32 1 -+ %elem4 = extractelement <2 x i64> %val2, i32 0 -+ %elem5 = extractelement <2 x i64> %val2, i32 1 -+ %elem6 = extractelement <2 x i64> %val3, i32 0 -+ %elem7 = extractelement <2 x i64> %val3, i32 1 -+ %elem8 = extractelement <2 x i64> %val4, i32 0 -+ %elem9 = extractelement <2 x i64> %val4, i32 1 -+ %elem10 = extractelement <2 x i64> %val5, i32 0 -+ %elem11 = extractelement <2 x i64> %val5, i32 1 -+ %elem12 = extractelement <2 x i64> %val6, i32 0 -+ %elem13 = extractelement <2 x i64> %val6, i32 1 -+ %elem14 = extractelement <2 x i64> %val7, i32 0 -+ %elem15 = extractelement <2 x i64> %val7, i32 1 -+ %bitcast0 = bitcast i64 %elem0 to <2 x i32> -+ %bitcast1 = bitcast i64 %elem1 to <2 x i32> -+ %bitcast2 = bitcast i64 %elem2 to <2 x i32> -+ %bitcast3 = bitcast i64 %elem3 to <2 x i32> -+ %bitcast4 = bitcast i64 %elem4 to <2 x i32> -+ %bitcast5 = bitcast i64 %elem5 to <2 x i32> -+ %bitcast6 = bitcast i64 %elem6 to <2 x i32> -+ %bitcast7 = bitcast i64 %elem7 to <2 x i32> -+ %bitcast8 = bitcast i64 %elem8 to <2 x i32> -+ %bitcast9 = bitcast i64 %elem9 to <2 x i32> -+ %bitcast10 = bitcast i64 %elem10 to <2 x i32> -+ %bitcast11 = bitcast i64 %elem11 to <2 x i32> -+ 
%bitcast12 = bitcast i64 %elem12 to <2 x i32> -+ %bitcast13 = bitcast i64 %elem13 to <2 x i32> -+ %bitcast14 = bitcast i64 %elem14 to <2 x i32> -+ %bitcast15 = bitcast i64 %elem15 to <2 x i32> -+ %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1, -+ <2 x i32> -+ %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3, -+ <2 x i32> -+ %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5, -+ <2 x i32> -+ %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7, -+ <2 x i32> -+ %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9, -+ <2 x i32> -+ %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11, -+ <2 x i32> -+ %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13, -+ <2 x i32> -+ %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15, -+ <2 x i32> -+ %half0 = bitcast <2 x i32> %low0 to <4 x i16> -+ %half1 = bitcast <2 x i32> %low1 to <4 x i16> -+ %half2 = bitcast <2 x i32> %low2 to <4 x i16> -+ %half3 = bitcast <2 x i32> %low3 to <4 x i16> -+ %half4 = bitcast <2 x i32> %low4 to <4 x i16> -+ %half5 = bitcast <2 x i32> %low5 to <4 x i16> -+ %half6 = bitcast <2 x i32> %low6 to <4 x i16> -+ %half7 = bitcast <2 x i32> %low7 to <4 x i16> -+ %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1, -+ <4 x i32> -+ %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3, -+ <4 x i32> -+ %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5, -+ <4 x i32> -+ %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7, -+ <4 x i32> -+ %bytes0 = bitcast <4 x i16> %hlow0 to <8 x i8> -+ %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8> -+ %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8> -+ %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8> -+ %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1, -+ <8 x i32> -+ %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3, -+ <8 x i32> -+ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a <2 x 
i64> -> <4 x f32> pack in which only individual elements are -+; needed. -+define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) { -+; CHECK-LABEL: f8: -+; CHECK-NOT: vperm -+; CHECK-NOT: vpk -+; CHECK-NOT: vmrh -+; CHECK: aebr {{%f[0-7]}}, -+; CHECK: aebr {{%f[0-7]}}, -+; CHECK: meebr %f0, -+; CHECK: br %r14 -+ %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0 -+ %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0 -+ %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0 -+ %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0 -+ %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, -+ <2 x i32> -+ %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3, -+ <2 x i32> -+ %bitcast0 = bitcast <2 x i64> %join0 to <4 x float> -+ %bitcast1 = bitcast <2 x i64> %join1 to <4 x float> -+ %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1, -+ <4 x i32> -+ %elt0 = extractelement <4 x float> %pack, i32 0 -+ %elt1 = extractelement <4 x float> %pack, i32 1 -+ %elt2 = extractelement <4 x float> %pack, i32 2 -+ %elt3 = extractelement <4 x float> %pack, i32 3 -+ %add0 = fadd float %elt0, %elt2 -+ %add1 = fadd float %elt1, %elt3 -+ %ret = fmul float %add0, %add1 -+ ret float %ret -+} -+ -+; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are -+; needed. 
-+define i32 @f9(double %scalar0, double %scalar1, double %scalar2, -+ double %scalar3) { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vperm -+; CHECK-NOT: vpk -+; CHECK-NOT: vmrh -+; CHECK: ar {{%r[0-5]}}, -+; CHECK: ar {{%r[0-5]}}, -+; CHECK: or %r2, -+; CHECK: br %r14 -+ %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0 -+ %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0 -+ %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0 -+ %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0 -+ %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1, -+ <2 x i32> -+ %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3, -+ <2 x i32> -+ %bitcast0 = bitcast <2 x double> %join0 to <4 x i32> -+ %bitcast1 = bitcast <2 x double> %join1 to <4 x i32> -+ %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1, -+ <4 x i32> -+ %elt0 = extractelement <4 x i32> %pack, i32 0 -+ %elt1 = extractelement <4 x i32> %pack, i32 1 -+ %elt2 = extractelement <4 x i32> %pack, i32 2 -+ %elt3 = extractelement <4 x i32> %pack, i32 3 -+ %add0 = add i32 %elt0, %elt2 -+ %add1 = add i32 %elt1, %elt3 -+ %ret = or i32 %add0, %add1 -+ ret i32 %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-01.ll -@@ -0,0 +1,103 @@ -+; Test vector byte masks, v16i8 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. -+define <16 x i8> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <16 x i8> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <16 x i8> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a mixed vector (mask 0x8c75). 
-+define <16 x i8> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 35957 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test that undefs are treated as zero. -+define <16 x i8> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 35957 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <16 x i8> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test an all-zeros v2i8 that gets promoted to v16i8. -+define <2 x i8> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x i8> zeroinitializer -+} -+ -+; Test a mixed v2i8 that gets promoted to v16i8 (mask 0x8000). -+define <2 x i8> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgbm %v24, 32768 -+; CHECK: br %r14 -+ ret <2 x i8> -+} -+ -+; Test an all-zeros v4i8 that gets promoted to v16i8. -+define <4 x i8> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <4 x i8> zeroinitializer -+} -+ -+; Test a mixed v4i8 that gets promoted to v16i8 (mask 0x9000). -+define <4 x i8> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgbm %v24, 36864 -+; CHECK: br %r14 -+ ret <4 x i8> -+} -+ -+; Test an all-zeros v8i8 that gets promoted to v16i8. -+define <8 x i8> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <8 x i8> zeroinitializer -+} -+ -+; Test a mixed v8i8 that gets promoted to v16i8 (mask 0xE500). -+define <8 x i8> @f11() { -+; CHECK-LABEL: f11: -+; CHECK: vgbm %v24, 58624 -+; CHECK: br %r14 -+ ret <8 x i8> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-02.ll -@@ -0,0 +1,79 @@ -+; Test vector byte masks, v8i16 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. 
-+define <8 x i16> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <8 x i16> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <8 x i16> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a mixed vector (mask 0x8c76). -+define <8 x i16> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 35958 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test that undefs are treated as zero. -+define <8 x i16> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 35958 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <8 x i16> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test an all-zeros v2i16 that gets promoted to v8i16. -+define <2 x i16> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x i16> zeroinitializer -+} -+ -+; Test a mixed v2i16 that gets promoted to v8i16 (mask 0xc000). -+define <2 x i16> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgbm %v24, 49152 -+; CHECK: br %r14 -+ ret <2 x i16> -+} -+ -+; Test an all-zeros v4i16 that gets promoted to v8i16. -+define <4 x i16> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <4 x i16> zeroinitializer -+} -+ -+; Test a mixed v4i16 that gets promoted to v8i16 (mask 0x7200). -+define <4 x i16> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgbm %v24, 29184 -+; CHECK: br %r14 -+ ret <4 x i16> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-03.ll -@@ -0,0 +1,59 @@ -+; Test vector byte masks, v4i32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. 
-+define <4 x i32> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <4 x i32> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <4 x i32> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a mixed vector (mask 0x8c76). -+define <4 x i32> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 35958 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test that undefs are treated as zero (mask 0x8076). -+define <4 x i32> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 32886 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <4 x i32> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test an all-zeros v2i32 that gets promoted to v4i32. -+define <2 x i32> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x i32> zeroinitializer -+} -+ -+; Test a mixed v2i32 that gets promoted to v4i32 (mask 0xae00). -+define <2 x i32> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgbm %v24, 44544 -+; CHECK: br %r14 -+ ret <2 x i32> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-04.ll -@@ -0,0 +1,43 @@ -+; Test vector byte masks, v2i64 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. -+define <2 x i64> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x i64> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <2 x i64> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a mixed vector (mask 0x8c76). 
-+define <2 x i64> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 35958 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test that undefs are treated as zero (mask 0x8c00). -+define <2 x i64> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 35840 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <2 x i64> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <2 x i64> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-05.ll -@@ -0,0 +1,63 @@ -+; Test vector byte masks, v4f32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. -+define <4 x float> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <4 x float> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <4 x float> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a mixed vector (mask 0xc731). -+define <4 x float> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 50993 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test that undefs are treated as zero (mask 0xc031). -+define <4 x float> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 49201 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <4 x float> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test an all-zeros v2f32 that gets promoted to v4f32. -+define <2 x float> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x float> zeroinitializer -+} -+ -+; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). 
-+define <2 x float> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgbm %v24, 50944 -+; CHECK: br %r14 -+ ret <2 x float> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-06.ll -@@ -0,0 +1,43 @@ -+; Test vector byte masks, v2f64 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test an all-zeros vector. -+define <2 x double> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK: br %r14 -+ ret <2 x double> zeroinitializer -+} -+ -+; Test an all-ones vector. -+define <2 x double> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 65535 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a mixed vector (mask 0x8c76). -+define <2 x double> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 35958 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test that undefs are treated as zero (mask 0x8c00). -+define <2 x double> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgbm %v24, 35840 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. -+define <2 x double> @f5() { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vgbm -+; CHECK: br %r14 -+ ret <2 x double> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-07.ll -@@ -0,0 +1,229 @@ -+; Test vector replicates, v16i8 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <16 x i8> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. 
-+define <16 x i8> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <16 x i8> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. -+define <16 x i8> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <16 x i8> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <16 x i8> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. -+define <16 x i8> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. -+define <16 x i8> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <16 x i8> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. -+define <16 x i8> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <16 x i8> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. 
-+define <16 x i8> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <16 x i8> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. -+define <16 x i8> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <16 x i8> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. -+define <16 x i8> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. -+define <16 x i8> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <16 x i8> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Repeat f14 with undefs optimistically treated as 0. -+define <16 x i8> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Repeat f18 with undefs optimistically treated as -1. -+define <16 x i8> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-08.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-08.ll -@@ -0,0 +1,189 @@ -+; Test vector replicates, v8i16 version. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <8 x i16> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. -+define <8 x i16> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <8 x i16> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. -+define <8 x i16> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <8 x i16> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <8 x i16> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. -+define <8 x i16> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. -+define <8 x i16> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <8 x i16> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. 
-+define <8 x i16> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <8 x i16> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. -+define <8 x i16> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <8 x i16> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. -+define <8 x i16> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <8 x i16> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. -+define <8 x i16> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. -+define <8 x i16> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <8 x i16> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Repeat f14 with undefs optimistically treated as 0. 
-+define <8 x i16> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Repeat f18 with undefs optimistically treated as -1. -+define <8 x i16> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-09.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-09.ll -@@ -0,0 +1,169 @@ -+; Test vector replicates, v4i32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <4 x i32> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. -+define <4 x i32> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <4 x i32> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. -+define <4 x i32> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <4 x i32> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <4 x i32> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. 
-+define <4 x i32> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. -+define <4 x i32> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <4 x i32> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. -+define <4 x i32> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <4 x i32> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. -+define <4 x i32> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <4 x i32> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. -+define <4 x i32> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <4 x i32> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. 
-+define <4 x i32> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. -+define <4 x i32> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <4 x i32> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Repeat f14 with undefs optimistically treated as 0, 32767. -+define <4 x i32> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Repeat f18 with undefs optimistically treated as -2, -1. -+define <4 x i32> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-10.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-10.ll -@@ -0,0 +1,169 @@ -+; Test vector replicates, v2i64 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <2 x i64> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. -+define <2 x i64> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <2 x i64> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. 
-+define <2 x i64> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <2 x i64> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <2 x i64> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. -+define <2 x i64> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. -+define <2 x i64> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <2 x i64> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. -+define <2 x i64> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <2 x i64> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. -+define <2 x i64> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <2 x i64> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. 
-+define <2 x i64> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <2 x i64> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. -+define <2 x i64> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. -+define <2 x i64> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <2 x i64> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Repeat f14 with undefs optimistically treated as 32767. -+define <2 x i64> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Repeat f18 with undefs optimistically treated as -2. -+define <2 x i64> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-11.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-11.ll -@@ -0,0 +1,189 @@ -+; Test vector replicates, v4f32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <4 x float> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. 
-+define <4 x float> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <4 x float> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. -+define <4 x float> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <4 x float> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <4 x float> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. -+define <4 x float> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. -+define <4 x float> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <4 x float> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. -+define <4 x float> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <4 x float> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. 
-+define <4 x float> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <4 x float> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. -+define <4 x float> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <4 x float> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. -+define <4 x float> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. -+define <4 x float> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <4 x float> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Repeat f14 with undefs optimistically treated as 0, 32767. -+define <4 x float> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Repeat f18 with undefs optimistically treated as -2, -1. 
-+define <4 x float> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <4 x float> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-12.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-12.ll -@@ -0,0 +1,169 @@ -+; Test vector replicates, v2f64 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a byte-granularity replicate with the lowest useful value. -+define <2 x double> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vrepib %v24, 1 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a byte-granularity replicate with an arbitrary value. -+define <2 x double> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vrepib %v24, -55 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a byte-granularity replicate with the highest useful value. -+define <2 x double> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vrepib %v24, -2 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a halfword-granularity replicate with the lowest useful value. -+define <2 x double> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vrepih %v24, 1 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a halfword-granularity replicate with an arbitrary value. -+define <2 x double> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vrepih %v24, 25650 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a halfword-granularity replicate with the highest useful value. -+define <2 x double> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vrepih %v24, -2 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the lowest useful positive value. -+define <2 x double> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vrepif %v24, 1 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the highest in-range value. 
-+define <2 x double> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vrepif %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the next highest value. -+; This cannot use VREPIF. -+define <2 x double> @f9() { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the lowest in-range value. -+define <2 x double> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vrepif %v24, -32768 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the next lowest value. -+; This cannot use VREPIF. -+define <2 x double> @f11() { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vrepif -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with the highest useful negative value. -+define <2 x double> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vrepif %v24, -2 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the lowest useful positive -+; value. -+define <2 x double> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vrepig %v24, 1 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the highest in-range value. -+define <2 x double> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the next highest value. -+; This cannot use VREPIG. -+define <2 x double> @f15() { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the lowest in-range value. -+define <2 x double> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vrepig %v24, -32768 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the next lowest value. -+; This cannot use VREPIG. 
-+define <2 x double> @f17() { -+; CHECK-LABEL: f17: -+; CHECK-NOT: vrepig -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the highest useful negative -+; value. -+define <2 x double> @f18() { -+; CHECK-LABEL: f18: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Repeat f14 with undefs optimistically treated as 32767. -+define <2 x double> @f19() { -+; CHECK-LABEL: f19: -+; CHECK: vrepig %v24, 32767 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Repeat f18 with undefs optimistically treated as -2. -+define <2 x double> @f20() { -+; CHECK-LABEL: f20: -+; CHECK: vrepig %v24, -2 -+; CHECK: br %r14 -+ ret <2 x double> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-13.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-13.ll -@@ -0,0 +1,193 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v16i8 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <16 x i8> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. -+define <16 x i8> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <16 x i8> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <16 x i8> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 12, 17 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. 
-+define <16 x i8> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 17, 15 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. -+define <16 x i8> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <16 x i8> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. -+define <16 x i8> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <16 x i8> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. -+define <16 x i8> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 18, 0 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Retest f1 with arbitrary undefs instead of 0s. -+define <16 x i8> @f11() { -+; CHECK-LABEL: f11: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Try a case where we want consistent undefs to be treated as 0. -+define <16 x i8> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vgmf %v24, 15, 23 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; ...and again with the lower bits of the replicated constant. -+define <16 x i8> @f13() { -+; CHECK-LABEL: f13: -+; CHECK: vgmf %v24, 15, 22 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Try a case where we want consistent undefs to be treated as -1. -+define <16 x i8> @f14() { -+; CHECK-LABEL: f14: -+; CHECK: vgmf %v24, 28, 8 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; ...and again with the lower bits of the replicated constant. 
-+define <16 x i8> @f15() { -+; CHECK-LABEL: f15: -+; CHECK: vgmf %v24, 18, 3 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Repeat f9 with arbitrary undefs. -+define <16 x i8> @f16() { -+; CHECK-LABEL: f16: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -+ -+; Try a case where we want some consistent undefs to be treated as 0 -+; and some to be treated as 255. -+define <16 x i8> @f17() { -+; CHECK-LABEL: f17: -+; CHECK: vgmg %v24, 23, 35 -+; CHECK: br %r14 -+ ret <16 x i8> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-14.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-14.ll -@@ -0,0 +1,113 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v8i16 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <8 x i16> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. -+define <8 x i16> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <8 x i16> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <8 x i16> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 12, 17 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. -+define <8 x i16> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 17, 15 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. 
-+define <8 x i16> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <8 x i16> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. -+define <8 x i16> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <8 x i16> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. -+define <8 x i16> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 18, 0 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; Retest f1 with arbitrary undefs instead of 0s. -+define <8 x i16> @f11() { -+; CHECK-LABEL: f11: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -+ -+; ...likewise f9. -+define <8 x i16> @f12() { -+; CHECK-LABEL: f12: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <8 x i16> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-15.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-15.ll -@@ -0,0 +1,85 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v4i32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <4 x i32> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. 
-+define <4 x i32> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <4 x i32> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <4 x i32> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 12, 17 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. -+define <4 x i32> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 17, 15 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. -+define <4 x i32> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <4 x i32> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. -+define <4 x i32> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <4 x i32> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. -+define <4 x i32> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 18, 0 -+; CHECK: br %r14 -+ ret <4 x i32> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-16.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-16.ll -@@ -0,0 +1,85 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v2i64 version. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <2 x i64> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. -+define <2 x i64> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <2 x i64> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <2 x i64> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 12, 17 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. -+define <2 x i64> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 17, 15 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. -+define <2 x i64> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <2 x i64> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. -+define <2 x i64> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <2 x i64> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 31, 42 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. 
-+define <2 x i64> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 18, 0 -+; CHECK: br %r14 -+ ret <2 x i64> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-17.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-17.ll -@@ -0,0 +1,95 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v4f32 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <4 x float> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. -+define <4 x float> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <4 x float> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <4 x float> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 2, 8 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. -+define <4 x float> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 9, 1 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. -+define <4 x float> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <4 x float> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. 
-+define <4 x float> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <4 x float> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 34, 41 -+; CHECK: br %r14 -+ ret <4 x float> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. -+define <4 x float> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 32, 0 -+; CHECK: br %r14 -+ ret <4 x float> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-const-18.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-const-18.ll -@@ -0,0 +1,85 @@ -+; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a word-granularity replicate with the lowest value that cannot use -+; VREPIF. -+define <2 x double> @f1() { -+; CHECK-LABEL: f1: -+; CHECK: vgmf %v24, 16, 16 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate that has the lower 17 bits set. -+define <2 x double> @f2() { -+; CHECK-LABEL: f2: -+; CHECK: vgmf %v24, 15, 31 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate that has the upper 15 bits set. -+define <2 x double> @f3() { -+; CHECK-LABEL: f3: -+; CHECK: vgmf %v24, 0, 14 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate that has middle bits set. -+define <2 x double> @f4() { -+; CHECK-LABEL: f4: -+; CHECK: vgmf %v24, 2, 11 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a word-granularity replicate with a wrap-around mask. -+define <2 x double> @f5() { -+; CHECK-LABEL: f5: -+; CHECK: vgmf %v24, 17, 15 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with the lowest value that cannot -+; use VREPIG. 
-+define <2 x double> @f6() { -+; CHECK-LABEL: f6: -+; CHECK: vgmg %v24, 48, 48 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate that has the lower 22 bits set. -+define <2 x double> @f7() { -+; CHECK-LABEL: f7: -+; CHECK: vgmg %v24, 42, 63 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate that has the upper 45 bits set. -+define <2 x double> @f8() { -+; CHECK-LABEL: f8: -+; CHECK: vgmg %v24, 0, 44 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate that has middle bits set. -+define <2 x double> @f9() { -+; CHECK-LABEL: f9: -+; CHECK: vgmg %v24, 2, 11 -+; CHECK: br %r14 -+ ret <2 x double> -+} -+ -+; Test a doubleword-granularity replicate with a wrap-around mask. -+define <2 x double> @f10() { -+; CHECK-LABEL: f10: -+; CHECK: vgmg %v24, 10, 0 -+; CHECK: br %r14 -+ ret <2 x double> -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll -@@ -0,0 +1,95 @@ -+; Test conversions between integer and float elements. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test conversion of f64s to signed i64s. -+define <2 x i64> @f1(<2 x double> %doubles) { -+; CHECK-LABEL: f1: -+; CHECK: vcgdb %v24, %v24, 0, 5 -+; CHECK: br %r14 -+ %dwords = fptosi <2 x double> %doubles to <2 x i64> -+ ret <2 x i64> %dwords -+} -+ -+; Test conversion of f64s to unsigned i64s. -+define <2 x i64> @f2(<2 x double> %doubles) { -+; CHECK-LABEL: f2: -+; CHECK: vclgdb %v24, %v24, 0, 5 -+; CHECK: br %r14 -+ %dwords = fptoui <2 x double> %doubles to <2 x i64> -+ ret <2 x i64> %dwords -+} -+ -+; Test conversion of signed i64s to f64s. 
-+define <2 x double> @f3(<2 x i64> %dwords) { -+; CHECK-LABEL: f3: -+; CHECK: vcdgb %v24, %v24, 0, 0 -+; CHECK: br %r14 -+ %doubles = sitofp <2 x i64> %dwords to <2 x double> -+ ret <2 x double> %doubles -+} -+ -+; Test conversion of unsigned i64s to f64s. -+define <2 x double> @f4(<2 x i64> %dwords) { -+; CHECK-LABEL: f4: -+; CHECK: vcdlgb %v24, %v24, 0, 0 -+; CHECK: br %r14 -+ %doubles = uitofp <2 x i64> %dwords to <2 x double> -+ ret <2 x double> %doubles -+} -+ -+; Test conversion of f64s to signed i32s, which must compile. -+define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) { -+ %words = fptosi <2 x double> %doubles to <2 x i32> -+ store <2 x i32> %words, <2 x i32> *%ptr -+ ret void -+} -+ -+; Test conversion of f64s to unsigned i32s, which must compile. -+define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) { -+ %words = fptoui <2 x double> %doubles to <2 x i32> -+ store <2 x i32> %words, <2 x i32> *%ptr -+ ret void -+} -+ -+; Test conversion of signed i32s to f64s, which must compile. -+define <2 x double> @f7(<2 x i32> *%ptr) { -+ %words = load <2 x i32> *%ptr -+ %doubles = sitofp <2 x i32> %words to <2 x double> -+ ret <2 x double> %doubles -+} -+ -+; Test conversion of unsigned i32s to f64s, which must compile. -+define <2 x double> @f8(<2 x i32> *%ptr) { -+ %words = load <2 x i32> *%ptr -+ %doubles = uitofp <2 x i32> %words to <2 x double> -+ ret <2 x double> %doubles -+} -+ -+; Test conversion of f32s to signed i64s, which must compile. -+define <2 x i64> @f9(<2 x float> *%ptr) { -+ %floats = load <2 x float> *%ptr -+ %dwords = fptosi <2 x float> %floats to <2 x i64> -+ ret <2 x i64> %dwords -+} -+ -+; Test conversion of f32s to unsigned i64s, which must compile. -+define <2 x i64> @f10(<2 x float> *%ptr) { -+ %floats = load <2 x float> *%ptr -+ %dwords = fptoui <2 x float> %floats to <2 x i64> -+ ret <2 x i64> %dwords -+} -+ -+; Test conversion of signed i64s to f32, which must compile. 
-+define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) { -+ %floats = sitofp <2 x i64> %dwords to <2 x float> -+ store <2 x float> %floats, <2 x float> *%ptr -+ ret void -+} -+ -+; Test conversion of unsigned i64s to f32, which must compile. -+define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) { -+ %floats = uitofp <2 x i64> %dwords to <2 x float> -+ store <2 x float> %floats, <2 x float> *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll -@@ -0,0 +1,33 @@ -+; Test conversions between different-sized float elements. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test cases where both elements of a v2f64 are converted to f32s. -+define void @f1(<2 x double> %val, <2 x float> *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0 -+; CHECK: br %r14 -+ %res = fptrunc <2 x double> %val to <2 x float> -+ store <2 x float> %res, <2 x float> *%ptr -+ ret void -+} -+ -+; Test conversion of an f64 in a vector register to an f32. -+define float @f2(<2 x double> %vec) { -+; CHECK-LABEL: f2: -+; CHECK: wledb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %vec, i32 0 -+ %ret = fptrunc double %scalar to float -+ ret float %ret -+} -+ -+; Test conversion of an f32 in a vector register to an f64. 
-+define double @f3(<4 x float> %vec) { -+; CHECK-LABEL: f3: -+; CHECK: wldeb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <4 x float> %vec, i32 0 -+ %ret = fpext float %scalar to double -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll -@@ -0,0 +1,81 @@ -+; Test vector count leading zeros -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 %is_zero_undef) -+declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 %is_zero_undef) -+declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 %is_zero_undef) -+declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 %is_zero_undef) -+ -+define <16 x i8> @f1(<16 x i8> %a) { -+; CHECK-LABEL: f1: -+; CHECK: vclzb %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @f2(<16 x i8> %a) { -+; CHECK-LABEL: f2: -+; CHECK: vclzb %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) -+ ret <16 x i8> %res -+} -+ -+define <8 x i16> @f3(<8 x i16> %a) { -+; CHECK-LABEL: f3: -+; CHECK: vclzh %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @f4(<8 x i16> %a) { -+; CHECK-LABEL: f4: -+; CHECK: vclzh %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) -+ ret <8 x i16> %res -+} -+ -+define <4 x i32> @f5(<4 x i32> %a) { -+; CHECK-LABEL: f5: -+; CHECK: vclzf %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @f6(<4 x i32> %a) { -+; CHECK-LABEL: f6: -+; CHECK: vclzf %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <4 x i32> 
@llvm.ctlz.v4i32(<4 x i32> %a, i1 true) -+ ret <4 x i32> %res -+} -+ -+define <2 x i64> @f7(<2 x i64> %a) { -+; CHECK-LABEL: f7: -+; CHECK: vclzg %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @f8(<2 x i64> %a) { -+; CHECK-LABEL: f8: -+; CHECK: vclzg %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) -+ ret <2 x i64> %res -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll -@@ -0,0 +1,53 @@ -+; Test vector population-count instruction -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) -+declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) -+declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) -+declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) -+ -+define <16 x i8> @f1(<16 x i8> %a) { -+; CHECK-LABEL: f1: -+; CHECK: vpopct %v24, %v24, 0 -+; CHECK: br %r14 -+ -+ %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) -+ ret <16 x i8> %popcnt -+} -+ -+define <8 x i16> @f2(<8 x i16> %a) { -+; CHECK-LABEL: f2: -+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -+; CHECK: veslh [[T2:%v[0-9]+]], [[T1]], 8 -+; CHECK: vah [[T3:%v[0-9]+]], [[T1]], [[T2]] -+; CHECK: vesrlh %v24, [[T3]], 8 -+; CHECK: br %r14 -+ -+ %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) -+ ret <8 x i16> %popcnt -+} -+ -+define <4 x i32> @f3(<4 x i32> %a) { -+; CHECK-LABEL: f3: -+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -+; CHECK: vgbm [[T2:%v[0-9]+]], 0 -+; CHECK: vsumb %v24, [[T1]], [[T2]] -+; CHECK: br %r14 -+ -+ %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) -+ ret <4 x i32> %popcnt -+} -+ -+define <2 x i64> @f4(<2 x i64> %a) { -+; CHECK-LABEL: f4: -+; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 -+; CHECK: vgbm [[T2:%v[0-9]+]], 0 -+; 
CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]] -+; CHECK: vsumgf %v24, [[T3]], [[T2]] -+; CHECK: br %r14 -+ -+ %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) -+ ret <2 x i64> %popcnt -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll -@@ -0,0 +1,81 @@ -+; Test vector count trailing zeros -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 %is_zero_undef) -+declare <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 %is_zero_undef) -+declare <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 %is_zero_undef) -+declare <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 %is_zero_undef) -+ -+define <16 x i8> @f1(<16 x i8> %a) { -+; CHECK-LABEL: f1: -+; CHECK: vctzb %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) -+ ret <16 x i8> %res -+} -+ -+define <16 x i8> @f2(<16 x i8> %a) { -+; CHECK-LABEL: f2: -+; CHECK: vctzb %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) -+ ret <16 x i8> %res -+} -+ -+define <8 x i16> @f3(<8 x i16> %a) { -+; CHECK-LABEL: f3: -+; CHECK: vctzh %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) -+ ret <8 x i16> %res -+} -+ -+define <8 x i16> @f4(<8 x i16> %a) { -+; CHECK-LABEL: f4: -+; CHECK: vctzh %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) -+ ret <8 x i16> %res -+} -+ -+define <4 x i32> @f5(<4 x i32> %a) { -+; CHECK-LABEL: f5: -+; CHECK: vctzf %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) -+ ret <4 x i32> %res -+} -+ -+define <4 x i32> @f6(<4 x i32> %a) { -+; CHECK-LABEL: f6: -+; CHECK: vctzf %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 
true) -+ ret <4 x i32> %res -+} -+ -+define <2 x i64> @f7(<2 x i64> %a) { -+; CHECK-LABEL: f7: -+; CHECK: vctzg %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) -+ ret <2 x i64> %res -+} -+ -+define <2 x i64> @f8(<2 x i64> %a) { -+; CHECK-LABEL: f8: -+; CHECK: vctzg %v24, %v24 -+; CHECK: br %r14 -+ -+ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) -+ ret <2 x i64> %res -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-div-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-div-01.ll -@@ -0,0 +1,83 @@ -+; Test vector division. There is no native integer support for this, -+; so the integer cases are really a test of the operation legalization code. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 division. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgp [[REG:%v[0-9]+]], -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 0 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 1 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 2 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 3 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 4 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 5 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 6 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 8 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 9 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 10 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 11 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 12 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 13 -+; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 14 -+; CHECK: br %r14 -+ %ret = sdiv <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 division. 
-+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vlvgp [[REG:%v[0-9]+]], -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 0 -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 1 -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 2 -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 4 -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 5 -+; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 6 -+; CHECK: br %r14 -+ %ret = sdiv <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 division. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vlvgp [[REG:%v[0-9]+]], -+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 0 -+; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 2 -+; CHECK: br %r14 -+ %ret = sdiv <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 division. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vlvgp %v24, -+; CHECK: br %r14 -+ %ret = sdiv <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2f64 division. -+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vfddb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = fdiv <2 x double> %val1, %val2 -+ ret <2 x double> %ret -+} -+ -+; Test an f64 division that uses vector registers. 
-+define double @f6(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: wfddb %f0, %v24, %v26 -+; CHECK: br %r14 -+ %scalar1 = extractelement <2 x double> %val1, i32 0 -+ %scalar2 = extractelement <2 x double> %val2, i32 0 -+ %ret = fdiv double %scalar1, %scalar2 -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll -@@ -0,0 +1,13 @@ -+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a memory copy of a v2i32 (via the constant pool). -+define void @f1(<2 x i32> *%dest) { -+; CHECK-LABEL: f1: -+; CHECK: lgrl [[REG:%r[0-5]]], {{[._A-Za-z0-9]}} -+; CHECK: stg [[REG]], 0(%r2) -+; CHECK: br %r14 -+ store <2 x i32> , <2 x i32> *%dest -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll -@@ -0,0 +1,15 @@ -+; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a case where a vector extraction can be simplified to a scalar load. -+; The index must be extended from i32 to i64. -+define i32 @f1(<4 x i32> *%ptr, i32 %index) { -+; CHECK-LABEL: f1: -+; CHECK: risbg {{%r[0-5]}}, %r3, 30, 189, 2 -+; CHECK: l %r2, -+; CHECK: br %r14 -+ %vec = load <4 x i32> *%ptr -+ %res = extractelement <4 x i32> %vec, i32 %index -+ ret i32 %res -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll -@@ -0,0 +1,3335 @@ -+; Test vector intrinsics. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare i32 @llvm.s390.lcbb(i8 *, i32) -+declare <16 x i8> @llvm.s390.vlbb(i8 *, i32) -+declare <16 x i8> @llvm.s390.vll(i32, i8 *) -+declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) -+declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) -+declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) -+declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) -+declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) -+declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) -+declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) -+declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) -+declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) -+declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) -+declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) -+declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) -+declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *) -+declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) -+declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) -+declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) -+declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>) -+declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>) -+declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>) -+declare <8 x i16> @llvm.s390.vuplb(<16 x i8>) -+declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) -+declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) -+declare <8 x i16> @llvm.s390.vupllb(<16 x i8>) -+declare <4 x i32> @llvm.s390.vupllh(<8 x i16>) -+declare <2 x i64> @llvm.s390.vupllf(<4 x i32>) -+declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>) -+declare <16 x i8> 
@llvm.s390.vaq(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vavgf(<4 x i32>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>) -+declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>) -+declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>) -+declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>) 
-+declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>) -+declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>) -+declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>) -+declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>) -+declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>) -+declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32) -+declare <8 x i16> @llvm.s390.verllh(<8 x i16>, i32) -+declare <4 x i32> 
@llvm.s390.verllf(<4 x i32>, i32) -+declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32) -+declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32) -+declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32) -+declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32) -+declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32) -+declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) -+declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>) -+declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>) -+declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>) -+declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>) -+declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>) -+declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>) -+declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>) -+declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>) -+declare {<16 x 
i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32>, <4 x i32>) -+declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>) -+declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>) -+declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>) -+declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32) -+declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32) -+declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32) -+declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32) -+declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32) -+declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32) -+declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32) -+declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32) -+declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32) -+declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32) -+declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32) -+declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32) -+declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vfeef(<4 x i32>, <4 x i32>) -+declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>) -+declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} 
@llvm.s390.vfeezhs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>) -+declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>) -+declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>) -+declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>) -+declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>) -+declare <16 x i8> @llvm.s390.vistrb(<16 x i8>) -+declare <8 x i16> @llvm.s390.vistrh(<8 x i16>) -+declare <4 x i32> @llvm.s390.vistrf(<4 x i32>) -+declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>) -+declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>) -+declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>) -+declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32) -+declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32) -+declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32) -+declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>, -+ i32) -+declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>, -+ i32) -+declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>, -+ i32) -+declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32) -+declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32) -+declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32) -+declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x 
i8>, -+ i32) -+declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>, -+ i32) -+declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>, -+ i32) -+declare {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double>, <2 x double>) -+declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>) -+declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>) -+declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32) -+declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32) -+ -+; LCBB with the lowest M3 operand. -+define i32 @test_lcbb1(i8 *%ptr) { -+; CHECK-LABEL: test_lcbb1: -+; CHECK: lcbb %r2, 0(%r2), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0) -+ ret i32 %res -+} -+ -+; LCBB with the highest M3 operand. -+define i32 @test_lcbb2(i8 *%ptr) { -+; CHECK-LABEL: test_lcbb2: -+; CHECK: lcbb %r2, 0(%r2), 15 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15) -+ ret i32 %res -+} -+ -+; LCBB with a displacement and index. -+define i32 @test_lcbb3(i8 *%base, i64 %index) { -+; CHECK-LABEL: test_lcbb3: -+; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4 -+; CHECK: br %r14 -+ %add = add i64 %index, 4095 -+ %ptr = getelementptr i8 *%base, i64 %add -+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4) -+ ret i32 %res -+} -+ -+; LCBB with an out-of-range displacement. -+define i32 @test_lcbb4(i8 *%base) { -+; CHECK-LABEL: test_lcbb4: -+; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5) -+ ret i32 %res -+} -+ -+; VLBB with the lowest M3 operand. -+define <16 x i8> @test_vlbb1(i8 *%ptr) { -+; CHECK-LABEL: test_vlbb1: -+; CHECK: vlbb %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0) -+ ret <16 x i8> %res -+} -+ -+; VLBB with the highest M3 operand. 
-+define <16 x i8> @test_vlbb2(i8 *%ptr) { -+; CHECK-LABEL: test_vlbb2: -+; CHECK: vlbb %v24, 0(%r2), 15 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15) -+ ret <16 x i8> %res -+} -+ -+; VLBB with a displacement and index. -+define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) { -+; CHECK-LABEL: test_vlbb3: -+; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4 -+; CHECK: br %r14 -+ %add = add i64 %index, 4095 -+ %ptr = getelementptr i8 *%base, i64 %add -+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4) -+ ret <16 x i8> %res -+} -+ -+; VLBB with an out-of-range displacement. -+define <16 x i8> @test_vlbb4(i8 *%base) { -+; CHECK-LABEL: test_vlbb4: -+; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5) -+ ret <16 x i8> %res -+} -+ -+; VLL with the lowest in-range displacement. -+define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) { -+; CHECK-LABEL: test_vll1: -+; CHECK: vll %v24, %r3, 0(%r2) -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) -+ ret <16 x i8> %res -+} -+ -+; VLL with the highest in-range displacement. -+define <16 x i8> @test_vll2(i8 *%base, i32 %length) { -+; CHECK-LABEL: test_vll2: -+; CHECK: vll %v24, %r3, 4095(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4095 -+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) -+ ret <16 x i8> %res -+} -+ -+; VLL with an out-of-range displacementa. -+define <16 x i8> @test_vll3(i8 *%base, i32 %length) { -+; CHECK-LABEL: test_vll3: -+; CHECK: vll %v24, %r3, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) -+ ret <16 x i8> %res -+} -+ -+; Check that VLL doesn't allow an index. 
-+define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) { -+; CHECK-LABEL: test_vll4: -+; CHECK: vll %v24, %r4, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 %index -+ %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr) -+ ret <16 x i8> %res -+} -+ -+; VPDI taking element 0 from each half. -+define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vpdi1: -+; CHECK: vpdi %v24, %v24, %v26, 0 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0) -+ ret <2 x i64> %res -+} -+ -+; VPDI taking element 1 from each half. -+define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vpdi2: -+; CHECK: vpdi %v24, %v24, %v26, 10 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10) -+ ret <2 x i64> %res -+} -+ -+; VPERM. -+define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vperm: -+; CHECK: vperm %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VPKSH. -+define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vpksh: -+; CHECK: vpksh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b) -+ ret <16 x i8> %res -+} -+ -+; VPKSF. -+define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vpksf: -+; CHECK: vpksf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b) -+ ret <8 x i16> %res -+} -+ -+; VPKSG. -+define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vpksg: -+; CHECK: vpksg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b) -+ ret <4 x i32> %res -+} -+ -+; VPKSHS with no processing of the result. 
-+define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpkshs: -+; CHECK: vpkshs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VPKSHS, storing to %ptr if all values were saturated. -+define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vpkshs_all_store: -+; CHECK: vpkshs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp uge i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <16 x i8> %res -+} -+ -+; VPKSFS with no processing of the result. -+define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpksfs: -+; CHECK: vpksfs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VPKSFS, storing to %ptr if any values were saturated. 
-+define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vpksfs_any_store: -+; CHECK: vpksfs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp ugt i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <8 x i16> %res -+} -+ -+; VPKSGS with no processing of the result. -+define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpksgs: -+; CHECK: vpksgs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VPKSGS, storing to %ptr if no elements were saturated -+define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vpksgs_none_store: -+; CHECK: vpksgs %v24, %v24, %v26 -+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp sle i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <4 x i32> %res -+} -+ -+; VPKLSH. 
-+define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vpklsh: -+; CHECK: vpklsh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b) -+ ret <16 x i8> %res -+} -+ -+; VPKLSF. -+define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vpklsf: -+; CHECK: vpklsf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b) -+ ret <8 x i16> %res -+} -+ -+; VPKLSG. -+define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vpklsg: -+; CHECK: vpklsg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b) -+ ret <4 x i32> %res -+} -+ -+; VPKLSHS with no processing of the result. -+define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpklshs: -+; CHECK: vpklshs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VPKLSHS, storing to %ptr if all values were saturated. -+define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vpklshs_all_store: -+; CHECK: vpklshs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp eq i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <16 x i8> %res -+} -+ -+; VPKLSFS with no processing of the result. 
-+define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpklsfs: -+; CHECK: vpklsfs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VPKLSFS, storing to %ptr if any values were saturated. -+define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vpklsfs_any_store: -+; CHECK: vpklsfs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp ne i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <8 x i16> %res -+} -+ -+; VPKLSGS with no processing of the result. 
-+define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vpklsgs: -+; CHECK: vpklsgs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VPKLSGS, storing to %ptr if no elements were saturated -+define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vpklsgs_none_store: -+; CHECK: vpklsgs %v24, %v24, %v26 -+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp eq i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <4 x i32> %res -+} -+ -+; VSTL with the lowest in-range displacement. -+define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) { -+; CHECK-LABEL: test_vstl1: -+; CHECK: vstl %v24, %r3, 0(%r2) -+; CHECK: br %r14 -+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) -+ ret void -+} -+ -+; VSTL with the highest in-range displacement. -+define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) { -+; CHECK-LABEL: test_vstl2: -+; CHECK: vstl %v24, %r3, 4095(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4095 -+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) -+ ret void -+} -+ -+; VSTL with an out-of-range displacement. 
-+define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) { -+; CHECK-LABEL: test_vstl3: -+; CHECK: vstl %v24, %r3, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) -+ ret void -+} -+ -+; Check that VSTL doesn't allow an index. -+define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) { -+; CHECK-LABEL: test_vstl4: -+; CHECK: vstl %v24, %r4, 0({{%r[1-5]}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 %index -+ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) -+ ret void -+} -+ -+; VUPHB. -+define <8 x i16> @test_vuphb(<16 x i8> %a) { -+; CHECK-LABEL: test_vuphb: -+; CHECK: vuphb %v24, %v24 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a) -+ ret <8 x i16> %res -+} -+ -+; VUPHH. -+define <4 x i32> @test_vuphh(<8 x i16> %a) { -+; CHECK-LABEL: test_vuphh: -+; CHECK: vuphh %v24, %v24 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a) -+ ret <4 x i32> %res -+} -+ -+; VUPHF. -+define <2 x i64> @test_vuphf(<4 x i32> %a) { -+; CHECK-LABEL: test_vuphf: -+; CHECK: vuphf %v24, %v24 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a) -+ ret <2 x i64> %res -+} -+ -+; VUPLHB. -+define <8 x i16> @test_vuplhb(<16 x i8> %a) { -+; CHECK-LABEL: test_vuplhb: -+; CHECK: vuplhb %v24, %v24 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a) -+ ret <8 x i16> %res -+} -+ -+; VUPLHH. -+define <4 x i32> @test_vuplhh(<8 x i16> %a) { -+; CHECK-LABEL: test_vuplhh: -+; CHECK: vuplhh %v24, %v24 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a) -+ ret <4 x i32> %res -+} -+ -+; VUPLHF. -+define <2 x i64> @test_vuplhf(<4 x i32> %a) { -+; CHECK-LABEL: test_vuplhf: -+; CHECK: vuplhf %v24, %v24 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a) -+ ret <2 x i64> %res -+} -+ -+; VUPLB. 
-+define <8 x i16> @test_vuplb(<16 x i8> %a) { -+; CHECK-LABEL: test_vuplb: -+; CHECK: vuplb %v24, %v24 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a) -+ ret <8 x i16> %res -+} -+ -+; VUPLHW. -+define <4 x i32> @test_vuplhw(<8 x i16> %a) { -+; CHECK-LABEL: test_vuplhw: -+; CHECK: vuplhw %v24, %v24 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a) -+ ret <4 x i32> %res -+} -+ -+; VUPLF. -+define <2 x i64> @test_vuplf(<4 x i32> %a) { -+; CHECK-LABEL: test_vuplf: -+; CHECK: vuplf %v24, %v24 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a) -+ ret <2 x i64> %res -+} -+ -+; VUPLLB. -+define <8 x i16> @test_vupllb(<16 x i8> %a) { -+; CHECK-LABEL: test_vupllb: -+; CHECK: vupllb %v24, %v24 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a) -+ ret <8 x i16> %res -+} -+ -+; VUPLLH. -+define <4 x i32> @test_vupllh(<8 x i16> %a) { -+; CHECK-LABEL: test_vupllh: -+; CHECK: vupllh %v24, %v24 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a) -+ ret <4 x i32> %res -+} -+ -+; VUPLLF. -+define <2 x i64> @test_vupllf(<4 x i32> %a) { -+; CHECK-LABEL: test_vupllf: -+; CHECK: vupllf %v24, %v24 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a) -+ ret <2 x i64> %res -+} -+ -+; VACCB. -+define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vaccb: -+; CHECK: vaccb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VACCH. -+define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vacch: -+; CHECK: vacch %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VACCF. 
-+define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vaccf: -+; CHECK: vaccf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VACCG. -+define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vaccg: -+; CHECK: vaccg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b) -+ ret <2 x i64> %res -+} -+ -+; VAQ. -+define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vaq: -+; CHECK: vaq %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VACQ. -+define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vacq: -+; CHECK: vacq %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VACCQ. -+define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vaccq: -+; CHECK: vaccq %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VACCCQ. -+define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vacccq: -+; CHECK: vacccq %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VAVGB. -+define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vavgb: -+; CHECK: vavgb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VAVGH. 
-+define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vavgh: -+; CHECK: vavgh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VAVGF. -+define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vavgf: -+; CHECK: vavgf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VAVGG. -+define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vavgg: -+; CHECK: vavgg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b) -+ ret <2 x i64> %res -+} -+ -+; VAVGLB. -+define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vavglb: -+; CHECK: vavglb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VAVGLH. -+define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vavglh: -+; CHECK: vavglh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VAVGLF. -+define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vavglf: -+; CHECK: vavglf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VAVGLG. -+define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vavglg: -+; CHECK: vavglg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b) -+ ret <2 x i64> %res -+} -+ -+; VCKSM. 
-+define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vcksm: -+; CHECK: vcksm %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VGFMB. -+define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vgfmb: -+; CHECK: vgfmb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b) -+ ret <8 x i16> %res -+} -+ -+; VGFMH. -+define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vgfmh: -+; CHECK: vgfmh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VGFMF. -+define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vgfmf: -+; CHECK: vgfmf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VGFMG. -+define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vgfmg: -+; CHECK: vgfmg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b) -+ ret <16 x i8> %res -+} -+ -+; VGFMAB. -+define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vgfmab: -+; CHECK: vgfmab %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VGFMAH. -+define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vgfmah: -+; CHECK: vgfmah %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VGFMAF. 
-+define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_vgfmaf: -+; CHECK: vgfmaf %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b, -+ <2 x i64> %c) -+ ret <2 x i64> %res -+} -+ -+; VGFMAG. -+define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vgfmag: -+; CHECK: vgfmag %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VMAHB. -+define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vmahb: -+; CHECK: vmahb %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VMAHH. -+define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmahh: -+; CHECK: vmahh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMAHF. -+define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmahf: -+; CHECK: vmahf %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMALHB. -+define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vmalhb: -+; CHECK: vmalhb %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VMALHH. 
-+define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmalhh: -+; CHECK: vmalhh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMALHF. -+define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmalhf: -+; CHECK: vmalhf %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMAEB. -+define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmaeb: -+; CHECK: vmaeb %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMAEH. -+define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmaeh: -+; CHECK: vmaeh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMAEF. -+define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_vmaef: -+; CHECK: vmaef %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b, -+ <2 x i64> %c) -+ ret <2 x i64> %res -+} -+ -+; VMALEB. -+define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmaleb: -+; CHECK: vmaleb %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMALEH. 
-+define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmaleh: -+; CHECK: vmaleh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMALEF. -+define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_vmalef: -+; CHECK: vmalef %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b, -+ <2 x i64> %c) -+ ret <2 x i64> %res -+} -+ -+; VMAOB. -+define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmaob: -+; CHECK: vmaob %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMAOH. -+define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmaoh: -+; CHECK: vmaoh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMAOF. -+define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_vmaof: -+; CHECK: vmaof %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b, -+ <2 x i64> %c) -+ ret <2 x i64> %res -+} -+ -+; VMALOB. -+define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vmalob: -+; CHECK: vmalob %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b, -+ <8 x i16> %c) -+ ret <8 x i16> %res -+} -+ -+; VMALOH. 
-+define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vmaloh: -+; CHECK: vmaloh %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b, -+ <4 x i32> %c) -+ ret <4 x i32> %res -+} -+ -+; VMALOF. -+define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_vmalof: -+; CHECK: vmalof %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b, -+ <2 x i64> %c) -+ ret <2 x i64> %res -+} -+ -+; VMHB. -+define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmhb: -+; CHECK: vmhb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VMHH. -+define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmhh: -+; CHECK: vmhh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VMHF. -+define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmhf: -+; CHECK: vmhf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VMLHB. -+define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmlhb: -+; CHECK: vmlhb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VMLHH. -+define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmlhh: -+; CHECK: vmlhh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VMLHF. 
-+define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmlhf: -+; CHECK: vmlhf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VMEB. -+define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmeb: -+; CHECK: vmeb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b) -+ ret <8 x i16> %res -+} -+ -+; VMEH. -+define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmeh: -+; CHECK: vmeh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VMEF. -+define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmef: -+; CHECK: vmef %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VMLEB. -+define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmleb: -+; CHECK: vmleb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b) -+ ret <8 x i16> %res -+} -+ -+; VMLEH. -+define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmleh: -+; CHECK: vmleh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VMLEF. -+define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmlef: -+; CHECK: vmlef %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VMOB. 
-+define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmob: -+; CHECK: vmob %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b) -+ ret <8 x i16> %res -+} -+ -+; VMOH. -+define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmoh: -+; CHECK: vmoh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VMOF. -+define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmof: -+; CHECK: vmof %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VMLOB. -+define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vmlob: -+; CHECK: vmlob %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b) -+ ret <8 x i16> %res -+} -+ -+; VMLOH. -+define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vmloh: -+; CHECK: vmloh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VMLOF. -+define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vmlof: -+; CHECK: vmlof %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VERLLVB. -+define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_verllvb: -+; CHECK: verllvb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VERLLVH. 
-+define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_verllvh: -+; CHECK: verllvh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VERLLVF. -+define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_verllvf: -+; CHECK: verllvf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VERLLVG. -+define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_verllvg: -+; CHECK: verllvg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b) -+ ret <2 x i64> %res -+} -+ -+; VERLLB. -+define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) { -+; CHECK-LABEL: test_verllb: -+; CHECK: verllb %v24, %v24, 0(%r2) -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b) -+ ret <16 x i8> %res -+} -+ -+; VERLLH. -+define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) { -+; CHECK-LABEL: test_verllh: -+; CHECK: verllh %v24, %v24, 0(%r2) -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b) -+ ret <8 x i16> %res -+} -+ -+; VERLLF. -+define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) { -+; CHECK-LABEL: test_verllf: -+; CHECK: verllf %v24, %v24, 0(%r2) -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b) -+ ret <4 x i32> %res -+} -+ -+; VERLLG. -+define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) { -+; CHECK-LABEL: test_verllg: -+; CHECK: verllg %v24, %v24, 0(%r2) -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b) -+ ret <2 x i64> %res -+} -+ -+; VERLLB with the smallest count. 
-+define <16 x i8> @test_verllb_1(<16 x i8> %a) { -+; CHECK-LABEL: test_verllb_1: -+; CHECK: verllb %v24, %v24, 1 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VERLLB with the largest count. -+define <16 x i8> @test_verllb_4095(<16 x i8> %a) { -+; CHECK-LABEL: test_verllb_4095: -+; CHECK: verllb %v24, %v24, 4095 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095) -+ ret <16 x i8> %res -+} -+ -+; VERLLB with the largest count + 1. -+define <16 x i8> @test_verllb_4096(<16 x i8> %a) { -+; CHECK-LABEL: test_verllb_4096: -+; CHECK: lhi [[REG:%r[1-5]]], 4096 -+; CHECK: verllb %v24, %v24, 0([[REG]]) -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096) -+ ret <16 x i8> %res -+} -+ -+; VERIMB. -+define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_verimb: -+; CHECK: verimb %v24, %v26, %v28, 1 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VERIMH. -+define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_verimh: -+; CHECK: verimh %v24, %v26, %v28, 1 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1) -+ ret <8 x i16> %res -+} -+ -+; VERIMF. -+define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_verimf: -+; CHECK: verimf %v24, %v26, %v28, 1 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1) -+ ret <4 x i32> %res -+} -+ -+; VERIMG. 
-+define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { -+; CHECK-LABEL: test_verimg: -+; CHECK: verimg %v24, %v26, %v28, 1 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1) -+ ret <2 x i64> %res -+} -+ -+; VERIMB with a different mask. -+define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_verimb_254: -+; CHECK: verimb %v24, %v26, %v28, 254 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254) -+ ret <16 x i8> %res -+} -+ -+; VSL. -+define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsl: -+; CHECK: vsl %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSLB. -+define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vslb: -+; CHECK: vslb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSRA. -+define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsra: -+; CHECK: vsra %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSRAB. -+define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsrab: -+; CHECK: vsrab %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSRL. -+define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsrl: -+; CHECK: vsrl %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSRLB. 
-+define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsrlb: -+; CHECK: vsrlb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSLDB with the minimum useful value. -+define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsldb_1: -+; CHECK: vsldb %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VSLDB with the maximum value. -+define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsldb_15: -+; CHECK: vsldb %v24, %v24, %v26, 15 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15) -+ ret <16 x i8> %res -+} -+ -+; VSCBIB. -+define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vscbib: -+; CHECK: vscbib %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSCBIH. -+define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vscbih: -+; CHECK: vscbih %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VSCBIF. -+define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vscbif: -+; CHECK: vscbif %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VSCBIG. -+define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vscbig: -+; CHECK: vscbig %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b) -+ ret <2 x i64> %res -+} -+ -+; VSQ. 
-+define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsq: -+; CHECK: vsq %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSBIQ. -+define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vsbiq: -+; CHECK: vsbiq %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VSCBIQ. -+define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vscbiq: -+; CHECK: vscbiq %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VSBCBIQ. -+define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vsbcbiq: -+; CHECK: vsbcbiq %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c) -+ ret <16 x i8> %res -+} -+ -+; VSUMB. -+define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vsumb: -+; CHECK: vsumb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b) -+ ret <4 x i32> %res -+} -+ -+; VSUMH. -+define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vsumh: -+; CHECK: vsumh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b) -+ ret <4 x i32> %res -+} -+ -+; VSUMGH. -+define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vsumgh: -+; CHECK: vsumgh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b) -+ ret <2 x i64> %res -+} -+ -+; VSUMGF. 
-+define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vsumgf: -+; CHECK: vsumgf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b) -+ ret <2 x i64> %res -+} -+ -+; VSUMQF. -+define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vsumqf: -+; CHECK: vsumqf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b) -+ ret <16 x i8> %res -+} -+ -+; VSUMQG. -+define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vsumqg: -+; CHECK: vsumqg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b) -+ ret <16 x i8> %res -+} -+ -+; VTM with no processing of the result. -+define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vtm: -+; CHECK: vtm %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) -+ ret i32 %res -+} -+ -+; VTM, storing to %ptr if all bits are set. -+define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vtm_all_store: -+; CHECK-NOT: %r -+; CHECK: vtm %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) -+ %cmp = icmp sge i32 %res, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret void -+} -+ -+; VCEQBS with no processing of the result. 
-+define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vceqbs: -+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCEQBS, returning 1 if any elements are equal (CC != 3). -+define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vceqbs_any_bool: -+; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -536870912 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp ne i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCEQBS, storing to %ptr if any elements are equal. -+define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vceqbs_any_store: -+; CHECK-NOT: %r -+; CHECK: vceqbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jo|jnle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp ule i32 %cc, 2 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <16 x i8> %res -+} -+ -+; VCEQHS with no processing of the result. -+define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vceqhs: -+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCEQHS, returning 1 if not all elements are equal. 
-+define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vceqhs_notall_bool: -+; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 36 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp sge i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCEQHS, storing to %ptr if not all elements are equal. -+define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vceqhs_notall_store: -+; CHECK-NOT: %r -+; CHECK: vceqhs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp ugt i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <8 x i16> %res -+} -+ -+; VCEQFS with no processing of the result. -+define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vceqfs: -+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCEQFS, returning 1 if no elements are equal. 
-+define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vceqfs_none_bool: -+; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 35 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp eq i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCEQFS, storing to %ptr if no elements are equal. -+define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vceqfs_none_store: -+; CHECK-NOT: %r -+; CHECK: vceqfs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp uge i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <4 x i32> %res -+} -+ -+; VCEQGS with no processing of the result. -+define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vceqgs: -+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCEQGS returning 1 if all elements are equal (CC == 0). 
-+define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vceqgs_all_bool: -+; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -268435456 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ult i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCEQGS, storing to %ptr if all elements are equal. -+define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vceqgs_all_store: -+; CHECK-NOT: %r -+; CHECK: vceqgs %v24, %v24, %v26 -+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp sle i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VCHBS with no processing of the result. -+define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vchbs: -+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHBS, returning 1 if any elements are higher (CC != 3). 
-+define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vchbs_any_bool: -+; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -536870912 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp ne i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHBS, storing to %ptr if any elements are higher. -+define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vchbs_any_store: -+; CHECK-NOT: %r -+; CHECK: vchbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jo|jnle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp ule i32 %cc, 2 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <16 x i8> %res -+} -+ -+; VCHHS with no processing of the result. -+define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vchhs: -+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHHS, returning 1 if not all elements are higher. 
-+define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vchhs_notall_bool: -+; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 36 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp sge i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHHS, storing to %ptr if not all elements are higher. -+define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vchhs_notall_store: -+; CHECK-NOT: %r -+; CHECK: vchhs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp ugt i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <8 x i16> %res -+} -+ -+; VCHFS with no processing of the result. -+define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vchfs: -+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHFS, returning 1 if no elements are higher. 
-+define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vchfs_none_bool: -+; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 35 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp eq i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHFS, storing to %ptr if no elements are higher. -+define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vchfs_none_store: -+; CHECK-NOT: %r -+; CHECK: vchfs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp uge i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <4 x i32> %res -+} -+ -+; VCHGS with no processing of the result. -+define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vchgs: -+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHGS returning 1 if all elements are higher (CC == 0). 
-+define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vchgs_all_bool: -+; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -268435456 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ult i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHGS, storing to %ptr if all elements are higher. -+define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vchgs_all_store: -+; CHECK-NOT: %r -+; CHECK: vchgs %v24, %v24, %v26 -+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp sle i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VCHLBS with no processing of the result. -+define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vchlbs: -+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHLBS, returning 1 if any elements are higher (CC != 3). 
-+define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vchlbs_any_bool: -+; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -536870912 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp ne i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHLBS, storing to %ptr if any elements are higher. -+define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vchlbs_any_store: -+; CHECK-NOT: %r -+; CHECK: vchlbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jo|jnle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ %cmp = icmp sle i32 %cc, 2 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <16 x i8> %res -+} -+ -+; VCHLHS with no processing of the result. -+define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vchlhs: -+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHLHS, returning 1 if not all elements are higher. 
-+define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vchlhs_notall_bool: -+; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 36 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp uge i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHLHS, storing to %ptr if not all elements are higher. -+define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vchlhs_notall_store: -+; CHECK-NOT: %r -+; CHECK: vchlhs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ %cmp = icmp sgt i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <8 x i16> %res -+} -+ -+; VCHLFS with no processing of the result. -+define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vchlfs: -+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHLFS, returning 1 if no elements are higher. 
-+define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vchlfs_none_bool: -+; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 35 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp eq i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHLFS, storing to %ptr if no elements are higher. -+define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vchlfs_none_store: -+; CHECK-NOT: %r -+; CHECK: vchlfs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ %cmp = icmp sge i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <4 x i32> %res -+} -+ -+; VCHLGS with no processing of the result. -+define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vchlgs: -+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VCHLGS returning 1 if all elements are higher (CC == 0). 
-+define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) { -+; CHECK-LABEL: test_vchlgs_all_bool: -+; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -268435456 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp slt i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VCHLGS, storing to %ptr if all elements are higher. -+define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { -+; CHECK-LABEL: test_vchlgs_all_store: -+; CHECK-NOT: %r -+; CHECK: vchlgs %v24, %v24, %v26 -+; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ule i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VFAEB with !IN !RT. -+define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaeb_0: -+; CHECK: vfaeb %v24, %v24, %v26, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0) -+ ret <16 x i8> %res -+} -+ -+; VFAEB with !IN RT. -+define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaeb_4: -+; CHECK: vfaeb %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4) -+ ret <16 x i8> %res -+} -+ -+; VFAEB with IN !RT. -+define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaeb_8: -+; CHECK: vfaeb %v24, %v24, %v26, 8 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8) -+ ret <16 x i8> %res -+} -+ -+; VFAEB with IN RT. 
-+define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaeb_12: -+; CHECK: vfaeb %v24, %v24, %v26, 12 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12) -+ ret <16 x i8> %res -+} -+ -+; VFAEB with CS -- should be ignored. -+define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaeb_1: -+; CHECK: vfaeb %v24, %v24, %v26, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VFAEH. -+define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfaeh: -+; CHECK: vfaeh %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4) -+ ret <8 x i16> %res -+} -+ -+; VFAEF. -+define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfaef: -+; CHECK: vfaef %v24, %v24, %v26, 8 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8) -+ ret <4 x i32> %res -+} -+ -+; VFAEBS. -+define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaebs: -+; CHECK: vfaebs %v24, %v24, %v26, 0 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b, -+ i32 0) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFAEHS. 
-+define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaehs: -+; CHECK: vfaehs %v24, %v24, %v26, 4 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b, -+ i32 4) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFAEFS. -+define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaefs: -+; CHECK: vfaefs %v24, %v24, %v26, 8 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b, -+ i32 8) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFAEZB with !IN !RT. -+define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaezb_0: -+; CHECK: vfaezb %v24, %v24, %v26, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0) -+ ret <16 x i8> %res -+} -+ -+; VFAEZB with !IN RT. -+define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaezb_4: -+; CHECK: vfaezb %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4) -+ ret <16 x i8> %res -+} -+ -+; VFAEZB with IN !RT. -+define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaezb_8: -+; CHECK: vfaezb %v24, %v24, %v26, 8 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8) -+ ret <16 x i8> %res -+} -+ -+; VFAEZB with IN RT. 
-+define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaezb_12: -+; CHECK: vfaezb %v24, %v24, %v26, 12 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12) -+ ret <16 x i8> %res -+} -+ -+; VFAEZB with CS -- should be ignored. -+define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfaezb_1: -+; CHECK: vfaezb %v24, %v24, %v26, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VFAEZH. -+define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfaezh: -+; CHECK: vfaezh %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4) -+ ret <8 x i16> %res -+} -+ -+; VFAEZF. -+define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfaezf: -+; CHECK: vfaezf %v24, %v24, %v26, 8 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8) -+ ret <4 x i32> %res -+} -+ -+; VFAEZBS. -+define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaezbs: -+; CHECK: vfaezbs %v24, %v24, %v26, 0 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b, -+ i32 0) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFAEZHS. 
-+define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaezhs: -+; CHECK: vfaezhs %v24, %v24, %v26, 4 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b, -+ i32 4) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFAEZFS. -+define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfaezfs: -+; CHECK: vfaezfs %v24, %v24, %v26, 8 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b, -+ i32 8) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFEEB. -+define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfeeb_0: -+; CHECK: vfeeb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VFEEH. -+define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfeeh: -+; CHECK: vfeeh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VFEEF. -+define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfeef: -+; CHECK: vfeef %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VFEEBS. 
-+define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeebs: -+; CHECK: vfeebs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFEEHS. -+define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeehs: -+; CHECK: vfeehs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFEEFS. -+define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeefs: -+; CHECK: vfeefs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFEEZB. -+define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfeezb: -+; CHECK: vfeezb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VFEEZH. -+define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfeezh: -+; CHECK: vfeezh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VFEEZF. 
-+define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfeezf: -+; CHECK: vfeezf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VFEEZBS. -+define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeezbs: -+; CHECK: vfeezbs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFEEZHS. -+define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeezhs: -+; CHECK: vfeezhs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFEEZFS. -+define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfeezfs: -+; CHECK: vfeezfs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFENEB. 
-+define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfeneb_0: -+; CHECK: vfeneb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VFENEH. -+define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfeneh: -+; CHECK: vfeneh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VFENEF. -+define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfenef: -+; CHECK: vfenef %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VFENEBS. -+define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenebs: -+; CHECK: vfenebs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFENEHS. -+define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenehs: -+; CHECK: vfenehs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFENEFS. 
-+define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenefs: -+; CHECK: vfenefs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFENEZB. -+define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) { -+; CHECK-LABEL: test_vfenezb: -+; CHECK: vfenezb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b) -+ ret <16 x i8> %res -+} -+ -+; VFENEZH. -+define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) { -+; CHECK-LABEL: test_vfenezh: -+; CHECK: vfenezh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b) -+ ret <8 x i16> %res -+} -+ -+; VFENEZF. -+define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) { -+; CHECK-LABEL: test_vfenezf: -+; CHECK: vfenezf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b) -+ ret <4 x i32> %res -+} -+ -+; VFENEZBS. -+define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenezbs: -+; CHECK: vfenezbs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VFENEZHS. 
-+define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenezhs: -+; CHECK: vfenezhs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VFENEZFS. -+define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { -+; CHECK-LABEL: test_vfenezfs: -+; CHECK: vfenezfs %v24, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VISTRB. -+define <16 x i8> @test_vistrb(<16 x i8> %a) { -+; CHECK-LABEL: test_vistrb: -+; CHECK: vistrb %v24, %v24 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a) -+ ret <16 x i8> %res -+} -+ -+; VISTRH. -+define <8 x i16> @test_vistrh(<8 x i16> %a) { -+; CHECK-LABEL: test_vistrh: -+; CHECK: vistrh %v24, %v24 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a) -+ ret <8 x i16> %res -+} -+ -+; VISTRF. -+define <4 x i32> @test_vistrf(<4 x i32> %a) { -+; CHECK-LABEL: test_vistrf: -+; CHECK: vistrf %v24, %v24 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a) -+ ret <4 x i32> %res -+} -+ -+; VISTRBS. 
-+define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) { -+; CHECK-LABEL: test_vistrbs: -+; CHECK: vistrbs %v24, %v24 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VISTRHS. -+define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) { -+; CHECK-LABEL: test_vistrhs: -+; CHECK: vistrhs %v24, %v24 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VISTRFS. -+define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) { -+; CHECK-LABEL: test_vistrfs: -+; CHECK: vistrfs %v24, %v24 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VSTRCB with !IN !RT. -+define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrcb_0: -+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 0) -+ ret <16 x i8> %res -+} -+ -+; VSTRCB with !IN RT. 
-+define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrcb_4: -+; CHECK: vstrcb %v24, %v24, %v26, %v28, 4 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 4) -+ ret <16 x i8> %res -+} -+ -+; VSTRCB with IN !RT. -+define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrcb_8: -+; CHECK: vstrcb %v24, %v24, %v26, %v28, 8 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 8) -+ ret <16 x i8> %res -+} -+ -+; VSTRCB with IN RT. -+define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrcb_12: -+; CHECK: vstrcb %v24, %v24, %v26, %v28, 12 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 12) -+ ret <16 x i8> %res -+} -+ -+; VSTRCB with CS -- should be ignored. -+define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrcb_1: -+; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VSTRCH. -+define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vstrch: -+; CHECK: vstrch %v24, %v24, %v26, %v28, 4 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c, i32 4) -+ ret <8 x i16> %res -+} -+ -+; VSTRCF. -+define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vstrcf: -+; CHECK: vstrcf %v24, %v24, %v26, %v28, 8 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c, i32 8) -+ ret <4 x i32> %res -+} -+ -+; VSTRCBS. 
-+define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrcbs: -+; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 0) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VSTRCHS. -+define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrchs: -+; CHECK: vstrchs %v24, %v24, %v26, %v28, 4 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c, i32 4) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VSTRCFS. -+define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrcfs: -+; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c, i32 8) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VSTRCZB with !IN !RT. -+define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrczb_0: -+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 0) -+ ret <16 x i8> %res -+} -+ -+; VSTRCZB with !IN RT. 
-+define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrczb_4: -+; CHECK: vstrczb %v24, %v24, %v26, %v28, 4 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 4) -+ ret <16 x i8> %res -+} -+ -+; VSTRCZB with IN !RT. -+define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrczb_8: -+; CHECK: vstrczb %v24, %v24, %v26, %v28, 8 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 8) -+ ret <16 x i8> %res -+} -+ -+; VSTRCZB with IN RT. -+define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrczb_12: -+; CHECK: vstrczb %v24, %v24, %v26, %v28, 12 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 12) -+ ret <16 x i8> %res -+} -+ -+; VSTRCZB with CS -- should be ignored. -+define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { -+; CHECK-LABEL: test_vstrczb_1: -+; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 -+; CHECK: br %r14 -+ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 1) -+ ret <16 x i8> %res -+} -+ -+; VSTRCZH. -+define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { -+; CHECK-LABEL: test_vstrczh: -+; CHECK: vstrczh %v24, %v24, %v26, %v28, 4 -+; CHECK: br %r14 -+ %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c, i32 4) -+ ret <8 x i16> %res -+} -+ -+; VSTRCZF. -+define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { -+; CHECK-LABEL: test_vstrczf: -+; CHECK: vstrczf %v24, %v24, %v26, %v28, 8 -+; CHECK: br %r14 -+ %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c, i32 8) -+ ret <4 x i32> %res -+} -+ -+; VSTRCZBS. 
-+define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrczbs: -+; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b, -+ <16 x i8> %c, i32 0) -+ %res = extractvalue {<16 x i8>, i32} %call, 0 -+ %cc = extractvalue {<16 x i8>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <16 x i8> %res -+} -+ -+; VSTRCZHS. -+define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrczhs: -+; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b, -+ <8 x i16> %c, i32 4) -+ %res = extractvalue {<8 x i16>, i32} %call, 0 -+ %cc = extractvalue {<8 x i16>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <8 x i16> %res -+} -+ -+; VSTRCZFS. -+define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, -+ i32 *%ccptr) { -+; CHECK-LABEL: test_vstrczfs: -+; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: srl [[REG]], 28 -+; CHECK: st [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b, -+ <4 x i32> %c, i32 8) -+ %res = extractvalue {<4 x i32>, i32} %call, 0 -+ %cc = extractvalue {<4 x i32>, i32} %call, 1 -+ store i32 %cc, i32 *%ccptr -+ ret <4 x i32> %res -+} -+ -+; VFCEDBS with no processing of the result. 
-+define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfcedbs: -+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VFCEDBS, returning 1 if any elements are equal (CC != 3). -+define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfcedbs_any_bool: -+; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: afi %r2, -536870912 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ne i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VFCEDBS, storing to %ptr if any elements are equal. -+define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vfcedbs_any_store: -+; CHECK-NOT: %r -+; CHECK: vfcedbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jo|jnle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ule i32 %cc, 2 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VFCHDBS with no processing of the result. 
-+define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfchdbs: -+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VFCHDBS, returning 1 if not all elements are higher. -+define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfchdbs_notall_bool: -+; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 36 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp sge i32 %res, 1 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VFCHDBS, storing to %ptr if not all elements are higher. -+define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vfchdbs_notall_store: -+; CHECK-NOT: %r -+; CHECK: vfchdbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jhe|je}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp ugt i32 %cc, 0 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VFCHEDBS with no processing of the result. 
-+define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfchedbs: -+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VFCHEDBS, returning 1 if neither element is higher or equal. -+define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) { -+; CHECK-LABEL: test_vfchedbs_none_bool: -+; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 -+; CHECK: ipm [[REG:%r[0-5]]] -+; CHECK: risblg %r2, [[REG]], 31, 159, 35 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp eq i32 %res, 3 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VFCHEDBS, storing to %ptr if neither element is higher or equal. -+define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b, -+ i32 *%ptr) { -+; CHECK-LABEL: test_vfchedbs_none_store: -+; CHECK-NOT: %r -+; CHECK: vfchedbs %v24, %v24, %v26 -+; CHECK-NEXT: {{jno|jle}} {{\.L*}} -+; CHECK: mvhi 0(%r2), 0 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, -+ <2 x double> %b) -+ %res = extractvalue {<2 x i64>, i32} %call, 0 -+ %cc = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp uge i32 %cc, 3 -+ br i1 %cmp, label %store, label %exit -+ -+store: -+ store i32 0, i32 *%ptr -+ br label %exit -+ -+exit: -+ ret <2 x i64> %res -+} -+ -+; VFTCIDB with the lowest useful class selector and no processing of the result. 
-+define i32 @test_vftcidb(<2 x double> %a) { -+; CHECK-LABEL: test_vftcidb: -+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1 -+; CHECK: ipm %r2 -+; CHECK: srl %r2, 28 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ ret i32 %res -+} -+ -+; VFTCIDB with the highest useful class selector, returning 1 if all elements -+; have the right class (CC == 0). -+define i32 @test_vftcidb_all_bool(<2 x double> %a) { -+; CHECK-LABEL: test_vftcidb_all_bool: -+; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094 -+; CHECK: afi %r2, -268435456 -+; CHECK: srl %r2, 31 -+; CHECK: br %r14 -+ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094) -+ %res = extractvalue {<2 x i64>, i32} %call, 1 -+ %cmp = icmp eq i32 %res, 0 -+ %ext = zext i1 %cmp to i32 -+ ret i32 %ext -+} -+ -+; VFIDB with a rounding mode not usable via standard intrinsics. -+define <2 x double> @test_vfidb_0_4(<2 x double> %a) { -+; CHECK-LABEL: test_vfidb_0_4: -+; CHECK: vfidb %v24, %v24, 0, 4 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4) -+ ret <2 x double> %res -+} -+ -+; VFIDB with IEEE-inexact exception suppressed. -+define <2 x double> @test_vfidb_4_0(<2 x double> %a) { -+; CHECK-LABEL: test_vfidb_4_0: -+; CHECK: vfidb %v24, %v24, 4, 0 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0) -+ ret <2 x double> %res -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-log-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-log-01.ll -@@ -0,0 +1,15 @@ -+; Test v2f64 logarithm. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare <2 x double> @llvm.log.v2f64(<2 x double>) -+ -+define <2 x double> @f1(<2 x double> %val) { -+; CHECK-LABEL: f1: -+; CHECK: brasl %r14, log@PLT -+; CHECK: brasl %r14, log@PLT -+; CHECK: vmrhg %v24, -+; CHECK: br %r14 -+ %ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val) -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-max-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-max-01.ll -@@ -0,0 +1,83 @@ -+; Test v16i8 maximum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with sle. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with sgt. -+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with sge. -+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with ult. 
-+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with ule. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with ugt. -+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with uge. -+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <16 x i8> %val1, %val2 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-max-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-max-02.ll -@@ -0,0 +1,83 @@ -+; Test v8i16 maximum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with sle. 
-+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with sgt. -+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with sge. -+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with ult. -+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with ule. -+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with ugt. -+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with uge. 
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <8 x i16> %val1, %val2 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-max-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-max-03.ll -@@ -0,0 +1,83 @@ -+; Test v4i32 maximum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with sle. -+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with sgt. -+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with sge. -+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with ult. 
-+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with ule. -+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with ugt. -+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with uge. -+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <4 x i32> %val1, %val2 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-max-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-max-04.ll -@@ -0,0 +1,83 @@ -+; Test v2i64 maximum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with sle. 
-+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with sgt. -+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with sge. -+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with ult. -+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with ule. -+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with ugt. -+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with uge. 
-+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <2 x i64> %val1, %val2 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-min-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-min-01.ll -@@ -0,0 +1,83 @@ -+; Test v16i8 minimum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with sle. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with sgt. -+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with sge. -+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with ult. 
-+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with ule. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 -+ ret <16 x i8> %ret -+} -+ -+; Test with ugt. -+define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test with uge. -+define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <16 x i8> %val2, %val1 -+ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-min-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-min-02.ll -@@ -0,0 +1,83 @@ -+; Test v8i16 minimum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with sle. 
-+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with sgt. -+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with sge. -+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with ult. -+define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with ule. -+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 -+ ret <8 x i16> %ret -+} -+ -+; Test with ugt. -+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test with uge. 
-+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <8 x i16> %val2, %val1 -+ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 -+ ret <8 x i16> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-min-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-min-03.ll -@@ -0,0 +1,83 @@ -+; Test v4i32 minimum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with sle. -+define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with sgt. -+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with sge. -+define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with ult. 
-+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with ule. -+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 -+ ret <4 x i32> %ret -+} -+ -+; Test with ugt. -+define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test with uge. -+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <4 x i32> %val2, %val1 -+ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 -+ ret <4 x i32> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-min-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-min-04.ll -@@ -0,0 +1,83 @@ -+; Test v2i64 minimum. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test with slt. -+define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp slt <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with sle. 
-+define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sle <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with sgt. -+define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sgt <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with sge. -+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp sge <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with ult. -+define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ult <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with ule. -+define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ule <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 -+ ret <2 x i64> %ret -+} -+ -+; Test with ugt. -+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp ugt <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test with uge. 
-+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} -+; CHECK: br %r14 -+ %cmp = icmp uge <2 x i64> %val2, %val1 -+ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-01.ll -@@ -0,0 +1,107 @@ -+; Test vector register moves. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 moves. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <16 x i8> %val2 -+} -+ -+; Test v8i16 moves. -+define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <8 x i16> %val2 -+} -+ -+; Test v4i32 moves. -+define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <4 x i32> %val2 -+} -+ -+; Test v2i64 moves. -+define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x i64> %val2 -+} -+ -+; Test v4f32 moves. -+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <4 x float> %val2 -+} -+ -+; Test v2f64 moves. -+define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x double> %val2 -+} -+ -+; Test v2i8 moves. -+define <2 x i8> @f7(<2 x i8> %val1, <2 x i8> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x i8> %val2 -+} -+ -+; Test v4i8 moves. 
-+define <4 x i8> @f8(<4 x i8> %val1, <4 x i8> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <4 x i8> %val2 -+} -+ -+; Test v8i8 moves. -+define <8 x i8> @f9(<8 x i8> %val1, <8 x i8> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <8 x i8> %val2 -+} -+ -+; Test v2i16 moves. -+define <2 x i16> @f10(<2 x i16> %val1, <2 x i16> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x i16> %val2 -+} -+ -+; Test v4i16 moves. -+define <4 x i16> @f11(<4 x i16> %val1, <4 x i16> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <4 x i16> %val2 -+} -+ -+; Test v2i32 moves. -+define <2 x i32> @f12(<2 x i32> %val1, <2 x i32> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x i32> %val2 -+} -+ -+; Test v2f32 moves. -+define <2 x float> @f13(<2 x float> %val1, <2 x float> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vlr %v24, %v26 -+; CHECK: br %r14 -+ ret <2 x float> %val2 -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-02.ll -@@ -0,0 +1,174 @@ -+; Test vector loads. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 loads. -+define <16 x i8> @f1(<16 x i8> *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <16 x i8> *%ptr -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 loads. -+define <8 x i16> @f2(<8 x i16> *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <8 x i16> *%ptr -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 loads. -+define <4 x i32> @f3(<4 x i32> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <4 x i32> *%ptr -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 loads. 
-+define <2 x i64> @f4(<2 x i64> *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x i64> *%ptr -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 loads. -+define <4 x float> @f5(<4 x float> *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <4 x float> *%ptr -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 loads. -+define <2 x double> @f6(<2 x double> *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x double> *%ptr -+ ret <2 x double> %ret -+} -+ -+; Test the highest aligned in-range offset. -+define <16 x i8> @f7(<16 x i8> *%base) { -+; CHECK-LABEL: f7: -+; CHECK: vl %v24, 4080(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 255 -+ %ret = load <16 x i8> *%ptr -+ ret <16 x i8> %ret -+} -+ -+; Test the highest unaligned in-range offset. -+define <16 x i8> @f8(i8 *%base) { -+; CHECK-LABEL: f8: -+; CHECK: vl %v24, 4095(%r2) -+; CHECK: br %r14 -+ %addr = getelementptr i8 *%base, i64 4095 -+ %ptr = bitcast i8 *%addr to <16 x i8> * -+ %ret = load <16 x i8> *%ptr, align 1 -+ ret <16 x i8> %ret -+} -+ -+; Test the next offset up, which requires separate address logic, -+define <16 x i8> @f9(<16 x i8> *%base) { -+; CHECK-LABEL: f9: -+; CHECK: aghi %r2, 4096 -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 256 -+ %ret = load <16 x i8> *%ptr -+ ret <16 x i8> %ret -+} -+ -+; Test negative offsets, which also require separate address logic, -+define <16 x i8> @f10(<16 x i8> *%base) { -+; CHECK-LABEL: f10: -+; CHECK: aghi %r2, -16 -+; CHECK: vl %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 -1 -+ %ret = load <16 x i8> *%ptr -+ ret <16 x i8> %ret -+} -+ -+; Check that indexes are allowed. 
-+define <16 x i8> @f11(i8 *%base, i64 %index) { -+; CHECK-LABEL: f11: -+; CHECK: vl %v24, 0(%r3,%r2) -+; CHECK: br %r14 -+ %addr = getelementptr i8 *%base, i64 %index -+ %ptr = bitcast i8 *%addr to <16 x i8> * -+ %ret = load <16 x i8> *%ptr, align 1 -+ ret <16 x i8> %ret -+} -+ -+; Test v2i8 loads. -+define <2 x i8> @f12(<2 x i8> *%ptr) { -+; CHECK-LABEL: f12: -+; CHECK: vlreph %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x i8> *%ptr -+ ret <2 x i8> %ret -+} -+ -+; Test v4i8 loads. -+define <4 x i8> @f13(<4 x i8> *%ptr) { -+; CHECK-LABEL: f13: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <4 x i8> *%ptr -+ ret <4 x i8> %ret -+} -+ -+; Test v8i8 loads. -+define <8 x i8> @f14(<8 x i8> *%ptr) { -+; CHECK-LABEL: f14: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <8 x i8> *%ptr -+ ret <8 x i8> %ret -+} -+ -+; Test v2i16 loads. -+define <2 x i16> @f15(<2 x i16> *%ptr) { -+; CHECK-LABEL: f15: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x i16> *%ptr -+ ret <2 x i16> %ret -+} -+ -+; Test v4i16 loads. -+define <4 x i16> @f16(<4 x i16> *%ptr) { -+; CHECK-LABEL: f16: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <4 x i16> *%ptr -+ ret <4 x i16> %ret -+} -+ -+; Test v2i32 loads. -+define <2 x i32> @f17(<2 x i32> *%ptr) { -+; CHECK-LABEL: f17: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x i32> *%ptr -+ ret <2 x i32> %ret -+} -+ -+; Test v2f32 loads. -+define <2 x float> @f18(<2 x float> *%ptr) { -+; CHECK-LABEL: f18: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = load <2 x float> *%ptr -+ ret <2 x float> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-03.ll -@@ -0,0 +1,174 @@ -+; Test vector stores. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 stores. 
-+define void @f1(<16 x i8> %val, <16 x i8> *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <16 x i8> %val, <16 x i8> *%ptr -+ ret void -+} -+ -+; Test v8i16 stores. -+define void @f2(<8 x i16> %val, <8 x i16> *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <8 x i16> %val, <8 x i16> *%ptr -+ ret void -+} -+ -+; Test v4i32 stores. -+define void @f3(<4 x i32> %val, <4 x i32> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <4 x i32> %val, <4 x i32> *%ptr -+ ret void -+} -+ -+; Test v2i64 stores. -+define void @f4(<2 x i64> %val, <2 x i64> *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <2 x i64> %val, <2 x i64> *%ptr -+ ret void -+} -+ -+; Test v4f32 stores. -+define void @f5(<4 x float> %val, <4 x float> *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <4 x float> %val, <4 x float> *%ptr -+ ret void -+} -+ -+; Test v2f64 stores. -+define void @f6(<2 x double> %val, <2 x double> *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ store <2 x double> %val, <2 x double> *%ptr -+ ret void -+} -+ -+; Test the highest aligned in-range offset. -+define void @f7(<16 x i8> %val, <16 x i8> *%base) { -+; CHECK-LABEL: f7: -+; CHECK: vst %v24, 4080(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 255 -+ store <16 x i8> %val, <16 x i8> *%ptr -+ ret void -+} -+ -+; Test the highest unaligned in-range offset. 
-+define void @f8(<16 x i8> %val, i8 *%base) { -+; CHECK-LABEL: f8: -+; CHECK: vst %v24, 4095(%r2) -+; CHECK: br %r14 -+ %addr = getelementptr i8 *%base, i64 4095 -+ %ptr = bitcast i8 *%addr to <16 x i8> * -+ store <16 x i8> %val, <16 x i8> *%ptr, align 1 -+ ret void -+} -+ -+; Test the next offset up, which requires separate address logic, -+define void @f9(<16 x i8> %val, <16 x i8> *%base) { -+; CHECK-LABEL: f9: -+; CHECK: aghi %r2, 4096 -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 256 -+ store <16 x i8> %val, <16 x i8> *%ptr -+ ret void -+} -+ -+; Test negative offsets, which also require separate address logic, -+define void @f10(<16 x i8> %val, <16 x i8> *%base) { -+; CHECK-LABEL: f10: -+; CHECK: aghi %r2, -16 -+; CHECK: vst %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr <16 x i8> *%base, i64 -1 -+ store <16 x i8> %val, <16 x i8> *%ptr -+ ret void -+} -+ -+; Check that indexes are allowed. -+define void @f11(<16 x i8> %val, i8 *%base, i64 %index) { -+; CHECK-LABEL: f11: -+; CHECK: vst %v24, 0(%r3,%r2) -+; CHECK: br %r14 -+ %addr = getelementptr i8 *%base, i64 %index -+ %ptr = bitcast i8 *%addr to <16 x i8> * -+ store <16 x i8> %val, <16 x i8> *%ptr, align 1 -+ ret void -+} -+ -+; Test v2i8 stores. -+define void @f12(<2 x i8> %val, <2 x i8> *%ptr) { -+; CHECK-LABEL: f12: -+; CHECK: vsteh %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ store <2 x i8> %val, <2 x i8> *%ptr -+ ret void -+} -+ -+; Test v4i8 stores. -+define void @f13(<4 x i8> %val, <4 x i8> *%ptr) { -+; CHECK-LABEL: f13: -+; CHECK: vstef %v24, 0(%r2) -+; CHECK: br %r14 -+ store <4 x i8> %val, <4 x i8> *%ptr -+ ret void -+} -+ -+; Test v8i8 stores. -+define void @f14(<8 x i8> %val, <8 x i8> *%ptr) { -+; CHECK-LABEL: f14: -+; CHECK: vsteg %v24, 0(%r2) -+; CHECK: br %r14 -+ store <8 x i8> %val, <8 x i8> *%ptr -+ ret void -+} -+ -+; Test v2i16 stores. 
-+define void @f15(<2 x i16> %val, <2 x i16> *%ptr) { -+; CHECK-LABEL: f15: -+; CHECK: vstef %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ store <2 x i16> %val, <2 x i16> *%ptr -+ ret void -+} -+ -+; Test v4i16 stores. -+define void @f16(<4 x i16> %val, <4 x i16> *%ptr) { -+; CHECK-LABEL: f16: -+; CHECK: vsteg %v24, 0(%r2) -+; CHECK: br %r14 -+ store <4 x i16> %val, <4 x i16> *%ptr -+ ret void -+} -+ -+; Test v2i32 stores. -+define void @f17(<2 x i32> %val, <2 x i32> *%ptr) { -+; CHECK-LABEL: f17: -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ store <2 x i32> %val, <2 x i32> *%ptr -+ ret void -+} -+ -+; Test v2f32 stores. -+define void @f18(<2 x float> %val, <2 x float> *%ptr) { -+; CHECK-LABEL: f18: -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ store <2 x float> %val, <2 x float> *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-04.ll -@@ -0,0 +1,179 @@ -+; Test vector insertion of register variables. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into the first element. -+define <16 x i8> @f1(<16 x i8> %val, i8 %element) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgb %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into the last element. -+define <16 x i8> @f2(<16 x i8> %val, i8 %element) { -+; CHECK-LABEL: f2: -+; CHECK: vlvgb %v24, %r2, 15 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 15 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into a variable element. 
-+define <16 x i8> @f3(<16 x i8> %val, i8 %element, i32 %index) { -+; CHECK-LABEL: f3: -+; CHECK: vlvgb %v24, %r2, 0(%r3) -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %index -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 insertion into the first element. -+define <8 x i16> @f4(<8 x i16> %val, i16 %element) { -+; CHECK-LABEL: f4: -+; CHECK: vlvgh %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into the last element. -+define <8 x i16> @f5(<8 x i16> %val, i16 %element) { -+; CHECK-LABEL: f5: -+; CHECK: vlvgh %v24, %r2, 7 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 7 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into a variable element. -+define <8 x i16> @f6(<8 x i16> %val, i16 %element, i32 %index) { -+; CHECK-LABEL: f6: -+; CHECK: vlvgh %v24, %r2, 0(%r3) -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 %index -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 insertion into the first element. -+define <4 x i32> @f7(<4 x i32> %val, i32 %element) { -+; CHECK-LABEL: f7: -+; CHECK: vlvgf %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into the last element. -+define <4 x i32> @f8(<4 x i32> %val, i32 %element) { -+; CHECK-LABEL: f8: -+; CHECK: vlvgf %v24, %r2, 3 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into a variable element. -+define <4 x i32> @f9(<4 x i32> %val, i32 %element, i32 %index) { -+; CHECK-LABEL: f9: -+; CHECK: vlvgf %v24, %r2, 0(%r3) -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 %index -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into the first element. 
-+define <2 x i64> @f10(<2 x i64> %val, i64 %element) { -+; CHECK-LABEL: f10: -+; CHECK: vlvgg %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into the last element. -+define <2 x i64> @f11(<2 x i64> %val, i64 %element) { -+; CHECK-LABEL: f11: -+; CHECK: vlvgg %v24, %r2, 1 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into a variable element. -+define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) { -+; CHECK-LABEL: f12: -+; CHECK: vlvgg %v24, %r2, 0(%r3) -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 %index -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion into the first element. -+define <4 x float> @f13(<4 x float> %val, float %element) { -+; CHECK-LABEL: f13: -+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: vlvgf %v24, [[REG]], 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float %element, i32 0 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion into the last element. -+define <4 x float> @f14(<4 x float> %val, float %element) { -+; CHECK-LABEL: f14: -+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: vlvgf %v24, [[REG]], 3 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float %element, i32 3 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion into a variable element. -+define <4 x float> @f15(<4 x float> %val, float %element, i32 %index) { -+; CHECK-LABEL: f15: -+; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 -+; CHECK: vlvgf %v24, [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float %element, i32 %index -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion into the first element. 
-+define <2 x double> @f16(<2 x double> %val, double %element) { -+; CHECK-LABEL: f16: -+; CHECK: vpdi %v24, %v0, %v24, 1 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double %element, i32 0 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion into the last element. -+define <2 x double> @f17(<2 x double> %val, double %element) { -+; CHECK-LABEL: f17: -+; CHECK: vpdi %v24, %v24, %v0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double %element, i32 1 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion into a variable element. -+define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) { -+; CHECK-LABEL: f18: -+; CHECK: lgdr [[REG:%r[0-5]]], %f0 -+; CHECK: vlvgg %v24, [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double %element, i32 %index -+ ret <2 x double> %ret -+} -+ -+; Test v16i8 insertion into a variable element plus one. -+define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) { -+; CHECK-LABEL: f19: -+; CHECK: vlvgb %v24, %r2, 1(%r3) -+; CHECK: br %r14 -+ %add = add i32 %index, 1 -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %add -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-05.ll -@@ -0,0 +1,249 @@ -+; Test vector extraction. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 extraction of the first element. -+define i8 @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlgvb %r2, %v24, 0 -+; CHECK: br %r14 -+ %ret = extractelement <16 x i8> %val, i32 0 -+ ret i8 %ret -+} -+ -+; Test v16i8 extraction of the last element. -+define i8 @f2(<16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlgvb %r2, %v24, 15 -+; CHECK: br %r14 -+ %ret = extractelement <16 x i8> %val, i32 15 -+ ret i8 %ret -+} -+ -+; Test v16i8 extractions of an absurd element number. 
This must compile -+; but we don't care what it does. -+define i8 @f3(<16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK-NOT: vlgvb %r2, %v24, 100000 -+; CHECK: br %r14 -+ %ret = extractelement <16 x i8> %val, i32 100000 -+ ret i8 %ret -+} -+ -+; Test v16i8 extraction of a variable element. -+define i8 @f4(<16 x i8> %val, i32 %index) { -+; CHECK-LABEL: f4: -+; CHECK: vlgvb %r2, %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = extractelement <16 x i8> %val, i32 %index -+ ret i8 %ret -+} -+ -+; Test v8i16 extraction of the first element. -+define i16 @f5(<8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlgvh %r2, %v24, 0 -+; CHECK: br %r14 -+ %ret = extractelement <8 x i16> %val, i32 0 -+ ret i16 %ret -+} -+ -+; Test v8i16 extraction of the last element. -+define i16 @f6(<8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlgvh %r2, %v24, 7 -+; CHECK: br %r14 -+ %ret = extractelement <8 x i16> %val, i32 7 -+ ret i16 %ret -+} -+ -+; Test v8i16 extractions of an absurd element number. This must compile -+; but we don't care what it does. -+define i16 @f7(<8 x i16> %val) { -+; CHECK-LABEL: f7: -+; CHECK-NOT: vlgvh %r2, %v24, 100000 -+; CHECK: br %r14 -+ %ret = extractelement <8 x i16> %val, i32 100000 -+ ret i16 %ret -+} -+ -+; Test v8i16 extraction of a variable element. -+define i16 @f8(<8 x i16> %val, i32 %index) { -+; CHECK-LABEL: f8: -+; CHECK: vlgvh %r2, %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = extractelement <8 x i16> %val, i32 %index -+ ret i16 %ret -+} -+ -+; Test v4i32 extraction of the first element. -+define i32 @f9(<4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlgvf %r2, %v24, 0 -+; CHECK: br %r14 -+ %ret = extractelement <4 x i32> %val, i32 0 -+ ret i32 %ret -+} -+ -+; Test v4i32 extraction of the last element. -+define i32 @f10(<4 x i32> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlgvf %r2, %v24, 3 -+; CHECK: br %r14 -+ %ret = extractelement <4 x i32> %val, i32 3 -+ ret i32 %ret -+} -+ -+; Test v4i32 extractions of an absurd element number. 
This must compile -+; but we don't care what it does. -+define i32 @f11(<4 x i32> %val) { -+; CHECK-LABEL: f11: -+; CHECK-NOT: vlgvf %r2, %v24, 100000 -+; CHECK: br %r14 -+ %ret = extractelement <4 x i32> %val, i32 100000 -+ ret i32 %ret -+} -+ -+; Test v4i32 extraction of a variable element. -+define i32 @f12(<4 x i32> %val, i32 %index) { -+; CHECK-LABEL: f12: -+; CHECK: vlgvf %r2, %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = extractelement <4 x i32> %val, i32 %index -+ ret i32 %ret -+} -+ -+; Test v2i64 extraction of the first element. -+define i64 @f13(<2 x i64> %val) { -+; CHECK-LABEL: f13: -+; CHECK: vlgvg %r2, %v24, 0 -+; CHECK: br %r14 -+ %ret = extractelement <2 x i64> %val, i32 0 -+ ret i64 %ret -+} -+ -+; Test v2i64 extraction of the last element. -+define i64 @f14(<2 x i64> %val) { -+; CHECK-LABEL: f14: -+; CHECK: vlgvg %r2, %v24, 1 -+; CHECK: br %r14 -+ %ret = extractelement <2 x i64> %val, i32 1 -+ ret i64 %ret -+} -+ -+; Test v2i64 extractions of an absurd element number. This must compile -+; but we don't care what it does. -+define i64 @f15(<2 x i64> %val) { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vlgvg %r2, %v24, 100000 -+; CHECK: br %r14 -+ %ret = extractelement <2 x i64> %val, i32 100000 -+ ret i64 %ret -+} -+ -+; Test v2i64 extraction of a variable element. -+define i64 @f16(<2 x i64> %val, i32 %index) { -+; CHECK-LABEL: f16: -+; CHECK: vlgvg %r2, %v24, 0(%r2) -+; CHECK: br %r14 -+ %ret = extractelement <2 x i64> %val, i32 %index -+ ret i64 %ret -+} -+ -+; Test v4f32 extraction of element 0. -+define float @f17(<4 x float> %val) { -+; CHECK-LABEL: f17: -+; CHECK: vlr %v0, %v24 -+; CHECK: br %r14 -+ %ret = extractelement <4 x float> %val, i32 0 -+ ret float %ret -+} -+ -+; Test v4f32 extraction of element 1. -+define float @f18(<4 x float> %val) { -+; CHECK-LABEL: f18: -+; CHECK: vrepf %v0, %v24, 1 -+; CHECK: br %r14 -+ %ret = extractelement <4 x float> %val, i32 1 -+ ret float %ret -+} -+ -+; Test v4f32 extraction of element 2. 
-+define float @f19(<4 x float> %val) { -+; CHECK-LABEL: f19: -+; CHECK: vrepf %v0, %v24, 2 -+; CHECK: br %r14 -+ %ret = extractelement <4 x float> %val, i32 2 -+ ret float %ret -+} -+ -+; Test v4f32 extraction of element 3. -+define float @f20(<4 x float> %val) { -+; CHECK-LABEL: f20: -+; CHECK: vrepf %v0, %v24, 3 -+; CHECK: br %r14 -+ %ret = extractelement <4 x float> %val, i32 3 -+ ret float %ret -+} -+ -+; Test v4f32 extractions of an absurd element number. This must compile -+; but we don't care what it does. -+define float @f21(<4 x float> %val) { -+ %ret = extractelement <4 x float> %val, i32 100000 -+ ret float %ret -+} -+ -+; Test v4f32 extraction of a variable element. -+define float @f22(<4 x float> %val, i32 %index) { -+; CHECK-LABEL: f22: -+; CHECK: vlgvf [[REG:%r[0-5]]], %v24, 0(%r2) -+; CHECK: vlvgf %v0, [[REG]], 0 -+; CHECK: br %r14 -+ %ret = extractelement <4 x float> %val, i32 %index -+ ret float %ret -+} -+ -+; Test v2f64 extraction of the first element. -+define double @f23(<2 x double> %val) { -+; CHECK-LABEL: f23: -+; CHECK: vlr %v0, %v24 -+; CHECK: br %r14 -+ %ret = extractelement <2 x double> %val, i32 0 -+ ret double %ret -+} -+ -+; Test v2f64 extraction of the last element. -+define double @f24(<2 x double> %val) { -+; CHECK-LABEL: f24: -+; CHECK: vrepg %v0, %v24, 1 -+; CHECK: br %r14 -+ %ret = extractelement <2 x double> %val, i32 1 -+ ret double %ret -+} -+ -+; Test v2f64 extractions of an absurd element number. This must compile -+; but we don't care what it does. -+define double @f25(<2 x double> %val) { -+ %ret = extractelement <2 x double> %val, i32 100000 -+ ret double %ret -+} -+ -+; Test v2f64 extraction of a variable element. 
-+define double @f26(<2 x double> %val, i32 %index) { -+; CHECK-LABEL: f26: -+; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2) -+; CHECK: ldgr %f0, [[REG]] -+; CHECK: br %r14 -+ %ret = extractelement <2 x double> %val, i32 %index -+ ret double %ret -+} -+ -+; Test v16i8 extraction of a variable element with an offset. -+define i8 @f27(<16 x i8> %val, i32 %index) { -+; CHECK-LABEL: f27: -+; CHECK: vlgvb %r2, %v24, 1(%r2) -+; CHECK: br %r14 -+ %add = add i32 %index, 1 -+ %ret = extractelement <16 x i8> %val, i32 %add -+ ret i8 %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-06.ll -@@ -0,0 +1,13 @@ -+; Test vector builds using VLVGP. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test the basic v2i64 usage. -+define <2 x i64> @f1(i64 %a, i64 %b) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgp %v24, %r2, %r3 -+; CHECK: br %r14 -+ %veca = insertelement <2 x i64> undef, i64 %a, i32 0 -+ %vecb = insertelement <2 x i64> %veca, i64 %b, i32 1 -+ ret <2 x i64> %vecb -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-07.ll -@@ -0,0 +1,57 @@ -+; Test scalar_to_vector expansion. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8. -+define <16 x i8> @f1(i8 %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgb %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 0 -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16. -+define <8 x i16> @f2(i16 %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlvgh %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 0 -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32. 
-+define <4 x i32> @f3(i32 %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlvgf %v24, %r2, 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 0 -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64. Here we load %val into both halves. -+define <2 x i64> @f4(i64 %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> undef, i64 %val, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32, which is just a move. -+define <4 x float> @f5(float %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlr %v24, %v0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> undef, float %val, i32 0 -+ ret <4 x float> %ret -+} -+ -+; Likewise v2f64. -+define <2 x double> @f6(double %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlr %v24, %v0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> undef, double %val, i32 0 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-08.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-08.ll -@@ -0,0 +1,444 @@ -+; Test vector insertion of memory values. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into the first element. -+define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vleb %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load i8 *%ptr -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 0 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into the last element. -+define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: vleb %v24, 0(%r2), 15 -+; CHECK: br %r14 -+ %element = load i8 *%ptr -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 15 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion with the highest in-range offset. 
-+define <16 x i8> @f3(<16 x i8> %val, i8 *%base) { -+; CHECK-LABEL: f3: -+; CHECK: vleb %v24, 4095(%r2), 10 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i32 4095 -+ %element = load i8 *%ptr -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 10 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion with the first ouf-of-range offset. -+define <16 x i8> @f4(<16 x i8> %val, i8 *%base) { -+; CHECK-LABEL: f4: -+; CHECK: aghi %r2, 4096 -+; CHECK: vleb %v24, 0(%r2), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i32 4096 -+ %element = load i8 *%ptr -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 5 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into a variable element. -+define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) { -+; CHECK-LABEL: f5: -+; CHECK-NOT: vleb -+; CHECK: br %r14 -+ %element = load i8 *%ptr -+ %ret = insertelement <16 x i8> %val, i8 %element, i32 %index -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 insertion into the first element. -+define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vleh %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load i16 *%ptr -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 0 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into the last element. -+define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) { -+; CHECK-LABEL: f7: -+; CHECK: vleh %v24, 0(%r2), 7 -+; CHECK: br %r14 -+ %element = load i16 *%ptr -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 7 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion with the highest in-range offset. -+define <8 x i16> @f8(<8 x i16> %val, i16 *%base) { -+; CHECK-LABEL: f8: -+; CHECK: vleh %v24, 4094(%r2), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i32 2047 -+ %element = load i16 *%ptr -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 5 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion with the first ouf-of-range offset. 
-+define <8 x i16> @f9(<8 x i16> %val, i16 *%base) { -+; CHECK-LABEL: f9: -+; CHECK: aghi %r2, 4096 -+; CHECK: vleh %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i32 2048 -+ %element = load i16 *%ptr -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 1 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into a variable element. -+define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) { -+; CHECK-LABEL: f10: -+; CHECK-NOT: vleh -+; CHECK: br %r14 -+ %element = load i16 *%ptr -+ %ret = insertelement <8 x i16> %val, i16 %element, i32 %index -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 insertion into the first element. -+define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) { -+; CHECK-LABEL: f11: -+; CHECK: vlef %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into the last element. -+define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) { -+; CHECK-LABEL: f12: -+; CHECK: vlef %v24, 0(%r2), 3 -+; CHECK: br %r14 -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the highest in-range offset. -+define <4 x i32> @f13(<4 x i32> %val, i32 *%base) { -+; CHECK-LABEL: f13: -+; CHECK: vlef %v24, 4092(%r2), 2 -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i32 1023 -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 2 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the first ouf-of-range offset. -+define <4 x i32> @f14(<4 x i32> %val, i32 *%base) { -+; CHECK-LABEL: f14: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlef %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i32 1024 -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into a variable element. 
-+define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vlef -+; CHECK: br %r14 -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 %index -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into the first element. -+define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) { -+; CHECK-LABEL: f16: -+; CHECK: vleg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into the last element. -+define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) { -+; CHECK-LABEL: f17: -+; CHECK: vleg %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the highest in-range offset. -+define <2 x i64> @f18(<2 x i64> %val, i64 *%base) { -+; CHECK-LABEL: f18: -+; CHECK: vleg %v24, 4088(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 511 -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the first ouf-of-range offset. -+define <2 x i64> @f19(<2 x i64> %val, i64 *%base) { -+; CHECK-LABEL: f19: -+; CHECK: aghi %r2, 4096 -+; CHECK: vleg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 512 -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into a variable element. -+define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) { -+; CHECK-LABEL: f20: -+; CHECK-NOT: vleg -+; CHECK: br %r14 -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 %index -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion into the first element. 
-+define <4 x float> @f21(<4 x float> %val, float *%ptr) { -+; CHECK-LABEL: f21: -+; CHECK: vlef %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 0 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion into the last element. -+define <4 x float> @f22(<4 x float> %val, float *%ptr) { -+; CHECK-LABEL: f22: -+; CHECK: vlef %v24, 0(%r2), 3 -+; CHECK: br %r14 -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 3 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion with the highest in-range offset. -+define <4 x float> @f23(<4 x float> %val, float *%base) { -+; CHECK-LABEL: f23: -+; CHECK: vlef %v24, 4092(%r2), 2 -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i32 1023 -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 2 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion with the first ouf-of-range offset. -+define <4 x float> @f24(<4 x float> %val, float *%base) { -+; CHECK-LABEL: f24: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlef %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i32 1024 -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 1 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion into a variable element. -+define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) { -+; CHECK-LABEL: f25: -+; CHECK-NOT: vlef -+; CHECK: br %r14 -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 %index -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion into the first element. -+define <2 x double> @f26(<2 x double> %val, double *%ptr) { -+; CHECK-LABEL: f26: -+; CHECK: vleg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 0 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion into the last element. 
-+define <2 x double> @f27(<2 x double> %val, double *%ptr) { -+; CHECK-LABEL: f27: -+; CHECK: vleg %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 1 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion with the highest in-range offset. -+define <2 x double> @f28(<2 x double> %val, double *%base) { -+; CHECK-LABEL: f28: -+; CHECK: vleg %v24, 4088(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 511 -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 1 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion with the first ouf-of-range offset. -+define <2 x double> @f29(<2 x double> %val, double *%base) { -+; CHECK-LABEL: f29: -+; CHECK: aghi %r2, 4096 -+; CHECK: vleg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 512 -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 0 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion into a variable element. -+define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) { -+; CHECK-LABEL: f30: -+; CHECK-NOT: vleg -+; CHECK: br %r14 -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 %index -+ ret <2 x double> %ret -+} -+ -+; Test a v4i32 gather of the first element. -+define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f31: -+; CHECK: vgef %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 0 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to i32 * -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 0 -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 gather of the last element. 
-+define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f32: -+; CHECK: vgef %v24, 0(%v26,%r2), 3 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 3 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to i32 * -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 gather with the highest in-range offset. -+define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f33: -+; CHECK: vgef %v24, 4095(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 1 -+ %ext = zext i32 %elem to i64 -+ %add1 = add i64 %base, %ext -+ %add2 = add i64 %add1, 4095 -+ %ptr = inttoptr i64 %add2 to i32 * -+ %element = load i32 *%ptr -+ %ret = insertelement <4 x i32> %val, i32 %element, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 gather of the first element. -+define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f34: -+; CHECK: vgeg %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 0 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to i64 * -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 gather of the last element. -+define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f35: -+; CHECK: vgeg %v24, 0(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 1 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to i64 * -+ %element = load i64 *%ptr -+ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test a v4f32 gather of the first element. 
-+define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f36: -+; CHECK: vgef %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 0 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to float * -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 0 -+ ret <4 x float> %ret -+} -+ -+; Test a v4f32 gather of the last element. -+define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f37: -+; CHECK: vgef %v24, 0(%v26,%r2), 3 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 3 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to float * -+ %element = load float *%ptr -+ %ret = insertelement <4 x float> %val, float %element, i32 3 -+ ret <4 x float> %ret -+} -+ -+; Test a v2f64 gather of the first element. -+define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f38: -+; CHECK: vgeg %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 0 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to double * -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 0 -+ ret <2 x double> %ret -+} -+ -+; Test a v2f64 gather of the last element. 
-+define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f39: -+; CHECK: vgeg %v24, 0(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 1 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to double * -+ %element = load double *%ptr -+ %ret = insertelement <2 x double> %val, double %element, i32 1 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-09.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-09.ll -@@ -0,0 +1,291 @@ -+; Test vector insertion of constants. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into the first element. -+define <16 x i8> @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vleib %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 0, i32 0 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into the last element. -+define <16 x i8> @f2(<16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vleib %v24, 100, 15 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 100, i32 15 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion with the maximum signed value. -+define <16 x i8> @f3(<16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vleib %v24, 127, 10 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 127, i32 10 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion with the minimum signed value. -+define <16 x i8> @f4(<16 x i8> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vleib %v24, -128, 11 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 128, i32 11 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion with the maximum unsigned value. 
-+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vleib %v24, -1, 12 -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 255, i32 12 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into a variable element. -+define <16 x i8> @f6(<16 x i8> %val, i32 %index) { -+; CHECK-LABEL: f6: -+; CHECK-NOT: vleib -+; CHECK: br %r14 -+ %ret = insertelement <16 x i8> %val, i8 0, i32 %index -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 insertion into the first element. -+define <8 x i16> @f7(<8 x i16> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vleih %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 0, i32 0 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into the last element. -+define <8 x i16> @f8(<8 x i16> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vleih %v24, 0, 7 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 0, i32 7 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion with the maximum signed value. -+define <8 x i16> @f9(<8 x i16> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vleih %v24, 32767, 4 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 32767, i32 4 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion with the minimum signed value. -+define <8 x i16> @f10(<8 x i16> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vleih %v24, -32768, 5 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 32768, i32 5 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion with the maximum unsigned value. -+define <8 x i16> @f11(<8 x i16> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vleih %v24, -1, 6 -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 65535, i32 6 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into a variable element. -+define <8 x i16> @f12(<8 x i16> %val, i32 %index) { -+; CHECK-LABEL: f12: -+; CHECK-NOT: vleih -+; CHECK: br %r14 -+ %ret = insertelement <8 x i16> %val, i16 0, i32 %index -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 insertion into the first element. 
-+define <4 x i32> @f13(<4 x i32> %val) { -+; CHECK-LABEL: f13: -+; CHECK: vleif %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 0, i32 0 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into the last element. -+define <4 x i32> @f14(<4 x i32> %val) { -+; CHECK-LABEL: f14: -+; CHECK: vleif %v24, 0, 3 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 0, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the maximum value allowed by VLEIF. -+define <4 x i32> @f15(<4 x i32> %val) { -+; CHECK-LABEL: f15: -+; CHECK: vleif %v24, 32767, 1 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 32767, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the next value up. -+define <4 x i32> @f16(<4 x i32> %val) { -+; CHECK-LABEL: f16: -+; CHECK-NOT: vleif -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 32768, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the minimum value allowed by VLEIF. -+define <4 x i32> @f17(<4 x i32> %val) { -+; CHECK-LABEL: f17: -+; CHECK: vleif %v24, -32768, 2 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 -32768, i32 2 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion with the next value down. -+define <4 x i32> @f18(<4 x i32> %val) { -+; CHECK-LABEL: f18: -+; CHECK-NOT: vleif -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 -32769, i32 2 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into a variable element. -+define <4 x i32> @f19(<4 x i32> %val, i32 %index) { -+; CHECK-LABEL: f19: -+; CHECK-NOT: vleif -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> %val, i32 0, i32 %index -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into the first element. -+define <2 x i64> @f20(<2 x i64> %val) { -+; CHECK-LABEL: f20: -+; CHECK: vleig %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 0, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into the last element. 
-+define <2 x i64> @f21(<2 x i64> %val) { -+; CHECK-LABEL: f21: -+; CHECK: vleig %v24, 0, 1 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 0, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the maximum value allowed by VLEIG. -+define <2 x i64> @f22(<2 x i64> %val) { -+; CHECK-LABEL: f22: -+; CHECK: vleig %v24, 32767, 1 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 32767, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the next value up. -+define <2 x i64> @f23(<2 x i64> %val) { -+; CHECK-LABEL: f23: -+; CHECK-NOT: vleig -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 32768, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the minimum value allowed by VLEIG. -+define <2 x i64> @f24(<2 x i64> %val) { -+; CHECK-LABEL: f24: -+; CHECK: vleig %v24, -32768, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 -32768, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion with the next value down. -+define <2 x i64> @f25(<2 x i64> %val) { -+; CHECK-LABEL: f25: -+; CHECK-NOT: vleig -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 -32769, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 insertion into a variable element. -+define <2 x i64> @f26(<2 x i64> %val, i32 %index) { -+; CHECK-LABEL: f26: -+; CHECK-NOT: vleig -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> %val, i64 0, i32 %index -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion of 0 into the first element. -+define <4 x float> @f27(<4 x float> %val) { -+; CHECK-LABEL: f27: -+; CHECK: vleif %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float 0.0, i32 0 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion of 0 into the last element. 
-+define <4 x float> @f28(<4 x float> %val) { -+; CHECK-LABEL: f28: -+; CHECK: vleif %v24, 0, 3 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float 0.0, i32 3 -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 insertion of a nonzero value. -+define <4 x float> @f29(<4 x float> %val) { -+; CHECK-LABEL: f29: -+; CHECK-NOT: vleif -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> %val, float 1.0, i32 1 -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion of 0 into the first element. -+define <2 x double> @f30(<2 x double> %val) { -+; CHECK-LABEL: f30: -+; CHECK: vleig %v24, 0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double 0.0, i32 0 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion of 0 into the last element. -+define <2 x double> @f31(<2 x double> %val) { -+; CHECK-LABEL: f31: -+; CHECK: vleig %v24, 0, 1 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double 0.0, i32 1 -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 insertion of a nonzero value. -+define <2 x double> @f32(<2 x double> %val) { -+; CHECK-LABEL: f32: -+; CHECK-NOT: vleig -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> %val, double 1.0, i32 1 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-10.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-10.ll -@@ -0,0 +1,499 @@ -+; Test vector extraction to memory. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 extraction from the first element. -+define void @f1(<16 x i8> %val, i8 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vsteb %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <16 x i8> %val, i32 0 -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v16i8 extraction from the last element. 
-+define void @f2(<16 x i8> %val, i8 *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: vsteb %v24, 0(%r2), 15 -+; CHECK: br %r14 -+ %element = extractelement <16 x i8> %val, i32 15 -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v16i8 extraction of an invalid element. This must compile, -+; but we don't care what it does. -+define void @f3(<16 x i8> %val, i8 *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK-NOT: vsteb %v24, 0(%r2), 16 -+; CHECK: br %r14 -+ %element = extractelement <16 x i8> %val, i32 16 -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v16i8 extraction with the highest in-range offset. -+define void @f4(<16 x i8> %val, i8 *%base) { -+; CHECK-LABEL: f4: -+; CHECK: vsteb %v24, 4095(%r2), 10 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i32 4095 -+ %element = extractelement <16 x i8> %val, i32 10 -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v16i8 extraction with the first ouf-of-range offset. -+define void @f5(<16 x i8> %val, i8 *%base) { -+; CHECK-LABEL: f5: -+; CHECK: aghi %r2, 4096 -+; CHECK: vsteb %v24, 0(%r2), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i32 4096 -+ %element = extractelement <16 x i8> %val, i32 5 -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v16i8 extraction from a variable element. -+define void @f6(<16 x i8> %val, i8 *%ptr, i32 %index) { -+; CHECK-LABEL: f6: -+; CHECK-NOT: vsteb -+; CHECK: br %r14 -+ %element = extractelement <16 x i8> %val, i32 %index -+ store i8 %element, i8 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction from the first element. -+define void @f7(<8 x i16> %val, i16 *%ptr) { -+; CHECK-LABEL: f7: -+; CHECK: vsteh %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <8 x i16> %val, i32 0 -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction from the last element. 
-+define void @f8(<8 x i16> %val, i16 *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: vsteh %v24, 0(%r2), 7 -+; CHECK: br %r14 -+ %element = extractelement <8 x i16> %val, i32 7 -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction of an invalid element. This must compile, -+; but we don't care what it does. -+define void @f9(<8 x i16> %val, i16 *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK-NOT: vsteh %v24, 0(%r2), 8 -+; CHECK: br %r14 -+ %element = extractelement <8 x i16> %val, i32 8 -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction with the highest in-range offset. -+define void @f10(<8 x i16> %val, i16 *%base) { -+; CHECK-LABEL: f10: -+; CHECK: vsteh %v24, 4094(%r2), 5 -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i32 2047 -+ %element = extractelement <8 x i16> %val, i32 5 -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction with the first ouf-of-range offset. -+define void @f11(<8 x i16> %val, i16 *%base) { -+; CHECK-LABEL: f11: -+; CHECK: aghi %r2, 4096 -+; CHECK: vsteh %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i32 2048 -+ %element = extractelement <8 x i16> %val, i32 1 -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v8i16 extraction from a variable element. -+define void @f12(<8 x i16> %val, i16 *%ptr, i32 %index) { -+; CHECK-LABEL: f12: -+; CHECK-NOT: vsteh -+; CHECK: br %r14 -+ %element = extractelement <8 x i16> %val, i32 %index -+ store i16 %element, i16 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction from the first element. -+define void @f13(<4 x i32> %val, i32 *%ptr) { -+; CHECK-LABEL: f13: -+; CHECK: vstef %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <4 x i32> %val, i32 0 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction from the last element. 
-+define void @f14(<4 x i32> %val, i32 *%ptr) { -+; CHECK-LABEL: f14: -+; CHECK: vstef %v24, 0(%r2), 3 -+; CHECK: br %r14 -+ %element = extractelement <4 x i32> %val, i32 3 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction of an invalid element. This must compile, -+; but we don't care what it does. -+define void @f15(<4 x i32> %val, i32 *%ptr) { -+; CHECK-LABEL: f15: -+; CHECK-NOT: vstef %v24, 0(%r2), 4 -+; CHECK: br %r14 -+ %element = extractelement <4 x i32> %val, i32 4 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction with the highest in-range offset. -+define void @f16(<4 x i32> %val, i32 *%base) { -+; CHECK-LABEL: f16: -+; CHECK: vstef %v24, 4092(%r2), 2 -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i32 1023 -+ %element = extractelement <4 x i32> %val, i32 2 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction with the first ouf-of-range offset. -+define void @f17(<4 x i32> %val, i32 *%base) { -+; CHECK-LABEL: f17: -+; CHECK: aghi %r2, 4096 -+; CHECK: vstef %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i32 1024 -+ %element = extractelement <4 x i32> %val, i32 1 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v4i32 extraction from a variable element. -+define void @f18(<4 x i32> %val, i32 *%ptr, i32 %index) { -+; CHECK-LABEL: f18: -+; CHECK-NOT: vstef -+; CHECK: br %r14 -+ %element = extractelement <4 x i32> %val, i32 %index -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction from the first element. -+define void @f19(<2 x i64> %val, i64 *%ptr) { -+; CHECK-LABEL: f19: -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <2 x i64> %val, i32 0 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction from the last element. 
-+define void @f20(<2 x i64> %val, i64 *%ptr) { -+; CHECK-LABEL: f20: -+; CHECK: vsteg %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %element = extractelement <2 x i64> %val, i32 1 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction of an invalid element. This must compile, -+; but we don't care what it does. -+define void @f21(<2 x i64> %val, i64 *%ptr) { -+; CHECK-LABEL: f21: -+; CHECK-NOT: vsteg %v24, 0(%r2), 2 -+; CHECK: br %r14 -+ %element = extractelement <2 x i64> %val, i32 2 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction with the highest in-range offset. -+define void @f22(<2 x i64> %val, i64 *%base) { -+; CHECK-LABEL: f22: -+; CHECK: vsteg %v24, 4088(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 511 -+ %element = extractelement <2 x i64> %val, i32 1 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction with the first ouf-of-range offset. -+define void @f23(<2 x i64> %val, i64 *%base) { -+; CHECK-LABEL: f23: -+; CHECK: aghi %r2, 4096 -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 512 -+ %element = extractelement <2 x i64> %val, i32 0 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v2i64 extraction from a variable element. -+define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) { -+; CHECK-LABEL: f24: -+; CHECK-NOT: vsteg -+; CHECK: br %r14 -+ %element = extractelement <2 x i64> %val, i32 %index -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction from the first element. -+define void @f25(<4 x float> %val, float *%ptr) { -+; CHECK-LABEL: f25: -+; CHECK: vstef %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <4 x float> %val, i32 0 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction from the last element. 
-+define void @f26(<4 x float> %val, float *%ptr) { -+; CHECK-LABEL: f26: -+; CHECK: vstef %v24, 0(%r2), 3 -+; CHECK: br %r14 -+ %element = extractelement <4 x float> %val, i32 3 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction of an invalid element. This must compile, -+; but we don't care what it does. -+define void @f27(<4 x float> %val, float *%ptr) { -+; CHECK-LABEL: f27: -+; CHECK-NOT: vstef %v24, 0(%r2), 4 -+; CHECK: br %r14 -+ %element = extractelement <4 x float> %val, i32 4 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction with the highest in-range offset. -+define void @f28(<4 x float> %val, float *%base) { -+; CHECK-LABEL: f28: -+; CHECK: vstef %v24, 4092(%r2), 2 -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i32 1023 -+ %element = extractelement <4 x float> %val, i32 2 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction with the first ouf-of-range offset. -+define void @f29(<4 x float> %val, float *%base) { -+; CHECK-LABEL: f29: -+; CHECK: aghi %r2, 4096 -+; CHECK: vstef %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i32 1024 -+ %element = extractelement <4 x float> %val, i32 1 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v4f32 extraction from a variable element. -+define void @f30(<4 x float> %val, float *%ptr, i32 %index) { -+; CHECK-LABEL: f30: -+; CHECK-NOT: vstef -+; CHECK: br %r14 -+ %element = extractelement <4 x float> %val, i32 %index -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test v2f64 extraction from the first element. -+define void @f32(<2 x double> %val, double *%ptr) { -+; CHECK-LABEL: f32: -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %element = extractelement <2 x double> %val, i32 0 -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test v2f64 extraction from the last element. 
-+define void @f33(<2 x double> %val, double *%ptr) { -+; CHECK-LABEL: f33: -+; CHECK: vsteg %v24, 0(%r2), 1 -+; CHECK: br %r14 -+ %element = extractelement <2 x double> %val, i32 1 -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test v2f64 extraction with the highest in-range offset. -+define void @f34(<2 x double> %val, double *%base) { -+; CHECK-LABEL: f34: -+; CHECK: vsteg %v24, 4088(%r2), 1 -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 511 -+ %element = extractelement <2 x double> %val, i32 1 -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test v2f64 extraction with the first ouf-of-range offset. -+define void @f35(<2 x double> %val, double *%base) { -+; CHECK-LABEL: f35: -+; CHECK: aghi %r2, 4096 -+; CHECK: vsteg %v24, 0(%r2), 0 -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 512 -+ %element = extractelement <2 x double> %val, i32 0 -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test v2f64 extraction from a variable element. -+define void @f36(<2 x double> %val, double *%ptr, i32 %index) { -+; CHECK-LABEL: f36: -+; CHECK-NOT: vsteg -+; CHECK: br %r14 -+ %element = extractelement <2 x double> %val, i32 %index -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test a v4i32 scatter of the first element. -+define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f37: -+; CHECK: vscef %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 0 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to i32 * -+ %element = extractelement <4 x i32> %val, i32 0 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test a v4i32 scatter of the last element. 
-+define void @f38(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f38: -+; CHECK: vscef %v24, 0(%v26,%r2), 3 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 3 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to i32 * -+ %element = extractelement <4 x i32> %val, i32 3 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test a v4i32 scatter with the highest in-range offset. -+define void @f39(<4 x i32> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f39: -+; CHECK: vscef %v24, 4095(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 1 -+ %ext = zext i32 %elem to i64 -+ %add1 = add i64 %base, %ext -+ %add2 = add i64 %add1, 4095 -+ %ptr = inttoptr i64 %add2 to i32 * -+ %element = extractelement <4 x i32> %val, i32 1 -+ store i32 %element, i32 *%ptr -+ ret void -+} -+ -+; Test a v2i64 scatter of the first element. -+define void @f40(<2 x i64> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f40: -+; CHECK: vsceg %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 0 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to i64 * -+ %element = extractelement <2 x i64> %val, i32 0 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test a v2i64 scatter of the last element. -+define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f41: -+; CHECK: vsceg %v24, 0(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 1 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to i64 * -+ %element = extractelement <2 x i64> %val, i32 1 -+ store i64 %element, i64 *%ptr -+ ret void -+} -+ -+; Test a v4f32 scatter of the first element. 
-+define void @f42(<4 x float> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f42: -+; CHECK: vscef %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 0 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to float * -+ %element = extractelement <4 x float> %val, i32 0 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test a v4f32 scatter of the last element. -+define void @f43(<4 x float> %val, <4 x i32> %index, i64 %base) { -+; CHECK-LABEL: f43: -+; CHECK: vscef %v24, 0(%v26,%r2), 3 -+; CHECK: br %r14 -+ %elem = extractelement <4 x i32> %index, i32 3 -+ %ext = zext i32 %elem to i64 -+ %add = add i64 %base, %ext -+ %ptr = inttoptr i64 %add to float * -+ %element = extractelement <4 x float> %val, i32 3 -+ store float %element, float *%ptr -+ ret void -+} -+ -+; Test a v2f64 scatter of the first element. -+define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f44: -+; CHECK: vsceg %v24, 0(%v26,%r2), 0 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 0 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to double * -+ %element = extractelement <2 x double> %val, i32 0 -+ store double %element, double *%ptr -+ ret void -+} -+ -+; Test a v2f64 scatter of the last element. -+define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) { -+; CHECK-LABEL: f45: -+; CHECK: vsceg %v24, 0(%v26,%r2), 1 -+; CHECK: br %r14 -+ %elem = extractelement <2 x i64> %index, i32 1 -+ %add = add i64 %base, %elem -+ %ptr = inttoptr i64 %add to double * -+ %element = extractelement <2 x double> %val, i32 1 -+ store double %element, double *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-11.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-11.ll -@@ -0,0 +1,111 @@ -+; Test insertions of register values into a nonzero index of an undef. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into an undef, with an arbitrary index. -+define <16 x i8> @f1(i8 %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgb %v24, %r2, 12 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 12 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into an undef, with the first good index for VLVGP. -+define <16 x i8> @f2(i8 %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into an undef, with the second good index for VLVGP. -+define <16 x i8> @f3(i8 %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 15 -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 insertion into an undef, with an arbitrary index. -+define <8 x i16> @f4(i16 %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlvgh %v24, %r2, 5 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 5 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into an undef, with the first good index for VLVGP. -+define <8 x i16> @f5(i16 %val) { -+; CHECK-LABEL: f5: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 3 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into an undef, with the second good index for VLVGP. -+define <8 x i16> @f6(i16 %val) { -+; CHECK-LABEL: f6: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 7 -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 insertion into an undef, with an arbitrary index. 
-+define <4 x i32> @f7(i32 %val) { -+; CHECK-LABEL: f7: -+; CHECK: vlvgf %v24, %r2, 2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 2 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into an undef, with the first good index for VLVGP. -+define <4 x i32> @f8(i32 %val) { -+; CHECK-LABEL: f8: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into an undef, with the second good index for VLVGP. -+define <4 x i32> @f9(i32 %val) { -+; CHECK-LABEL: f9: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into an undef. -+define <2 x i64> @f10(i64 %val) { -+; CHECK-LABEL: f10: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK-NEXT: br %r14 -+ %ret = insertelement <2 x i64> undef, i64 %val, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion into an undef. -+define <4 x float> @f11(float %val) { -+; CHECK-LABEL: f11: -+; CHECK: vrepf %v24, %v0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> undef, float %val, i32 2 -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion into an undef. -+define <2 x double> @f12(double %val) { -+; CHECK-LABEL: f12: -+; CHECK: vrepg %v24, %v0, 0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> undef, double %val, i32 1 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-12.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-12.ll -@@ -0,0 +1,123 @@ -+; Test insertions of memory values into a nonzero index of an undef. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into an undef, with an arbitrary index. 
-+define <16 x i8> @f1(i8 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vlrepb %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 12 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into an undef, with the first good index for VLVGP. -+define <16 x i8> @f2(i8 *%ptr) { -+; CHECK-LABEL: f2: -+; CHECK: {{vlrepb|vllezb}} %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 insertion into an undef, with the second good index for VLVGP. -+define <16 x i8> @f3(i8 *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vlrepb %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> undef, i8 %val, i32 15 -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 insertion into an undef, with an arbitrary index. -+define <8 x i16> @f4(i16 *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK: vlreph %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i16 *%ptr -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 5 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into an undef, with the first good index for VLVGP. -+define <8 x i16> @f5(i16 *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: {{vlreph|vllezh}} %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i16 *%ptr -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 3 -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 insertion into an undef, with the second good index for VLVGP. -+define <8 x i16> @f6(i16 *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vlreph %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i16 *%ptr -+ %ret = insertelement <8 x i16> undef, i16 %val, i32 7 -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 insertion into an undef, with an arbitrary index. 
-+define <4 x i32> @f7(i32 *%ptr) { -+; CHECK-LABEL: f7: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i32 *%ptr -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 2 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into an undef, with the first good index for VLVGP. -+define <4 x i32> @f8(i32 *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: {{vlrepf|vllezf}} %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i32 *%ptr -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 insertion into an undef, with the second good index for VLVGP. -+define <4 x i32> @f9(i32 *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i32 *%ptr -+ %ret = insertelement <4 x i32> undef, i32 %val, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into an undef. -+define <2 x i64> @f10(i64 *%ptr) { -+; CHECK-LABEL: f10: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK-NEXT: br %r14 -+ %val = load i64 *%ptr -+ %ret = insertelement <2 x i64> undef, i64 %val, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion into an undef. -+define <4 x float> @f11(float *%ptr) { -+; CHECK-LABEL: f11: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load float *%ptr -+ %ret = insertelement <4 x float> undef, float %val, i32 2 -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion into an undef. -+define <2 x double> @f12(double *%ptr) { -+; CHECK-LABEL: f12: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load double *%ptr -+ %ret = insertelement <2 x double> undef, double %val, i32 1 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-13.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-13.ll -@@ -0,0 +1,69 @@ -+; Test insertions of register values into 0. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 insertion into 0. -+define <16 x i8> @f1(i8 %val1, i8 %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vgbm %v24, 0 -+; CHECK-DAG: vlvgb %v24, %r2, 2 -+; CHECK-DAG: vlvgb %v24, %r3, 12 -+; CHECK: br %r14 -+ %vec1 = insertelement <16 x i8> zeroinitializer, i8 %val1, i32 2 -+ %vec2 = insertelement <16 x i8> %vec1, i8 %val2, i32 12 -+ ret <16 x i8> %vec2 -+} -+ -+; Test v8i16 insertion into 0. -+define <8 x i16> @f2(i16 %val1, i16 %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vgbm %v24, 0 -+; CHECK-DAG: vlvgh %v24, %r2, 3 -+; CHECK-DAG: vlvgh %v24, %r3, 5 -+; CHECK: br %r14 -+ %vec1 = insertelement <8 x i16> zeroinitializer, i16 %val1, i32 3 -+ %vec2 = insertelement <8 x i16> %vec1, i16 %val2, i32 5 -+ ret <8 x i16> %vec2 -+} -+ -+; Test v4i32 insertion into 0. -+define <4 x i32> @f3(i32 %val) { -+; CHECK-LABEL: f3: -+; CHECK: vgbm %v24, 0 -+; CHECK: vlvgf %v24, %r2, 3 -+; CHECK: br %r14 -+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 3 -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 insertion into 0. -+define <2 x i64> @f4(i64 %val) { -+; CHECK-LABEL: f4: -+; CHECK: lghi [[REG:%r[0-5]]], 0 -+; CHECK: vlvgp %v24, [[REG]], %r2 -+; CHECK: br %r14 -+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1 -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 insertion into 0. -+define <4 x float> @f5(float %val) { -+; CHECK-LABEL: f5: -+; CHECK-DAG: vuplhf [[REG:%v[0-9]+]], %v0 -+; CHECK-DAG: vgbm [[ZERO:%v[0-9]+]], 0 -+; CHECK: vmrhg %v24, [[ZERO]], [[REG]] -+; CHECK: br %r14 -+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 3 -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 insertion into 0. 
-+define <2 x double> @f6(double %val) { -+; CHECK-LABEL: f6: -+; CHECK: vgbm [[REG:%v[0-9]+]], 0 -+; CHECK: vmrhg %v24, [[REG]], %v0 -+; CHECK: br %r14 -+ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 1 -+ ret <2 x double> %ret -+} -+ -Index: llvm-36/test/CodeGen/SystemZ/vec-move-14.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-14.ll -@@ -0,0 +1,96 @@ -+; Test insertions of memory values into 0. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test VLLEZB. -+define <16 x i8> @f1(i8 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vllezb %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test VLLEZB with the highest in-range offset. -+define <16 x i8> @f2(i8 *%base) { -+; CHECK-LABEL: f2: -+; CHECK: vllezb %v24, 4095(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4095 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test VLLEZB with the next highest offset. -+define <16 x i8> @f3(i8 *%base) { -+; CHECK-LABEL: f3: -+; CHECK-NOT: vllezb %v24, 4096(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test that VLLEZB allows an index. -+define <16 x i8> @f4(i8 *%base, i64 %index) { -+; CHECK-LABEL: f4: -+; CHECK: vllezb %v24, 0({{%r2,%r3|%r3,%r2}}) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 %index -+ %val = load i8 *%ptr -+ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 -+ ret <16 x i8> %ret -+} -+ -+; Test VLLEZH. 
-+define <8 x i16> @f5(i16 *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vllezh %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load i16 *%ptr -+ %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3 -+ ret <8 x i16> %ret -+} -+ -+; Test VLLEZF. -+define <4 x i32> @f6(i32 *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vllezf %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load i32 *%ptr -+ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1 -+ ret <4 x i32> %ret -+} -+ -+; Test VLLEZG. -+define <2 x i64> @f7(i64 *%ptr) { -+; CHECK-LABEL: f7: -+; CHECK: vllezg %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load i64 *%ptr -+ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0 -+ ret <2 x i64> %ret -+} -+ -+; Test VLLEZF with a float. -+define <4 x float> @f8(float *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: vllezf %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load float *%ptr -+ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1 -+ ret <4 x float> %ret -+} -+ -+; Test VLLEZG with a double. -+define <2 x double> @f9(double *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK: vllezg %v24, 0(%r2) -+; CHECK: br %r14 -+ %val = load double *%ptr -+ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0 -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-15.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-15.ll -@@ -0,0 +1,105 @@ -+; Test vector sign-extending loads. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i1->v16i8 extension. -+define <16 x i8> @f1(<16 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <16 x i1> *%ptr -+ %ret = sext <16 x i1> %val to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i1->v8i16 extension. -+define <8 x i16> @f2(<8 x i1> *%ptr) { -+; No expected output, but must compile. 
-+ %val = load <8 x i1> *%ptr -+ %ret = sext <8 x i1> %val to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i8->v8i16 extension. -+define <8 x i16> @f3(<8 x i8> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphb %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <8 x i8> *%ptr -+ %ret = sext <8 x i8> %val to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i1->v4i32 extension. -+define <4 x i32> @f4(<4 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <4 x i1> *%ptr -+ %ret = sext <4 x i1> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i8->v4i32 extension. -+define <4 x i32> @f5(<4 x i8> *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuphh %v24, [[REG2]] -+; CHECK: br %r14 -+ %val = load <4 x i8> *%ptr -+ %ret = sext <4 x i8> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i16->v4i32 extension. -+define <4 x i32> @f6(<4 x i16> *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphh %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <4 x i16> *%ptr -+ %ret = sext <4 x i16> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i1->v2i64 extension. -+define <2 x i64> @f7(<2 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <2 x i1> *%ptr -+ %ret = sext <2 x i1> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i8->v2i64 extension. -+define <2 x i64> @f8(<2 x i8> *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]] -+; CHECK: vuphf %v24, [[REG3]] -+; CHECK: br %r14 -+ %val = load <2 x i8> *%ptr -+ %ret = sext <2 x i8> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i16->v2i64 extension. 
-+define <2 x i64> @f9(<2 x i16> *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphh [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuphf %v24, [[REG2]] -+; CHECK: br %r14 -+ %val = load <2 x i16> *%ptr -+ %ret = sext <2 x i16> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i32->v2i64 extension. -+define <2 x i64> @f10(<2 x i32> *%ptr) { -+; CHECK-LABEL: f10: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuphf %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <2 x i32> *%ptr -+ %ret = sext <2 x i32> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-16.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-16.ll -@@ -0,0 +1,105 @@ -+; Test vector zero-extending loads. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i1->v16i8 extension. -+define <16 x i8> @f1(<16 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <16 x i1> *%ptr -+ %ret = zext <16 x i1> %val to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i1->v8i16 extension. -+define <8 x i16> @f2(<8 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <8 x i1> *%ptr -+ %ret = zext <8 x i1> %val to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i8->v8i16 extension. -+define <8 x i16> @f3(<8 x i8> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhb %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <8 x i8> *%ptr -+ %ret = zext <8 x i8> %val to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i1->v4i32 extension. -+define <4 x i32> @f4(<4 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <4 x i1> *%ptr -+ %ret = zext <4 x i1> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i8->v4i32 extension. 
-+define <4 x i32> @f5(<4 x i8> *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuplhh %v24, [[REG2]] -+; CHECK: br %r14 -+ %val = load <4 x i8> *%ptr -+ %ret = zext <4 x i8> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i16->v4i32 extension. -+define <4 x i32> @f6(<4 x i16> *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhh %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <4 x i16> *%ptr -+ %ret = zext <4 x i16> %val to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i1->v2i64 extension. -+define <2 x i64> @f7(<2 x i1> *%ptr) { -+; No expected output, but must compile. -+ %val = load <2 x i1> *%ptr -+ %ret = zext <2 x i1> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i8->v2i64 extension. -+define <2 x i64> @f8(<2 x i8> *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]] -+; CHECK: vuplhf %v24, [[REG3]] -+; CHECK: br %r14 -+ %val = load <2 x i8> *%ptr -+ %ret = zext <2 x i8> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i16->v2i64 extension. -+define <2 x i64> @f9(<2 x i16> *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]] -+; CHECK: vuplhf %v24, [[REG2]] -+; CHECK: br %r14 -+ %val = load <2 x i16> *%ptr -+ %ret = zext <2 x i16> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i32->v2i64 extension. 
-+define <2 x i64> @f10(<2 x i32> *%ptr) { -+; CHECK-LABEL: f10: -+; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) -+; CHECK: vuplhf %v24, [[REG1]] -+; CHECK: br %r14 -+ %val = load <2 x i32> *%ptr -+ %ret = zext <2 x i32> %val to <2 x i64> -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-move-17.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-move-17.ll -@@ -0,0 +1,104 @@ -+; Test vector truncating stores. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8->v16i1 truncation. -+define void @f1(<16 x i8> %val, <16 x i1> *%ptr) { -+; No expected output, but must compile. -+ %trunc = trunc <16 x i8> %val to <16 x i1> -+ store <16 x i1> %trunc, <16 x i1> *%ptr -+ ret void -+} -+ -+; Test a v8i16->v8i1 truncation. -+define void @f2(<8 x i16> %val, <8 x i1> *%ptr) { -+; No expected output, but must compile. -+ %trunc = trunc <8 x i16> %val to <8 x i1> -+ store <8 x i1> %trunc, <8 x i1> *%ptr -+ ret void -+} -+ -+; Test a v8i16->v8i8 truncation. -+define void @f3(<8 x i16> %val, <8 x i8> *%ptr) { -+; CHECK-LABEL: f3: -+; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24 -+; CHECK: vsteg [[REG1]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <8 x i16> %val to <8 x i8> -+ store <8 x i8> %trunc, <8 x i8> *%ptr -+ ret void -+} -+ -+; Test a v4i32->v4i1 truncation. -+define void @f4(<4 x i32> %val, <4 x i1> *%ptr) { -+; No expected output, but must compile. -+ %trunc = trunc <4 x i32> %val to <4 x i1> -+ store <4 x i1> %trunc, <4 x i1> *%ptr -+ ret void -+} -+ -+; Test a v4i32->v4i8 truncation. At the moment we use a VPERM rather than -+; a chain of packs. -+define void @f5(<4 x i32> %val, <4 x i8> *%ptr) { -+; CHECK-LABEL: f5: -+; CHECK: vperm [[REG:%v[0-9]+]], -+; CHECK: vstef [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i8> -+ store <4 x i8> %trunc, <4 x i8> *%ptr -+ ret void -+} -+ -+; Test a v4i32->v4i16 truncation. 
-+define void @f6(<4 x i32> %val, <4 x i16> *%ptr) { -+; CHECK-LABEL: f6: -+; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24 -+; CHECK: vsteg [[REG1]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i16> -+ store <4 x i16> %trunc, <4 x i16> *%ptr -+ ret void -+} -+ -+; Test a v2i64->v2i1 truncation. -+define void @f7(<2 x i64> %val, <2 x i1> *%ptr) { -+; No expected output, but must compile. -+ %trunc = trunc <2 x i64> %val to <2 x i1> -+ store <2 x i1> %trunc, <2 x i1> *%ptr -+ ret void -+} -+ -+; Test a v2i64->v2i8 truncation. At the moment we use a VPERM rather than -+; a chain of packs. -+define void @f8(<2 x i64> %val, <2 x i8> *%ptr) { -+; CHECK-LABEL: f8: -+; CHECK: vperm [[REG:%v[0-9]+]], -+; CHECK: vsteh [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i8> -+ store <2 x i8> %trunc, <2 x i8> *%ptr -+ ret void -+} -+ -+; Test a v2i64->v2i16 truncation. At the moment we use a VPERM rather than -+; a chain of packs. -+define void @f9(<2 x i64> %val, <2 x i16> *%ptr) { -+; CHECK-LABEL: f9: -+; CHECK: vperm [[REG:%v[0-9]+]], -+; CHECK: vstef [[REG]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i16> -+ store <2 x i16> %trunc, <2 x i16> *%ptr -+ ret void -+} -+ -+; Test a v2i64->v2i32 truncation. -+define void @f10(<2 x i64> %val, <2 x i32> *%ptr) { -+; CHECK-LABEL: f10: -+; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24 -+; CHECK: vsteg [[REG1]], 0(%r2) -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i32> -+ store <2 x i32> %trunc, <2 x i32> *%ptr -+ ret void -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll -@@ -0,0 +1,60 @@ -+; Test vector multiplication. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 multiplication. 
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmlb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = mul <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 multiplication. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmlhw %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = mul <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 multiplication. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmlf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = mul <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 multiplication. There's no vector equivalent. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK-NOT: vmlg -+; CHECK: br %r14 -+ %ret = mul <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2f64 multiplication. -+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vfmdb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = fmul <2 x double> %val1, %val2 -+ ret <2 x double> %ret -+} -+ -+; Test an f64 multiplication that uses vector registers. -+define double @f6(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: wfmdb %f0, %v24, %v26 -+; CHECK: br %r14 -+ %scalar1 = extractelement <2 x double> %val1, i32 0 -+ %scalar2 = extractelement <2 x double> %val2, i32 0 -+ %ret = fmul double %scalar1, %scalar2 -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll -@@ -0,0 +1,63 @@ -+; Test vector multiply-and-add. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>) -+ -+; Test a v16i8 multiply-and-add. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i8> %val3) { -+; CHECK-LABEL: f1: -+; CHECK: vmalb %v24, %v26, %v28, %v30 -+; CHECK: br %r14 -+ %mul = mul <16 x i8> %val1, %val2 -+ %ret = add <16 x i8> %mul, %val3 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 multiply-and-add. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i16> %val3) { -+; CHECK-LABEL: f2: -+; CHECK: vmalhw %v24, %v26, %v28, %v30 -+; CHECK: br %r14 -+ %mul = mul <8 x i16> %val1, %val2 -+ %ret = add <8 x i16> %mul, %val3 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 multiply-and-add. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> %val3) { -+; CHECK-LABEL: f3: -+; CHECK: vmalf %v24, %v26, %v28, %v30 -+; CHECK: br %r14 -+ %mul = mul <4 x i32> %val1, %val2 -+ %ret = add <4 x i32> %mul, %val3 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2f64 multiply-and-add. -+define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2, <2 x double> %val3) { -+; CHECK-LABEL: f4: -+; CHECK: vfmadb %v24, %v26, %v28, %v30 -+; CHECK: br %r14 -+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1, -+ <2 x double> %val2, -+ <2 x double> %val3) -+ ret <2 x double> %ret -+} -+ -+; Test a v2f64 multiply-and-subtract. 
-+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2, <2 x double> %val3) { -+; CHECK-LABEL: f5: -+; CHECK: vfmsdb %v24, %v26, %v28, %v30 -+; CHECK: br %r14 -+ %negval3 = fsub <2 x double> , %val3 -+ %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1, -+ <2 x double> %val2, -+ <2 x double> %negval3) -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll -@@ -0,0 +1,58 @@ -+; Test vector negation. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 negation. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vlcb %v24, %v26 -+; CHECK: br %r14 -+ %ret = sub <16 x i8> zeroinitializer, %val -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 negation. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vlch %v24, %v26 -+; CHECK: br %r14 -+ %ret = sub <8 x i16> zeroinitializer, %val -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 negation. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vlcf %v24, %v26 -+; CHECK: br %r14 -+ %ret = sub <4 x i32> zeroinitializer, %val -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 negation. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vlcg %v24, %v26 -+; CHECK: br %r14 -+ %ret = sub <2 x i64> zeroinitializer, %val -+ ret <2 x i64> %ret -+} -+ -+; Test a v2f64 negation. -+define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vflcdb %v24, %v26 -+; CHECK: br %r14 -+ %ret = fsub <2 x double> , %val -+ ret <2 x double> %ret -+} -+ -+; Test an f64 negation that uses vector registers. 
-+define double @f6(<2 x double> %val) { -+; CHECK-LABEL: f6: -+; CHECK: wflcdb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %ret = fsub double -0.0, %scalar -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-or-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-or-01.ll -@@ -0,0 +1,39 @@ -+; Test vector OR. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 OR. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vo %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = or <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 OR. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vo %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = or <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 OR. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vo %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = or <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 OR. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vo %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = or <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-or-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-or-02.ll -@@ -0,0 +1,107 @@ -+; Test vector (or (and X, Z), (and Y, (not Z))) patterns. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8. 
-+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) { -+; CHECK-LABEL: f1: -+; CHECK: vsel %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <16 x i8> %val3, -+ %and1 = and <16 x i8> %val1, %val3 -+ %and2 = and <16 x i8> %val2, %not -+ %ret = or <16 x i8> %and1, %and2 -+ ret <16 x i8> %ret -+} -+ -+; ...and again with the XOR applied to the other operand of the AND. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) { -+; CHECK-LABEL: f2: -+; CHECK: vsel %v24, %v26, %v24, %v28 -+; CHECK: br %r14 -+ %not = xor <16 x i8> %val3, -+ %and1 = and <16 x i8> %val1, %not -+ %and2 = and <16 x i8> %val2, %val3 -+ %ret = or <16 x i8> %and1, %and2 -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16. -+define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) { -+; CHECK-LABEL: f3: -+; CHECK: vsel %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <8 x i16> %val3, -+ %and1 = and <8 x i16> %val1, %val3 -+ %and2 = and <8 x i16> %val2, %not -+ %ret = or <8 x i16> %and1, %and2 -+ ret <8 x i16> %ret -+} -+ -+; ...and again with the XOR applied to the other operand of the AND. -+define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) { -+; CHECK-LABEL: f4: -+; CHECK: vsel %v24, %v26, %v24, %v28 -+; CHECK: br %r14 -+ %not = xor <8 x i16> %val3, -+ %and1 = and <8 x i16> %val1, %not -+ %and2 = and <8 x i16> %val2, %val3 -+ %ret = or <8 x i16> %and1, %and2 -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32. -+define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) { -+; CHECK-LABEL: f5: -+; CHECK: vsel %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <4 x i32> %val3, -+ %and1 = and <4 x i32> %val1, %val3 -+ %and2 = and <4 x i32> %val2, %not -+ %ret = or <4 x i32> %and1, %and2 -+ ret <4 x i32> %ret -+} -+ -+; ...and again with the XOR applied to the other operand of the AND. 
-+define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) { -+; CHECK-LABEL: f6: -+; CHECK: vsel %v24, %v26, %v24, %v28 -+; CHECK: br %r14 -+ %not = xor <4 x i32> %val3, -+ %and1 = and <4 x i32> %val1, %not -+ %and2 = and <4 x i32> %val2, %val3 -+ %ret = or <4 x i32> %and1, %and2 -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64. -+define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) { -+; CHECK-LABEL: f7: -+; CHECK: vsel %v24, %v24, %v26, %v28 -+; CHECK: br %r14 -+ %not = xor <2 x i64> %val3, -+ %and1 = and <2 x i64> %val1, %val3 -+ %and2 = and <2 x i64> %val2, %not -+ %ret = or <2 x i64> %and1, %and2 -+ ret <2 x i64> %ret -+} -+ -+; ...and again with the XOR applied to the other operand of the AND. -+define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) { -+; CHECK-LABEL: f8: -+; CHECK: vsel %v24, %v26, %v24, %v28 -+; CHECK: br %r14 -+ %not = xor <2 x i64> %val3, -+ %and1 = and <2 x i64> %val1, %not -+ %and2 = and <2 x i64> %val2, %val3 -+ %ret = or <2 x i64> %and1, %and2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll -@@ -0,0 +1,175 @@ -+; Test vector splat. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 splat of the first element. -+define <16 x i8> @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vrepb %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 splat of the last element. -+define <16 x i8> @f2(<16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vrepb %v24, %v24, 15 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 splat of an arbitrary element, using the second operand of -+; the shufflevector. 
-+define <16 x i8> @f3(<16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vrepb %v24, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 splat of the first element. -+define <8 x i16> @f4(<8 x i16> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vreph %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 splat of the last element. -+define <8 x i16> @f5(<8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vreph %v24, %v24, 7 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 splat of an arbitrary element, using the second operand of -+; the shufflevector. -+define <8 x i16> @f6(<8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vreph %v24, %v24, 2 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> undef, <8 x i16> %val, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 splat of the first element. -+define <4 x i32> @f7(<4 x i32> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vrepf %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 splat of the last element. -+define <4 x i32> @f8(<4 x i32> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vrepf %v24, %v24, 3 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 splat of an arbitrary element, using the second operand of -+; the shufflevector. -+define <4 x i32> @f9(<4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vrepf %v24, %v24, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> undef, <4 x i32> %val, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 splat of the first element. 
-+define <2 x i64> @f10(<2 x i64> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vrepg %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 splat of the last element. -+define <2 x i64> @f11(<2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vrepg %v24, %v24, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 splat of the first element. -+define <4 x float> @f12(<4 x float> %val) { -+; CHECK-LABEL: f12: -+; CHECK: vrepf %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 splat of the last element. -+define <4 x float> @f13(<4 x float> %val) { -+; CHECK-LABEL: f13: -+; CHECK: vrepf %v24, %v24, 3 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 splat of an arbitrary element, using the second operand of -+; the shufflevector. -+define <4 x float> @f14(<4 x float> %val) { -+; CHECK-LABEL: f14: -+; CHECK: vrepf %v24, %v24, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> undef, <4 x float> %val, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 splat of the first element. -+define <2 x double> @f15(<2 x double> %val) { -+; CHECK-LABEL: f15: -+; CHECK: vrepg %v24, %v24, 0 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 splat of the last element. 
-+define <2 x double> @f16(<2 x double> %val) { -+; CHECK-LABEL: f16: -+; CHECK: vrepg %v24, %v24, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll -@@ -0,0 +1,200 @@ -+; Test replications of a scalar register value, represented as splats. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test v16i8 splat of the first element. -+define <16 x i8> @f1(i8 %scalar) { -+; CHECK-LABEL: f1: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepb %v24, [[REG]], 7 -+; CHECK: br %r14 -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 splat of the last element. -+define <16 x i8> @f2(i8 %scalar) { -+; CHECK-LABEL: f2: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepb %v24, [[REG]], 7 -+; CHECK: br %r14 -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 15 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test v16i8 splat of an arbitrary element, using the second operand of -+; the shufflevector. -+define <16 x i8> @f3(i8 %scalar) { -+; CHECK-LABEL: f3: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepb %v24, [[REG]], 7 -+; CHECK: br %r14 -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 4 -+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test v8i16 splat of the first element. 
-+define <8 x i16> @f4(i16 %scalar) { -+; CHECK-LABEL: f4: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vreph %v24, [[REG]], 3 -+; CHECK: br %r14 -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 splat of the last element. -+define <8 x i16> @f5(i16 %scalar) { -+; CHECK-LABEL: f5: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vreph %v24, [[REG]], 3 -+; CHECK: br %r14 -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 7 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test v8i16 splat of an arbitrary element, using the second operand of -+; the shufflevector. -+define <8 x i16> @f6(i16 %scalar) { -+; CHECK-LABEL: f6: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vreph %v24, [[REG]], 3 -+; CHECK: br %r14 -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 2 -+ %ret = shufflevector <8 x i16> undef, <8 x i16> %val, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test v4i32 splat of the first element. -+define <4 x i32> @f7(i32 %scalar) { -+; CHECK-LABEL: f7: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepf %v24, [[REG]], 1 -+; CHECK: br %r14 -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 splat of the last element. -+define <4 x i32> @f8(i32 %scalar) { -+; CHECK-LABEL: f8: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepf %v24, [[REG]], 1 -+; CHECK: br %r14 -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 3 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test v4i32 splat of an arbitrary element, using the second operand of -+; the shufflevector. 
-+define <4 x i32> @f9(i32 %scalar) { -+; CHECK-LABEL: f9: -+; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2 -+; CHECK: vrepf %v24, [[REG]], 1 -+; CHECK: br %r14 -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 1 -+ %ret = shufflevector <4 x i32> undef, <4 x i32> %val, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test v2i64 splat of the first element. -+define <2 x i64> @f10(i64 %scalar) { -+; CHECK-LABEL: f10: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK: br %r14 -+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x i64> %ret -+} -+ -+; Test v2i64 splat of the last element. -+define <2 x i64> @f11(i64 %scalar) { -+; CHECK-LABEL: f11: -+; CHECK: vlvgp %v24, %r2, %r2 -+; CHECK: br %r14 -+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 1 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test v4f32 splat of the first element. -+define <4 x float> @f12(float %scalar) { -+; CHECK-LABEL: f12: -+; CHECK: vrepf %v24, %v0, 0 -+; CHECK: br %r14 -+ %val = insertelement <4 x float> undef, float %scalar, i32 0 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 splat of the last element. -+define <4 x float> @f13(float %scalar) { -+; CHECK-LABEL: f13: -+; CHECK: vrepf %v24, %v0, 0 -+; CHECK: br %r14 -+ %val = insertelement <4 x float> undef, float %scalar, i32 3 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test v4f32 splat of an arbitrary element, using the second operand of -+; the shufflevector. 
-+define <4 x float> @f14(float %scalar) { -+; CHECK-LABEL: f14: -+; CHECK: vrepf %v24, %v0, 0 -+; CHECK: br %r14 -+ %val = insertelement <4 x float> undef, float %scalar, i32 1 -+ %ret = shufflevector <4 x float> undef, <4 x float> %val, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test v2f64 splat of the first element. -+define <2 x double> @f15(double %scalar) { -+; CHECK-LABEL: f15: -+; CHECK: vrepg %v24, %v0, 0 -+; CHECK: br %r14 -+ %val = insertelement <2 x double> undef, double %scalar, i32 0 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x double> %ret -+} -+ -+; Test v2f64 splat of the last element. -+define <2 x double> @f16(double %scalar) { -+; CHECK-LABEL: f16: -+; CHECK: vrepg %v24, %v0, 0 -+; CHECK: br %r14 -+ %val = insertelement <2 x double> undef, double %scalar, i32 1 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll -@@ -0,0 +1,251 @@ -+; Test replications of a scalar memory value, represented as splats. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 replicating load with no offset. -+define <16 x i8> @f1(i8 *%ptr) { -+; CHECK-LABEL: f1: -+; CHECK: vlrepb %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load i8 *%ptr -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 replicating load with the maximum in-range offset. 
-+define <16 x i8> @f2(i8 *%base) { -+; CHECK-LABEL: f2: -+; CHECK: vlrepb %v24, 4095(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4095 -+ %scalar = load i8 *%ptr -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 replicating load with the first out-of-range offset. -+define <16 x i8> @f3(i8 *%base) { -+; CHECK-LABEL: f3: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlrepb %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i8 *%base, i64 4096 -+ %scalar = load i8 *%ptr -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 replicating load with no offset. -+define <8 x i16> @f4(i16 *%ptr) { -+; CHECK-LABEL: f4: -+; CHECK: vlreph %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load i16 *%ptr -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 replicating load with the maximum in-range offset. -+define <8 x i16> @f5(i16 *%base) { -+; CHECK-LABEL: f5: -+; CHECK: vlreph %v24, 4094(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i64 2047 -+ %scalar = load i16 *%ptr -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 replicating load with the first out-of-range offset. 
-+define <8 x i16> @f6(i16 *%base) { -+; CHECK-LABEL: f6: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlreph %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i16 *%base, i64 2048 -+ %scalar = load i16 *%ptr -+ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 -+ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 replicating load with no offset. -+define <4 x i32> @f7(i32 *%ptr) { -+; CHECK-LABEL: f7: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load i32 *%ptr -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 replicating load with the maximum in-range offset. -+define <4 x i32> @f8(i32 *%base) { -+; CHECK-LABEL: f8: -+; CHECK: vlrepf %v24, 4092(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i64 1023 -+ %scalar = load i32 *%ptr -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 replicating load with the first out-of-range offset. -+define <4 x i32> @f9(i32 *%base) { -+; CHECK-LABEL: f9: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i32 *%base, i64 1024 -+ %scalar = load i32 *%ptr -+ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 -+ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 replicating load with no offset. 
-+define <2 x i64> @f10(i64 *%ptr) { -+; CHECK-LABEL: f10: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load i64 *%ptr -+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 replicating load with the maximum in-range offset. -+define <2 x i64> @f11(i64 *%base) { -+; CHECK-LABEL: f11: -+; CHECK: vlrepg %v24, 4088(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 511 -+ %scalar = load i64 *%ptr -+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 replicating load with the first out-of-range offset. -+define <2 x i64> @f12(i64 *%base) { -+; CHECK-LABEL: f12: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr i64 *%base, i32 512 -+ %scalar = load i64 *%ptr -+ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 -+ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x i64> %ret -+} -+ -+; Test a v4f32 replicating load with no offset. -+define <4 x float> @f13(float *%ptr) { -+; CHECK-LABEL: f13: -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load float *%ptr -+ %val = insertelement <4 x float> undef, float %scalar, i32 0 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x float> %ret -+} -+ -+; Test a v4f32 replicating load with the maximum in-range offset. 
-+define <4 x float> @f14(float *%base) { -+; CHECK-LABEL: f14: -+; CHECK: vlrepf %v24, 4092(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i64 1023 -+ %scalar = load float *%ptr -+ %val = insertelement <4 x float> undef, float %scalar, i32 0 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x float> %ret -+} -+ -+; Test a v4f32 replicating load with the first out-of-range offset. -+define <4 x float> @f15(float *%base) { -+; CHECK-LABEL: f15: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlrepf %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr float *%base, i64 1024 -+ %scalar = load float *%ptr -+ %val = insertelement <4 x float> undef, float %scalar, i32 0 -+ %ret = shufflevector <4 x float> %val, <4 x float> undef, -+ <4 x i32> zeroinitializer -+ ret <4 x float> %ret -+} -+ -+; Test a v2f64 replicating load with no offset. -+define <2 x double> @f16(double *%ptr) { -+; CHECK-LABEL: f16: -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %scalar = load double *%ptr -+ %val = insertelement <2 x double> undef, double %scalar, i32 0 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x double> %ret -+} -+ -+; Test a v2f64 replicating load with the maximum in-range offset. -+define <2 x double> @f17(double *%base) { -+; CHECK-LABEL: f17: -+; CHECK: vlrepg %v24, 4088(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 511 -+ %scalar = load double *%ptr -+ %val = insertelement <2 x double> undef, double %scalar, i32 0 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x double> %ret -+} -+ -+; Test a v2f64 replicating load with the first out-of-range offset. 
-+define <2 x double> @f18(double *%base) { -+; CHECK-LABEL: f18: -+; CHECK: aghi %r2, 4096 -+; CHECK: vlrepg %v24, 0(%r2) -+; CHECK: br %r14 -+ %ptr = getelementptr double *%base, i32 512 -+ %scalar = load double *%ptr -+ %val = insertelement <2 x double> undef, double %scalar, i32 0 -+ %ret = shufflevector <2 x double> %val, <2 x double> undef, -+ <2 x i32> zeroinitializer -+ ret <2 x double> %ret -+} -+ -+; Test a v16i8 replicating load with an index. -+define <16 x i8> @f19(i8 *%base, i64 %index) { -+; CHECK-LABEL: f19: -+; CHECK: vlrepb %v24, 1023(%r3,%r2) -+; CHECK: br %r14 -+ %ptr1 = getelementptr i8 *%base, i64 %index -+ %ptr = getelementptr i8 *%ptr1, i64 1023 -+ %scalar = load i8 *%ptr -+ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll -@@ -0,0 +1,200 @@ -+; Test vector merge high. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a canonical v16i8 merge high. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmrhb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a reversed v16i8 merge high. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmrhb %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge high with only the first operand being used. 
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmrhb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge high with only the second operand being used. -+; This is converted into @f3 by target-independent code. -+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmrhb %v24, %v26, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge with both operands being the same. This too is -+; converted into @f3 by target-independent code. -+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vmrhb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge in which some of the indices are don't care. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmrhb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge in which one of the operands is undefined and where -+; indices for that operand are "don't care". Target-independent code -+; converts the indices themselves into "undef"s. -+define <16 x i8> @f7(<16 x i8> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vmrhb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a canonical v8i16 merge high. -+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmrhh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a reversed v8i16 merge high. 
-+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vmrhh %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a canonical v4i32 merge high. -+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vmrhf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a reversed v4i32 merge high. -+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vmrhf %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a canonical v2i64 merge high. -+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vmrhg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test a reversed v2i64 merge high. -+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vmrhg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test a canonical v4f32 merge high. -+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f14: -+; CHECK: vmrhf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a reversed v4f32 merge high. -+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f15: -+; CHECK: vmrhf %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a canonical v2f64 merge high. 
-+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f16: -+; CHECK: vmrhg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -+ -+; Test a reversed v2f64 merge high. -+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f17: -+; CHECK: vmrhg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll -@@ -0,0 +1,200 @@ -+; Test vector merge low. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a canonical v16i8 merge low. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vmrlb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a reversed v16i8 merge low. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vmrlb %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge low with only the first operand being used. -+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vmrlb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge low with only the second operand being used. -+; This is converted into @f3 by target-independent code. 
-+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vmrlb %v24, %v26, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge with both operands being the same. This too is -+; converted into @f3 by target-independent code. -+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vmrlb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge in which some of the indices are don't care. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vmrlb %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 merge in which one of the operands is undefined and where -+; indices for that operand are "don't care". Target-independent code -+; converts the indices themselves into "undef"s. -+define <16 x i8> @f7(<16 x i8> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vmrlb %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a canonical v8i16 merge low. -+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vmrlh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a reversed v8i16 merge low. -+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vmrlh %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a canonical v4i32 merge low. 
-+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vmrlf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a reversed v4i32 merge low. -+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vmrlf %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a canonical v2i64 merge low. -+define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vmrlg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test a reversed v2i64 merge low. -+define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vmrlg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test a canonical v4f32 merge low. -+define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f14: -+; CHECK: vmrlf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a reversed v4f32 merge low. -+define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f15: -+; CHECK: vmrlf %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a canonical v2f64 merge low. -+define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f16: -+; CHECK: vmrlg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -+ -+; Test a reversed v2f64 merge low. 
-+define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f17: -+; CHECK: vmrlg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll -@@ -0,0 +1,160 @@ -+; Test vector pack. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a canonical v16i8 pack. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vpkh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a reversed v16i8 pack. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vpkh %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 pack with only the first operand being used. -+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vpkh %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 pack with only the second operand being used. -+; This is converted into @f3 by target-independent code. -+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vpkh %v24, %v26, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 pack with both operands being the same. This too is -+; converted into @f3 by target-independent code. 
-+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vpkh %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 pack in which some of the indices are don't care. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vpkh %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 pack in which one of the operands is undefined and where -+; indices for that operand are "don't care". Target-independent code -+; converts the indices themselves into "undef"s. -+define <16 x i8> @f7(<16 x i8> %val) { -+; CHECK-LABEL: f7: -+; CHECK: vpkh %v24, %v24, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a canonical v8i16 pack. -+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vpkf %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a reversed v8i16 pack. -+define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vpkf %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a canonical v4i32 pack. -+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vpkg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a reversed v4i32 pack. 
-+define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vpkg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a canonical v4f32 pack. -+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vpkg %v24, %v24, %v26 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a reversed v4f32 pack. -+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vpkg %v24, %v26, %v24 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll -@@ -0,0 +1,145 @@ -+; Test vector shift left double immediate. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift with the lowest useful shift amount. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vsldb %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift with the highest shift amount. -+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vsldb %v24, %v24, %v26, 15 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift in which the operands need to be reversed. 
-+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vsldb %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift in which the operands need to be duplicated. -+define <16 x i8> @f4(<16 x i8> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vsldb %v24, %v24, %v24, 7 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift in which some of the indices are undefs. -+define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f5: -+; CHECK: vsldb %v24, %v24, %v26, 11 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; ...and again with reversed operands. -+define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vsldb %v24, %v26, %v24, 13 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift with the lowest useful shift amount. -+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vsldb %v24, %v24, %v26, 2 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift with the highest useful shift amount. -+define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vsldb %v24, %v24, %v26, 14 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift with the lowest useful shift amount. 
-+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vsldb %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift with the highest useful shift amount. -+define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vsldb %v24, %v24, %v26, 12 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4f32 shift with the lowest useful shift amount. -+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vsldb %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a v4f32 shift with the highest useful shift amount. -+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vsldb %v24, %v24, %v26, 12 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; We use VPDI for v2i64 shuffles. -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll -@@ -0,0 +1,170 @@ -+; Test vector permutes using VPDI. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a high1/low2 permute for v16i8. -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a low2/high1 permute for v16i8. 
-+define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a low1/high2 permute for v16i8. -+define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vpdi %v24, %v24, %v26, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a high2/low1 permute for v16i8. -+define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vpdi %v24, %v26, %v24, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test reversing two doublewords in a v16i8. -+define <16 x i8> @f5(<16 x i8> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vpdi %v24, %v24, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -+ -+; Test a high1/low2 permute for v8i16. -+define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a low2/high1 permute for v8i16. -+define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -+ -+; Test a high1/low2 permute for v4i32. -+define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a low2/high1 permute for v4i32. 
-+define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a high1/low2 permute for v2i64. -+define <2 x i64> @f10(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test low2/high1 permute for v2i64. -+define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, -+ <2 x i32> -+ ret <2 x i64> %ret -+} -+ -+; Test a high1/low2 permute for v4f32. -+define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a low2/high1 permute for v4f32. -+define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, -+ <4 x i32> -+ ret <4 x float> %ret -+} -+ -+; Test a high1/low2 permute for v2f64. -+define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f14: -+; CHECK: vpdi %v24, %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -+ -+; Test a low2/high1 permute for v2f64. 
-+define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f15: -+; CHECK: vpdi %v24, %v26, %v24, 4 -+; CHECK: br %r14 -+ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, -+ <2 x i32> -+ ret <2 x double> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll -@@ -0,0 +1,38 @@ -+; Test general vector permute of a v16i8. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-CODE %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+ -+define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-CODE-LABEL: f1: -+; CHECK-CODE: larl [[REG:%r[0-5]]], -+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) -+; CHECK-CODE: vperm %v24, %v24, %v26, [[MASK]] -+; CHECK-CODE: br %r14 -+; -+; CHECK-VECTOR: .byte 1 -+; CHECK-VECTOR-NEXT: .byte 19 -+; CHECK-VECTOR-NEXT: .byte 6 -+; CHECK-VECTOR-NEXT: .byte 5 -+; CHECK-VECTOR-NEXT: .byte 20 -+; CHECK-VECTOR-NEXT: .byte 22 -+; CHECK-VECTOR-NEXT: .byte 1 -+; CHECK-VECTOR-NEXT: .byte 1 -+; CHECK-VECTOR-NEXT: .byte 25 -+; CHECK-VECTOR-NEXT: .byte 29 -+; CHECK-VECTOR-NEXT: .byte 11 -+; Any byte would be OK here -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .byte 31 -+; CHECK-VECTOR-NEXT: .byte 4 -+; CHECK-VECTOR-NEXT: .byte 15 -+; CHECK-VECTOR-NEXT: .byte 19 -+ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, -+ <16 x i32> -+ ret <16 x i8> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll -@@ -0,0 +1,36 @@ -+; Test general vector permute of a v8i16. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-CODE %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+ -+define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-CODE-LABEL: f1: -+; CHECK-CODE: larl [[REG:%r[0-5]]], -+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) -+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]] -+; CHECK-CODE: br %r14 -+; -+; CHECK-VECTOR: .byte 0 -+; CHECK-VECTOR-NEXT: .byte 1 -+; CHECK-VECTOR-NEXT: .byte 26 -+; CHECK-VECTOR-NEXT: .byte 27 -+; Any 2 bytes would be OK here -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .byte 28 -+; CHECK-VECTOR-NEXT: .byte 29 -+; CHECK-VECTOR-NEXT: .byte 6 -+; CHECK-VECTOR-NEXT: .byte 7 -+; CHECK-VECTOR-NEXT: .byte 14 -+; CHECK-VECTOR-NEXT: .byte 15 -+; CHECK-VECTOR-NEXT: .byte 8 -+; CHECK-VECTOR-NEXT: .byte 9 -+; CHECK-VECTOR-NEXT: .byte 16 -+; CHECK-VECTOR-NEXT: .byte 17 -+ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, -+ <8 x i32> -+ ret <8 x i16> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll -@@ -0,0 +1,35 @@ -+; Test general vector permute of a v4i32. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-CODE %s -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ -+; RUN: FileCheck -check-prefix=CHECK-VECTOR %s -+ -+define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-CODE-LABEL: f1: -+; CHECK-CODE: larl [[REG:%r[0-5]]], -+; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) -+; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]] -+; CHECK-CODE: br %r14 -+; -+; CHECK-VECTOR: .byte 4 -+; CHECK-VECTOR-NEXT: .byte 5 -+; CHECK-VECTOR-NEXT: .byte 6 -+; CHECK-VECTOR-NEXT: .byte 7 -+; CHECK-VECTOR-NEXT: .byte 20 -+; CHECK-VECTOR-NEXT: .byte 21 -+; CHECK-VECTOR-NEXT: .byte 22 -+; CHECK-VECTOR-NEXT: .byte 23 -+; Any 4 bytes would be OK here -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .space 1 -+; CHECK-VECTOR-NEXT: .byte 12 -+; CHECK-VECTOR-NEXT: .byte 13 -+; CHECK-VECTOR-NEXT: .byte 14 -+; CHECK-VECTOR-NEXT: .byte 15 -+ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, -+ <4 x i32> -+ ret <4 x i32> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-round-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-round-01.ll -@@ -0,0 +1,118 @@ -+; Test v2f64 rounding. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare double @llvm.rint.f64(double) -+declare double @llvm.nearbyint.f64(double) -+declare double @llvm.floor.f64(double) -+declare double @llvm.ceil.f64(double) -+declare double @llvm.trunc.f64(double) -+declare double @llvm.round.f64(double) -+declare <2 x double> @llvm.rint.v2f64(<2 x double>) -+declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) -+declare <2 x double> @llvm.floor.v2f64(<2 x double>) -+declare <2 x double> @llvm.ceil.v2f64(<2 x double>) -+declare <2 x double> @llvm.trunc.v2f64(<2 x double>) -+declare <2 x double> @llvm.round.v2f64(<2 x double>) -+ -+define <2 x double> @f1(<2 x double> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vfidb %v24, %v24, 0, 0 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define <2 x double> @f2(<2 x double> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vfidb %v24, %v24, 4, 0 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define <2 x double> @f3(<2 x double> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vfidb %v24, %v24, 4, 7 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define <2 x double> @f4(<2 x double> %val) { -+; CHECK-LABEL: f4: -+; CHECK: vfidb %v24, %v24, 4, 6 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define <2 x double> @f5(<2 x double> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vfidb %v24, %v24, 4, 5 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define <2 x double> @f6(<2 x double> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vfidb %v24, %v24, 4, 1 -+; CHECK: br %r14 -+ %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val) -+ ret <2 x double> %res -+} -+ -+define double @f7(<2 x 
double> %val) { -+; CHECK-LABEL: f7: -+; CHECK: wfidb %f0, %v24, 0, 0 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.rint.f64(double %scalar) -+ ret double %res -+} -+ -+define double @f8(<2 x double> %val) { -+; CHECK-LABEL: f8: -+; CHECK: wfidb %f0, %v24, 4, 0 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.nearbyint.f64(double %scalar) -+ ret double %res -+} -+ -+define double @f9(<2 x double> %val) { -+; CHECK-LABEL: f9: -+; CHECK: wfidb %f0, %v24, 4, 7 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.floor.f64(double %scalar) -+ ret double %res -+} -+ -+define double @f10(<2 x double> %val) { -+; CHECK-LABEL: f10: -+; CHECK: wfidb %f0, %v24, 4, 6 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.ceil.f64(double %scalar) -+ ret double %res -+} -+ -+define double @f11(<2 x double> %val) { -+; CHECK-LABEL: f11: -+; CHECK: wfidb %f0, %v24, 4, 5 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.trunc.f64(double %scalar) -+ ret double %res -+} -+ -+define double @f12(<2 x double> %val) { -+; CHECK-LABEL: f12: -+; CHECK: wfidb %f0, %v24, 4, 1 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %res = call double @llvm.round.f64(double %scalar) -+ ret double %res -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll -@@ -0,0 +1,39 @@ -+; Test vector shift left with vector shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift. 
-+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: veslvb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = shl <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: veslvh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = shl <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: veslvf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = shl <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: veslvg %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = shl <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll -@@ -0,0 +1,39 @@ -+; Test vector arithmetic shift right with vector shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vesravb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = ashr <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vesravh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = ashr <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift. 
-+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vesravf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = ashr <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vesravg %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = ashr <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll -@@ -0,0 +1,39 @@ -+; Test vector logical shift right with vector shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vesrlvb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = lshr <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vesrlvh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = lshr <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vesrlvf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = lshr <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift. 
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vesrlvg %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = lshr <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll -@@ -0,0 +1,134 @@ -+; Test vector shift left with scalar shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift by a variable. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { -+; CHECK-LABEL: f1: -+; CHECK: veslb %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i8 -+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 -+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ %ret = shl <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the lowest useful constant. -+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: veslb %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shl <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the highest useful constant. -+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: veslb %v24, %v26, 7 -+; CHECK: br %r14 -+ %ret = shl <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift by a variable. -+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { -+; CHECK-LABEL: f4: -+; CHECK: veslh %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i16 -+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 -+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ %ret = shl <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the lowest useful constant. 
-+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: veslh %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shl <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the highest useful constant. -+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: veslh %v24, %v26, 15 -+; CHECK: br %r14 -+ %ret = shl <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift by a variable. -+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { -+; CHECK-LABEL: f7: -+; CHECK: veslf %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 -+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ %ret = shl <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the lowest useful constant. -+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f8: -+; CHECK: veslf %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shl <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the highest useful constant. -+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: veslf %v24, %v26, 31 -+; CHECK: br %r14 -+ %ret = shl <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift by a variable. -+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { -+; CHECK-LABEL: f10: -+; CHECK: veslg %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %extshift = sext i32 %shift to i64 -+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 -+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ %ret = shl <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the lowest useful constant. 
-+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: veslg %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = shl <2 x i64> %val, -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the highest useful constant. -+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f12: -+; CHECK: veslg %v24, %v26, 63 -+; CHECK: br %r14 -+ %ret = shl <2 x i64> %val, -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll -@@ -0,0 +1,134 @@ -+; Test vector arithmetic shift right with scalar shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift by a variable. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { -+; CHECK-LABEL: f1: -+; CHECK: vesrab %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i8 -+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 -+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ %ret = ashr <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the lowest useful constant. -+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vesrab %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = ashr <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the highest useful constant. -+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vesrab %v24, %v26, 7 -+; CHECK: br %r14 -+ %ret = ashr <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift by a variable. 
-+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { -+; CHECK-LABEL: f4: -+; CHECK: vesrah %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i16 -+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 -+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ %ret = ashr <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the lowest useful constant. -+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vesrah %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = ashr <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the highest useful constant. -+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vesrah %v24, %v26, 15 -+; CHECK: br %r14 -+ %ret = ashr <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift by a variable. -+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { -+; CHECK-LABEL: f7: -+; CHECK: vesraf %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 -+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ %ret = ashr <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the lowest useful constant. -+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vesraf %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = ashr <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the highest useful constant. -+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vesraf %v24, %v26, 31 -+; CHECK: br %r14 -+ %ret = ashr <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift by a variable. 
-+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { -+; CHECK-LABEL: f10: -+; CHECK: vesrag %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %extshift = sext i32 %shift to i64 -+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 -+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ %ret = ashr <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the lowest useful constant. -+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vesrag %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = ashr <2 x i64> %val, -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the highest useful constant. -+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f12: -+; CHECK: vesrag %v24, %v26, 63 -+; CHECK: br %r14 -+ %ret = ashr <2 x i64> %val, -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll -@@ -0,0 +1,134 @@ -+; Test vector logical shift right with scalar shift amount. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 shift by a variable. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { -+; CHECK-LABEL: f1: -+; CHECK: vesrlb %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i8 -+ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 -+ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, -+ <16 x i32> zeroinitializer -+ %ret = lshr <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the lowest useful constant. 
-+define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f2: -+; CHECK: vesrlb %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = lshr <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v16i8 shift by the highest useful constant. -+define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { -+; CHECK-LABEL: f3: -+; CHECK: vesrlb %v24, %v26, 7 -+; CHECK: br %r14 -+ %ret = lshr <16 x i8> %val, -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 shift by a variable. -+define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { -+; CHECK-LABEL: f4: -+; CHECK: vesrlh %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %truncshift = trunc i32 %shift to i16 -+ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 -+ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, -+ <8 x i32> zeroinitializer -+ %ret = lshr <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the lowest useful constant. -+define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f5: -+; CHECK: vesrlh %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = lshr <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i16 shift by the highest useful constant. -+define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { -+; CHECK-LABEL: f6: -+; CHECK: vesrlh %v24, %v26, 15 -+; CHECK: br %r14 -+ %ret = lshr <8 x i16> %val, -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 shift by a variable. -+define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { -+; CHECK-LABEL: f7: -+; CHECK: vesrlf %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 -+ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, -+ <4 x i32> zeroinitializer -+ %ret = lshr <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the lowest useful constant. 
-+define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vesrlf %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = lshr <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i32 shift by the highest useful constant. -+define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vesrlf %v24, %v26, 31 -+; CHECK: br %r14 -+ %ret = lshr <4 x i32> %val, -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 shift by a variable. -+define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { -+; CHECK-LABEL: f10: -+; CHECK: vesrlg %v24, %v26, 0(%r2) -+; CHECK: br %r14 -+ %extshift = sext i32 %shift to i64 -+ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 -+ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, -+ <2 x i32> zeroinitializer -+ %ret = lshr <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the lowest useful constant. -+define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vesrlg %v24, %v26, 1 -+; CHECK: br %r14 -+ %ret = lshr <2 x i64> %val, -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i64 shift by the highest useful constant. -+define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { -+; CHECK-LABEL: f12: -+; CHECK: vesrlg %v24, %v26, 63 -+; CHECK: br %r14 -+ %ret = lshr <2 x i64> %val, -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll -@@ -0,0 +1,182 @@ -+; Test vector sign extensions. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i1->v16i8 extension. 
-+define <16 x i8> @f1(<16 x i8> %val) { -+; CHECK-LABEL: f1: -+; CHECK: veslb [[REG:%v[0-9]+]], %v24, 7 -+; CHECK: vesrab %v24, [[REG]], 7 -+; CHECK: br %r14 -+ %trunc = trunc <16 x i8> %val to <16 x i1> -+ %ret = sext <16 x i1> %trunc to <16 x i8> -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i1->v8i16 extension. -+define <8 x i16> @f2(<8 x i16> %val) { -+; CHECK-LABEL: f2: -+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 15 -+; CHECK: vesrah %v24, [[REG]], 15 -+; CHECK: br %r14 -+ %trunc = trunc <8 x i16> %val to <8 x i1> -+ %ret = sext <8 x i1> %trunc to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v8i8->v8i16 extension. -+define <8 x i16> @f3(<8 x i16> %val) { -+; CHECK-LABEL: f3: -+; CHECK: veslh [[REG:%v[0-9]+]], %v24, 8 -+; CHECK: vesrah %v24, [[REG]], 8 -+; CHECK: br %r14 -+ %trunc = trunc <8 x i16> %val to <8 x i8> -+ %ret = sext <8 x i8> %trunc to <8 x i16> -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i1->v4i32 extension. -+define <4 x i32> @f4(<4 x i32> %val) { -+; CHECK-LABEL: f4: -+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 31 -+; CHECK: vesraf %v24, [[REG]], 31 -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i1> -+ %ret = sext <4 x i1> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i8->v4i32 extension. -+define <4 x i32> @f5(<4 x i32> %val) { -+; CHECK-LABEL: f5: -+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 24 -+; CHECK: vesraf %v24, [[REG]], 24 -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i8> -+ %ret = sext <4 x i8> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v4i16->v4i32 extension. -+define <4 x i32> @f6(<4 x i32> %val) { -+; CHECK-LABEL: f6: -+; CHECK: veslf [[REG:%v[0-9]+]], %v24, 16 -+; CHECK: vesraf %v24, [[REG]], 16 -+; CHECK: br %r14 -+ %trunc = trunc <4 x i32> %val to <4 x i16> -+ %ret = sext <4 x i16> %trunc to <4 x i32> -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i1->v2i64 extension. 
-+define <2 x i64> @f7(<2 x i64> %val) { -+; CHECK-LABEL: f7: -+; CHECK: veslg [[REG:%v[0-9]+]], %v24, 63 -+; CHECK: vesrag %v24, [[REG]], 63 -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i1> -+ %ret = sext <2 x i1> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i8->v2i64 extension. -+define <2 x i64> @f8(<2 x i64> %val) { -+; CHECK-LABEL: f8: -+; CHECK: vsegb %v24, %v24 -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i8> -+ %ret = sext <2 x i8> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i16->v2i64 extension. -+define <2 x i64> @f9(<2 x i64> %val) { -+; CHECK-LABEL: f9: -+; CHECK: vsegh %v24, %v24 -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i16> -+ %ret = sext <2 x i16> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test a v2i32->v2i64 extension. -+define <2 x i64> @f10(<2 x i64> %val) { -+; CHECK-LABEL: f10: -+; CHECK: vsegf %v24, %v24 -+; CHECK: br %r14 -+ %trunc = trunc <2 x i64> %val to <2 x i32> -+ %ret = sext <2 x i32> %trunc to <2 x i64> -+ ret <2 x i64> %ret -+} -+ -+; Test an alternative v2i8->v2i64 extension. -+define <2 x i64> @f11(<2 x i64> %val) { -+; CHECK-LABEL: f11: -+; CHECK: vsegb %v24, %v24 -+; CHECK: br %r14 -+ %shl = shl <2 x i64> %val, -+ %ret = ashr <2 x i64> %shl, -+ ret <2 x i64> %ret -+} -+ -+; Test an alternative v2i16->v2i64 extension. -+define <2 x i64> @f12(<2 x i64> %val) { -+; CHECK-LABEL: f12: -+; CHECK: vsegh %v24, %v24 -+; CHECK: br %r14 -+ %shl = shl <2 x i64> %val, -+ %ret = ashr <2 x i64> %shl, -+ ret <2 x i64> %ret -+} -+ -+; Test an alternative v2i32->v2i64 extension. -+define <2 x i64> @f13(<2 x i64> %val) { -+; CHECK-LABEL: f13: -+; CHECK: vsegf %v24, %v24 -+; CHECK: br %r14 -+ %shl = shl <2 x i64> %val, -+ %ret = ashr <2 x i64> %shl, -+ ret <2 x i64> %ret -+} -+ -+; Test an extraction-based v2i8->v2i64 extension. 
-+define <2 x i64> @f14(<16 x i8> %val) { -+; CHECK-LABEL: f14: -+; CHECK: vsegb %v24, %v24 -+; CHECK: br %r14 -+ %elt0 = extractelement <16 x i8> %val, i32 7 -+ %elt1 = extractelement <16 x i8> %val, i32 15 -+ %ext0 = sext i8 %elt0 to i64 -+ %ext1 = sext i8 %elt1 to i64 -+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 -+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 -+ ret <2 x i64> %vec1 -+} -+ -+; Test an extraction-based v2i16->v2i64 extension. -+define <2 x i64> @f15(<16 x i16> %val) { -+; CHECK-LABEL: f15: -+; CHECK: vsegh %v24, %v24 -+; CHECK: br %r14 -+ %elt0 = extractelement <16 x i16> %val, i32 3 -+ %elt1 = extractelement <16 x i16> %val, i32 7 -+ %ext0 = sext i16 %elt0 to i64 -+ %ext1 = sext i16 %elt1 to i64 -+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 -+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 -+ ret <2 x i64> %vec1 -+} -+ -+; Test an extraction-based v2i32->v2i64 extension. -+define <2 x i64> @f16(<16 x i32> %val) { -+; CHECK-LABEL: f16: -+; CHECK: vsegf %v24, %v24 -+; CHECK: br %r14 -+ %elt0 = extractelement <16 x i32> %val, i32 1 -+ %elt1 = extractelement <16 x i32> %val, i32 3 -+ %ext0 = sext i32 %elt0 to i64 -+ %ext1 = sext i32 %elt1 to i64 -+ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 -+ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 -+ ret <2 x i64> %vec1 -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll -@@ -0,0 +1,23 @@ -+; Test f64 and v2f64 square root. 
-+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+declare double @llvm.sqrt.f64(double) -+declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) -+ -+define <2 x double> @f1(<2 x double> %val) { -+; CHECK-LABEL: f1: -+; CHECK: vfsqdb %v24, %v24 -+; CHECK: br %r14 -+ %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val) -+ ret <2 x double> %ret -+} -+ -+define double @f2(<2 x double> %val) { -+; CHECK-LABEL: f2: -+; CHECK: wfsqdb %f0, %v24 -+; CHECK: br %r14 -+ %scalar = extractelement <2 x double> %val, i32 0 -+ %ret = call double @llvm.sqrt.f64(double %scalar) -+ ret double %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll -@@ -0,0 +1,148 @@ -+; Test vector subtraction. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 subtraction. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vsb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 subtraction. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vsh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 subtraction. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vsf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 subtraction. 
-+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vsg %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -+ -+; Test a v4f32 subtraction, as an example of an operation that needs to be -+; scalarized and reassembled. At present there's an unnecessary move that -+; could be avoided with smarter ordering. It also isn't important whether -+; the VSLDBs use the result of the VLRs or use %v24 and %v26 directly. -+define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) { -+; CHECK-LABEL: f5: -+; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24 -+; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26 -+; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1 -+; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1 -+; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2 -+; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2 -+; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3 -+; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3 -+; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]] -+; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]] -+; CHECK-DAG: sebr %f[[B1]], %f[[B2]] -+; CHECK-DAG: sebr %f[[C1]], %f[[C2]] -+; CHECK-DAG: sebr %f[[D1]], %f[[D2]] -+; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]] -+; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]] -+; CHECK: vmrhg %v24, [[HIGH]], [[LOW]] -+; CHECK: br %r14 -+ %ret = fsub <4 x float> %val1, %val2 -+ ret <4 x float> %ret -+} -+ -+; Test a v2f64 subtraction. -+define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1, -+ <2 x double> %val2) { -+; CHECK-LABEL: f6: -+; CHECK: vfsdb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = fsub <2 x double> %val1, %val2 -+ ret <2 x double> %ret -+} -+ -+; Test an f64 subtraction that uses vector registers. 
-+define double @f7(<2 x double> %val1, <2 x double> %val2) { -+; CHECK-LABEL: f7: -+; CHECK: wfsdb %f0, %v24, %v26 -+; CHECK: br %r14 -+ %scalar1 = extractelement <2 x double> %val1, i32 0 -+ %scalar2 = extractelement <2 x double> %val2, i32 0 -+ %ret = fsub double %scalar1, %scalar2 -+ ret double %ret -+} -+ -+; Test a v2i8 subtraction, which gets promoted to v16i8. -+define <2 x i8> @f8(<2 x i8> %dummy, <2 x i8> %val1, <2 x i8> %val2) { -+; CHECK-LABEL: f8: -+; CHECK: vsb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <2 x i8> %val1, %val2 -+ ret <2 x i8> %ret -+} -+ -+; Test a v4i8 subtraction, which gets promoted to v16i8. -+define <4 x i8> @f9(<4 x i8> %dummy, <4 x i8> %val1, <4 x i8> %val2) { -+; CHECK-LABEL: f9: -+; CHECK: vsb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <4 x i8> %val1, %val2 -+ ret <4 x i8> %ret -+} -+ -+; Test a v8i8 subtraction, which gets promoted to v16i8. -+define <8 x i8> @f10(<8 x i8> %dummy, <8 x i8> %val1, <8 x i8> %val2) { -+; CHECK-LABEL: f10: -+; CHECK: vsb %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <8 x i8> %val1, %val2 -+ ret <8 x i8> %ret -+} -+ -+; Test a v2i16 subtraction, which gets promoted to v8i16. -+define <2 x i16> @f11(<2 x i16> %dummy, <2 x i16> %val1, <2 x i16> %val2) { -+; CHECK-LABEL: f11: -+; CHECK: vsh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <2 x i16> %val1, %val2 -+ ret <2 x i16> %ret -+} -+ -+; Test a v4i16 subtraction, which gets promoted to v8i16. -+define <4 x i16> @f12(<4 x i16> %dummy, <4 x i16> %val1, <4 x i16> %val2) { -+; CHECK-LABEL: f12: -+; CHECK: vsh %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <4 x i16> %val1, %val2 -+ ret <4 x i16> %ret -+} -+ -+; Test a v2i32 subtraction, which gets promoted to v4i32. 
-+define <2 x i32> @f13(<2 x i32> %dummy, <2 x i32> %val1, <2 x i32> %val2) { -+; CHECK-LABEL: f13: -+; CHECK: vsf %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = sub <2 x i32> %val1, %val2 -+ ret <2 x i32> %ret -+} -+ -+; Test a v2f32 subtraction, which gets promoted to v4f32. -+define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) { -+; No particular output expected, but must compile. -+ %ret = fsub <2 x float> %val1, %val2 -+ ret <2 x float> %ret -+} -Index: llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll -=================================================================== ---- /dev/null -+++ llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll -@@ -0,0 +1,39 @@ -+; Test vector XOR. -+; -+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -+ -+; Test a v16i8 XOR. -+define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { -+; CHECK-LABEL: f1: -+; CHECK: vx %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = xor <16 x i8> %val1, %val2 -+ ret <16 x i8> %ret -+} -+ -+; Test a v8i16 XOR. -+define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { -+; CHECK-LABEL: f2: -+; CHECK: vx %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = xor <8 x i16> %val1, %val2 -+ ret <8 x i16> %ret -+} -+ -+; Test a v4i32 XOR. -+define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { -+; CHECK-LABEL: f3: -+; CHECK: vx %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = xor <4 x i32> %val1, %val2 -+ ret <4 x i32> %ret -+} -+ -+; Test a v2i64 XOR. -+define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { -+; CHECK-LABEL: f4: -+; CHECK: vx %v24, %v26, %v28 -+; CHECK: br %r14 -+ %ret = xor <2 x i64> %val1, %val2 -+ ret <2 x i64> %ret -+} -Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt -@@ -0,0 +1,39 @@ -+# Test z13 instructions that don't have PC-relative operands. 
-+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 2>&1 \ -+# RUN: | FileCheck %s -+ -+# This would be "vlef %v0, 0, 4", but element 4 is invalid. -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x40 0x03 -+0xe7 0x00 0x00 0x00 0x40 0x03 -+ -+# ...and again with element 15 -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x03 -+0xe7 0x00 0x00 0x00 0xf0 0x03 -+ -+# This would be "vleg %v0, 0, 2", but element 2 is invalid. -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x20 0x02 -+0xe7 0x00 0x00 0x00 0x20 0x02 -+ -+# ...and again with element 15 -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x02 -+0xe7 0x00 0x00 0x00 0xf0 0x02 -+ -+# This would be "vleh %v0, 0, 8", but element 8 is invalid. -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x80 0x01 -+0xe7 0x00 0x00 0x00 0x80 0x01 -+ -+# ...and again with element 15 -+# -+#CHECK: warning: invalid instruction encoding -+#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x01 -+0xe7 0x00 0x00 0x00 0xf0 0x01 -Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt -@@ -0,0 +1,3315 @@ -+# Test z13 instructions that don't have PC-relative operands. 
-+# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 \ -+# RUN: | FileCheck %s -+ -+#CHECK: lcbb %r0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x27 -+ -+#CHECK: lcbb %r1, 2475(%r7,%r8), 12 -+0xe7 0x17 0x89 0xab 0xc0 0x27 -+ -+#CHECK: lcbb %r15, 4095(%r15,%r15), 15 -+0xe7 0xff 0xff 0xff 0xf0 0x27 -+ -+#CHECK: vab %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf3 -+ -+#CHECK: vab %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf3 -+ -+#CHECK: vab %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf3 -+ -+#CHECK: vaccb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf1 -+ -+#CHECK: vaccb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf1 -+ -+#CHECK: vaccb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf1 -+ -+#CHECK: vacccq %v0, %v0, %v0, %v0 -+0xe7 0x00 0x04 0x00 0x00 0xb9 -+ -+#CHECK: vacccq %v3, %v20, %v5, %v22 -+0xe7 0x34 0x54 0x00 0x65 0xb9 -+ -+#CHECK: vacccq %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf4 0x00 0xff 0xb9 -+ -+#CHECK: vaccf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf1 -+ -+#CHECK: vaccf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf1 -+ -+#CHECK: vaccf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf1 -+ -+#CHECK: vaccg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf1 -+ -+#CHECK: vaccg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf1 -+ -+#CHECK: vaccg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf1 -+ -+#CHECK: vacch %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf1 -+ -+#CHECK: vacch %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf1 -+ -+#CHECK: vacch %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf1 -+ -+#CHECK: vaccq %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x40 0xf1 -+ -+#CHECK: vaccq %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x4a 0xf1 -+ -+#CHECK: vaccq %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x4e 0xf1 -+ -+#CHECK: vacq %v0, %v0, %v0, %v0 -+0xe7 0x00 0x04 0x00 0x00 0xbb -+ -+#CHECK: vacq %v3, %v20, %v5, %v22 -+0xe7 0x34 0x54 0x00 0x65 0xbb -+ -+#CHECK: vacq %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf4 0x00 0xff 0xbb -+ -+#CHECK: vaf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf3 -+ 
-+#CHECK: vaf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf3 -+ -+#CHECK: vaf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf3 -+ -+#CHECK: vag %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf3 -+ -+#CHECK: vag %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf3 -+ -+#CHECK: vag %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf3 -+ -+#CHECK: vah %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf3 -+ -+#CHECK: vah %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf3 -+ -+#CHECK: vah %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf3 -+ -+#CHECK: vaq %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x40 0xf3 -+ -+#CHECK: vaq %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x4a 0xf3 -+ -+#CHECK: vaq %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x4e 0xf3 -+ -+#CHECK: vavgb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf2 -+ -+#CHECK: vavgb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf2 -+ -+#CHECK: vavgb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf2 -+ -+#CHECK: vavgf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf2 -+ -+#CHECK: vavgf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf2 -+ -+#CHECK: vavgf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf2 -+ -+#CHECK: vavgg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf2 -+ -+#CHECK: vavgg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf2 -+ -+#CHECK: vavgg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf2 -+ -+#CHECK: vavgh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf2 -+ -+#CHECK: vavgh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf2 -+ -+#CHECK: vavgh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf2 -+ -+#CHECK: vavglb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf0 -+ -+#CHECK: vavglb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf0 -+ -+#CHECK: vavglb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf0 -+ -+#CHECK: vavglf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf0 -+ -+#CHECK: vavglf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf0 -+ -+#CHECK: vavglf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf0 -+ -+#CHECK: vavglg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf0 -+ -+#CHECK: 
vavglg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf0 -+ -+#CHECK: vavglg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf0 -+ -+#CHECK: vavglh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf0 -+ -+#CHECK: vavglh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf0 -+ -+#CHECK: vavglh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf0 -+ -+#CHECK: vcdgb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc3 -+ -+#CHECK: vcdgb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc3 -+ -+#CHECK: vcdgb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 0x3c 0xc3 -+ -+#CHECK: vcdlgb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc1 -+ -+#CHECK: vcdlgb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc1 -+ -+#CHECK: vcdlgb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 0x3c 0xc1 -+ -+#CHECK: vceqb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf8 -+ -+#CHECK: vceqb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf8 -+ -+#CHECK: vceqbs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x04 0xf8 -+ -+#CHECK: vceqb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf8 -+ -+#CHECK: vceqf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf8 -+ -+#CHECK: vceqf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf8 -+ -+#CHECK: vceqfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0xf8 -+ -+#CHECK: vceqf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf8 -+ -+#CHECK: vceqg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf8 -+ -+#CHECK: vceqg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf8 -+ -+#CHECK: vceqgs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x34 0xf8 -+ -+#CHECK: vceqg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf8 -+ -+#CHECK: vceqh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf8 -+ -+#CHECK: vceqh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf8 -+ -+#CHECK: vceqhs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0xf8 -+ -+#CHECK: vceqh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf8 -+ -+#CHECK: vcgdb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc2 -+ -+#CHECK: vcgdb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc2 -+ -+#CHECK: vcgdb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 
0x3c 0xc2 -+ -+#CHECK: vchb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xfb -+ -+#CHECK: vchb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xfb -+ -+#CHECK: vchbs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x04 0xfb -+ -+#CHECK: vchb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xfb -+ -+#CHECK: vchf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xfb -+ -+#CHECK: vchf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xfb -+ -+#CHECK: vchfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0xfb -+ -+#CHECK: vchf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xfb -+ -+#CHECK: vchg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xfb -+ -+#CHECK: vchg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xfb -+ -+#CHECK: vchgs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x34 0xfb -+ -+#CHECK: vchg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xfb -+ -+#CHECK: vchh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xfb -+ -+#CHECK: vchh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xfb -+ -+#CHECK: vchhs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0xfb -+ -+#CHECK: vchh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xfb -+ -+#CHECK: vchlb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf9 -+ -+#CHECK: vchlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf9 -+ -+#CHECK: vchlbs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x04 0xf9 -+ -+#CHECK: vchlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf9 -+ -+#CHECK: vchlf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf9 -+ -+#CHECK: vchlf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf9 -+ -+#CHECK: vchlfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0xf9 -+ -+#CHECK: vchlf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf9 -+ -+#CHECK: vchlg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf9 -+ -+#CHECK: vchlg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf9 -+ -+#CHECK: vchlgs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x34 0xf9 -+ -+#CHECK: vchlg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf9 -+ -+#CHECK: vchlh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf9 -+ -+#CHECK: vchlh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf9 -+ 
-+#CHECK: vchlhs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0xf9 -+ -+#CHECK: vchlh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf9 -+ -+#CHECK: vcksm %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x66 -+ -+#CHECK: vcksm %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x66 -+ -+#CHECK: vcksm %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x66 -+ -+#CHECK: vclgdb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc0 -+ -+#CHECK: vclgdb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc0 -+ -+#CHECK: vclgdb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 0x3c 0xc0 -+ -+#CHECK: vclzb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x53 -+ -+#CHECK: vclzb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0x53 -+ -+#CHECK: vclzb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x53 -+ -+#CHECK: vclzf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x53 -+ -+#CHECK: vclzf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0x53 -+ -+#CHECK: vclzf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0x53 -+ -+#CHECK: vclzg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x53 -+ -+#CHECK: vclzg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0x53 -+ -+#CHECK: vclzg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0x53 -+ -+#CHECK: vclzh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x53 -+ -+#CHECK: vclzh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0x53 -+ -+#CHECK: vclzh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0x53 -+ -+#CHECK: vctzb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x52 -+ -+#CHECK: vctzb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0x52 -+ -+#CHECK: vctzb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x52 -+ -+#CHECK: vctzf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x52 -+ -+#CHECK: vctzf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0x52 -+ -+#CHECK: vctzf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0x52 -+ -+#CHECK: vctzg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x52 -+ -+#CHECK: vctzg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0x52 -+ -+#CHECK: vctzg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0x52 -+ -+#CHECK: vctzh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x52 -+ -+#CHECK: vctzh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0x52 -+ -+#CHECK: vctzh %v31, %v31 -+0xe7 0xff 0x00 
0x00 0x1c 0x52 -+ -+#CHECK: vecb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xdb -+ -+#CHECK: vecb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xdb -+ -+#CHECK: vecb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xdb -+ -+#CHECK: vecf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xdb -+ -+#CHECK: vecf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xdb -+ -+#CHECK: vecf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xdb -+ -+#CHECK: vecg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xdb -+ -+#CHECK: vecg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xdb -+ -+#CHECK: vecg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xdb -+ -+#CHECK: vech %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xdb -+ -+#CHECK: vech %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xdb -+ -+#CHECK: vech %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xdb -+ -+#CHECK: veclb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd9 -+ -+#CHECK: veclb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd9 -+ -+#CHECK: veclb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd9 -+ -+#CHECK: veclf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xd9 -+ -+#CHECK: veclf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xd9 -+ -+#CHECK: veclf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xd9 -+ -+#CHECK: veclg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xd9 -+ -+#CHECK: veclg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xd9 -+ -+#CHECK: veclg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xd9 -+ -+#CHECK: veclh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xd9 -+ -+#CHECK: veclh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xd9 -+ -+#CHECK: veclh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xd9 -+ -+#CHECK: verimb %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x72 -+ -+#CHECK: verimb %v3, %v20, %v5, 103 -+0xe7 0x34 0x50 0x67 0x04 0x72 -+ -+#CHECK: verimb %v31, %v31, %v31, 255 -+0xe7 0xff 0xf0 0xff 0x0e 0x72 -+ -+#CHECK: verimf %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x72 -+ -+#CHECK: verimf %v3, %v20, %v5, 103 -+0xe7 0x34 0x50 0x67 0x24 0x72 -+ -+#CHECK: verimf %v31, %v31, %v31, 255 -+0xe7 0xff 0xf0 0xff 0x2e 0x72 -+ -+#CHECK: verimg %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x72 -+ -+#CHECK: verimg %v3, 
%v20, %v5, 103 -+0xe7 0x34 0x50 0x67 0x34 0x72 -+ -+#CHECK: verimg %v31, %v31, %v31, 255 -+0xe7 0xff 0xf0 0xff 0x3e 0x72 -+ -+#CHECK: verimh %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x72 -+ -+#CHECK: verimh %v3, %v20, %v5, 103 -+0xe7 0x34 0x50 0x67 0x14 0x72 -+ -+#CHECK: verimh %v31, %v31, %v31, 255 -+0xe7 0xff 0xf0 0xff 0x1e 0x72 -+ -+#CHECK: verllvb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x73 -+ -+#CHECK: verllvb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x73 -+ -+#CHECK: verllvb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x73 -+ -+#CHECK: verllvf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x73 -+ -+#CHECK: verllvf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x73 -+ -+#CHECK: verllvf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x73 -+ -+#CHECK: verllvg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x73 -+ -+#CHECK: verllvg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x73 -+ -+#CHECK: verllvg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x73 -+ -+#CHECK: verllvh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x73 -+ -+#CHECK: verllvh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x73 -+ -+#CHECK: verllvh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x73 -+ -+#CHECK: verllb %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x33 -+ -+#CHECK: verllb %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x33 -+ -+#CHECK: verllb %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x33 -+ -+#CHECK: verllf %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x33 -+ -+#CHECK: verllf %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x24 0x33 -+ -+#CHECK: verllf %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x2c 0x33 -+ -+#CHECK: verllg %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x33 -+ -+#CHECK: verllg %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x34 0x33 -+ -+#CHECK: verllg %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x3c 0x33 -+ -+#CHECK: verllh %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x33 -+ -+#CHECK: verllh %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x14 0x33 -+ -+#CHECK: verllh %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 
0xff 0x1c 0x33 -+ -+#CHECK: veslvb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x70 -+ -+#CHECK: veslvb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x70 -+ -+#CHECK: veslvb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x70 -+ -+#CHECK: veslvf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x70 -+ -+#CHECK: veslvf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x70 -+ -+#CHECK: veslvf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x70 -+ -+#CHECK: veslvg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x70 -+ -+#CHECK: veslvg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x70 -+ -+#CHECK: veslvg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x70 -+ -+#CHECK: veslvh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x70 -+ -+#CHECK: veslvh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x70 -+ -+#CHECK: veslvh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x70 -+ -+#CHECK: veslb %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x30 -+ -+#CHECK: veslb %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x30 -+ -+#CHECK: veslb %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x30 -+ -+#CHECK: veslf %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x30 -+ -+#CHECK: veslf %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x24 0x30 -+ -+#CHECK: veslf %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x2c 0x30 -+ -+#CHECK: veslg %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x30 -+ -+#CHECK: veslg %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x34 0x30 -+ -+#CHECK: veslg %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x3c 0x30 -+ -+#CHECK: veslh %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x30 -+ -+#CHECK: veslh %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x14 0x30 -+ -+#CHECK: veslh %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x1c 0x30 -+ -+#CHECK: vesravb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x7a -+ -+#CHECK: vesravb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x7a -+ -+#CHECK: vesravb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x7a -+ -+#CHECK: vesravf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x7a -+ -+#CHECK: vesravf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 
0x2a 0x7a -+ -+#CHECK: vesravf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x7a -+ -+#CHECK: vesravg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x7a -+ -+#CHECK: vesravg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x7a -+ -+#CHECK: vesravg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x7a -+ -+#CHECK: vesravh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x7a -+ -+#CHECK: vesravh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x7a -+ -+#CHECK: vesravh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x7a -+ -+#CHECK: vesrab %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x3a -+ -+#CHECK: vesrab %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x3a -+ -+#CHECK: vesrab %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x3a -+ -+#CHECK: vesraf %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x3a -+ -+#CHECK: vesraf %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x24 0x3a -+ -+#CHECK: vesraf %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x2c 0x3a -+ -+#CHECK: vesrag %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x3a -+ -+#CHECK: vesrag %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x34 0x3a -+ -+#CHECK: vesrag %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x3c 0x3a -+ -+#CHECK: vesrah %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x3a -+ -+#CHECK: vesrah %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x14 0x3a -+ -+#CHECK: vesrah %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x1c 0x3a -+ -+#CHECK: vesrlvb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x78 -+ -+#CHECK: vesrlvb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x78 -+ -+#CHECK: vesrlvb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x78 -+ -+#CHECK: vesrlvf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x78 -+ -+#CHECK: vesrlvf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x78 -+ -+#CHECK: vesrlvf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x78 -+ -+#CHECK: vesrlvg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x78 -+ -+#CHECK: vesrlvg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x78 -+ -+#CHECK: vesrlvg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x78 -+ -+#CHECK: vesrlvh %v0, %v0, %v0 
-+0xe7 0x00 0x00 0x00 0x10 0x78 -+ -+#CHECK: vesrlvh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x78 -+ -+#CHECK: vesrlvh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x78 -+ -+#CHECK: vesrlb %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x38 -+ -+#CHECK: vesrlb %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x38 -+ -+#CHECK: vesrlb %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x38 -+ -+#CHECK: vesrlf %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x38 -+ -+#CHECK: vesrlf %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x24 0x38 -+ -+#CHECK: vesrlf %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x2c 0x38 -+ -+#CHECK: vesrlg %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x38 -+ -+#CHECK: vesrlg %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x34 0x38 -+ -+#CHECK: vesrlg %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x3c 0x38 -+ -+#CHECK: vesrlh %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x38 -+ -+#CHECK: vesrlh %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x14 0x38 -+ -+#CHECK: vesrlh %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x1c 0x38 -+ -+#CHECK: vfadb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xe3 -+ -+#CHECK: vfadb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xe3 -+ -+#CHECK: vfadb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xe3 -+ -+#CHECK: vfaeb %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x82 -+ -+#CHECK: vfaeb %v0, %v0, %v0, 12 -+0xe7 0x00 0x00 0xc0 0x00 0x82 -+ -+#CHECK: vfaeb %v18, %v3, %v20, 0 -+0xe7 0x23 0x40 0x00 0x0a 0x82 -+ -+#CHECK: vfaeb %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x40 0x0e 0x82 -+ -+#CHECK: vfaebs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0x90 0x0e 0x82 -+ -+#CHECK: vfaezb %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x60 0x0e 0x82 -+ -+#CHECK: vfaezbs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0xb0 0x0e 0x82 -+ -+#CHECK: vfaef %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x82 -+ -+#CHECK: vfaef %v0, %v0, %v0, 12 -+0xe7 0x00 0x00 0xc0 0x20 0x82 -+ -+#CHECK: vfaef %v18, %v3, %v20, 0 -+0xe7 0x23 0x40 0x00 0x2a 0x82 -+ -+#CHECK: vfaef %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x40 0x2e 0x82 -+ 
-+#CHECK: vfaefs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0x90 0x2e 0x82 -+ -+#CHECK: vfaezf %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x60 0x2e 0x82 -+ -+#CHECK: vfaezfs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0xb0 0x2e 0x82 -+ -+#CHECK: vfaeh %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x82 -+ -+#CHECK: vfaeh %v0, %v0, %v0, 12 -+0xe7 0x00 0x00 0xc0 0x10 0x82 -+ -+#CHECK: vfaeh %v18, %v3, %v20, 0 -+0xe7 0x23 0x40 0x00 0x1a 0x82 -+ -+#CHECK: vfaeh %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x40 0x1e 0x82 -+ -+#CHECK: vfaehs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0x90 0x1e 0x82 -+ -+#CHECK: vfaezh %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x60 0x1e 0x82 -+ -+#CHECK: vfaezhs %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0xb0 0x1e 0x82 -+ -+#CHECK: vfcedb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xe8 -+ -+#CHECK: vfcedb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xe8 -+ -+#CHECK: vfcedb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xe8 -+ -+#CHECK: vfcedbs %v0, %v0, %v0 -+0xe7 0x00 0x00 0x10 0x30 0xe8 -+ -+#CHECK: vfcedbs %v18, %v3, %v20 -+0xe7 0x23 0x40 0x10 0x3a 0xe8 -+ -+#CHECK: vfcedbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x10 0x3e 0xe8 -+ -+#CHECK: vfchdb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xeb -+ -+#CHECK: vfchdb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xeb -+ -+#CHECK: vfchdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xeb -+ -+#CHECK: vfchdbs %v0, %v0, %v0 -+0xe7 0x00 0x00 0x10 0x30 0xeb -+ -+#CHECK: vfchdbs %v18, %v3, %v20 -+0xe7 0x23 0x40 0x10 0x3a 0xeb -+ -+#CHECK: vfchdbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x10 0x3e 0xeb -+ -+#CHECK: vfchedb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xea -+ -+#CHECK: vfchedb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xea -+ -+#CHECK: vfchedb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xea -+ -+#CHECK: vfchedbs %v0, %v0, %v0 -+0xe7 0x00 0x00 0x10 0x30 0xea -+ -+#CHECK: vfchedbs %v18, %v3, %v20 -+0xe7 0x23 0x40 0x10 0x3a 0xea -+ -+#CHECK: vfchedbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x10 0x3e 0xea -+ -+#CHECK: vfddb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 
0x30 0xe5 -+ -+#CHECK: vfddb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xe5 -+ -+#CHECK: vfddb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xe5 -+ -+#CHECK: vfeeb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x80 -+ -+#CHECK: vfeeb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x80 -+ -+#CHECK: vfeebs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x04 0x80 -+ -+#CHECK: vfeezb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x20 0x0a 0x80 -+ -+#CHECK: vfeezbs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x04 0x80 -+ -+#CHECK: vfeeb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x80 -+ -+#CHECK: vfeef %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x80 -+ -+#CHECK: vfeef %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x80 -+ -+#CHECK: vfeefs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0x80 -+ -+#CHECK: vfeezf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x20 0x2a 0x80 -+ -+#CHECK: vfeezfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x24 0x80 -+ -+#CHECK: vfeef %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x80 -+ -+#CHECK: vfeeh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x80 -+ -+#CHECK: vfeeh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x80 -+ -+#CHECK: vfeehs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0x80 -+ -+#CHECK: vfeezh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x20 0x1a 0x80 -+ -+#CHECK: vfeezhs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x14 0x80 -+ -+#CHECK: vfeeh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x80 -+ -+#CHECK: vfeneb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x81 -+ -+#CHECK: vfeneb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x81 -+ -+#CHECK: vfenebs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x04 0x81 -+ -+#CHECK: vfenezb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x20 0x0a 0x81 -+ -+#CHECK: vfenezbs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x04 0x81 -+ -+#CHECK: vfeneb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x81 -+ -+#CHECK: vfenef %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x81 -+ -+#CHECK: vfenef %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x81 -+ -+#CHECK: vfenefs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0x81 -+ -+#CHECK: vfenezf %v18, %v3, 
%v20 -+0xe7 0x23 0x40 0x20 0x2a 0x81 -+ -+#CHECK: vfenezfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x24 0x81 -+ -+#CHECK: vfenef %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x81 -+ -+#CHECK: vfeneh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x81 -+ -+#CHECK: vfeneh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x81 -+ -+#CHECK: vfenehs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0x81 -+ -+#CHECK: vfenezh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x20 0x1a 0x81 -+ -+#CHECK: vfenezhs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x30 0x14 0x81 -+ -+#CHECK: vfeneh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x81 -+ -+#CHECK: vfidb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc7 -+ -+#CHECK: vfidb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc7 -+ -+#CHECK: vfidb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 0x3c 0xc7 -+ -+#CHECK: vistrb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x5c -+ -+#CHECK: vistrb %v18, %v3 -+0xe7 0x23 0x00 0x00 0x08 0x5c -+ -+#CHECK: vistrbs %v7, %v24 -+0xe7 0x78 0x00 0x10 0x04 0x5c -+ -+#CHECK: vistrb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x5c -+ -+#CHECK: vistrf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x5c -+ -+#CHECK: vistrf %v18, %v3 -+0xe7 0x23 0x00 0x00 0x28 0x5c -+ -+#CHECK: vistrfs %v7, %v24 -+0xe7 0x78 0x00 0x10 0x24 0x5c -+ -+#CHECK: vistrf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0x5c -+ -+#CHECK: vistrh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x5c -+ -+#CHECK: vistrh %v18, %v3 -+0xe7 0x23 0x00 0x00 0x18 0x5c -+ -+#CHECK: vistrhs %v7, %v24 -+0xe7 0x78 0x00 0x10 0x14 0x5c -+ -+#CHECK: vistrh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0x5c -+ -+#CHECK: vfmadb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x03 0x00 0x00 0x8f -+ -+#CHECK: vfmadb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x53 0x00 0x65 0x8f -+ -+#CHECK: vfmadb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf3 0x00 0xff 0x8f -+ -+#CHECK: vfmdb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xe7 -+ -+#CHECK: vfmdb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xe7 -+ -+#CHECK: vfmdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xe7 -+ -+#CHECK: vfmsdb %v0, %v0, %v0, %v0 -+0xe7 
0x00 0x03 0x00 0x00 0x8e -+ -+#CHECK: vfmsdb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x53 0x00 0x65 0x8e -+ -+#CHECK: vfmsdb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf3 0x00 0xff 0x8e -+ -+#CHECK: vfsdb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xe2 -+ -+#CHECK: vfsdb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xe2 -+ -+#CHECK: vfsdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xe2 -+ -+#CHECK: vzero %v0 -+0xe7 0x00 0x00 0x00 0x00 0x44 -+ -+#CHECK: vgbm %v0, 1 -+0xe7 0x00 0x00 0x01 0x00 0x44 -+ -+#CHECK: vgbm %v0, 65534 -+0xe7 0x00 0xff 0xfe 0x00 0x44 -+ -+#CHECK: vone %v0 -+0xe7 0x00 0xff 0xff 0x00 0x44 -+ -+#CHECK: vgbm %v17, 4660 -+0xe7 0x10 0x12 0x34 0x08 0x44 -+ -+#CHECK: vone %v31 -+0xe7 0xf0 0xff 0xff 0x08 0x44 -+ -+#CHECK: vgef %v0, 0(%v0), 0 -+0xe7 0x00 0x00 0x00 0x00 0x13 -+ -+#CHECK: vgef %v10, 1000(%v19,%r7), 2 -+0xe7 0xa3 0x73 0xe8 0x24 0x13 -+ -+#CHECK: vgef %v31, 4095(%v31,%r15), 3 -+0xe7 0xff 0xff 0xff 0x3c 0x13 -+ -+#CHECK: vgeg %v0, 0(%v0), 0 -+0xe7 0x00 0x00 0x00 0x00 0x12 -+ -+#CHECK: vgeg %v10, 1000(%v19,%r7), 1 -+0xe7 0xa3 0x73 0xe8 0x14 0x12 -+ -+#CHECK: vgeg %v31, 4095(%v31,%r15), 1 -+0xe7 0xff 0xff 0xff 0x1c 0x12 -+ -+#CHECK: vgfmab %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xbc -+ -+#CHECK: vgfmab %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xbc -+ -+#CHECK: vgfmab %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xbc -+ -+#CHECK: vgfmaf %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xbc -+ -+#CHECK: vgfmaf %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xbc -+ -+#CHECK: vgfmaf %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xbc -+ -+#CHECK: vgfmag %v0, %v0, %v0, %v0 -+0xe7 0x00 0x03 0x00 0x00 0xbc -+ -+#CHECK: vgfmag %v3, %v20, %v5, %v22 -+0xe7 0x34 0x53 0x00 0x65 0xbc -+ -+#CHECK: vgfmag %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf3 0x00 0xff 0xbc -+ -+#CHECK: vgfmah %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xbc -+ -+#CHECK: vgfmah %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xbc -+ -+#CHECK: vgfmah %v31, %v31, %v31, %v31 -+0xe7 0xff 
0xf1 0x00 0xff 0xbc -+ -+#CHECK: vgfmb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xb4 -+ -+#CHECK: vgfmb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xb4 -+ -+#CHECK: vgfmb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xb4 -+ -+#CHECK: vgfmf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xb4 -+ -+#CHECK: vgfmf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xb4 -+ -+#CHECK: vgfmf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xb4 -+ -+#CHECK: vgfmg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xb4 -+ -+#CHECK: vgfmg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xb4 -+ -+#CHECK: vgfmg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xb4 -+ -+#CHECK: vgfmh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xb4 -+ -+#CHECK: vgfmh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xb4 -+ -+#CHECK: vgfmh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xb4 -+ -+#CHECK: vgmb %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x46 -+ -+#CHECK: vgmb %v22, 55, 66 -+0xe7 0x60 0x37 0x42 0x08 0x46 -+ -+#CHECK: vgmb %v31, 255, 255 -+0xe7 0xf0 0xff 0xff 0x08 0x46 -+ -+#CHECK: vgmf %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x46 -+ -+#CHECK: vgmf %v22, 55, 66 -+0xe7 0x60 0x37 0x42 0x28 0x46 -+ -+#CHECK: vgmf %v31, 255, 255 -+0xe7 0xf0 0xff 0xff 0x28 0x46 -+ -+#CHECK: vgmg %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x46 -+ -+#CHECK: vgmg %v22, 55, 66 -+0xe7 0x60 0x37 0x42 0x38 0x46 -+ -+#CHECK: vgmg %v31, 255, 255 -+0xe7 0xf0 0xff 0xff 0x38 0x46 -+ -+#CHECK: vgmh %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x46 -+ -+#CHECK: vgmh %v22, 55, 66 -+0xe7 0x60 0x37 0x42 0x18 0x46 -+ -+#CHECK: vgmh %v31, 255, 255 -+0xe7 0xf0 0xff 0xff 0x18 0x46 -+ -+#CHECK: vl %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x06 -+ -+#CHECK: vl %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x08 0x06 -+ -+#CHECK: vl %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x08 0x06 -+ -+#CHECK: vlbb %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x07 -+ -+#CHECK: vlbb %v17, 2475(%r7,%r8), 12 -+0xe7 0x17 0x89 0xab 0xc8 0x07 -+ -+#CHECK: vlbb %v31, 4095(%r15,%r15), 15 -+0xe7 0xff 0xff 0xff 0xf8 0x07 -+ -+#CHECK: vlcb %v0, 
%v0 -+0xe7 0x00 0x00 0x00 0x00 0xde -+ -+#CHECK: vlcb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xde -+ -+#CHECK: vlcb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xde -+ -+#CHECK: vlcf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xde -+ -+#CHECK: vlcf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xde -+ -+#CHECK: vlcf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xde -+ -+#CHECK: vlcg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xde -+ -+#CHECK: vlcg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xde -+ -+#CHECK: vlcg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xde -+ -+#CHECK: vlch %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xde -+ -+#CHECK: vlch %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xde -+ -+#CHECK: vlch %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xde -+ -+#CHECK: vldeb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xc4 -+ -+#CHECK: vldeb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xc4 -+ -+#CHECK: vldeb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xc4 -+ -+#CHECK: vleb %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x00 -+ -+#CHECK: vleb %v17, 2475(%r7,%r8), 12 -+0xe7 0x17 0x89 0xab 0xc8 0x00 -+ -+#CHECK: vleb %v31, 4095(%r15,%r15), 15 -+0xe7 0xff 0xff 0xff 0xf8 0x00 -+ -+#CHECK: vledb %v0, %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x30 0xc5 -+ -+#CHECK: vledb %v19, %v14, 4, 10 -+0xe7 0x3e 0x00 0xa4 0x38 0xc5 -+ -+#CHECK: vledb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xf7 0x3c 0xc5 -+ -+#CHECK: vlef %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x03 -+ -+#CHECK: vlef %v17, 2475(%r7,%r8), 2 -+0xe7 0x17 0x89 0xab 0x28 0x03 -+ -+#CHECK: vlef %v31, 4095(%r15,%r15), 3 -+0xe7 0xff 0xff 0xff 0x38 0x03 -+ -+#CHECK: vleg %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x02 -+ -+#CHECK: vleg %v17, 2475(%r7,%r8), 1 -+0xe7 0x17 0x89 0xab 0x18 0x02 -+ -+#CHECK: vleg %v31, 4095(%r15,%r15), 1 -+0xe7 0xff 0xff 0xff 0x18 0x02 -+ -+#CHECK: vleh %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x01 -+ -+#CHECK: vleh %v17, 2475(%r7,%r8), 5 -+0xe7 0x17 0x89 0xab 0x58 0x01 -+ -+#CHECK: vleh %v31, 4095(%r15,%r15), 7 -+0xe7 0xff 0xff 0xff 0x78 0x01 -+ -+#CHECK: vleib %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x40 -+ -+#CHECK: 
vleib %v23, -30293, 12 -+0xe7 0x70 0x89 0xab 0xc8 0x40 -+ -+#CHECK: vleib %v31, -1, 15 -+0xe7 0xf0 0xff 0xff 0xf8 0x40 -+ -+#CHECK: vleif %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x43 -+ -+#CHECK: vleif %v23, -30293, 2 -+0xe7 0x70 0x89 0xab 0x28 0x43 -+ -+#CHECK: vleif %v31, -1, 3 -+0xe7 0xf0 0xff 0xff 0x38 0x43 -+ -+#CHECK: vleig %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x42 -+ -+#CHECK: vleig %v23, -30293, 1 -+0xe7 0x70 0x89 0xab 0x18 0x42 -+ -+#CHECK: vleig %v31, -1, 1 -+0xe7 0xf0 0xff 0xff 0x18 0x42 -+ -+#CHECK: vleih %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x41 -+ -+#CHECK: vleih %v23, -30293, 5 -+0xe7 0x70 0x89 0xab 0x58 0x41 -+ -+#CHECK: vleih %v31, -1, 7 -+0xe7 0xf0 0xff 0xff 0x78 0x41 -+ -+#CHECK: vflcdb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xcc -+ -+#CHECK: vflcdb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xcc -+ -+#CHECK: vflcdb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xcc -+ -+#CHECK: vflndb %v0, %v0 -+0xe7 0x00 0x00 0x10 0x30 0xcc -+ -+#CHECK: vflndb %v19, %v14 -+0xe7 0x3e 0x00 0x10 0x38 0xcc -+ -+#CHECK: vflndb %v31, %v31 -+0xe7 0xff 0x00 0x10 0x3c 0xcc -+ -+#CHECK: vflpdb %v0, %v0 -+0xe7 0x00 0x00 0x20 0x30 0xcc -+ -+#CHECK: vflpdb %v19, %v14 -+0xe7 0x3e 0x00 0x20 0x38 0xcc -+ -+#CHECK: vflpdb %v31, %v31 -+0xe7 0xff 0x00 0x20 0x3c 0xcc -+ -+#CHECK: vlgvb %r0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x21 -+ -+#CHECK: vlgvb %r2, %v19, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x04 0x21 -+ -+#CHECK: vlgvb %r15, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x04 0x21 -+ -+#CHECK: vlgvf %r0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x21 -+ -+#CHECK: vlgvf %r2, %v19, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x24 0x21 -+ -+#CHECK: vlgvf %r15, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x24 0x21 -+ -+#CHECK: vlgvg %r0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x21 -+ -+#CHECK: vlgvg %r2, %v19, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x34 0x21 -+ -+#CHECK: vlgvg %r15, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x34 0x21 -+ -+#CHECK: vlgvh %r0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x21 -+ -+#CHECK: vlgvh %r2, %v19, 1383(%r4) -+0xe7 0x23 0x45 
0x67 0x14 0x21 -+ -+#CHECK: vlgvh %r15, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x14 0x21 -+ -+#CHECK: vfsqdb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xce -+ -+#CHECK: vfsqdb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xce -+ -+#CHECK: vfsqdb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xce -+ -+#CHECK: vftcidb %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x4a -+ -+#CHECK: vftcidb %v19, %v4, 1383 -+0xe7 0x34 0x56 0x70 0x38 0x4a -+ -+#CHECK: vftcidb %v31, %v31, 4095 -+0xe7 0xff 0xff 0xf0 0x3c 0x4a -+ -+#CHECK: vll %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x37 -+ -+#CHECK: vll %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x08 0x37 -+ -+#CHECK: vll %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x08 0x37 -+ -+#CHECK: vllezb %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x04 -+ -+#CHECK: vllezb %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x08 0x04 -+ -+#CHECK: vllezb %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x08 0x04 -+ -+#CHECK: vllezf %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x04 -+ -+#CHECK: vllezf %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x28 0x04 -+ -+#CHECK: vllezf %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x28 0x04 -+ -+#CHECK: vllezg %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x04 -+ -+#CHECK: vllezg %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x38 0x04 -+ -+#CHECK: vllezg %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x38 0x04 -+ -+#CHECK: vllezh %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x04 -+ -+#CHECK: vllezh %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x18 0x04 -+ -+#CHECK: vllezh %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x18 0x04 -+ -+#CHECK: vlm %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x36 -+ -+#CHECK: vlm %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x36 -+ -+#CHECK: vlm %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x36 -+ -+#CHECK: vlpb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xdf -+ -+#CHECK: vlpb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xdf -+ -+#CHECK: vlpb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xdf -+ -+#CHECK: vlpf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xdf -+ -+#CHECK: vlpf %v19, %v14 -+0xe7 0x3e 0x00 0x00 
0x28 0xdf -+ -+#CHECK: vlpf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xdf -+ -+#CHECK: vlpg %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xdf -+ -+#CHECK: vlpg %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x38 0xdf -+ -+#CHECK: vlpg %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xdf -+ -+#CHECK: vlph %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xdf -+ -+#CHECK: vlph %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xdf -+ -+#CHECK: vlph %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xdf -+ -+#CHECK: vlr %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x56 -+ -+#CHECK: vlr %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0x56 -+ -+#CHECK: vlr %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x56 -+ -+#CHECK: vlrepb %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x05 -+ -+#CHECK: vlrepb %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x08 0x05 -+ -+#CHECK: vlrepb %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x08 0x05 -+ -+#CHECK: vlrepf %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x05 -+ -+#CHECK: vlrepf %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x28 0x05 -+ -+#CHECK: vlrepf %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x28 0x05 -+ -+#CHECK: vlrepg %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x05 -+ -+#CHECK: vlrepg %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x38 0x05 -+ -+#CHECK: vlrepg %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x38 0x05 -+ -+#CHECK: vlreph %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x05 -+ -+#CHECK: vlreph %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x18 0x05 -+ -+#CHECK: vlreph %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x18 0x05 -+ -+#CHECK: vlvgb %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x22 -+ -+#CHECK: vlvgb %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x08 0x22 -+ -+#CHECK: vlvgb %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x08 0x22 -+ -+#CHECK: vlvgf %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x22 -+ -+#CHECK: vlvgf %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x28 0x22 -+ -+#CHECK: vlvgf %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x28 0x22 -+ -+#CHECK: vlvgg %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x22 -+ -+#CHECK: vlvgg %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x38 0x22 -+ -+#CHECK: 
vlvgg %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x38 0x22 -+ -+#CHECK: vlvgh %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x22 -+ -+#CHECK: vlvgh %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x18 0x22 -+ -+#CHECK: vlvgh %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x18 0x22 -+ -+#CHECK: vlvgp %v0, %r0, %r0 -+0xe7 0x00 0x00 0x00 0x00 0x62 -+ -+#CHECK: vlvgp %v18, %r3, %r4 -+0xe7 0x23 0x40 0x00 0x08 0x62 -+ -+#CHECK: vlvgp %v31, %r15, %r15 -+0xe7 0xff 0xf0 0x00 0x08 0x62 -+ -+#CHECK: vmaeb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xae -+ -+#CHECK: vmaeb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xae -+ -+#CHECK: vmaeb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xae -+ -+#CHECK: vmaef %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xae -+ -+#CHECK: vmaef %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xae -+ -+#CHECK: vmaef %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xae -+ -+#CHECK: vmaeh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xae -+ -+#CHECK: vmaeh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xae -+ -+#CHECK: vmaeh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xae -+ -+#CHECK: vmahb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xab -+ -+#CHECK: vmahb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xab -+ -+#CHECK: vmahb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xab -+ -+#CHECK: vmahf %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xab -+ -+#CHECK: vmahf %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xab -+ -+#CHECK: vmahf %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xab -+ -+#CHECK: vmahh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xab -+ -+#CHECK: vmahh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xab -+ -+#CHECK: vmahh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xab -+ -+#CHECK: vmalb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xaa -+ -+#CHECK: vmalb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xaa -+ -+#CHECK: vmalb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xaa -+ -+#CHECK: 
vmaleb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xac -+ -+#CHECK: vmaleb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xac -+ -+#CHECK: vmaleb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xac -+ -+#CHECK: vmalef %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xac -+ -+#CHECK: vmalef %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xac -+ -+#CHECK: vmalef %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xac -+ -+#CHECK: vmaleh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xac -+ -+#CHECK: vmaleh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xac -+ -+#CHECK: vmaleh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xac -+ -+#CHECK: vmalf %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xaa -+ -+#CHECK: vmalf %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xaa -+ -+#CHECK: vmalf %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xaa -+ -+#CHECK: vmalhb %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa9 -+ -+#CHECK: vmalhb %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xa9 -+ -+#CHECK: vmalhb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xa9 -+ -+#CHECK: vmalhf %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xa9 -+ -+#CHECK: vmalhf %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xa9 -+ -+#CHECK: vmalhf %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xa9 -+ -+#CHECK: vmalhh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xa9 -+ -+#CHECK: vmalhh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xa9 -+ -+#CHECK: vmalhh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xa9 -+ -+#CHECK: vmalhw %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xaa -+ -+#CHECK: vmalhw %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xaa -+ -+#CHECK: vmalhw %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xaa -+ -+#CHECK: vmalob %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xad -+ -+#CHECK: vmalob %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xad -+ -+#CHECK: vmalob %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xad -+ -+#CHECK: vmalof %v0, %v0, %v0, %v0 
-+0xe7 0x00 0x02 0x00 0x00 0xad -+ -+#CHECK: vmalof %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xad -+ -+#CHECK: vmalof %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xad -+ -+#CHECK: vmaloh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xad -+ -+#CHECK: vmaloh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xad -+ -+#CHECK: vmaloh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xad -+ -+#CHECK: vmaob %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xaf -+ -+#CHECK: vmaob %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0xaf -+ -+#CHECK: vmaob %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0xaf -+ -+#CHECK: vmaof %v0, %v0, %v0, %v0 -+0xe7 0x00 0x02 0x00 0x00 0xaf -+ -+#CHECK: vmaof %v3, %v20, %v5, %v22 -+0xe7 0x34 0x52 0x00 0x65 0xaf -+ -+#CHECK: vmaof %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf2 0x00 0xff 0xaf -+ -+#CHECK: vmaoh %v0, %v0, %v0, %v0 -+0xe7 0x00 0x01 0x00 0x00 0xaf -+ -+#CHECK: vmaoh %v3, %v20, %v5, %v22 -+0xe7 0x34 0x51 0x00 0x65 0xaf -+ -+#CHECK: vmaoh %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf1 0x00 0xff 0xaf -+ -+#CHECK: vmeb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa6 -+ -+#CHECK: vmeb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa6 -+ -+#CHECK: vmeb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa6 -+ -+#CHECK: vmef %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa6 -+ -+#CHECK: vmef %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa6 -+ -+#CHECK: vmef %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa6 -+ -+#CHECK: vmeh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa6 -+ -+#CHECK: vmeh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa6 -+ -+#CHECK: vmeh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa6 -+ -+#CHECK: vmhb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa3 -+ -+#CHECK: vmhb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa3 -+ -+#CHECK: vmhb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa3 -+ -+#CHECK: vmhf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa3 -+ -+#CHECK: vmhf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa3 -+ -+#CHECK: vmhf %v31, %v31, %v31 
-+0xe7 0xff 0xf0 0x00 0x2e 0xa3 -+ -+#CHECK: vmhh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa3 -+ -+#CHECK: vmhh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa3 -+ -+#CHECK: vmhh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa3 -+ -+#CHECK: vmlb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa2 -+ -+#CHECK: vmlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa2 -+ -+#CHECK: vmlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa2 -+ -+#CHECK: vmlf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa2 -+ -+#CHECK: vmlf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa2 -+ -+#CHECK: vmlf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa2 -+ -+#CHECK: vmleb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa4 -+ -+#CHECK: vmleb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa4 -+ -+#CHECK: vmleb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa4 -+ -+#CHECK: vmlef %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa4 -+ -+#CHECK: vmlef %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa4 -+ -+#CHECK: vmlef %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa4 -+ -+#CHECK: vmleh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa4 -+ -+#CHECK: vmleh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa4 -+ -+#CHECK: vmleh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa4 -+ -+#CHECK: vmlhb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa1 -+ -+#CHECK: vmlhb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa1 -+ -+#CHECK: vmlhb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa1 -+ -+#CHECK: vmlhf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa1 -+ -+#CHECK: vmlhf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa1 -+ -+#CHECK: vmlhf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa1 -+ -+#CHECK: vmlhh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa1 -+ -+#CHECK: vmlhh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa1 -+ -+#CHECK: vmlhh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa1 -+ -+#CHECK: vmlhw %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa2 -+ -+#CHECK: vmlhw %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa2 -+ -+#CHECK: vmlhw %v31, %v31, %v31 -+0xe7 
0xff 0xf0 0x00 0x1e 0xa2 -+ -+#CHECK: vmlob %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa5 -+ -+#CHECK: vmlob %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa5 -+ -+#CHECK: vmlob %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa5 -+ -+#CHECK: vmlof %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa5 -+ -+#CHECK: vmlof %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa5 -+ -+#CHECK: vmlof %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa5 -+ -+#CHECK: vmloh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa5 -+ -+#CHECK: vmloh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa5 -+ -+#CHECK: vmloh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa5 -+ -+#CHECK: vmnb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xfe -+ -+#CHECK: vmnb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xfe -+ -+#CHECK: vmnb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xfe -+ -+#CHECK: vmnf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xfe -+ -+#CHECK: vmnf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xfe -+ -+#CHECK: vmnf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xfe -+ -+#CHECK: vmng %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xfe -+ -+#CHECK: vmng %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xfe -+ -+#CHECK: vmng %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xfe -+ -+#CHECK: vmnh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xfe -+ -+#CHECK: vmnh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xfe -+ -+#CHECK: vmnh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xfe -+ -+#CHECK: vmnlb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xfc -+ -+#CHECK: vmnlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xfc -+ -+#CHECK: vmnlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xfc -+ -+#CHECK: vmnlf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xfc -+ -+#CHECK: vmnlf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xfc -+ -+#CHECK: vmnlf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xfc -+ -+#CHECK: vmnlg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xfc -+ -+#CHECK: vmnlg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xfc -+ -+#CHECK: vmnlg %v31, %v31, %v31 -+0xe7 0xff 0xf0 
0x00 0x3e 0xfc -+ -+#CHECK: vmnlh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xfc -+ -+#CHECK: vmnlh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xfc -+ -+#CHECK: vmnlh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xfc -+ -+#CHECK: vmob %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xa7 -+ -+#CHECK: vmob %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xa7 -+ -+#CHECK: vmob %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xa7 -+ -+#CHECK: vmof %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xa7 -+ -+#CHECK: vmof %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xa7 -+ -+#CHECK: vmof %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xa7 -+ -+#CHECK: vmoh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xa7 -+ -+#CHECK: vmoh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xa7 -+ -+#CHECK: vmoh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xa7 -+ -+#CHECK: vmrhb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x61 -+ -+#CHECK: vmrhb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x61 -+ -+#CHECK: vmrhb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x61 -+ -+#CHECK: vmrhf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x61 -+ -+#CHECK: vmrhf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x61 -+ -+#CHECK: vmrhf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x61 -+ -+#CHECK: vmrhg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x61 -+ -+#CHECK: vmrhg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x61 -+ -+#CHECK: vmrhg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x61 -+ -+#CHECK: vmrhh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x61 -+ -+#CHECK: vmrhh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x61 -+ -+#CHECK: vmrhh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x61 -+ -+#CHECK: vmrlb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x60 -+ -+#CHECK: vmrlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x60 -+ -+#CHECK: vmrlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x60 -+ -+#CHECK: vmrlf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x60 -+ -+#CHECK: vmrlf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x60 -+ -+#CHECK: vmrlf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 
0x2e 0x60 -+ -+#CHECK: vmrlg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x60 -+ -+#CHECK: vmrlg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x60 -+ -+#CHECK: vmrlg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x60 -+ -+#CHECK: vmrlh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x60 -+ -+#CHECK: vmrlh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x60 -+ -+#CHECK: vmrlh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x60 -+ -+#CHECK: vmxb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xff -+ -+#CHECK: vmxb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xff -+ -+#CHECK: vmxb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xff -+ -+#CHECK: vmxf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xff -+ -+#CHECK: vmxf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xff -+ -+#CHECK: vmxf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xff -+ -+#CHECK: vmxg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xff -+ -+#CHECK: vmxg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xff -+ -+#CHECK: vmxg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xff -+ -+#CHECK: vmxh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xff -+ -+#CHECK: vmxh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xff -+ -+#CHECK: vmxh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xff -+ -+#CHECK: vmxlb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xfd -+ -+#CHECK: vmxlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xfd -+ -+#CHECK: vmxlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xfd -+ -+#CHECK: vmxlf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xfd -+ -+#CHECK: vmxlf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xfd -+ -+#CHECK: vmxlf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xfd -+ -+#CHECK: vmxlg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xfd -+ -+#CHECK: vmxlg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xfd -+ -+#CHECK: vmxlg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xfd -+ -+#CHECK: vmxlh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xfd -+ -+#CHECK: vmxlh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xfd -+ -+#CHECK: vmxlh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xfd 
-+ -+#CHECK: vn %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x68 -+ -+#CHECK: vn %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x68 -+ -+#CHECK: vn %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x68 -+ -+#CHECK: vnc %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x69 -+ -+#CHECK: vnc %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x69 -+ -+#CHECK: vnc %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x69 -+ -+#CHECK: vno %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x6b -+ -+#CHECK: vno %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x6b -+ -+#CHECK: vno %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x6b -+ -+#CHECK: vo %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x6a -+ -+#CHECK: vo %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x6a -+ -+#CHECK: vo %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x6a -+ -+#CHECK: vpdi %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x84 -+ -+#CHECK: vpdi %v3, %v20, %v5, 4 -+0xe7 0x34 0x50 0x00 0x44 0x84 -+ -+#CHECK: vpdi %v31, %v31, %v31, 15 -+0xe7 0xff 0xf0 0x00 0xfe 0x84 -+ -+#CHECK: vperm %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x8c -+ -+#CHECK: vperm %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0x8c -+ -+#CHECK: vperm %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0x8c -+ -+#CHECK: vpkf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x94 -+ -+#CHECK: vpkf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x94 -+ -+#CHECK: vpkf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x94 -+ -+#CHECK: vpkg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x94 -+ -+#CHECK: vpkg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x94 -+ -+#CHECK: vpkg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x94 -+ -+#CHECK: vpkh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x94 -+ -+#CHECK: vpkh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x94 -+ -+#CHECK: vpkh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x94 -+ -+#CHECK: vpklsf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x95 -+ -+#CHECK: vpklsf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x95 -+ -+#CHECK: vpklsfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0x95 -+ -+#CHECK: 
vpklsf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x95 -+ -+#CHECK: vpklsg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x95 -+ -+#CHECK: vpklsg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x95 -+ -+#CHECK: vpklsgs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x34 0x95 -+ -+#CHECK: vpklsg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x95 -+ -+#CHECK: vpklsh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x95 -+ -+#CHECK: vpklsh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x95 -+ -+#CHECK: vpklshs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0x95 -+ -+#CHECK: vpklsh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x95 -+ -+#CHECK: vpksf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x97 -+ -+#CHECK: vpksf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x97 -+ -+#CHECK: vpksfs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x24 0x97 -+ -+#CHECK: vpksf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x97 -+ -+#CHECK: vpksg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x97 -+ -+#CHECK: vpksg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x97 -+ -+#CHECK: vpksgs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x34 0x97 -+ -+#CHECK: vpksg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x97 -+ -+#CHECK: vpksh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x97 -+ -+#CHECK: vpksh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x97 -+ -+#CHECK: vpkshs %v7, %v24, %v9 -+0xe7 0x78 0x90 0x10 0x14 0x97 -+ -+#CHECK: vpksh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x97 -+ -+#CHECK: vpopct %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x50 -+ -+#CHECK: vpopct %v19, %v14, 0 -+0xe7 0x3e 0x00 0x00 0x08 0x50 -+ -+#CHECK: vpopct %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x50 -+ -+#CHECK: vrepb %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x4d -+ -+#CHECK: vrepb %v19, %v4, 22136 -+0xe7 0x34 0x56 0x78 0x08 0x4d -+ -+#CHECK: vrepb %v31, %v31, 65535 -+0xe7 0xff 0xff 0xff 0x0c 0x4d -+ -+#CHECK: vrepf %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x4d -+ -+#CHECK: vrepf %v19, %v4, 22136 -+0xe7 0x34 0x56 0x78 0x28 0x4d -+ -+#CHECK: vrepf %v31, %v31, 65535 -+0xe7 0xff 0xff 0xff 0x2c 0x4d -+ 
-+#CHECK: vrepg %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x4d -+ -+#CHECK: vrepg %v19, %v4, 22136 -+0xe7 0x34 0x56 0x78 0x38 0x4d -+ -+#CHECK: vrepg %v31, %v31, 65535 -+0xe7 0xff 0xff 0xff 0x3c 0x4d -+ -+#CHECK: vreph %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x4d -+ -+#CHECK: vreph %v19, %v4, 22136 -+0xe7 0x34 0x56 0x78 0x18 0x4d -+ -+#CHECK: vreph %v31, %v31, 65535 -+0xe7 0xff 0xff 0xff 0x1c 0x4d -+ -+#CHECK: vrepib %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x45 -+ -+#CHECK: vrepib %v23, -30293 -+0xe7 0x70 0x89 0xab 0x08 0x45 -+ -+#CHECK: vrepib %v31, -1 -+0xe7 0xf0 0xff 0xff 0x08 0x45 -+ -+#CHECK: vrepif %v0, 0 -+0xe7 0x00 0x00 0x00 0x20 0x45 -+ -+#CHECK: vrepif %v23, -30293 -+0xe7 0x70 0x89 0xab 0x28 0x45 -+ -+#CHECK: vrepif %v31, -1 -+0xe7 0xf0 0xff 0xff 0x28 0x45 -+ -+#CHECK: vrepig %v0, 0 -+0xe7 0x00 0x00 0x00 0x30 0x45 -+ -+#CHECK: vrepig %v23, -30293 -+0xe7 0x70 0x89 0xab 0x38 0x45 -+ -+#CHECK: vrepig %v31, -1 -+0xe7 0xf0 0xff 0xff 0x38 0x45 -+ -+#CHECK: vrepih %v0, 0 -+0xe7 0x00 0x00 0x00 0x10 0x45 -+ -+#CHECK: vrepih %v23, -30293 -+0xe7 0x70 0x89 0xab 0x18 0x45 -+ -+#CHECK: vrepih %v31, -1 -+0xe7 0xf0 0xff 0xff 0x18 0x45 -+ -+#CHECK: vsb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf7 -+ -+#CHECK: vsb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf7 -+ -+#CHECK: vsb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf7 -+ -+#CHECK: vsbiq %v0, %v0, %v0, %v0 -+0xe7 0x00 0x04 0x00 0x00 0xbf -+ -+#CHECK: vsbiq %v3, %v20, %v5, %v22 -+0xe7 0x34 0x54 0x00 0x65 0xbf -+ -+#CHECK: vsbiq %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf4 0x00 0xff 0xbf -+ -+#CHECK: vsbcbiq %v0, %v0, %v0, %v0 -+0xe7 0x00 0x04 0x00 0x00 0xbd -+ -+#CHECK: vsbcbiq %v3, %v20, %v5, %v22 -+0xe7 0x34 0x54 0x00 0x65 0xbd -+ -+#CHECK: vsbcbiq %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf4 0x00 0xff 0xbd -+ -+#CHECK: vscbib %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xf5 -+ -+#CHECK: vscbib %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0xf5 -+ -+#CHECK: vscbib %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0xf5 -+ -+#CHECK: vscbif %v0, 
%v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf5 -+ -+#CHECK: vscbif %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf5 -+ -+#CHECK: vscbif %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf5 -+ -+#CHECK: vscbig %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf5 -+ -+#CHECK: vscbig %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf5 -+ -+#CHECK: vscbig %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf5 -+ -+#CHECK: vscbih %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf5 -+ -+#CHECK: vscbih %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf5 -+ -+#CHECK: vscbih %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf5 -+ -+#CHECK: vscbiq %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x40 0xf5 -+ -+#CHECK: vscbiq %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x4a 0xf5 -+ -+#CHECK: vscbiq %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x4e 0xf5 -+ -+#CHECK: vscef %v0, 0(%v0), 0 -+0xe7 0x00 0x00 0x00 0x00 0x1b -+ -+#CHECK: vscef %v10, 1000(%v19,%r7), 2 -+0xe7 0xa3 0x73 0xe8 0x24 0x1b -+ -+#CHECK: vscef %v31, 4095(%v31,%r15), 3 -+0xe7 0xff 0xff 0xff 0x3c 0x1b -+ -+#CHECK: vsceg %v0, 0(%v0), 0 -+0xe7 0x00 0x00 0x00 0x00 0x1a -+ -+#CHECK: vsceg %v10, 1000(%v19,%r7), 1 -+0xe7 0xa3 0x73 0xe8 0x14 0x1a -+ -+#CHECK: vsceg %v31, 4095(%v31,%r15), 1 -+0xe7 0xff 0xff 0xff 0x1c 0x1a -+ -+#CHECK: vsegb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x5f -+ -+#CHECK: vsegb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0x5f -+ -+#CHECK: vsegb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0x5f -+ -+#CHECK: vsegf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x5f -+ -+#CHECK: vsegf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0x5f -+ -+#CHECK: vsegf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0x5f -+ -+#CHECK: vsegh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x5f -+ -+#CHECK: vsegh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0x5f -+ -+#CHECK: vsegh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0x5f -+ -+#CHECK: vsel %v0, %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x8d -+ -+#CHECK: vsel %v3, %v20, %v5, %v22 -+0xe7 0x34 0x50 0x00 0x65 0x8d -+ -+#CHECK: vsel %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0xff 0x8d -+ -+#CHECK: 
vsf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xf7 -+ -+#CHECK: vsf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0xf7 -+ -+#CHECK: vsf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0xf7 -+ -+#CHECK: vsg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0xf7 -+ -+#CHECK: vsg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0xf7 -+ -+#CHECK: vsg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0xf7 -+ -+#CHECK: vsh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xf7 -+ -+#CHECK: vsh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0xf7 -+ -+#CHECK: vsh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0xf7 -+ -+#CHECK: vsl %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x74 -+ -+#CHECK: vsl %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x74 -+ -+#CHECK: vsl %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x74 -+ -+#CHECK: vslb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x75 -+ -+#CHECK: vslb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x75 -+ -+#CHECK: vslb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x75 -+ -+#CHECK: vsldb %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x77 -+ -+#CHECK: vsldb %v3, %v20, %v5, 103 -+0xe7 0x34 0x50 0x67 0x04 0x77 -+ -+#CHECK: vsldb %v31, %v31, %v31, 255 -+0xe7 0xff 0xf0 0xff 0x0e 0x77 -+ -+#CHECK: vsq %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x40 0xf7 -+ -+#CHECK: vsq %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x4a 0xf7 -+ -+#CHECK: vsq %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x4e 0xf7 -+ -+#CHECK: vsra %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x7e -+ -+#CHECK: vsra %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x7e -+ -+#CHECK: vsra %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x7e -+ -+#CHECK: vsrab %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x7f -+ -+#CHECK: vsrab %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x7f -+ -+#CHECK: vsrab %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x7f -+ -+#CHECK: vsrl %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x7c -+ -+#CHECK: vsrl %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x7c -+ -+#CHECK: vsrl %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x7c -+ -+#CHECK: vsrlb %v0, %v0, %v0 -+0xe7 
0x00 0x00 0x00 0x00 0x7d -+ -+#CHECK: vsrlb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x7d -+ -+#CHECK: vsrlb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x7d -+ -+#CHECK: vst %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x0E -+ -+#CHECK: vst %v17, 2475(%r7,%r8) -+0xe7 0x17 0x89 0xab 0x08 0x0E -+ -+#CHECK: vst %v31, 4095(%r15,%r15) -+0xe7 0xff 0xff 0xff 0x08 0x0E -+ -+#CHECK: vsteb %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x08 -+ -+#CHECK: vsteb %v17, 2475(%r7,%r8), 12 -+0xe7 0x17 0x89 0xab 0xc8 0x08 -+ -+#CHECK: vsteb %v31, 4095(%r15,%r15), 15 -+0xe7 0xff 0xff 0xff 0xf8 0x08 -+ -+#CHECK: vstef %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x0b -+ -+#CHECK: vstef %v17, 2475(%r7,%r8), 2 -+0xe7 0x17 0x89 0xab 0x28 0x0b -+ -+#CHECK: vstef %v31, 4095(%r15,%r15), 3 -+0xe7 0xff 0xff 0xff 0x38 0x0b -+ -+#CHECK: vsteg %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x0a -+ -+#CHECK: vsteg %v17, 2475(%r7,%r8), 1 -+0xe7 0x17 0x89 0xab 0x18 0x0a -+ -+#CHECK: vsteg %v31, 4095(%r15,%r15), 1 -+0xe7 0xff 0xff 0xff 0x18 0x0a -+ -+#CHECK: vsteh %v0, 0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x09 -+ -+#CHECK: vsteh %v17, 2475(%r7,%r8), 5 -+0xe7 0x17 0x89 0xab 0x58 0x09 -+ -+#CHECK: vsteh %v31, 4095(%r15,%r15), 7 -+0xe7 0xff 0xff 0xff 0x78 0x09 -+ -+#CHECK: vstl %v0, %r0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x3f -+ -+#CHECK: vstl %v18, %r3, 1383(%r4) -+0xe7 0x23 0x45 0x67 0x08 0x3f -+ -+#CHECK: vstl %v31, %r15, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x08 0x3f -+ -+#CHECK: vstm %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x3e -+ -+#CHECK: vstm %v12, %v18, 1110(%r3) -+0xe7 0xc2 0x34 0x56 0x04 0x3e -+ -+#CHECK: vstm %v31, %v31, 4095(%r15) -+0xe7 0xff 0xff 0xff 0x0c 0x3e -+ -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 0 -+0xe7 0x00 0x00 0x00 0x00 0x8a -+ -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 12 -+0xe7 0x00 0x00 0xc0 0x00 0x8a -+ -+#CHECK: vstrcb %v18, %v3, %v20, %v5, 0 -+0xe7 0x23 0x40 0x00 0x5a 0x8a -+ -+#CHECK: vstrcb %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x40 0xff 0x8a -+ -+#CHECK: vstrcbs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0x90 0xff 0x8a -+ 
-+#CHECK: vstrczb %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf0 0x60 0xff 0x8a -+ -+#CHECK: vstrczbs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf0 0xb0 0xff 0x8a -+ -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 0 -+0xe7 0x00 0x02 0x00 0x00 0x8a -+ -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 12 -+0xe7 0x00 0x02 0xc0 0x00 0x8a -+ -+#CHECK: vstrcf %v18, %v3, %v20, %v5, 0 -+0xe7 0x23 0x42 0x00 0x5a 0x8a -+ -+#CHECK: vstrcf %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf2 0x40 0xff 0x8a -+ -+#CHECK: vstrcfs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf2 0x90 0xff 0x8a -+ -+#CHECK: vstrczf %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf2 0x60 0xff 0x8a -+ -+#CHECK: vstrczfs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf2 0xb0 0xff 0x8a -+ -+#CHECK: vstrch %v0, %v0, %v0, %v0, 0 -+0xe7 0x00 0x01 0x00 0x00 0x8a -+ -+#CHECK: vstrch %v0, %v0, %v0, %v0, 12 -+0xe7 0x00 0x01 0xc0 0x00 0x8a -+ -+#CHECK: vstrch %v18, %v3, %v20, %v5, 0 -+0xe7 0x23 0x41 0x00 0x5a 0x8a -+ -+#CHECK: vstrch %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf1 0x40 0xff 0x8a -+ -+#CHECK: vstrchs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf1 0x90 0xff 0x8a -+ -+#CHECK: vstrczh %v31, %v31, %v31, %v31, 4 -+0xe7 0xff 0xf1 0x60 0xff 0x8a -+ -+#CHECK: vstrczhs %v31, %v31, %v31, %v31, 8 -+0xe7 0xff 0xf1 0xb0 0xff 0x8a -+ -+#CHECK: vsumgh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x65 -+ -+#CHECK: vsumgh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x65 -+ -+#CHECK: vsumgh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x65 -+ -+#CHECK: vsumgf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x65 -+ -+#CHECK: vsumgf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x65 -+ -+#CHECK: vsumgf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x65 -+ -+#CHECK: vsumqf %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0x67 -+ -+#CHECK: vsumqf %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x2a 0x67 -+ -+#CHECK: vsumqf %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x2e 0x67 -+ -+#CHECK: vsumqg %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x30 0x67 -+ -+#CHECK: vsumqg %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x3a 0x67 -+ -+#CHECK: 
vsumqg %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x3e 0x67 -+ -+#CHECK: vsumb %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x64 -+ -+#CHECK: vsumb %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x64 -+ -+#CHECK: vsumb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x64 -+ -+#CHECK: vsumh %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0x64 -+ -+#CHECK: vsumh %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x1a 0x64 -+ -+#CHECK: vsumh %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x1e 0x64 -+ -+#CHECK: vtm %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd8 -+ -+#CHECK: vtm %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd8 -+ -+#CHECK: vtm %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd8 -+ -+#CHECK: vuphb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd7 -+ -+#CHECK: vuphb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd7 -+ -+#CHECK: vuphb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd7 -+ -+#CHECK: vuphf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xd7 -+ -+#CHECK: vuphf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xd7 -+ -+#CHECK: vuphf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xd7 -+ -+#CHECK: vuphh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xd7 -+ -+#CHECK: vuphh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xd7 -+ -+#CHECK: vuphh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xd7 -+ -+#CHECK: vuplhb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd5 -+ -+#CHECK: vuplhb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd5 -+ -+#CHECK: vuplhb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd5 -+ -+#CHECK: vuplhf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xd5 -+ -+#CHECK: vuplhf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xd5 -+ -+#CHECK: vuplhf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xd5 -+ -+#CHECK: vuplhh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xd5 -+ -+#CHECK: vuplhh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xd5 -+ -+#CHECK: vuplhh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xd5 -+ -+#CHECK: vuplb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd6 -+ -+#CHECK: vuplb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd6 -+ -+#CHECK: vuplb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd6 -+ -+#CHECK: vuplf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xd6 -+ 
-+#CHECK: vuplf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xd6 -+ -+#CHECK: vuplf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xd6 -+ -+#CHECK: vuplhw %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xd6 -+ -+#CHECK: vuplhw %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xd6 -+ -+#CHECK: vuplhw %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xd6 -+ -+#CHECK: vupllb %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0xd4 -+ -+#CHECK: vupllb %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x08 0xd4 -+ -+#CHECK: vupllb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x0c 0xd4 -+ -+#CHECK: vupllf %v0, %v0 -+0xe7 0x00 0x00 0x00 0x20 0xd4 -+ -+#CHECK: vupllf %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x28 0xd4 -+ -+#CHECK: vupllf %v31, %v31 -+0xe7 0xff 0x00 0x00 0x2c 0xd4 -+ -+#CHECK: vupllh %v0, %v0 -+0xe7 0x00 0x00 0x00 0x10 0xd4 -+ -+#CHECK: vupllh %v19, %v14 -+0xe7 0x3e 0x00 0x00 0x18 0xd4 -+ -+#CHECK: vupllh %v31, %v31 -+0xe7 0xff 0x00 0x00 0x1c 0xd4 -+ -+#CHECK: vx %v0, %v0, %v0 -+0xe7 0x00 0x00 0x00 0x00 0x6d -+ -+#CHECK: vx %v18, %v3, %v20 -+0xe7 0x23 0x40 0x00 0x0a 0x6d -+ -+#CHECK: vx %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x00 0x0e 0x6d -+ -+#CHECK: wcdgb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc3 -+ -+#CHECK: wcdgb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc3 -+ -+#CHECK: wcdgb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc3 -+ -+#CHECK: wcdlgb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc1 -+ -+#CHECK: wcdlgb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc1 -+ -+#CHECK: wcdlgb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc1 -+ -+#CHECK: wcgdb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc2 -+ -+#CHECK: wcgdb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc2 -+ -+#CHECK: wcgdb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc2 -+ -+#CHECK: wclgdb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc0 -+ -+#CHECK: wclgdb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc0 -+ -+#CHECK: wclgdb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc0 -+ -+#CHECK: wfadb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xe3 -+ -+#CHECK: wfadb %v18, %f3, %v20 -+0xe7 0x23 0x40 
0x08 0x3a 0xe3 -+ -+#CHECK: wfadb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xe3 -+ -+#CHECK: wfcdb %f0, %f0 -+0xe7 0x00 0x00 0x00 0x30 0xcb -+ -+#CHECK: wfcdb %v19, %f14 -+0xe7 0x3e 0x00 0x00 0x38 0xcb -+ -+#CHECK: wfcdb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xcb -+ -+#CHECK: wfcedb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xe8 -+ -+#CHECK: wfcedb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xe8 -+ -+#CHECK: wfcedb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xe8 -+ -+#CHECK: wfcedbs %f0, %f0, %f0 -+0xe7 0x00 0x00 0x18 0x30 0xe8 -+ -+#CHECK: wfcedbs %v18, %f3, %v20 -+0xe7 0x23 0x40 0x18 0x3a 0xe8 -+ -+#CHECK: wfcedbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x18 0x3e 0xe8 -+ -+#CHECK: wfchdb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xeb -+ -+#CHECK: wfchdb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xeb -+ -+#CHECK: wfchdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xeb -+ -+#CHECK: wfchdbs %f0, %f0, %f0 -+0xe7 0x00 0x00 0x18 0x30 0xeb -+ -+#CHECK: wfchdbs %v18, %f3, %v20 -+0xe7 0x23 0x40 0x18 0x3a 0xeb -+ -+#CHECK: wfchdbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x18 0x3e 0xeb -+ -+#CHECK: wfchedb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xea -+ -+#CHECK: wfchedb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xea -+ -+#CHECK: wfchedb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xea -+ -+#CHECK: wfchedbs %f0, %f0, %f0 -+0xe7 0x00 0x00 0x18 0x30 0xea -+ -+#CHECK: wfchedbs %v18, %f3, %v20 -+0xe7 0x23 0x40 0x18 0x3a 0xea -+ -+#CHECK: wfchedbs %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x18 0x3e 0xea -+ -+#CHECK: wfddb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xe5 -+ -+#CHECK: wfddb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xe5 -+ -+#CHECK: wfddb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xe5 -+ -+#CHECK: wfidb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc7 -+ -+#CHECK: wfidb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc7 -+ -+#CHECK: wfidb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc7 -+ -+#CHECK: wfkdb %f0, %f0 -+0xe7 0x00 0x00 0x00 0x30 0xca -+ -+#CHECK: wfkdb %v19, %f14 
-+0xe7 0x3e 0x00 0x00 0x38 0xca -+ -+#CHECK: wfkdb %v31, %v31 -+0xe7 0xff 0x00 0x00 0x3c 0xca -+ -+#CHECK: wflcdb %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xcc -+ -+#CHECK: wflcdb %v19, %f14 -+0xe7 0x3e 0x00 0x08 0x38 0xcc -+ -+#CHECK: wflcdb %v31, %v31 -+0xe7 0xff 0x00 0x08 0x3c 0xcc -+ -+#CHECK: wflndb %f0, %f0 -+0xe7 0x00 0x00 0x18 0x30 0xcc -+ -+#CHECK: wflndb %v19, %f14 -+0xe7 0x3e 0x00 0x18 0x38 0xcc -+ -+#CHECK: wflndb %v31, %v31 -+0xe7 0xff 0x00 0x18 0x3c 0xcc -+ -+#CHECK: wflpdb %f0, %f0 -+0xe7 0x00 0x00 0x28 0x30 0xcc -+ -+#CHECK: wflpdb %v19, %f14 -+0xe7 0x3e 0x00 0x28 0x38 0xcc -+ -+#CHECK: wflpdb %v31, %v31 -+0xe7 0xff 0x00 0x28 0x3c 0xcc -+ -+#CHECK: wfmadb %f0, %f0, %f0, %f0 -+0xe7 0x00 0x03 0x08 0x00 0x8f -+ -+#CHECK: wfmadb %f3, %v20, %f5, %v22 -+0xe7 0x34 0x53 0x08 0x65 0x8f -+ -+#CHECK: wfmadb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf3 0x08 0xff 0x8f -+ -+#CHECK: wfmdb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xe7 -+ -+#CHECK: wfmdb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xe7 -+ -+#CHECK: wfmdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xe7 -+ -+#CHECK: wfmsdb %f0, %f0, %f0, %f0 -+0xe7 0x00 0x03 0x08 0x00 0x8e -+ -+#CHECK: wfmsdb %f3, %v20, %f5, %v22 -+0xe7 0x34 0x53 0x08 0x65 0x8e -+ -+#CHECK: wfmsdb %v31, %v31, %v31, %v31 -+0xe7 0xff 0xf3 0x08 0xff 0x8e -+ -+#CHECK: wfsdb %f0, %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xe2 -+ -+#CHECK: wfsdb %v18, %f3, %v20 -+0xe7 0x23 0x40 0x08 0x3a 0xe2 -+ -+#CHECK: wfsdb %v31, %v31, %v31 -+0xe7 0xff 0xf0 0x08 0x3e 0xe2 -+ -+#CHECK: wfsqdb %f0, %f0 -+0xe7 0x00 0x00 0x08 0x30 0xce -+ -+#CHECK: wfsqdb %v19, %f14 -+0xe7 0x3e 0x00 0x08 0x38 0xce -+ -+#CHECK: wfsqdb %v31, %v31 -+0xe7 0xff 0x00 0x08 0x3c 0xce -+ -+#CHECK: wftcidb %f0, %f0, 0 -+0xe7 0x00 0x00 0x08 0x30 0x4a -+ -+#CHECK: wftcidb %v19, %f4, 1383 -+0xe7 0x34 0x56 0x78 0x38 0x4a -+ -+#CHECK: wftcidb %v31, %v31, 4095 -+0xe7 0xff 0xff 0xf8 0x3c 0x4a -+ -+#CHECK: wldeb %f0, %f0 -+0xe7 0x00 0x00 0x08 0x20 0xc4 -+ -+#CHECK: wldeb %v19, %f14 -+0xe7 0x3e 0x00 0x08 
0x28 0xc4 -+ -+#CHECK: wldeb %v31, %v31 -+0xe7 0xff 0x00 0x08 0x2c 0xc4 -+ -+#CHECK: wledb %f0, %f0, 0, 0 -+0xe7 0x00 0x00 0x08 0x30 0xc5 -+ -+#CHECK: wledb %v19, %f14, 4, 10 -+0xe7 0x3e 0x00 0xac 0x38 0xc5 -+ -+#CHECK: wledb %v31, %v31, 7, 15 -+0xe7 0xff 0x00 0xff 0x3c 0xc5 -Index: llvm-36/test/MC/Disassembler/SystemZ/insns.txt -=================================================================== ---- llvm-36.orig/test/MC/Disassembler/SystemZ/insns.txt -+++ llvm-36/test/MC/Disassembler/SystemZ/insns.txt -@@ -2503,6 +2503,15 @@ - # CHECK: ear %r15, %a15 - 0xb2 0x4f 0x00 0xff - -+# CHECK: etnd %r0 -+0xb2 0xec 0x00 0x00 -+ -+# CHECK: etnd %r15 -+0xb2 0xec 0x00 0xf0 -+ -+# CHECK: etnd %r7 -+0xb2 0xec 0x00 0x70 -+ - # CHECK: fidbr %f0, 0, %f0 - 0xb3 0x5f 0x00 0x00 - -@@ -6034,6 +6043,36 @@ - # CHECK: ny %r15, 0 - 0xe3 0xf0 0x00 0x00 0x00 0x54 - -+# CHECK: ntstg %r0, -524288 -+0xe3 0x00 0x00 0x00 0x80 0x25 -+ -+# CHECK: ntstg %r0, -1 -+0xe3 0x00 0x0f 0xff 0xff 0x25 -+ -+# CHECK: ntstg %r0, 0 -+0xe3 0x00 0x00 0x00 0x00 0x25 -+ -+# CHECK: ntstg %r0, 1 -+0xe3 0x00 0x00 0x01 0x00 0x25 -+ -+# CHECK: ntstg %r0, 524287 -+0xe3 0x00 0x0f 0xff 0x7f 0x25 -+ -+# CHECK: ntstg %r0, 0(%r1) -+0xe3 0x00 0x10 0x00 0x00 0x25 -+ -+# CHECK: ntstg %r0, 0(%r15) -+0xe3 0x00 0xf0 0x00 0x00 0x25 -+ -+# CHECK: ntstg %r0, 524287(%r1,%r15) -+0xe3 0x01 0xff 0xff 0x7f 0x25 -+ -+# CHECK: ntstg %r0, 524287(%r15,%r1) -+0xe3 0x0f 0x1f 0xff 0x7f 0x25 -+ -+# CHECK: ntstg %r15, 0 -+0xe3 0xf0 0x00 0x00 0x00 0x25 -+ - # CHECK: oc 0(1), 0 - 0xd6 0x00 0x00 0x00 0x00 0x00 - -@@ -6334,6 +6373,33 @@ - # CHECK: pfd 15, 0 - 0xe3 0xf0 0x00 0x00 0x00 0x36 - -+# CHECK: popcnt %r0, %r0 -+0xb9 0xe1 0x00 0x00 -+ -+# CHECK: popcnt %r0, %r15 -+0xb9 0xe1 0x00 0x0f -+ -+# CHECK: popcnt %r15, %r0 -+0xb9 0xe1 0x00 0xf0 -+ -+# CHECK: popcnt %r7, %r8 -+0xb9 0xe1 0x00 0x78 -+ -+# CHECK: ppa %r0, %r0, 0 -+0xb2 0xe8 0x00 0x00 -+ -+# CHECK: ppa %r0, %r0, 15 -+0xb2 0xe8 0xf0 0x00 -+ -+# CHECK: ppa %r0, %r15, 0 -+0xb2 0xe8 0x00 0x0f -+ 
-+# CHECK: ppa %r4, %r6, 7 -+0xb2 0xe8 0x70 0x46 -+ -+# CHECK: ppa %r15, %r0, 0 -+0xb2 0xe8 0x00 0xf0 -+ - # CHECK: risbg %r0, %r0, 0, 0, 0 - 0xec 0x00 0x00 0x00 0x00 0x55 - -@@ -6355,6 +6421,27 @@ - # CHECK: risbg %r4, %r5, 6, 7, 8 - 0xec 0x45 0x06 0x07 0x08 0x55 - -+# CHECK: risbgn %r0, %r0, 0, 0, 0 -+0xec 0x00 0x00 0x00 0x00 0x59 -+ -+# CHECK: risbgn %r0, %r0, 0, 0, 63 -+0xec 0x00 0x00 0x00 0x3f 0x59 -+ -+# CHECK: risbgn %r0, %r0, 0, 255, 0 -+0xec 0x00 0x00 0xff 0x00 0x59 -+ -+# CHECK: risbgn %r0, %r0, 255, 0, 0 -+0xec 0x00 0xff 0x00 0x00 0x59 -+ -+# CHECK: risbgn %r0, %r15, 0, 0, 0 -+0xec 0x0f 0x00 0x00 0x00 0x59 -+ -+# CHECK: risbgn %r15, %r0, 0, 0, 0 -+0xec 0xf0 0x00 0x00 0x00 0x59 -+ -+# CHECK: risbgn %r4, %r5, 6, 7, 8 -+0xec 0x45 0x06 0x07 0x08 0x59 -+ - # CHECK: risbhg %r0, %r0, 0, 0, 0 - 0xec 0x00 0x00 0x00 0x00 0x5d - -@@ -8029,6 +8116,93 @@ - # CHECK: sy %r15, 0 - 0xe3 0xf0 0x00 0x00 0x00 0x5b - -+# CHECK: tabort 0 -+0xb2 0xfc 0x00 0x00 -+ -+# CHECK: tabort 0(%r1) -+0xb2 0xfc 0x10 0x00 -+ -+# CHECK: tabort 0(%r15) -+0xb2 0xfc 0xf0 0x00 -+ -+# CHECK: tabort 4095 -+0xb2 0xfc 0x0f 0xff -+ -+# CHECK: tabort 4095(%r1) -+0xb2 0xfc 0x1f 0xff -+ -+# CHECK: tabort 4095(%r15) -+0xb2 0xfc 0xff 0xff -+ -+# CHECK: tbegin 0, 0 -+0xe5 0x60 0x00 0x00 0x00 0x00 -+ -+# CHECK: tbegin 4095, 0 -+0xe5 0x60 0x0f 0xff 0x00 0x00 -+ -+# CHECK: tbegin 0, 0 -+0xe5 0x60 0x00 0x00 0x00 0x00 -+ -+# CHECK: tbegin 0, 1 -+0xe5 0x60 0x00 0x00 0x00 0x01 -+ -+# CHECK: tbegin 0, 32767 -+0xe5 0x60 0x00 0x00 0x7f 0xff -+ -+# CHECK: tbegin 0, 32768 -+0xe5 0x60 0x00 0x00 0x80 0x00 -+ -+# CHECK: tbegin 0, 65535 -+0xe5 0x60 0x00 0x00 0xff 0xff -+ -+# CHECK: tbegin 0(%r1), 42 -+0xe5 0x60 0x10 0x00 0x00 0x2a -+ -+# CHECK: tbegin 0(%r15), 42 -+0xe5 0x60 0xf0 0x00 0x00 0x2a -+ -+# CHECK: tbegin 4095(%r1), 42 -+0xe5 0x60 0x1f 0xff 0x00 0x2a -+ -+# CHECK: tbegin 4095(%r15), 42 -+0xe5 0x60 0xff 0xff 0x00 0x2a -+ -+# CHECK: tbeginc 0, 0 -+0xe5 0x61 0x00 0x00 0x00 0x00 -+ -+# CHECK: tbeginc 4095, 0 -+0xe5 
0x61 0x0f 0xff 0x00 0x00 -+ -+# CHECK: tbeginc 0, 0 -+0xe5 0x61 0x00 0x00 0x00 0x00 -+ -+# CHECK: tbeginc 0, 1 -+0xe5 0x61 0x00 0x00 0x00 0x01 -+ -+# CHECK: tbeginc 0, 32767 -+0xe5 0x61 0x00 0x00 0x7f 0xff -+ -+# CHECK: tbeginc 0, 32768 -+0xe5 0x61 0x00 0x00 0x80 0x00 -+ -+# CHECK: tbeginc 0, 65535 -+0xe5 0x61 0x00 0x00 0xff 0xff -+ -+# CHECK: tbeginc 0(%r1), 42 -+0xe5 0x61 0x10 0x00 0x00 0x2a -+ -+# CHECK: tbeginc 0(%r15), 42 -+0xe5 0x61 0xf0 0x00 0x00 0x2a -+ -+# CHECK: tbeginc 4095(%r1), 42 -+0xe5 0x61 0x1f 0xff 0x00 0x2a -+ -+# CHECK: tbeginc 4095(%r15), 42 -+0xe5 0x61 0xff 0xff 0x00 0x2a -+ -+# CHECK: tend -+0xb2 0xf8 0x00 0x00 -+ - # CHECK: tm 0, 0 - 0x91 0x00 0x00 0x00 - -Index: llvm-36/test/MC/SystemZ/fixups.s -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/SystemZ/fixups.s -@@ -0,0 +1,119 @@ -+ -+# RUN: llvm-mc -triple s390x-unknown-unknown --show-encoding %s | FileCheck %s -+ -+# RUN: llvm-mc -triple s390x-unknown-unknown -filetype=obj %s | \ -+# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL -+ -+# CHECK: larl %r14, target # encoding: [0xc0,0xe0,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2 -+ .align 16 -+ larl %r14, target -+ -+# CHECK: larl %r14, target@GOT # encoding: [0xc0,0xe0,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@GOT+2, kind: FK_390_PC32DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_GOTENT target 0x2 -+ .align 16 -+ larl %r14, target@got -+ -+# CHECK: larl %r14, target@INDNTPOFF # encoding: [0xc0,0xe0,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@INDNTPOFF+2, kind: FK_390_PC32DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_TLS_IEENT target 0x2 -+ .align 16 -+ larl %r14, target@indntpoff -+ -+# CHECK: brasl %r14, target # encoding: [0xc0,0xe5,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL -+# CHECK-REL: 
0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2 -+ .align 16 -+ brasl %r14, target -+ -+# CHECK: brasl %r14, target@PLT # encoding: [0xc0,0xe5,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 -+ .align 16 -+ brasl %r14, target@plt -+ -+# CHECK: brasl %r14, target@PLT:tls_gdcall:sym # encoding: [0xc0,0xe5,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL -+# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0 -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 -+ .align 16 -+ brasl %r14, target@plt:tls_gdcall:sym -+ -+# CHECK: brasl %r14, target@PLT:tls_ldcall:sym # encoding: [0xc0,0xe5,A,A,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL -+# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0 -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 -+ .align 16 -+ brasl %r14, target@plt:tls_ldcall:sym -+ -+# CHECK: bras %r14, target # encoding: [0xa7,0xe5,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC16DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PC16DBL target 0x2 -+ .align 16 -+ bras %r14, target -+ -+# CHECK: bras %r14, target@PLT # encoding: [0xa7,0xe5,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 -+ .align 16 -+ bras %r14, target@plt -+ -+# CHECK: bras %r14, target@PLT:tls_gdcall:sym # encoding: [0xa7,0xe5,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL -+# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0 -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 -+ .align 16 -+ bras 
%r14, target@plt:tls_gdcall:sym -+ -+# CHECK: bras %r14, target@PLT:tls_ldcall:sym # encoding: [0xa7,0xe5,A,A] -+# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL -+# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0 -+# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 -+ .align 16 -+ bras %r14, target@plt:tls_ldcall:sym -+ -+ -+# Data relocs -+# llvm-mc does not show any "encoding" string for data, so we just check the relocs -+ -+# CHECK-REL: .rela.data -+ .data -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE64 target 0x0 -+ .align 16 -+ .quad target@ntpoff -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO64 target 0x0 -+ .align 16 -+ .quad target@dtpoff -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM64 target 0x0 -+ .align 16 -+ .quad target@tlsldm -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD64 target 0x0 -+ .align 16 -+ .quad target@tlsgd -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE32 target 0x0 -+ .align 16 -+ .long target@ntpoff -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO32 target 0x0 -+ .align 16 -+ .long target@dtpoff -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM32 target 0x0 -+ .align 16 -+ .long target@tlsldm -+ -+# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD32 target 0x0 -+ .align 16 -+ .long target@tlsgd -+ -Index: llvm-36/test/MC/SystemZ/insn-bad-z13.s -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/SystemZ/insn-bad-z13.s -@@ -0,0 +1,1201 @@ -+# For z13 only. 
-+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z13 < %s 2> %t -+# RUN: FileCheck < %t %s -+ -+#CHECK: error: invalid operand -+#CHECK: lcbb %r0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: lcbb %r0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: lcbb %r0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: lcbb %r0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: lcbb %r0, 0(%v1,%r2), 0 -+ -+ lcbb %r0, 0, -1 -+ lcbb %r0, 0, 16 -+ lcbb %r0, -1, 0 -+ lcbb %r0, 4096, 0 -+ lcbb %r0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vcdgb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vcdgb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vcdgb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vcdgb %v0, %v0, 16, 0 -+ -+ vcdgb %v0, %v0, 0, -1 -+ vcdgb %v0, %v0, 0, 16 -+ vcdgb %v0, %v0, -1, 0 -+ vcdgb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vcdlgb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vcdlgb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vcdlgb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vcdlgb %v0, %v0, 16, 0 -+ -+ vcdlgb %v0, %v0, 0, -1 -+ vcdlgb %v0, %v0, 0, 16 -+ vcdlgb %v0, %v0, -1, 0 -+ vcdlgb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vcgdb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vcgdb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vcgdb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vcgdb %v0, %v0, 16, 0 -+ -+ vcgdb %v0, %v0, 0, -1 -+ vcgdb %v0, %v0, 0, 16 -+ vcgdb %v0, %v0, -1, 0 -+ vcgdb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vclgdb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vclgdb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vclgdb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vclgdb %v0, %v0, 16, 0 -+ -+ vclgdb %v0, %v0, 0, -1 -+ vclgdb %v0, %v0, 0, 16 -+ vclgdb %v0, %v0, -1, 0 -+ vclgdb %v0, %v0, 
16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: verimb %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verimb %v0, %v0, %v0, 256 -+ -+ verimb %v0, %v0, %v0, -1 -+ verimb %v0, %v0, %v0, 256 -+ -+#CHECK: error: invalid operand -+#CHECK: verimf %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verimf %v0, %v0, %v0, 256 -+ -+ verimf %v0, %v0, %v0, -1 -+ verimf %v0, %v0, %v0, 256 -+ -+#CHECK: error: invalid operand -+#CHECK: verimg %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verimg %v0, %v0, %v0, 256 -+ -+ verimg %v0, %v0, %v0, -1 -+ verimg %v0, %v0, %v0, 256 -+ -+#CHECK: error: invalid operand -+#CHECK: verimh %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verimh %v0, %v0, %v0, 256 -+ -+ verimh %v0, %v0, %v0, -1 -+ verimh %v0, %v0, %v0, 256 -+ -+#CHECK: error: invalid operand -+#CHECK: verllb %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verllb %v0, %v0, 4096 -+ -+ verllb %v0, %v0, -1 -+ verllb %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: verllf %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verllf %v0, %v0, 4096 -+ -+ verllf %v0, %v0, -1 -+ verllf %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: verllg %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verllg %v0, %v0, 4096 -+ -+ verllg %v0, %v0, -1 -+ verllg %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: verllh %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: verllh %v0, %v0, 4096 -+ -+ verllh %v0, %v0, -1 -+ verllh %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: veslb %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: veslb %v0, %v0, 4096 -+ -+ veslb %v0, %v0, -1 -+ veslb %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: veslf %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: veslf %v0, %v0, 4096 -+ -+ veslf %v0, %v0, -1 -+ veslf %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: veslg %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: 
veslg %v0, %v0, 4096 -+ -+ veslg %v0, %v0, -1 -+ veslg %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: veslh %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: veslh %v0, %v0, 4096 -+ -+ veslh %v0, %v0, -1 -+ veslh %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrab %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrab %v0, %v0, 4096 -+ -+ vesrab %v0, %v0, -1 -+ vesrab %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesraf %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesraf %v0, %v0, 4096 -+ -+ vesraf %v0, %v0, -1 -+ vesraf %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrag %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrag %v0, %v0, 4096 -+ -+ vesrag %v0, %v0, -1 -+ vesrag %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrah %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrah %v0, %v0, 4096 -+ -+ vesrah %v0, %v0, -1 -+ vesrah %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrlb %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrlb %v0, %v0, 4096 -+ -+ vesrlb %v0, %v0, -1 -+ vesrlb %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrlf %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrlf %v0, %v0, 4096 -+ -+ vesrlf %v0, %v0, -1 -+ vesrlf %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrlg %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrlg %v0, %v0, 4096 -+ -+ vesrlg %v0, %v0, -1 -+ vesrlg %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vesrlh %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vesrlh %v0, %v0, 4096 -+ -+ vesrlh %v0, %v0, -1 -+ vesrlh %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaeb %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaeb %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaeb %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaeb %v0, %v0, %v0, 0, 0 -+ -+ vfaeb %v0, %v0, %v0, -1 
-+ vfaeb %v0, %v0, %v0, 16 -+ vfaeb %v0, %v0 -+ vfaeb %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaebs %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaebs %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaebs %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaebs %v0, %v0, %v0, 0, 0 -+ -+ vfaebs %v0, %v0, %v0, -1 -+ vfaebs %v0, %v0, %v0, 16 -+ vfaebs %v0, %v0 -+ vfaebs %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaef %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaef %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaef %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaef %v0, %v0, %v0, 0, 0 -+ -+ vfaef %v0, %v0, %v0, -1 -+ vfaef %v0, %v0, %v0, 16 -+ vfaef %v0, %v0 -+ vfaef %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaeh %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaeh %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaeh %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaeh %v0, %v0, %v0, 0, 0 -+ -+ vfaeh %v0, %v0, %v0, -1 -+ vfaeh %v0, %v0, %v0, 16 -+ vfaeh %v0, %v0 -+ vfaeh %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaezh %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaezh %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaezh %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaezh %v0, %v0, %v0, 0, 0 -+ -+ vfaezh %v0, %v0, %v0, -1 -+ vfaezh %v0, %v0, %v0, 16 -+ vfaezh %v0, %v0 -+ vfaezh %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vfaezfs %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfaezfs %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vfaezfs %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vfaezfs %v0, %v0, %v0, 0, 0 -+ -+ vfaezfs %v0, %v0, %v0, -1 -+ vfaezfs %v0, %v0, %v0, 16 -+ vfaezfs %v0, %v0 -+ vfaezfs %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: 
vfidb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vfidb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vfidb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vfidb %v0, %v0, 16, 0 -+ -+ vfidb %v0, %v0, 0, -1 -+ vfidb %v0, %v0, 0, 16 -+ vfidb %v0, %v0, -1, 0 -+ vfidb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vftcidb %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vftcidb %v0, %v0, 4096 -+ -+ vftcidb %v0, %v0, -1 -+ vftcidb %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vgbm %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgbm %v0, 0x10000 -+ -+ vgbm %v0, -1 -+ vgbm %v0, 0x10000 -+ -+#CHECK: error: vector index required -+#CHECK: vgef %v0, 0(%r1), 0 -+#CHECK: error: vector index required -+#CHECK: vgef %v0, 0(%r2,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vgef %v0, 0(%v0,%r1), -1 -+#CHECK: error: invalid operand -+#CHECK: vgef %v0, 0(%v0,%r1), 4 -+#CHECK: error: invalid operand -+#CHECK: vgef %v0, -1(%v0,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vgef %v0, 4096(%v0,%r1), 0 -+ -+ vgef %v0, 0(%r1), 0 -+ vgef %v0, 0(%r2,%r1), 0 -+ vgef %v0, 0(%v0,%r1), -1 -+ vgef %v0, 0(%v0,%r1), 4 -+ vgef %v0, -1(%v0,%r1), 0 -+ vgef %v0, 4096(%v0,%r1), 0 -+ -+#CHECK: error: vector index required -+#CHECK: vgeg %v0, 0(%r1), 0 -+#CHECK: error: vector index required -+#CHECK: vgeg %v0, 0(%r2,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vgeg %v0, 0(%v0,%r1), -1 -+#CHECK: error: invalid operand -+#CHECK: vgeg %v0, 0(%v0,%r1), 2 -+#CHECK: error: invalid operand -+#CHECK: vgeg %v0, -1(%v0,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vgeg %v0, 4096(%v0,%r1), 0 -+ -+ vgeg %v0, 0(%r1), 0 -+ vgeg %v0, 0(%r2,%r1), 0 -+ vgeg %v0, 0(%v0,%r1), -1 -+ vgeg %v0, 0(%v0,%r1), 2 -+ vgeg %v0, -1(%v0,%r1), 0 -+ vgeg %v0, 4096(%v0,%r1), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vgmb %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmb %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: 
vgmb %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vgmb %v0, 256, 0 -+ -+ vgmb %v0, 0, -1 -+ vgmb %v0, 0, -1 -+ vgmb %v0, -1, 0 -+ vgmb %v0, 256, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vgmf %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmf %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmf %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vgmf %v0, 256, 0 -+ -+ vgmf %v0, 0, -1 -+ vgmf %v0, 0, -1 -+ vgmf %v0, -1, 0 -+ vgmf %v0, 256, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vgmg %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmg %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmg %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vgmg %v0, 256, 0 -+ -+ vgmg %v0, 0, -1 -+ vgmg %v0, 0, -1 -+ vgmg %v0, -1, 0 -+ vgmg %v0, 256, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vgmh %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmh %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vgmh %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vgmh %v0, 256, 0 -+ -+ vgmh %v0, 0, -1 -+ vgmh %v0, 0, -1 -+ vgmh %v0, -1, 0 -+ vgmh %v0, 256, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vl %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vl %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vl %v0, 0(%v1,%r2) -+ -+ vl %v0, -1 -+ vl %v0, 4096 -+ vl %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlbb %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlbb %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vlbb %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vlbb %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlbb %v0, 0(%v1,%r2), 0 -+ -+ vlbb %v0, 0, -1 -+ vlbb %v0, 0, 16 -+ vlbb %v0, -1, 0 -+ vlbb %v0, 4096, 0 -+ vlbb %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleb %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleb %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vleb %v0, -1, 0 -+#CHECK: error: 
invalid operand -+#CHECK: vleb %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vleb %v0, 0(%v1,%r2), 0 -+ -+ vleb %v0, 0, -1 -+ vleb %v0, 0, 16 -+ vleb %v0, -1, 0 -+ vleb %v0, 4096, 0 -+ vleb %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vledb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vledb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vledb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vledb %v0, %v0, 16, 0 -+ -+ vledb %v0, %v0, 0, -1 -+ vledb %v0, %v0, 0, 16 -+ vledb %v0, %v0, -1, 0 -+ vledb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vlef %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlef %v0, 0, 4 -+#CHECK: error: invalid operand -+#CHECK: vlef %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vlef %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlef %v0, 0(%v1,%r2), 0 -+ -+ vlef %v0, 0, -1 -+ vlef %v0, 0, 4 -+ vlef %v0, -1, 0 -+ vlef %v0, 4096, 0 -+ vlef %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleg %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleg %v0, 0, 2 -+#CHECK: error: invalid operand -+#CHECK: vleg %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vleg %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vleg %v0, 0(%v1,%r2), 0 -+ -+ vleg %v0, 0, -1 -+ vleg %v0, 0, 2 -+ vleg %v0, -1, 0 -+ vleg %v0, 4096, 0 -+ vleg %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleh %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleh %v0, 0, 8 -+#CHECK: error: invalid operand -+#CHECK: vleh %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vleh %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vleh %v0, 0(%v1,%r2), 0 -+ -+ vleh %v0, 0, -1 -+ vleh %v0, 0, 8 -+ vleh %v0, -1, 0 -+ vleh %v0, 4096, 0 -+ vleh %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleib %v0, 0, -1 -+#CHECK: error: invalid operand 
-+#CHECK: vleib %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vleib %v0, -32769, 0 -+#CHECK: error: invalid operand -+#CHECK: vleib %v0, 32768, 0 -+ -+ vleib %v0, 0, -1 -+ vleib %v0, 0, 16 -+ vleib %v0, -32769, 0 -+ vleib %v0, 32768, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleif %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleif %v0, 0, 4 -+#CHECK: error: invalid operand -+#CHECK: vleif %v0, -32769, 0 -+#CHECK: error: invalid operand -+#CHECK: vleif %v0, 32768, 0 -+ -+ vleif %v0, 0, -1 -+ vleif %v0, 0, 4 -+ vleif %v0, -32769, 0 -+ vleif %v0, 32768, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleig %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleig %v0, 0, 2 -+#CHECK: error: invalid operand -+#CHECK: vleig %v0, -32769, 0 -+#CHECK: error: invalid operand -+#CHECK: vleig %v0, 32768, 0 -+ -+ vleig %v0, 0, -1 -+ vleig %v0, 0, 2 -+ vleig %v0, -32769, 0 -+ vleig %v0, 32768, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vleih %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vleih %v0, 0, 8 -+#CHECK: error: invalid operand -+#CHECK: vleih %v0, -32769, 0 -+#CHECK: error: invalid operand -+#CHECK: vleih %v0, 32768, 0 -+ -+ vleih %v0, 0, -1 -+ vleih %v0, 0, 8 -+ vleih %v0, -32769, 0 -+ vleih %v0, 32768, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vlgvb %r0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlgvb %r0, %v0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlgvb %r0, %v0, 0(%r0) -+ -+ vlgvb %r0, %v0, -1 -+ vlgvb %r0, %v0, 4096 -+ vlgvb %r0, %v0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlgvf %r0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlgvf %r0, %v0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlgvf %r0, %v0, 0(%r0) -+ -+ vlgvf %r0, %v0, -1 -+ vlgvf %r0, %v0, 4096 -+ vlgvf %r0, %v0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlgvg %r0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlgvg %r0, %v0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlgvg 
%r0, %v0, 0(%r0) -+ -+ vlgvg %r0, %v0, -1 -+ vlgvg %r0, %v0, 4096 -+ vlgvg %r0, %v0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlgvh %r0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlgvh %r0, %v0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlgvh %r0, %v0, 0(%r0) -+ -+ vlgvh %r0, %v0, -1 -+ vlgvh %r0, %v0, 4096 -+ vlgvh %r0, %v0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vll %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vll %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vll %v0, %r0, 0(%r0) -+ -+ vll %v0, %r0, -1 -+ vll %v0, %r0, 4096 -+ vll %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vllezb %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vllezb %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vllezb %v0, 0(%v1,%r2) -+ -+ vllezb %v0, -1 -+ vllezb %v0, 4096 -+ vllezb %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vllezf %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vllezf %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vllezf %v0, 0(%v1,%r2) -+ -+ vllezf %v0, -1 -+ vllezf %v0, 4096 -+ vllezf %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vllezg %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vllezg %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vllezg %v0, 0(%v1,%r2) -+ -+ vllezg %v0, -1 -+ vllezg %v0, 4096 -+ vllezg %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vllezh %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vllezh %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vllezh %v0, 0(%v1,%r2) -+ -+ vllezh %v0, -1 -+ vllezh %v0, 4096 -+ vllezh %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlm %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlm %v0, %v0, 4096 -+ -+ vlm %v0, %v0, -1 -+ vlm %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vlrepb %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlrepb 
%v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlrepb %v0, 0(%v1,%r2) -+ -+ vlrepb %v0, -1 -+ vlrepb %v0, 4096 -+ vlrepb %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlrepf %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlrepf %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlrepf %v0, 0(%v1,%r2) -+ -+ vlrepf %v0, -1 -+ vlrepf %v0, 4096 -+ vlrepf %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlrepg %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlrepg %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlrepg %v0, 0(%v1,%r2) -+ -+ vlrepg %v0, -1 -+ vlrepg %v0, 4096 -+ vlrepg %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlreph %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlreph %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vlreph %v0, 0(%v1,%r2) -+ -+ vlreph %v0, -1 -+ vlreph %v0, 4096 -+ vlreph %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vlvgb %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlvgb %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlvgb %v0, %r0, 0(%r0) -+ -+ vlvgb %v0, %r0, -1 -+ vlvgb %v0, %r0, 4096 -+ vlvgb %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlvgf %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlvgf %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlvgf %v0, %r0, 0(%r0) -+ -+ vlvgf %v0, %r0, -1 -+ vlvgf %v0, %r0, 4096 -+ vlvgf %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlvgg %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlvgg %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlvgg %v0, %r0, 0(%r0) -+ -+ vlvgg %v0, %r0, -1 -+ vlvgg %v0, %r0, 4096 -+ vlvgg %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vlvgh %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vlvgh %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vlvgh %v0, %r0, 
0(%r0) -+ -+ vlvgh %v0, %r0, -1 -+ vlvgh %v0, %r0, 4096 -+ vlvgh %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vpdi %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vpdi %v0, %v0, %v0, 16 -+ -+ vpdi %v0, %v0, %v0, -1 -+ vpdi %v0, %v0, %v0, 16 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepb %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vrepb %v0, %v0, 65536 -+ -+ vrepb %v0, %v0, -1 -+ vrepb %v0, %v0, 65536 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepf %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vrepf %v0, %v0, 65536 -+ -+ vrepf %v0, %v0, -1 -+ vrepf %v0, %v0, 65536 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepg %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vrepg %v0, %v0, 65536 -+ -+ vrepg %v0, %v0, -1 -+ vrepg %v0, %v0, 65536 -+ -+#CHECK: error: invalid operand -+#CHECK: vreph %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vreph %v0, %v0, 65536 -+ -+ vreph %v0, %v0, -1 -+ vreph %v0, %v0, 65536 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepib %v0, -32769 -+#CHECK: error: invalid operand -+#CHECK: vrepib %v0, 32768 -+ -+ vrepib %v0, -32769 -+ vrepib %v0, 32768 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepif %v0, -32769 -+#CHECK: error: invalid operand -+#CHECK: vrepif %v0, 32768 -+ -+ vrepif %v0, -32769 -+ vrepif %v0, 32768 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepig %v0, -32769 -+#CHECK: error: invalid operand -+#CHECK: vrepig %v0, 32768 -+ -+ vrepig %v0, -32769 -+ vrepig %v0, 32768 -+ -+#CHECK: error: invalid operand -+#CHECK: vrepih %v0, -32769 -+#CHECK: error: invalid operand -+#CHECK: vrepih %v0, 32768 -+ -+ vrepih %v0, -32769 -+ vrepih %v0, 32768 -+ -+#CHECK: error: vector index required -+#CHECK: vscef %v0, 0(%r1), 0 -+#CHECK: error: vector index required -+#CHECK: vscef %v0, 0(%r2,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vscef %v0, 0(%v0,%r1), -1 -+#CHECK: error: invalid operand -+#CHECK: vscef %v0, 0(%v0,%r1), 4 -+#CHECK: error: invalid operand 
-+#CHECK: vscef %v0, -1(%v0,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vscef %v0, 4096(%v0,%r1), 0 -+ -+ vscef %v0, 0(%r1), 0 -+ vscef %v0, 0(%r2,%r1), 0 -+ vscef %v0, 0(%v0,%r1), -1 -+ vscef %v0, 0(%v0,%r1), 4 -+ vscef %v0, -1(%v0,%r1), 0 -+ vscef %v0, 4096(%v0,%r1), 0 -+ -+#CHECK: error: vector index required -+#CHECK: vsceg %v0, 0(%r1), 0 -+#CHECK: error: vector index required -+#CHECK: vsceg %v0, 0(%r2,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vsceg %v0, 0(%v0,%r1), -1 -+#CHECK: error: invalid operand -+#CHECK: vsceg %v0, 0(%v0,%r1), 2 -+#CHECK: error: invalid operand -+#CHECK: vsceg %v0, -1(%v0,%r1), 0 -+#CHECK: error: invalid operand -+#CHECK: vsceg %v0, 4096(%v0,%r1), 0 -+ -+ vsceg %v0, 0(%r1), 0 -+ vsceg %v0, 0(%r2,%r1), 0 -+ vsceg %v0, 0(%v0,%r1), -1 -+ vsceg %v0, 0(%v0,%r1), 2 -+ vsceg %v0, -1(%v0,%r1), 0 -+ vsceg %v0, 4096(%v0,%r1), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vsldb %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vsldb %v0, %v0, %v0, 256 -+ -+ vsldb %v0, %v0, %v0, -1 -+ vsldb %v0, %v0, %v0, 256 -+ -+#CHECK: error: invalid operand -+#CHECK: vst %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vst %v0, 4096 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vst %v0, 0(%v1,%r2) -+ -+ vst %v0, -1 -+ vst %v0, 4096 -+ vst %v0, 0(%v1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: vsteb %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vsteb %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: vsteb %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vsteb %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vsteb %v0, 0(%v1,%r2), 0 -+ -+ vsteb %v0, 0, -1 -+ vsteb %v0, 0, 16 -+ vsteb %v0, -1, 0 -+ vsteb %v0, 4096, 0 -+ vsteb %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstef %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstef %v0, 0, 4 -+#CHECK: error: invalid operand -+#CHECK: vstef %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vstef 
%v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vstef %v0, 0(%v1,%r2), 0 -+ -+ vstef %v0, 0, -1 -+ vstef %v0, 0, 4 -+ vstef %v0, -1, 0 -+ vstef %v0, 4096, 0 -+ vstef %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vsteg %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vsteg %v0, 0, 2 -+#CHECK: error: invalid operand -+#CHECK: vsteg %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vsteg %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vsteg %v0, 0(%v1,%r2), 0 -+ -+ vsteg %v0, 0, -1 -+ vsteg %v0, 0, 2 -+ vsteg %v0, -1, 0 -+ vsteg %v0, 4096, 0 -+ vsteg %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vsteh %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: vsteh %v0, 0, 8 -+#CHECK: error: invalid operand -+#CHECK: vsteh %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: vsteh %v0, 4096, 0 -+#CHECK: error: invalid use of vector addressing -+#CHECK: vsteh %v0, 0(%v1,%r2), 0 -+ -+ vsteh %v0, 0, -1 -+ vsteh %v0, 0, 8 -+ vsteh %v0, -1, 0 -+ vsteh %v0, 4096, 0 -+ vsteh %v0, 0(%v1,%r2), 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstl %v0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstl %v0, %r0, 4096 -+#CHECK: error: %r0 used in an address -+#CHECK: vstl %v0, %r0, 0(%r0) -+ -+ vstl %v0, %r0, -1 -+ vstl %v0, %r0, 4096 -+ vstl %v0, %r0, 0(%r0) -+ -+#CHECK: error: invalid operand -+#CHECK: vstm %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstm %v0, %v0, 4096 -+ -+ vstm %v0, %v0, -1 -+ vstm %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: vstrcb %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrcb %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrcb %v0, %v0, %v0, %v0, -1 -+ vstrcb %v0, %v0, %v0, %v0, 16 -+ vstrcb %v0, %v0, %v0 -+ vstrcb %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: 
vstrcbs %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrcbs %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrcbs %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrcbs %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrcbs %v0, %v0, %v0, %v0, -1 -+ vstrcbs %v0, %v0, %v0, %v0, 16 -+ vstrcbs %v0, %v0, %v0 -+ vstrcbs %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstrcf %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrcf %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrcf %v0, %v0, %v0, %v0, -1 -+ vstrcf %v0, %v0, %v0, %v0, 16 -+ vstrcf %v0, %v0, %v0 -+ vstrcf %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstrch %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrch %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrch %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrch %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrch %v0, %v0, %v0, %v0, -1 -+ vstrch %v0, %v0, %v0, %v0, 16 -+ vstrch %v0, %v0, %v0 -+ vstrch %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstrczh %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrczh %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrczh %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrczh %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrczh %v0, %v0, %v0, %v0, -1 -+ vstrczh %v0, %v0, %v0, %v0, 16 -+ vstrczh %v0, %v0, %v0 -+ vstrczh %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: vstrczfs %v0, %v0, %v0, %v0, -1 -+#CHECK: error: invalid operand -+#CHECK: vstrczfs %v0, %v0, %v0, %v0, 16 -+#CHECK: error: too few operands -+#CHECK: vstrczfs %v0, %v0, %v0 -+#CHECK: error: invalid operand -+#CHECK: vstrczfs %v0, %v0, %v0, %v0, 0, 0 -+ -+ vstrczfs %v0, %v0, %v0, %v0, -1 -+ vstrczfs %v0, %v0, %v0, 
%v0, 16 -+ vstrczfs %v0, %v0, %v0 -+ vstrczfs %v0, %v0, %v0, %v0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wcdgb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wcdgb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wcdgb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wcdgb %v0, %v0, 16, 0 -+ -+ wcdgb %v0, %v0, 0, -1 -+ wcdgb %v0, %v0, 0, 16 -+ wcdgb %v0, %v0, -1, 0 -+ wcdgb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wcdlgb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wcdlgb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wcdlgb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wcdlgb %v0, %v0, 16, 0 -+ -+ wcdlgb %v0, %v0, 0, -1 -+ wcdlgb %v0, %v0, 0, 16 -+ wcdlgb %v0, %v0, -1, 0 -+ wcdlgb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wcgdb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wcgdb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wcgdb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wcgdb %v0, %v0, 16, 0 -+ -+ wcgdb %v0, %v0, 0, -1 -+ wcgdb %v0, %v0, 0, 16 -+ wcgdb %v0, %v0, -1, 0 -+ wcgdb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wclgdb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wclgdb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wclgdb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wclgdb %v0, %v0, 16, 0 -+ -+ wclgdb %v0, %v0, 0, -1 -+ wclgdb %v0, %v0, 0, 16 -+ wclgdb %v0, %v0, -1, 0 -+ wclgdb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wfidb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wfidb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wfidb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wfidb %v0, %v0, 16, 0 -+ -+ wfidb %v0, %v0, 0, -1 -+ wfidb %v0, %v0, 0, 16 -+ wfidb %v0, %v0, -1, 0 -+ wfidb %v0, %v0, 16, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: wftcidb %v0, %v0, -1 -+#CHECK: 
error: invalid operand -+#CHECK: wftcidb %v0, %v0, 4096 -+ -+ wftcidb %v0, %v0, -1 -+ wftcidb %v0, %v0, 4096 -+ -+#CHECK: error: invalid operand -+#CHECK: wledb %v0, %v0, 0, -1 -+#CHECK: error: invalid operand -+#CHECK: wledb %v0, %v0, 0, 16 -+#CHECK: error: invalid operand -+#CHECK: wledb %v0, %v0, -1, 0 -+#CHECK: error: invalid operand -+#CHECK: wledb %v0, %v0, 16, 0 -+ -+ wledb %v0, %v0, 0, -1 -+ wledb %v0, %v0, 0, 16 -+ wledb %v0, %v0, -1, 0 -+ wledb %v0, %v0, 16, 0 -Index: llvm-36/test/MC/SystemZ/insn-bad-z196.s -=================================================================== ---- llvm-36.orig/test/MC/SystemZ/insn-bad-z196.s -+++ llvm-36/test/MC/SystemZ/insn-bad-z196.s -@@ -244,6 +244,11 @@ - cxlgbr %f0, 16, %r0, 0 - cxlgbr %f2, 0, %r0, 0 - -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: etnd %r7 -+ -+ etnd %r7 -+ - #CHECK: error: invalid operand - #CHECK: fidbra %f0, 0, %f0, -1 - #CHECK: error: invalid operand -@@ -546,6 +551,21 @@ - locr %r0,%r0,-1 - locr %r0,%r0,16 - -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: ntstg %r0, 524287(%r1,%r15) -+ -+ ntstg %r0, 524287(%r1,%r15) -+ -+#CHECK: error: {{(instruction requires: processor-assist)?}} -+#CHECK: ppa %r4, %r6, 7 -+ -+ ppa %r4, %r6, 7 -+ -+#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}} -+#CHECK: risbgn %r1, %r2, 0, 0, 0 -+ -+ risbgn %r1, %r2, 0, 0, 0 -+ - #CHECK: error: invalid operand - #CHECK: risbhg %r0,%r0,0,0,-1 - #CHECK: error: invalid operand -@@ -685,3 +705,24 @@ - stocg %r0,-524289,1 - stocg %r0,524288,1 - stocg %r0,0(%r1,%r2),1 -+ -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: tabort 4095(%r1) -+ -+ tabort 4095(%r1) -+ -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: tbegin 4095(%r1), 42 -+ -+ tbegin 4095(%r1), 42 -+ -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: tbeginc 4095(%r1), 42 -+ -+ tbeginc 4095(%r1), 42 
-+ -+#CHECK: error: {{(instruction requires: transactional-execution)?}} -+#CHECK: tend -+ -+ tend -+ -Index: llvm-36/test/MC/SystemZ/insn-bad-zEC12.s -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/SystemZ/insn-bad-zEC12.s -@@ -0,0 +1,1578 @@ -+# For zEC12 only. -+# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 < %s 2> %t -+# RUN: FileCheck < %t %s -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: lcbb %r0, 0, 0 -+ -+ lcbb %r0, 0, 0 -+ -+#CHECK: error: invalid operand -+#CHECK: ntstg %r0, -524289 -+#CHECK: error: invalid operand -+#CHECK: ntstg %r0, 524288 -+ -+ ntstg %r0, -524289 -+ ntstg %r0, 524288 -+ -+#CHECK: error: invalid operand -+#CHECK: ppa %r0, %r0, -1 -+#CHECK: error: invalid operand -+#CHECK: ppa %r0, %r0, 16 -+ -+ ppa %r0, %r0, -1 -+ ppa %r0, %r0, 16 -+ -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,0,0,-1 -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,0,0,64 -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,0,-1,0 -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,0,256,0 -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,-1,0,0 -+#CHECK: error: invalid operand -+#CHECK: risbgn %r0,%r0,256,0,0 -+ -+ risbgn %r0,%r0,0,0,-1 -+ risbgn %r0,%r0,0,0,64 -+ risbgn %r0,%r0,0,-1,0 -+ risbgn %r0,%r0,0,256,0 -+ risbgn %r0,%r0,-1,0,0 -+ risbgn %r0,%r0,256,0,0 -+ -+#CHECK: error: invalid operand -+#CHECK: tabort -1 -+#CHECK: error: invalid operand -+#CHECK: tabort 4096 -+#CHECK: error: invalid use of indexed addressing -+#CHECK: tabort 0(%r1,%r2) -+ -+ tabort -1 -+ tabort 4096 -+ tabort 0(%r1,%r2) -+ -+#CHECK: error: invalid operand -+#CHECK: tbegin -1, 0 -+#CHECK: error: invalid operand -+#CHECK: tbegin 4096, 0 -+#CHECK: error: invalid use of indexed addressing -+#CHECK: tbegin 0(%r1,%r2), 0 -+#CHECK: error: invalid operand -+#CHECK: tbegin 0, -1 -+#CHECK: error: invalid operand -+#CHECK: tbegin 0, 65536 -+ -+ tbegin -1, 0 -+ tbegin 4096, 0 
-+ tbegin 0(%r1,%r2), 0 -+ tbegin 0, -1 -+ tbegin 0, 65536 -+ -+#CHECK: error: invalid operand -+#CHECK: tbeginc -1, 0 -+#CHECK: error: invalid operand -+#CHECK: tbeginc 4096, 0 -+#CHECK: error: invalid use of indexed addressing -+#CHECK: tbeginc 0(%r1,%r2), 0 -+#CHECK: error: invalid operand -+#CHECK: tbeginc 0, -1 -+#CHECK: error: invalid operand -+#CHECK: tbeginc 0, 65536 -+ -+ tbeginc -1, 0 -+ tbeginc 4096, 0 -+ tbeginc 0(%r1,%r2), 0 -+ tbeginc 0, -1 -+ tbeginc 0, 65536 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vab %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vag %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vah %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaq %v0, %v0, %v0 -+ -+ vab %v0, %v0, %v0 -+ vaf %v0, %v0, %v0 -+ vag %v0, %v0, %v0 -+ vah %v0, %v0, %v0 -+ vaq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaccb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaccf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaccg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vacch %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vaccq %v0, %v0, %v0 -+ -+ vaccb %v0, %v0, %v0 -+ vaccf %v0, %v0, %v0 -+ vaccg %v0, %v0, %v0 -+ vacch %v0, %v0, %v0 -+ vaccq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vacccq %v0, %v0, %v0 -+ -+ vacccq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vacq %v0, %v0, %v0 -+ -+ vacq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavgb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavgf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavgg %v0, 
%v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavgh %v0, %v0, %v0 -+ -+ vavgb %v0, %v0, %v0 -+ vavgf %v0, %v0, %v0 -+ vavgg %v0, %v0, %v0 -+ vavgh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavglb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavglf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavglg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vavglh %v0, %v0, %v0 -+ -+ vavglb %v0, %v0, %v0 -+ vavglf %v0, %v0, %v0 -+ vavglg %v0, %v0, %v0 -+ vavglh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vcdgb %v0, %v0, 0, 0 -+ -+ vcdgb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vcdlgb %v0, %v0, 0, 0 -+ -+ vcdlgb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqfs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vceqgs %v0, %v0, %v0 -+ -+ vceqb %v0, %v0, %v0 -+ vceqf %v0, %v0, %v0 -+ vceqg %v0, %v0, %v0 -+ vceqh %v0, %v0, %v0 -+ vceqbs %v0, %v0, %v0 -+ vceqhs %v0, %v0, %v0 -+ vceqfs %v0, %v0, %v0 -+ vceqgs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vcgdb %v0, %v0, 0, 0 -+ -+ vcgdb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchf %v0, %v0, %v0 
-+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchfs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchgs %v0, %v0, %v0 -+ -+ vchb %v0, %v0, %v0 -+ vchf %v0, %v0, %v0 -+ vchg %v0, %v0, %v0 -+ vchh %v0, %v0, %v0 -+ vchbs %v0, %v0, %v0 -+ vchhs %v0, %v0, %v0 -+ vchfs %v0, %v0, %v0 -+ vchgs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlfs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vchlgs %v0, %v0, %v0 -+ -+ vchlb %v0, %v0, %v0 -+ vchlf %v0, %v0, %v0 -+ vchlg %v0, %v0, %v0 -+ vchlh %v0, %v0, %v0 -+ vchlbs %v0, %v0, %v0 -+ vchlhs %v0, %v0, %v0 -+ vchlfs %v0, %v0, %v0 -+ vchlgs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vcksm %v0, %v0, %v0 -+ -+ vcksm %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vclgdb %v0, %v0, 0, 0 -+ -+ vclgdb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vclzb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vclzf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vclzg 
%v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vclzh %v0, %v0 -+ -+ vclzb %v0, %v0 -+ vclzf %v0, %v0 -+ vclzg %v0, %v0 -+ vclzh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vctzb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vctzf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vctzg %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vctzh %v0, %v0 -+ -+ vctzb %v0, %v0 -+ vctzf %v0, %v0 -+ vctzg %v0, %v0 -+ vctzh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vecb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vecf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vecg %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vech %v0, %v0 -+ -+ vecb %v0, %v0 -+ vecf %v0, %v0 -+ vecg %v0, %v0 -+ vech %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verimb %v0, %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verimf %v0, %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verimg %v0, %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verimh %v0, %v0, %v0, 0 -+ -+ verimb %v0, %v0, %v0, 0 -+ verimf %v0, %v0, %v0, 0 -+ verimg %v0, %v0, %v0, 0 -+ verimh %v0, %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veclb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veclf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veclg %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veclh %v0, %v0 -+ -+ veclb %v0, %v0 -+ veclf %v0, %v0 -+ veclg %v0, %v0 -+ veclh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllvb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllvf %v0, %v0, %v0 -+#CHECK: error: {{(instruction 
requires: vector)?}} -+#CHECK: verllvg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllvh %v0, %v0, %v0 -+ -+ verllvb %v0, %v0, %v0 -+ verllvf %v0, %v0, %v0 -+ verllvg %v0, %v0, %v0 -+ verllvh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllb %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllf %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllg %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: verllh %v0, %v0, 0 -+ -+ verllb %v0, %v0, 0 -+ verllf %v0, %v0, 0 -+ verllg %v0, %v0, 0 -+ verllh %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslvb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslvf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslvg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslvh %v0, %v0, %v0 -+ -+ veslvb %v0, %v0, %v0 -+ veslvf %v0, %v0, %v0 -+ veslvg %v0, %v0, %v0 -+ veslvh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslb %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslf %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslg %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: veslh %v0, %v0, 0 -+ -+ veslb %v0, %v0, 0 -+ veslf %v0, %v0, 0 -+ veslg %v0, %v0, 0 -+ veslh %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesravb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesravf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesravg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesravh %v0, %v0, %v0 -+ -+ vesravb %v0, %v0, %v0 -+ vesravf %v0, %v0, %v0 -+ vesravg %v0, %v0, %v0 -+ vesravh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction 
requires: vector)?}} -+#CHECK: vesrab %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesraf %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrag %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrah %v0, %v0, 0 -+ -+ vesrab %v0, %v0, 0 -+ vesraf %v0, %v0, 0 -+ vesrag %v0, %v0, 0 -+ vesrah %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlvb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlvf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlvg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlvh %v0, %v0, %v0 -+ -+ vesrlvb %v0, %v0, %v0 -+ vesrlvf %v0, %v0, %v0 -+ vesrlvg %v0, %v0, %v0 -+ vesrlvh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlb %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlf %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlg %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vesrlh %v0, %v0, 0 -+ -+ vesrlb %v0, %v0, 0 -+ vesrlf %v0, %v0, 0 -+ vesrlg %v0, %v0, 0 -+ vesrlh %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfadb %v0, %v0, %v0 -+ -+ vfadb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfcedb %v0, %v0, %v0 -+#CHECK: vfcedbs %v0, %v0, %v0 -+ -+ vfcedb %v0, %v0, %v0 -+ vfcedbs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfchdb %v0, %v0, %v0 -+#CHECK: vfchdbs %v0, %v0, %v0 -+ -+ vfchdb %v0, %v0, %v0 -+ vfchdbs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfddb %v0, %v0, %v0 -+ -+ vfddb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaeb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezb %v0, %v0, %v0 
-+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaebs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaeh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaehs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaef %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaefs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfaezfs %v0, %v0, %v0 -+ -+ vfaeb %v0, %v0, %v0 -+ vfaezb %v0, %v0, %v0 -+ vfaebs %v0, %v0, %v0 -+ vfaezbs %v0, %v0, %v0 -+ vfaeh %v0, %v0, %v0 -+ vfaezh %v0, %v0, %v0 -+ vfaehs %v0, %v0, %v0 -+ vfaezhs %v0, %v0, %v0 -+ vfaef %v0, %v0, %v0 -+ vfaezf %v0, %v0, %v0 -+ vfaefs %v0, %v0, %v0 -+ vfaezfs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeeb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeebs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeeh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeehs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeef %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezf %v0, %v0, %v0 -+#CHECK: error: 
{{(instruction requires: vector)?}} -+#CHECK: vfeefs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeezfs %v0, %v0, %v0 -+ -+ vfeeb %v0, %v0, %v0 -+ vfeezb %v0, %v0, %v0 -+ vfeebs %v0, %v0, %v0 -+ vfeezbs %v0, %v0, %v0 -+ vfeeh %v0, %v0, %v0 -+ vfeezh %v0, %v0, %v0 -+ vfeehs %v0, %v0, %v0 -+ vfeezhs %v0, %v0, %v0 -+ vfeef %v0, %v0, %v0 -+ vfeezf %v0, %v0, %v0 -+ vfeefs %v0, %v0, %v0 -+ vfeezfs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeneb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenebs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezbs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfeneh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenehs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezhs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenef %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenefs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfenezfs %v0, %v0, %v0 -+ -+ vfeneb %v0, %v0, %v0 -+ vfenezb %v0, %v0, %v0 -+ vfenebs %v0, %v0, %v0 -+ vfenezbs %v0, %v0, %v0 -+ vfeneh %v0, %v0, %v0 -+ vfenezh %v0, %v0, %v0 -+ vfenehs %v0, %v0, %v0 -+ vfenezhs %v0, %v0, %v0 -+ vfenef %v0, %v0, %v0 -+ vfenezf %v0, %v0, %v0 -+ vfenefs %v0, %v0, %v0 -+ vfenezfs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfidb %v0, %v0, 0, 0 -+ -+ vfidb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vistrb %v0, %v0 -+#CHECK: error: {{(instruction 
requires: vector)?}} -+#CHECK: vistrbs %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vistrh %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vistrhs %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vistrf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vistrfs %v0, %v0 -+ -+ vistrb %v0, %v0 -+ vistrbs %v0, %v0 -+ vistrh %v0, %v0 -+ vistrhs %v0, %v0 -+ vistrf %v0, %v0 -+ vistrfs %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vflcdb %v0, %v0 -+ -+ vflcdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vflndb %v0, %v0 -+ -+ vflndb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vflpdb %v0, %v0 -+ -+ vflpdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfmadb %v0, %v0, %v0, %v0 -+ -+ vfmadb %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfmdb %v0, %v0, %v0 -+ -+ vfmdb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfmsdb %v0, %v0, %v0, %v0 -+ -+ vfmsdb %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfsdb %v0, %v0, %v0 -+ -+ vfsdb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vfsqdb %v0, %v0 -+ -+ vfsqdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vftcidb %v0, %v0, 0 -+ -+ vftcidb %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgbm %v0, 0 -+ -+ vgbm %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgef %v0, 0(%v0, %r1), 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgeg %v0, 0(%v0, %r1), 0 -+ -+ vgef %v0, 0(%v0, %r1), 0 -+ vgeg %v0, 0(%v0, %r1), 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmab %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmaf %v0, %v0, %v0, 
%v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmag %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmah %v0, %v0, %v0, %v0 -+ -+ vgfmab %v0, %v0, %v0, %v0 -+ vgfmaf %v0, %v0, %v0, %v0 -+ vgfmag %v0, %v0, %v0, %v0 -+ vgfmah %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgfmh %v0, %v0, %v0 -+ -+ vgfmb %v0, %v0, %v0 -+ vgfmf %v0, %v0, %v0 -+ vgfmg %v0, %v0, %v0 -+ vgfmh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgmb %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgmf %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgmg %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vgmh %v0, 0, 0 -+ -+ vgmb %v0, 0, 0 -+ vgmf %v0, 0, 0 -+ vgmg %v0, 0, 0 -+ vgmh %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vl %v0, 0 -+ -+ vl %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlbb %v0, 0, 0 -+ -+ vlbb %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlcb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlcf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlcg %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlch %v0, %v0 -+ -+ vlcb %v0, %v0 -+ vlcf %v0, %v0 -+ vlcg %v0, %v0 -+ vlch %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vldeb %v0, %v0 -+ -+ vldeb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleb %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlef %v0, 0, 0 -+#CHECK: error: {{(instruction requires: 
vector)?}} -+#CHECK: vleg %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleh %v0, 0, 0 -+ -+ vleb %v0, 0, 0 -+ vlef %v0, 0, 0 -+ vleg %v0, 0, 0 -+ vleh %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vledb %v0, %v0, 0, 0 -+ -+ vledb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleib %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleif %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleig %v0, 0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vleih %v0, 0, 0 -+ -+ vleib %v0, 0, 0 -+ vleif %v0, 0, 0 -+ vleig %v0, 0, 0 -+ vleih %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlgvb %r0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlgvf %r0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlgvg %r0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlgvh %r0, %v0, 0 -+ -+ vlgvb %r0, %v0, 0 -+ vlgvf %r0, %v0, 0 -+ vlgvg %r0, %v0, 0 -+ vlgvh %r0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vll %v0, %r0, 0 -+ -+ vll %v0, %r0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vllezb %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vllezf %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vllezg %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vllezh %v0, 0 -+ -+ vllezb %v0, 0 -+ vllezf %v0, 0 -+ vllezg %v0, 0 -+ vllezh %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlm %v0, %v0, 0 -+ -+ vlm %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlpb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlpf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlpg %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} 
-+#CHECK: vlph %v0, %v0 -+ -+ vlpb %v0, %v0 -+ vlpf %v0, %v0 -+ vlpg %v0, %v0 -+ vlph %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlr %v0, %v0 -+ -+ vlr %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlrepb %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlrepf %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlrepg %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlreph %v0, 0 -+ -+ vlrepb %v0, 0 -+ vlrepf %v0, 0 -+ vlrepg %v0, 0 -+ vlreph %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlvgb %v0, %r0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlvgf %v0, %r0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlvgg %v0, %r0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlvgh %v0, %r0, 0 -+ -+ vlvgb %v0, %r0, 0 -+ vlvgf %v0, %r0, 0 -+ vlvgg %v0, %r0, 0 -+ vlvgh %v0, %r0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vlvgp %v0, %r0, %r0 -+ -+ vlvgp %v0, %r0, %r0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaeb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaef %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaeh %v0, %v0, %v0, %v0 -+ -+ vmaeb %v0, %v0, %v0, %v0 -+ vmaef %v0, %v0, %v0, %v0 -+ vmaeh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmahb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmahf %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmahh %v0, %v0, %v0, %v0 -+ -+ vmahb %v0, %v0, %v0, %v0 -+ vmahf %v0, %v0, %v0, %v0 -+ vmahh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalf %v0, %v0, %v0, %v0 -+#CHECK: 
error: {{(instruction requires: vector)?}} -+#CHECK: vmalhw %v0, %v0, %v0, %v0 -+ -+ vmalb %v0, %v0, %v0, %v0 -+ vmalf %v0, %v0, %v0, %v0 -+ vmalhw %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaleb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalef %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaleh %v0, %v0, %v0, %v0 -+ -+ vmaleb %v0, %v0, %v0, %v0 -+ vmalef %v0, %v0, %v0, %v0 -+ vmaleh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalhb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalhf %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalhh %v0, %v0, %v0, %v0 -+ -+ vmalhb %v0, %v0, %v0, %v0 -+ vmalhf %v0, %v0, %v0, %v0 -+ vmalhh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalob %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmalof %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaloh %v0, %v0, %v0, %v0 -+ -+ vmalob %v0, %v0, %v0, %v0 -+ vmalof %v0, %v0, %v0, %v0 -+ vmaloh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaob %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaof %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmaoh %v0, %v0, %v0, %v0 -+ -+ vmaob %v0, %v0, %v0, %v0 -+ vmaof %v0, %v0, %v0, %v0 -+ vmaoh %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmeb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmef %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmeh %v0, %v0, %v0 -+ -+ vmeb %v0, %v0, %v0 -+ vmef %v0, %v0, %v0 -+ vmeh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmhb %v0, %v0, 
%v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmhf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmhh %v0, %v0, %v0 -+ -+ vmhb %v0, %v0, %v0 -+ vmhf %v0, %v0, %v0 -+ vmhh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlh %v0, %v0, %v0 -+ -+ vmlb %v0, %v0, %v0 -+ vmlf %v0, %v0, %v0 -+ vmlh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmleb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlef %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmleh %v0, %v0, %v0 -+ -+ vmleb %v0, %v0, %v0 -+ vmlef %v0, %v0, %v0 -+ vmleh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlhb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlhf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlhh %v0, %v0, %v0 -+ -+ vmlhb %v0, %v0, %v0 -+ vmlhf %v0, %v0, %v0 -+ vmlhh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlob %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmlof %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmloh %v0, %v0, %v0 -+ -+ vmlob %v0, %v0, %v0 -+ vmlof %v0, %v0, %v0 -+ vmloh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmng %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnh %v0, %v0, %v0 -+ -+ vmnb %v0, %v0, %v0 -+ vmnf %v0, %v0, %v0 -+ vmng %v0, %v0, %v0 -+ vmnh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: 
vector)?}} -+#CHECK: vmnlb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnlf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnlg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmnlh %v0, %v0, %v0 -+ -+ vmnlb %v0, %v0, %v0 -+ vmnlf %v0, %v0, %v0 -+ vmnlg %v0, %v0, %v0 -+ vmnlh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmob %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmof %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmoh %v0, %v0, %v0 -+ -+ vmob %v0, %v0, %v0 -+ vmof %v0, %v0, %v0 -+ vmoh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrhb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrhf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrhg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrhh %v0, %v0, %v0 -+ -+ vmrhb %v0, %v0, %v0 -+ vmrhf %v0, %v0, %v0 -+ vmrhg %v0, %v0, %v0 -+ vmrhh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrlb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrlf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrlg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmrlh %v0, %v0, %v0 -+ -+ vmrlb %v0, %v0, %v0 -+ vmrlf %v0, %v0, %v0 -+ vmrlg %v0, %v0, %v0 -+ vmrlh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxh %v0, %v0, %v0 -+ -+ vmxb %v0, %v0, %v0 -+ vmxf %v0, %v0, %v0 -+ vmxg %v0, %v0, %v0 -+ vmxh %v0, %v0, %v0 -+ -+#CHECK: 
error: {{(instruction requires: vector)?}} -+#CHECK: vmxlb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxlf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxlg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vmxlh %v0, %v0, %v0 -+ -+ vmxlb %v0, %v0, %v0 -+ vmxlf %v0, %v0, %v0 -+ vmxlg %v0, %v0, %v0 -+ vmxlh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vn %v0, %v0, %v0 -+ -+ vn %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vnc %v0, %v0, %v0 -+ -+ vnc %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vno %v0, %v0, %v0 -+ -+ vno %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vo %v0, %v0, %v0 -+ -+ vo %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vone %v0 -+ -+ vone %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpdi %v0, %v0, %v0, 0 -+ -+ vpdi %v0, %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vperm %v0, %v0, %v0, %v0 -+ -+ vperm %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpkf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpkg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpkh %v0, %v0, %v0 -+ -+ vpkf %v0, %v0, %v0 -+ vpkg %v0, %v0, %v0 -+ vpkh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpksf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpksg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpksh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpksfs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpksgs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpkshs %v0, %v0, %v0 -+ 
-+ vpksf %v0, %v0, %v0 -+ vpksg %v0, %v0, %v0 -+ vpksh %v0, %v0, %v0 -+ vpksfs %v0, %v0, %v0 -+ vpksgs %v0, %v0, %v0 -+ vpkshs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklsf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklsg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklsh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklsfs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklsgs %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpklshs %v0, %v0, %v0 -+ -+ vpklsf %v0, %v0, %v0 -+ vpklsg %v0, %v0, %v0 -+ vpklsh %v0, %v0, %v0 -+ vpklsfs %v0, %v0, %v0 -+ vpklsgs %v0, %v0, %v0 -+ vpklshs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vpopct %v0, %v0 -+ -+ vpopct %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepb %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepf %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepg %v0, %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vreph %v0, %v0, 0 -+ -+ vrepb %v0, %v0, 0 -+ vrepf %v0, %v0, 0 -+ vrepg %v0, %v0, 0 -+ vreph %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepib %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepif %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepig %v0, 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vrepih %v0, 0 -+ -+ vrepib %v0, 0 -+ vrepif %v0, 0 -+ vrepig %v0, 0 -+ vrepih %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsg %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} 
-+#CHECK: vsh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsq %v0, %v0, %v0 -+ -+ vsb %v0, %v0, %v0 -+ vsf %v0, %v0, %v0 -+ vsg %v0, %v0, %v0 -+ vsh %v0, %v0, %v0 -+ vsq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsbcbiq %v0, %v0, %v0 -+ -+ vsbcbiq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsbiq %v0, %v0, %v0 -+ -+ vsbiq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscbib %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscbif %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscbig %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscbih %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscbiq %v0, %v0, %v0 -+ -+ vscbib %v0, %v0, %v0 -+ vscbif %v0, %v0, %v0 -+ vscbig %v0, %v0, %v0 -+ vscbih %v0, %v0, %v0 -+ vscbiq %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vscef %v0, 0(%v0, %r1), 0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsceg %v0, 0(%v0, %r1), 0 -+ -+ vscef %v0, 0(%v0, %r1), 0 -+ vsceg %v0, 0(%v0, %r1), 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsegb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsegf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsegh %v0, %v0 -+ -+ vsegb %v0, %v0 -+ vsegf %v0, %v0 -+ vsegh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsel %v0, %v0, %v0, %v0 -+ -+ vsel %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsl %v0, %v0, %v0 -+ -+ vsl %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vslb %v0, %v0, %v0 -+ -+ vslb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsldb %v0, %v0, %v0, 0 -+ -+ vsldb %v0, %v0, %v0, 0 -+ -+#CHECK: 
error: {{(instruction requires: vector)?}} -+#CHECK: vsra %v0, %v0, %v0 -+ -+ vsra %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsrab %v0, %v0, %v0 -+ -+ vsrab %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsrl %v0, %v0, %v0 -+ -+ vsrl %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsrlb %v0, %v0, %v0 -+ -+ vsrlb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vst %v0, 0 -+ -+ vst %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstl %v0, %r0, 0 -+ -+ vstl %v0, %r0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstm %v0, %v0, 0 -+ -+ vstm %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrcb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczb %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrcbs %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczbs %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrch %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczh %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrchs %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczhs %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrcf %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczf %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrcfs %v0, %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vstrczfs %v0, %v0, %v0, %v0 -+ -+ vstrcb %v0, %v0, %v0, %v0 -+ vstrczb %v0, %v0, %v0, %v0 -+ vstrcbs %v0, %v0, %v0, %v0 -+ vstrczbs %v0, %v0, %v0, %v0 -+ vstrch %v0, %v0, %v0, %v0 -+ vstrczh %v0, %v0, %v0, 
%v0 -+ vstrchs %v0, %v0, %v0, %v0 -+ vstrczhs %v0, %v0, %v0, %v0 -+ vstrcf %v0, %v0, %v0, %v0 -+ vstrczf %v0, %v0, %v0, %v0 -+ vstrcfs %v0, %v0, %v0, %v0 -+ vstrczfs %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumgh %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumgf %v0, %v0, %v0 -+ -+ vsumgh %v0, %v0, %v0 -+ vsumgf %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumqf %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumqg %v0, %v0, %v0 -+ -+ vsumqf %v0, %v0, %v0 -+ vsumqg %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumb %v0, %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vsumh %v0, %v0, %v0 -+ -+ vsumb %v0, %v0, %v0 -+ vsumh %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vtm %v0, %v0 -+ -+ vtm %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuphb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuphf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuphh %v0, %v0 -+ -+ vuphb %v0, %v0 -+ vuphf %v0, %v0 -+ vuphh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplhb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplhf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplhh %v0, %v0 -+ -+ vuplhb %v0, %v0 -+ vuplhf %v0, %v0 -+ vuplhh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplf %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vuplhw %v0, %v0 -+ -+ vuplb %v0, %v0 -+ vuplf %v0, %v0 -+ vuplhw %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vupllb %v0, %v0 -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vupllf %v0, %v0 
-+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vupllh %v0, %v0 -+ -+ vupllb %v0, %v0 -+ vupllf %v0, %v0 -+ vupllh %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vx %v0, %v0, %v0 -+ -+ vx %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: vzero %v0 -+ -+ vzero %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wcdgb %v0, %v0, 0, 0 -+ -+ wcdgb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wcdlgb %v0, %v0, 0, 0 -+ -+ wcdlgb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wcgdb %v0, %v0, 0, 0 -+ -+ wcgdb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wclgdb %v0, %v0, 0, 0 -+ -+ wclgdb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfadb %v0, %v0, %v0 -+ -+ wfadb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfcdb %v0, %v0 -+ -+ wfcdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfcedb %v0, %v0, %v0 -+#CHECK: wfcedbs %v0, %v0, %v0 -+ -+ wfcedb %v0, %v0, %v0 -+ wfcedbs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfchdb %v0, %v0, %v0 -+#CHECK: wfchdbs %v0, %v0, %v0 -+ -+ wfchdb %v0, %v0, %v0 -+ wfchdbs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfchedb %v0, %v0, %v0 -+#CHECK: wfchedbs %v0, %v0, %v0 -+ -+ wfchedb %v0, %v0, %v0 -+ wfchedbs %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfddb %v0, %v0, %v0 -+ -+ wfddb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfidb %v0, %v0, 0, 0 -+ -+ wfidb %v0, %v0, 0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfkdb %v0, %v0 -+ -+ wfkdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wflcdb %v0, %v0 -+ -+ wflcdb %v0, %v0 -+ -+#CHECK: error: {{(instruction 
requires: vector)?}} -+#CHECK: wflndb %v0, %v0 -+ -+ wflndb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wflpdb %v0, %v0 -+ -+ wflpdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfmadb %v0, %v0, %v0, %v0 -+ -+ wfmadb %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfmdb %v0, %v0, %v0 -+ -+ wfmdb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfmsdb %v0, %v0, %v0, %v0 -+ -+ wfmsdb %v0, %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfsdb %v0, %v0, %v0 -+ -+ wfsdb %v0, %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wfsqdb %v0, %v0 -+ -+ wfsqdb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wftcidb %v0, %v0, 0 -+ -+ wftcidb %v0, %v0, 0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wldeb %v0, %v0 -+ -+ wldeb %v0, %v0 -+ -+#CHECK: error: {{(instruction requires: vector)?}} -+#CHECK: wledb %v0, %v0, 0, 0 -+ -+ wledb %v0, %v0, 0, 0 -Index: llvm-36/test/MC/SystemZ/insn-bad.s -=================================================================== ---- llvm-36.orig/test/MC/SystemZ/insn-bad.s -+++ llvm-36/test/MC/SystemZ/insn-bad.s -@@ -2666,6 +2666,11 @@ - pfdrl 1, 1 - pfdrl 1, 0x100000000 - -+#CHECK: error: {{(instruction requires: population-count)?}} -+#CHECK: popcnt %r0, %r0 -+ -+ popcnt %r0, %r0 -+ - #CHECK: error: invalid operand - #CHECK: risbg %r0,%r0,0,0,-1 - #CHECK: error: invalid operand -Index: llvm-36/test/MC/SystemZ/insn-good-z13.s -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/SystemZ/insn-good-z13.s -@@ -0,0 +1,5039 @@ -+# For z13 and above. 
-+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z13 -show-encoding %s \ -+# RUN: | FileCheck %s -+ -+#CHECK: lcbb %r0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x27] -+#CHECK: lcbb %r0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x27] -+#CHECK: lcbb %r0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x27] -+#CHECK: lcbb %r0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x27] -+#CHECK: lcbb %r0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x27] -+#CHECK: lcbb %r15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x27] -+#CHECK: lcbb %r2, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x80,0x27] -+ -+ lcbb %r0, 0, 0 -+ lcbb %r0, 0, 15 -+ lcbb %r0, 4095, 0 -+ lcbb %r0, 0(%r15), 0 -+ lcbb %r0, 0(%r15,%r1), 0 -+ lcbb %r15, 0, 0 -+ lcbb %r2, 1383(%r3,%r4), 8 -+ -+#CHECK: vab %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf3] -+#CHECK: vab %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf3] -+#CHECK: vab %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf3] -+#CHECK: vab %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf3] -+#CHECK: vab %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf3] -+ -+ vab %v0, %v0, %v0 -+ vab %v0, %v0, %v31 -+ vab %v0, %v31, %v0 -+ vab %v31, %v0, %v0 -+ vab %v18, %v3, %v20 -+ -+#CHECK: vaccb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf1] -+#CHECK: vaccb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf1] -+#CHECK: vaccb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf1] -+#CHECK: vaccb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf1] -+#CHECK: vaccb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf1] -+ -+ vaccb %v0, %v0, %v0 -+ vaccb %v0, %v0, %v31 -+ vaccb %v0, %v31, %v0 -+ vaccb %v31, %v0, %v0 -+ vaccb %v18, %v3, %v20 -+ -+#CHECK: vacccq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xb9] -+#CHECK: vacccq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xb9] -+#CHECK: vacccq %v0, %v0, %v31, %v0 # encoding: 
[0xe7,0x00,0xf4,0x00,0x02,0xb9] -+#CHECK: vacccq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xb9] -+#CHECK: vacccq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xb9] -+#CHECK: vacccq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xb9] -+ -+ vacccq %v0, %v0, %v0, %v0 -+ vacccq %v0, %v0, %v0, %v31 -+ vacccq %v0, %v0, %v31, %v0 -+ vacccq %v0, %v31, %v0, %v0 -+ vacccq %v31, %v0, %v0, %v0 -+ vacccq %v13, %v17, %v21, %v25 -+ -+#CHECK: vaccf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf1] -+#CHECK: vaccf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf1] -+#CHECK: vaccf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf1] -+#CHECK: vaccf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf1] -+#CHECK: vaccf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf1] -+ -+ vaccf %v0, %v0, %v0 -+ vaccf %v0, %v0, %v31 -+ vaccf %v0, %v31, %v0 -+ vaccf %v31, %v0, %v0 -+ vaccf %v18, %v3, %v20 -+ -+#CHECK: vaccg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf1] -+#CHECK: vaccg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf1] -+#CHECK: vaccg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf1] -+#CHECK: vaccg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf1] -+#CHECK: vaccg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf1] -+ -+ vaccg %v0, %v0, %v0 -+ vaccg %v0, %v0, %v31 -+ vaccg %v0, %v31, %v0 -+ vaccg %v31, %v0, %v0 -+ vaccg %v18, %v3, %v20 -+ -+#CHECK: vacch %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf1] -+#CHECK: vacch %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf1] -+#CHECK: vacch %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf1] -+#CHECK: vacch %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf1] -+#CHECK: vacch %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf1] -+ -+ vacch %v0, %v0, %v0 -+ vacch %v0, %v0, %v31 -+ vacch %v0, %v31, %v0 -+ vacch %v31, %v0, %v0 -+ vacch %v18, %v3, %v20 -+ -+#CHECK: vaccq %v0, %v0, %v0 
# encoding: [0xe7,0x00,0x00,0x00,0x40,0xf1] -+#CHECK: vaccq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf1] -+#CHECK: vaccq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf1] -+#CHECK: vaccq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf1] -+#CHECK: vaccq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf1] -+ -+ vaccq %v0, %v0, %v0 -+ vaccq %v0, %v0, %v31 -+ vaccq %v0, %v31, %v0 -+ vaccq %v31, %v0, %v0 -+ vaccq %v18, %v3, %v20 -+ -+#CHECK: vacq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbb] -+#CHECK: vacq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbb] -+#CHECK: vacq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbb] -+#CHECK: vacq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbb] -+#CHECK: vacq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbb] -+#CHECK: vacq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbb] -+ -+ vacq %v0, %v0, %v0, %v0 -+ vacq %v0, %v0, %v0, %v31 -+ vacq %v0, %v0, %v31, %v0 -+ vacq %v0, %v31, %v0, %v0 -+ vacq %v31, %v0, %v0, %v0 -+ vacq %v13, %v17, %v21, %v25 -+ -+#CHECK: vaf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf3] -+#CHECK: vaf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf3] -+#CHECK: vaf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf3] -+#CHECK: vaf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf3] -+#CHECK: vaf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf3] -+ -+ vaf %v0, %v0, %v0 -+ vaf %v0, %v0, %v31 -+ vaf %v0, %v31, %v0 -+ vaf %v31, %v0, %v0 -+ vaf %v18, %v3, %v20 -+ -+#CHECK: vag %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf3] -+#CHECK: vag %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf3] -+#CHECK: vag %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf3] -+#CHECK: vag %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf3] -+#CHECK: vag %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf3] -+ -+ vag %v0, %v0, %v0 -+ vag 
%v0, %v0, %v31 -+ vag %v0, %v31, %v0 -+ vag %v31, %v0, %v0 -+ vag %v18, %v3, %v20 -+ -+#CHECK: vah %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf3] -+#CHECK: vah %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf3] -+#CHECK: vah %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf3] -+#CHECK: vah %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf3] -+#CHECK: vah %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf3] -+ -+ vah %v0, %v0, %v0 -+ vah %v0, %v0, %v31 -+ vah %v0, %v31, %v0 -+ vah %v31, %v0, %v0 -+ vah %v18, %v3, %v20 -+ -+#CHECK: vaq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf3] -+#CHECK: vaq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf3] -+#CHECK: vaq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf3] -+#CHECK: vaq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf3] -+#CHECK: vaq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf3] -+ -+ vaq %v0, %v0, %v0 -+ vaq %v0, %v0, %v31 -+ vaq %v0, %v31, %v0 -+ vaq %v31, %v0, %v0 -+ vaq %v18, %v3, %v20 -+ -+#CHECK: vavgb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf2] -+#CHECK: vavgb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf2] -+#CHECK: vavgb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf2] -+#CHECK: vavgb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf2] -+#CHECK: vavgb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf2] -+ -+ vavgb %v0, %v0, %v0 -+ vavgb %v0, %v0, %v31 -+ vavgb %v0, %v31, %v0 -+ vavgb %v31, %v0, %v0 -+ vavgb %v18, %v3, %v20 -+ -+#CHECK: vavgf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf2] -+#CHECK: vavgf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf2] -+#CHECK: vavgf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf2] -+#CHECK: vavgf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf2] -+#CHECK: vavgf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf2] -+ -+ vavgf %v0, %v0, %v0 -+ vavgf %v0, %v0, %v31 -+ vavgf %v0, %v31, %v0 -+ 
vavgf %v31, %v0, %v0 -+ vavgf %v18, %v3, %v20 -+ -+#CHECK: vavgg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf2] -+#CHECK: vavgg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf2] -+#CHECK: vavgg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf2] -+#CHECK: vavgg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf2] -+#CHECK: vavgg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf2] -+ -+ vavgg %v0, %v0, %v0 -+ vavgg %v0, %v0, %v31 -+ vavgg %v0, %v31, %v0 -+ vavgg %v31, %v0, %v0 -+ vavgg %v18, %v3, %v20 -+ -+#CHECK: vavgh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf2] -+#CHECK: vavgh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf2] -+#CHECK: vavgh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf2] -+#CHECK: vavgh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf2] -+#CHECK: vavgh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf2] -+ -+ vavgh %v0, %v0, %v0 -+ vavgh %v0, %v0, %v31 -+ vavgh %v0, %v31, %v0 -+ vavgh %v31, %v0, %v0 -+ vavgh %v18, %v3, %v20 -+ -+#CHECK: vavglb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf0] -+#CHECK: vavglb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf0] -+#CHECK: vavglb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf0] -+#CHECK: vavglb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf0] -+#CHECK: vavglb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf0] -+ -+ vavglb %v0, %v0, %v0 -+ vavglb %v0, %v0, %v31 -+ vavglb %v0, %v31, %v0 -+ vavglb %v31, %v0, %v0 -+ vavglb %v18, %v3, %v20 -+ -+#CHECK: vavglf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf0] -+#CHECK: vavglf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf0] -+#CHECK: vavglf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf0] -+#CHECK: vavglf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf0] -+#CHECK: vavglf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf0] -+ -+ vavglf %v0, %v0, %v0 -+ vavglf %v0, %v0, %v31 -+ 
vavglf %v0, %v31, %v0 -+ vavglf %v31, %v0, %v0 -+ vavglf %v18, %v3, %v20 -+ -+#CHECK: vavglg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf0] -+#CHECK: vavglg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf0] -+#CHECK: vavglg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf0] -+#CHECK: vavglg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf0] -+#CHECK: vavglg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf0] -+ -+ vavglg %v0, %v0, %v0 -+ vavglg %v0, %v0, %v31 -+ vavglg %v0, %v31, %v0 -+ vavglg %v31, %v0, %v0 -+ vavglg %v18, %v3, %v20 -+ -+#CHECK: vavglh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf0] -+#CHECK: vavglh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf0] -+#CHECK: vavglh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf0] -+#CHECK: vavglh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf0] -+#CHECK: vavglh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf0] -+ -+ vavglh %v0, %v0, %v0 -+ vavglh %v0, %v0, %v31 -+ vavglh %v0, %v31, %v0 -+ vavglh %v31, %v0, %v0 -+ vavglh %v18, %v3, %v20 -+ -+#CHECK: vcdgb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc3] -+#CHECK: vcdgb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc3] -+#CHECK: vcdgb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc3] -+#CHECK: vcdgb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] -+#CHECK: vcdgb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc3] -+#CHECK: vcdgb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc3] -+#CHECK: vcdgb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc3] -+ -+ vcdgb %v0, %v0, 0, 0 -+ vcdgb %v0, %v0, 0, 15 -+ vcdgb %v0, %v0, 4, 0 -+ vcdgb %v0, %v0, 12, 0 -+ vcdgb %v0, %v31, 0, 0 -+ vcdgb %v31, %v0, 0, 0 -+ vcdgb %v14, %v17, 4, 10 -+ -+#CHECK: vcdlgb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc1] -+#CHECK: vcdlgb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc1] -+#CHECK: vcdlgb %v0, %v0, 4, 0 # 
encoding: [0xe7,0x00,0x00,0x04,0x30,0xc1] -+#CHECK: vcdlgb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] -+#CHECK: vcdlgb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc1] -+#CHECK: vcdlgb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc1] -+#CHECK: vcdlgb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc1] -+ -+ vcdlgb %v0, %v0, 0, 0 -+ vcdlgb %v0, %v0, 0, 15 -+ vcdlgb %v0, %v0, 4, 0 -+ vcdlgb %v0, %v0, 12, 0 -+ vcdlgb %v0, %v31, 0, 0 -+ vcdlgb %v31, %v0, 0, 0 -+ vcdlgb %v14, %v17, 4, 10 -+ -+#CHECK: vcksm %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x66] -+#CHECK: vcksm %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x66] -+#CHECK: vcksm %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x66] -+#CHECK: vcksm %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x66] -+#CHECK: vcksm %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x66] -+ -+ vcksm %v0, %v0, %v0 -+ vcksm %v0, %v0, %v31 -+ vcksm %v0, %v31, %v0 -+ vcksm %v31, %v0, %v0 -+ vcksm %v18, %v3, %v20 -+ -+#CHECK: vceqb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf8] -+#CHECK: vceqb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf8] -+#CHECK: vceqb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf8] -+#CHECK: vceqb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf8] -+#CHECK: vceqb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf8] -+#CHECK: vceqbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf8] -+ -+ vceqb %v0, %v0, %v0 -+ vceqb %v0, %v0, %v31 -+ vceqb %v0, %v31, %v0 -+ vceqb %v31, %v0, %v0 -+ vceqb %v18, %v3, %v20 -+ vceqbs %v5, %v22, %v7 -+ -+#CHECK: vceqf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf8] -+#CHECK: vceqf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf8] -+#CHECK: vceqf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf8] -+#CHECK: vceqf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf8] -+#CHECK: vceqf %v18, %v3, %v20 # encoding: 
[0xe7,0x23,0x40,0x00,0x2a,0xf8] -+#CHECK: vceqfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf8] -+ -+ vceqf %v0, %v0, %v0 -+ vceqf %v0, %v0, %v31 -+ vceqf %v0, %v31, %v0 -+ vceqf %v31, %v0, %v0 -+ vceqf %v18, %v3, %v20 -+ vceqfs %v5, %v22, %v7 -+ -+#CHECK: vceqg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf8] -+#CHECK: vceqg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf8] -+#CHECK: vceqg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf8] -+#CHECK: vceqg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf8] -+#CHECK: vceqg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf8] -+#CHECK: vceqgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf8] -+ -+ vceqg %v0, %v0, %v0 -+ vceqg %v0, %v0, %v31 -+ vceqg %v0, %v31, %v0 -+ vceqg %v31, %v0, %v0 -+ vceqg %v18, %v3, %v20 -+ vceqgs %v5, %v22, %v7 -+ -+#CHECK: vceqh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf8] -+#CHECK: vceqh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf8] -+#CHECK: vceqh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf8] -+#CHECK: vceqh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf8] -+#CHECK: vceqh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf8] -+#CHECK: vceqhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf8] -+ -+ vceqh %v0, %v0, %v0 -+ vceqh %v0, %v0, %v31 -+ vceqh %v0, %v31, %v0 -+ vceqh %v31, %v0, %v0 -+ vceqh %v18, %v3, %v20 -+ vceqhs %v5, %v22, %v7 -+ -+#CHECK: vcgdb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc2] -+#CHECK: vcgdb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc2] -+#CHECK: vcgdb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc2] -+#CHECK: vcgdb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] -+#CHECK: vcgdb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc2] -+#CHECK: vcgdb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc2] -+#CHECK: vcgdb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc2] -+ 
-+ vcgdb %v0, %v0, 0, 0 -+ vcgdb %v0, %v0, 0, 15 -+ vcgdb %v0, %v0, 4, 0 -+ vcgdb %v0, %v0, 12, 0 -+ vcgdb %v0, %v31, 0, 0 -+ vcgdb %v31, %v0, 0, 0 -+ vcgdb %v14, %v17, 4, 10 -+ -+#CHECK: vchb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfb] -+#CHECK: vchb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfb] -+#CHECK: vchb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfb] -+#CHECK: vchb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfb] -+#CHECK: vchb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfb] -+#CHECK: vchbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xfb] -+ -+ vchb %v0, %v0, %v0 -+ vchb %v0, %v0, %v31 -+ vchb %v0, %v31, %v0 -+ vchb %v31, %v0, %v0 -+ vchb %v18, %v3, %v20 -+ vchbs %v5, %v22, %v7 -+ -+#CHECK: vchf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfb] -+#CHECK: vchf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfb] -+#CHECK: vchf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfb] -+#CHECK: vchf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfb] -+#CHECK: vchf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfb] -+#CHECK: vchfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xfb] -+ -+ vchf %v0, %v0, %v0 -+ vchf %v0, %v0, %v31 -+ vchf %v0, %v31, %v0 -+ vchf %v31, %v0, %v0 -+ vchf %v18, %v3, %v20 -+ vchfs %v5, %v22, %v7 -+ -+#CHECK: vchg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfb] -+#CHECK: vchg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfb] -+#CHECK: vchg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfb] -+#CHECK: vchg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfb] -+#CHECK: vchg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfb] -+#CHECK: vchgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xfb] -+ -+ vchg %v0, %v0, %v0 -+ vchg %v0, %v0, %v31 -+ vchg %v0, %v31, %v0 -+ vchg %v31, %v0, %v0 -+ vchg %v18, %v3, %v20 -+ vchgs %v5, %v22, %v7 -+ -+#CHECK: vchh %v0, %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x10,0xfb] -+#CHECK: vchh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfb] -+#CHECK: vchh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfb] -+#CHECK: vchh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfb] -+#CHECK: vchh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfb] -+#CHECK: vchhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xfb] -+ -+ vchh %v0, %v0, %v0 -+ vchh %v0, %v0, %v31 -+ vchh %v0, %v31, %v0 -+ vchh %v31, %v0, %v0 -+ vchh %v18, %v3, %v20 -+ vchhs %v5, %v22, %v7 -+ -+#CHECK: vchlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf9] -+#CHECK: vchlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf9] -+#CHECK: vchlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf9] -+#CHECK: vchlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf9] -+#CHECK: vchlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf9] -+#CHECK: vchlbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf9] -+ -+ vchlb %v0, %v0, %v0 -+ vchlb %v0, %v0, %v31 -+ vchlb %v0, %v31, %v0 -+ vchlb %v31, %v0, %v0 -+ vchlb %v18, %v3, %v20 -+ vchlbs %v5, %v22, %v7 -+ -+#CHECK: vchlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf9] -+#CHECK: vchlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf9] -+#CHECK: vchlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf9] -+#CHECK: vchlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf9] -+#CHECK: vchlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf9] -+#CHECK: vchlfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf9] -+ -+ vchlf %v0, %v0, %v0 -+ vchlf %v0, %v0, %v31 -+ vchlf %v0, %v31, %v0 -+ vchlf %v31, %v0, %v0 -+ vchlf %v18, %v3, %v20 -+ vchlfs %v5, %v22, %v7 -+ -+#CHECK: vchlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf9] -+#CHECK: vchlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf9] -+#CHECK: vchlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf9] -+#CHECK: vchlg %v31, 
%v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf9] -+#CHECK: vchlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf9] -+#CHECK: vchlgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf9] -+ -+ vchlg %v0, %v0, %v0 -+ vchlg %v0, %v0, %v31 -+ vchlg %v0, %v31, %v0 -+ vchlg %v31, %v0, %v0 -+ vchlg %v18, %v3, %v20 -+ vchlgs %v5, %v22, %v7 -+ -+#CHECK: vchlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf9] -+#CHECK: vchlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf9] -+#CHECK: vchlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf9] -+#CHECK: vchlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf9] -+#CHECK: vchlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf9] -+#CHECK: vchlhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf9] -+ -+ vchlh %v0, %v0, %v0 -+ vchlh %v0, %v0, %v31 -+ vchlh %v0, %v31, %v0 -+ vchlh %v31, %v0, %v0 -+ vchlh %v18, %v3, %v20 -+ vchlhs %v5, %v22, %v7 -+ -+#CHECK: vclgdb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc0] -+#CHECK: vclgdb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc0] -+#CHECK: vclgdb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc0] -+#CHECK: vclgdb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] -+#CHECK: vclgdb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc0] -+#CHECK: vclgdb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc0] -+#CHECK: vclgdb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc0] -+ -+ vclgdb %v0, %v0, 0, 0 -+ vclgdb %v0, %v0, 0, 15 -+ vclgdb %v0, %v0, 4, 0 -+ vclgdb %v0, %v0, 12, 0 -+ vclgdb %v0, %v31, 0, 0 -+ vclgdb %v31, %v0, 0, 0 -+ vclgdb %v14, %v17, 4, 10 -+ -+#CHECK: vclzb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x53] -+#CHECK: vclzb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x53] -+#CHECK: vclzb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x53] -+#CHECK: vclzb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x53] -+#CHECK: vclzb %v31, %v0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x08,0x53] -+#CHECK: vclzb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x53] -+ -+ vclzb %v0, %v0 -+ vclzb %v0, %v15 -+ vclzb %v0, %v31 -+ vclzb %v15, %v0 -+ vclzb %v31, %v0 -+ vclzb %v14, %v17 -+ -+#CHECK: vclzf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x53] -+#CHECK: vclzf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x53] -+#CHECK: vclzf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x53] -+#CHECK: vclzf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x53] -+#CHECK: vclzf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x53] -+#CHECK: vclzf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x53] -+ -+ vclzf %v0, %v0 -+ vclzf %v0, %v15 -+ vclzf %v0, %v31 -+ vclzf %v15, %v0 -+ vclzf %v31, %v0 -+ vclzf %v14, %v17 -+ -+#CHECK: vclzg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x53] -+#CHECK: vclzg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x53] -+#CHECK: vclzg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x53] -+#CHECK: vclzg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x53] -+#CHECK: vclzg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x53] -+#CHECK: vclzg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x53] -+ -+ vclzg %v0, %v0 -+ vclzg %v0, %v15 -+ vclzg %v0, %v31 -+ vclzg %v15, %v0 -+ vclzg %v31, %v0 -+ vclzg %v14, %v17 -+ -+#CHECK: vclzh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x53] -+#CHECK: vclzh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x53] -+#CHECK: vclzh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x53] -+#CHECK: vclzh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x53] -+#CHECK: vclzh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x53] -+#CHECK: vclzh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x53] -+ -+ vclzh %v0, %v0 -+ vclzh %v0, %v15 -+ vclzh %v0, %v31 -+ vclzh %v15, %v0 -+ vclzh %v31, %v0 -+ vclzh %v14, %v17 -+ -+#CHECK: vctzb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x52] -+#CHECK: vctzb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x52] 
-+#CHECK: vctzb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x52] -+#CHECK: vctzb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x52] -+#CHECK: vctzb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x52] -+#CHECK: vctzb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x52] -+ -+ vctzb %v0, %v0 -+ vctzb %v0, %v15 -+ vctzb %v0, %v31 -+ vctzb %v15, %v0 -+ vctzb %v31, %v0 -+ vctzb %v14, %v17 -+ -+#CHECK: vctzf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x52] -+#CHECK: vctzf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x52] -+#CHECK: vctzf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x52] -+#CHECK: vctzf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x52] -+#CHECK: vctzf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x52] -+#CHECK: vctzf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x52] -+ -+ vctzf %v0, %v0 -+ vctzf %v0, %v15 -+ vctzf %v0, %v31 -+ vctzf %v15, %v0 -+ vctzf %v31, %v0 -+ vctzf %v14, %v17 -+ -+#CHECK: vctzg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x52] -+#CHECK: vctzg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x52] -+#CHECK: vctzg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x52] -+#CHECK: vctzg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x52] -+#CHECK: vctzg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x52] -+#CHECK: vctzg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x52] -+ -+ vctzg %v0, %v0 -+ vctzg %v0, %v15 -+ vctzg %v0, %v31 -+ vctzg %v15, %v0 -+ vctzg %v31, %v0 -+ vctzg %v14, %v17 -+ -+#CHECK: vctzh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x52] -+#CHECK: vctzh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x52] -+#CHECK: vctzh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x52] -+#CHECK: vctzh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x52] -+#CHECK: vctzh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x52] -+#CHECK: vctzh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x52] -+ -+ vctzh %v0, %v0 -+ vctzh %v0, %v15 -+ vctzh %v0, %v31 -+ vctzh %v15, %v0 -+ vctzh %v31, %v0 
-+ vctzh %v14, %v17 -+ -+#CHECK: vecb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdb] -+#CHECK: vecb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdb] -+#CHECK: vecb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdb] -+#CHECK: vecb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdb] -+#CHECK: vecb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdb] -+#CHECK: vecb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdb] -+ -+ vecb %v0, %v0 -+ vecb %v0, %v15 -+ vecb %v0, %v31 -+ vecb %v15, %v0 -+ vecb %v31, %v0 -+ vecb %v14, %v17 -+ -+#CHECK: vecf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xdb] -+#CHECK: vecf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdb] -+#CHECK: vecf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdb] -+#CHECK: vecf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdb] -+#CHECK: vecf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdb] -+#CHECK: vecf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdb] -+ -+ vecf %v0, %v0 -+ vecf %v0, %v15 -+ vecf %v0, %v31 -+ vecf %v15, %v0 -+ vecf %v31, %v0 -+ vecf %v14, %v17 -+ -+#CHECK: vecg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdb] -+#CHECK: vecg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdb] -+#CHECK: vecg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdb] -+#CHECK: vecg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdb] -+#CHECK: vecg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdb] -+#CHECK: vecg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdb] -+ -+ vecg %v0, %v0 -+ vecg %v0, %v15 -+ vecg %v0, %v31 -+ vecg %v15, %v0 -+ vecg %v31, %v0 -+ vecg %v14, %v17 -+ -+#CHECK: vech %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdb] -+#CHECK: vech %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdb] -+#CHECK: vech %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdb] -+#CHECK: vech %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdb] -+#CHECK: vech %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdb] -+#CHECK: vech %v14, %v17 # encoding: 
[0xe7,0xe1,0x00,0x00,0x14,0xdb] -+ -+ vech %v0, %v0 -+ vech %v0, %v15 -+ vech %v0, %v31 -+ vech %v15, %v0 -+ vech %v31, %v0 -+ vech %v14, %v17 -+ -+#CHECK: veclb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd9] -+#CHECK: veclb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd9] -+#CHECK: veclb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd9] -+#CHECK: veclb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd9] -+#CHECK: veclb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd9] -+#CHECK: veclb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd9] -+ -+ veclb %v0, %v0 -+ veclb %v0, %v15 -+ veclb %v0, %v31 -+ veclb %v15, %v0 -+ veclb %v31, %v0 -+ veclb %v14, %v17 -+ -+#CHECK: veclf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd9] -+#CHECK: veclf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd9] -+#CHECK: veclf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd9] -+#CHECK: veclf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd9] -+#CHECK: veclf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd9] -+#CHECK: veclf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd9] -+ -+ veclf %v0, %v0 -+ veclf %v0, %v15 -+ veclf %v0, %v31 -+ veclf %v15, %v0 -+ veclf %v31, %v0 -+ veclf %v14, %v17 -+ -+#CHECK: veclg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd9] -+#CHECK: veclg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd9] -+#CHECK: veclg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd9] -+#CHECK: veclg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd9] -+#CHECK: veclg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd9] -+#CHECK: veclg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd9] -+ -+ veclg %v0, %v0 -+ veclg %v0, %v15 -+ veclg %v0, %v31 -+ veclg %v15, %v0 -+ veclg %v31, %v0 -+ veclg %v14, %v17 -+ -+#CHECK: veclh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd9] -+#CHECK: veclh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd9] -+#CHECK: veclh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd9] -+#CHECK: veclh %v15, 
%v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd9] -+#CHECK: veclh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd9] -+#CHECK: veclh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd9] -+ -+ veclh %v0, %v0 -+ veclh %v0, %v15 -+ veclh %v0, %v31 -+ veclh %v15, %v0 -+ veclh %v31, %v0 -+ veclh %v14, %v17 -+ -+#CHECK: verimb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x72] -+#CHECK: verimb %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x72] -+#CHECK: verimb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x72] -+#CHECK: verimb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x72] -+#CHECK: verimb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x72] -+#CHECK: verimb %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x72] -+ -+ verimb %v0, %v0, %v0, 0 -+ verimb %v0, %v0, %v0, 255 -+ verimb %v0, %v0, %v31, 0 -+ verimb %v0, %v31, %v0, 0 -+ verimb %v31, %v0, %v0, 0 -+ verimb %v13, %v17, %v21, 0x79 -+ -+#CHECK: verimf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x72] -+#CHECK: verimf %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x20,0x72] -+#CHECK: verimf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x72] -+#CHECK: verimf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x72] -+#CHECK: verimf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x72] -+#CHECK: verimf %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x26,0x72] -+ -+ verimf %v0, %v0, %v0, 0 -+ verimf %v0, %v0, %v0, 255 -+ verimf %v0, %v0, %v31, 0 -+ verimf %v0, %v31, %v0, 0 -+ verimf %v31, %v0, %v0, 0 -+ verimf %v13, %v17, %v21, 0x79 -+ -+#CHECK: verimg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x72] -+#CHECK: verimg %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x30,0x72] -+#CHECK: verimg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x72] -+#CHECK: verimg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x72] -+#CHECK: verimg %v31, %v0, %v0, 0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x38,0x72] -+#CHECK: verimg %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x36,0x72] -+ -+ verimg %v0, %v0, %v0, 0 -+ verimg %v0, %v0, %v0, 255 -+ verimg %v0, %v0, %v31, 0 -+ verimg %v0, %v31, %v0, 0 -+ verimg %v31, %v0, %v0, 0 -+ verimg %v13, %v17, %v21, 0x79 -+ -+#CHECK: verimh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x72] -+#CHECK: verimh %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x10,0x72] -+#CHECK: verimh %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x72] -+#CHECK: verimh %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x72] -+#CHECK: verimh %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x72] -+#CHECK: verimh %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x16,0x72] -+ -+ verimh %v0, %v0, %v0, 0 -+ verimh %v0, %v0, %v0, 255 -+ verimh %v0, %v0, %v31, 0 -+ verimh %v0, %v31, %v0, 0 -+ verimh %v31, %v0, %v0, 0 -+ verimh %v13, %v17, %v21, 0x79 -+ -+#CHECK: verllvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x73] -+#CHECK: verllvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x73] -+#CHECK: verllvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x73] -+#CHECK: verllvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x73] -+#CHECK: verllvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x73] -+ -+ verllvb %v0, %v0, %v0 -+ verllvb %v0, %v0, %v31 -+ verllvb %v0, %v31, %v0 -+ verllvb %v31, %v0, %v0 -+ verllvb %v18, %v3, %v20 -+ -+#CHECK: verllvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x73] -+#CHECK: verllvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x73] -+#CHECK: verllvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x73] -+#CHECK: verllvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x73] -+#CHECK: verllvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x73] -+ -+ verllvf %v0, %v0, %v0 -+ verllvf %v0, %v0, %v31 -+ verllvf %v0, %v31, %v0 -+ verllvf %v31, %v0, %v0 -+ verllvf %v18, %v3, %v20 -+ 
-+#CHECK: verllvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x73] -+#CHECK: verllvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x73] -+#CHECK: verllvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x73] -+#CHECK: verllvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x73] -+#CHECK: verllvg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x73] -+ -+ verllvg %v0, %v0, %v0 -+ verllvg %v0, %v0, %v31 -+ verllvg %v0, %v31, %v0 -+ verllvg %v31, %v0, %v0 -+ verllvg %v18, %v3, %v20 -+ -+#CHECK: verllvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x73] -+#CHECK: verllvh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x73] -+#CHECK: verllvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x73] -+#CHECK: verllvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x73] -+#CHECK: verllvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x73] -+ -+ verllvh %v0, %v0, %v0 -+ verllvh %v0, %v0, %v31 -+ verllvh %v0, %v31, %v0 -+ verllvh %v31, %v0, %v0 -+ verllvh %v18, %v3, %v20 -+ -+#CHECK: verllb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x33] -+#CHECK: verllb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x33] -+#CHECK: verllb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x33] -+#CHECK: verllb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x33] -+#CHECK: verllb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x33] -+#CHECK: verllb %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x33] -+ -+ verllb %v0, %v0, 0 -+ verllb %v0, %v0, 4095 -+ verllb %v0, %v0, 0(%r15) -+ verllb %v0, %v31, 0 -+ verllb %v31, %v0, 0 -+ verllb %v14, %v17, 1074(%r5) -+ -+#CHECK: verllf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x33] -+#CHECK: verllf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x33] -+#CHECK: verllf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x33] -+#CHECK: verllf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x33] -+#CHECK: verllf %v31, %v0, 0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x28,0x33] -+#CHECK: verllf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x33] -+ -+ verllf %v0, %v0, 0 -+ verllf %v0, %v0, 4095 -+ verllf %v0, %v0, 0(%r15) -+ verllf %v0, %v31, 0 -+ verllf %v31, %v0, 0 -+ verllf %v14, %v17, 1074(%r5) -+ -+#CHECK: verllg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x33] -+#CHECK: verllg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x33] -+#CHECK: verllg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x33] -+#CHECK: verllg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x33] -+#CHECK: verllg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x33] -+#CHECK: verllg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x33] -+ -+ verllg %v0, %v0, 0 -+ verllg %v0, %v0, 4095 -+ verllg %v0, %v0, 0(%r15) -+ verllg %v0, %v31, 0 -+ verllg %v31, %v0, 0 -+ verllg %v14, %v17, 1074(%r5) -+ -+#CHECK: verllh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x33] -+#CHECK: verllh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x33] -+#CHECK: verllh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x33] -+#CHECK: verllh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x33] -+#CHECK: verllh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x33] -+#CHECK: verllh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x33] -+ -+ verllh %v0, %v0, 0 -+ verllh %v0, %v0, 4095 -+ verllh %v0, %v0, 0(%r15) -+ verllh %v0, %v31, 0 -+ verllh %v31, %v0, 0 -+ verllh %v14, %v17, 1074(%r5) -+ -+#CHECK: veslvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x70] -+#CHECK: veslvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x70] -+#CHECK: veslvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x70] -+#CHECK: veslvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x70] -+#CHECK: veslvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x70] -+ -+ veslvb %v0, %v0, %v0 -+ veslvb %v0, %v0, %v31 -+ veslvb %v0, %v31, %v0 -+ veslvb %v31, %v0, 
%v0 -+ veslvb %v18, %v3, %v20 -+ -+#CHECK: veslvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x70] -+#CHECK: veslvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x70] -+#CHECK: veslvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x70] -+#CHECK: veslvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x70] -+#CHECK: veslvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x70] -+ -+ veslvf %v0, %v0, %v0 -+ veslvf %v0, %v0, %v31 -+ veslvf %v0, %v31, %v0 -+ veslvf %v31, %v0, %v0 -+ veslvf %v18, %v3, %v20 -+ -+#CHECK: veslvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x70] -+#CHECK: veslvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x70] -+#CHECK: veslvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x70] -+#CHECK: veslvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x70] -+#CHECK: veslvg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x70] -+ -+ veslvg %v0, %v0, %v0 -+ veslvg %v0, %v0, %v31 -+ veslvg %v0, %v31, %v0 -+ veslvg %v31, %v0, %v0 -+ veslvg %v18, %v3, %v20 -+ -+#CHECK: veslvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x70] -+#CHECK: veslvh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x70] -+#CHECK: veslvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x70] -+#CHECK: veslvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x70] -+#CHECK: veslvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x70] -+ -+ veslvh %v0, %v0, %v0 -+ veslvh %v0, %v0, %v31 -+ veslvh %v0, %v31, %v0 -+ veslvh %v31, %v0, %v0 -+ veslvh %v18, %v3, %v20 -+ -+#CHECK: veslb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x30] -+#CHECK: veslb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x30] -+#CHECK: veslb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x30] -+#CHECK: veslb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x30] -+#CHECK: veslb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x30] -+#CHECK: veslb %v14, %v17, 1074(%r5) # encoding: 
[0xe7,0xe1,0x54,0x32,0x04,0x30] -+ -+ veslb %v0, %v0, 0 -+ veslb %v0, %v0, 4095 -+ veslb %v0, %v0, 0(%r15) -+ veslb %v0, %v31, 0 -+ veslb %v31, %v0, 0 -+ veslb %v14, %v17, 1074(%r5) -+ -+#CHECK: veslf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x30] -+#CHECK: veslf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x30] -+#CHECK: veslf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x30] -+#CHECK: veslf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x30] -+#CHECK: veslf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x30] -+#CHECK: veslf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x30] -+ -+ veslf %v0, %v0, 0 -+ veslf %v0, %v0, 4095 -+ veslf %v0, %v0, 0(%r15) -+ veslf %v0, %v31, 0 -+ veslf %v31, %v0, 0 -+ veslf %v14, %v17, 1074(%r5) -+ -+#CHECK: veslg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x30] -+#CHECK: veslg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x30] -+#CHECK: veslg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x30] -+#CHECK: veslg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x30] -+#CHECK: veslg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x30] -+#CHECK: veslg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x30] -+ -+ veslg %v0, %v0, 0 -+ veslg %v0, %v0, 4095 -+ veslg %v0, %v0, 0(%r15) -+ veslg %v0, %v31, 0 -+ veslg %v31, %v0, 0 -+ veslg %v14, %v17, 1074(%r5) -+ -+#CHECK: veslh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x30] -+#CHECK: veslh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x30] -+#CHECK: veslh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x30] -+#CHECK: veslh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x30] -+#CHECK: veslh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x30] -+#CHECK: veslh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x30] -+ -+ veslh %v0, %v0, 0 -+ veslh %v0, %v0, 4095 -+ veslh %v0, %v0, 0(%r15) -+ veslh %v0, %v31, 0 -+ veslh %v31, %v0, 0 -+ veslh %v14, %v17, 1074(%r5) 
-+ -+#CHECK: vesravb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7a] -+#CHECK: vesravb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7a] -+#CHECK: vesravb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7a] -+#CHECK: vesravb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7a] -+#CHECK: vesravb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7a] -+ -+ vesravb %v0, %v0, %v0 -+ vesravb %v0, %v0, %v31 -+ vesravb %v0, %v31, %v0 -+ vesravb %v31, %v0, %v0 -+ vesravb %v18, %v3, %v20 -+ -+#CHECK: vesravf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x7a] -+#CHECK: vesravf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x7a] -+#CHECK: vesravf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x7a] -+#CHECK: vesravf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x7a] -+#CHECK: vesravf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x7a] -+ -+ vesravf %v0, %v0, %v0 -+ vesravf %v0, %v0, %v31 -+ vesravf %v0, %v31, %v0 -+ vesravf %v31, %v0, %v0 -+ vesravf %v18, %v3, %v20 -+ -+#CHECK: vesravg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x7a] -+#CHECK: vesravg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x7a] -+#CHECK: vesravg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x7a] -+#CHECK: vesravg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x7a] -+#CHECK: vesravg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x7a] -+ -+ vesravg %v0, %v0, %v0 -+ vesravg %v0, %v0, %v31 -+ vesravg %v0, %v31, %v0 -+ vesravg %v31, %v0, %v0 -+ vesravg %v18, %v3, %v20 -+ -+#CHECK: vesravh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x7a] -+#CHECK: vesravh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x7a] -+#CHECK: vesravh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x7a] -+#CHECK: vesravh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x7a] -+#CHECK: vesravh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x7a] -+ -+ vesravh %v0, %v0, %v0 -+ vesravh %v0, %v0, 
%v31 -+ vesravh %v0, %v31, %v0 -+ vesravh %v31, %v0, %v0 -+ vesravh %v18, %v3, %v20 -+ -+#CHECK: vesrab %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3a] -+#CHECK: vesrab %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3a] -+#CHECK: vesrab %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3a] -+#CHECK: vesrab %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3a] -+#CHECK: vesrab %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3a] -+#CHECK: vesrab %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3a] -+ -+ vesrab %v0, %v0, 0 -+ vesrab %v0, %v0, 4095 -+ vesrab %v0, %v0, 0(%r15) -+ vesrab %v0, %v31, 0 -+ vesrab %v31, %v0, 0 -+ vesrab %v14, %v17, 1074(%r5) -+ -+#CHECK: vesraf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x3a] -+#CHECK: vesraf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x3a] -+#CHECK: vesraf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x3a] -+#CHECK: vesraf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x3a] -+#CHECK: vesraf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x3a] -+#CHECK: vesraf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x3a] -+ -+ vesraf %v0, %v0, 0 -+ vesraf %v0, %v0, 4095 -+ vesraf %v0, %v0, 0(%r15) -+ vesraf %v0, %v31, 0 -+ vesraf %v31, %v0, 0 -+ vesraf %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrag %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x3a] -+#CHECK: vesrag %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x3a] -+#CHECK: vesrag %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x3a] -+#CHECK: vesrag %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x3a] -+#CHECK: vesrag %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x3a] -+#CHECK: vesrag %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x3a] -+ -+ vesrag %v0, %v0, 0 -+ vesrag %v0, %v0, 4095 -+ vesrag %v0, %v0, 0(%r15) -+ vesrag %v0, %v31, 0 -+ vesrag %v31, %v0, 0 -+ vesrag %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrah %v0, %v0, 0 # encoding: 
[0xe7,0x00,0x00,0x00,0x10,0x3a] -+#CHECK: vesrah %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x3a] -+#CHECK: vesrah %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x3a] -+#CHECK: vesrah %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x3a] -+#CHECK: vesrah %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x3a] -+#CHECK: vesrah %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x3a] -+ -+ vesrah %v0, %v0, 0 -+ vesrah %v0, %v0, 4095 -+ vesrah %v0, %v0, 0(%r15) -+ vesrah %v0, %v31, 0 -+ vesrah %v31, %v0, 0 -+ vesrah %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrlvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x78] -+#CHECK: vesrlvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x78] -+#CHECK: vesrlvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x78] -+#CHECK: vesrlvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x78] -+#CHECK: vesrlvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x78] -+ -+ vesrlvb %v0, %v0, %v0 -+ vesrlvb %v0, %v0, %v31 -+ vesrlvb %v0, %v31, %v0 -+ vesrlvb %v31, %v0, %v0 -+ vesrlvb %v18, %v3, %v20 -+ -+#CHECK: vesrlvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x78] -+#CHECK: vesrlvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x78] -+#CHECK: vesrlvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x78] -+#CHECK: vesrlvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x78] -+#CHECK: vesrlvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x78] -+ -+ vesrlvf %v0, %v0, %v0 -+ vesrlvf %v0, %v0, %v31 -+ vesrlvf %v0, %v31, %v0 -+ vesrlvf %v31, %v0, %v0 -+ vesrlvf %v18, %v3, %v20 -+ -+#CHECK: vesrlvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x78] -+#CHECK: vesrlvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x78] -+#CHECK: vesrlvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x78] -+#CHECK: vesrlvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x78] -+#CHECK: vesrlvg %v18, %v3, %v20 # encoding: 
[0xe7,0x23,0x40,0x00,0x3a,0x78] -+ -+ vesrlvg %v0, %v0, %v0 -+ vesrlvg %v0, %v0, %v31 -+ vesrlvg %v0, %v31, %v0 -+ vesrlvg %v31, %v0, %v0 -+ vesrlvg %v18, %v3, %v20 -+ -+#CHECK: vesrlvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x78] -+#CHECK: vesrlvh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x78] -+#CHECK: vesrlvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x78] -+#CHECK: vesrlvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x78] -+#CHECK: vesrlvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x78] -+ -+ vesrlvh %v0, %v0, %v0 -+ vesrlvh %v0, %v0, %v31 -+ vesrlvh %v0, %v31, %v0 -+ vesrlvh %v31, %v0, %v0 -+ vesrlvh %v18, %v3, %v20 -+ -+#CHECK: vesrlb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x38] -+#CHECK: vesrlb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x38] -+#CHECK: vesrlb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x38] -+#CHECK: vesrlb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x38] -+#CHECK: vesrlb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x38] -+#CHECK: vesrlb %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x38] -+ -+ vesrlb %v0, %v0, 0 -+ vesrlb %v0, %v0, 4095 -+ vesrlb %v0, %v0, 0(%r15) -+ vesrlb %v0, %v31, 0 -+ vesrlb %v31, %v0, 0 -+ vesrlb %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrlf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x38] -+#CHECK: vesrlf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x38] -+#CHECK: vesrlf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x38] -+#CHECK: vesrlf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x38] -+#CHECK: vesrlf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x38] -+#CHECK: vesrlf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x38] -+ -+ vesrlf %v0, %v0, 0 -+ vesrlf %v0, %v0, 4095 -+ vesrlf %v0, %v0, 0(%r15) -+ vesrlf %v0, %v31, 0 -+ vesrlf %v31, %v0, 0 -+ vesrlf %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrlg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x38] 
-+#CHECK: vesrlg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x38] -+#CHECK: vesrlg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x38] -+#CHECK: vesrlg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x38] -+#CHECK: vesrlg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x38] -+#CHECK: vesrlg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x38] -+ -+ vesrlg %v0, %v0, 0 -+ vesrlg %v0, %v0, 4095 -+ vesrlg %v0, %v0, 0(%r15) -+ vesrlg %v0, %v31, 0 -+ vesrlg %v31, %v0, 0 -+ vesrlg %v14, %v17, 1074(%r5) -+ -+#CHECK: vesrlh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x38] -+#CHECK: vesrlh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x38] -+#CHECK: vesrlh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x38] -+#CHECK: vesrlh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x38] -+#CHECK: vesrlh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x38] -+#CHECK: vesrlh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x38] -+ -+ vesrlh %v0, %v0, 0 -+ vesrlh %v0, %v0, 4095 -+ vesrlh %v0, %v0, 0(%r15) -+ vesrlh %v0, %v31, 0 -+ vesrlh %v31, %v0, 0 -+ vesrlh %v14, %v17, 1074(%r5) -+ -+#CHECK: vfadb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe3] -+#CHECK: vfadb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe3] -+#CHECK: vfadb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe3] -+#CHECK: vfadb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe3] -+#CHECK: vfadb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe3] -+ -+ vfadb %v0, %v0, %v0 -+ vfadb %v0, %v0, %v31 -+ vfadb %v0, %v31, %v0 -+ vfadb %v31, %v0, %v0 -+ vfadb %v18, %v3, %v20 -+ -+#CHECK: vfaeb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82] -+#CHECK: vfaeb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82] -+#CHECK: vfaeb %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x82] -+#CHECK: vfaeb %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x82] -+#CHECK: vfaeb %v0, %v0, 
%v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x82] -+#CHECK: vfaeb %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x82] -+#CHECK: vfaeb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x82] -+#CHECK: vfaeb %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x82] -+#CHECK: vfaeb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x82] -+#CHECK: vfaeb %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x40,0x0a,0x82] -+#CHECK: vfaeb %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82] -+#CHECK: vfaebs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x0a,0x82] -+#CHECK: vfaezb %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x0a,0x82] -+#CHECK: vfaezbs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x0a,0x82] -+#CHECK: vfaezbs %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82] -+ -+ vfaeb %v0, %v0, %v0 -+ vfaeb %v0, %v0, %v0, 0 -+ vfaeb %v0, %v0, %v0, 12 -+ vfaeb %v0, %v0, %v15 -+ vfaeb %v0, %v0, %v31 -+ vfaeb %v0, %v15, %v0 -+ vfaeb %v0, %v31, %v0 -+ vfaeb %v15, %v0, %v0 -+ vfaeb %v31, %v0, %v0 -+ vfaeb %v18, %v3, %v20, 4 -+ vfaeb %v18, %v3, %v20, 15 -+ vfaebs %v18, %v3, %v20, 8 -+ vfaezb %v18, %v3, %v20, 4 -+ vfaezbs %v18, %v3, %v20, 8 -+ vfaezbs %v18, %v3, %v20, 15 -+ -+#CHECK: vfaef %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82] -+#CHECK: vfaef %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82] -+#CHECK: vfaef %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x20,0x82] -+#CHECK: vfaef %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x82] -+#CHECK: vfaef %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x82] -+#CHECK: vfaef %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x82] -+#CHECK: vfaef %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x82] -+#CHECK: vfaef %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x82] -+#CHECK: vfaef %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x82] -+#CHECK: vfaef %v18, %v3, %v20, 4 # encoding: 
[0xe7,0x23,0x40,0x40,0x2a,0x82] -+#CHECK: vfaef %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82] -+#CHECK: vfaefs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x2a,0x82] -+#CHECK: vfaezf %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x2a,0x82] -+#CHECK: vfaezfs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x2a,0x82] -+#CHECK: vfaezfs %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82] -+ -+ vfaef %v0, %v0, %v0 -+ vfaef %v0, %v0, %v0, 0 -+ vfaef %v0, %v0, %v0, 12 -+ vfaef %v0, %v0, %v15 -+ vfaef %v0, %v0, %v31 -+ vfaef %v0, %v15, %v0 -+ vfaef %v0, %v31, %v0 -+ vfaef %v15, %v0, %v0 -+ vfaef %v31, %v0, %v0 -+ vfaef %v18, %v3, %v20, 4 -+ vfaef %v18, %v3, %v20, 15 -+ vfaefs %v18, %v3, %v20, 8 -+ vfaezf %v18, %v3, %v20, 4 -+ vfaezfs %v18, %v3, %v20, 8 -+ vfaezfs %v18, %v3, %v20, 15 -+ -+#CHECK: vfaeh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82] -+#CHECK: vfaeh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82] -+#CHECK: vfaeh %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x10,0x82] -+#CHECK: vfaeh %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x82] -+#CHECK: vfaeh %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x82] -+#CHECK: vfaeh %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x82] -+#CHECK: vfaeh %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x82] -+#CHECK: vfaeh %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x82] -+#CHECK: vfaeh %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x82] -+#CHECK: vfaeh %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x40,0x1a,0x82] -+#CHECK: vfaeh %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x1a,0x82] -+#CHECK: vfaehs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x1a,0x82] -+#CHECK: vfaezh %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x1a,0x82] -+#CHECK: vfaezhs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x1a,0x82] -+#CHECK: vfaezhs %v18, %v3, %v20, 15 # encoding: 
[0xe7,0x23,0x40,0xf0,0x1a,0x82] -+ -+ vfaeh %v0, %v0, %v0 -+ vfaeh %v0, %v0, %v0, 0 -+ vfaeh %v0, %v0, %v0, 12 -+ vfaeh %v0, %v0, %v15 -+ vfaeh %v0, %v0, %v31 -+ vfaeh %v0, %v15, %v0 -+ vfaeh %v0, %v31, %v0 -+ vfaeh %v15, %v0, %v0 -+ vfaeh %v31, %v0, %v0 -+ vfaeh %v18, %v3, %v20, 4 -+ vfaeh %v18, %v3, %v20, 15 -+ vfaehs %v18, %v3, %v20, 8 -+ vfaezh %v18, %v3, %v20, 4 -+ vfaezhs %v18, %v3, %v20, 8 -+ vfaezhs %v18, %v3, %v20, 15 -+ -+#CHECK: vfcedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe8] -+#CHECK: vfcedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe8] -+#CHECK: vfcedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe8] -+#CHECK: vfcedb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe8] -+#CHECK: vfcedb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe8] -+ -+ vfcedb %v0, %v0, %v0 -+ vfcedb %v0, %v0, %v31 -+ vfcedb %v0, %v31, %v0 -+ vfcedb %v31, %v0, %v0 -+ vfcedb %v18, %v3, %v20 -+ -+#CHECK: vfcedbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xe8] -+#CHECK: vfcedbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xe8] -+#CHECK: vfcedbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xe8] -+#CHECK: vfcedbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xe8] -+#CHECK: vfcedbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xe8] -+ -+ vfcedbs %v0, %v0, %v0 -+ vfcedbs %v0, %v0, %v31 -+ vfcedbs %v0, %v31, %v0 -+ vfcedbs %v31, %v0, %v0 -+ vfcedbs %v18, %v3, %v20 -+ -+#CHECK: vfchdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xeb] -+#CHECK: vfchdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xeb] -+#CHECK: vfchdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xeb] -+#CHECK: vfchdb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xeb] -+#CHECK: vfchdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xeb] -+ -+ vfchdb %v0, %v0, %v0 -+ vfchdb %v0, %v0, %v31 -+ vfchdb %v0, %v31, %v0 -+ vfchdb %v31, %v0, %v0 -+ vfchdb %v18, %v3, %v20 -+ -+#CHECK: vfchdbs %v0, 
%v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xeb] -+#CHECK: vfchdbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xeb] -+#CHECK: vfchdbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xeb] -+#CHECK: vfchdbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xeb] -+#CHECK: vfchdbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xeb] -+ -+ vfchdbs %v0, %v0, %v0 -+ vfchdbs %v0, %v0, %v31 -+ vfchdbs %v0, %v31, %v0 -+ vfchdbs %v31, %v0, %v0 -+ vfchdbs %v18, %v3, %v20 -+ -+#CHECK: vfchedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xea] -+#CHECK: vfchedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xea] -+#CHECK: vfchedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xea] -+#CHECK: vfchedb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xea] -+#CHECK: vfchedb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xea] -+ -+ vfchedb %v0, %v0, %v0 -+ vfchedb %v0, %v0, %v31 -+ vfchedb %v0, %v31, %v0 -+ vfchedb %v31, %v0, %v0 -+ vfchedb %v18, %v3, %v20 -+ -+#CHECK: vfchedbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xea] -+#CHECK: vfchedbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xea] -+#CHECK: vfchedbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xea] -+#CHECK: vfchedbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xea] -+#CHECK: vfchedbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xea] -+ -+ vfchedbs %v0, %v0, %v0 -+ vfchedbs %v0, %v0, %v31 -+ vfchedbs %v0, %v31, %v0 -+ vfchedbs %v31, %v0, %v0 -+ vfchedbs %v18, %v3, %v20 -+ -+#CHECK: vfddb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe5] -+#CHECK: vfddb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe5] -+#CHECK: vfddb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe5] -+#CHECK: vfddb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe5] -+#CHECK: vfddb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe5] -+ -+ vfddb %v0, %v0, %v0 -+ vfddb %v0, %v0, %v31 -+ vfddb %v0, %v31, %v0 
-+ vfddb %v31, %v0, %v0 -+ vfddb %v18, %v3, %v20 -+ -+#CHECK: vfeeb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x80] -+#CHECK: vfeeb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x80] -+#CHECK: vfeeb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x80] -+#CHECK: vfeeb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x80] -+#CHECK: vfeeb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x80] -+#CHECK: vfeeb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x80] -+#CHECK: vfeeb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x80] -+#CHECK: vfeeb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x80] -+#CHECK: vfeebs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0x80] -+#CHECK: vfeezb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x0a,0x80] -+#CHECK: vfeezbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x04,0x80] -+ -+ vfeeb %v0, %v0, %v0 -+ vfeeb %v0, %v0, %v15 -+ vfeeb %v0, %v0, %v31 -+ vfeeb %v0, %v15, %v0 -+ vfeeb %v0, %v31, %v0 -+ vfeeb %v15, %v0, %v0 -+ vfeeb %v31, %v0, %v0 -+ vfeeb %v18, %v3, %v20 -+ vfeebs %v5, %v22, %v7 -+ vfeezb %v18, %v3, %v20 -+ vfeezbs %v5, %v22, %v7 -+ -+#CFECK: vfeef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x80] -+#CFECK: vfeef %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x80] -+#CFECK: vfeef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x80] -+#CFECK: vfeef %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x80] -+#CFECK: vfeef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x80] -+#CFECK: vfeef %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x80] -+#CFECK: vfeef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x80] -+#CFECK: vfeef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x80] -+#CFECK: vfeefs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x80] -+#CFECK: vfeezf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x80] -+#CFECK: vfeezfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x24,0x80] -+ -+ vfeef 
%v0, %v0, %v0 -+ vfeef %v0, %v0, %v15 -+ vfeef %v0, %v0, %v31 -+ vfeef %v0, %v15, %v0 -+ vfeef %v0, %v31, %v0 -+ vfeef %v15, %v0, %v0 -+ vfeef %v31, %v0, %v0 -+ vfeef %v18, %v3, %v20 -+ vfeefs %v5, %v22, %v7 -+ vfeezf %v18, %v3, %v20 -+ vfeezfs %v5, %v22, %v7 -+ -+#CHECK: vfeeh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x80] -+#CHECK: vfeeh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x80] -+#CHECK: vfeeh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x80] -+#CHECK: vfeeh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x80] -+#CHECK: vfeeh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x80] -+#CHECK: vfeeh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x80] -+#CHECK: vfeeh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x80] -+#CHECK: vfeeh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x80] -+#CHECK: vfeehs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x80] -+#CHECK: vfeezh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x80] -+#CHECK: vfeezhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x14,0x80] -+ -+ vfeeh %v0, %v0, %v0 -+ vfeeh %v0, %v0, %v15 -+ vfeeh %v0, %v0, %v31 -+ vfeeh %v0, %v15, %v0 -+ vfeeh %v0, %v31, %v0 -+ vfeeh %v15, %v0, %v0 -+ vfeeh %v31, %v0, %v0 -+ vfeeh %v18, %v3, %v20 -+ vfeehs %v5, %v22, %v7 -+ vfeezh %v18, %v3, %v20 -+ vfeezhs %v5, %v22, %v7 -+ -+#CHECK: vfeneb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x81] -+#CHECK: vfeneb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x81] -+#CHECK: vfeneb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x81] -+#CHECK: vfeneb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x81] -+#CHECK: vfeneb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x81] -+#CHECK: vfeneb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x81] -+#CHECK: vfeneb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x81] -+#CHECK: vfeneb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x81] -+#CHECK: vfenebs %v5, 
%v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0x81] -+#CHECK: vfenezb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x0a,0x81] -+#CHECK: vfenezbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x04,0x81] -+ -+ vfeneb %v0, %v0, %v0 -+ vfeneb %v0, %v0, %v15 -+ vfeneb %v0, %v0, %v31 -+ vfeneb %v0, %v15, %v0 -+ vfeneb %v0, %v31, %v0 -+ vfeneb %v15, %v0, %v0 -+ vfeneb %v31, %v0, %v0 -+ vfeneb %v18, %v3, %v20 -+ vfenebs %v5, %v22, %v7 -+ vfenezb %v18, %v3, %v20 -+ vfenezbs %v5, %v22, %v7 -+ -+#CFECK: vfenef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x81] -+#CFECK: vfenef %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x81] -+#CFECK: vfenef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x81] -+#CFECK: vfenef %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x81] -+#CFECK: vfenef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x81] -+#CFECK: vfenef %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x81] -+#CFECK: vfenef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x81] -+#CFECK: vfenef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x81] -+#CFECK: vfenefs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x81] -+#CFECK: vfenezf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x81] -+#CFECK: vfenezfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x24,0x81] -+ -+ vfenef %v0, %v0, %v0 -+ vfenef %v0, %v0, %v15 -+ vfenef %v0, %v0, %v31 -+ vfenef %v0, %v15, %v0 -+ vfenef %v0, %v31, %v0 -+ vfenef %v15, %v0, %v0 -+ vfenef %v31, %v0, %v0 -+ vfenef %v18, %v3, %v20 -+ vfenefs %v5, %v22, %v7 -+ vfenezf %v18, %v3, %v20 -+ vfenezfs %v5, %v22, %v7 -+ -+#CHECK: vfeneh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x81] -+#CHECK: vfeneh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x81] -+#CHECK: vfeneh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x81] -+#CHECK: vfeneh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x81] -+#CHECK: vfeneh %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x14,0x81] -+#CHECK: vfeneh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x81] -+#CHECK: vfeneh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x81] -+#CHECK: vfeneh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x81] -+#CHECK: vfenehs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x81] -+#CHECK: vfenezh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x81] -+#CHECK: vfenezhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x14,0x81] -+ -+ vfeneh %v0, %v0, %v0 -+ vfeneh %v0, %v0, %v15 -+ vfeneh %v0, %v0, %v31 -+ vfeneh %v0, %v15, %v0 -+ vfeneh %v0, %v31, %v0 -+ vfeneh %v15, %v0, %v0 -+ vfeneh %v31, %v0, %v0 -+ vfeneh %v18, %v3, %v20 -+ vfenehs %v5, %v22, %v7 -+ vfenezh %v18, %v3, %v20 -+ vfenezhs %v5, %v22, %v7 -+ -+#CHECK: vfidb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc7] -+#CHECK: vfidb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc7] -+#CHECK: vfidb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc7] -+#CHECK: vfidb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7] -+#CHECK: vfidb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc7] -+#CHECK: vfidb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc7] -+#CHECK: vfidb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc7] -+ -+ vfidb %v0, %v0, 0, 0 -+ vfidb %v0, %v0, 0, 15 -+ vfidb %v0, %v0, 4, 0 -+ vfidb %v0, %v0, 12, 0 -+ vfidb %v0, %v31, 0, 0 -+ vfidb %v31, %v0, 0, 0 -+ vfidb %v14, %v17, 4, 10 -+ -+#CHECK: vistrb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5c] -+#CHECK: vistrb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5c] -+#CHECK: vistrb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x5c] -+#CHECK: vistrb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5c] -+#CHECK: vistrb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5c] -+#CHECK: vistrb %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x08,0x5c] -+#CHECK: vistrbs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x04,0x5c] -+ -+ 
vistrb %v0, %v0 -+ vistrb %v0, %v15 -+ vistrb %v0, %v31 -+ vistrb %v15, %v0 -+ vistrb %v31, %v0 -+ vistrb %v18, %v3 -+ vistrbs %v5, %v22 -+ -+#CBECK: vistrf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5c] -+#CBECK: vistrf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5c] -+#CBECK: vistrf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5c] -+#CBECK: vistrf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5c] -+#CBECK: vistrf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5c] -+#CBECK: vistrf %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x28,0x5c] -+#CBECK: vistrfs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x24,0x5c] -+ -+ vistrf %v0, %v0 -+ vistrf %v0, %v15 -+ vistrf %v0, %v31 -+ vistrf %v15, %v0 -+ vistrf %v31, %v0 -+ vistrf %v18, %v3 -+ vistrfs %v5, %v22 -+ -+#CHECK: vistrh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5c] -+#CHECK: vistrh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x5c] -+#CHECK: vistrh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5c] -+#CHECK: vistrh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5c] -+#CHECK: vistrh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5c] -+#CHECK: vistrh %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x18,0x5c] -+#CHECK: vistrhs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x14,0x5c] -+ -+ vistrh %v0, %v0 -+ vistrh %v0, %v15 -+ vistrh %v0, %v31 -+ vistrh %v15, %v0 -+ vistrh %v31, %v0 -+ vistrh %v18, %v3 -+ vistrhs %v5, %v22 -+ -+#CHECK: vflcdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcc] -+#CHECK: vflcdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcc] -+#CHECK: vflcdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcc] -+#CHECK: vflcdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcc] -+#CHECK: vflcdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcc] -+#CHECK: vflcdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcc] -+ -+ vflcdb %v0, %v0 -+ vflcdb %v0, %v15 -+ vflcdb %v0, %v31 -+ vflcdb %v15, %v0 -+ vflcdb %v31, %v0 -+ vflcdb %v14, %v17 -+ -+#CHECK: vflndb %v0, 
%v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xcc] -+#CHECK: vflndb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x10,0x30,0xcc] -+#CHECK: vflndb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xcc] -+#CHECK: vflndb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x30,0xcc] -+#CHECK: vflndb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xcc] -+#CHECK: vflndb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x10,0x34,0xcc] -+ -+ vflndb %v0, %v0 -+ vflndb %v0, %v15 -+ vflndb %v0, %v31 -+ vflndb %v15, %v0 -+ vflndb %v31, %v0 -+ vflndb %v14, %v17 -+ -+#CHECK: vflpdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x20,0x30,0xcc] -+#CHECK: vflpdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x20,0x30,0xcc] -+#CHECK: vflpdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x20,0x34,0xcc] -+#CHECK: vflpdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x30,0xcc] -+#CHECK: vflpdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x38,0xcc] -+#CHECK: vflpdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x20,0x34,0xcc] -+ -+ vflpdb %v0, %v0 -+ vflpdb %v0, %v15 -+ vflpdb %v0, %v31 -+ vflpdb %v15, %v0 -+ vflpdb %v31, %v0 -+ vflpdb %v14, %v17 -+ -+#CHECK: vfmadb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8f] -+#CHECK: vfmadb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x8f] -+#CHECK: vfmadb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8f] -+#CHECK: vfmadb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8f] -+#CHECK: vfmadb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8f] -+#CHECK: vfmadb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8f] -+ -+ vfmadb %v0, %v0, %v0, %v0 -+ vfmadb %v0, %v0, %v0, %v31 -+ vfmadb %v0, %v0, %v31, %v0 -+ vfmadb %v0, %v31, %v0, %v0 -+ vfmadb %v31, %v0, %v0, %v0 -+ vfmadb %v13, %v17, %v21, %v25 -+ -+#CHECK: vfmdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe7] -+#CHECK: vfmdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe7] -+#CHECK: vfmdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe7] -+#CHECK: vfmdb 
%v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe7] -+#CHECK: vfmdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe7] -+ -+ vfmdb %v0, %v0, %v0 -+ vfmdb %v0, %v0, %v31 -+ vfmdb %v0, %v31, %v0 -+ vfmdb %v31, %v0, %v0 -+ vfmdb %v18, %v3, %v20 -+ -+#CHECK: vfmsdb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8e] -+#CHECK: vfmsdb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x8e] -+#CHECK: vfmsdb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8e] -+#CHECK: vfmsdb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8e] -+#CHECK: vfmsdb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8e] -+#CHECK: vfmsdb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8e] -+ -+ vfmsdb %v0, %v0, %v0, %v0 -+ vfmsdb %v0, %v0, %v0, %v31 -+ vfmsdb %v0, %v0, %v31, %v0 -+ vfmsdb %v0, %v31, %v0, %v0 -+ vfmsdb %v31, %v0, %v0, %v0 -+ vfmsdb %v13, %v17, %v21, %v25 -+ -+#CHECK: vfsdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe2] -+#CHECK: vfsdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe2] -+#CHECK: vfsdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe2] -+#CHECK: vfsdb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe2] -+#CHECK: vfsdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe2] -+ -+ vfsdb %v0, %v0, %v0 -+ vfsdb %v0, %v0, %v31 -+ vfsdb %v0, %v31, %v0 -+ vfsdb %v31, %v0, %v0 -+ vfsdb %v18, %v3, %v20 -+ -+#CHECK: vfsqdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xce] -+#CHECK: vfsqdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xce] -+#CHECK: vfsqdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xce] -+#CHECK: vfsqdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xce] -+#CHECK: vfsqdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xce] -+#CHECK: vfsqdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xce] -+ -+ vfsqdb %v0, %v0 -+ vfsqdb %v0, %v15 -+ vfsqdb %v0, %v31 -+ vfsqdb %v15, %v0 -+ vfsqdb %v31, %v0 -+ vfsqdb %v14, %v17 -+ -+#CHECK: 
vftcidb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4a] -+#CHECK: vftcidb %v0, %v0, 4095 # encoding: [0xe7,0x00,0xff,0xf0,0x30,0x4a] -+#CHECK: vftcidb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4a] -+#CHECK: vftcidb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4a] -+#CHECK: vftcidb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4a] -+#CHECK: vftcidb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4a] -+#CHECK: vftcidb %v4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x80,0x34,0x4a] -+ -+ vftcidb %v0, %v0, 0 -+ vftcidb %v0, %v0, 4095 -+ vftcidb %v0, %v15, 0 -+ vftcidb %v0, %v31, 0 -+ vftcidb %v15, %v0, 0 -+ vftcidb %v31, %v0, 0 -+ vftcidb %v4, %v21, 0x678 -+ -+#CHECK: vgbm %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44] -+#CHECK: vgbm %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44] -+#CHECK: vgbm %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44] -+#CHECK: vgbm %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44] -+#CHECK: vgbm %v17, 4660 # encoding: [0xe7,0x10,0x12,0x34,0x08,0x44] -+ -+ vgbm %v0, 0 -+ vgbm %v0, 0xffff -+ vgbm %v15, 0 -+ vgbm %v31, 0 -+ vgbm %v17, 0x1234 -+ -+#CHECK: vgef %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x13] -+#CHECK: vgef %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x13] -+#CHECK: vgef %v0, 0(%v0,%r1), 3 # encoding: [0xe7,0x00,0x10,0x00,0x30,0x13] -+#CHECK: vgef %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x13] -+#CHECK: vgef %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x13] -+#CHECK: vgef %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x13] -+#CHECK: vgef %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x13] -+#CHECK: vgef %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x13] -+#CHECK: vgef %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x13] -+#CHECK: vgef %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x13] -+ -+ vgef %v0, 0(%v0), 0 -+ vgef %v0, 0(%v0,%r1), 0 -+ vgef %v0, 0(%v0,%r1), 
3 -+ vgef %v0, 0(%v0,%r15), 0 -+ vgef %v0, 0(%v15,%r1), 0 -+ vgef %v0, 0(%v31,%r1), 0 -+ vgef %v0, 4095(%v0, %r1), 0 -+ vgef %v15, 0(%v0,%r1), 0 -+ vgef %v31, 0(%v0,%r1), 0 -+ vgef %v10, 1000(%v19,%r7), 1 -+ -+#CHECK: vgeg %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x12] -+#CHECK: vgeg %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x12] -+#CHECK: vgeg %v0, 0(%v0,%r1), 1 # encoding: [0xe7,0x00,0x10,0x00,0x10,0x12] -+#CHECK: vgeg %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x12] -+#CHECK: vgeg %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x12] -+#CHECK: vgeg %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x12] -+#CHECK: vgeg %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x12] -+#CHECK: vgeg %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x12] -+#CHECK: vgeg %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x12] -+#CHECK: vgeg %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x12] -+ -+ vgeg %v0, 0(%v0), 0 -+ vgeg %v0, 0(%v0,%r1), 0 -+ vgeg %v0, 0(%v0,%r1), 1 -+ vgeg %v0, 0(%v0,%r15), 0 -+ vgeg %v0, 0(%v15,%r1), 0 -+ vgeg %v0, 0(%v31,%r1), 0 -+ vgeg %v0, 4095(%v0,%r1), 0 -+ vgeg %v15, 0(%v0,%r1), 0 -+ vgeg %v31, 0(%v0,%r1), 0 -+ vgeg %v10, 1000(%v19,%r7), 1 -+ -+#CHECK: vgfmab %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xbc] -+#CHECK: vgfmab %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xbc] -+#CHECK: vgfmab %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xbc] -+#CHECK: vgfmab %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xbc] -+#CHECK: vgfmab %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xbc] -+#CHECK: vgfmab %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xbc] -+ -+ vgfmab %v0, %v0, %v0, %v0 -+ vgfmab %v0, %v0, %v0, %v31 -+ vgfmab %v0, %v0, %v31, %v0 -+ vgfmab %v0, %v31, %v0, %v0 -+ vgfmab %v31, %v0, %v0, %v0 -+ vgfmab %v13, %v17, %v21, %v25 -+ -+#CHECK: vgfmaf %v0, %v0, %v0, %v0 # 
encoding: [0xe7,0x00,0x02,0x00,0x00,0xbc] -+#CHECK: vgfmaf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xbc] -+#CHECK: vgfmaf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xbc] -+#CHECK: vgfmaf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xbc] -+#CHECK: vgfmaf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xbc] -+#CHECK: vgfmaf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xbc] -+ -+ vgfmaf %v0, %v0, %v0, %v0 -+ vgfmaf %v0, %v0, %v0, %v31 -+ vgfmaf %v0, %v0, %v31, %v0 -+ vgfmaf %v0, %v31, %v0, %v0 -+ vgfmaf %v31, %v0, %v0, %v0 -+ vgfmaf %v13, %v17, %v21, %v25 -+ -+#CHECK: vgfmag %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xbc] -+#CHECK: vgfmag %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xbc] -+#CHECK: vgfmag %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xbc] -+#CHECK: vgfmag %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xbc] -+#CHECK: vgfmag %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xbc] -+#CHECK: vgfmag %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xbc] -+ -+ vgfmag %v0, %v0, %v0, %v0 -+ vgfmag %v0, %v0, %v0, %v31 -+ vgfmag %v0, %v0, %v31, %v0 -+ vgfmag %v0, %v31, %v0, %v0 -+ vgfmag %v31, %v0, %v0, %v0 -+ vgfmag %v13, %v17, %v21, %v25 -+ -+#CHECK: vgfmah %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xbc] -+#CHECK: vgfmah %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xbc] -+#CHECK: vgfmah %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xbc] -+#CHECK: vgfmah %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xbc] -+#CHECK: vgfmah %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xbc] -+#CHECK: vgfmah %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xbc] -+ -+ vgfmah %v0, %v0, %v0, %v0 -+ vgfmah %v0, %v0, %v0, %v31 -+ vgfmah %v0, %v0, %v31, %v0 -+ vgfmah %v0, %v31, %v0, %v0 -+ vgfmah %v31, %v0, %v0, %v0 -+ vgfmah %v13, %v17, %v21, %v25 -+ -+#CHECK: 
vgfmb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb4] -+#CHECK: vgfmb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb4] -+#CHECK: vgfmb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb4] -+#CHECK: vgfmb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb4] -+#CHECK: vgfmb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xb4] -+ -+ vgfmb %v0, %v0, %v0 -+ vgfmb %v0, %v0, %v31 -+ vgfmb %v0, %v31, %v0 -+ vgfmb %v31, %v0, %v0 -+ vgfmb %v18, %v3, %v20 -+ -+#CHECK: vgfmf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb4] -+#CHECK: vgfmf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb4] -+#CHECK: vgfmf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb4] -+#CHECK: vgfmf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb4] -+#CHECK: vgfmf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xb4] -+ -+ vgfmf %v0, %v0, %v0 -+ vgfmf %v0, %v0, %v31 -+ vgfmf %v0, %v31, %v0 -+ vgfmf %v31, %v0, %v0 -+ vgfmf %v18, %v3, %v20 -+ -+#CHECK: vgfmg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb4] -+#CHECK: vgfmg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb4] -+#CHECK: vgfmg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb4] -+#CHECK: vgfmg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb4] -+#CHECK: vgfmg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xb4] -+ -+ vgfmg %v0, %v0, %v0 -+ vgfmg %v0, %v0, %v31 -+ vgfmg %v0, %v31, %v0 -+ vgfmg %v31, %v0, %v0 -+ vgfmg %v18, %v3, %v20 -+ -+#CHECK: vgfmh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xb4] -+#CHECK: vgfmh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xb4] -+#CHECK: vgfmh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xb4] -+#CHECK: vgfmh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xb4] -+#CHECK: vgfmh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xb4] -+ -+ vgfmh %v0, %v0, %v0 -+ vgfmh %v0, %v0, %v31 -+ vgfmh %v0, %v31, %v0 -+ vgfmh %v31, %v0, %v0 -+ vgfmh %v18, %v3, %v20 -+ 
-+#CHECK: vgmb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x46] -+#CHECK: vgmb %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x46] -+#CHECK: vgmb %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x00,0x46] -+#CHECK: vgmb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x46] -+#CHECK: vgmb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x46] -+#CHECK: vgmb %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x08,0x46] -+ -+ vgmb %v0, 0, 0 -+ vgmb %v0, 0, 255 -+ vgmb %v0, 255, 0 -+ vgmb %v15, 0, 0 -+ vgmb %v31, 0, 0 -+ vgmb %v21, 2, 3 -+ -+#CHECK: vgmf %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x46] -+#CHECK: vgmf %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x20,0x46] -+#CHECK: vgmf %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x20,0x46] -+#CHECK: vgmf %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x46] -+#CHECK: vgmf %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x46] -+#CHECK: vgmf %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x28,0x46] -+ -+ vgmf %v0, 0, 0 -+ vgmf %v0, 0, 255 -+ vgmf %v0, 255, 0 -+ vgmf %v15, 0, 0 -+ vgmf %v31, 0, 0 -+ vgmf %v21, 2, 3 -+ -+#CHECK: vgmg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x46] -+#CHECK: vgmg %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x30,0x46] -+#CHECK: vgmg %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x30,0x46] -+#CHECK: vgmg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x46] -+#CHECK: vgmg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x46] -+#CHECK: vgmg %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x38,0x46] -+ -+ vgmg %v0, 0, 0 -+ vgmg %v0, 0, 255 -+ vgmg %v0, 255, 0 -+ vgmg %v15, 0, 0 -+ vgmg %v31, 0, 0 -+ vgmg %v21, 2, 3 -+ -+#CHECK: vgmh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x46] -+#CHECK: vgmh %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x10,0x46] -+#CHECK: vgmh %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x10,0x46] -+#CHECK: vgmh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x46] -+#CHECK: vgmh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x46] -+#CHECK: vgmh %v21, 
2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x18,0x46] -+ -+ vgmh %v0, 0, 0 -+ vgmh %v0, 0, 255 -+ vgmh %v0, 255, 0 -+ vgmh %v15, 0, 0 -+ vgmh %v31, 0, 0 -+ vgmh %v21, 2, 3 -+ -+#CHECK: vl %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x06] -+#CHECK: vl %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x06] -+#CHECK: vl %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x06] -+#CHECK: vl %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x06] -+#CHECK: vl %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x06] -+#CHECK: vl %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x06] -+#CHECK: vl %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x06] -+ -+ vl %v0, 0 -+ vl %v0, 4095 -+ vl %v0, 0(%r15) -+ vl %v0, 0(%r15,%r1) -+ vl %v15, 0 -+ vl %v31, 0 -+ vl %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlbb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x07] -+#CHECK: vlbb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x07] -+#CHECK: vlbb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x07] -+#CHECK: vlbb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x07] -+#CHECK: vlbb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x07] -+#CHECK: vlbb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x07] -+#CHECK: vlbb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x07] -+#CHECK: vlbb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x07] -+ -+ vlbb %v0, 0, 0 -+ vlbb %v0, 0, 15 -+ vlbb %v0, 4095, 0 -+ vlbb %v0, 0(%r15), 0 -+ vlbb %v0, 0(%r15,%r1), 0 -+ vlbb %v15, 0, 0 -+ vlbb %v31, 0, 0 -+ vlbb %v18, 1383(%r3,%r4), 8 -+ -+#CHECK: vlcb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xde] -+#CHECK: vlcb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xde] -+#CHECK: vlcb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xde] -+#CHECK: vlcb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xde] -+#CHECK: vlcb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xde] -+#CHECK: vlcb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xde] -+ -+ vlcb %v0, %v0 -+ vlcb 
%v0, %v15 -+ vlcb %v0, %v31 -+ vlcb %v15, %v0 -+ vlcb %v31, %v0 -+ vlcb %v14, %v17 -+ -+#CHECK: vlcf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xde] -+#CHECK: vlcf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xde] -+#CHECK: vlcf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xde] -+#CHECK: vlcf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xde] -+#CHECK: vlcf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xde] -+#CHECK: vlcf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xde] -+ -+ vlcf %v0, %v0 -+ vlcf %v0, %v15 -+ vlcf %v0, %v31 -+ vlcf %v15, %v0 -+ vlcf %v31, %v0 -+ vlcf %v14, %v17 -+ -+#CHECK: vlcg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xde] -+#CHECK: vlcg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xde] -+#CHECK: vlcg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xde] -+#CHECK: vlcg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xde] -+#CHECK: vlcg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xde] -+#CHECK: vlcg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xde] -+ -+ vlcg %v0, %v0 -+ vlcg %v0, %v15 -+ vlcg %v0, %v31 -+ vlcg %v15, %v0 -+ vlcg %v31, %v0 -+ vlcg %v14, %v17 -+ -+#CHECK: vlch %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xde] -+#CHECK: vlch %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xde] -+#CHECK: vlch %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xde] -+#CHECK: vlch %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xde] -+#CHECK: vlch %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xde] -+#CHECK: vlch %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xde] -+ -+ vlch %v0, %v0 -+ vlch %v0, %v15 -+ vlch %v0, %v31 -+ vlch %v15, %v0 -+ vlch %v31, %v0 -+ vlch %v14, %v17 -+ -+#CHECK: vldeb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc4] -+#CHECK: vldeb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xc4] -+#CHECK: vldeb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc4] -+#CHECK: vldeb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xc4] -+#CHECK: vldeb %v31, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x28,0xc4] -+#CHECK: vldeb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xc4] -+ -+ vldeb %v0, %v0 -+ vldeb %v0, %v15 -+ vldeb %v0, %v31 -+ vldeb %v15, %v0 -+ vldeb %v31, %v0 -+ vldeb %v14, %v17 -+ -+#CHECK: vleb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x00] -+#CHECK: vleb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x00] -+#CHECK: vleb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x00] -+#CHECK: vleb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x00] -+#CHECK: vleb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x00] -+#CHECK: vleb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x00] -+#CHECK: vleb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x00] -+#CHECK: vleb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x00] -+ -+ vleb %v0, 0, 0 -+ vleb %v0, 0, 15 -+ vleb %v0, 4095, 0 -+ vleb %v0, 0(%r15), 0 -+ vleb %v0, 0(%r15,%r1), 0 -+ vleb %v15, 0, 0 -+ vleb %v31, 0, 0 -+ vleb %v18, 1383(%r3,%r4), 8 -+ -+#CHECK: vledb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc5] -+#CHECK: vledb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc5] -+#CHECK: vledb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc5] -+#CHECK: vledb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] -+#CHECK: vledb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc5] -+#CHECK: vledb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc5] -+#CHECK: vledb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc5] -+ -+ vledb %v0, %v0, 0, 0 -+ vledb %v0, %v0, 0, 15 -+ vledb %v0, %v0, 4, 0 -+ vledb %v0, %v0, 12, 0 -+ vledb %v0, %v31, 0, 0 -+ vledb %v31, %v0, 0, 0 -+ vledb %v14, %v17, 4, 10 -+ -+#CHECK: vlef %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x03] -+#CHECK: vlef %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x03] -+#CHECK: vlef %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x03] -+#CHECK: vlef %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x03] 
-+#CHECK: vlef %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x03] -+#CHECK: vlef %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x03] -+#CHECK: vlef %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x03] -+#CHECK: vlef %v18, 1383(%r3,%r4), 2 # encoding: [0xe7,0x23,0x45,0x67,0x28,0x03] -+ -+ vlef %v0, 0, 0 -+ vlef %v0, 0, 3 -+ vlef %v0, 4095, 0 -+ vlef %v0, 0(%r15), 0 -+ vlef %v0, 0(%r15,%r1), 0 -+ vlef %v15, 0, 0 -+ vlef %v31, 0, 0 -+ vlef %v18, 1383(%r3,%r4), 2 -+ -+#CHECK: vleg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x02] -+#CHECK: vleg %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x02] -+#CHECK: vleg %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x02] -+#CHECK: vleg %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x02] -+#CHECK: vleg %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x02] -+#CHECK: vleg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x02] -+#CHECK: vleg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x02] -+#CHECK: vleg %v18, 1383(%r3,%r4), 1 # encoding: [0xe7,0x23,0x45,0x67,0x18,0x02] -+ -+ vleg %v0, 0, 0 -+ vleg %v0, 0, 1 -+ vleg %v0, 4095, 0 -+ vleg %v0, 0(%r15), 0 -+ vleg %v0, 0(%r15,%r1), 0 -+ vleg %v15, 0, 0 -+ vleg %v31, 0, 0 -+ vleg %v18, 1383(%r3,%r4), 1 -+ -+#CHECK: vleh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x01] -+#CHECK: vleh %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x01] -+#CHECK: vleh %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x01] -+#CHECK: vleh %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x01] -+#CHECK: vleh %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x01] -+#CHECK: vleh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x01] -+#CHECK: vleh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x01] -+#CHECK: vleh %v18, 1383(%r3,%r4), 4 # encoding: [0xe7,0x23,0x45,0x67,0x48,0x01] -+ -+ vleh %v0, 0, 0 -+ vleh %v0, 0, 7 -+ vleh %v0, 4095, 0 -+ vleh %v0, 0(%r15), 0 -+ vleh %v0, 0(%r15,%r1), 0 -+ vleh %v15, 0, 0 -+ vleh %v31, 0, 0 -+ vleh 
%v18, 1383(%r3,%r4), 4 -+ -+#CHECK: vleib %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x40] -+#CHECK: vleib %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x40] -+#CHECK: vleib %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x40] -+#CHECK: vleib %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x40] -+#CHECK: vleib %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x40] -+#CHECK: vleib %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x40] -+#CHECK: vleib %v18, 13398, 11 # encoding: [0xe7,0x20,0x34,0x56,0xb8,0x40] -+ -+ vleib %v0, 0, 0 -+ vleib %v0, 0, 15 -+ vleib %v0, -32768, 0 -+ vleib %v0, 32767, 0 -+ vleib %v15, 0, 0 -+ vleib %v31, 0, 0 -+ vleib %v18, 0x3456, 11 -+ -+#CHECK: vleif %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x43] -+#CHECK: vleif %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x43] -+#CHECK: vleif %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x43] -+#CHECK: vleif %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x43] -+#CHECK: vleif %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x43] -+#CHECK: vleif %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x43] -+#CHECK: vleif %v18, 13398, 3 # encoding: [0xe7,0x20,0x34,0x56,0x38,0x43] -+ -+ vleif %v0, 0, 0 -+ vleif %v0, 0, 3 -+ vleif %v0, -32768, 0 -+ vleif %v0, 32767, 0 -+ vleif %v15, 0, 0 -+ vleif %v31, 0, 0 -+ vleif %v18, 0x3456, 3 -+ -+#CHECK: vleig %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x42] -+#CHECK: vleig %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x42] -+#CHECK: vleig %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x42] -+#CHECK: vleig %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x42] -+#CHECK: vleig %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x42] -+#CHECK: vleig %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x42] -+#CHECK: vleig %v18, 13398, 1 # encoding: [0xe7,0x20,0x34,0x56,0x18,0x42] -+ -+ vleig %v0, 0, 0 -+ vleig %v0, 0, 1 -+ vleig %v0, -32768, 0 -+ vleig %v0, 32767, 0 -+ vleig %v15, 0, 0 -+ vleig %v31, 0, 0 -+ vleig 
%v18, 0x3456, 1 -+ -+#CHECK: vleih %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x41] -+#CHECK: vleih %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x41] -+#CHECK: vleih %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x41] -+#CHECK: vleih %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x41] -+#CHECK: vleih %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x41] -+#CHECK: vleih %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x41] -+#CHECK: vleih %v18, 13398, 7 # encoding: [0xe7,0x20,0x34,0x56,0x78,0x41] -+ -+ vleih %v0, 0, 0 -+ vleih %v0, 0, 7 -+ vleih %v0, -32768, 0 -+ vleih %v0, 32767, 0 -+ vleih %v15, 0, 0 -+ vleih %v31, 0, 0 -+ vleih %v18, 0x3456, 7 -+ -+#CHECK: vlgvb %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x21] -+#CHECK: vlgvb %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x21] -+#CHECK: vlgvb %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x21] -+#CHECK: vlgvb %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x21] -+#CHECK: vlgvb %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x21] -+#CHECK: vlgvb %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x21] -+#CHECK: vlgvb %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x04,0x21] -+ -+ vlgvb %r0, %v0, 0 -+ vlgvb %r0, %v0, 4095 -+ vlgvb %r0, %v0, 0(%r15) -+ vlgvb %r0, %v15, 0 -+ vlgvb %r0, %v31, 0 -+ vlgvb %r15, %v0, 0 -+ vlgvb %r2, %v19, 1383(%r4) -+ -+#CHECK: vlgvf %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x21] -+#CHECK: vlgvf %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x21] -+#CHECK: vlgvf %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x21] -+#CHECK: vlgvf %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x21] -+#CHECK: vlgvf %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x21] -+#CHECK: vlgvf %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x21] -+#CHECK: vlgvf %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x24,0x21] -+ -+ vlgvf %r0, %v0, 0 -+ vlgvf %r0, %v0, 4095 -+ vlgvf %r0, %v0, 0(%r15) -+ vlgvf %r0, %v15, 
0 -+ vlgvf %r0, %v31, 0 -+ vlgvf %r15, %v0, 0 -+ vlgvf %r2, %v19, 1383(%r4) -+ -+#CHECK: vlgvg %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x21] -+#CHECK: vlgvg %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x21] -+#CHECK: vlgvg %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x21] -+#CHECK: vlgvg %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x21] -+#CHECK: vlgvg %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x21] -+#CHECK: vlgvg %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x21] -+#CHECK: vlgvg %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x34,0x21] -+ -+ vlgvg %r0, %v0, 0 -+ vlgvg %r0, %v0, 4095 -+ vlgvg %r0, %v0, 0(%r15) -+ vlgvg %r0, %v15, 0 -+ vlgvg %r0, %v31, 0 -+ vlgvg %r15, %v0, 0 -+ vlgvg %r2, %v19, 1383(%r4) -+ -+#CHECK: vlgvh %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x21] -+#CHECK: vlgvh %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x21] -+#CHECK: vlgvh %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x21] -+#CHECK: vlgvh %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x21] -+#CHECK: vlgvh %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x21] -+#CHECK: vlgvh %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x21] -+#CHECK: vlgvh %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x14,0x21] -+ -+ vlgvh %r0, %v0, 0 -+ vlgvh %r0, %v0, 4095 -+ vlgvh %r0, %v0, 0(%r15) -+ vlgvh %r0, %v15, 0 -+ vlgvh %r0, %v31, 0 -+ vlgvh %r15, %v0, 0 -+ vlgvh %r2, %v19, 1383(%r4) -+ -+#CHECK: vll %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x37] -+#CHECK: vll %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x37] -+#CHECK: vll %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x37] -+#CHECK: vll %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x37] -+#CHECK: vll %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x37] -+#CHECK: vll %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x37] -+#CHECK: vll %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x37] -+ -+ vll 
%v0, %r0, 0 -+ vll %v0, %r0, 4095 -+ vll %v0, %r0, 0(%r15) -+ vll %v0, %r15, 0 -+ vll %v15, %r0, 0 -+ vll %v31, %r0, 0 -+ vll %v18, %r3, 1383(%r4) -+ -+#CHECK: vllezb %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x04] -+#CHECK: vllezb %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x04] -+#CHECK: vllezb %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x04] -+#CHECK: vllezb %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x04] -+#CHECK: vllezb %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x04] -+#CHECK: vllezb %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x04] -+#CHECK: vllezb %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x04] -+ -+ vllezb %v0, 0 -+ vllezb %v0, 4095 -+ vllezb %v0, 0(%r15) -+ vllezb %v0, 0(%r15,%r1) -+ vllezb %v15, 0 -+ vllezb %v31, 0 -+ vllezb %v18, 0x567(%r3,%r4) -+ -+#CHECK: vllezf %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x04] -+#CHECK: vllezf %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x04] -+#CHECK: vllezf %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x04] -+#CHECK: vllezf %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x04] -+#CHECK: vllezf %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x04] -+#CHECK: vllezf %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x04] -+#CHECK: vllezf %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x04] -+ -+ vllezf %v0, 0 -+ vllezf %v0, 4095 -+ vllezf %v0, 0(%r15) -+ vllezf %v0, 0(%r15,%r1) -+ vllezf %v15, 0 -+ vllezf %v31, 0 -+ vllezf %v18, 0x567(%r3,%r4) -+ -+#CHECK: vllezg %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x04] -+#CHECK: vllezg %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x04] -+#CHECK: vllezg %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x04] -+#CHECK: vllezg %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x04] -+#CHECK: vllezg %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x04] -+#CHECK: vllezg %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x04] -+#CHECK: vllezg %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x04] 
-+ -+ vllezg %v0, 0 -+ vllezg %v0, 4095 -+ vllezg %v0, 0(%r15) -+ vllezg %v0, 0(%r15,%r1) -+ vllezg %v15, 0 -+ vllezg %v31, 0 -+ vllezg %v18, 0x567(%r3,%r4) -+ -+#CHECK: vllezh %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x04] -+#CHECK: vllezh %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x04] -+#CHECK: vllezh %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x04] -+#CHECK: vllezh %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x04] -+#CHECK: vllezh %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x04] -+#CHECK: vllezh %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x04] -+#CHECK: vllezh %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x04] -+ -+ vllezh %v0, 0 -+ vllezh %v0, 4095 -+ vllezh %v0, 0(%r15) -+ vllezh %v0, 0(%r15,%r1) -+ vllezh %v15, 0 -+ vllezh %v31, 0 -+ vllezh %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlm %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x36] -+#CHECK: vlm %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x36] -+#CHECK: vlm %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x36] -+#CHECK: vlm %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x36] -+#CHECK: vlm %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x36] -+#CHECK: vlm %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x36] -+ -+ vlm %v0, %v0, 0 -+ vlm %v0, %v0, 4095 -+ vlm %v0, %v0, 0(%r15) -+ vlm %v0, %v31, 0 -+ vlm %v31, %v0, 0 -+ vlm %v14, %v17, 1074(%r5) -+ -+#CHECK: vlpb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdf] -+#CHECK: vlpb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdf] -+#CHECK: vlpb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdf] -+#CHECK: vlpb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdf] -+#CHECK: vlpb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdf] -+#CHECK: vlpb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdf] -+ -+ vlpb %v0, %v0 -+ vlpb %v0, %v15 -+ vlpb %v0, %v31 -+ vlpb %v15, %v0 -+ vlpb %v31, %v0 -+ vlpb %v14, %v17 -+ -+#CHECK: vlpf %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x20,0xdf] -+#CHECK: vlpf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdf] -+#CHECK: vlpf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdf] -+#CHECK: vlpf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdf] -+#CHECK: vlpf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdf] -+#CHECK: vlpf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdf] -+ -+ vlpf %v0, %v0 -+ vlpf %v0, %v15 -+ vlpf %v0, %v31 -+ vlpf %v15, %v0 -+ vlpf %v31, %v0 -+ vlpf %v14, %v17 -+ -+#CHECK: vlpg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdf] -+#CHECK: vlpg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdf] -+#CHECK: vlpg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdf] -+#CHECK: vlpg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdf] -+#CHECK: vlpg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdf] -+#CHECK: vlpg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdf] -+ -+ vlpg %v0, %v0 -+ vlpg %v0, %v15 -+ vlpg %v0, %v31 -+ vlpg %v15, %v0 -+ vlpg %v31, %v0 -+ vlpg %v14, %v17 -+ -+#CHECK: vlph %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdf] -+#CHECK: vlph %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdf] -+#CHECK: vlph %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdf] -+#CHECK: vlph %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdf] -+#CHECK: vlph %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdf] -+#CHECK: vlph %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xdf] -+ -+ vlph %v0, %v0 -+ vlph %v0, %v15 -+ vlph %v0, %v31 -+ vlph %v15, %v0 -+ vlph %v31, %v0 -+ vlph %v14, %v17 -+ -+#CHECK: vlr %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x56] -+#CHECK: vlr %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x56] -+#CHECK: vlr %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x56] -+#CHECK: vlr %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x56] -+#CHECK: vlr %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x56] -+#CHECK: vlr %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x56] -+ -+ vlr %v0, %v0 -+ vlr %v0, %v15 -+ vlr 
%v0, %v31 -+ vlr %v15, %v0 -+ vlr %v31, %v0 -+ vlr %v14, %v17 -+ -+#CHECK: vlrepb %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x05] -+#CHECK: vlrepb %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x05] -+#CHECK: vlrepb %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x05] -+#CHECK: vlrepb %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x05] -+#CHECK: vlrepb %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x05] -+#CHECK: vlrepb %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x05] -+#CHECK: vlrepb %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x05] -+ -+ vlrepb %v0, 0 -+ vlrepb %v0, 4095 -+ vlrepb %v0, 0(%r15) -+ vlrepb %v0, 0(%r15,%r1) -+ vlrepb %v15, 0 -+ vlrepb %v31, 0 -+ vlrepb %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlrepf %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x05] -+#CHECK: vlrepf %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x05] -+#CHECK: vlrepf %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x05] -+#CHECK: vlrepf %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x05] -+#CHECK: vlrepf %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x05] -+#CHECK: vlrepf %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x05] -+#CHECK: vlrepf %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x05] -+ -+ vlrepf %v0, 0 -+ vlrepf %v0, 4095 -+ vlrepf %v0, 0(%r15) -+ vlrepf %v0, 0(%r15,%r1) -+ vlrepf %v15, 0 -+ vlrepf %v31, 0 -+ vlrepf %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlrepg %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x05] -+#CHECK: vlrepg %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x05] -+#CHECK: vlrepg %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x05] -+#CHECK: vlrepg %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x05] -+#CHECK: vlrepg %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x05] -+#CHECK: vlrepg %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x05] -+#CHECK: vlrepg %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x05] -+ -+ vlrepg %v0, 0 -+ vlrepg %v0, 4095 -+ vlrepg %v0, 0(%r15) -+ vlrepg %v0, 
0(%r15,%r1) -+ vlrepg %v15, 0 -+ vlrepg %v31, 0 -+ vlrepg %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlreph %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x05] -+#CHECK: vlreph %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x05] -+#CHECK: vlreph %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x05] -+#CHECK: vlreph %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x05] -+#CHECK: vlreph %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x05] -+#CHECK: vlreph %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x05] -+#CHECK: vlreph %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x05] -+ -+ vlreph %v0, 0 -+ vlreph %v0, 4095 -+ vlreph %v0, 0(%r15) -+ vlreph %v0, 0(%r15,%r1) -+ vlreph %v15, 0 -+ vlreph %v31, 0 -+ vlreph %v18, 0x567(%r3,%r4) -+ -+#CHECK: vlvgb %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x22] -+#CHECK: vlvgb %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x22] -+#CHECK: vlvgb %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x22] -+#CHECK: vlvgb %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x22] -+#CHECK: vlvgb %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x22] -+#CHECK: vlvgb %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x22] -+#CHECK: vlvgb %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x22] -+ -+ vlvgb %v0, %r0, 0 -+ vlvgb %v0, %r0, 4095 -+ vlvgb %v0, %r0, 0(%r15) -+ vlvgb %v0, %r15, 0 -+ vlvgb %v15, %r0, 0 -+ vlvgb %v31, %r0, 0 -+ vlvgb %v18, %r3, 1383(%r4) -+ -+#CHECK: vlvgf %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x22] -+#CHECK: vlvgf %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x22] -+#CHECK: vlvgf %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x22] -+#CHECK: vlvgf %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x22] -+#CHECK: vlvgf %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x22] -+#CHECK: vlvgf %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x22] -+#CHECK: vlvgf %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x22] -+ -+ vlvgf %v0, %r0, 0 
-+ vlvgf %v0, %r0, 4095 -+ vlvgf %v0, %r0, 0(%r15) -+ vlvgf %v0, %r15, 0 -+ vlvgf %v15, %r0, 0 -+ vlvgf %v31, %r0, 0 -+ vlvgf %v18, %r3, 1383(%r4) -+ -+#CHECK: vlvgg %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x22] -+#CHECK: vlvgg %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x22] -+#CHECK: vlvgg %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x22] -+#CHECK: vlvgg %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x22] -+#CHECK: vlvgg %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x22] -+#CHECK: vlvgg %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x22] -+#CHECK: vlvgg %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x22] -+ -+ vlvgg %v0, %r0, 0 -+ vlvgg %v0, %r0, 4095 -+ vlvgg %v0, %r0, 0(%r15) -+ vlvgg %v0, %r15, 0 -+ vlvgg %v15, %r0, 0 -+ vlvgg %v31, %r0, 0 -+ vlvgg %v18, %r3, 1383(%r4) -+ -+#CHECK: vlvgh %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x22] -+#CHECK: vlvgh %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x22] -+#CHECK: vlvgh %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x22] -+#CHECK: vlvgh %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x22] -+#CHECK: vlvgh %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x22] -+#CHECK: vlvgh %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x22] -+#CHECK: vlvgh %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x22] -+ -+ vlvgh %v0, %r0, 0 -+ vlvgh %v0, %r0, 4095 -+ vlvgh %v0, %r0, 0(%r15) -+ vlvgh %v0, %r15, 0 -+ vlvgh %v15, %r0, 0 -+ vlvgh %v31, %r0, 0 -+ vlvgh %v18, %r3, 1383(%r4) -+ -+#CHECK: vlvgp %v0, %r0, %r0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x62] -+#CHECK: vlvgp %v0, %r0, %r15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x62] -+#CHECK: vlvgp %v0, %r15, %r0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x62] -+#CHECK: vlvgp %v15, %r0, %r0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x62] -+#CHECK: vlvgp %v31, %r0, %r0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x62] -+#CHECK: vlvgp %v18, %r3, %r4 # encoding: [0xe7,0x23,0x40,0x00,0x08,0x62] -+ 
-+ vlvgp %v0, %r0, %r0 -+ vlvgp %v0, %r0, %r15 -+ vlvgp %v0, %r15, %r0 -+ vlvgp %v15, %r0, %r0 -+ vlvgp %v31, %r0, %r0 -+ vlvgp %v18, %r3, %r4 -+ -+#CHECK: vmaeb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xae] -+#CHECK: vmaeb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xae] -+#CHECK: vmaeb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xae] -+#CHECK: vmaeb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xae] -+#CHECK: vmaeb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xae] -+#CHECK: vmaeb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xae] -+ -+ vmaeb %v0, %v0, %v0, %v0 -+ vmaeb %v0, %v0, %v0, %v31 -+ vmaeb %v0, %v0, %v31, %v0 -+ vmaeb %v0, %v31, %v0, %v0 -+ vmaeb %v31, %v0, %v0, %v0 -+ vmaeb %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaef %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xae] -+#CHECK: vmaef %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xae] -+#CHECK: vmaef %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xae] -+#CHECK: vmaef %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xae] -+#CHECK: vmaef %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xae] -+#CHECK: vmaef %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xae] -+ -+ vmaef %v0, %v0, %v0, %v0 -+ vmaef %v0, %v0, %v0, %v31 -+ vmaef %v0, %v0, %v31, %v0 -+ vmaef %v0, %v31, %v0, %v0 -+ vmaef %v31, %v0, %v0, %v0 -+ vmaef %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaeh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xae] -+#CHECK: vmaeh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xae] -+#CHECK: vmaeh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xae] -+#CHECK: vmaeh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xae] -+#CHECK: vmaeh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xae] -+#CHECK: vmaeh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xae] -+ -+ vmaeh %v0, %v0, %v0, %v0 -+ vmaeh 
%v0, %v0, %v0, %v31 -+ vmaeh %v0, %v0, %v31, %v0 -+ vmaeh %v0, %v31, %v0, %v0 -+ vmaeh %v31, %v0, %v0, %v0 -+ vmaeh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmahb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xab] -+#CHECK: vmahb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xab] -+#CHECK: vmahb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xab] -+#CHECK: vmahb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xab] -+#CHECK: vmahb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xab] -+#CHECK: vmahb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xab] -+ -+ vmahb %v0, %v0, %v0, %v0 -+ vmahb %v0, %v0, %v0, %v31 -+ vmahb %v0, %v0, %v31, %v0 -+ vmahb %v0, %v31, %v0, %v0 -+ vmahb %v31, %v0, %v0, %v0 -+ vmahb %v13, %v17, %v21, %v25 -+ -+#CHECK: vmahf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xab] -+#CHECK: vmahf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xab] -+#CHECK: vmahf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xab] -+#CHECK: vmahf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xab] -+#CHECK: vmahf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xab] -+#CHECK: vmahf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xab] -+ -+ vmahf %v0, %v0, %v0, %v0 -+ vmahf %v0, %v0, %v0, %v31 -+ vmahf %v0, %v0, %v31, %v0 -+ vmahf %v0, %v31, %v0, %v0 -+ vmahf %v31, %v0, %v0, %v0 -+ vmahf %v13, %v17, %v21, %v25 -+ -+#CHECK: vmahh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xab] -+#CHECK: vmahh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xab] -+#CHECK: vmahh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xab] -+#CHECK: vmahh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xab] -+#CHECK: vmahh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xab] -+#CHECK: vmahh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xab] -+ -+ vmahh %v0, %v0, %v0, %v0 -+ vmahh %v0, 
%v0, %v0, %v31 -+ vmahh %v0, %v0, %v31, %v0 -+ vmahh %v0, %v31, %v0, %v0 -+ vmahh %v31, %v0, %v0, %v0 -+ vmahh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaa] -+#CHECK: vmalb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaa] -+#CHECK: vmalb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaa] -+#CHECK: vmalb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaa] -+#CHECK: vmalb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaa] -+#CHECK: vmalb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaa] -+ -+ vmalb %v0, %v0, %v0, %v0 -+ vmalb %v0, %v0, %v0, %v31 -+ vmalb %v0, %v0, %v31, %v0 -+ vmalb %v0, %v31, %v0, %v0 -+ vmalb %v31, %v0, %v0, %v0 -+ vmalb %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaleb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xac] -+#CHECK: vmaleb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xac] -+#CHECK: vmaleb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xac] -+#CHECK: vmaleb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xac] -+#CHECK: vmaleb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xac] -+#CHECK: vmaleb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xac] -+ -+ vmaleb %v0, %v0, %v0, %v0 -+ vmaleb %v0, %v0, %v0, %v31 -+ vmaleb %v0, %v0, %v31, %v0 -+ vmaleb %v0, %v31, %v0, %v0 -+ vmaleb %v31, %v0, %v0, %v0 -+ vmaleb %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalef %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xac] -+#CHECK: vmalef %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xac] -+#CHECK: vmalef %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xac] -+#CHECK: vmalef %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xac] -+#CHECK: vmalef %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xac] -+#CHECK: vmalef %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xac] -+ -+ vmalef %v0, %v0, %v0, %v0 -+ 
vmalef %v0, %v0, %v0, %v31 -+ vmalef %v0, %v0, %v31, %v0 -+ vmalef %v0, %v31, %v0, %v0 -+ vmalef %v31, %v0, %v0, %v0 -+ vmalef %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaleh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xac] -+#CHECK: vmaleh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xac] -+#CHECK: vmaleh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xac] -+#CHECK: vmaleh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xac] -+#CHECK: vmaleh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xac] -+#CHECK: vmaleh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xac] -+ -+ vmaleh %v0, %v0, %v0, %v0 -+ vmaleh %v0, %v0, %v0, %v31 -+ vmaleh %v0, %v0, %v31, %v0 -+ vmaleh %v0, %v31, %v0, %v0 -+ vmaleh %v31, %v0, %v0, %v0 -+ vmaleh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaa] -+#CHECK: vmalf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaa] -+#CHECK: vmalf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaa] -+#CHECK: vmalf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaa] -+#CHECK: vmalf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaa] -+#CHECK: vmalf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaa] -+ -+ vmalf %v0, %v0, %v0, %v0 -+ vmalf %v0, %v0, %v0, %v31 -+ vmalf %v0, %v0, %v31, %v0 -+ vmalf %v0, %v31, %v0, %v0 -+ vmalf %v31, %v0, %v0, %v0 -+ vmalf %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalhb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa9] -+#CHECK: vmalhb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xa9] -+#CHECK: vmalhb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa9] -+#CHECK: vmalhb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa9] -+#CHECK: vmalhb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa9] -+#CHECK: vmalhb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xa9] -+ -+ vmalhb %v0, 
%v0, %v0, %v0 -+ vmalhb %v0, %v0, %v0, %v31 -+ vmalhb %v0, %v0, %v31, %v0 -+ vmalhb %v0, %v31, %v0, %v0 -+ vmalhb %v31, %v0, %v0, %v0 -+ vmalhb %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalhf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xa9] -+#CHECK: vmalhf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xa9] -+#CHECK: vmalhf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xa9] -+#CHECK: vmalhf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xa9] -+#CHECK: vmalhf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xa9] -+#CHECK: vmalhf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xa9] -+ -+ vmalhf %v0, %v0, %v0, %v0 -+ vmalhf %v0, %v0, %v0, %v31 -+ vmalhf %v0, %v0, %v31, %v0 -+ vmalhf %v0, %v31, %v0, %v0 -+ vmalhf %v31, %v0, %v0, %v0 -+ vmalhf %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalhh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xa9] -+#CHECK: vmalhh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xa9] -+#CHECK: vmalhh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xa9] -+#CHECK: vmalhh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xa9] -+#CHECK: vmalhh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xa9] -+#CHECK: vmalhh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xa9] -+ -+ vmalhh %v0, %v0, %v0, %v0 -+ vmalhh %v0, %v0, %v0, %v31 -+ vmalhh %v0, %v0, %v31, %v0 -+ vmalhh %v0, %v31, %v0, %v0 -+ vmalhh %v31, %v0, %v0, %v0 -+ vmalhh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalhw %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaa] -+#CHECK: vmalhw %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaa] -+#CHECK: vmalhw %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaa] -+#CHECK: vmalhw %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaa] -+#CHECK: vmalhw %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaa] -+#CHECK: vmalhw %v13, %v17, %v21, %v25 # encoding: 
[0xe7,0xd1,0x51,0x00,0x97,0xaa] -+ -+ vmalhw %v0, %v0, %v0, %v0 -+ vmalhw %v0, %v0, %v0, %v31 -+ vmalhw %v0, %v0, %v31, %v0 -+ vmalhw %v0, %v31, %v0, %v0 -+ vmalhw %v31, %v0, %v0, %v0 -+ vmalhw %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalob %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xad] -+#CHECK: vmalob %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xad] -+#CHECK: vmalob %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xad] -+#CHECK: vmalob %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xad] -+#CHECK: vmalob %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xad] -+#CHECK: vmalob %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xad] -+ -+ vmalob %v0, %v0, %v0, %v0 -+ vmalob %v0, %v0, %v0, %v31 -+ vmalob %v0, %v0, %v31, %v0 -+ vmalob %v0, %v31, %v0, %v0 -+ vmalob %v31, %v0, %v0, %v0 -+ vmalob %v13, %v17, %v21, %v25 -+ -+#CHECK: vmalof %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xad] -+#CHECK: vmalof %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xad] -+#CHECK: vmalof %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xad] -+#CHECK: vmalof %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xad] -+#CHECK: vmalof %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xad] -+#CHECK: vmalof %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xad] -+ -+ vmalof %v0, %v0, %v0, %v0 -+ vmalof %v0, %v0, %v0, %v31 -+ vmalof %v0, %v0, %v31, %v0 -+ vmalof %v0, %v31, %v0, %v0 -+ vmalof %v31, %v0, %v0, %v0 -+ vmalof %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaloh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xad] -+#CHECK: vmaloh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xad] -+#CHECK: vmaloh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xad] -+#CHECK: vmaloh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xad] -+#CHECK: vmaloh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xad] -+#CHECK: vmaloh %v13, 
%v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xad] -+ -+ vmaloh %v0, %v0, %v0, %v0 -+ vmaloh %v0, %v0, %v0, %v31 -+ vmaloh %v0, %v0, %v31, %v0 -+ vmaloh %v0, %v31, %v0, %v0 -+ vmaloh %v31, %v0, %v0, %v0 -+ vmaloh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaob %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaf] -+#CHECK: vmaob %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaf] -+#CHECK: vmaob %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaf] -+#CHECK: vmaob %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaf] -+#CHECK: vmaob %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaf] -+#CHECK: vmaob %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaf] -+ -+ vmaob %v0, %v0, %v0, %v0 -+ vmaob %v0, %v0, %v0, %v31 -+ vmaob %v0, %v0, %v31, %v0 -+ vmaob %v0, %v31, %v0, %v0 -+ vmaob %v31, %v0, %v0, %v0 -+ vmaob %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaof %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaf] -+#CHECK: vmaof %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaf] -+#CHECK: vmaof %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaf] -+#CHECK: vmaof %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaf] -+#CHECK: vmaof %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaf] -+#CHECK: vmaof %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaf] -+ -+ vmaof %v0, %v0, %v0, %v0 -+ vmaof %v0, %v0, %v0, %v31 -+ vmaof %v0, %v0, %v31, %v0 -+ vmaof %v0, %v31, %v0, %v0 -+ vmaof %v31, %v0, %v0, %v0 -+ vmaof %v13, %v17, %v21, %v25 -+ -+#CHECK: vmaoh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaf] -+#CHECK: vmaoh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaf] -+#CHECK: vmaoh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaf] -+#CHECK: vmaoh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaf] -+#CHECK: vmaoh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaf] -+#CHECK: vmaoh %v13, 
%v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xaf] -+ -+ vmaoh %v0, %v0, %v0, %v0 -+ vmaoh %v0, %v0, %v0, %v31 -+ vmaoh %v0, %v0, %v31, %v0 -+ vmaoh %v0, %v31, %v0, %v0 -+ vmaoh %v31, %v0, %v0, %v0 -+ vmaoh %v13, %v17, %v21, %v25 -+ -+#CHECK: vmeb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa6] -+#CHECK: vmeb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa6] -+#CHECK: vmeb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa6] -+#CHECK: vmeb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa6] -+#CHECK: vmeb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa6] -+ -+ vmeb %v0, %v0, %v0 -+ vmeb %v0, %v0, %v31 -+ vmeb %v0, %v31, %v0 -+ vmeb %v31, %v0, %v0 -+ vmeb %v18, %v3, %v20 -+ -+#CHECK: vmef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa6] -+#CHECK: vmef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa6] -+#CHECK: vmef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa6] -+#CHECK: vmef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa6] -+#CHECK: vmef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa6] -+ -+ vmef %v0, %v0, %v0 -+ vmef %v0, %v0, %v31 -+ vmef %v0, %v31, %v0 -+ vmef %v31, %v0, %v0 -+ vmef %v18, %v3, %v20 -+ -+#CHECK: vmeh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa6] -+#CHECK: vmeh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa6] -+#CHECK: vmeh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa6] -+#CHECK: vmeh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa6] -+#CHECK: vmeh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa6] -+ -+ vmeh %v0, %v0, %v0 -+ vmeh %v0, %v0, %v31 -+ vmeh %v0, %v31, %v0 -+ vmeh %v31, %v0, %v0 -+ vmeh %v18, %v3, %v20 -+ -+#CHECK: vmhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa3] -+#CHECK: vmhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa3] -+#CHECK: vmhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa3] -+#CHECK: vmhb %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x08,0xa3] -+#CHECK: vmhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa3] -+ -+ vmhb %v0, %v0, %v0 -+ vmhb %v0, %v0, %v31 -+ vmhb %v0, %v31, %v0 -+ vmhb %v31, %v0, %v0 -+ vmhb %v18, %v3, %v20 -+ -+#CHECK: vmhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa3] -+#CHECK: vmhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa3] -+#CHECK: vmhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa3] -+#CHECK: vmhf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa3] -+#CHECK: vmhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa3] -+ -+ vmhf %v0, %v0, %v0 -+ vmhf %v0, %v0, %v31 -+ vmhf %v0, %v31, %v0 -+ vmhf %v31, %v0, %v0 -+ vmhf %v18, %v3, %v20 -+ -+#CHECK: vmhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa3] -+#CHECK: vmhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa3] -+#CHECK: vmhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa3] -+#CHECK: vmhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa3] -+#CHECK: vmhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa3] -+ -+ vmhh %v0, %v0, %v0 -+ vmhh %v0, %v0, %v31 -+ vmhh %v0, %v31, %v0 -+ vmhh %v31, %v0, %v0 -+ vmhh %v18, %v3, %v20 -+ -+#CHECK: vmlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa2] -+#CHECK: vmlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa2] -+#CHECK: vmlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa2] -+#CHECK: vmlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa2] -+#CHECK: vmlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa2] -+ -+ vmlb %v0, %v0, %v0 -+ vmlb %v0, %v0, %v31 -+ vmlb %v0, %v31, %v0 -+ vmlb %v31, %v0, %v0 -+ vmlb %v18, %v3, %v20 -+ -+#CHECK: vmleb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa4] -+#CHECK: vmleb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa4] -+#CHECK: vmleb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa4] -+#CHECK: vmleb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa4] 
-+#CHECK: vmleb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa4] -+ -+ vmleb %v0, %v0, %v0 -+ vmleb %v0, %v0, %v31 -+ vmleb %v0, %v31, %v0 -+ vmleb %v31, %v0, %v0 -+ vmleb %v18, %v3, %v20 -+ -+#CHECK: vmlef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa4] -+#CHECK: vmlef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa4] -+#CHECK: vmlef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa4] -+#CHECK: vmlef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa4] -+#CHECK: vmlef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa4] -+ -+ vmlef %v0, %v0, %v0 -+ vmlef %v0, %v0, %v31 -+ vmlef %v0, %v31, %v0 -+ vmlef %v31, %v0, %v0 -+ vmlef %v18, %v3, %v20 -+ -+#CHECK: vmleh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa4] -+#CHECK: vmleh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa4] -+#CHECK: vmleh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa4] -+#CHECK: vmleh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa4] -+#CHECK: vmleh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa4] -+ -+ vmleh %v0, %v0, %v0 -+ vmleh %v0, %v0, %v31 -+ vmleh %v0, %v31, %v0 -+ vmleh %v31, %v0, %v0 -+ vmleh %v18, %v3, %v20 -+ -+#CHECK: vmlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa2] -+#CHECK: vmlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa2] -+#CHECK: vmlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa2] -+#CHECK: vmlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa2] -+#CHECK: vmlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa2] -+ -+ vmlf %v0, %v0, %v0 -+ vmlf %v0, %v0, %v31 -+ vmlf %v0, %v31, %v0 -+ vmlf %v31, %v0, %v0 -+ vmlf %v18, %v3, %v20 -+ -+#CHECK: vmlhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa1] -+#CHECK: vmlhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa1] -+#CHECK: vmlhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa1] -+#CHECK: vmlhb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa1] 
-+#CHECK: vmlhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa1] -+ -+ vmlhb %v0, %v0, %v0 -+ vmlhb %v0, %v0, %v31 -+ vmlhb %v0, %v31, %v0 -+ vmlhb %v31, %v0, %v0 -+ vmlhb %v18, %v3, %v20 -+ -+#CHECK: vmlhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa1] -+#CHECK: vmlhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa1] -+#CHECK: vmlhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa1] -+#CHECK: vmlhf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa1] -+#CHECK: vmlhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa1] -+ -+ vmlhf %v0, %v0, %v0 -+ vmlhf %v0, %v0, %v31 -+ vmlhf %v0, %v31, %v0 -+ vmlhf %v31, %v0, %v0 -+ vmlhf %v18, %v3, %v20 -+ -+#CHECK: vmlhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa1] -+#CHECK: vmlhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa1] -+#CHECK: vmlhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa1] -+#CHECK: vmlhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa1] -+#CHECK: vmlhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa1] -+ -+ vmlhh %v0, %v0, %v0 -+ vmlhh %v0, %v0, %v31 -+ vmlhh %v0, %v31, %v0 -+ vmlhh %v31, %v0, %v0 -+ vmlhh %v18, %v3, %v20 -+ -+#CHECK: vmlhw %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa2] -+#CHECK: vmlhw %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa2] -+#CHECK: vmlhw %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa2] -+#CHECK: vmlhw %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa2] -+#CHECK: vmlhw %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa2] -+ -+ vmlhw %v0, %v0, %v0 -+ vmlhw %v0, %v0, %v31 -+ vmlhw %v0, %v31, %v0 -+ vmlhw %v31, %v0, %v0 -+ vmlhw %v18, %v3, %v20 -+ -+#CHECK: vmlob %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa5] -+#CHECK: vmlob %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa5] -+#CHECK: vmlob %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa5] -+#CHECK: vmlob %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x08,0xa5] -+#CHECK: vmlob %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa5] -+ -+ vmlob %v0, %v0, %v0 -+ vmlob %v0, %v0, %v31 -+ vmlob %v0, %v31, %v0 -+ vmlob %v31, %v0, %v0 -+ vmlob %v18, %v3, %v20 -+ -+#CHECK: vmlof %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa5] -+#CHECK: vmlof %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa5] -+#CHECK: vmlof %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa5] -+#CHECK: vmlof %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa5] -+#CHECK: vmlof %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa5] -+ -+ vmlof %v0, %v0, %v0 -+ vmlof %v0, %v0, %v31 -+ vmlof %v0, %v31, %v0 -+ vmlof %v31, %v0, %v0 -+ vmlof %v18, %v3, %v20 -+ -+#CHECK: vmloh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa5] -+#CHECK: vmloh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa5] -+#CHECK: vmloh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa5] -+#CHECK: vmloh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa5] -+#CHECK: vmloh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa5] -+ -+ vmloh %v0, %v0, %v0 -+ vmloh %v0, %v0, %v31 -+ vmloh %v0, %v31, %v0 -+ vmloh %v31, %v0, %v0 -+ vmloh %v18, %v3, %v20 -+ -+#CHECK: vmnb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfe] -+#CHECK: vmnb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfe] -+#CHECK: vmnb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfe] -+#CHECK: vmnb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfe] -+#CHECK: vmnb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfe] -+ -+ vmnb %v0, %v0, %v0 -+ vmnb %v0, %v0, %v31 -+ vmnb %v0, %v31, %v0 -+ vmnb %v31, %v0, %v0 -+ vmnb %v18, %v3, %v20 -+ -+#CHECK: vmnf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfe] -+#CHECK: vmnf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfe] -+#CHECK: vmnf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfe] -+#CHECK: vmnf %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x28,0xfe] -+#CHECK: vmnf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfe] -+ -+ vmnf %v0, %v0, %v0 -+ vmnf %v0, %v0, %v31 -+ vmnf %v0, %v31, %v0 -+ vmnf %v31, %v0, %v0 -+ vmnf %v18, %v3, %v20 -+ -+#CHECK: vmng %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfe] -+#CHECK: vmng %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfe] -+#CHECK: vmng %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfe] -+#CHECK: vmng %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfe] -+#CHECK: vmng %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfe] -+ -+ vmng %v0, %v0, %v0 -+ vmng %v0, %v0, %v31 -+ vmng %v0, %v31, %v0 -+ vmng %v31, %v0, %v0 -+ vmng %v18, %v3, %v20 -+ -+#CHECK: vmnh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfe] -+#CHECK: vmnh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfe] -+#CHECK: vmnh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfe] -+#CHECK: vmnh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfe] -+#CHECK: vmnh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfe] -+ -+ vmnh %v0, %v0, %v0 -+ vmnh %v0, %v0, %v31 -+ vmnh %v0, %v31, %v0 -+ vmnh %v31, %v0, %v0 -+ vmnh %v18, %v3, %v20 -+ -+#CHECK: vmnlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfc] -+#CHECK: vmnlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfc] -+#CHECK: vmnlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfc] -+#CHECK: vmnlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfc] -+#CHECK: vmnlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfc] -+ -+ vmnlb %v0, %v0, %v0 -+ vmnlb %v0, %v0, %v31 -+ vmnlb %v0, %v31, %v0 -+ vmnlb %v31, %v0, %v0 -+ vmnlb %v18, %v3, %v20 -+ -+#CHECK: vmnlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfc] -+#CHECK: vmnlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfc] -+#CHECK: vmnlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfc] -+#CHECK: vmnlf %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x28,0xfc] -+#CHECK: vmnlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfc] -+ -+ vmnlf %v0, %v0, %v0 -+ vmnlf %v0, %v0, %v31 -+ vmnlf %v0, %v31, %v0 -+ vmnlf %v31, %v0, %v0 -+ vmnlf %v18, %v3, %v20 -+ -+#CHECK: vmnlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfc] -+#CHECK: vmnlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfc] -+#CHECK: vmnlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfc] -+#CHECK: vmnlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfc] -+#CHECK: vmnlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfc] -+ -+ vmnlg %v0, %v0, %v0 -+ vmnlg %v0, %v0, %v31 -+ vmnlg %v0, %v31, %v0 -+ vmnlg %v31, %v0, %v0 -+ vmnlg %v18, %v3, %v20 -+ -+#CHECK: vmnlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfc] -+#CHECK: vmnlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfc] -+#CHECK: vmnlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfc] -+#CHECK: vmnlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfc] -+#CHECK: vmnlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfc] -+ -+ vmnlh %v0, %v0, %v0 -+ vmnlh %v0, %v0, %v31 -+ vmnlh %v0, %v31, %v0 -+ vmnlh %v31, %v0, %v0 -+ vmnlh %v18, %v3, %v20 -+ -+#CHECK: vmob %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa7] -+#CHECK: vmob %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa7] -+#CHECK: vmob %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa7] -+#CHECK: vmob %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa7] -+#CHECK: vmob %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa7] -+ -+ vmob %v0, %v0, %v0 -+ vmob %v0, %v0, %v31 -+ vmob %v0, %v31, %v0 -+ vmob %v31, %v0, %v0 -+ vmob %v18, %v3, %v20 -+ -+#CHECK: vmof %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa7] -+#CHECK: vmof %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa7] -+#CHECK: vmof %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa7] -+#CHECK: vmof %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x28,0xa7] -+#CHECK: vmof %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa7] -+ -+ vmof %v0, %v0, %v0 -+ vmof %v0, %v0, %v31 -+ vmof %v0, %v31, %v0 -+ vmof %v31, %v0, %v0 -+ vmof %v18, %v3, %v20 -+ -+#CHECK: vmoh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa7] -+#CHECK: vmoh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa7] -+#CHECK: vmoh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa7] -+#CHECK: vmoh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa7] -+#CHECK: vmoh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa7] -+ -+ vmoh %v0, %v0, %v0 -+ vmoh %v0, %v0, %v31 -+ vmoh %v0, %v31, %v0 -+ vmoh %v31, %v0, %v0 -+ vmoh %v18, %v3, %v20 -+ -+#CHECK: vmrhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x61] -+#CHECK: vmrhb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x61] -+#CHECK: vmrhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x61] -+#CHECK: vmrhb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x61] -+#CHECK: vmrhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x61] -+#CHECK: vmrhb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x61] -+#CHECK: vmrhb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x61] -+#CHECK: vmrhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x61] -+ -+ vmrhb %v0, %v0, %v0 -+ vmrhb %v0, %v0, %v15 -+ vmrhb %v0, %v0, %v31 -+ vmrhb %v0, %v15, %v0 -+ vmrhb %v0, %v31, %v0 -+ vmrhb %v15, %v0, %v0 -+ vmrhb %v31, %v0, %v0 -+ vmrhb %v18, %v3, %v20 -+ -+#CHECK: vmrhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x61] -+#CHECK: vmrhf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x61] -+#CHECK: vmrhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x61] -+#CHECK: vmrhf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x61] -+#CHECK: vmrhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x61] -+#CHECK: vmrhf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x61] -+#CHECK: vmrhf %v31, %v0, %v0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x28,0x61] -+#CHECK: vmrhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x61] -+ -+ vmrhf %v0, %v0, %v0 -+ vmrhf %v0, %v0, %v15 -+ vmrhf %v0, %v0, %v31 -+ vmrhf %v0, %v15, %v0 -+ vmrhf %v0, %v31, %v0 -+ vmrhf %v15, %v0, %v0 -+ vmrhf %v31, %v0, %v0 -+ vmrhf %v18, %v3, %v20 -+ -+#CHECK: vmrhg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x61] -+#CHECK: vmrhg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x61] -+#CHECK: vmrhg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x61] -+#CHECK: vmrhg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x61] -+#CHECK: vmrhg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x61] -+#CHECK: vmrhg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x61] -+#CHECK: vmrhg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x61] -+#CHECK: vmrhg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x61] -+ -+ vmrhg %v0, %v0, %v0 -+ vmrhg %v0, %v0, %v15 -+ vmrhg %v0, %v0, %v31 -+ vmrhg %v0, %v15, %v0 -+ vmrhg %v0, %v31, %v0 -+ vmrhg %v15, %v0, %v0 -+ vmrhg %v31, %v0, %v0 -+ vmrhg %v18, %v3, %v20 -+ -+#CHECK: vmrhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x61] -+#CHECK: vmrhh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x61] -+#CHECK: vmrhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x61] -+#CHECK: vmrhh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x61] -+#CHECK: vmrhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x61] -+#CHECK: vmrhh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x61] -+#CHECK: vmrhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x61] -+#CHECK: vmrhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x61] -+ -+ vmrhh %v0, %v0, %v0 -+ vmrhh %v0, %v0, %v15 -+ vmrhh %v0, %v0, %v31 -+ vmrhh %v0, %v15, %v0 -+ vmrhh %v0, %v31, %v0 -+ vmrhh %v15, %v0, %v0 -+ vmrhh %v31, %v0, %v0 -+ vmrhh %v18, %v3, %v20 -+ -+#CHECK: vmrlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x60] -+#CHECK: 
vmrlb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x60] -+#CHECK: vmrlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x60] -+#CHECK: vmrlb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x60] -+#CHECK: vmrlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x60] -+#CHECK: vmrlb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x60] -+#CHECK: vmrlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x60] -+#CHECK: vmrlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x60] -+ -+ vmrlb %v0, %v0, %v0 -+ vmrlb %v0, %v0, %v15 -+ vmrlb %v0, %v0, %v31 -+ vmrlb %v0, %v15, %v0 -+ vmrlb %v0, %v31, %v0 -+ vmrlb %v15, %v0, %v0 -+ vmrlb %v31, %v0, %v0 -+ vmrlb %v18, %v3, %v20 -+ -+#CHECK: vmrlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x60] -+#CHECK: vmrlf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x60] -+#CHECK: vmrlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x60] -+#CHECK: vmrlf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x60] -+#CHECK: vmrlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x60] -+#CHECK: vmrlf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x60] -+#CHECK: vmrlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x60] -+#CHECK: vmrlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x60] -+ -+ vmrlf %v0, %v0, %v0 -+ vmrlf %v0, %v0, %v15 -+ vmrlf %v0, %v0, %v31 -+ vmrlf %v0, %v15, %v0 -+ vmrlf %v0, %v31, %v0 -+ vmrlf %v15, %v0, %v0 -+ vmrlf %v31, %v0, %v0 -+ vmrlf %v18, %v3, %v20 -+ -+#CHECK: vmrlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x60] -+#CHECK: vmrlg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x60] -+#CHECK: vmrlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x60] -+#CHECK: vmrlg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x60] -+#CHECK: vmrlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x60] -+#CHECK: vmrlg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x60] -+#CHECK: vmrlg %v31, %v0, %v0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x38,0x60] -+#CHECK: vmrlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x60] -+ -+ vmrlg %v0, %v0, %v0 -+ vmrlg %v0, %v0, %v15 -+ vmrlg %v0, %v0, %v31 -+ vmrlg %v0, %v15, %v0 -+ vmrlg %v0, %v31, %v0 -+ vmrlg %v15, %v0, %v0 -+ vmrlg %v31, %v0, %v0 -+ vmrlg %v18, %v3, %v20 -+ -+#CHECK: vmrlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x60] -+#CHECK: vmrlh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x60] -+#CHECK: vmrlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x60] -+#CHECK: vmrlh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x60] -+#CHECK: vmrlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x60] -+#CHECK: vmrlh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x60] -+#CHECK: vmrlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x60] -+#CHECK: vmrlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x60] -+ -+ vmrlh %v0, %v0, %v0 -+ vmrlh %v0, %v0, %v15 -+ vmrlh %v0, %v0, %v31 -+ vmrlh %v0, %v15, %v0 -+ vmrlh %v0, %v31, %v0 -+ vmrlh %v15, %v0, %v0 -+ vmrlh %v31, %v0, %v0 -+ vmrlh %v18, %v3, %v20 -+ -+#CHECK: vmxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xff] -+#CHECK: vmxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xff] -+#CHECK: vmxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xff] -+#CHECK: vmxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xff] -+#CHECK: vmxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xff] -+ -+ vmxb %v0, %v0, %v0 -+ vmxb %v0, %v0, %v31 -+ vmxb %v0, %v31, %v0 -+ vmxb %v31, %v0, %v0 -+ vmxb %v18, %v3, %v20 -+ -+#CHECK: vmxf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xff] -+#CHECK: vmxf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xff] -+#CHECK: vmxf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xff] -+#CHECK: vmxf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xff] -+#CHECK: vmxf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xff] -+ -+ vmxf %v0, %v0, %v0 
-+ vmxf %v0, %v0, %v31 -+ vmxf %v0, %v31, %v0 -+ vmxf %v31, %v0, %v0 -+ vmxf %v18, %v3, %v20 -+ -+#CHECK: vmxg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xff] -+#CHECK: vmxg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xff] -+#CHECK: vmxg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xff] -+#CHECK: vmxg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xff] -+#CHECK: vmxg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xff] -+ -+ vmxg %v0, %v0, %v0 -+ vmxg %v0, %v0, %v31 -+ vmxg %v0, %v31, %v0 -+ vmxg %v31, %v0, %v0 -+ vmxg %v18, %v3, %v20 -+ -+#CHECK: vmxh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xff] -+#CHECK: vmxh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xff] -+#CHECK: vmxh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xff] -+#CHECK: vmxh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xff] -+#CHECK: vmxh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xff] -+ -+ vmxh %v0, %v0, %v0 -+ vmxh %v0, %v0, %v31 -+ vmxh %v0, %v31, %v0 -+ vmxh %v31, %v0, %v0 -+ vmxh %v18, %v3, %v20 -+ -+#CHECK: vmxlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfd] -+#CHECK: vmxlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfd] -+#CHECK: vmxlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfd] -+#CHECK: vmxlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfd] -+#CHECK: vmxlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfd] -+ -+ vmxlb %v0, %v0, %v0 -+ vmxlb %v0, %v0, %v31 -+ vmxlb %v0, %v31, %v0 -+ vmxlb %v31, %v0, %v0 -+ vmxlb %v18, %v3, %v20 -+ -+#CHECK: vmxlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfd] -+#CHECK: vmxlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfd] -+#CHECK: vmxlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfd] -+#CHECK: vmxlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfd] -+#CHECK: vmxlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfd] -+ -+ vmxlf %v0, %v0, %v0 -+ vmxlf %v0, %v0, 
%v31 -+ vmxlf %v0, %v31, %v0 -+ vmxlf %v31, %v0, %v0 -+ vmxlf %v18, %v3, %v20 -+ -+#CHECK: vmxlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfd] -+#CHECK: vmxlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfd] -+#CHECK: vmxlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfd] -+#CHECK: vmxlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfd] -+#CHECK: vmxlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfd] -+ -+ vmxlg %v0, %v0, %v0 -+ vmxlg %v0, %v0, %v31 -+ vmxlg %v0, %v31, %v0 -+ vmxlg %v31, %v0, %v0 -+ vmxlg %v18, %v3, %v20 -+ -+#CHECK: vmxlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfd] -+#CHECK: vmxlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfd] -+#CHECK: vmxlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfd] -+#CHECK: vmxlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfd] -+#CHECK: vmxlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfd] -+ -+ vmxlh %v0, %v0, %v0 -+ vmxlh %v0, %v0, %v31 -+ vmxlh %v0, %v31, %v0 -+ vmxlh %v31, %v0, %v0 -+ vmxlh %v18, %v3, %v20 -+ -+#CHECK: vn %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x68] -+#CHECK: vn %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x68] -+#CHECK: vn %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x68] -+#CHECK: vn %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x68] -+#CHECK: vn %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x68] -+ -+ vn %v0, %v0, %v0 -+ vn %v0, %v0, %v31 -+ vn %v0, %v31, %v0 -+ vn %v31, %v0, %v0 -+ vn %v18, %v3, %v20 -+ -+#CHECK: vnc %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x69] -+#CHECK: vnc %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x69] -+#CHECK: vnc %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x69] -+#CHECK: vnc %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x69] -+#CHECK: vnc %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x69] -+ -+ vnc %v0, %v0, %v0 -+ vnc %v0, %v0, %v31 -+ vnc %v0, %v31, %v0 -+ vnc %v31, 
%v0, %v0 -+ vnc %v18, %v3, %v20 -+ -+#CHECK: vno %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6b] -+#CHECK: vno %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6b] -+#CHECK: vno %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6b] -+#CHECK: vno %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6b] -+#CHECK: vno %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6b] -+ -+ vno %v0, %v0, %v0 -+ vno %v0, %v0, %v31 -+ vno %v0, %v31, %v0 -+ vno %v31, %v0, %v0 -+ vno %v18, %v3, %v20 -+ -+#CHECK: vo %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6a] -+#CHECK: vo %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6a] -+#CHECK: vo %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6a] -+#CHECK: vo %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6a] -+#CHECK: vo %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6a] -+ -+ vo %v0, %v0, %v0 -+ vo %v0, %v0, %v31 -+ vo %v0, %v31, %v0 -+ vo %v31, %v0, %v0 -+ vo %v18, %v3, %v20 -+ -+#CHECK: vone %v0 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44] -+#CHECK: vone %v15 # encoding: [0xe7,0xf0,0xff,0xff,0x00,0x44] -+#CHECK: vone %v22 # encoding: [0xe7,0x60,0xff,0xff,0x08,0x44] -+#CHECK: vone %v31 # encoding: [0xe7,0xf0,0xff,0xff,0x08,0x44] -+ -+ vone %v0 -+ vone %v15 -+ vone %v22 -+ vone %v31 -+ -+#CHECK: vpdi %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x84] -+#CHECK: vpdi %v0, %v0, %v0, 5 # encoding: [0xe7,0x00,0x00,0x00,0x50,0x84] -+#CHECK: vpdi %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x84] -+#CHECK: vpdi %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x84] -+#CHECK: vpdi %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x84] -+#CHECK: vpdi %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x00,0x46,0x84] -+ -+ vpdi %v0, %v0, %v0, 0 -+ vpdi %v0, %v0, %v0, 5 -+ vpdi %v0, %v0, %v31, 0 -+ vpdi %v0, %v31, %v0, 0 -+ vpdi %v31, %v0, %v0, 0 -+ vpdi %v13, %v17, %v21, 4 -+ -+#CHECK: vperm %v0, %v0, %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x00,0x8c] -+#CHECK: vperm %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8c] -+#CHECK: vperm %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8c] -+#CHECK: vperm %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8c] -+#CHECK: vperm %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8c] -+#CHECK: vperm %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8c] -+ -+ vperm %v0, %v0, %v0, %v0 -+ vperm %v0, %v0, %v0, %v31 -+ vperm %v0, %v0, %v31, %v0 -+ vperm %v0, %v31, %v0, %v0 -+ vperm %v31, %v0, %v0, %v0 -+ vperm %v13, %v17, %v21, %v25 -+ -+#CHECK: vpkf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x94] -+#CHECK: vpkf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x94] -+#CHECK: vpkf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x94] -+#CHECK: vpkf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x94] -+#CHECK: vpkf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x94] -+#CHECK: vpkf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x94] -+#CHECK: vpkf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x94] -+#CHECK: vpkf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x94] -+ -+ vpkf %v0, %v0, %v0 -+ vpkf %v0, %v0, %v15 -+ vpkf %v0, %v0, %v31 -+ vpkf %v0, %v15, %v0 -+ vpkf %v0, %v31, %v0 -+ vpkf %v15, %v0, %v0 -+ vpkf %v31, %v0, %v0 -+ vpkf %v18, %v3, %v20 -+ -+#CHECK: vpkg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x94] -+#CHECK: vpkg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x94] -+#CHECK: vpkg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x94] -+#CHECK: vpkg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x94] -+#CHECK: vpkg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x94] -+#CHECK: vpkg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x94] -+#CHECK: vpkg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x94] -+#CHECK: vpkg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x94] -+ -+ 
vpkg %v0, %v0, %v0 -+ vpkg %v0, %v0, %v15 -+ vpkg %v0, %v0, %v31 -+ vpkg %v0, %v15, %v0 -+ vpkg %v0, %v31, %v0 -+ vpkg %v15, %v0, %v0 -+ vpkg %v31, %v0, %v0 -+ vpkg %v18, %v3, %v20 -+ -+#CHECK: vpkh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x94] -+#CHECK: vpkh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x94] -+#CHECK: vpkh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x94] -+#CHECK: vpkh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x94] -+#CHECK: vpkh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x94] -+#CHECK: vpkh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x94] -+#CHECK: vpkh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x94] -+#CHECK: vpkh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x94] -+ -+ vpkh %v0, %v0, %v0 -+ vpkh %v0, %v0, %v15 -+ vpkh %v0, %v0, %v31 -+ vpkh %v0, %v15, %v0 -+ vpkh %v0, %v31, %v0 -+ vpkh %v15, %v0, %v0 -+ vpkh %v31, %v0, %v0 -+ vpkh %v18, %v3, %v20 -+ -+#CHECK: vpklsf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x95] -+#CHECK: vpklsf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x95] -+#CHECK: vpklsf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x95] -+#CHECK: vpklsf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x95] -+#CHECK: vpklsf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x95] -+#CHECK: vpklsf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x95] -+#CHECK: vpklsf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x95] -+#CHECK: vpklsf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x95] -+#CHECK: vpklsfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x95] -+ -+ vpklsf %v0, %v0, %v0 -+ vpklsf %v0, %v0, %v15 -+ vpklsf %v0, %v0, %v31 -+ vpklsf %v0, %v15, %v0 -+ vpklsf %v0, %v31, %v0 -+ vpklsf %v15, %v0, %v0 -+ vpklsf %v31, %v0, %v0 -+ vpklsf %v18, %v3, %v20 -+ vpklsfs %v5, %v22, %v7 -+ -+#CHECK: vpklsg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x95] -+#CHECK: vpklsg %v0, %v0, %v15 # 
encoding: [0xe7,0x00,0xf0,0x00,0x30,0x95] -+#CHECK: vpklsg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x95] -+#CHECK: vpklsg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x95] -+#CHECK: vpklsg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x95] -+#CHECK: vpklsg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x95] -+#CHECK: vpklsg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x95] -+#CHECK: vpklsg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x95] -+#CHECK: vpklsgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0x95] -+ -+ vpklsg %v0, %v0, %v0 -+ vpklsg %v0, %v0, %v15 -+ vpklsg %v0, %v0, %v31 -+ vpklsg %v0, %v15, %v0 -+ vpklsg %v0, %v31, %v0 -+ vpklsg %v15, %v0, %v0 -+ vpklsg %v31, %v0, %v0 -+ vpklsg %v18, %v3, %v20 -+ vpklsgs %v5, %v22, %v7 -+ -+#CHECK: vpklsh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x95] -+#CHECK: vpklsh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x95] -+#CHECK: vpklsh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x95] -+#CHECK: vpklsh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x95] -+#CHECK: vpklsh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x95] -+#CHECK: vpklsh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x95] -+#CHECK: vpklsh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x95] -+#CHECK: vpklsh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x95] -+#CHECK: vpklshs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x95] -+ -+ vpklsh %v0, %v0, %v0 -+ vpklsh %v0, %v0, %v15 -+ vpklsh %v0, %v0, %v31 -+ vpklsh %v0, %v15, %v0 -+ vpklsh %v0, %v31, %v0 -+ vpklsh %v15, %v0, %v0 -+ vpklsh %v31, %v0, %v0 -+ vpklsh %v18, %v3, %v20 -+ vpklshs %v5, %v22, %v7 -+ -+#CHECK: vpksf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x97] -+#CHECK: vpksf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x97] -+#CHECK: vpksf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x97] -+#CHECK: vpksf %v0, %v15, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x20,0x97] -+#CHECK: vpksf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x97] -+#CHECK: vpksf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x97] -+#CHECK: vpksf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x97] -+#CHECK: vpksf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x97] -+#CHECK: vpksfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x97] -+ -+ vpksf %v0, %v0, %v0 -+ vpksf %v0, %v0, %v15 -+ vpksf %v0, %v0, %v31 -+ vpksf %v0, %v15, %v0 -+ vpksf %v0, %v31, %v0 -+ vpksf %v15, %v0, %v0 -+ vpksf %v31, %v0, %v0 -+ vpksf %v18, %v3, %v20 -+ vpksfs %v5, %v22, %v7 -+ -+#CHECK: vpksg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x97] -+#CHECK: vpksg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x97] -+#CHECK: vpksg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x97] -+#CHECK: vpksg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x97] -+#CHECK: vpksg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x97] -+#CHECK: vpksg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x97] -+#CHECK: vpksg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x97] -+#CHECK: vpksg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x97] -+#CHECK: vpksgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0x97] -+ -+ vpksg %v0, %v0, %v0 -+ vpksg %v0, %v0, %v15 -+ vpksg %v0, %v0, %v31 -+ vpksg %v0, %v15, %v0 -+ vpksg %v0, %v31, %v0 -+ vpksg %v15, %v0, %v0 -+ vpksg %v31, %v0, %v0 -+ vpksg %v18, %v3, %v20 -+ vpksgs %v5, %v22, %v7 -+ -+#CHECK: vpksh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x97] -+#CHECK: vpksh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x97] -+#CHECK: vpksh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x97] -+#CHECK: vpksh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x97] -+#CHECK: vpksh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x97] -+#CHECK: vpksh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x97] -+#CHECK: vpksh 
%v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x97] -+#CHECK: vpksh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x97] -+#CHECK: vpkshs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x97] -+ -+ vpksh %v0, %v0, %v0 -+ vpksh %v0, %v0, %v15 -+ vpksh %v0, %v0, %v31 -+ vpksh %v0, %v15, %v0 -+ vpksh %v0, %v31, %v0 -+ vpksh %v15, %v0, %v0 -+ vpksh %v31, %v0, %v0 -+ vpksh %v18, %v3, %v20 -+ vpkshs %v5, %v22, %v7 -+ -+#CHECK: vpopct %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x50] -+#CHECK: vpopct %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x50] -+#CHECK: vpopct %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x50] -+#CHECK: vpopct %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x50] -+#CHECK: vpopct %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x50] -+#CHECK: vpopct %v14, %v17, 0 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x50] -+ -+ vpopct %v0, %v0, 0 -+ vpopct %v0, %v15, 0 -+ vpopct %v0, %v31, 0 -+ vpopct %v15, %v0, 0 -+ vpopct %v31, %v0, 0 -+ vpopct %v14, %v17, 0 -+ -+#CHECK: vrepb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x4d] -+#CHECK: vrepb %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x4d] -+#CHECK: vrepb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x4d] -+#CHECK: vrepb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x4d] -+#CHECK: vrepb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x4d] -+#CHECK: vrepb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x4d] -+#CHECK: vrepb %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x04,0x4d] -+ -+ vrepb %v0, %v0, 0 -+ vrepb %v0, %v0, 65535 -+ vrepb %v0, %v15, 0 -+ vrepb %v0, %v31, 0 -+ vrepb %v15, %v0, 0 -+ vrepb %v31, %v0, 0 -+ vrepb %v4, %v21, 0x6789 -+ -+#CHECK: vrepf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x4d] -+#CHECK: vrepf %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x20,0x4d] -+#CHECK: vrepf %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x4d] -+#CHECK: vrepf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x4d] 
-+#CHECK: vrepf %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x4d] -+#CHECK: vrepf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x4d] -+#CHECK: vrepf %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x24,0x4d] -+ -+ vrepf %v0, %v0, 0 -+ vrepf %v0, %v0, 65535 -+ vrepf %v0, %v15, 0 -+ vrepf %v0, %v31, 0 -+ vrepf %v15, %v0, 0 -+ vrepf %v31, %v0, 0 -+ vrepf %v4, %v21, 0x6789 -+ -+#CHECK: vrepg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4d] -+#CHECK: vrepg %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x30,0x4d] -+#CHECK: vrepg %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4d] -+#CHECK: vrepg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4d] -+#CHECK: vrepg %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4d] -+#CHECK: vrepg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4d] -+#CHECK: vrepg %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x34,0x4d] -+ -+ vrepg %v0, %v0, 0 -+ vrepg %v0, %v0, 65535 -+ vrepg %v0, %v15, 0 -+ vrepg %v0, %v31, 0 -+ vrepg %v15, %v0, 0 -+ vrepg %v31, %v0, 0 -+ vrepg %v4, %v21, 0x6789 -+ -+#CHECK: vreph %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x4d] -+#CHECK: vreph %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x10,0x4d] -+#CHECK: vreph %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x4d] -+#CHECK: vreph %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x4d] -+#CHECK: vreph %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x4d] -+#CHECK: vreph %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x4d] -+#CHECK: vreph %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x14,0x4d] -+ -+ vreph %v0, %v0, 0 -+ vreph %v0, %v0, 65535 -+ vreph %v0, %v15, 0 -+ vreph %v0, %v31, 0 -+ vreph %v15, %v0, 0 -+ vreph %v31, %v0, 0 -+ vreph %v4, %v21, 0x6789 -+ -+#CHECK: vrepib %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x45] -+#CHECK: vrepib %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x45] -+#CHECK: vrepib %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x45] -+#CHECK: vrepib %v15, 0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x00,0x45] -+#CHECK: vrepib %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x45] -+#CHECK: vrepib %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x08,0x45] -+ -+ vrepib %v0, 0 -+ vrepib %v0, -32768 -+ vrepib %v0, 32767 -+ vrepib %v15, 0 -+ vrepib %v31, 0 -+ vrepib %v18, 0x3456 -+ -+#CHECK: vrepif %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x45] -+#CHECK: vrepif %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x20,0x45] -+#CHECK: vrepif %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x20,0x45] -+#CHECK: vrepif %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x45] -+#CHECK: vrepif %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x45] -+#CHECK: vrepif %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x28,0x45] -+ -+ vrepif %v0, 0 -+ vrepif %v0, -32768 -+ vrepif %v0, 32767 -+ vrepif %v15, 0 -+ vrepif %v31, 0 -+ vrepif %v18, 0x3456 -+ -+#CHECK: vrepig %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x45] -+#CHECK: vrepig %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x30,0x45] -+#CHECK: vrepig %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x30,0x45] -+#CHECK: vrepig %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x45] -+#CHECK: vrepig %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x45] -+#CHECK: vrepig %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x38,0x45] -+ -+ vrepig %v0, 0 -+ vrepig %v0, -32768 -+ vrepig %v0, 32767 -+ vrepig %v15, 0 -+ vrepig %v31, 0 -+ vrepig %v18, 0x3456 -+ -+#CHECK: vrepih %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x45] -+#CHECK: vrepih %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x10,0x45] -+#CHECK: vrepih %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x10,0x45] -+#CHECK: vrepih %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x45] -+#CHECK: vrepih %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x45] -+#CHECK: vrepih %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x18,0x45] -+ -+ vrepih %v0, 0 -+ vrepih %v0, -32768 -+ vrepih %v0, 32767 -+ vrepih %v15, 0 -+ vrepih %v31, 0 -+ vrepih %v18, 0x3456 -+ -+#CHECK: vsb %v0, %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x00,0xf7] -+#CHECK: vsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf7] -+#CHECK: vsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf7] -+#CHECK: vsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf7] -+#CHECK: vsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf7] -+ -+ vsb %v0, %v0, %v0 -+ vsb %v0, %v0, %v31 -+ vsb %v0, %v31, %v0 -+ vsb %v31, %v0, %v0 -+ vsb %v18, %v3, %v20 -+ -+#CHECK: vsbcbiq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbd] -+#CHECK: vsbcbiq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbd] -+#CHECK: vsbcbiq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbd] -+#CHECK: vsbcbiq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbd] -+#CHECK: vsbcbiq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbd] -+#CHECK: vsbcbiq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbd] -+ -+ vsbcbiq %v0, %v0, %v0, %v0 -+ vsbcbiq %v0, %v0, %v0, %v31 -+ vsbcbiq %v0, %v0, %v31, %v0 -+ vsbcbiq %v0, %v31, %v0, %v0 -+ vsbcbiq %v31, %v0, %v0, %v0 -+ vsbcbiq %v13, %v17, %v21, %v25 -+ -+#CHECK: vsbiq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbf] -+#CHECK: vsbiq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbf] -+#CHECK: vsbiq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbf] -+#CHECK: vsbiq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbf] -+#CHECK: vsbiq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbf] -+#CHECK: vsbiq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbf] -+ -+ vsbiq %v0, %v0, %v0, %v0 -+ vsbiq %v0, %v0, %v0, %v31 -+ vsbiq %v0, %v0, %v31, %v0 -+ vsbiq %v0, %v31, %v0, %v0 -+ vsbiq %v31, %v0, %v0, %v0 -+ vsbiq %v13, %v17, %v21, %v25 -+ -+#CHECK: vscbib %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf5] -+#CHECK: vscbib %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf5] -+#CHECK: vscbib %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x04,0xf5] -+#CHECK: vscbib %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf5] -+#CHECK: vscbib %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf5] -+ -+ vscbib %v0, %v0, %v0 -+ vscbib %v0, %v0, %v31 -+ vscbib %v0, %v31, %v0 -+ vscbib %v31, %v0, %v0 -+ vscbib %v18, %v3, %v20 -+ -+#CHECK: vscbif %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf5] -+#CHECK: vscbif %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf5] -+#CHECK: vscbif %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf5] -+#CHECK: vscbif %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf5] -+#CHECK: vscbif %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf5] -+ -+ vscbif %v0, %v0, %v0 -+ vscbif %v0, %v0, %v31 -+ vscbif %v0, %v31, %v0 -+ vscbif %v31, %v0, %v0 -+ vscbif %v18, %v3, %v20 -+ -+#CHECK: vscbig %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf5] -+#CHECK: vscbig %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf5] -+#CHECK: vscbig %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf5] -+#CHECK: vscbig %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf5] -+#CHECK: vscbig %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf5] -+ -+ vscbig %v0, %v0, %v0 -+ vscbig %v0, %v0, %v31 -+ vscbig %v0, %v31, %v0 -+ vscbig %v31, %v0, %v0 -+ vscbig %v18, %v3, %v20 -+ -+#CHECK: vscbih %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf5] -+#CHECK: vscbih %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf5] -+#CHECK: vscbih %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf5] -+#CHECK: vscbih %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf5] -+#CHECK: vscbih %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf5] -+ -+ vscbih %v0, %v0, %v0 -+ vscbih %v0, %v0, %v31 -+ vscbih %v0, %v31, %v0 -+ vscbih %v31, %v0, %v0 -+ vscbih %v18, %v3, %v20 -+ -+#CHECK: vscbiq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf5] -+#CHECK: vscbiq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf5] 
-+#CHECK: vscbiq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf5] -+#CHECK: vscbiq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf5] -+#CHECK: vscbiq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf5] -+ -+ vscbiq %v0, %v0, %v0 -+ vscbiq %v0, %v0, %v31 -+ vscbiq %v0, %v31, %v0 -+ vscbiq %v31, %v0, %v0 -+ vscbiq %v18, %v3, %v20 -+ -+#CHECK: vscef %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1b] -+#CHECK: vscef %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1b] -+#CHECK: vscef %v0, 0(%v0,%r1), 3 # encoding: [0xe7,0x00,0x10,0x00,0x30,0x1b] -+#CHECK: vscef %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1b] -+#CHECK: vscef %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1b] -+#CHECK: vscef %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1b] -+#CHECK: vscef %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x1b] -+#CHECK: vscef %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1b] -+#CHECK: vscef %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1b] -+#CHECK: vscef %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1b] -+ -+ vscef %v0, 0(%v0), 0 -+ vscef %v0, 0(%v0,%r1), 0 -+ vscef %v0, 0(%v0,%r1), 3 -+ vscef %v0, 0(%v0,%r15), 0 -+ vscef %v0, 0(%v15,%r1), 0 -+ vscef %v0, 0(%v31,%r1), 0 -+ vscef %v0, 4095(%v0, %r1), 0 -+ vscef %v15, 0(%v0,%r1), 0 -+ vscef %v31, 0(%v0,%r1), 0 -+ vscef %v10, 1000(%v19,%r7), 1 -+ -+#CHECK: vsceg %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1a] -+#CHECK: vsceg %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1a] -+#CHECK: vsceg %v0, 0(%v0,%r1), 1 # encoding: [0xe7,0x00,0x10,0x00,0x10,0x1a] -+#CHECK: vsceg %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1a] -+#CHECK: vsceg %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1a] -+#CHECK: vsceg %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1a] -+#CHECK: vsceg %v0, 4095(%v0,%r1), 0 # encoding: 
[0xe7,0x00,0x1f,0xff,0x00,0x1a] -+#CHECK: vsceg %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1a] -+#CHECK: vsceg %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1a] -+#CHECK: vsceg %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1a] -+ -+ vsceg %v0, 0(%v0), 0 -+ vsceg %v0, 0(%v0,%r1), 0 -+ vsceg %v0, 0(%v0,%r1), 1 -+ vsceg %v0, 0(%v0,%r15), 0 -+ vsceg %v0, 0(%v15,%r1), 0 -+ vsceg %v0, 0(%v31,%r1), 0 -+ vsceg %v0, 4095(%v0,%r1), 0 -+ vsceg %v15, 0(%v0,%r1), 0 -+ vsceg %v31, 0(%v0,%r1), 0 -+ vsceg %v10, 1000(%v19,%r7), 1 -+ -+#CHECK: vsel %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8d] -+#CHECK: vsel %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8d] -+#CHECK: vsel %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8d] -+#CHECK: vsel %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8d] -+#CHECK: vsel %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8d] -+#CHECK: vsel %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8d] -+ -+ vsel %v0, %v0, %v0, %v0 -+ vsel %v0, %v0, %v0, %v31 -+ vsel %v0, %v0, %v31, %v0 -+ vsel %v0, %v31, %v0, %v0 -+ vsel %v31, %v0, %v0, %v0 -+ vsel %v13, %v17, %v21, %v25 -+ -+#CHECK: vsegb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5f] -+#CHECK: vsegb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5f] -+#CHECK: vsegb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x5f] -+#CHECK: vsegb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5f] -+#CHECK: vsegb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5f] -+#CHECK: vsegb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x5f] -+ -+ vsegb %v0, %v0 -+ vsegb %v0, %v15 -+ vsegb %v0, %v31 -+ vsegb %v15, %v0 -+ vsegb %v31, %v0 -+ vsegb %v14, %v17 -+ -+#CHECK: vsegf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5f] -+#CHECK: vsegf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5f] -+#CHECK: vsegf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5f] -+#CHECK: vsegf %v15, %v0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5f] -+#CHECK: vsegf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5f] -+#CHECK: vsegf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x5f] -+ -+ vsegf %v0, %v0 -+ vsegf %v0, %v15 -+ vsegf %v0, %v31 -+ vsegf %v15, %v0 -+ vsegf %v31, %v0 -+ vsegf %v14, %v17 -+ -+#CHECK: vsegh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5f] -+#CHECK: vsegh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x5f] -+#CHECK: vsegh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5f] -+#CHECK: vsegh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5f] -+#CHECK: vsegh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5f] -+#CHECK: vsegh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x5f] -+ -+ vsegh %v0, %v0 -+ vsegh %v0, %v15 -+ vsegh %v0, %v31 -+ vsegh %v15, %v0 -+ vsegh %v31, %v0 -+ vsegh %v14, %v17 -+ -+#CHECK: vsf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf7] -+#CHECK: vsf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf7] -+#CHECK: vsf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf7] -+#CHECK: vsf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf7] -+#CHECK: vsf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf7] -+ -+ vsf %v0, %v0, %v0 -+ vsf %v0, %v0, %v31 -+ vsf %v0, %v31, %v0 -+ vsf %v31, %v0, %v0 -+ vsf %v18, %v3, %v20 -+ -+#CHECK: vsg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf7] -+#CHECK: vsg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf7] -+#CHECK: vsg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf7] -+#CHECK: vsg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf7] -+#CHECK: vsg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf7] -+ -+ vsg %v0, %v0, %v0 -+ vsg %v0, %v0, %v31 -+ vsg %v0, %v31, %v0 -+ vsg %v31, %v0, %v0 -+ vsg %v18, %v3, %v20 -+ -+#CHECK: vsh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf7] -+#CHECK: vsh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf7] -+#CHECK: vsh %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x14,0xf7] -+#CHECK: vsh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf7] -+#CHECK: vsh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf7] -+ -+ vsh %v0, %v0, %v0 -+ vsh %v0, %v0, %v31 -+ vsh %v0, %v31, %v0 -+ vsh %v31, %v0, %v0 -+ vsh %v18, %v3, %v20 -+ -+#CHECK: vsl %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x74] -+#CHECK: vsl %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x74] -+#CHECK: vsl %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x74] -+#CHECK: vsl %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x74] -+#CHECK: vsl %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x74] -+ -+ vsl %v0, %v0, %v0 -+ vsl %v0, %v0, %v31 -+ vsl %v0, %v31, %v0 -+ vsl %v31, %v0, %v0 -+ vsl %v18, %v3, %v20 -+ -+#CHECK: vslb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x75] -+#CHECK: vslb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x75] -+#CHECK: vslb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x75] -+#CHECK: vslb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x75] -+#CHECK: vslb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x75] -+ -+ vslb %v0, %v0, %v0 -+ vslb %v0, %v0, %v31 -+ vslb %v0, %v31, %v0 -+ vslb %v31, %v0, %v0 -+ vslb %v18, %v3, %v20 -+ -+#CHECK: vsldb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x77] -+#CHECK: vsldb %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x77] -+#CHECK: vsldb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x77] -+#CHECK: vsldb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x77] -+#CHECK: vsldb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x77] -+#CHECK: vsldb %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x77] -+ -+ vsldb %v0, %v0, %v0, 0 -+ vsldb %v0, %v0, %v0, 255 -+ vsldb %v0, %v0, %v31, 0 -+ vsldb %v0, %v31, %v0, 0 -+ vsldb %v31, %v0, %v0, 0 -+ vsldb %v13, %v17, %v21, 0x79 -+ -+#CHECK: vsq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf7] -+#CHECK: vsq 
%v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf7] -+#CHECK: vsq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf7] -+#CHECK: vsq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf7] -+#CHECK: vsq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf7] -+ -+ vsq %v0, %v0, %v0 -+ vsq %v0, %v0, %v31 -+ vsq %v0, %v31, %v0 -+ vsq %v31, %v0, %v0 -+ vsq %v18, %v3, %v20 -+ -+#CHECK: vsra %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7e] -+#CHECK: vsra %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7e] -+#CHECK: vsra %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7e] -+#CHECK: vsra %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7e] -+#CHECK: vsra %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7e] -+ -+ vsra %v0, %v0, %v0 -+ vsra %v0, %v0, %v31 -+ vsra %v0, %v31, %v0 -+ vsra %v31, %v0, %v0 -+ vsra %v18, %v3, %v20 -+ -+#CHECK: vsrab %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7f] -+#CHECK: vsrab %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7f] -+#CHECK: vsrab %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7f] -+#CHECK: vsrab %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7f] -+#CHECK: vsrab %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7f] -+ -+ vsrab %v0, %v0, %v0 -+ vsrab %v0, %v0, %v31 -+ vsrab %v0, %v31, %v0 -+ vsrab %v31, %v0, %v0 -+ vsrab %v18, %v3, %v20 -+ -+#CHECK: vsrl %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7c] -+#CHECK: vsrl %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7c] -+#CHECK: vsrl %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7c] -+#CHECK: vsrl %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7c] -+#CHECK: vsrl %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7c] -+ -+ vsrl %v0, %v0, %v0 -+ vsrl %v0, %v0, %v31 -+ vsrl %v0, %v31, %v0 -+ vsrl %v31, %v0, %v0 -+ vsrl %v18, %v3, %v20 -+ -+#CHECK: vsrlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7d] -+#CHECK: vsrlb %v0, %v0, %v31 # encoding: 
[0xe7,0x00,0xf0,0x00,0x02,0x7d] -+#CHECK: vsrlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7d] -+#CHECK: vsrlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7d] -+#CHECK: vsrlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7d] -+ -+ vsrlb %v0, %v0, %v0 -+ vsrlb %v0, %v0, %v31 -+ vsrlb %v0, %v31, %v0 -+ vsrlb %v31, %v0, %v0 -+ vsrlb %v18, %v3, %v20 -+ -+#CHECK: vst %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0e] -+#CHECK: vst %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0e] -+#CHECK: vst %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0e] -+#CHECK: vst %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0e] -+#CHECK: vst %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0e] -+#CHECK: vst %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0e] -+#CHECK: vst %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x0e] -+ -+ vst %v0, 0 -+ vst %v0, 4095 -+ vst %v0, 0(%r15) -+ vst %v0, 0(%r15,%r1) -+ vst %v15, 0 -+ vst %v31, 0 -+ vst %v18, 0x567(%r3,%r4) -+ -+#CHECK: vsteb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x08] -+#CHECK: vsteb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x08] -+#CHECK: vsteb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x08] -+#CHECK: vsteb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x08] -+#CHECK: vsteb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x08] -+#CHECK: vsteb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x08] -+#CHECK: vsteb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x08] -+#CHECK: vsteb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x08] -+ -+ vsteb %v0, 0, 0 -+ vsteb %v0, 0, 15 -+ vsteb %v0, 4095, 0 -+ vsteb %v0, 0(%r15), 0 -+ vsteb %v0, 0(%r15,%r1), 0 -+ vsteb %v15, 0, 0 -+ vsteb %v31, 0, 0 -+ vsteb %v18, 1383(%r3,%r4), 8 -+ -+#CHECK: vstef %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0b] -+#CHECK: vstef %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x0b] -+#CHECK: vstef %v0, 4095, 0 # encoding: 
[0xe7,0x00,0x0f,0xff,0x00,0x0b] -+#CHECK: vstef %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0b] -+#CHECK: vstef %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0b] -+#CHECK: vstef %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0b] -+#CHECK: vstef %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0b] -+#CHECK: vstef %v18, 1383(%r3,%r4), 2 # encoding: [0xe7,0x23,0x45,0x67,0x28,0x0b] -+ -+ vstef %v0, 0, 0 -+ vstef %v0, 0, 3 -+ vstef %v0, 4095, 0 -+ vstef %v0, 0(%r15), 0 -+ vstef %v0, 0(%r15,%r1), 0 -+ vstef %v15, 0, 0 -+ vstef %v31, 0, 0 -+ vstef %v18, 1383(%r3,%r4), 2 -+ -+#CHECK: vsteg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0a] -+#CHECK: vsteg %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x0a] -+#CHECK: vsteg %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0a] -+#CHECK: vsteg %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0a] -+#CHECK: vsteg %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0a] -+#CHECK: vsteg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0a] -+#CHECK: vsteg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0a] -+#CHECK: vsteg %v18, 1383(%r3,%r4), 1 # encoding: [0xe7,0x23,0x45,0x67,0x18,0x0a] -+ -+ vsteg %v0, 0, 0 -+ vsteg %v0, 0, 1 -+ vsteg %v0, 4095, 0 -+ vsteg %v0, 0(%r15), 0 -+ vsteg %v0, 0(%r15,%r1), 0 -+ vsteg %v15, 0, 0 -+ vsteg %v31, 0, 0 -+ vsteg %v18, 1383(%r3,%r4), 1 -+ -+#CHECK: vsteh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x09] -+#CHECK: vsteh %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x09] -+#CHECK: vsteh %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x09] -+#CHECK: vsteh %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x09] -+#CHECK: vsteh %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x09] -+#CHECK: vsteh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x09] -+#CHECK: vsteh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x09] -+#CHECK: vsteh %v18, 1383(%r3,%r4), 4 # encoding: [0xe7,0x23,0x45,0x67,0x48,0x09] -+ -+ vsteh 
%v0, 0, 0 -+ vsteh %v0, 0, 7 -+ vsteh %v0, 4095, 0 -+ vsteh %v0, 0(%r15), 0 -+ vsteh %v0, 0(%r15,%r1), 0 -+ vsteh %v15, 0, 0 -+ vsteh %v31, 0, 0 -+ vsteh %v18, 1383(%r3,%r4), 4 -+ -+#CHECK: vstl %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3f] -+#CHECK: vstl %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3f] -+#CHECK: vstl %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3f] -+#CHECK: vstl %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x3f] -+#CHECK: vstl %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x3f] -+#CHECK: vstl %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3f] -+#CHECK: vstl %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x3f] -+ -+ vstl %v0, %r0, 0 -+ vstl %v0, %r0, 4095 -+ vstl %v0, %r0, 0(%r15) -+ vstl %v0, %r15, 0 -+ vstl %v15, %r0, 0 -+ vstl %v31, %r0, 0 -+ vstl %v18, %r3, 1383(%r4) -+ -+#CHECK: vstm %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3e] -+#CHECK: vstm %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3e] -+#CHECK: vstm %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3e] -+#CHECK: vstm %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3e] -+#CHECK: vstm %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3e] -+#CHECK: vstm %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3e] -+ -+ vstm %v0, %v0, 0 -+ vstm %v0, %v0, 4095 -+ vstm %v0, %v0, 0(%r15) -+ vstm %v0, %v31, 0 -+ vstm %v31, %v0, 0 -+ vstm %v14, %v17, 1074(%r5) -+ -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a] -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a] -+#CHECK: vstrcb %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8a] -+#CHECK: vstrcb %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8a] -+#CHECK: vstrcb %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8a] -+#CHECK: vstrcb %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8a] -+#CHECK: vstrcb %v0, %v0, %v31, %v0, 
0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8a] -+#CHECK: vstrcb %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8a] -+#CHECK: vstrcb %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8a] -+#CHECK: vstrcb %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8a] -+#CHECK: vstrcb %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8a] -+#CHECK: vstrcb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x40,0x5a,0x8a] -+#CHECK: vstrcb %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a] -+#CHECK: vstrcbs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0x90,0x5a,0x8a] -+#CHECK: vstrczb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x60,0x5a,0x8a] -+#CHECK: vstrczbs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x5a,0x8a] -+#CHECK: vstrczbs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a] -+ -+ vstrcb %v0, %v0, %v0, %v0 -+ vstrcb %v0, %v0, %v0, %v0, 0 -+ vstrcb %v0, %v0, %v0, %v0, 12 -+ vstrcb %v0, %v0, %v0, %v15 -+ vstrcb %v0, %v0, %v0, %v31 -+ vstrcb %v0, %v0, %v15, %v0 -+ vstrcb %v0, %v0, %v31, %v0 -+ vstrcb %v0, %v15, %v0, %v0 -+ vstrcb %v0, %v31, %v0, %v0 -+ vstrcb %v15, %v0, %v0, %v0 -+ vstrcb %v31, %v0, %v0, %v0 -+ vstrcb %v18, %v3, %v20, %v5, 4 -+ vstrcb %v18, %v3, %v20, %v5, 15 -+ vstrcbs %v18, %v3, %v20, %v5, 8 -+ vstrczb %v18, %v3, %v20, %v5, 4 -+ vstrczbs %v18, %v3, %v20, %v5, 8 -+ vstrczbs %v18, %v3, %v20, %v5, 15 -+ -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a] -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a] -+#CHECK: vstrcf %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x02,0xc0,0x00,0x8a] -+#CHECK: vstrcf %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf0,0x8a] -+#CHECK: vstrcf %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8a] -+#CHECK: vstrcf %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x00,0x8a] -+#CHECK: vstrcf %v0, %v0, %v31, %v0, 0 # 
encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8a] -+#CHECK: vstrcf %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x00,0x8a] -+#CHECK: vstrcf %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8a] -+#CHECK: vstrcf %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x00,0x8a] -+#CHECK: vstrcf %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8a] -+#CHECK: vstrcf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x40,0x5a,0x8a] -+#CHECK: vstrcf %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a] -+#CHECK: vstrcfs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x42,0x90,0x5a,0x8a] -+#CHECK: vstrczf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x60,0x5a,0x8a] -+#CHECK: vstrczfs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x42,0xb0,0x5a,0x8a] -+#CHECK: vstrczfs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a] -+ -+ vstrcf %v0, %v0, %v0, %v0 -+ vstrcf %v0, %v0, %v0, %v0, 0 -+ vstrcf %v0, %v0, %v0, %v0, 12 -+ vstrcf %v0, %v0, %v0, %v15 -+ vstrcf %v0, %v0, %v0, %v31 -+ vstrcf %v0, %v0, %v15, %v0 -+ vstrcf %v0, %v0, %v31, %v0 -+ vstrcf %v0, %v15, %v0, %v0 -+ vstrcf %v0, %v31, %v0, %v0 -+ vstrcf %v15, %v0, %v0, %v0 -+ vstrcf %v31, %v0, %v0, %v0 -+ vstrcf %v18, %v3, %v20, %v5, 4 -+ vstrcf %v18, %v3, %v20, %v5, 15 -+ vstrcfs %v18, %v3, %v20, %v5, 8 -+ vstrczf %v18, %v3, %v20, %v5, 4 -+ vstrczfs %v18, %v3, %v20, %v5, 8 -+ vstrczfs %v18, %v3, %v20, %v5, 15 -+ -+#CHECK: vstrch %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a] -+#CHECK: vstrch %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a] -+#CHECK: vstrch %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x01,0xc0,0x00,0x8a] -+#CHECK: vstrch %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf0,0x8a] -+#CHECK: vstrch %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x8a] -+#CHECK: vstrch %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x00,0x8a] -+#CHECK: vstrch %v0, %v0, %v31, %v0, 0 # 
encoding: [0xe7,0x00,0xf1,0x00,0x02,0x8a] -+#CHECK: vstrch %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x00,0x8a] -+#CHECK: vstrch %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x8a] -+#CHECK: vstrch %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x00,0x8a] -+#CHECK: vstrch %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x8a] -+#CHECK: vstrch %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x40,0x5a,0x8a] -+#CHECK: vstrch %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a] -+#CHECK: vstrchs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0x90,0x5a,0x8a] -+#CHECK: vstrczh %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x60,0x5a,0x8a] -+#CHECK: vstrczhs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0xb0,0x5a,0x8a] -+#CHECK: vstrczhs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a] -+ -+ vstrch %v0, %v0, %v0, %v0 -+ vstrch %v0, %v0, %v0, %v0, 0 -+ vstrch %v0, %v0, %v0, %v0, 12 -+ vstrch %v0, %v0, %v0, %v15 -+ vstrch %v0, %v0, %v0, %v31 -+ vstrch %v0, %v0, %v15, %v0 -+ vstrch %v0, %v0, %v31, %v0 -+ vstrch %v0, %v15, %v0, %v0 -+ vstrch %v0, %v31, %v0, %v0 -+ vstrch %v15, %v0, %v0, %v0 -+ vstrch %v31, %v0, %v0, %v0 -+ vstrch %v18, %v3, %v20, %v5, 4 -+ vstrch %v18, %v3, %v20, %v5, 15 -+ vstrchs %v18, %v3, %v20, %v5, 8 -+ vstrczh %v18, %v3, %v20, %v5, 4 -+ vstrczhs %v18, %v3, %v20, %v5, 8 -+ vstrczhs %v18, %v3, %v20, %v5, 15 -+ -+#CHECK: vsumgh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x65] -+#CHECK: vsumgh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x65] -+#CHECK: vsumgh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x65] -+#CHECK: vsumgh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x65] -+#CHECK: vsumgh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x65] -+ -+ vsumgh %v0, %v0, %v0 -+ vsumgh %v0, %v0, %v31 -+ vsumgh %v0, %v31, %v0 -+ vsumgh %v31, %v0, %v0 -+ vsumgh %v18, %v3, %v20 -+ -+#CHECK: vsumgf %v0, %v0, %v0 # 
encoding: [0xe7,0x00,0x00,0x00,0x20,0x65] -+#CHECK: vsumgf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x65] -+#CHECK: vsumgf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x65] -+#CHECK: vsumgf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x65] -+#CHECK: vsumgf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x65] -+ -+ vsumgf %v0, %v0, %v0 -+ vsumgf %v0, %v0, %v31 -+ vsumgf %v0, %v31, %v0 -+ vsumgf %v31, %v0, %v0 -+ vsumgf %v18, %v3, %v20 -+ -+#CHECK: vsumqf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x67] -+#CHECK: vsumqf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x67] -+#CHECK: vsumqf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x67] -+#CHECK: vsumqf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x67] -+#CHECK: vsumqf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x67] -+ -+ vsumqf %v0, %v0, %v0 -+ vsumqf %v0, %v0, %v31 -+ vsumqf %v0, %v31, %v0 -+ vsumqf %v31, %v0, %v0 -+ vsumqf %v18, %v3, %v20 -+ -+#CHECK: vsumqg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x67] -+#CHECK: vsumqg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x67] -+#CHECK: vsumqg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x67] -+#CHECK: vsumqg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x67] -+#CHECK: vsumqg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x67] -+ -+ vsumqg %v0, %v0, %v0 -+ vsumqg %v0, %v0, %v31 -+ vsumqg %v0, %v31, %v0 -+ vsumqg %v31, %v0, %v0 -+ vsumqg %v18, %v3, %v20 -+ -+#CHECK: vsumb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x64] -+#CHECK: vsumb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x64] -+#CHECK: vsumb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x64] -+#CHECK: vsumb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x64] -+#CHECK: vsumb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x64] -+ -+ vsumb %v0, %v0, %v0 -+ vsumb %v0, %v0, %v31 -+ vsumb %v0, %v31, %v0 -+ vsumb %v31, %v0, %v0 -+ vsumb %v18, %v3, %v20 
-+ -+#CHECK: vsumh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x64] -+#CHECK: vsumh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x64] -+#CHECK: vsumh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x64] -+#CHECK: vsumh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x64] -+#CHECK: vsumh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x64] -+ -+ vsumh %v0, %v0, %v0 -+ vsumh %v0, %v0, %v31 -+ vsumh %v0, %v31, %v0 -+ vsumh %v31, %v0, %v0 -+ vsumh %v18, %v3, %v20 -+ -+#CHECK: vtm %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd8] -+#CHECK: vtm %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd8] -+#CHECK: vtm %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd8] -+#CHECK: vtm %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd8] -+#CHECK: vtm %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd8] -+#CHECK: vtm %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd8] -+ -+ vtm %v0, %v0 -+ vtm %v0, %v15 -+ vtm %v0, %v31 -+ vtm %v15, %v0 -+ vtm %v31, %v0 -+ vtm %v14, %v17 -+ -+#CHECK: vuphb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd7] -+#CHECK: vuphb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd7] -+#CHECK: vuphb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd7] -+#CHECK: vuphb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd7] -+#CHECK: vuphb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd7] -+#CHECK: vuphb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd7] -+ -+ vuphb %v0, %v0 -+ vuphb %v0, %v15 -+ vuphb %v0, %v31 -+ vuphb %v15, %v0 -+ vuphb %v31, %v0 -+ vuphb %v14, %v17 -+ -+#CHECK: vuphf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd7] -+#CHECK: vuphf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd7] -+#CHECK: vuphf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd7] -+#CHECK: vuphf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd7] -+#CHECK: vuphf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd7] -+#CHECK: vuphf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd7] -+ -+ vuphf %v0, 
%v0 -+ vuphf %v0, %v15 -+ vuphf %v0, %v31 -+ vuphf %v15, %v0 -+ vuphf %v31, %v0 -+ vuphf %v14, %v17 -+ -+#CHECK: vuphh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd7] -+#CHECK: vuphh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd7] -+#CHECK: vuphh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd7] -+#CHECK: vuphh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd7] -+#CHECK: vuphh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd7] -+#CHECK: vuphh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd7] -+ -+ vuphh %v0, %v0 -+ vuphh %v0, %v15 -+ vuphh %v0, %v31 -+ vuphh %v15, %v0 -+ vuphh %v31, %v0 -+ vuphh %v14, %v17 -+ -+#CHECK: vuplhb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd5] -+#CHECK: vuplhb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd5] -+#CHECK: vuplhb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd5] -+#CHECK: vuplhb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd5] -+#CHECK: vuplhb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd5] -+#CHECK: vuplhb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd5] -+ -+ vuplhb %v0, %v0 -+ vuplhb %v0, %v15 -+ vuplhb %v0, %v31 -+ vuplhb %v15, %v0 -+ vuplhb %v31, %v0 -+ vuplhb %v14, %v17 -+ -+#CHECK: vuplhf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd5] -+#CHECK: vuplhf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd5] -+#CHECK: vuplhf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd5] -+#CHECK: vuplhf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd5] -+#CHECK: vuplhf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd5] -+#CHECK: vuplhf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd5] -+ -+ vuplhf %v0, %v0 -+ vuplhf %v0, %v15 -+ vuplhf %v0, %v31 -+ vuplhf %v15, %v0 -+ vuplhf %v31, %v0 -+ vuplhf %v14, %v17 -+ -+#CHECK: vuplhh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd5] -+#CHECK: vuplhh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd5] -+#CHECK: vuplhh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd5] -+#CHECK: vuplhh %v15, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x10,0xd5] -+#CHECK: vuplhh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd5] -+#CHECK: vuplhh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd5] -+ -+ vuplhh %v0, %v0 -+ vuplhh %v0, %v15 -+ vuplhh %v0, %v31 -+ vuplhh %v15, %v0 -+ vuplhh %v31, %v0 -+ vuplhh %v14, %v17 -+ -+#CHECK: vuplb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd6] -+#CHECK: vuplb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd6] -+#CHECK: vuplb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd6] -+#CHECK: vuplb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd6] -+#CHECK: vuplb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd6] -+#CHECK: vuplb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd6] -+ -+ vuplb %v0, %v0 -+ vuplb %v0, %v15 -+ vuplb %v0, %v31 -+ vuplb %v15, %v0 -+ vuplb %v31, %v0 -+ vuplb %v14, %v17 -+ -+#CHECK: vuplf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd6] -+#CHECK: vuplf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd6] -+#CHECK: vuplf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd6] -+#CHECK: vuplf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd6] -+#CHECK: vuplf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd6] -+#CHECK: vuplf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd6] -+ -+ vuplf %v0, %v0 -+ vuplf %v0, %v15 -+ vuplf %v0, %v31 -+ vuplf %v15, %v0 -+ vuplf %v31, %v0 -+ vuplf %v14, %v17 -+ -+#CHECK: vuplhw %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd6] -+#CHECK: vuplhw %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd6] -+#CHECK: vuplhw %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd6] -+#CHECK: vuplhw %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd6] -+#CHECK: vuplhw %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd6] -+#CHECK: vuplhw %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd6] -+ -+ vuplhw %v0, %v0 -+ vuplhw %v0, %v15 -+ vuplhw %v0, %v31 -+ vuplhw %v15, %v0 -+ vuplhw %v31, %v0 -+ vuplhw %v14, %v17 -+ -+#CHECK: vupllb %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x00,0xd4] -+#CHECK: vupllb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd4] -+#CHECK: vupllb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd4] -+#CHECK: vupllb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd4] -+#CHECK: vupllb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd4] -+#CHECK: vupllb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd4] -+ -+ vupllb %v0, %v0 -+ vupllb %v0, %v15 -+ vupllb %v0, %v31 -+ vupllb %v15, %v0 -+ vupllb %v31, %v0 -+ vupllb %v14, %v17 -+ -+#CHECK: vupllf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd4] -+#CHECK: vupllf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd4] -+#CHECK: vupllf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd4] -+#CHECK: vupllf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd4] -+#CHECK: vupllf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd4] -+#CHECK: vupllf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd4] -+ -+ vupllf %v0, %v0 -+ vupllf %v0, %v15 -+ vupllf %v0, %v31 -+ vupllf %v15, %v0 -+ vupllf %v31, %v0 -+ vupllf %v14, %v17 -+ -+#CHECK: vupllh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd4] -+#CHECK: vupllh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd4] -+#CHECK: vupllh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd4] -+#CHECK: vupllh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd4] -+#CHECK: vupllh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd4] -+#CHECK: vupllh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd4] -+ -+ vupllh %v0, %v0 -+ vupllh %v0, %v15 -+ vupllh %v0, %v31 -+ vupllh %v15, %v0 -+ vupllh %v31, %v0 -+ vupllh %v14, %v17 -+ -+#CHECK: vx %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6d] -+#CHECK: vx %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6d] -+#CHECK: vx %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6d] -+#CHECK: vx %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6d] -+#CHECK: vx %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6d] -+ -+ vx %v0, %v0, 
%v0 -+ vx %v0, %v0, %v31 -+ vx %v0, %v31, %v0 -+ vx %v31, %v0, %v0 -+ vx %v18, %v3, %v20 -+ -+#CHECK: vzero %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44] -+#CHECK: vzero %v11 # encoding: [0xe7,0xb0,0x00,0x00,0x00,0x44] -+#CHECK: vzero %v15 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44] -+#CHECK: vzero %v31 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44] -+ -+ vzero %v0 -+ vzero %v11 -+ vzero %v15 -+ vzero %v31 -+ -+#CHECK: wcdgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc3] -+#CHECK: wcdgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc3] -+#CHECK: wcdgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] -+#CHECK: wcdgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] -+#CHECK: wcdgb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc3] -+#CHECK: wcdgb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc3] -+#CHECK: wcdgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc3] -+ -+ wcdgb %v0, %v0, 0, 0 -+ wcdgb %v0, %v0, 0, 15 -+ wcdgb %v0, %v0, 4, 0 -+ wcdgb %v0, %v0, 12, 0 -+ wcdgb %v0, %v31, 0, 0 -+ wcdgb %v31, %v0, 0, 0 -+ wcdgb %v14, %v17, 4, 10 -+ -+#CHECK: wcdlgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc1] -+#CHECK: wcdlgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc1] -+#CHECK: wcdlgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] -+#CHECK: wcdlgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] -+#CHECK: wcdlgb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc1] -+#CHECK: wcdlgb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc1] -+#CHECK: wcdlgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc1] -+ -+ wcdlgb %v0, %v0, 0, 0 -+ wcdlgb %v0, %v0, 0, 15 -+ wcdlgb %v0, %v0, 4, 0 -+ wcdlgb %v0, %v0, 12, 0 -+ wcdlgb %v0, %v31, 0, 0 -+ wcdlgb %v31, %v0, 0, 0 -+ wcdlgb %v14, %v17, 4, 10 -+ -+#CHECK: wcgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc2] -+#CHECK: wcgdb %f0, %f0, 0, 15 # encoding: 
[0xe7,0x00,0x00,0xf8,0x30,0xc2] -+#CHECK: wcgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] -+#CHECK: wcgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] -+#CHECK: wcgdb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc2] -+#CHECK: wcgdb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc2] -+#CHECK: wcgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc2] -+ -+ wcgdb %v0, %v0, 0, 0 -+ wcgdb %v0, %v0, 0, 15 -+ wcgdb %v0, %v0, 4, 0 -+ wcgdb %v0, %v0, 12, 0 -+ wcgdb %v0, %v31, 0, 0 -+ wcgdb %v31, %v0, 0, 0 -+ wcgdb %v14, %v17, 4, 10 -+ -+#CHECK: wclgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc0] -+#CHECK: wclgdb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc0] -+#CHECK: wclgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] -+#CHECK: wclgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] -+#CHECK: wclgdb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc0] -+#CHECK: wclgdb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc0] -+#CHECK: wclgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc0] -+ -+ wclgdb %v0, %v0, 0, 0 -+ wclgdb %v0, %v0, 0, 15 -+ wclgdb %v0, %v0, 4, 0 -+ wclgdb %v0, %v0, 12, 0 -+ wclgdb %v0, %v31, 0, 0 -+ wclgdb %v31, %v0, 0, 0 -+ wclgdb %v14, %v17, 4, 10 -+ -+#CHECK: wfadb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe3] -+#CHECK: wfadb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe3] -+#CHECK: wfadb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe3] -+#CHECK: wfadb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe3] -+#CHECK: wfadb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe3] -+ -+ wfadb %v0, %v0, %v0 -+ wfadb %v0, %v0, %v31 -+ wfadb %v0, %v31, %v0 -+ wfadb %v31, %v0, %v0 -+ wfadb %v18, %v3, %v20 -+ -+#CHECK: wfcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcb] -+#CHECK: wfcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcb] -+#CHECK: wfcdb %f0, %v31 # encoding: 
[0xe7,0x0f,0x00,0x00,0x34,0xcb] -+#CHECK: wfcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcb] -+#CHECK: wfcdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcb] -+#CHECK: wfcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcb] -+ -+ wfcdb %v0, %v0 -+ wfcdb %v0, %v15 -+ wfcdb %v0, %v31 -+ wfcdb %v15, %v0 -+ wfcdb %v31, %v0 -+ wfcdb %v14, %v17 -+ -+#CHECK: wfcedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe8] -+#CHECK: wfcedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe8] -+#CHECK: wfcedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe8] -+#CHECK: wfcedb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe8] -+#CHECK: wfcedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe8] -+ -+ wfcedb %v0, %v0, %v0 -+ wfcedb %v0, %v0, %v31 -+ wfcedb %v0, %v31, %v0 -+ wfcedb %v31, %v0, %v0 -+ wfcedb %v18, %v3, %v20 -+ -+#CHECK: wfcedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xe8] -+#CHECK: wfcedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xe8] -+#CHECK: wfcedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xe8] -+#CHECK: wfcedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xe8] -+#CHECK: wfcedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xe8] -+ -+ wfcedbs %v0, %v0, %v0 -+ wfcedbs %v0, %v0, %v31 -+ wfcedbs %v0, %v31, %v0 -+ wfcedbs %v31, %v0, %v0 -+ wfcedbs %v18, %v3, %v20 -+ -+#CHECK: wfchdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xeb] -+#CHECK: wfchdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xeb] -+#CHECK: wfchdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xeb] -+#CHECK: wfchdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xeb] -+#CHECK: wfchdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xeb] -+ -+ wfchdb %v0, %v0, %v0 -+ wfchdb %v0, %v0, %v31 -+ wfchdb %v0, %v31, %v0 -+ wfchdb %v31, %v0, %v0 -+ wfchdb %v18, %v3, %v20 -+ -+#CHECK: wfchdbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xeb] -+#CHECK: wfchdbs 
%f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xeb] -+#CHECK: wfchdbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xeb] -+#CHECK: wfchdbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xeb] -+#CHECK: wfchdbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xeb] -+ -+ wfchdbs %v0, %v0, %v0 -+ wfchdbs %v0, %v0, %v31 -+ wfchdbs %v0, %v31, %v0 -+ wfchdbs %v31, %v0, %v0 -+ wfchdbs %v18, %v3, %v20 -+ -+#CHECK: wfchedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xea] -+#CHECK: wfchedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xea] -+#CHECK: wfchedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xea] -+#CHECK: wfchedb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xea] -+#CHECK: wfchedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xea] -+ -+ wfchedb %v0, %v0, %v0 -+ wfchedb %v0, %v0, %v31 -+ wfchedb %v0, %v31, %v0 -+ wfchedb %v31, %v0, %v0 -+ wfchedb %v18, %v3, %v20 -+ -+#CHECK: wfchedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xea] -+#CHECK: wfchedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xea] -+#CHECK: wfchedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xea] -+#CHECK: wfchedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xea] -+#CHECK: wfchedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xea] -+ -+ wfchedbs %v0, %v0, %v0 -+ wfchedbs %v0, %v0, %v31 -+ wfchedbs %v0, %v31, %v0 -+ wfchedbs %v31, %v0, %v0 -+ wfchedbs %v18, %v3, %v20 -+ -+#CHECK: wfddb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe5] -+#CHECK: wfddb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe5] -+#CHECK: wfddb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe5] -+#CHECK: wfddb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe5] -+#CHECK: wfddb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe5] -+ -+ wfddb %v0, %v0, %v0 -+ wfddb %v0, %v0, %v31 -+ wfddb %v0, %v31, %v0 -+ wfddb %v31, %v0, %v0 -+ wfddb %v18, %v3, %v20 -+ -+#CHECK: wfidb 
%f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc7] -+#CHECK: wfidb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc7] -+#CHECK: wfidb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7] -+#CHECK: wfidb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7] -+#CHECK: wfidb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc7] -+#CHECK: wfidb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc7] -+#CHECK: wfidb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc7] -+ -+ wfidb %v0, %v0, 0, 0 -+ wfidb %v0, %v0, 0, 15 -+ wfidb %v0, %v0, 4, 0 -+ wfidb %v0, %v0, 12, 0 -+ wfidb %v0, %v31, 0, 0 -+ wfidb %v31, %v0, 0, 0 -+ wfidb %v14, %v17, 4, 10 -+ -+#CHECK: wfkdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xca] -+#CHECK: wfkdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xca] -+#CHECK: wfkdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xca] -+#CHECK: wfkdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xca] -+#CHECK: wfkdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xca] -+#CHECK: wfkdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xca] -+ -+ wfkdb %v0, %v0 -+ wfkdb %v0, %v15 -+ wfkdb %v0, %v31 -+ wfkdb %v15, %v0 -+ wfkdb %v31, %v0 -+ wfkdb %v14, %v17 -+ -+#CHECK: wflcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xcc] -+#CHECK: wflcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xcc] -+#CHECK: wflcdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xcc] -+#CHECK: wflcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xcc] -+#CHECK: wflcdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xcc] -+#CHECK: wflcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xcc] -+ -+ wflcdb %v0, %v0 -+ wflcdb %v0, %v15 -+ wflcdb %v0, %v31 -+ wflcdb %v15, %v0 -+ wflcdb %v31, %v0 -+ wflcdb %v14, %v17 -+ -+#CHECK: wflndb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xcc] -+#CHECK: wflndb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x18,0x30,0xcc] -+#CHECK: wflndb %f0, %v31 # encoding: 
[0xe7,0x0f,0x00,0x18,0x34,0xcc] -+#CHECK: wflndb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x30,0xcc] -+#CHECK: wflndb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xcc] -+#CHECK: wflndb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x18,0x34,0xcc] -+ -+ wflndb %v0, %v0 -+ wflndb %v0, %v15 -+ wflndb %v0, %v31 -+ wflndb %v15, %v0 -+ wflndb %v31, %v0 -+ wflndb %v14, %v17 -+ -+#CHECK: wflpdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x30,0xcc] -+#CHECK: wflpdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x28,0x30,0xcc] -+#CHECK: wflpdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x28,0x34,0xcc] -+#CHECK: wflpdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x30,0xcc] -+#CHECK: wflpdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x38,0xcc] -+#CHECK: wflpdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x28,0x34,0xcc] -+ -+ wflpdb %v0, %v0 -+ wflpdb %v0, %v15 -+ wflpdb %v0, %v31 -+ wflpdb %v15, %v0 -+ wflpdb %v31, %v0 -+ wflpdb %v14, %v17 -+ -+#CHECK: wfmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8f] -+#CHECK: wfmadb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8f] -+#CHECK: wfmadb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8f] -+#CHECK: wfmadb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8f] -+#CHECK: wfmadb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8f] -+#CHECK: wfmadb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8f] -+ -+ wfmadb %v0, %v0, %v0, %v0 -+ wfmadb %v0, %v0, %v0, %v31 -+ wfmadb %v0, %v0, %v31, %v0 -+ wfmadb %v0, %v31, %v0, %v0 -+ wfmadb %v31, %v0, %v0, %v0 -+ wfmadb %v13, %v17, %v21, %v25 -+ -+#CHECK: wfmdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe7] -+#CHECK: wfmdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe7] -+#CHECK: wfmdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe7] -+#CHECK: wfmdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe7] -+#CHECK: wfmdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe7] -+ -+ wfmdb %v0, %v0, 
%v0 -+ wfmdb %v0, %v0, %v31 -+ wfmdb %v0, %v31, %v0 -+ wfmdb %v31, %v0, %v0 -+ wfmdb %v18, %v3, %v20 -+ -+#CHECK: wfmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8e] -+#CHECK: wfmsdb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8e] -+#CHECK: wfmsdb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8e] -+#CHECK: wfmsdb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8e] -+#CHECK: wfmsdb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8e] -+#CHECK: wfmsdb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8e] -+ -+ wfmsdb %v0, %v0, %v0, %v0 -+ wfmsdb %v0, %v0, %v0, %v31 -+ wfmsdb %v0, %v0, %v31, %v0 -+ wfmsdb %v0, %v31, %v0, %v0 -+ wfmsdb %v31, %v0, %v0, %v0 -+ wfmsdb %v13, %v17, %v21, %v25 -+ -+#CHECK: wfsdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe2] -+#CHECK: wfsdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe2] -+#CHECK: wfsdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe2] -+#CHECK: wfsdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe2] -+#CHECK: wfsdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe2] -+ -+ wfsdb %v0, %v0, %v0 -+ wfsdb %v0, %v0, %v31 -+ wfsdb %v0, %v31, %v0 -+ wfsdb %v31, %v0, %v0 -+ wfsdb %v18, %v3, %v20 -+ -+#CHECK: wfsqdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xce] -+#CHECK: wfsqdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xce] -+#CHECK: wfsqdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xce] -+#CHECK: wfsqdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xce] -+#CHECK: wfsqdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xce] -+#CHECK: wfsqdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xce] -+ -+ wfsqdb %v0, %v0 -+ wfsqdb %v0, %v15 -+ wfsqdb %v0, %v31 -+ wfsqdb %v15, %v0 -+ wfsqdb %v31, %v0 -+ wfsqdb %v14, %v17 -+ -+#CHECK: wftcidb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0x4a] -+#CHECK: wftcidb %f0, %f0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x30,0x4a] -+#CHECK: 
wftcidb %f0, %f15, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0x4a] -+#CHECK: wftcidb %f0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0x4a] -+#CHECK: wftcidb %f15, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0x4a] -+#CHECK: wftcidb %v31, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0x4a] -+#CHECK: wftcidb %f4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x88,0x34,0x4a] -+ -+ wftcidb %v0, %v0, 0 -+ wftcidb %v0, %v0, 4095 -+ wftcidb %v0, %v15, 0 -+ wftcidb %v0, %v31, 0 -+ wftcidb %v15, %v0, 0 -+ wftcidb %v31, %v0, 0 -+ wftcidb %v4, %v21, 0x678 -+ -+#CHECK: wldeb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4] -+#CHECK: wldeb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4] -+#CHECK: wldeb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc4] -+#CHECK: wldeb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xc4] -+#CHECK: wldeb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc4] -+#CHECK: wldeb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xc4] -+ -+ wldeb %v0, %v0 -+ wldeb %v0, %v15 -+ wldeb %v0, %v31 -+ wldeb %v15, %v0 -+ wldeb %v31, %v0 -+ wldeb %v14, %v17 -+ -+#CHECK: wledb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5] -+#CHECK: wledb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5] -+#CHECK: wledb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] -+#CHECK: wledb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] -+#CHECK: wledb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc5] -+#CHECK: wledb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc5] -+#CHECK: wledb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc5] -+ -+ wledb %v0, %v0, 0, 0 -+ wledb %v0, %v0, 0, 15 -+ wledb %v0, %v0, 4, 0 -+ wledb %v0, %v0, 12, 0 -+ wledb %v0, %v31, 0, 0 -+ wledb %v31, %v0, 0, 0 -+ wledb %v14, %v17, 4, 10 -Index: llvm-36/test/MC/SystemZ/insn-good-z196.s -=================================================================== ---- llvm-36.orig/test/MC/SystemZ/insn-good-z196.s -+++ 
llvm-36/test/MC/SystemZ/insn-good-z196.s -@@ -1021,6 +1021,16 @@ - ork %r15,%r0,%r0 - ork %r7,%r8,%r9 - -+#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00] -+#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f] -+#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0] -+#CHECK: popcnt %r7, %r8 # encoding: [0xb9,0xe1,0x00,0x78] -+ -+ popcnt %r0,%r0 -+ popcnt %r0,%r15 -+ popcnt %r15,%r0 -+ popcnt %r7,%r8 -+ - #CHECK: risbhg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d] - #CHECK: risbhg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d] - #CHECK: risbhg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d] -Index: llvm-36/test/MC/SystemZ/insn-good-zEC12.s -=================================================================== ---- /dev/null -+++ llvm-36/test/MC/SystemZ/insn-good-zEC12.s -@@ -0,0 +1,126 @@ -+# For zEC12 and above. -+# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s -+ -+#CHECK: etnd %r0 # encoding: [0xb2,0xec,0x00,0x00] -+#CHECK: etnd %r15 # encoding: [0xb2,0xec,0x00,0xf0] -+#CHECK: etnd %r7 # encoding: [0xb2,0xec,0x00,0x70] -+ -+ etnd %r0 -+ etnd %r15 -+ etnd %r7 -+ -+#CHECK: ntstg %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25] -+#CHECK: ntstg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25] -+#CHECK: ntstg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25] -+#CHECK: ntstg %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25] -+#CHECK: ntstg %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25] -+#CHECK: ntstg %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25] -+#CHECK: ntstg %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25] -+#CHECK: ntstg %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25] -+#CHECK: ntstg %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25] -+#CHECK: ntstg %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25] -+ -+ ntstg %r0, -524288 -+ ntstg %r0, -1 -+ ntstg %r0, 0 -+ ntstg %r0, 1 -+ ntstg %r0, 
524287 -+ ntstg %r0, 0(%r1) -+ ntstg %r0, 0(%r15) -+ ntstg %r0, 524287(%r1,%r15) -+ ntstg %r0, 524287(%r15,%r1) -+ ntstg %r15, 0 -+ -+#CHECK: ppa %r0, %r0, 0 # encoding: [0xb2,0xe8,0x00,0x00] -+#CHECK: ppa %r0, %r0, 15 # encoding: [0xb2,0xe8,0xf0,0x00] -+#CHECK: ppa %r0, %r15, 0 # encoding: [0xb2,0xe8,0x00,0x0f] -+#CHECK: ppa %r4, %r6, 7 # encoding: [0xb2,0xe8,0x70,0x46] -+#CHECK: ppa %r15, %r0, 0 # encoding: [0xb2,0xe8,0x00,0xf0] -+ -+ ppa %r0, %r0, 0 -+ ppa %r0, %r0, 15 -+ ppa %r0, %r15, 0 -+ ppa %r4, %r6, 7 -+ ppa %r15, %r0, 0 -+ -+#CHECK: risbgn %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x59] -+#CHECK: risbgn %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59] -+#CHECK: risbgn %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x59] -+#CHECK: risbgn %r0, %r0, 255, 0, 0 # encoding: [0xec,0x00,0xff,0x00,0x00,0x59] -+#CHECK: risbgn %r0, %r15, 0, 0, 0 # encoding: [0xec,0x0f,0x00,0x00,0x00,0x59] -+#CHECK: risbgn %r15, %r0, 0, 0, 0 # encoding: [0xec,0xf0,0x00,0x00,0x00,0x59] -+#CHECK: risbgn %r4, %r5, 6, 7, 8 # encoding: [0xec,0x45,0x06,0x07,0x08,0x59] -+ -+ risbgn %r0,%r0,0,0,0 -+ risbgn %r0,%r0,0,0,63 -+ risbgn %r0,%r0,0,255,0 -+ risbgn %r0,%r0,255,0,0 -+ risbgn %r0,%r15,0,0,0 -+ risbgn %r15,%r0,0,0,0 -+ risbgn %r4,%r5,6,7,8 -+ -+#CHECK: tabort 0 # encoding: [0xb2,0xfc,0x00,0x00] -+#CHECK: tabort 0(%r1) # encoding: [0xb2,0xfc,0x10,0x00] -+#CHECK: tabort 0(%r15) # encoding: [0xb2,0xfc,0xf0,0x00] -+#CHECK: tabort 4095 # encoding: [0xb2,0xfc,0x0f,0xff] -+#CHECK: tabort 4095(%r1) # encoding: [0xb2,0xfc,0x1f,0xff] -+#CHECK: tabort 4095(%r15) # encoding: [0xb2,0xfc,0xff,0xff] -+ -+ tabort 0 -+ tabort 0(%r1) -+ tabort 0(%r15) -+ tabort 4095 -+ tabort 4095(%r1) -+ tabort 4095(%r15) -+ -+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00] -+#CHECK: tbegin 4095, 0 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00] -+#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00] -+#CHECK: tbegin 0, 1 # encoding: 
[0xe5,0x60,0x00,0x00,0x00,0x01] -+#CHECK: tbegin 0, 32767 # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff] -+#CHECK: tbegin 0, 32768 # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00] -+#CHECK: tbegin 0, 65535 # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff] -+#CHECK: tbegin 0(%r1), 42 # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a] -+#CHECK: tbegin 0(%r15), 42 # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a] -+#CHECK: tbegin 4095(%r1), 42 # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a] -+#CHECK: tbegin 4095(%r15), 42 # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a] -+ -+ tbegin 0, 0 -+ tbegin 4095, 0 -+ tbegin 0, 0 -+ tbegin 0, 1 -+ tbegin 0, 32767 -+ tbegin 0, 32768 -+ tbegin 0, 65535 -+ tbegin 0(%r1), 42 -+ tbegin 0(%r15), 42 -+ tbegin 4095(%r1), 42 -+ tbegin 4095(%r15), 42 -+ -+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00] -+#CHECK: tbeginc 4095, 0 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00] -+#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00] -+#CHECK: tbeginc 0, 1 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01] -+#CHECK: tbeginc 0, 32767 # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff] -+#CHECK: tbeginc 0, 32768 # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00] -+#CHECK: tbeginc 0, 65535 # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff] -+#CHECK: tbeginc 0(%r1), 42 # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a] -+#CHECK: tbeginc 0(%r15), 42 # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a] -+#CHECK: tbeginc 4095(%r1), 42 # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a] -+#CHECK: tbeginc 4095(%r15), 42 # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a] -+ -+ tbeginc 0, 0 -+ tbeginc 4095, 0 -+ tbeginc 0, 0 -+ tbeginc 0, 1 -+ tbeginc 0, 32767 -+ tbeginc 0, 32768 -+ tbeginc 0, 65535 -+ tbeginc 0(%r1), 42 -+ tbeginc 0(%r15), 42 -+ tbeginc 4095(%r1), 42 -+ tbeginc 4095(%r15), 42 -+ -+#CHECK: tend # encoding: [0xb2,0xf8,0x00,0x00] -+ -+ tend -Index: llvm-36/test/MC/SystemZ/tokens.s -=================================================================== ---- llvm-36.orig/test/MC/SystemZ/tokens.s -+++ 
llvm-36/test/MC/SystemZ/tokens.s -@@ -13,10 +13,16 @@ - #CHECK: foo 100(200,%r0), 300 - #CHECK: error: invalid instruction - #CHECK: foo 100(200,%r1), 300 --#CHECK: error: invalid operand -+#CHECK: error: invalid address register - #CHECK: foo 100(%a0), 200 - #CHECK: error: %r0 used in an address - #CHECK: foo 100(%r0), 200 -+#CHECK: error: %r0 used in an address -+#CHECK: foo 100(%v1,%r0), 200 -+#CHECK: error: invalid instruction -+#CHECK: foo 100(%v0,%r1), 200 -+#CHECK: error: invalid instruction -+#CHECK: foo 100(%v31), 200 - #CHECK: error: invalid operand - #CHECK: foo 100(%r1,%a0), 200 - #CHECK: error: %r0 used in an address -@@ -45,6 +51,12 @@ - #CHECK: foo %a15, 200 - #CHECK: error: invalid register - #CHECK: foo %a16, 200 -+#CHECK: error: invalid instruction -+#CHECK: foo %v0, 200 -+#CHECK: error: invalid instruction -+#CHECK: foo %v31, 200 -+#CHECK: error: invalid register -+#CHECK: foo %v32, 200 - #CHECK: error: invalid register - #CHECK: foo %c, 200 - #CHECK: error: invalid register -@@ -60,6 +72,9 @@ - foo 100(200,%r1), 300 - foo 100(%a0), 200 - foo 100(%r0), 200 -+ foo 100(%v1,%r0), 200 -+ foo 100(%v0,%r1), 200 -+ foo 100(%v31), 200 - foo 100(%r1,%a0), 200 - foo 100(%r1,%r0), 200 - foo 100(%r1,%r2, 200 -@@ -74,6 +89,9 @@ - foo %a0, 200 - foo %a15, 200 - foo %a16, 200 -+ foo %v0, 200 -+ foo %v31, 200 -+ foo %v32, 200 - foo %c, 200 - foo %, 200 - foo {, 200 diff --git a/SOURCES/make-llvm-snapshot.sh b/SOURCES/make-llvm-snapshot.sh deleted file mode 100755 index 40aa6c9..0000000 --- a/SOURCES/make-llvm-snapshot.sh +++ /dev/null @@ -1,13 +0,0 @@ -#!/bin/sh - -DIRNAME=llvm-$( date +%Y%m%d ) -URL=http://llvm.org/svn/llvm-project/llvm/branches/release_33/ -#URL=http://llvm.org/svn/llvm-project/llvm/trunk/ - -rm -rf $DIRNAME -svn co $URL $DIRNAME |& tail -1 > revision -mv revision $DIRNAME -rm -rf $DIRNAME/.svn - -tar Jcf $DIRNAME.tar.xz $DIRNAME -rm -rf $DIRNAME diff --git a/SPECS/llvm.spec b/SPECS/llvm.spec index 450f2b8..6952d0f 100644 --- a/SPECS/llvm.spec 
+++ b/SPECS/llvm.spec @@ -1,3 +1,10 @@ +# Components enabled if supported by target architecture: +%ifarch %ix86 x86_64 + %bcond_without gold +%else + %bcond_with gold +%endif + %if 0%{?rhel} == 6 %define rhel6 1 %endif @@ -6,45 +13,50 @@ # consequently we build swrast on them instead of llvmpipe. ExcludeArch: ppc s390 %{?rhel6:s390x} -#global svndate 20131023 -#global prerel rc4 - -Name: mesa-private-llvm -Version: 3.6.2 -Release: 2%{?prerel:.%prerel}%{?dist} -Summary: llvm engine for Mesa - -Group: System Environment/Libraries -License: NCSA -URL: http://llvm.org/ -Source0: http://llvm.org/releases/%{version}/%{?prerel}/llvm-%{version}%{?prerel}.src.tar.xz -#Source0: llvm-%{svndate}.tar.xz -Source1: make-llvm-snapshot.sh -# multilib fixes -Source2: llvm-Config-config.h -Source3: llvm-Config-llvm-config.h - -# Data files should be installed with timestamps preserved -Patch0: llvm-2.6-timestamp.patch - -# llvm Z13 backports (#1182150) -Patch1: llvm-z13-backports.patch -Patch2: llvm-3.6-large-struct-return.patch - -# llvm aarch64 bug fix (#1254386) -Patch10: 0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch -# add model detection for skylake and broadwell -Patch11: llvm-3.6.2-nerf-skylake.patch - -BuildRequires: bison -BuildRequires: chrpath -BuildRequires: flex -BuildRequires: gcc-c++ >= 3.4 -BuildRequires: groff -BuildRequires: libtool-ltdl-devel -BuildRequires: zip -# for DejaGNU test suite -BuildRequires: dejagnu tcl-devel python +%ifarch s390x +%global host_target SystemZ +%endif +%ifarch ppc64 ppc64le +%global host_target PowerPC +%endif +%ifarch %ix86 x86_64 +%global host_target X86 +%endif +%ifarch aarch64 +%global host_target AArch64 +%endif +%ifarch %{arm} +%global host_target ARM +%endif + +%ifnarch s390x +%global amdgpu ;AMDGPU +%endif + +Name: mesa-private-llvm +Version: 3.8.1 +Release: 1%{?dist} +Summary: llvm engine for Mesa + +Group: System Environment/Libraries +License: NCSA +URL: http://llvm.org +Source0: 
http://llvm.org/releases/%{version}/llvm-%{version}.src.tar.xz +Source100: llvm-config.h + +# recognize s390 as SystemZ when configuring build +#Patch0: llvm-3.7.1-cmake-s390.patch + +Patch1: fix-cmake-include.patch +Patch2: llvm-3.8.1-rhel-7.3.patch + +BuildRequires: cmake +BuildRequires: zlib-devel +%if %{with gold} +BuildRequires: binutils-devel +%endif +BuildRequires: libstdc++-static +BuildRequires: python %description This package contains the LLVM-based runtime support for Mesa. It is not a @@ -52,98 +64,87 @@ fully-featured build of LLVM, and use by any package other than Mesa is not supported. %package devel -Summary: Libraries and header files for Mesa's llvm engine -Group: Development/Libraries -Requires: %{name}%{?_isa} = %{version}-%{release} -Requires: libstdc++-devel >= 3.4 +Summary: Libraries and header files for LLVM +Requires: %{name}%{?_isa} = %{version}-%{release} %description devel This package contains library and header files needed to build the LLVM support in Mesa. 
%prep -%setup -q -n llvm-%{version}%{?prerel}.src -rm -r -f tools/clang - -# llvm patches -%patch0 -p1 -b .timestamp -%patch1 -p1 -b .z13 -%patch2 -p1 -b .large-struct -%patch10 -p1 -b .aarch64-fix -%patch11 -p1 -b .skl-fix - -# fix ld search path -sed -i 's|/lib /usr/lib $lt_ld_extra|%{_libdir} $lt_ld_extra|' \ - ./configure - -# mangle the library name -sed -i 's|^LLVM_VERSION_SUFFIX=|&-mesa|' ./configure - -%ifnarch s390x -%define r600 ,r600 -%endif +%setup -q -n llvm-%{version}.src +#patch0 -p1 -b .s390 +%patch1 -p1 -b .fixinc +%patch2 -p1 %build -export CC=gcc -export CXX=g++ -%configure \ - --prefix=%{_prefix} \ - --libdir=%{_libdir} \ - --includedir=%{_includedir}/mesa-private \ - --with-extra-ld-options=-Wl,-Bsymbolic,--default-symver \ - --enable-targets=host%{?r600} \ - --enable-bindings=none \ - --enable-debug-runtime \ - --enable-jit \ - --enable-shared \ - --enable-optimized \ - --disable-clang-arcmt \ - --disable-clang-static-analyzer \ - --disable-clang-rewriter \ - --disable-assertions \ - --disable-docs \ - --disable-libffi \ - --disable-terminfo \ - --disable-timestamps \ - %{nil} - -# FIXME file this -# configure does not properly specify libdir or includedir -sed -i 's|(PROJ_prefix)/lib|(PROJ_prefix)/%{_lib}|g' Makefile.config -sed -i 's|(PROJ_prefix)/include|&/mesa-private|g' Makefile.config -#sed -i 's|LLVM_VERSION_SUFFIX := |& -mesa|g' Makefile.config - -# FIXME upstream need to fix this -# llvm-config.cpp hardcodes lib in it -sed -i 's|ActiveLibDir = ActivePrefix + "/lib"|ActiveLibDir = ActivePrefix + "/%{_lib}"|g' tools/llvm-config/llvm-config.cpp + sed -i 's|ActiveIncludeDir = ActivePrefix + "/include|&/mesa-private|g' tools/llvm-config/llvm-config.cpp -make %{_smp_mflags} VERBOSE=1 OPTIMIZE_OPTION="%{optflags} -fno-strict-aliasing" +mkdir -p _build +cd _build + +# force off shared libs as cmake macros turns it on. +%cmake .. 
\ + -DINCLUDE_INSTALL_DIR=%{_includedir}/mesa-private \ + -DLLVM_VERSION_SUFFIX="-mesa" \ + -DBUILD_SHARED_LIBS:BOOL=OFF \ + -DCMAKE_BUILD_TYPE=RelWithDebInfo \ + -DCMAKE_SHARED_LINKER_FLAGS="-Wl,-Bsymbolic -static-libstdc++" \ +%if 0%{?__isa_bits} == 64 + -DLLVM_LIBDIR_SUFFIX=64 \ +%else + -DLLVM_LIBDIR_SUFFIX= \ +%endif + \ + -DLLVM_TARGETS_TO_BUILD="%{host_target}%{?amdgpu}" \ + -DLLVM_ENABLE_LIBCXX:BOOL=OFF \ + -DLLVM_ENABLE_ZLIB:BOOL=ON \ + -DLLVM_ENABLE_FFI:BOOL=OFF \ + -DLLVM_ENABLE_RTTI:BOOL=OFF \ +%if %{with gold} + -DLLVM_BINUTILS_INCDIR=%{_includedir} \ +%endif + \ + -DLLVM_BUILD_RUNTIME:BOOL=ON \ + \ + -DLLVM_INCLUDE_TOOLS:BOOL=ON \ + -DLLVM_BUILD_TOOLS:BOOL=ON \ + \ + -DLLVM_INCLUDE_TESTS:BOOL=ON \ + -DLLVM_BUILD_TESTS:BOOL=ON \ + \ + -DLLVM_INCLUDE_EXAMPLES:BOOL=OFF \ + -DLLVM_BUILD_EXAMPLES:BOOL=OFF \ + \ + -DLLVM_INCLUDE_UTILS:BOOL=ON \ + -DLLVM_INSTALL_UTILS:BOOL=OFF \ + \ + -DLLVM_INCLUDE_DOCS:BOOL=OFF \ + -DLLVM_BUILD_DOCS:BOOL=OFF \ + -DLLVM_ENABLE_SPHINX:BOOL=OFF \ + -DLLVM_ENABLE_DOXYGEN:BOOL=OFF \ + \ + -DLLVM_BUILD_LLVM_DYLIB:BOOL=ON \ + -DLLVM_DYLIB_EXPORT_ALL:BOOL=ON \ + -DLLVM_LINK_LLVM_DYLIB:BOOL=ON \ + -DLLVM_BUILD_EXTERNAL_COMPILER_RT:BOOL=ON \ + -DLLVM_INSTALL_TOOLCHAIN_ONLY:BOOL=OFF + +make %{?_smp_mflags} VERBOSE=1 %install +cd _build make install DESTDIR=%{buildroot} -# rename the few binaries we're keeping -mv %{buildroot}%{_bindir}/llvm-config %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits} - -pushd %{buildroot}%{_includedir}/mesa-private/llvm/Config -mv config.h config-%{__isa_bits}.h -cp -p %{SOURCE2} config.h -mv llvm-config.h llvm-config-%{__isa_bits}.h -cp -p %{SOURCE3} llvm-config.h -popd - -file %{buildroot}/%{_bindir}/* %{buildroot}/%{bindir}/*.so | \ - awk -F: '$2~/ELF/{print $1}' | \ - xargs -r chrpath -d - -# FIXME file this bug -sed -i 's,ABS_RUN_DIR/lib",ABS_RUN_DIR/%{_lib}/%{name}",' \ - %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits} +# fix multi-lib +mv -v %{buildroot}%{_bindir}/llvm-config 
%{buildroot}%{_bindir}/%{name}-config-%{__isa_bits} +mv -v %{buildroot}%{_includedir}/mesa-private/llvm/Config/llvm-config{,-%{__isa_bits}}.h +install -m 0644 %{SOURCE100} %{buildroot}%{_includedir}/mesa-private/llvm/Config/llvm-config.h rm -f %{buildroot}%{_libdir}/*.a -rm -f %{buildroot}%{_libdir}/libLLVM-%{version}.so +rm -f %{buildroot}%{_libdir}/libLLVM.so # remove documentation makefiles: # they require the build directory to work @@ -155,103 +156,74 @@ ls %{buildroot}%{_libdir}/* | grep -v libLLVM | xargs rm -f rm -rf %{buildroot}%{_mandir}/man1 # RHEL: Strip out some headers Mesa doesn't need -rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{Analysis,Assembly} -rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{DebugInfo,Option} +rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{Assembly} +rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/Option rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/TableGen +rm -rf %{buildroot}%{_includedir}/llvm-c/lto.h # RHEL: Strip out cmake build foo rm -rf %{buildroot}%{_datadir}/llvm/cmake %check -# the Koji build server does not seem to have enough RAM -# for the default 16 threads - -# just log the results, don't fail the build -make check LIT_ARGS="-v -j4" | tee llvm-testlog-%{_arch}.txt +cd _build +# 3.8.1 note: skx failures are XFAIL. the skylake backport does not wire +# up AVX512 for skylake, but the tests are from code that expects that. +# safe to ignore. 
+make check-all || : %post -p /sbin/ldconfig %postun -p /sbin/ldconfig %files -%defattr(-,root,root,-) %doc LICENSE.TXT -%{_libdir}/libLLVM-3.6-mesa.so +%{_libdir}/libLLVM-3.8*-mesa.so %files devel -%defattr(-,root,root,-) %{_bindir}/%{name}-config-%{__isa_bits} %{_includedir}/mesa-private/llvm %{_includedir}/mesa-private/llvm-c %changelog -* Wed Oct 14 2015 Adam Jackson 3.6.2-2 -- Teach CPU detection about Skylake/Broadwell, treat them like Haswell - -* Mon Aug 24 2015 Dave Airlie 3.6.2-1 -- fix aarch64 bugs via 3.6.2 + patch - -* Tue Aug 18 2015 Adam Jackson 3.6.1-2 -- Fix large struct return on s390 - -* Tue May 26 2015 Dave Airlie 3.6.1-1 -- rebase to llvm 3.6.1 - -* Thu May 21 2015 Dave Airlie 3.6.0-3 -- backport llvm z13 support from IBM - -* Wed May 13 2015 Dave Airlie 3.6.0-2 -- mesa needs Object headers now. - -* Wed May 13 2015 Dave Airlie 3.6.0-1 -- llvm 3.6.0 final +* Wed Jul 13 2016 Adam Jackson - 3.8.1-1 +- Update to 3.8.1 +- Sync some x86 getHostCPUName updates from trunk -* Mon Feb 23 2015 Adam Jackson 3.6.0-0.1 -- llvm 3.6.0 rc4 +* Tue Jun 14 2016 Dave Airlie - 3.8.0-2 +- drop private cmake build -* Tue Sep 09 2014 Dave Airlie 3.5.0-1 -- llvm 3.5.0 final +* Thu Mar 10 2016 Dave Airlie 3.8.0-1 +- llvm 3.8.0 final release -* Wed Aug 27 2014 Adam Jackson 3.5.0-0.1.rc3 -- llvm 3.5.0 RC3 +* Thu Mar 03 2016 Dave Airlie 3.8.0-0.2 +- llvm 3.8.0 rc3 release -* Wed Aug 27 2014 Dave Airlie 3.4.2-1 -- llvm 3.4.2 for RHEL 7.1 +* Fri Feb 19 2016 Dave Airlie 3.8.0-0.1 +- llvm 3.8.0 rc2 release -* Tue Jan 28 2014 Adam Jackson 3.3-0.8.20131023 -- Disable %%check, only fails in places that don't matter to Mesa (#1028575) +* Tue Feb 16 2016 Dan Horák 3.7.1-7 +- recognize s390 as SystemZ when configuring build -* Fri Jan 24 2014 Daniel Mach - 3.3-0.7.20131023 -- Mass rebuild 2014-01-24 +* Sat Feb 13 2016 Dave Airlie 3.7.1-6 +- export C++ API for mesa.
-* Fri Dec 27 2013 Daniel Mach - 3.3-0.6.20131023 -- Mass rebuild 2013-12-27 +* Sat Feb 13 2016 Dave Airlie 3.7.1-5 +- reintroduce llvm-static, clang needs it currently. -* Wed Oct 23 2013 Jerome Glisse 3.3-0.5.20131023 -- 3.3.1 snapshot +* Fri Feb 12 2016 Dave Airlie 3.7.1-4 +- jump back to single llvm library, the split libs aren't working very well. -* Tue Aug 20 2013 Adam Jackson 3.3-0.4.rc3 -- Build with -fno-strict-aliasing +* Fri Feb 05 2016 Dave Airlie 3.7.1-3 +- add missing obsoletes (#1303497) -* Tue Jun 18 2013 Adam Jackson 3.3-0.3.rc3 -- Port to RHEL6 -- Don't bother building R600 on s390x +* Thu Feb 04 2016 Fedora Release Engineering - 3.7.1-2 +- Rebuilt for https://fedoraproject.org/wiki/Fedora_24_Mass_Rebuild -* Tue Jun 11 2013 Adam Jackson 3.3-0.2.rc3 -- 3.3 rc3 -- Drop tblgen -- Strip out some headers +* Thu Jan 07 2016 Jan Vcelak 3.7.1-1 +- new upstream release +- enable gold linker -* Tue May 14 2013 Adam Jackson 3.3-0.1.rc1 -- Update to 3.3 rc1 -- Move library to %%{_libdir} to avoid rpath headaches -- Link with -Bsymbolic and --default-symver -- --disable-libffi -- Misc spec cleanup +* Wed Nov 04 2015 Jan Vcelak 3.7.0-100 +- fix Requires for subpackages on the main package -* Wed Dec 05 2012 Adam Jackson 3.1-13 -- Forked spec for RHEL7 Mesa's private use - - no ocaml support - - no doxygen build - - no clang support - - no static archives - - no libraries, binaries, or manpages not needed by Mesa +* Tue Oct 06 2015 Jan Vcelak 3.7.0-100 +- initial version using cmake build system