From 003cedd10f98b0ff2abe2773f7a08da10d60f22f Mon Sep 17 00:00:00 2001 From: CentOS Sources Date: Nov 19 2015 15:45:07 +0000 Subject: import mesa-private-llvm-3.6.2-2.el7 --- diff --git a/.gitignore b/.gitignore index 5af8eef..7c6c75c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1 @@ -SOURCES/llvm-3.5.0.src.tar.xz +SOURCES/llvm-3.6.2.src.tar.xz diff --git a/.mesa-private-llvm.metadata b/.mesa-private-llvm.metadata index 5cfad28..fb549fa 100644 --- a/.mesa-private-llvm.metadata +++ b/.mesa-private-llvm.metadata @@ -1 +1 @@ -58d817ac2ff573386941e7735d30702fe71267d5 SOURCES/llvm-3.5.0.src.tar.xz +7a00257eb2bc9431e4c77c3a36b033072c54bc7e SOURCES/llvm-3.6.2.src.tar.xz diff --git a/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch b/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch new file mode 100644 index 0000000..e4ea42c --- /dev/null +++ b/SOURCES/0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch @@ -0,0 +1,48 @@ +From 5717e28019e7348a04f63dcf965121171da15c62 Mon Sep 17 00:00:00 2001 +From: James Molloy +Date: Thu, 16 Apr 2015 11:37:40 +0000 +Subject: [PATCH] [AArch64] Fix invalid use of references to BuildMI. + +This was found in GCC PR65773 (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=65773). + +We shouldn't be taking a reference to the temporary that BuildMI returns, we must copy it. + +git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@235088 91177308-0d34-0410-b5e6-96231b3b80d8 +--- + lib/Target/AArch64/AArch64InstrInfo.cpp | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/lib/Target/AArch64/AArch64InstrInfo.cpp b/lib/Target/AArch64/AArch64InstrInfo.cpp +index 8e0af2d..db231c4 100644 +--- a/lib/Target/AArch64/AArch64InstrInfo.cpp ++++ b/lib/Target/AArch64/AArch64InstrInfo.cpp +@@ -1526,7 +1526,7 @@ void AArch64InstrInfo::copyPhysRegTuple( + } + + for (; SubReg != End; SubReg += Incr) { +- const MachineInstrBuilder &MIB = BuildMI(MBB, I, DL, get(Opcode)); ++ const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode)); + AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI); + AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI); + AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI); +@@ -1904,7 +1904,7 @@ void AArch64InstrInfo::storeRegToStackSlot( + } + assert(Opc && "Unknown register class"); + +- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) ++ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) + .addReg(SrcReg, getKillRegState(isKill)) + .addFrameIndex(FI); + +@@ -2002,7 +2002,7 @@ void AArch64InstrInfo::loadRegFromStackSlot( + } + assert(Opc && "Unknown register class"); + +- const MachineInstrBuilder &MI = BuildMI(MBB, MBBI, DL, get(Opc)) ++ const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DL, get(Opc)) + .addReg(DestReg, getDefRegState(true)) + .addFrameIndex(FI); + if (Offset) +-- +2.4.3 + diff --git a/SOURCES/llvm-3.5.0-build-fix.patch b/SOURCES/llvm-3.5.0-build-fix.patch deleted file mode 100644 index da1eaec..0000000 --- a/SOURCES/llvm-3.5.0-build-fix.patch +++ /dev/null @@ -1,43 +0,0 @@ -Error.cpp:28:44: error: declaration of ‘virtual const char* {anonymous}::_object_error_category::name() const’ has a different exception specifier - const char *_object_error_category::name() const { - ^ -Error.cpp:23:15: error: from previous declaration ‘virtual const char* {anonymous}::_object_error_category::name() const noexcept (true)’ - const char* name() const LLVM_NOEXCEPT override; - ^ - -diff -up llvm.src/lib/Object/Error.cpp.jx 
llvm.src/lib/Object/Error.cpp ---- llvm.src/lib/Object/Error.cpp.jx 2014-06-13 11:36:17.000000000 -0400 -+++ llvm.src/lib/Object/Error.cpp 2014-08-11 13:11:46.135014527 -0400 -@@ -25,7 +25,7 @@ public: - }; - } - --const char *_object_error_category::name() const { -+const char *_object_error_category::name() const LLVM_NOEXCEPT { - return "llvm.object"; - } - -diff -up llvm.src/tools/llvm-readobj/Error.cpp.jx llvm.src/tools/llvm-readobj/Error.cpp ---- llvm.src/tools/llvm-readobj/Error.cpp.jx 2014-06-13 11:36:17.000000000 -0400 -+++ llvm.src/tools/llvm-readobj/Error.cpp 2014-08-11 13:49:16.624287424 -0400 -@@ -24,7 +24,7 @@ public: - }; - } // namespace - --const char *_readobj_error_category::name() const { -+const char *_readobj_error_category::name() const LLVM_NOEXCEPT { - return "llvm.readobj"; - } - -diff -up llvm.src/tools/obj2yaml/Error.cpp.jx llvm.src/tools/obj2yaml/Error.cpp ---- llvm.src/tools/obj2yaml/Error.cpp.jx 2014-06-13 11:36:17.000000000 -0400 -+++ llvm.src/tools/obj2yaml/Error.cpp 2014-08-11 14:04:05.841996088 -0400 -@@ -20,7 +20,7 @@ public: - }; - } // namespace - --const char *_obj2yaml_error_category::name() const { return "obj2yaml"; } -+const char *_obj2yaml_error_category::name() const LLVM_NOEXCEPT { return "obj2yaml"; } - - std::string _obj2yaml_error_category::message(int ev) const { - switch (static_cast(ev)) { diff --git a/SOURCES/llvm-3.6-large-struct-return.patch b/SOURCES/llvm-3.6-large-struct-return.patch new file mode 100644 index 0000000..d387539 --- /dev/null +++ b/SOURCES/llvm-3.6-large-struct-return.patch @@ -0,0 +1,368 @@ +------------------------------------------------------------------------ +r244889 | uweigand | 2015-08-13 15:37:06 +0200 (Thu, 13 Aug 2015) | 22 lines + +[SystemZ] Support large LLVM IR struct return values + +Recent mesa/llvmpipe crashes on SystemZ due to a failed assertion when +attempting to compile a routine with a return type of + { <4 x float>, <4 x float>, <4 x float>, <4 x float> } +on a system without vector instruction support. + +This is because after legalizing the vector type, we get a return value +consisting of 16 floats, which cannot all be returned in registers. + +Usually, what should happen in this case is that the target's CanLowerReturn +routine rejects the return type, in which case SelectionDAG falls back to +implementing a structure return in memory via implicit reference. + +However, the SystemZ target never actually implemented any CanLowerReturn +routine, and thus would accept any struct return type. + +This patch fixes the crash by implementing CanLowerReturn. As a side effect, +this also handles fp128 return values, fixing a todo that was noted in +SystemZCallingConv.td. + +Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td ++++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td +@@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[ + CCIfSubtarget<"hasVector()", + CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>> +- +- // ABI-compliant code returns long double by reference, but that conversion +- // is left to higher-level code. Perhaps we could add an f128 definition +- // here for code that doesn't care about the ABI? 
+ ]>; + + //===----------------------------------------------------------------------===// +Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp +@@ -1169,6 +1169,20 @@ SystemZTargetLowering::LowerCall(CallLow + return Chain; + } + ++bool SystemZTargetLowering:: ++CanLowerReturn(CallingConv::ID CallConv, ++ MachineFunction &MF, bool isVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const { ++ // Detect unsupported vector return types. ++ if (Subtarget.hasVector()) ++ VerifyVectorTypes(Outs); ++ ++ SmallVector RetLocs; ++ CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context); ++ return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ); ++} ++ + SDValue + SystemZTargetLowering::LowerReturn(SDValue Chain, + CallingConv::ID CallConv, bool IsVarArg, +Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h ++++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h +@@ -401,6 +401,10 @@ public: + SDValue LowerCall(CallLoweringInfo &CLI, + SmallVectorImpl &InVals) const override; + ++ bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, ++ bool isVarArg, ++ const SmallVectorImpl &Outs, ++ LLVMContext &Context) const override; + SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + const SmallVectorImpl &Outs, + const SmallVectorImpl &OutVals, +Index: llvm-36/test/CodeGen/SystemZ/args-04.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/args-04.ll ++++ llvm-36/test/CodeGen/SystemZ/args-04.ll +@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i3 + store fp128 %y, fp128 *%r2 + ret void + } ++ ++; Explicit fp128 return values are likewise passed indirectly. ++define fp128 @f14(fp128 %r3) { ++; CHECK-LABEL: f14: ++; CHECK: ld %f0, 0(%r3) ++; CHECK: ld %f2, 8(%r3) ++; CHECK: axbr %f0, %f0 ++; CHECK: std %f0, 0(%r2) ++; CHECK: std %f2, 8(%r2) ++; CHECK: br %r14 ++ %y = fadd fp128 %r3, %r3 ++ ret fp128 %y ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/args-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/args-07.ll +@@ -0,0 +1,60 @@ ++; Test multiple return values (LLVM ABI extension) ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++ ++; Up to four integer return values fit into GPRs. ++define { i64, i64, i64, i64 } @f1() { ++; CHECK-LABEL: f1: ++; CHECK: lghi %r2, 0 ++; CHECK: lghi %r3, 1 ++; CHECK: lghi %r4, 2 ++; CHECK: lghi %r5, 3 ++; CHECK: br %r14 ++ ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 } ++} ++ ++; More than four integer return values use sret. ++define { i64, i64, i64, i64, i64 } @f2() { ++; CHECK-LABEL: f2: ++; CHECK: mvghi 32(%r2), 4 ++; CHECK: mvghi 24(%r2), 3 ++; CHECK: mvghi 16(%r2), 2 ++; CHECK: mvghi 8(%r2), 1 ++; CHECK: mvghi 0(%r2), 0 ++; CHECK: br %r14 ++ ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 } ++} ++ ++; Up to four floating-point return values fit into FPRs. 
++define { double, double, double, double } @f3() { ++; CHECK-LABEL: f3: ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: ldeb %f0, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: ldeb %f2, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: ldeb %f4, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: ldeb %f6, 0([[TMP]]) ++; CHECK: br %r14 ++ ret { double, double, double, double } ++ { double 1.0, double 2.0, double 3.0, double 4.0 } ++} ++ ++; More than four floating-point return values use sret. ++define { double, double, double, double, double } @f4() { ++; CHECK-LABEL: f4: ++; CHECK: llihh [[TMP:%r[0-5]]], 16404 ++; CHECK: stg [[TMP]], 32(%r2) ++; CHECK: llihh [[TMP:%r[0-5]]], 16400 ++; CHECK: stg [[TMP]], 24(%r2) ++; CHECK: llihh [[TMP:%r[0-5]]], 16392 ++; CHECK: stg [[TMP]], 16(%r2) ++; CHECK: llihh [[TMP:%r[0-5]]], 16384 ++; CHECK: stg [[TMP]], 8(%r2) ++; CHECK: llihh [[TMP:%r[0-5]]], 16368 ++; CHECK: stg [[TMP]], 0(%r2) ++; CHECK: br %r14 ++ ret { double, double, double, double, double } ++ { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 } ++} +Index: llvm-36/test/CodeGen/SystemZ/args-08.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/args-08.ll +@@ -0,0 +1,57 @@ ++; Test calling functions with multiple return values (LLVM ABI extension) ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++ ++; Up to four integer return values fit into GPRs. ++declare { i64, i64, i64, i64 } @bar1() ++ ++define i64 @f1() { ++; CHECK-LABEL: f1: ++; CHECK: brasl %r14, bar1 ++; CHECK: lgr %r2, %r5 ++; CHECK: br %r14 ++ %mret = call { i64, i64, i64, i64 } @bar1() ++ %ret = extractvalue { i64, i64, i64, i64 } %mret, 3 ++ ret i64 %ret ++} ++ ++; More than four integer return values use sret. ++declare { i64, i64, i64, i64, i64 } @bar2() ++ ++define i64 @f2() { ++; CHECK-LABEL: f2: ++; CHECK: la %r2, 160(%r15) ++; CHECK: brasl %r14, bar2 ++; CHECK: lg %r2, 192(%r15) ++; CHECK: br %r14 ++ %mret = call { i64, i64, i64, i64, i64 } @bar2() ++ %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4 ++ ret i64 %ret ++} ++ ++; Up to four floating-point return values fit into GPRs. ++declare { double, double, double, double } @bar3() ++ ++define double @f3() { ++; CHECK-LABEL: f3: ++; CHECK: brasl %r14, bar3 ++; CHECK: ldr %f0, %f6 ++; CHECK: br %r14 ++ %mret = call { double, double, double, double } @bar3() ++ %ret = extractvalue { double, double, double, double } %mret, 3 ++ ret double %ret ++} ++ ++; More than four integer return values use sret. ++declare { double, double, double, double, double } @bar4() ++ ++define double @f4() { ++; CHECK-LABEL: f4: ++; CHECK: la %r2, 160(%r15) ++; CHECK: brasl %r14, bar4 ++; CHECK: ld %f0, 192(%r15) ++; CHECK: br %r14 ++ %mret = call { double, double, double, double, double } @bar4() ++ %ret = extractvalue { double, double, double, double, double } %mret, 4 ++ ret double %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-args-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-args-06.ll +@@ -0,0 +1,83 @@ ++; Test multiple return values (LLVM ABI extension) ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Up to eight vector return values fit into VRs. 
++define { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() { ++; CHECK-LABEL: f1: ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v24, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v26, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v28, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v30, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v25, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v27, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v29, 0([[TMP]]) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl %v31, 0([[TMP]]) ++; CHECK: br %r14 ++ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double> } ++ { <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> } ++} ++ ++; More than eight vector return values use sret. ++define { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double> } @f2() { ++; CHECK-LABEL: f2: ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 128(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 112(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 96(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 80(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 64(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 48(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 32(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 16(%r2) ++; CHECK: larl [[TMP:%r[0-5]]], .LCPI ++; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]]) ++; CHECK: vst [[VTMP]], 0(%r2) ++; CHECK: br %r14 ++ ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double> } ++ { <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> , ++ <2 x double> } ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-args-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-args-07.ll +@@ -0,0 +1,47 @@ ++; Test calling functions with multiple return values (LLVM ABI extension) ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Up to eight vector return values fit into VRs. 
++declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1() ++ ++define <2 x double> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: brasl %r14, bar1 ++; CHECK: vlr %v24, %v31 ++; CHECK: br %r14 ++ %mret = call { <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double> } @bar1() ++ %ret = extractvalue { <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double> } %mret, 7 ++ ret <2 x double> %ret ++} ++ ++; More than eight vector return values use sret. ++declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, <2 x double>, <2 x double>, ++ <2 x double> } @bar2() ++ ++define <2 x double> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: la %r2, 160(%r15) ++; CHECK: brasl %r14, bar2 ++; CHECK: vl %v24, 288(%r15) ++; CHECK: br %r14 ++ %mret = call { <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double> } @bar2() ++ %ret = extractvalue { <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double>, <2 x double>, ++ <2 x double> } %mret, 8 ++ ret <2 x double> %ret ++} diff --git a/SOURCES/llvm-3.6.2-nerf-skylake.patch b/SOURCES/llvm-3.6.2-nerf-skylake.patch new file mode 100644 index 0000000..5f8c3c4 --- /dev/null +++ b/SOURCES/llvm-3.6.2-nerf-skylake.patch @@ -0,0 +1,28 @@ +Skylake Pentium has the charming property of not supporting AVX, and +getHostCPUName will return 'x86-64' since it doesn't know about skl at +all in 3.6.x. This confuses llvmpipe quite badly, as we'll emit SSE4.1 +intrinsics but llvm will think they're not valid, and we'll cough and +die with a "Cannot select" message. + +Fix this by treating Skylake (and Broadwell, which also isn't present +in 3.6) as if they were Haswell. This isn't quite what upstream does, +but upstream has changed this API a bit and introduced a getHostCPUFeatures +to complement it, and while it looks like a much better approach it's +quite a bit more invasive. + +diff -up llvm-3.6.2.src/lib/Support/Host.cpp.jx llvm-3.6.2.src/lib/Support/Host.cpp +--- llvm-3.6.2.src/lib/Support/Host.cpp.jx 2015-10-01 12:08:39.000000000 -0400 ++++ llvm-3.6.2.src/lib/Support/Host.cpp 2015-10-13 10:51:03.736425351 -0400 +@@ -362,6 +362,12 @@ StringRef sys::getHostCPUName() { + case 63: + case 69: + case 70: ++ // Broadwell: ++ case 61: ++ case 71: ++ // Skylake: ++ case 78: ++ case 94: + // Not all Haswell processors support AVX too (such as the Pentium + // versions instead of the i7 versions). + return HasAVX2 ? "core-avx2" : "corei7"; diff --git a/SOURCES/llvm-z13-backports.patch b/SOURCES/llvm-z13-backports.patch new file mode 100644 index 0000000..c6aebb4 --- /dev/null +++ b/SOURCES/llvm-z13-backports.patch @@ -0,0 +1,39981 @@ +This patch backports z13 support and a number of other SystemZ +enhancements to the LLVM 3.6 release branch. 
+ +The patch consists of backports of the following mainline revisions: +229652, 229654, 229658, 233540, 233541, 233688, 233689, 233690, 233700, +233736, 233803, 236430, 236432, 236433, 236520, 236521, 236522, 236523, +236524, 236525, 236526, 236527, 236528, 236529, 236530 + +Index: llvm-36/include/llvm/IR/Intrinsics.td +=================================================================== +--- llvm-36.orig/include/llvm/IR/Intrinsics.td ++++ llvm-36/include/llvm/IR/Intrinsics.td +@@ -594,3 +594,4 @@ include "llvm/IR/IntrinsicsHexagon.td" + include "llvm/IR/IntrinsicsNVVM.td" + include "llvm/IR/IntrinsicsMips.td" + include "llvm/IR/IntrinsicsR600.td" ++include "llvm/IR/IntrinsicsSystemZ.td" +Index: llvm-36/include/llvm/IR/IntrinsicsSystemZ.td +=================================================================== +--- /dev/null ++++ llvm-36/include/llvm/IR/IntrinsicsSystemZ.td +@@ -0,0 +1,378 @@ ++//===- IntrinsicsSystemZ.td - Defines SystemZ intrinsics ---*- tablegen -*-===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file defines all of the SystemZ-specific intrinsics. ++// ++//===----------------------------------------------------------------------===// ++ ++class SystemZUnaryConv ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[result], [arg], [IntrNoMem]>; ++ ++class SystemZUnary ++ : SystemZUnaryConv; ++ ++class SystemZUnaryConvCC ++ : Intrinsic<[result, llvm_i32_ty], [arg], [IntrNoMem]>; ++ ++class SystemZUnaryCC ++ : SystemZUnaryConvCC; ++ ++class SystemZBinaryConv ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[result], [arg, arg], [IntrNoMem]>; ++ ++class SystemZBinary ++ : SystemZBinaryConv; ++ ++class SystemZBinaryInt ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[type], [type, llvm_i32_ty], [IntrNoMem]>; ++ ++class SystemZBinaryConvCC ++ : Intrinsic<[result, llvm_i32_ty], [arg, arg], [IntrNoMem]>; ++ ++class SystemZBinaryConvIntCC ++ : Intrinsic<[result, llvm_i32_ty], [arg, llvm_i32_ty], [IntrNoMem]>; ++ ++class SystemZBinaryCC ++ : SystemZBinaryConvCC; ++ ++class SystemZTernaryConv ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[result], [arg, arg, result], [IntrNoMem]>; ++ ++class SystemZTernary ++ : SystemZTernaryConv; ++ ++class SystemZTernaryInt ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[type], [type, type, llvm_i32_ty], [IntrNoMem]>; ++ ++class SystemZTernaryIntCC ++ : Intrinsic<[type, llvm_i32_ty], [type, type, llvm_i32_ty], [IntrNoMem]>; ++ ++class SystemZQuaternaryInt ++ : GCCBuiltin<"__builtin_s390_" ## name>, ++ Intrinsic<[type], [type, type, type, llvm_i32_ty], [IntrNoMem]>; ++ ++class SystemZQuaternaryIntCC ++ : Intrinsic<[type, llvm_i32_ty], [type, type, type, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++multiclass SystemZUnaryExtBHF { ++ def b : SystemZUnaryConv; ++ def h : SystemZUnaryConv; ++ def f : SystemZUnaryConv; ++} ++ ++multiclass SystemZUnaryExtBHWF { ++ def b : SystemZUnaryConv; ++ def hw : SystemZUnaryConv; ++ def f : SystemZUnaryConv; ++} ++ ++multiclass SystemZUnaryBHF { ++ def b : SystemZUnary; ++ def h : SystemZUnary; ++ def f : SystemZUnary; ++} ++ ++multiclass SystemZUnaryBHFG : SystemZUnaryBHF { ++ def g : SystemZUnary; ++} ++ ++multiclass SystemZUnaryCCBHF { ++ def bs : SystemZUnaryCC; ++ def hs : SystemZUnaryCC; ++ def fs : SystemZUnaryCC; ++} ++ ++multiclass 
SystemZBinaryTruncHFG { ++ def h : SystemZBinaryConv; ++ def f : SystemZBinaryConv; ++ def g : SystemZBinaryConv; ++} ++ ++multiclass SystemZBinaryTruncCCHFG { ++ def hs : SystemZBinaryConvCC; ++ def fs : SystemZBinaryConvCC; ++ def gs : SystemZBinaryConvCC; ++} ++ ++multiclass SystemZBinaryExtBHF { ++ def b : SystemZBinaryConv; ++ def h : SystemZBinaryConv; ++ def f : SystemZBinaryConv; ++} ++ ++multiclass SystemZBinaryExtBHFG : SystemZBinaryExtBHF { ++ def g : SystemZBinaryConv; ++} ++ ++multiclass SystemZBinaryBHF { ++ def b : SystemZBinary; ++ def h : SystemZBinary; ++ def f : SystemZBinary; ++} ++ ++multiclass SystemZBinaryBHFG : SystemZBinaryBHF { ++ def g : SystemZBinary; ++} ++ ++multiclass SystemZBinaryIntBHFG { ++ def b : SystemZBinaryInt; ++ def h : SystemZBinaryInt; ++ def f : SystemZBinaryInt; ++ def g : SystemZBinaryInt; ++} ++ ++multiclass SystemZBinaryCCBHF { ++ def bs : SystemZBinaryCC; ++ def hs : SystemZBinaryCC; ++ def fs : SystemZBinaryCC; ++} ++ ++multiclass SystemZCompareBHFG { ++ def bs : SystemZBinaryCC; ++ def hs : SystemZBinaryCC; ++ def fs : SystemZBinaryCC; ++ def gs : SystemZBinaryCC; ++} ++ ++multiclass SystemZTernaryExtBHF { ++ def b : SystemZTernaryConv; ++ def h : SystemZTernaryConv; ++ def f : SystemZTernaryConv; ++} ++ ++multiclass SystemZTernaryExtBHFG : SystemZTernaryExtBHF { ++ def g : SystemZTernaryConv; ++} ++ ++multiclass SystemZTernaryBHF { ++ def b : SystemZTernary; ++ def h : SystemZTernary; ++ def f : SystemZTernary; ++} ++ ++multiclass SystemZTernaryIntBHF { ++ def b : SystemZTernaryInt; ++ def h : SystemZTernaryInt; ++ def f : SystemZTernaryInt; ++} ++ ++multiclass SystemZTernaryIntCCBHF { ++ def bs : SystemZTernaryIntCC; ++ def hs : SystemZTernaryIntCC; ++ def fs : SystemZTernaryIntCC; ++} ++ ++multiclass SystemZQuaternaryIntBHF { ++ def b : SystemZQuaternaryInt; ++ def h : SystemZQuaternaryInt; ++ def f : SystemZQuaternaryInt; ++} ++ ++multiclass SystemZQuaternaryIntBHFG : SystemZQuaternaryIntBHF { ++ def g : SystemZQuaternaryInt; ++} ++ ++multiclass SystemZQuaternaryIntCCBHF { ++ def bs : SystemZQuaternaryIntCC; ++ def hs : SystemZQuaternaryIntCC; ++ def fs : SystemZQuaternaryIntCC; ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// Transactional-execution intrinsics ++// ++//===----------------------------------------------------------------------===// ++ ++def llvm_ptr64_ty : LLVMPointerType; ++ ++let TargetPrefix = "s390" in { ++ def int_s390_tbegin : Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrNoDuplicate]>; ++ ++ def int_s390_tbegin_nofloat : Intrinsic<[llvm_i32_ty], ++ [llvm_ptr_ty, llvm_i32_ty], ++ [IntrNoDuplicate]>; ++ ++ def int_s390_tbeginc : Intrinsic<[], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrNoDuplicate]>; ++ ++ def int_s390_tabort : Intrinsic<[], [llvm_i64_ty], ++ [IntrNoReturn, Throws]>; ++ ++ def int_s390_tend : GCCBuiltin<"__builtin_tend">, ++ Intrinsic<[llvm_i32_ty], []>; ++ ++ def int_s390_etnd : GCCBuiltin<"__builtin_tx_nesting_depth">, ++ Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>; ++ ++ def int_s390_ntstg : Intrinsic<[], [llvm_i64_ty, llvm_ptr64_ty], ++ [IntrReadWriteArgMem]>; ++ ++ def int_s390_ppa_txassist : GCCBuiltin<"__builtin_tx_assist">, ++ Intrinsic<[], [llvm_i32_ty]>; ++} ++ ++//===----------------------------------------------------------------------===// ++// ++// Vector intrinsics ++// ++//===----------------------------------------------------------------------===// ++ ++let TargetPrefix = "s390" in { ++ def int_s390_lcbb : 
GCCBuiltin<"__builtin_s390_lcbb">, ++ Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++ def int_s390_vlbb : GCCBuiltin<"__builtin_s390_vlbb">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_ptr_ty, llvm_i32_ty], ++ [IntrReadArgMem]>; ++ ++ def int_s390_vll : GCCBuiltin<"__builtin_s390_vll">, ++ Intrinsic<[llvm_v16i8_ty], [llvm_i32_ty, llvm_ptr_ty], ++ [IntrReadArgMem]>; ++ ++ def int_s390_vpdi : GCCBuiltin<"__builtin_s390_vpdi">, ++ Intrinsic<[llvm_v2i64_ty], ++ [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++ def int_s390_vperm : GCCBuiltin<"__builtin_s390_vperm">, ++ Intrinsic<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty], ++ [IntrNoMem]>; ++ ++ defm int_s390_vpks : SystemZBinaryTruncHFG<"vpks">; ++ defm int_s390_vpks : SystemZBinaryTruncCCHFG; ++ ++ defm int_s390_vpkls : SystemZBinaryTruncHFG<"vpkls">; ++ defm int_s390_vpkls : SystemZBinaryTruncCCHFG; ++ ++ def int_s390_vstl : GCCBuiltin<"__builtin_s390_vstl">, ++ Intrinsic<[], [llvm_v16i8_ty, llvm_i32_ty, llvm_ptr_ty], ++ // In fact write-only but there's no property ++ // for that. ++ [IntrReadWriteArgMem]>; ++ ++ defm int_s390_vupl : SystemZUnaryExtBHWF<"vupl">; ++ defm int_s390_vupll : SystemZUnaryExtBHF<"vupll">; ++ ++ defm int_s390_vuph : SystemZUnaryExtBHF<"vuph">; ++ defm int_s390_vuplh : SystemZUnaryExtBHF<"vuplh">; ++ ++ defm int_s390_vacc : SystemZBinaryBHFG<"vacc">; ++ ++ def int_s390_vaq : SystemZBinary<"vaq", llvm_v16i8_ty>; ++ def int_s390_vacq : SystemZTernary<"vacq", llvm_v16i8_ty>; ++ def int_s390_vaccq : SystemZBinary<"vaccq", llvm_v16i8_ty>; ++ def int_s390_vacccq : SystemZTernary<"vacccq", llvm_v16i8_ty>; ++ ++ defm int_s390_vavg : SystemZBinaryBHFG<"vavg">; ++ defm int_s390_vavgl : SystemZBinaryBHFG<"vavgl">; ++ ++ def int_s390_vcksm : SystemZBinary<"vcksm", llvm_v4i32_ty>; ++ ++ defm int_s390_vgfm : SystemZBinaryExtBHFG<"vgfm">; ++ defm int_s390_vgfma : SystemZTernaryExtBHFG<"vgfma">; ++ ++ defm int_s390_vmah : SystemZTernaryBHF<"vmah">; ++ defm int_s390_vmalh : SystemZTernaryBHF<"vmalh">; ++ defm int_s390_vmae : SystemZTernaryExtBHF<"vmae">; ++ defm int_s390_vmale : SystemZTernaryExtBHF<"vmale">; ++ defm int_s390_vmao : SystemZTernaryExtBHF<"vmao">; ++ defm int_s390_vmalo : SystemZTernaryExtBHF<"vmalo">; ++ ++ defm int_s390_vmh : SystemZBinaryBHF<"vmh">; ++ defm int_s390_vmlh : SystemZBinaryBHF<"vmlh">; ++ defm int_s390_vme : SystemZBinaryExtBHF<"vme">; ++ defm int_s390_vmle : SystemZBinaryExtBHF<"vmle">; ++ defm int_s390_vmo : SystemZBinaryExtBHF<"vmo">; ++ defm int_s390_vmlo : SystemZBinaryExtBHF<"vmlo">; ++ ++ defm int_s390_verllv : SystemZBinaryBHFG<"verllv">; ++ defm int_s390_verll : SystemZBinaryIntBHFG<"verll">; ++ defm int_s390_verim : SystemZQuaternaryIntBHFG<"verim">; ++ ++ def int_s390_vsl : SystemZBinary<"vsl", llvm_v16i8_ty>; ++ def int_s390_vslb : SystemZBinary<"vslb", llvm_v16i8_ty>; ++ def int_s390_vsra : SystemZBinary<"vsra", llvm_v16i8_ty>; ++ def int_s390_vsrab : SystemZBinary<"vsrab", llvm_v16i8_ty>; ++ def int_s390_vsrl : SystemZBinary<"vsrl", llvm_v16i8_ty>; ++ def int_s390_vsrlb : SystemZBinary<"vsrlb", llvm_v16i8_ty>; ++ ++ def int_s390_vsldb : GCCBuiltin<"__builtin_s390_vsldb">, ++ Intrinsic<[llvm_v16i8_ty], ++ [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++ ++ defm int_s390_vscbi : SystemZBinaryBHFG<"vscbi">; ++ ++ def int_s390_vsq : SystemZBinary<"vsq", llvm_v16i8_ty>; ++ def int_s390_vsbiq : SystemZTernary<"vsbiq", llvm_v16i8_ty>; ++ def int_s390_vscbiq : SystemZBinary<"vscbiq", llvm_v16i8_ty>; ++ def 
int_s390_vsbcbiq : SystemZTernary<"vsbcbiq", llvm_v16i8_ty>; ++ ++ def int_s390_vsumb : SystemZBinaryConv<"vsumb", llvm_v4i32_ty, llvm_v16i8_ty>; ++ def int_s390_vsumh : SystemZBinaryConv<"vsumh", llvm_v4i32_ty, llvm_v8i16_ty>; ++ ++ def int_s390_vsumgh : SystemZBinaryConv<"vsumgh", llvm_v2i64_ty, ++ llvm_v8i16_ty>; ++ def int_s390_vsumgf : SystemZBinaryConv<"vsumgf", llvm_v2i64_ty, ++ llvm_v4i32_ty>; ++ ++ def int_s390_vsumqf : SystemZBinaryConv<"vsumqf", llvm_v16i8_ty, ++ llvm_v4i32_ty>; ++ def int_s390_vsumqg : SystemZBinaryConv<"vsumqg", llvm_v16i8_ty, ++ llvm_v2i64_ty>; ++ ++ def int_s390_vtm : SystemZBinaryConv<"vtm", llvm_i32_ty, llvm_v16i8_ty>; ++ ++ defm int_s390_vceq : SystemZCompareBHFG<"vceq">; ++ defm int_s390_vch : SystemZCompareBHFG<"vch">; ++ defm int_s390_vchl : SystemZCompareBHFG<"vchl">; ++ ++ defm int_s390_vfae : SystemZTernaryIntBHF<"vfae">; ++ defm int_s390_vfae : SystemZTernaryIntCCBHF; ++ defm int_s390_vfaez : SystemZTernaryIntBHF<"vfaez">; ++ defm int_s390_vfaez : SystemZTernaryIntCCBHF; ++ ++ defm int_s390_vfee : SystemZBinaryBHF<"vfee">; ++ defm int_s390_vfee : SystemZBinaryCCBHF; ++ defm int_s390_vfeez : SystemZBinaryBHF<"vfeez">; ++ defm int_s390_vfeez : SystemZBinaryCCBHF; ++ ++ defm int_s390_vfene : SystemZBinaryBHF<"vfene">; ++ defm int_s390_vfene : SystemZBinaryCCBHF; ++ defm int_s390_vfenez : SystemZBinaryBHF<"vfenez">; ++ defm int_s390_vfenez : SystemZBinaryCCBHF; ++ ++ defm int_s390_vistr : SystemZUnaryBHF<"vistr">; ++ defm int_s390_vistr : SystemZUnaryCCBHF; ++ ++ defm int_s390_vstrc : SystemZQuaternaryIntBHF<"vstrc">; ++ defm int_s390_vstrc : SystemZQuaternaryIntCCBHF; ++ defm int_s390_vstrcz : SystemZQuaternaryIntBHF<"vstrcz">; ++ defm int_s390_vstrcz : SystemZQuaternaryIntCCBHF; ++ ++ def int_s390_vfcedbs : SystemZBinaryConvCC; ++ def int_s390_vfchdbs : SystemZBinaryConvCC; ++ def int_s390_vfchedbs : SystemZBinaryConvCC; ++ ++ def int_s390_vftcidb : SystemZBinaryConvIntCC; ++ ++ def int_s390_vfidb : Intrinsic<[llvm_v2f64_ty], ++ [llvm_v2f64_ty, llvm_i32_ty, llvm_i32_ty], ++ [IntrNoMem]>; ++} +Index: llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +=================================================================== +--- llvm-36.orig/lib/CodeGen/SelectionDAG/DAGCombiner.cpp ++++ llvm-36/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +@@ -10496,18 +10496,13 @@ SDValue DAGCombiner::ReplaceExtractVecto + if (auto *ConstEltNo = dyn_cast(EltNo)) { + int Elt = ConstEltNo->getZExtValue(); + unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8; +- if (TLI.isBigEndian()) +- PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff; + Offset = DAG.getConstant(PtrOff, PtrType); + MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff); + } else { ++ Offset = DAG.getZExtOrTrunc(EltNo, SDLoc(EVE), PtrType); + Offset = DAG.getNode( +- ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo, +- DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType())); +- if (TLI.isBigEndian()) +- Offset = DAG.getNode( +- ISD::SUB, SDLoc(EVE), EltNo.getValueType(), +- DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset); ++ ISD::MUL, SDLoc(EVE), PtrType, Offset, ++ DAG.getConstant(VecEltVT.getStoreSize(), PtrType)); + MPI = OriginalLoad->getPointerInfo(); + } + NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset); +Index: llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +=================================================================== +--- llvm-36.orig/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp ++++ 
llvm-36/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +@@ -2888,7 +2888,10 @@ static EVT FindMemType(SelectionDAG& DAG + unsigned MemVTWidth = MemVT.getSizeInBits(); + if (MemVT.getSizeInBits() <= WidenEltWidth) + break; +- if (TLI.isTypeLegal(MemVT) && (WidenWidth % MemVTWidth) == 0 && ++ auto Action = TLI.getTypeAction(*DAG.getContext(), MemVT); ++ if ((Action == TargetLowering::TypeLegal || ++ Action == TargetLowering::TypePromoteInteger) && ++ (WidenWidth % MemVTWidth) == 0 && + isPowerOf2_32(WidenWidth / MemVTWidth) && + (MemVTWidth <= Width || + (Align!=0 && MemVTWidth<=AlignInBits && MemVTWidth<=Width+WidenEx))) { +Index: llvm-36/lib/Support/Host.cpp +=================================================================== +--- llvm-36.orig/lib/Support/Host.cpp ++++ llvm-36/lib/Support/Host.cpp +@@ -655,6 +655,28 @@ StringRef sys::getHostCPUName() { + StringRef Str(buffer, CPUInfoSize); + SmallVector Lines; + Str.split(Lines, "\n"); ++ ++ // Look for the CPU features. ++ SmallVector CPUFeatures; ++ for (unsigned I = 0, E = Lines.size(); I != E; ++I) ++ if (Lines[I].startswith("features")) { ++ size_t Pos = Lines[I].find(":"); ++ if (Pos != StringRef::npos) { ++ Lines[I].drop_front(Pos + 1).split(CPUFeatures, " "); ++ break; ++ } ++ } ++ ++ // We need to check for the presence of vector support independently of ++ // the machine type, since we may only use the vector register set when ++ // supported by the kernel (and hypervisor). ++ bool HaveVectorSupport = false; ++ for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { ++ if (CPUFeatures[I] == "vx") ++ HaveVectorSupport = true; ++ } ++ ++ // Now check the processor machine type. + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("processor ")) { + size_t Pos = Lines[I].find("machine = "); +@@ -662,6 +684,8 @@ StringRef sys::getHostCPUName() { + Pos += sizeof("machine = ") - 1; + unsigned int Id; + if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { ++ if (Id >= 2964 && HaveVectorSupport) ++ return "z13"; + if (Id >= 2827) + return "zEC12"; + if (Id >= 2817) +Index: llvm-36/lib/Support/Triple.cpp +=================================================================== +--- llvm-36.orig/lib/Support/Triple.cpp ++++ llvm-36/lib/Support/Triple.cpp +@@ -89,7 +89,7 @@ const char *Triple::getArchTypePrefix(Ar + case sparcv9: + case sparc: return "sparc"; + +- case systemz: return "systemz"; ++ case systemz: return "s390"; + + case x86: + case x86_64: return "x86"; +Index: llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp ++++ llvm-36/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp +@@ -39,13 +39,17 @@ enum RegisterKind { + ADDR64Reg, + FP32Reg, + FP64Reg, +- FP128Reg ++ FP128Reg, ++ VR32Reg, ++ VR64Reg, ++ VR128Reg + }; + + enum MemoryKind { + BDMem, + BDXMem, +- BDLMem ++ BDLMem, ++ BDVMem + }; + + class SystemZOperand : public MCParsedAsmOperand { +@@ -57,6 +61,7 @@ private: + KindReg, + KindAccessReg, + KindImm, ++ KindImmTLS, + KindMem + }; + +@@ -84,23 +89,31 @@ private: + }; + + // Base + Disp + Index, where Base and Index are LLVM registers or 0. +- // RegKind says what type the registers have (ADDR32Reg or ADDR64Reg). +- // Length is the operand length for D(L,B)-style operands, otherwise +- // it is null. ++ // MemKind says what type of memory this is and RegKind says what type ++ // the base register has (ADDR32Reg or ADDR64Reg). 
Length is the operand ++ // length for D(L,B)-style operands, otherwise it is null. + struct MemOp { +- unsigned Base : 8; +- unsigned Index : 8; +- unsigned RegKind : 8; +- unsigned Unused : 8; ++ unsigned Base : 12; ++ unsigned Index : 12; ++ unsigned MemKind : 4; ++ unsigned RegKind : 4; + const MCExpr *Disp; + const MCExpr *Length; + }; + ++ // Imm is an immediate operand, and Sym is an optional TLS symbol ++ // for use with a __tls_get_offset marker relocation. ++ struct ImmTLSOp { ++ const MCExpr *Imm; ++ const MCExpr *Sym; ++ }; ++ + union { + TokenOp Token; + RegOp Reg; + unsigned AccessReg; + const MCExpr *Imm; ++ ImmTLSOp ImmTLS; + MemOp Mem; + }; + +@@ -149,10 +162,11 @@ public: + return Op; + } + static std::unique_ptr +- createMem(RegisterKind RegKind, unsigned Base, const MCExpr *Disp, +- unsigned Index, const MCExpr *Length, SMLoc StartLoc, +- SMLoc EndLoc) { ++ createMem(MemoryKind MemKind, RegisterKind RegKind, unsigned Base, ++ const MCExpr *Disp, unsigned Index, const MCExpr *Length, ++ SMLoc StartLoc, SMLoc EndLoc) { + auto Op = make_unique(KindMem, StartLoc, EndLoc); ++ Op->Mem.MemKind = MemKind; + Op->Mem.RegKind = RegKind; + Op->Mem.Base = Base; + Op->Mem.Index = Index; +@@ -160,6 +174,14 @@ public: + Op->Mem.Length = Length; + return Op; + } ++ static std::unique_ptr ++ createImmTLS(const MCExpr *Imm, const MCExpr *Sym, ++ SMLoc StartLoc, SMLoc EndLoc) { ++ auto Op = make_unique(KindImmTLS, StartLoc, EndLoc); ++ Op->ImmTLS.Imm = Imm; ++ Op->ImmTLS.Sym = Sym; ++ return Op; ++ } + + // Token operands + bool isToken() const override { +@@ -200,24 +222,40 @@ public: + return Imm; + } + ++ // Immediate operands with optional TLS symbol. ++ bool isImmTLS() const { ++ return Kind == KindImmTLS; ++ } ++ + // Memory operands. + bool isMem() const override { + return Kind == KindMem; + } +- bool isMem(RegisterKind RegKind, MemoryKind MemKind) const { ++ bool isMem(MemoryKind MemKind) const { + return (Kind == KindMem && +- Mem.RegKind == RegKind && +- (MemKind == BDXMem || !Mem.Index) && +- (MemKind == BDLMem) == (Mem.Length != nullptr)); ++ (Mem.MemKind == MemKind || ++ // A BDMem can be treated as a BDXMem in which the index ++ // register field is 0. ++ (Mem.MemKind == BDMem && MemKind == BDXMem))); ++ } ++ bool isMem(MemoryKind MemKind, RegisterKind RegKind) const { ++ return isMem(MemKind) && Mem.RegKind == RegKind; + } +- bool isMemDisp12(RegisterKind RegKind, MemoryKind MemKind) const { +- return isMem(RegKind, MemKind) && inRange(Mem.Disp, 0, 0xfff); ++ bool isMemDisp12(MemoryKind MemKind, RegisterKind RegKind) const { ++ return isMem(MemKind, RegKind) && inRange(Mem.Disp, 0, 0xfff); + } +- bool isMemDisp20(RegisterKind RegKind, MemoryKind MemKind) const { +- return isMem(RegKind, MemKind) && inRange(Mem.Disp, -524288, 524287); ++ bool isMemDisp20(MemoryKind MemKind, RegisterKind RegKind) const { ++ return isMem(MemKind, RegKind) && inRange(Mem.Disp, -524288, 524287); + } + bool isMemDisp12Len8(RegisterKind RegKind) const { +- return isMemDisp12(RegKind, BDLMem) && inRange(Mem.Length, 1, 0x100); ++ return isMemDisp12(BDLMem, RegKind) && inRange(Mem.Length, 1, 0x100); ++ } ++ void addBDVAddrOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 3 && "Invalid number of operands"); ++ assert(isMem(BDVMem) && "Invalid operand type"); ++ Inst.addOperand(MCOperand::CreateReg(Mem.Base)); ++ addExpr(Inst, Mem.Disp); ++ Inst.addOperand(MCOperand::CreateReg(Mem.Index)); + } + + // Override MCParsedAsmOperand. 
+@@ -242,24 +280,31 @@ public: + } + void addBDAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 2 && "Invalid number of operands"); +- assert(Kind == KindMem && Mem.Index == 0 && "Invalid operand type"); ++ assert(isMem(BDMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + } + void addBDXAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); +- assert(Kind == KindMem && "Invalid operand type"); ++ assert(isMem(BDXMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + Inst.addOperand(MCOperand::CreateReg(Mem.Index)); + } + void addBDLAddrOperands(MCInst &Inst, unsigned N) const { + assert(N == 3 && "Invalid number of operands"); +- assert(Kind == KindMem && "Invalid operand type"); ++ assert(isMem(BDLMem) && "Invalid operand type"); + Inst.addOperand(MCOperand::CreateReg(Mem.Base)); + addExpr(Inst, Mem.Disp); + addExpr(Inst, Mem.Length); + } ++ void addImmTLSOperands(MCInst &Inst, unsigned N) const { ++ assert(N == 2 && "Invalid number of operands"); ++ assert(Kind == KindImmTLS && "Invalid operand type"); ++ addExpr(Inst, ImmTLS.Imm); ++ if (ImmTLS.Sym) ++ addExpr(Inst, ImmTLS.Sym); ++ } + + // Used by the TableGen code to check for particular operand types. + bool isGR32() const { return isReg(GR32Reg); } +@@ -273,17 +318,26 @@ public: + bool isFP32() const { return isReg(FP32Reg); } + bool isFP64() const { return isReg(FP64Reg); } + bool isFP128() const { return isReg(FP128Reg); } +- bool isBDAddr32Disp12() const { return isMemDisp12(ADDR32Reg, BDMem); } +- bool isBDAddr32Disp20() const { return isMemDisp20(ADDR32Reg, BDMem); } +- bool isBDAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDMem); } +- bool isBDAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDMem); } +- bool isBDXAddr64Disp12() const { return isMemDisp12(ADDR64Reg, BDXMem); } +- bool isBDXAddr64Disp20() const { return isMemDisp20(ADDR64Reg, BDXMem); } ++ bool isVR32() const { return isReg(VR32Reg); } ++ bool isVR64() const { return isReg(VR64Reg); } ++ bool isVF128() const { return false; } ++ bool isVR128() const { return isReg(VR128Reg); } ++ bool isBDAddr32Disp12() const { return isMemDisp12(BDMem, ADDR32Reg); } ++ bool isBDAddr32Disp20() const { return isMemDisp20(BDMem, ADDR32Reg); } ++ bool isBDAddr64Disp12() const { return isMemDisp12(BDMem, ADDR64Reg); } ++ bool isBDAddr64Disp20() const { return isMemDisp20(BDMem, ADDR64Reg); } ++ bool isBDXAddr64Disp12() const { return isMemDisp12(BDXMem, ADDR64Reg); } ++ bool isBDXAddr64Disp20() const { return isMemDisp20(BDXMem, ADDR64Reg); } + bool isBDLAddr64Disp12Len8() const { return isMemDisp12Len8(ADDR64Reg); } ++ bool isBDVAddr64Disp12() const { return isMemDisp12(BDVMem, ADDR64Reg); } ++ bool isU1Imm() const { return isImm(0, 1); } ++ bool isU2Imm() const { return isImm(0, 3); } ++ bool isU3Imm() const { return isImm(0, 7); } + bool isU4Imm() const { return isImm(0, 15); } + bool isU6Imm() const { return isImm(0, 63); } + bool isU8Imm() const { return isImm(0, 255); } + bool isS8Imm() const { return isImm(-128, 127); } ++ bool isU12Imm() const { return isImm(0, 4095); } + bool isU16Imm() const { return isImm(0, 65535); } + bool isS16Imm() const { return isImm(-32768, 32767); } + bool isU32Imm() const { return isImm(0, (1LL << 32) - 1); } +@@ -300,6 +354,7 @@ private: + enum RegisterGroup { + RegGR, + RegFP, ++ RegV, + RegAccess + }; + struct Register { +@@ -318,12 +373,15 @@ private: + 
RegisterKind Kind); + + bool parseAddress(unsigned &Base, const MCExpr *&Disp, +- unsigned &Index, const MCExpr *&Length, ++ unsigned &Index, bool &IsVector, const MCExpr *&Length, + const unsigned *Regs, RegisterKind RegKind); + + OperandMatchResultTy parseAddress(OperandVector &Operands, +- const unsigned *Regs, RegisterKind RegKind, +- MemoryKind MemKind); ++ MemoryKind MemKind, const unsigned *Regs, ++ RegisterKind RegKind); ++ ++ OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, ++ int64_t MaxVal, bool AllowTLS); + + bool parseOperand(OperandVector &Operands, StringRef Mnemonic); + +@@ -382,26 +440,45 @@ public: + OperandMatchResultTy parseFP128(OperandVector &Operands) { + return parseRegister(Operands, RegFP, SystemZMC::FP128Regs, FP128Reg); + } ++ OperandMatchResultTy parseVR32(OperandVector &Operands) { ++ return parseRegister(Operands, RegV, SystemZMC::VR32Regs, VR32Reg); ++ } ++ OperandMatchResultTy parseVR64(OperandVector &Operands) { ++ return parseRegister(Operands, RegV, SystemZMC::VR64Regs, VR64Reg); ++ } ++ OperandMatchResultTy parseVF128(OperandVector &Operands) { ++ llvm_unreachable("Shouldn't be used as an operand"); ++ } ++ OperandMatchResultTy parseVR128(OperandVector &Operands) { ++ return parseRegister(Operands, RegV, SystemZMC::VR128Regs, VR128Reg); ++ } + OperandMatchResultTy parseBDAddr32(OperandVector &Operands) { +- return parseAddress(Operands, SystemZMC::GR32Regs, ADDR32Reg, BDMem); ++ return parseAddress(Operands, BDMem, SystemZMC::GR32Regs, ADDR32Reg); + } + OperandMatchResultTy parseBDAddr64(OperandVector &Operands) { +- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDMem); ++ return parseAddress(Operands, BDMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDXAddr64(OperandVector &Operands) { +- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDXMem); ++ return parseAddress(Operands, BDXMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseBDLAddr64(OperandVector &Operands) { +- return parseAddress(Operands, SystemZMC::GR64Regs, ADDR64Reg, BDLMem); ++ return parseAddress(Operands, BDLMem, SystemZMC::GR64Regs, ADDR64Reg); ++ } ++ OperandMatchResultTy parseBDVAddr64(OperandVector &Operands) { ++ return parseAddress(Operands, BDVMem, SystemZMC::GR64Regs, ADDR64Reg); + } + OperandMatchResultTy parseAccessReg(OperandVector &Operands); +- OperandMatchResultTy parsePCRel(OperandVector &Operands, int64_t MinVal, +- int64_t MaxVal); + OperandMatchResultTy parsePCRel16(OperandVector &Operands) { +- return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1); ++ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, false); + } + OperandMatchResultTy parsePCRel32(OperandVector &Operands) { +- return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1); ++ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, false); ++ } ++ OperandMatchResultTy parsePCRelTLS16(OperandVector &Operands) { ++ return parsePCRel(Operands, -(1LL << 16), (1LL << 16) - 1, true); ++ } ++ OperandMatchResultTy parsePCRelTLS32(OperandVector &Operands) { ++ return parsePCRel(Operands, -(1LL << 32), (1LL << 32) - 1, true); + } + }; + } // end anonymous namespace +@@ -443,6 +520,8 @@ bool SystemZAsmParser::parseRegister(Reg + Reg.Group = RegGR; + else if (Prefix == 'f' && Reg.Num < 16) + Reg.Group = RegFP; ++ else if (Prefix == 'v' && Reg.Num < 32) ++ Reg.Group = RegV; + else if (Prefix == 'a' && Reg.Num < 16) + Reg.Group = RegAccess; + else +@@ -493,8 +572,8 @@ SystemZAsmParser::parseRegister(OperandV 
+ // Regs maps asm register numbers to LLVM register numbers and RegKind + // says what kind of address register we're using (ADDR32Reg or ADDR64Reg). + bool SystemZAsmParser::parseAddress(unsigned &Base, const MCExpr *&Disp, +- unsigned &Index, const MCExpr *&Length, +- const unsigned *Regs, ++ unsigned &Index, bool &IsVector, ++ const MCExpr *&Length, const unsigned *Regs, + RegisterKind RegKind) { + // Parse the displacement, which must always be present. + if (getParser().parseExpression(Disp)) +@@ -503,6 +582,7 @@ bool SystemZAsmParser::parseAddress(unsi + // Parse the optional base and index. + Index = 0; + Base = 0; ++ IsVector = false; + Length = nullptr; + if (getLexer().is(AsmToken::LParen)) { + Parser.Lex(); +@@ -510,12 +590,23 @@ bool SystemZAsmParser::parseAddress(unsi + if (getLexer().is(AsmToken::Percent)) { + // Parse the first register and decide whether it's a base or an index. + Register Reg; +- if (parseRegister(Reg, RegGR, Regs, RegKind)) ++ if (parseRegister(Reg)) + return true; +- if (getLexer().is(AsmToken::Comma)) +- Index = Reg.Num; +- else +- Base = Reg.Num; ++ if (Reg.Group == RegV) { ++ // A vector index register. The base register is optional. ++ IsVector = true; ++ Index = SystemZMC::VR128Regs[Reg.Num]; ++ } else if (Reg.Group == RegGR) { ++ if (Reg.Num == 0) ++ return Error(Reg.StartLoc, "%r0 used in an address"); ++ // If the are two registers, the first one is the index and the ++ // second is the base. ++ if (getLexer().is(AsmToken::Comma)) ++ Index = Regs[Reg.Num]; ++ else ++ Base = Regs[Reg.Num]; ++ } else ++ return Error(Reg.StartLoc, "invalid address register"); + } else { + // Parse the length. + if (getParser().parseExpression(Length)) +@@ -542,37 +633,46 @@ bool SystemZAsmParser::parseAddress(unsi + // Parse a memory operand and add it to Operands. The other arguments + // are as above. 
+ SystemZAsmParser::OperandMatchResultTy +-SystemZAsmParser::parseAddress(OperandVector &Operands, const unsigned *Regs, +- RegisterKind RegKind, MemoryKind MemKind) { ++SystemZAsmParser::parseAddress(OperandVector &Operands, MemoryKind MemKind, ++ const unsigned *Regs, RegisterKind RegKind) { + SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base, Index; ++ bool IsVector; + const MCExpr *Disp; + const MCExpr *Length; +- if (parseAddress(Base, Disp, Index, Length, Regs, RegKind)) ++ if (parseAddress(Base, Disp, Index, IsVector, Length, Regs, RegKind)) + return MatchOperand_ParseFail; + +- if (Index && MemKind != BDXMem) +- { +- Error(StartLoc, "invalid use of indexed addressing"); +- return MatchOperand_ParseFail; +- } ++ if (IsVector && MemKind != BDVMem) { ++ Error(StartLoc, "invalid use of vector addressing"); ++ return MatchOperand_ParseFail; ++ } + +- if (Length && MemKind != BDLMem) +- { +- Error(StartLoc, "invalid use of length addressing"); +- return MatchOperand_ParseFail; +- } ++ if (!IsVector && MemKind == BDVMem) { ++ Error(StartLoc, "vector index required in address"); ++ return MatchOperand_ParseFail; ++ } + +- if (!Length && MemKind == BDLMem) +- { +- Error(StartLoc, "missing length in address"); +- return MatchOperand_ParseFail; +- } ++ if (Index && MemKind != BDXMem && MemKind != BDVMem) { ++ Error(StartLoc, "invalid use of indexed addressing"); ++ return MatchOperand_ParseFail; ++ } ++ ++ if (Length && MemKind != BDLMem) { ++ Error(StartLoc, "invalid use of length addressing"); ++ return MatchOperand_ParseFail; ++ } ++ ++ if (!Length && MemKind == BDLMem) { ++ Error(StartLoc, "missing length in address"); ++ return MatchOperand_ParseFail; ++ } + + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +- Operands.push_back(SystemZOperand::createMem(RegKind, Base, Disp, Index, +- Length, StartLoc, EndLoc)); ++ Operands.push_back(SystemZOperand::createMem(MemKind, RegKind, Base, Disp, ++ Index, Length, StartLoc, ++ EndLoc)); + return MatchOperand_Success; + } + +@@ -589,6 +689,8 @@ bool SystemZAsmParser::ParseRegister(uns + RegNo = SystemZMC::GR64Regs[Reg.Num]; + else if (Reg.Group == RegFP) + RegNo = SystemZMC::FP64Regs[Reg.Num]; ++ else if (Reg.Group == RegV) ++ RegNo = SystemZMC::VR128Regs[Reg.Num]; + else + // FIXME: Access registers aren't modelled as LLVM registers yet. + return Error(Reg.StartLoc, "invalid operand for instruction"); +@@ -661,8 +763,10 @@ bool SystemZAsmParser::parseOperand(Oper + // so we treat any plain expression as an immediate. + SMLoc StartLoc = Parser.getTok().getLoc(); + unsigned Base, Index; ++ bool IsVector; + const MCExpr *Expr, *Length; +- if (parseAddress(Base, Expr, Index, Length, SystemZMC::GR64Regs, ADDR64Reg)) ++ if (parseAddress(Base, Expr, Index, IsVector, Length, SystemZMC::GR64Regs, ++ ADDR64Reg)) + return true; + + SMLoc EndLoc = +@@ -743,7 +847,7 @@ SystemZAsmParser::parseAccessReg(Operand + + SystemZAsmParser::OperandMatchResultTy + SystemZAsmParser::parsePCRel(OperandVector &Operands, int64_t MinVal, +- int64_t MaxVal) { ++ int64_t MaxVal, bool AllowTLS) { + MCContext &Ctx = getContext(); + MCStreamer &Out = getStreamer(); + const MCExpr *Expr; +@@ -766,9 +870,54 @@ SystemZAsmParser::parsePCRel(OperandVect + Expr = Value == 0 ? Base : MCBinaryExpr::CreateAdd(Base, Expr, Ctx); + } + ++ // Optionally match :tls_gdcall: or :tls_ldcall: followed by a TLS symbol. 
++ const MCExpr *Sym = nullptr; ++ if (AllowTLS && getLexer().is(AsmToken::Colon)) { ++ Parser.Lex(); ++ ++ if (Parser.getTok().isNot(AsmToken::Identifier)) { ++ Error(Parser.getTok().getLoc(), "unexpected token"); ++ return MatchOperand_ParseFail; ++ } ++ ++ MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None; ++ StringRef Name = Parser.getTok().getString(); ++ if (Name == "tls_gdcall") ++ Kind = MCSymbolRefExpr::VK_TLSGD; ++ else if (Name == "tls_ldcall") ++ Kind = MCSymbolRefExpr::VK_TLSLDM; ++ else { ++ Error(Parser.getTok().getLoc(), "unknown TLS tag"); ++ return MatchOperand_ParseFail; ++ } ++ Parser.Lex(); ++ ++ if (Parser.getTok().isNot(AsmToken::Colon)) { ++ Error(Parser.getTok().getLoc(), "unexpected token"); ++ return MatchOperand_ParseFail; ++ } ++ Parser.Lex(); ++ ++ if (Parser.getTok().isNot(AsmToken::Identifier)) { ++ Error(Parser.getTok().getLoc(), "unexpected token"); ++ return MatchOperand_ParseFail; ++ } ++ ++ StringRef Identifier = Parser.getTok().getString(); ++ Sym = MCSymbolRefExpr::Create(Ctx.GetOrCreateSymbol(Identifier), ++ Kind, Ctx); ++ Parser.Lex(); ++ } ++ + SMLoc EndLoc = + SMLoc::getFromPointer(Parser.getTok().getLoc().getPointer() - 1); +- Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); ++ ++ if (AllowTLS) ++ Operands.push_back(SystemZOperand::createImmTLS(Expr, Sym, ++ StartLoc, EndLoc)); ++ else ++ Operands.push_back(SystemZOperand::createImm(Expr, StartLoc, EndLoc)); ++ + return MatchOperand_Success; + } + +Index: llvm-36/lib/Target/SystemZ/CMakeLists.txt +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/CMakeLists.txt ++++ llvm-36/lib/Target/SystemZ/CMakeLists.txt +@@ -20,6 +20,7 @@ add_llvm_target(SystemZCodeGen + SystemZISelDAGToDAG.cpp + SystemZISelLowering.cpp + SystemZInstrInfo.cpp ++ SystemZLDCleanup.cpp + SystemZLongBranch.cpp + SystemZMachineFunctionInfo.cpp + SystemZMCInstLower.cpp +@@ -28,6 +29,7 @@ add_llvm_target(SystemZCodeGen + SystemZShortenInst.cpp + SystemZSubtarget.cpp + SystemZTargetMachine.cpp ++ SystemZTargetTransformInfo.cpp + ) + + add_subdirectory(AsmParser) +Index: llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp ++++ llvm-36/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +@@ -47,8 +47,8 @@ extern "C" void LLVMInitializeSystemZDis + } + + static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo, +- const unsigned *Regs) { +- assert(RegNo < 16 && "Invalid register"); ++ const unsigned *Regs, unsigned Size) { ++ assert(RegNo < Size && "Invalid register"); + RegNo = Regs[RegNo]; + if (RegNo == 0) + return MCDisassembler::Fail; +@@ -59,61 +59,81 @@ static DecodeStatus decodeRegisterClass( + static DecodeStatus DecodeGR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR32Regs, 16); + } + + static DecodeStatus DecodeGRH32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::GRH32Regs, 16); + } + + static DecodeStatus DecodeGR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, 
RegNo, SystemZMC::GR64Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); + } + + static DecodeStatus DecodeGR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR128Regs, 16); + } + + static DecodeStatus DecodeADDR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::GR64Regs, 16); + } + + static DecodeStatus DecodeFP32BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP32Regs, 16); + } + + static DecodeStatus DecodeFP64BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP64Regs, 16); + } + + static DecodeStatus DecodeFP128BitRegisterClass(MCInst &Inst, uint64_t RegNo, + uint64_t Address, + const void *Decoder) { +- return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs); ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::FP128Regs, 16); ++} ++ ++static DecodeStatus DecodeVR32BitRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR32Regs, 32); ++} ++ ++static DecodeStatus DecodeVR64BitRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR64Regs, 32); ++} ++ ++static DecodeStatus DecodeVR128BitRegisterClass(MCInst &Inst, uint64_t RegNo, ++ uint64_t Address, ++ const void *Decoder) { ++ return decodeRegisterClass(Inst, RegNo, SystemZMC::VR128Regs, 32); + } + + template + static DecodeStatus decodeUImmOperand(MCInst &Inst, uint64_t Imm) { +- assert(isUInt(Imm) && "Invalid immediate"); ++ if (!isUInt(Imm)) ++ return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(Imm)); + return MCDisassembler::Success; + } + + template + static DecodeStatus decodeSImmOperand(MCInst &Inst, uint64_t Imm) { +- assert(isUInt(Imm) && "Invalid immediate"); ++ if (!isUInt(Imm)) ++ return MCDisassembler::Fail; + Inst.addOperand(MCOperand::CreateImm(SignExtend64(Imm))); + return MCDisassembler::Success; + } +@@ -124,6 +144,21 @@ static DecodeStatus decodeAccessRegOpera + return decodeUImmOperand<4>(Inst, Imm); + } + ++static DecodeStatus decodeU1ImmOperand(MCInst &Inst, uint64_t Imm, ++ uint64_t Address, const void *Decoder) { ++ return decodeUImmOperand<1>(Inst, Imm); ++} ++ ++static DecodeStatus decodeU2ImmOperand(MCInst &Inst, uint64_t Imm, ++ uint64_t Address, const void *Decoder) { ++ return decodeUImmOperand<2>(Inst, Imm); ++} ++ ++static DecodeStatus decodeU3ImmOperand(MCInst &Inst, uint64_t Imm, ++ uint64_t Address, const void *Decoder) { ++ return decodeUImmOperand<3>(Inst, Imm); ++} ++ + static DecodeStatus decodeU4ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<4>(Inst, Imm); +@@ -139,6 +174,11 @@ static DecodeStatus decodeU8ImmOperand(M + return decodeUImmOperand<8>(Inst, Imm); + } + ++static DecodeStatus decodeU12ImmOperand(MCInst &Inst, uint64_t Imm, ++ uint64_t Address, const void 
*Decoder) { ++ return decodeUImmOperand<12>(Inst, Imm); ++} ++ + static DecodeStatus decodeU16ImmOperand(MCInst &Inst, uint64_t Imm, + uint64_t Address, const void *Decoder) { + return decodeUImmOperand<16>(Inst, Imm); +@@ -240,6 +280,18 @@ static DecodeStatus decodeBDLAddr12Len8O + return MCDisassembler::Success; + } + ++static DecodeStatus decodeBDVAddr12Operand(MCInst &Inst, uint64_t Field, ++ const unsigned *Regs) { ++ uint64_t Index = Field >> 16; ++ uint64_t Base = (Field >> 12) & 0xf; ++ uint64_t Disp = Field & 0xfff; ++ assert(Index < 32 && "Invalid BDVAddr12"); ++ Inst.addOperand(MCOperand::CreateReg(Base == 0 ? 0 : Regs[Base])); ++ Inst.addOperand(MCOperand::CreateImm(Disp)); ++ Inst.addOperand(MCOperand::CreateReg(SystemZMC::VR128Regs[Index])); ++ return MCDisassembler::Success; ++} ++ + static DecodeStatus decodeBDAddr32Disp12Operand(MCInst &Inst, uint64_t Field, + uint64_t Address, + const void *Decoder) { +@@ -283,6 +335,12 @@ static DecodeStatus decodeBDLAddr64Disp1 + return decodeBDLAddr12Len8Operand(Inst, Field, SystemZMC::GR64Regs); + } + ++static DecodeStatus decodeBDVAddr64Disp12Operand(MCInst &Inst, uint64_t Field, ++ uint64_t Address, ++ const void *Decoder) { ++ return decodeBDVAddr12Operand(Inst, Field, SystemZMC::GR64Regs); ++} ++ + #include "SystemZGenDisassemblerTables.inc" + + DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, +Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp ++++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.cpp +@@ -10,6 +10,7 @@ + #include "SystemZInstPrinter.h" + #include "llvm/MC/MCExpr.h" + #include "llvm/MC/MCInstrInfo.h" ++#include "llvm/MC/MCSymbol.h" + #include "llvm/Support/raw_ostream.h" + + using namespace llvm; +@@ -21,13 +22,17 @@ using namespace llvm; + void SystemZInstPrinter::printAddress(unsigned Base, int64_t Disp, + unsigned Index, raw_ostream &O) { + O << Disp; +- if (Base) { ++ if (Base || Index) { + O << '('; +- if (Index) +- O << '%' << getRegisterName(Index) << ','; +- O << '%' << getRegisterName(Base) << ')'; +- } else +- assert(!Index && "Shouldn't have an index without a base"); ++ if (Index) { ++ O << '%' << getRegisterName(Index); ++ if (Base) ++ O << ','; ++ } ++ if (Base) ++ O << '%' << getRegisterName(Base); ++ O << ')'; ++ } + } + + void SystemZInstPrinter::printOperand(const MCOperand &MO, raw_ostream &O) { +@@ -51,60 +56,78 @@ void SystemZInstPrinter::printRegName(ra + O << '%' << getRegisterName(RegNo); + } + +-void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, +- raw_ostream &O) { ++template ++void printUImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isUInt<4>(Value) && "Invalid u4imm argument"); ++ assert(isUInt(Value) && "Invalid uimm argument"); + O << Value; + } + +-void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, +- raw_ostream &O) { ++template ++void printSImmOperand(const MCInst *MI, int OpNum, raw_ostream &O) { + int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isUInt<6>(Value) && "Invalid u6imm argument"); ++ assert(isInt(Value) && "Invalid simm argument"); + O << Value; + } + ++void SystemZInstPrinter::printU1ImmOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printUImmOperand<1>(MI, OpNum, O); ++} ++ ++void SystemZInstPrinter::printU2ImmOperand(const MCInst *MI, int 
OpNum, ++ raw_ostream &O) { ++ printUImmOperand<2>(MI, OpNum, O); ++} ++ ++void SystemZInstPrinter::printU3ImmOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printUImmOperand<3>(MI, OpNum, O); ++} ++ ++void SystemZInstPrinter::printU4ImmOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printUImmOperand<4>(MI, OpNum, O); ++} ++ ++void SystemZInstPrinter::printU6ImmOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printUImmOperand<6>(MI, OpNum, O); ++} ++ + void SystemZInstPrinter::printS8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isInt<8>(Value) && "Invalid s8imm argument"); +- O << Value; ++ printSImmOperand<8>(MI, OpNum, O); + } + + void SystemZInstPrinter::printU8ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isUInt<8>(Value) && "Invalid u8imm argument"); +- O << Value; ++ printUImmOperand<8>(MI, OpNum, O); ++} ++ ++void SystemZInstPrinter::printU12ImmOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printUImmOperand<12>(MI, OpNum, O); + } + + void SystemZInstPrinter::printS16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isInt<16>(Value) && "Invalid s16imm argument"); +- O << Value; ++ printSImmOperand<16>(MI, OpNum, O); + } + + void SystemZInstPrinter::printU16ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isUInt<16>(Value) && "Invalid u16imm argument"); +- O << Value; ++ printUImmOperand<16>(MI, OpNum, O); + } + + void SystemZInstPrinter::printS32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isInt<32>(Value) && "Invalid s32imm argument"); +- O << Value; ++ printSImmOperand<32>(MI, OpNum, O); + } + + void SystemZInstPrinter::printU32ImmOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { +- int64_t Value = MI->getOperand(OpNum).getImm(); +- assert(isUInt<32>(Value) && "Invalid u32imm argument"); +- O << Value; ++ printUImmOperand<32>(MI, OpNum, O); + } + + void SystemZInstPrinter::printAccessRegOperand(const MCInst *MI, int OpNum, +@@ -124,6 +147,29 @@ void SystemZInstPrinter::printPCRelOpera + O << *MO.getExpr(); + } + ++void SystemZInstPrinter::printPCRelTLSOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ // Output the PC-relative operand. ++ printPCRelOperand(MI, OpNum, O); ++ ++ // Output the TLS marker if present. 
++ if ((unsigned)OpNum + 1 < MI->getNumOperands()) { ++ const MCOperand &MO = MI->getOperand(OpNum + 1); ++ const MCSymbolRefExpr &refExp = cast(*MO.getExpr()); ++ switch (refExp.getKind()) { ++ case MCSymbolRefExpr::VK_TLSGD: ++ O << ":tls_gdcall:"; ++ break; ++ case MCSymbolRefExpr::VK_TLSLDM: ++ O << ":tls_ldcall:"; ++ break; ++ default: ++ llvm_unreachable("Unexpected symbol kind"); ++ } ++ O << refExp.getSymbol().getName(); ++ } ++} ++ + void SystemZInstPrinter::printOperand(const MCInst *MI, int OpNum, + raw_ostream &O) { + printOperand(MI->getOperand(OpNum), O); +@@ -153,6 +199,13 @@ void SystemZInstPrinter::printBDLAddrOpe + O << ')'; + } + ++void SystemZInstPrinter::printBDVAddrOperand(const MCInst *MI, int OpNum, ++ raw_ostream &O) { ++ printAddress(MI->getOperand(OpNum).getReg(), ++ MI->getOperand(OpNum + 1).getImm(), ++ MI->getOperand(OpNum + 2).getReg(), O); ++} ++ + void SystemZInstPrinter::printCond4Operand(const MCInst *MI, int OpNum, + raw_ostream &O) { + static const char *const CondNames[] = { +Index: llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h ++++ llvm-36/lib/Target/SystemZ/InstPrinter/SystemZInstPrinter.h +@@ -47,15 +47,21 @@ private: + void printBDAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDXAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printBDLAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printBDVAddrOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printU1ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printU2ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printU3ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU4ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU6ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU8ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printU12ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU16ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printS32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printU32ImmOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printPCRelOperand(const MCInst *MI, int OpNum, raw_ostream &O); ++ void printPCRelTLSOperand(const MCInst *MI, int OpNum, raw_ostream &O); + void printAccessRegOperand(const MCInst *MI, int OpNum, raw_ostream &O); + + // Print the mnemonic for a condition-code mask ("ne", "lh", etc.) 
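For reference, printPCRelTLSOperand above implements the s390 assembler convention of tagging a TLS call with the model it was generated for: the marker follows the call target, as in "brasl %r14, __tls_get_offset@PLT:tls_gdcall:x". A minimal stand-alone sketch of that marker selection (illustrative only, not part of the patch; TLSModel and printTLSCall are made-up names):

  #include <iostream>
  #include <string>

  enum class TLSModel { GeneralDynamic, LocalDynamic };

  // Mirrors printPCRelTLSOperand: print the PC-relative target first,
  // then the marker naming the TLS model, then the symbol it applies to.
  void printTLSCall(std::ostream &OS, const std::string &Target,
                    TLSModel M, const std::string &Sym) {
    OS << "brasl %r14, " << Target
       << (M == TLSModel::GeneralDynamic ? ":tls_gdcall:" : ":tls_ldcall:")
       << Sym << '\n';
  }

  int main() {
    // Prints: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x
    printTLSCall(std::cout, "__tls_get_offset@PLT",
                 TLSModel::GeneralDynamic, "x");
  }

The marker is what lets the linker later relax a general-dynamic or local-dynamic TLS sequence; the FK_390_TLS_CALL fixup introduced further below carries the same information at the object-file level.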
+Index: llvm-36/lib/Target/SystemZ/LLVMBuild.txt +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/LLVMBuild.txt ++++ llvm-36/lib/Target/SystemZ/LLVMBuild.txt +@@ -31,5 +31,5 @@ has_jit = 1 + type = Library + name = SystemZCodeGen + parent = SystemZ +-required_libraries = AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target ++required_libraries = Analysis AsmPrinter CodeGen Core MC SelectionDAG Support SystemZAsmPrinter SystemZDesc SystemZInfo Target + add_to_library_groups = SystemZ +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp +@@ -27,9 +27,10 @@ static uint64_t extractBitsForFixup(MCFi + switch (unsigned(Kind)) { + case SystemZ::FK_390_PC16DBL: + case SystemZ::FK_390_PC32DBL: +- case SystemZ::FK_390_PLT16DBL: +- case SystemZ::FK_390_PLT32DBL: + return (int64_t)Value / 2; ++ ++ case SystemZ::FK_390_TLS_CALL: ++ return 0; + } + + llvm_unreachable("Unknown fixup kind!"); +@@ -72,8 +73,7 @@ SystemZMCAsmBackend::getFixupKindInfo(MC + const static MCFixupKindInfo Infos[SystemZ::NumTargetFixupKinds] = { + { "FK_390_PC16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, + { "FK_390_PC32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel }, +- { "FK_390_PLT16DBL", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, +- { "FK_390_PLT32DBL", 0, 32, MCFixupKindInfo::FKF_IsPCRel } ++ { "FK_390_TLS_CALL", 0, 0, 0 } + }; + + if (Kind < FirstTargetFixupKind) +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp +@@ -70,24 +70,43 @@ private: + uint64_t getBDLAddr12Len8Encoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const; ++ uint64_t getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const; + + // Operand OpNum of MI needs a PC-relative fixup of kind Kind at + // Offset bytes from the start of MI. Add the fixup to Fixups + // and return the in-place addend, which since we're a RELA target +- // is always 0. ++ // is always 0. If AllowTLS is true and optional operand OpNum + 1 ++ // is present, also emit a TLS call fixup for it. 
+ uint64_t getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, +- unsigned Kind, int64_t Offset) const; ++ unsigned Kind, int64_t Offset, ++ bool AllowTLS) const; + + uint64_t getPC16DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { +- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC16DBL, 2); ++ return getPCRelEncoding(MI, OpNum, Fixups, ++ SystemZ::FK_390_PC16DBL, 2, false); + } + uint64_t getPC32DBLEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, + const MCSubtargetInfo &STI) const { +- return getPCRelEncoding(MI, OpNum, Fixups, SystemZ::FK_390_PC32DBL, 2); ++ return getPCRelEncoding(MI, OpNum, Fixups, ++ SystemZ::FK_390_PC32DBL, 2, false); ++ } ++ uint64_t getPC16DBLTLSEncoding(const MCInst &MI, unsigned OpNum, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ return getPCRelEncoding(MI, OpNum, Fixups, ++ SystemZ::FK_390_PC16DBL, 2, true); ++ } ++ uint64_t getPC32DBLTLSEncoding(const MCInst &MI, unsigned OpNum, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ return getPCRelEncoding(MI, OpNum, Fixups, ++ SystemZ::FK_390_PC32DBL, 2, true); + } + }; + } // end anonymous namespace +@@ -178,10 +197,22 @@ getBDLAddr12Len8Encoding(const MCInst &M + return (Len << 16) | (Base << 12) | Disp; + } + ++uint64_t SystemZMCCodeEmitter:: ++getBDVAddr12Encoding(const MCInst &MI, unsigned OpNum, ++ SmallVectorImpl &Fixups, ++ const MCSubtargetInfo &STI) const { ++ uint64_t Base = getMachineOpValue(MI, MI.getOperand(OpNum), Fixups, STI); ++ uint64_t Disp = getMachineOpValue(MI, MI.getOperand(OpNum + 1), Fixups, STI); ++ uint64_t Index = getMachineOpValue(MI, MI.getOperand(OpNum + 2), Fixups, STI); ++ assert(isUInt<4>(Base) && isUInt<12>(Disp) && isUInt<5>(Index)); ++ return (Index << 16) | (Base << 12) | Disp; ++} ++ + uint64_t + SystemZMCCodeEmitter::getPCRelEncoding(const MCInst &MI, unsigned OpNum, + SmallVectorImpl &Fixups, +- unsigned Kind, int64_t Offset) const { ++ unsigned Kind, int64_t Offset, ++ bool AllowTLS) const { + const MCOperand &MO = MI.getOperand(OpNum); + const MCExpr *Expr; + if (MO.isImm()) +@@ -198,6 +229,13 @@ SystemZMCCodeEmitter::getPCRelEncoding(c + } + } + Fixups.push_back(MCFixup::Create(Offset, Expr, (MCFixupKind)Kind)); ++ ++ // Output the fixup for the TLS marker if present. ++ if (AllowTLS && OpNum + 1 < MI.getNumOperands()) { ++ const MCOperand &MOTLS = MI.getOperand(OpNum + 1); ++ Fixups.push_back(MCFixup::Create(0, MOTLS.getExpr(), ++ (MCFixupKind)SystemZ::FK_390_TLS_CALL)); ++ } + return 0; + } + +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCFixups.h +@@ -18,8 +18,7 @@ enum FixupKind { + // These correspond directly to R_390_* relocations. 
+ FK_390_PC16DBL = FirstTargetFixupKind, + FK_390_PC32DBL, +- FK_390_PLT16DBL, +- FK_390_PLT32DBL, ++ FK_390_TLS_CALL, + + // Marker + LastTargetFixupKind, +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCObjectWriter.cpp +@@ -55,8 +55,6 @@ static unsigned getPCRelReloc(unsigned K + case FK_Data_8: return ELF::R_390_PC64; + case SystemZ::FK_390_PC16DBL: return ELF::R_390_PC16DBL; + case SystemZ::FK_390_PC32DBL: return ELF::R_390_PC32DBL; +- case SystemZ::FK_390_PLT16DBL: return ELF::R_390_PLT16DBL; +- case SystemZ::FK_390_PLT32DBL: return ELF::R_390_PLT32DBL; + } + llvm_unreachable("Unsupported PC-relative address"); + } +@@ -70,6 +68,35 @@ static unsigned getTLSLEReloc(unsigned K + llvm_unreachable("Unsupported absolute address"); + } + ++// Return the R_390_TLS_LDO* relocation type for MCFixupKind Kind. ++static unsigned getTLSLDOReloc(unsigned Kind) { ++ switch (Kind) { ++ case FK_Data_4: return ELF::R_390_TLS_LDO32; ++ case FK_Data_8: return ELF::R_390_TLS_LDO64; ++ } ++ llvm_unreachable("Unsupported absolute address"); ++} ++ ++// Return the R_390_TLS_LDM* relocation type for MCFixupKind Kind. ++static unsigned getTLSLDMReloc(unsigned Kind) { ++ switch (Kind) { ++ case FK_Data_4: return ELF::R_390_TLS_LDM32; ++ case FK_Data_8: return ELF::R_390_TLS_LDM64; ++ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_LDCALL; ++ } ++ llvm_unreachable("Unsupported absolute address"); ++} ++ ++// Return the R_390_TLS_GD* relocation type for MCFixupKind Kind. ++static unsigned getTLSGDReloc(unsigned Kind) { ++ switch (Kind) { ++ case FK_Data_4: return ELF::R_390_TLS_GD32; ++ case FK_Data_8: return ELF::R_390_TLS_GD64; ++ case SystemZ::FK_390_TLS_CALL: return ELF::R_390_TLS_GDCALL; ++ } ++ llvm_unreachable("Unsupported absolute address"); ++} ++ + // Return the PLT relocation counterpart of MCFixupKind Kind. 
+ static unsigned getPLTReloc(unsigned Kind) { + switch (Kind) { +@@ -94,6 +121,23 @@ unsigned SystemZObjectWriter::GetRelocTy + assert(!IsPCRel && "NTPOFF shouldn't be PC-relative"); + return getTLSLEReloc(Kind); + ++ case MCSymbolRefExpr::VK_INDNTPOFF: ++ if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) ++ return ELF::R_390_TLS_IEENT; ++ llvm_unreachable("Only PC-relative INDNTPOFF accesses are supported for now"); ++ ++ case MCSymbolRefExpr::VK_DTPOFF: ++ assert(!IsPCRel && "DTPOFF shouldn't be PC-relative"); ++ return getTLSLDOReloc(Kind); ++ ++ case MCSymbolRefExpr::VK_TLSLDM: ++ assert(!IsPCRel && "TLSLDM shouldn't be PC-relative"); ++ return getTLSLDMReloc(Kind); ++ ++ case MCSymbolRefExpr::VK_TLSGD: ++ assert(!IsPCRel && "TLSGD shouldn't be PC-relative"); ++ return getTLSGDReloc(Kind); ++ + case MCSymbolRefExpr::VK_GOT: + if (IsPCRel && Kind == SystemZ::FK_390_PC32DBL) + return ELF::R_390_GOTENT; +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp +@@ -76,6 +76,39 @@ const unsigned SystemZMC::FP128Regs[16] + SystemZ::F12Q, SystemZ::F13Q, 0, 0 + }; + ++const unsigned SystemZMC::VR32Regs[32] = { ++ SystemZ::F0S, SystemZ::F1S, SystemZ::F2S, SystemZ::F3S, ++ SystemZ::F4S, SystemZ::F5S, SystemZ::F6S, SystemZ::F7S, ++ SystemZ::F8S, SystemZ::F9S, SystemZ::F10S, SystemZ::F11S, ++ SystemZ::F12S, SystemZ::F13S, SystemZ::F14S, SystemZ::F15S, ++ SystemZ::F16S, SystemZ::F17S, SystemZ::F18S, SystemZ::F19S, ++ SystemZ::F20S, SystemZ::F21S, SystemZ::F22S, SystemZ::F23S, ++ SystemZ::F24S, SystemZ::F25S, SystemZ::F26S, SystemZ::F27S, ++ SystemZ::F28S, SystemZ::F29S, SystemZ::F30S, SystemZ::F31S ++}; ++ ++const unsigned SystemZMC::VR64Regs[32] = { ++ SystemZ::F0D, SystemZ::F1D, SystemZ::F2D, SystemZ::F3D, ++ SystemZ::F4D, SystemZ::F5D, SystemZ::F6D, SystemZ::F7D, ++ SystemZ::F8D, SystemZ::F9D, SystemZ::F10D, SystemZ::F11D, ++ SystemZ::F12D, SystemZ::F13D, SystemZ::F14D, SystemZ::F15D, ++ SystemZ::F16D, SystemZ::F17D, SystemZ::F18D, SystemZ::F19D, ++ SystemZ::F20D, SystemZ::F21D, SystemZ::F22D, SystemZ::F23D, ++ SystemZ::F24D, SystemZ::F25D, SystemZ::F26D, SystemZ::F27D, ++ SystemZ::F28D, SystemZ::F29D, SystemZ::F30D, SystemZ::F31D ++}; ++ ++const unsigned SystemZMC::VR128Regs[32] = { ++ SystemZ::V0, SystemZ::V1, SystemZ::V2, SystemZ::V3, ++ SystemZ::V4, SystemZ::V5, SystemZ::V6, SystemZ::V7, ++ SystemZ::V8, SystemZ::V9, SystemZ::V10, SystemZ::V11, ++ SystemZ::V12, SystemZ::V13, SystemZ::V14, SystemZ::V15, ++ SystemZ::V16, SystemZ::V17, SystemZ::V18, SystemZ::V19, ++ SystemZ::V20, SystemZ::V21, SystemZ::V22, SystemZ::V23, ++ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27, ++ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31 ++}; ++ + unsigned SystemZMC::getFirstReg(unsigned Reg) { + static unsigned Map[SystemZ::NUM_TARGET_REGS]; + static bool Initialized = false; +@@ -85,10 +118,13 @@ unsigned SystemZMC::getFirstReg(unsigned + Map[GRH32Regs[I]] = I; + Map[GR64Regs[I]] = I; + Map[GR128Regs[I]] = I; +- Map[FP32Regs[I]] = I; +- Map[FP64Regs[I]] = I; + Map[FP128Regs[I]] = I; + } ++ for (unsigned I = 0; I < 32; ++I) { ++ Map[VR32Regs[I]] = I; ++ Map[VR64Regs[I]] = I; ++ Map[VR128Regs[I]] = I; ++ } + } + assert(Reg < SystemZ::NUM_TARGET_REGS); + return Map[Reg]; +Index: llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h 
+=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h ++++ llvm-36/lib/Target/SystemZ/MCTargetDesc/SystemZMCTargetDesc.h +@@ -48,6 +48,9 @@ extern const unsigned GR128Regs[16]; + extern const unsigned FP32Regs[16]; + extern const unsigned FP64Regs[16]; + extern const unsigned FP128Regs[16]; ++extern const unsigned VR32Regs[32]; ++extern const unsigned VR64Regs[32]; ++extern const unsigned VR128Regs[32]; + + // Return the 0-based number of the first architectural register that + // contains the given LLVM register. E.g. R1D -> 1. +@@ -67,6 +70,11 @@ inline unsigned getRegAsGR32(unsigned Re + inline unsigned getRegAsGRH32(unsigned Reg) { + return GRH32Regs[getFirstReg(Reg)]; + } ++ ++// Return the given register as a VR128. ++inline unsigned getRegAsVR128(unsigned Reg) { ++ return VR128Regs[getFirstReg(Reg)]; ++} + } // end namespace SystemZMC + + MCCodeEmitter *createSystemZMCCodeEmitter(const MCInstrInfo &MCII, +Index: llvm-36/lib/Target/SystemZ/SystemZ.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZ.h ++++ llvm-36/lib/Target/SystemZ/SystemZ.h +@@ -21,6 +21,7 @@ + namespace llvm { + class SystemZTargetMachine; + class FunctionPass; ++class ImmutablePass; + + namespace SystemZ { + // Condition-code mask values. +@@ -68,6 +69,25 @@ const unsigned CCMASK_TM_MSB_0 = C + const unsigned CCMASK_TM_MSB_1 = CCMASK_2 | CCMASK_3; + const unsigned CCMASK_TM = CCMASK_ANY; + ++// Condition-code mask assignments for TRANSACTION_BEGIN. ++const unsigned CCMASK_TBEGIN_STARTED = CCMASK_0; ++const unsigned CCMASK_TBEGIN_INDETERMINATE = CCMASK_1; ++const unsigned CCMASK_TBEGIN_TRANSIENT = CCMASK_2; ++const unsigned CCMASK_TBEGIN_PERSISTENT = CCMASK_3; ++const unsigned CCMASK_TBEGIN = CCMASK_ANY; ++ ++// Condition-code mask assignments for TRANSACTION_END. ++const unsigned CCMASK_TEND_TX = CCMASK_0; ++const unsigned CCMASK_TEND_NOTX = CCMASK_2; ++const unsigned CCMASK_TEND = CCMASK_TEND_TX | CCMASK_TEND_NOTX; ++ ++// Condition-code mask assignments for vector comparisons (and similar ++// operations). ++const unsigned CCMASK_VCMP_ALL = CCMASK_0; ++const unsigned CCMASK_VCMP_MIXED = CCMASK_1; ++const unsigned CCMASK_VCMP_NONE = CCMASK_3; ++const unsigned CCMASK_VCMP = CCMASK_0 | CCMASK_1 | CCMASK_3; ++ + // The position of the low CC bit in an IPM result. + const unsigned IPM_CC = 28; + +@@ -75,6 +95,13 @@ const unsigned IPM_CC = 28; + const unsigned PFD_READ = 1; + const unsigned PFD_WRITE = 2; + ++// Number of bits in a vector register. ++const unsigned VectorBits = 128; ++ ++// Number of bytes in a vector register (and consequently the number of ++// bytes in a general permute vector). ++const unsigned VectorBytes = VectorBits / 8; ++ + // Return true if Val fits an LLILL operand. 
+ static inline bool isImmLL(uint64_t Val) { + return (Val & ~0x000000000000ffffULL) == 0; +@@ -111,6 +138,9 @@ FunctionPass *createSystemZISelDag(Syste + FunctionPass *createSystemZElimComparePass(SystemZTargetMachine &TM); + FunctionPass *createSystemZShortenInstPass(SystemZTargetMachine &TM); + FunctionPass *createSystemZLongBranchPass(SystemZTargetMachine &TM); ++FunctionPass *createSystemZLDCleanupPass(SystemZTargetMachine &TM); ++ImmutablePass *createSystemZTargetTransformInfoPass( ++ const SystemZTargetMachine *TM); + } // end namespace llvm + + #endif +Index: llvm-36/lib/Target/SystemZ/SystemZ.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZ.td ++++ llvm-36/lib/Target/SystemZ/SystemZ.td +@@ -40,6 +40,7 @@ include "SystemZOperands.td" + include "SystemZPatterns.td" + include "SystemZInstrFormats.td" + include "SystemZInstrInfo.td" ++include "SystemZInstrVector.td" + include "SystemZInstrFP.td" + + def SystemZInstrInfo : InstrInfo {} +Index: llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZAsmPrinter.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZAsmPrinter.cpp +@@ -66,6 +66,41 @@ static MCInst lowerRIEfLow(const Machine + .addImm(MI->getOperand(5).getImm()); + } + ++static const MCSymbolRefExpr *getTLSGetOffset(MCContext &Context) { ++ StringRef Name = "__tls_get_offset"; ++ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), ++ MCSymbolRefExpr::VK_PLT, ++ Context); ++} ++ ++static const MCSymbolRefExpr *getGlobalOffsetTable(MCContext &Context) { ++ StringRef Name = "_GLOBAL_OFFSET_TABLE_"; ++ return MCSymbolRefExpr::Create(Context.GetOrCreateSymbol(Name), ++ MCSymbolRefExpr::VK_None, ++ Context); ++} ++ ++// MI loads the high part of a vector from memory. Return an instruction ++// that uses replicating vector load Opcode to do the same thing. ++static MCInst lowerSubvectorLoad(const MachineInstr *MI, unsigned Opcode) { ++ return MCInstBuilder(Opcode) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) ++ .addReg(MI->getOperand(1).getReg()) ++ .addImm(MI->getOperand(2).getImm()) ++ .addReg(MI->getOperand(3).getReg()); ++} ++ ++// MI stores the high part of a vector to memory. Return an instruction ++// that uses elemental vector store Opcode to do the same thing. 
++static MCInst lowerSubvectorStore(const MachineInstr *MI, unsigned Opcode) { ++ return MCInstBuilder(Opcode) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) ++ .addReg(MI->getOperand(1).getReg()) ++ .addImm(MI->getOperand(2).getImm()) ++ .addReg(MI->getOperand(3).getReg()) ++ .addImm(0); ++} ++ + void SystemZAsmPrinter::EmitInstruction(const MachineInstr *MI) { + SystemZMCInstLower Lower(MF->getContext(), *this); + MCInst LoweredMI; +@@ -95,6 +130,26 @@ void SystemZAsmPrinter::EmitInstruction( + LoweredMI = MCInstBuilder(SystemZ::BR).addReg(SystemZ::R1D); + break; + ++ case SystemZ::TLS_GDCALL: ++ LoweredMI = MCInstBuilder(SystemZ::BRASL) ++ .addReg(SystemZ::R14D) ++ .addExpr(getTLSGetOffset(MF->getContext())) ++ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSGD)); ++ break; ++ ++ case SystemZ::TLS_LDCALL: ++ LoweredMI = MCInstBuilder(SystemZ::BRASL) ++ .addReg(SystemZ::R14D) ++ .addExpr(getTLSGetOffset(MF->getContext())) ++ .addExpr(Lower.getExpr(MI->getOperand(0), MCSymbolRefExpr::VK_TLSLDM)); ++ break; ++ ++ case SystemZ::GOT: ++ LoweredMI = MCInstBuilder(SystemZ::LARL) ++ .addReg(MI->getOperand(0).getReg()) ++ .addExpr(getGlobalOffsetTable(MF->getContext())); ++ break; ++ + case SystemZ::IILF64: + LoweredMI = MCInstBuilder(SystemZ::IILF) + .addReg(SystemZMC::getRegAsGR32(MI->getOperand(0).getReg())) +@@ -117,6 +172,51 @@ void SystemZAsmPrinter::EmitInstruction( + LoweredMI = lowerRIEfLow(MI, SystemZ::RISBLG); + break; + ++ case SystemZ::VLVGP32: ++ LoweredMI = MCInstBuilder(SystemZ::VLVGP) ++ .addReg(MI->getOperand(0).getReg()) ++ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(1).getReg())) ++ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(2).getReg())); ++ break; ++ ++ case SystemZ::VLR32: ++ case SystemZ::VLR64: ++ LoweredMI = MCInstBuilder(SystemZ::VLR) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())); ++ break; ++ ++ case SystemZ::VL32: ++ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPF); ++ break; ++ ++ case SystemZ::VL64: ++ LoweredMI = lowerSubvectorLoad(MI, SystemZ::VLREPG); ++ break; ++ ++ case SystemZ::VST32: ++ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEF); ++ break; ++ ++ case SystemZ::VST64: ++ LoweredMI = lowerSubvectorStore(MI, SystemZ::VSTEG); ++ break; ++ ++ case SystemZ::LFER: ++ LoweredMI = MCInstBuilder(SystemZ::VLGVF) ++ .addReg(SystemZMC::getRegAsGR64(MI->getOperand(0).getReg())) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(1).getReg())) ++ .addReg(0).addImm(0); ++ break; ++ ++ case SystemZ::LEFR: ++ LoweredMI = MCInstBuilder(SystemZ::VLVGF) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) ++ .addReg(SystemZMC::getRegAsVR128(MI->getOperand(0).getReg())) ++ .addReg(MI->getOperand(1).getReg()) ++ .addReg(0).addImm(0); ++ break; ++ + #define LOWER_LOW(NAME) \ + case SystemZ::NAME##64: LoweredMI = lowerRILow(MI, SystemZ::NAME); break + +@@ -172,6 +272,9 @@ void SystemZAsmPrinter::EmitInstruction( + static MCSymbolRefExpr::VariantKind + getModifierVariantKind(SystemZCP::SystemZCPModifier Modifier) { + switch (Modifier) { ++ case SystemZCP::TLSGD: return MCSymbolRefExpr::VK_TLSGD; ++ case SystemZCP::TLSLDM: return MCSymbolRefExpr::VK_TLSLDM; ++ case SystemZCP::DTPOFF: return MCSymbolRefExpr::VK_DTPOFF; + case SystemZCP::NTPOFF: return MCSymbolRefExpr::VK_NTPOFF; + } + llvm_unreachable("Invalid SystemCPModifier!"); +Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.h 
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.h
++++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.h
+@@ -10,6 +10,9 @@
+ #ifndef LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+ #define LLVM_LIB_TARGET_SYSTEMZ_SYSTEMZCALLINGCONV_H
+ 
++#include "llvm/ADT/SmallVector.h"
++#include "llvm/CodeGen/CallingConvLower.h"
++
+ namespace llvm {
+ namespace SystemZ {
+   const unsigned NumArgGPRs = 5;
+@@ -18,6 +21,64 @@ namespace SystemZ {
+   const unsigned NumArgFPRs = 4;
+   extern const unsigned ArgFPRs[NumArgFPRs];
+ } // end namespace SystemZ
++
++class SystemZCCState : public CCState {
++private:
++  /// Records whether the value was a fixed argument.
++  /// See ISD::OutputArg::IsFixed.
++  SmallVector<bool, 4> ArgIsFixed;
++
++  /// Records whether the value was widened from a short vector type.
++  SmallVector<bool, 4> ArgIsShortVector;
++
++  // Check whether ArgVT is a short vector type.
++  bool IsShortVectorType(EVT ArgVT) {
++    return ArgVT.isVector() && ArgVT.getStoreSize() <= 8;
++  }
++
++public:
++  SystemZCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF,
++                 SmallVectorImpl<CCValAssign> &locs, LLVMContext &C)
++      : CCState(CC, isVarArg, MF, locs, C) {}
++
++  void AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
++                              CCAssignFn Fn) {
++    // Formal arguments are always fixed.
++    ArgIsFixed.clear();
++    for (unsigned i = 0; i < Ins.size(); ++i)
++      ArgIsFixed.push_back(true);
++    // Record whether the call operand was a short vector.
++    ArgIsShortVector.clear();
++    for (unsigned i = 0; i < Ins.size(); ++i)
++      ArgIsShortVector.push_back(IsShortVectorType(Ins[i].ArgVT));
++
++    CCState::AnalyzeFormalArguments(Ins, Fn);
++  }
++
++  void AnalyzeCallOperands(const SmallVectorImpl<ISD::OutputArg> &Outs,
++                           CCAssignFn Fn) {
++    // Record whether the call operand was a fixed argument.
++    ArgIsFixed.clear();
++    for (unsigned i = 0; i < Outs.size(); ++i)
++      ArgIsFixed.push_back(Outs[i].IsFixed);
++    // Record whether the call operand was a short vector.
++    ArgIsShortVector.clear();
++    for (unsigned i = 0; i < Outs.size(); ++i)
++      ArgIsShortVector.push_back(IsShortVectorType(Outs[i].ArgVT));
++
++    CCState::AnalyzeCallOperands(Outs, Fn);
++  }
++
++  // This version of AnalyzeCallOperands in the base class is not usable
++  // since we must provide a means of accessing ISD::OutputArg::IsFixed.
++  void AnalyzeCallOperands(const SmallVectorImpl<MVT> &Outs,
++                           SmallVectorImpl<ISD::ArgFlagsTy> &Flags,
++                           CCAssignFn Fn) = delete;
++
++  bool IsFixed(unsigned ValNo) { return ArgIsFixed[ValNo]; }
++  bool IsShortVector(unsigned ValNo) { return ArgIsShortVector[ValNo]; }
++};
++
+ } // end namespace llvm
+ 
+ #endif
+Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td
++++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
+@@ -12,6 +12,20 @@
+ class CCIfExtend<CCAction A>
+   : CCIf<"ArgFlags.isSExt() || ArgFlags.isZExt()", A>;
+ 
++class CCIfSubtarget<string F, CCAction A>
++  : CCIf<!strconcat("static_cast<const SystemZSubtarget&>"
++                    "(State.getMachineFunction().getSubtarget()).", F),
++         A>;
++
++// Match if this specific argument is a fixed (i.e. named) argument.
++class CCIfFixed<CCAction A>
++  : CCIf<"static_cast<SystemZCCState *>(&State)->IsFixed(ValNo)", A>;
++
++// Match if this specific argument was widened from a short vector type.
++class CCIfShortVector<CCAction A>
++  : CCIf<"static_cast<SystemZCCState *>(&State)->IsShortVector(ValNo)", A>;
++
++
+ //===----------------------------------------------------------------------===//
+ // z/Linux return value calling convention
+ //===----------------------------------------------------------------------===//
+@@ -31,7 +45,14 @@ def RetCC_SystemZ : CallingConv<[
+   // doesn't care about the ABI. All floating-point argument registers
+   // are call-clobbered, so we can use all of them here.
+   CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+-  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>
++  CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
++
++  // Similarly for vectors, with V24 being the ABI-compliant choice.
++  // Sub-128 vectors are returned in the same way, but they're widened
++  // to one of these types during type legalization.
++  CCIfSubtarget<"hasVector()",
++    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++             CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
+ 
+   // ABI-compliant code returns long double by reference, but that conversion
+   // is left to higher-level code. Perhaps we could add an f128 definition
+@@ -60,6 +81,25 @@ def CC_SystemZ : CallingConv<[
+   CCIfType<[f32], CCAssignToReg<[F0S, F2S, F4S, F6S]>>,
+   CCIfType<[f64], CCAssignToReg<[F0D, F2D, F4D, F6D]>>,
+ 
++  // The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
++  // are passed in the same way, but they're widened to one of these types
++  // during type legalization.
++  CCIfSubtarget<"hasVector()",
++    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++             CCIfFixed<CCAssignToReg<[V24, V26, V28, V30,
++                                      V25, V27, V29, V31]>>>>,
++
++  // However, sub-128 vectors which need to go on the stack occupy just a
++  // single 8-byte-aligned 8-byte stack slot. Pass as i64.
++  CCIfSubtarget<"hasVector()",
++    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++             CCIfShortVector<CCBitConvertToType<i64>>>>,
++
++  // Other vector arguments are passed in 8-byte-aligned 16-byte stack slots.
++  CCIfSubtarget<"hasVector()",
++    CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
++             CCAssignToStack<16, 8>>>,
++
+   // Other arguments are passed in 8-byte-aligned 8-byte stack slots.
+   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
+ ]>;
+Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
++++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.cpp
+@@ -28,6 +28,11 @@ SystemZConstantPoolValue::Create(const G
+ 
+ unsigned SystemZConstantPoolValue::getRelocationInfo() const {
+   switch (Modifier) {
++  case SystemZCP::TLSGD:
++  case SystemZCP::TLSLDM:
++  case SystemZCP::DTPOFF:
++    // May require a dynamic relocation.
++    return 2;
+   case SystemZCP::NTPOFF:
+     // May require a relocation, but the relocations are always resolved
+     // by the static linker.
+Index: llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZConstantPoolValue.h
++++ llvm-36/lib/Target/SystemZ/SystemZConstantPoolValue.h
+@@ -19,13 +19,17 @@ class GlobalValue;
+ 
+ namespace SystemZCP {
+ enum SystemZCPModifier {
++  TLSGD,
++  TLSLDM,
++  DTPOFF,
+   NTPOFF
+ };
+ } // end namespace SystemZCP
+ 
+ /// A SystemZ-specific constant pool value. At present, the only
+-/// defined constant pool values are offsets of thread-local variables
+-/// (written x@NTPOFF).
++/// defined constant pool values are module IDs or offsets of ++/// thread-local variables (written x@TLSGD, x@TLSLDM, x@DTPOFF, ++/// or x@NTPOFF). + class SystemZConstantPoolValue : public MachineConstantPoolValue { + const GlobalValue *GV; + SystemZCP::SystemZCPModifier Modifier; +Index: llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp +@@ -256,6 +256,13 @@ class SystemZDAGToDAGISel : public Selec + Addr, Base, Disp, Index); + } + ++ // Try to match Addr as an address with a base, 12-bit displacement ++ // and index, where the index is element Elem of a vector. ++ // Return true on success, storing the base, displacement and vector ++ // in Base, Disp and Index respectively. ++ bool selectBDVAddr12Only(SDValue Addr, SDValue Elem, SDValue &Base, ++ SDValue &Disp, SDValue &Index) const; ++ + // Check whether (or Op (and X InsertMask)) is effectively an insertion + // of X into bits InsertMask of some Y != Op. Return true if so and + // set Op to that Y. +@@ -293,6 +300,12 @@ class SystemZDAGToDAGISel : public Selec + SDNode *splitLargeImmediate(unsigned Opcode, SDNode *Node, SDValue Op0, + uint64_t UpperVal, uint64_t LowerVal); + ++ // Try to use gather instruction Opcode to implement vector insertion N. ++ SDNode *tryGather(SDNode *N, unsigned Opcode); ++ ++ // Try to use scatter instruction Opcode to implement store Store. ++ SDNode *tryScatter(StoreSDNode *Store, unsigned Opcode); ++ + // Return true if Load and Store are loads and stores of the same size + // and are guaranteed not to overlap. Such operations can be implemented + // using block (SS-format) instructions. +@@ -643,6 +656,30 @@ bool SystemZDAGToDAGISel::selectBDXAddr( + return true; + } + ++bool SystemZDAGToDAGISel::selectBDVAddr12Only(SDValue Addr, SDValue Elem, ++ SDValue &Base, ++ SDValue &Disp, ++ SDValue &Index) const { ++ SDValue Regs[2]; ++ if (selectBDXAddr12Only(Addr, Regs[0], Disp, Regs[1]) && ++ Regs[0].getNode() && Regs[1].getNode()) { ++ for (unsigned int I = 0; I < 2; ++I) { ++ Base = Regs[I]; ++ Index = Regs[1 - I]; ++ // We can't tell here whether the index vector has the right type ++ // for the access; the caller needs to do that instead. ++ if (Index.getOpcode() == ISD::ZERO_EXTEND) ++ Index = Index.getOperand(0); ++ if (Index.getOpcode() == ISD::EXTRACT_VECTOR_ELT && ++ Index.getOperand(1) == Elem) { ++ Index = Index.getOperand(0); ++ return true; ++ } ++ } ++ } ++ return false; ++} ++ + bool SystemZDAGToDAGISel::detectOrAndInsertion(SDValue &Op, + uint64_t InsertMask) const { + // We're only interested in cases where the insertion is into some operand +@@ -896,6 +933,9 @@ SDNode *SystemZDAGToDAGISel::tryRISBGZer + } + + unsigned Opcode = SystemZ::RISBG; ++ // Prefer RISBGN if available, since it does not clobber CC. ++ if (Subtarget.hasMiscellaneousExtensions()) ++ Opcode = SystemZ::RISBGN; + EVT OpcodeVT = MVT::i64; + if (VT == MVT::i32 && Subtarget.hasHighWord()) { + Opcode = SystemZ::RISBMux; +@@ -943,9 +983,13 @@ SDNode *SystemZDAGToDAGISel::tryRxSBG(SD + + // See whether we can avoid an AND in the first operand by converting + // ROSBG to RISBG. +- if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) ++ if (Opcode == SystemZ::ROSBG && detectOrAndInsertion(Op0, RxSBG[I].Mask)) { + Opcode = SystemZ::RISBG; +- ++ // Prefer RISBGN if available, since it does not clobber CC. 
++ if (Subtarget.hasMiscellaneousExtensions()) ++ Opcode = SystemZ::RISBGN; ++ } ++ + EVT VT = N->getValueType(0); + SDValue Ops[5] = { + convertTo(SDLoc(N), MVT::i64, Op0), +@@ -973,6 +1017,71 @@ SDNode *SystemZDAGToDAGISel::splitLargeI + return Or.getNode(); + } + ++SDNode *SystemZDAGToDAGISel::tryGather(SDNode *N, unsigned Opcode) { ++ SDValue ElemV = N->getOperand(2); ++ auto *ElemN = dyn_cast(ElemV); ++ if (!ElemN) ++ return 0; ++ ++ unsigned Elem = ElemN->getZExtValue(); ++ EVT VT = N->getValueType(0); ++ if (Elem >= VT.getVectorNumElements()) ++ return 0; ++ ++ auto *Load = dyn_cast(N->getOperand(1)); ++ if (!Load || !Load->hasOneUse()) ++ return 0; ++ if (Load->getMemoryVT().getSizeInBits() != ++ Load->getValueType(0).getSizeInBits()) ++ return 0; ++ ++ SDValue Base, Disp, Index; ++ if (!selectBDVAddr12Only(Load->getBasePtr(), ElemV, Base, Disp, Index) || ++ Index.getValueType() != VT.changeVectorElementTypeToInteger()) ++ return 0; ++ ++ SDLoc DL(Load); ++ SDValue Ops[] = { ++ N->getOperand(0), Base, Disp, Index, ++ CurDAG->getTargetConstant(Elem, MVT::i32), Load->getChain() ++ }; ++ SDNode *Res = CurDAG->getMachineNode(Opcode, DL, VT, MVT::Other, Ops); ++ ReplaceUses(SDValue(Load, 1), SDValue(Res, 1)); ++ return Res; ++} ++ ++SDNode *SystemZDAGToDAGISel::tryScatter(StoreSDNode *Store, unsigned Opcode) { ++ SDValue Value = Store->getValue(); ++ if (Value.getOpcode() != ISD::EXTRACT_VECTOR_ELT) ++ return 0; ++ if (Store->getMemoryVT().getSizeInBits() != ++ Value.getValueType().getSizeInBits()) ++ return 0; ++ ++ SDValue ElemV = Value.getOperand(1); ++ auto *ElemN = dyn_cast(ElemV); ++ if (!ElemN) ++ return 0; ++ ++ SDValue Vec = Value.getOperand(0); ++ EVT VT = Vec.getValueType(); ++ unsigned Elem = ElemN->getZExtValue(); ++ if (Elem >= VT.getVectorNumElements()) ++ return 0; ++ ++ SDValue Base, Disp, Index; ++ if (!selectBDVAddr12Only(Store->getBasePtr(), ElemV, Base, Disp, Index) || ++ Index.getValueType() != VT.changeVectorElementTypeToInteger()) ++ return 0; ++ ++ SDLoc DL(Store); ++ SDValue Ops[] = { ++ Vec, Base, Disp, Index, CurDAG->getTargetConstant(Elem, MVT::i32), ++ Store->getChain() ++ }; ++ return CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops); ++} ++ + bool SystemZDAGToDAGISel::canUseBlockOperation(StoreSDNode *Store, + LoadSDNode *Load) const { + // Check that the two memory operands have the same size. 
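To make the new selection paths concrete: tryGather fires when a vector element is inserted from a load whose address is indexed by the same element of another vector, and tryScatter handles the mirrored store pattern. A hypothetical source-level example follows (illustrative only; the function names are made up, the typedef uses the GCC/Clang vector extension, and whether the pattern actually matches still depends on the 12-bit displacement and element-type checks above):

  typedef unsigned int v4u32 __attribute__((vector_size(16)));

  // insert_vector_elt(load): element 1 of Off supplies the byte offset,
  // so this is a candidate for VGEF via tryGather.
  v4u32 gather_elem1(v4u32 Val, v4u32 Off, const char *Base) {
    Val[1] = *(const unsigned int *)(Base + Off[1]);
    return Val;
  }

  // store(extract_vector_elt): element 2 of Off supplies the byte offset,
  // so this is a candidate for VSCEF via tryScatter.
  void scatter_elem2(v4u32 Val, v4u32 Off, char *Base) {
    *(unsigned int *)(Base + Off[2]) = Val[2];
  }

Note how selectBDVAddr12Only peels a ZERO_EXTEND off the index operand: the 32-bit offset element is zero-extended for 64-bit addressing, which is exactly what the vector gather/scatter element instructions expect.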
+@@ -1109,6 +1218,26 @@ SDNode *SystemZDAGToDAGISel::Select(SDNo + } + break; + } ++ ++ case ISD::INSERT_VECTOR_ELT: { ++ EVT VT = Node->getValueType(0); ++ unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits(); ++ if (ElemBitSize == 32) ++ ResNode = tryGather(Node, SystemZ::VGEF); ++ else if (ElemBitSize == 64) ++ ResNode = tryGather(Node, SystemZ::VGEG); ++ break; ++ } ++ ++ case ISD::STORE: { ++ auto *Store = cast(Node); ++ unsigned ElemBitSize = Store->getValue().getValueType().getSizeInBits(); ++ if (ElemBitSize == 32) ++ ResNode = tryScatter(Store, SystemZ::VSCEF); ++ else if (ElemBitSize == 64) ++ ResNode = tryScatter(Store, SystemZ::VSCEG); ++ break; ++ } + } + + // Select the default instruction +Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp +@@ -20,6 +20,7 @@ + #include "llvm/CodeGen/MachineInstrBuilder.h" + #include "llvm/CodeGen/MachineRegisterInfo.h" + #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" ++#include "llvm/IR/Intrinsics.h" + #include + + using namespace llvm; +@@ -90,11 +91,25 @@ SystemZTargetLowering::SystemZTargetLowe + addRegisterClass(MVT::i32, &SystemZ::GRX32BitRegClass); + else + addRegisterClass(MVT::i32, &SystemZ::GR32BitRegClass); +- addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); +- addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); +- addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); ++ addRegisterClass(MVT::i64, &SystemZ::GR64BitRegClass); ++ if (Subtarget.hasVector()) { ++ addRegisterClass(MVT::f32, &SystemZ::VR32BitRegClass); ++ addRegisterClass(MVT::f64, &SystemZ::VR64BitRegClass); ++ } else { ++ addRegisterClass(MVT::f32, &SystemZ::FP32BitRegClass); ++ addRegisterClass(MVT::f64, &SystemZ::FP64BitRegClass); ++ } + addRegisterClass(MVT::f128, &SystemZ::FP128BitRegClass); + ++ if (Subtarget.hasVector()) { ++ addRegisterClass(MVT::v16i8, &SystemZ::VR128BitRegClass); ++ addRegisterClass(MVT::v8i16, &SystemZ::VR128BitRegClass); ++ addRegisterClass(MVT::v4i32, &SystemZ::VR128BitRegClass); ++ addRegisterClass(MVT::v2i64, &SystemZ::VR128BitRegClass); ++ addRegisterClass(MVT::v4f32, &SystemZ::VR128BitRegClass); ++ addRegisterClass(MVT::v2f64, &SystemZ::VR128BitRegClass); ++ } ++ + // Compute derived properties from the register classes + computeRegisterProperties(); + +@@ -110,7 +125,7 @@ SystemZTargetLowering::SystemZTargetLowe + setSchedulingPreference(Sched::RegPressure); + + setBooleanContents(ZeroOrOneBooleanContent); +- setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct? ++ setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); + + // Instructions are strings of 2-byte aligned 2-byte values. + setMinFunctionAlignment(2); +@@ -163,8 +178,13 @@ SystemZTargetLowering::SystemZTargetLowe + // available, or if the operand is constant. + setOperationAction(ISD::ATOMIC_LOAD_SUB, VT, Custom); + ++ // Use POPCNT on z196 and above. ++ if (Subtarget.hasPopulationCount()) ++ setOperationAction(ISD::CTPOP, VT, Custom); ++ else ++ setOperationAction(ISD::CTPOP, VT, Expand); ++ + // No special instructions for these. 
+- setOperationAction(ISD::CTPOP, VT, Expand); + setOperationAction(ISD::CTTZ, VT, Expand); + setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand); + setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand); +@@ -244,6 +264,90 @@ SystemZTargetLowering::SystemZTargetLowe + // Handle prefetches with PFD or PFDRL. + setOperationAction(ISD::PREFETCH, MVT::Other, Custom); + ++ for (MVT VT : MVT::vector_valuetypes()) { ++ // Assume by default that all vector operations need to be expanded. ++ for (unsigned Opcode = 0; Opcode < ISD::BUILTIN_OP_END; ++Opcode) ++ if (getOperationAction(Opcode, VT) == Legal) ++ setOperationAction(Opcode, VT, Expand); ++ ++ // Likewise all truncating stores and extending loads. ++ for (MVT InnerVT : MVT::vector_valuetypes()) { ++ setTruncStoreAction(VT, InnerVT, Expand); ++ setLoadExtAction(ISD::SEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::ZEXTLOAD, VT, InnerVT, Expand); ++ setLoadExtAction(ISD::EXTLOAD, VT, InnerVT, Expand); ++ } ++ ++ if (isTypeLegal(VT)) { ++ // These operations are legal for anything that can be stored in a ++ // vector register, even if there is no native support for the format ++ // as such. In particular, we can do these for v4f32 even though there ++ // are no specific instructions for that format. ++ setOperationAction(ISD::LOAD, VT, Legal); ++ setOperationAction(ISD::STORE, VT, Legal); ++ setOperationAction(ISD::VSELECT, VT, Legal); ++ setOperationAction(ISD::BITCAST, VT, Legal); ++ setOperationAction(ISD::UNDEF, VT, Legal); ++ ++ // Likewise, except that we need to replace the nodes with something ++ // more specific. ++ setOperationAction(ISD::BUILD_VECTOR, VT, Custom); ++ setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom); ++ } ++ } ++ ++ // Handle integer vector types. ++ for (MVT VT : MVT::integer_vector_valuetypes()) { ++ if (isTypeLegal(VT)) { ++ // These operations have direct equivalents. ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Legal); ++ setOperationAction(ISD::ADD, VT, Legal); ++ setOperationAction(ISD::SUB, VT, Legal); ++ if (VT != MVT::v2i64) ++ setOperationAction(ISD::MUL, VT, Legal); ++ setOperationAction(ISD::AND, VT, Legal); ++ setOperationAction(ISD::OR, VT, Legal); ++ setOperationAction(ISD::XOR, VT, Legal); ++ setOperationAction(ISD::CTPOP, VT, Custom); ++ setOperationAction(ISD::CTTZ, VT, Legal); ++ setOperationAction(ISD::CTLZ, VT, Legal); ++ setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Custom); ++ setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom); ++ ++ // Convert a GPR scalar to a vector by inserting it into element 0. ++ setOperationAction(ISD::SCALAR_TO_VECTOR, VT, Custom); ++ ++ // Use a series of unpacks for extensions. ++ setOperationAction(ISD::SIGN_EXTEND_VECTOR_INREG, VT, Custom); ++ setOperationAction(ISD::ZERO_EXTEND_VECTOR_INREG, VT, Custom); ++ ++ // Detect shifts by a scalar amount and convert them into ++ // V*_BY_SCALAR. ++ setOperationAction(ISD::SHL, VT, Custom); ++ setOperationAction(ISD::SRA, VT, Custom); ++ setOperationAction(ISD::SRL, VT, Custom); ++ ++ // At present ROTL isn't matched by DAGCombiner. ROTR should be ++ // converted into ROTL. ++ setOperationAction(ISD::ROTL, VT, Expand); ++ setOperationAction(ISD::ROTR, VT, Expand); ++ ++ // Map SETCCs onto one of VCE, VCH or VCHL, swapping the operands ++ // and inverting the result as necessary. 
++ setOperationAction(ISD::SETCC, VT, Custom); ++ } ++ } ++ ++ if (Subtarget.hasVector()) { ++ // There should be no need to check for float types other than v2f64 ++ // since <2 x f32> isn't a legal type. ++ setOperationAction(ISD::FP_TO_SINT, MVT::v2i64, Legal); ++ setOperationAction(ISD::FP_TO_UINT, MVT::v2i64, Legal); ++ setOperationAction(ISD::SINT_TO_FP, MVT::v2i64, Legal); ++ setOperationAction(ISD::UINT_TO_FP, MVT::v2i64, Legal); ++ } ++ + // Handle floating-point types. + for (unsigned I = MVT::FIRST_FP_VALUETYPE; + I <= MVT::LAST_FP_VALUETYPE; +@@ -269,6 +373,36 @@ SystemZTargetLowering::SystemZTargetLowe + } + } + ++ // Handle floating-point vector types. ++ if (Subtarget.hasVector()) { ++ // Scalar-to-vector conversion is just a subreg. ++ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v4f32, Legal); ++ setOperationAction(ISD::SCALAR_TO_VECTOR, MVT::v2f64, Legal); ++ ++ // Some insertions and extractions can be done directly but others ++ // need to go via integers. ++ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom); ++ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom); ++ setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f64, Custom); ++ ++ // These operations have direct equivalents. ++ setOperationAction(ISD::FADD, MVT::v2f64, Legal); ++ setOperationAction(ISD::FNEG, MVT::v2f64, Legal); ++ setOperationAction(ISD::FSUB, MVT::v2f64, Legal); ++ setOperationAction(ISD::FMUL, MVT::v2f64, Legal); ++ setOperationAction(ISD::FMA, MVT::v2f64, Legal); ++ setOperationAction(ISD::FDIV, MVT::v2f64, Legal); ++ setOperationAction(ISD::FABS, MVT::v2f64, Legal); ++ setOperationAction(ISD::FSQRT, MVT::v2f64, Legal); ++ setOperationAction(ISD::FRINT, MVT::v2f64, Legal); ++ setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); ++ setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); ++ setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); ++ setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); ++ setOperationAction(ISD::FROUND, MVT::v2f64, Legal); ++ } ++ + // We have fused multiply-addition for f32 and f64 but not f128. + setOperationAction(ISD::FMA, MVT::f32, Legal); + setOperationAction(ISD::FMA, MVT::f64, Legal); +@@ -287,8 +421,10 @@ SystemZTargetLowering::SystemZTargetLowe + + // We have 64-bit FPR<->GPR moves, but need special handling for + // 32-bit forms. +- setOperationAction(ISD::BITCAST, MVT::i32, Custom); +- setOperationAction(ISD::BITCAST, MVT::f32, Custom); ++ if (!Subtarget.hasVector()) { ++ setOperationAction(ISD::BITCAST, MVT::i32, Custom); ++ setOperationAction(ISD::BITCAST, MVT::f32, Custom); ++ } + + // VASTART and VACOPY need to deal with the SystemZ-specific varargs + // structure, but VAEND is a no-op. +@@ -298,6 +434,13 @@ SystemZTargetLowering::SystemZTargetLowe + + // Codes for which we want to perform some z-specific combinations. + setTargetDAGCombine(ISD::SIGN_EXTEND); ++ setTargetDAGCombine(ISD::STORE); ++ setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); ++ setTargetDAGCombine(ISD::FP_ROUND); ++ ++ // Handle intrinsics. ++ setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); ++ setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); + + // We want to use MVC in preference to even a single load/store pair. + MaxStoresPerMemcpy = 0; +@@ -342,6 +485,16 @@ bool SystemZTargetLowering::isFPImmLegal + return Imm.isZero() || Imm.isNegZero(); + } + ++bool SystemZTargetLowering::isLegalICmpImmediate(int64_t Imm) const { ++ // We can use CGFI or CLGFI. 
++ return isInt<32>(Imm) || isUInt<32>(Imm); ++} ++ ++bool SystemZTargetLowering::isLegalAddImmediate(int64_t Imm) const { ++ // We can use ALGFI or SLGFI. ++ return isUInt<32>(Imm) || isUInt<32>(-Imm); ++} ++ + bool SystemZTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, + unsigned, + unsigned, +@@ -623,6 +776,24 @@ bool SystemZTargetLowering::mayBeEmitted + return true; + } + ++// We do not yet support 128-bit single-element vector types. If the user ++// attempts to use such types as function argument or return type, prefer ++// to error out instead of emitting code violating the ABI. ++static void VerifyVectorType(MVT VT, EVT ArgVT) { ++ if (ArgVT.isVector() && !VT.isVector()) ++ report_fatal_error("Unsupported vector argument or return type"); ++} ++ ++static void VerifyVectorTypes(const SmallVectorImpl &Ins) { ++ for (unsigned i = 0; i < Ins.size(); ++i) ++ VerifyVectorType(Ins[i].VT, Ins[i].ArgVT); ++} ++ ++static void VerifyVectorTypes(const SmallVectorImpl &Outs) { ++ for (unsigned i = 0; i < Outs.size(); ++i) ++ VerifyVectorType(Outs[i].VT, Outs[i].ArgVT); ++} ++ + // Value is a value that has been passed to us in the location described by VA + // (and so has type VA.getLocVT()). Convert Value to VA.getValVT(), chaining + // any loads onto Chain. +@@ -643,7 +814,15 @@ static SDValue convertLocVTToValVT(Selec + else if (VA.getLocInfo() == CCValAssign::Indirect) + Value = DAG.getLoad(VA.getValVT(), DL, Chain, Value, + MachinePointerInfo(), false, false, false, 0); +- else ++ else if (VA.getLocInfo() == CCValAssign::BCvt) { ++ // If this is a short vector argument loaded from the stack, ++ // extend from i64 to full vector size and then bitcast. ++ assert(VA.getLocVT() == MVT::i64); ++ assert(VA.getValVT().isVector()); ++ Value = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2i64, ++ Value, DAG.getUNDEF(MVT::i64)); ++ Value = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Value); ++ } else + assert(VA.getLocInfo() == CCValAssign::Full && "Unsupported getLocInfo"); + return Value; + } +@@ -660,6 +839,14 @@ static SDValue convertValVTToLocVT(Selec + return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value); + case CCValAssign::AExt: + return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value); ++ case CCValAssign::BCvt: ++ // If this is a short vector argument to be stored to the stack, ++ // bitcast to v2i64 and then extract first element. ++ assert(VA.getLocVT() == MVT::i64); ++ assert(VA.getValVT().isVector()); ++ Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value); ++ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value, ++ DAG.getConstant(0, MVT::i32)); + case CCValAssign::Full: + return Value; + default: +@@ -680,9 +867,13 @@ LowerFormalArguments(SDValue Chain, Call + auto *TFL = static_cast( + DAG.getSubtarget().getFrameLowering()); + ++ // Detect unsupported vector argument types. ++ if (Subtarget.hasVector()) ++ VerifyVectorTypes(Ins); ++ + // Assign locations to all of the incoming arguments. 
+ SmallVector ArgLocs; +- CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ++ SystemZCCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + CCInfo.AnalyzeFormalArguments(Ins, CC_SystemZ); + + unsigned NumFixedGPRs = 0; +@@ -714,6 +905,14 @@ LowerFormalArguments(SDValue Chain, Call + NumFixedFPRs += 1; + RC = &SystemZ::FP64BitRegClass; + break; ++ case MVT::v16i8: ++ case MVT::v8i16: ++ case MVT::v4i32: ++ case MVT::v2i64: ++ case MVT::v4f32: ++ case MVT::v2f64: ++ RC = &SystemZ::VR128BitRegClass; ++ break; + } + + unsigned VReg = MRI.createVirtualRegister(RC); +@@ -818,9 +1017,15 @@ SystemZTargetLowering::LowerCall(CallLow + MachineFunction &MF = DAG.getMachineFunction(); + EVT PtrVT = getPointerTy(); + ++ // Detect unsupported vector argument and return types. ++ if (Subtarget.hasVector()) { ++ VerifyVectorTypes(Outs); ++ VerifyVectorTypes(Ins); ++ } ++ + // Analyze the operands of the call, assigning locations to each operand. + SmallVector ArgLocs; +- CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); ++ SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); + ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ); + + // We don't support GuaranteedTailCallOpt, only automatically-detected +@@ -972,6 +1177,10 @@ SystemZTargetLowering::LowerReturn(SDVal + SDLoc DL, SelectionDAG &DAG) const { + MachineFunction &MF = DAG.getMachineFunction(); + ++ // Detect unsupported vector return types. ++ if (Subtarget.hasVector()) ++ VerifyVectorTypes(Outs); ++ + // Assign locations to each returned value. + SmallVector RetLocs; + CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext()); +@@ -1015,6 +1224,207 @@ prepareVolatileOrAtomicLoad(SDValue Chai + return DAG.getNode(SystemZISD::SERIALIZE, DL, MVT::Other, Chain); + } + ++// Return true if Op is an intrinsic node with chain that returns the CC value ++// as its only (other) argument. Provide the associated SystemZISD opcode and ++// the mask of valid CC values if so. ++static bool isIntrinsicWithCCAndChain(SDValue Op, unsigned &Opcode, ++ unsigned &CCValid) { ++ unsigned Id = cast(Op.getOperand(1))->getZExtValue(); ++ switch (Id) { ++ case Intrinsic::s390_tbegin: ++ Opcode = SystemZISD::TBEGIN; ++ CCValid = SystemZ::CCMASK_TBEGIN; ++ return true; ++ ++ case Intrinsic::s390_tbegin_nofloat: ++ Opcode = SystemZISD::TBEGIN_NOFLOAT; ++ CCValid = SystemZ::CCMASK_TBEGIN; ++ return true; ++ ++ case Intrinsic::s390_tend: ++ Opcode = SystemZISD::TEND; ++ CCValid = SystemZ::CCMASK_TEND; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++// Return true if Op is an intrinsic node without chain that returns the ++// CC value as its final argument. Provide the associated SystemZISD ++// opcode and the mask of valid CC values if so. 
++static bool isIntrinsicWithCC(SDValue Op, unsigned &Opcode, unsigned &CCValid) { ++ unsigned Id = cast(Op.getOperand(0))->getZExtValue(); ++ switch (Id) { ++ case Intrinsic::s390_vpkshs: ++ case Intrinsic::s390_vpksfs: ++ case Intrinsic::s390_vpksgs: ++ Opcode = SystemZISD::PACKS_CC; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vpklshs: ++ case Intrinsic::s390_vpklsfs: ++ case Intrinsic::s390_vpklsgs: ++ Opcode = SystemZISD::PACKLS_CC; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vceqbs: ++ case Intrinsic::s390_vceqhs: ++ case Intrinsic::s390_vceqfs: ++ case Intrinsic::s390_vceqgs: ++ Opcode = SystemZISD::VICMPES; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vchbs: ++ case Intrinsic::s390_vchhs: ++ case Intrinsic::s390_vchfs: ++ case Intrinsic::s390_vchgs: ++ Opcode = SystemZISD::VICMPHS; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vchlbs: ++ case Intrinsic::s390_vchlhs: ++ case Intrinsic::s390_vchlfs: ++ case Intrinsic::s390_vchlgs: ++ Opcode = SystemZISD::VICMPHLS; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vtm: ++ Opcode = SystemZISD::VTM; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vfaebs: ++ case Intrinsic::s390_vfaehs: ++ case Intrinsic::s390_vfaefs: ++ Opcode = SystemZISD::VFAE_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfaezbs: ++ case Intrinsic::s390_vfaezhs: ++ case Intrinsic::s390_vfaezfs: ++ Opcode = SystemZISD::VFAEZ_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfeebs: ++ case Intrinsic::s390_vfeehs: ++ case Intrinsic::s390_vfeefs: ++ Opcode = SystemZISD::VFEE_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfeezbs: ++ case Intrinsic::s390_vfeezhs: ++ case Intrinsic::s390_vfeezfs: ++ Opcode = SystemZISD::VFEEZ_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfenebs: ++ case Intrinsic::s390_vfenehs: ++ case Intrinsic::s390_vfenefs: ++ Opcode = SystemZISD::VFENE_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfenezbs: ++ case Intrinsic::s390_vfenezhs: ++ case Intrinsic::s390_vfenezfs: ++ Opcode = SystemZISD::VFENEZ_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vistrbs: ++ case Intrinsic::s390_vistrhs: ++ case Intrinsic::s390_vistrfs: ++ Opcode = SystemZISD::VISTR_CC; ++ CCValid = SystemZ::CCMASK_0 | SystemZ::CCMASK_3; ++ return true; ++ ++ case Intrinsic::s390_vstrcbs: ++ case Intrinsic::s390_vstrchs: ++ case Intrinsic::s390_vstrcfs: ++ Opcode = SystemZISD::VSTRC_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vstrczbs: ++ case Intrinsic::s390_vstrczhs: ++ case Intrinsic::s390_vstrczfs: ++ Opcode = SystemZISD::VSTRCZ_CC; ++ CCValid = SystemZ::CCMASK_ANY; ++ return true; ++ ++ case Intrinsic::s390_vfcedbs: ++ Opcode = SystemZISD::VFCMPES; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vfchdbs: ++ Opcode = SystemZISD::VFCMPHS; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vfchedbs: ++ Opcode = SystemZISD::VFCMPHES; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ case Intrinsic::s390_vftcidb: ++ Opcode = SystemZISD::VFTCI; ++ CCValid = SystemZ::CCMASK_VCMP; ++ return true; ++ ++ default: ++ return false; ++ } ++} ++ ++// Emit an intrinsic with chain with a glued value 
instead of its CC result. ++static SDValue emitIntrinsicWithChainAndGlue(SelectionDAG &DAG, SDValue Op, ++ unsigned Opcode) { ++ // Copy all operands except the intrinsic ID. ++ unsigned NumOps = Op.getNumOperands(); ++ SmallVector Ops; ++ Ops.reserve(NumOps - 1); ++ Ops.push_back(Op.getOperand(0)); ++ for (unsigned I = 2; I < NumOps; ++I) ++ Ops.push_back(Op.getOperand(I)); ++ ++ assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); ++ SDVTList RawVTs = DAG.getVTList(MVT::Other, MVT::Glue); ++ SDValue Intr = DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); ++ SDValue OldChain = SDValue(Op.getNode(), 1); ++ SDValue NewChain = SDValue(Intr.getNode(), 0); ++ DAG.ReplaceAllUsesOfValueWith(OldChain, NewChain); ++ return Intr; ++} ++ ++// Emit an intrinsic with a glued value instead of its CC result. ++static SDValue emitIntrinsicWithGlue(SelectionDAG &DAG, SDValue Op, ++ unsigned Opcode) { ++ // Copy all operands except the intrinsic ID. ++ unsigned NumOps = Op.getNumOperands(); ++ SmallVector Ops; ++ Ops.reserve(NumOps - 1); ++ for (unsigned I = 1; I < NumOps; ++I) ++ Ops.push_back(Op.getOperand(I)); ++ ++ if (Op->getNumValues() == 1) ++ return DAG.getNode(Opcode, SDLoc(Op), MVT::Glue, Ops); ++ assert(Op->getNumValues() == 2 && "Expected exactly one non-CC result"); ++ SDVTList RawVTs = DAG.getVTList(Op->getValueType(0), MVT::Glue); ++ return DAG.getNode(Opcode, SDLoc(Op), RawVTs, Ops); ++} ++ + // CC is a comparison that will be implemented using an integer or + // floating-point comparison. Return the condition code mask for + // a branch on true. In the integer case, CCMASK_CMP_UO is set for +@@ -1529,6 +1939,8 @@ static void adjustForTestUnderMask(Selec + MaskVal = -(CmpVal & -CmpVal); + NewC.ICmpType = SystemZICMP::UnsignedOnly; + } ++ if (!MaskVal) ++ return; + + // Check whether the combination of mask, comparison value and comparison + // type are suitable. +@@ -1570,9 +1982,57 @@ static void adjustForTestUnderMask(Selec + C.CCMask = NewCCMask; + } + ++// Return a Comparison that tests the condition-code result of intrinsic ++// node Call against constant integer CC using comparison code Cond. ++// Opcode is the opcode of the SystemZISD operation for the intrinsic ++// and CCValid is the set of possible condition-code results. ++static Comparison getIntrinsicCmp(SelectionDAG &DAG, unsigned Opcode, ++ SDValue Call, unsigned CCValid, uint64_t CC, ++ ISD::CondCode Cond) { ++ Comparison C(Call, SDValue()); ++ C.Opcode = Opcode; ++ C.CCValid = CCValid; ++ if (Cond == ISD::SETEQ) ++ // bit 3 for CC==0, bit 0 for CC==3, always false for CC>3. ++ C.CCMask = CC < 4 ? 1 << (3 - CC) : 0; ++ else if (Cond == ISD::SETNE) ++ // ...and the inverse of that. ++ C.CCMask = CC < 4 ? ~(1 << (3 - CC)) : -1; ++ else if (Cond == ISD::SETLT || Cond == ISD::SETULT) ++ // bits above bit 3 for CC==0 (always false), bits above bit 0 for CC==3, ++ // always true for CC>3. ++ C.CCMask = CC < 4 ? -1 << (4 - CC) : -1; ++ else if (Cond == ISD::SETGE || Cond == ISD::SETUGE) ++ // ...and the inverse of that. ++ C.CCMask = CC < 4 ? ~(-1 << (4 - CC)) : 0; ++ else if (Cond == ISD::SETLE || Cond == ISD::SETULE) ++ // bit 3 and above for CC==0, bit 0 and above for CC==3 (always true), ++ // always true for CC>3. ++ C.CCMask = CC < 4 ? -1 << (3 - CC) : -1; ++ else if (Cond == ISD::SETGT || Cond == ISD::SETUGT) ++ // ...and the inverse of that. ++ C.CCMask = CC < 4 ? 
~(-1 << (3 - CC)) : 0;
++  else
++    llvm_unreachable("Unexpected integer comparison type");
++  C.CCMask &= CCValid;
++  return C;
++}
++
+ // Decide how to implement a comparison of type Cond between CmpOp0 with CmpOp1.
+ static Comparison getCmp(SelectionDAG &DAG, SDValue CmpOp0, SDValue CmpOp1,
+                          ISD::CondCode Cond) {
++  if (CmpOp1.getOpcode() == ISD::Constant) {
++    uint64_t Constant = cast<ConstantSDNode>(CmpOp1)->getZExtValue();
++    unsigned Opcode, CCValid;
++    if (CmpOp0.getOpcode() == ISD::INTRINSIC_W_CHAIN &&
++        CmpOp0.getResNo() == 0 && CmpOp0->hasNUsesOfValue(1, 0) &&
++        isIntrinsicWithCCAndChain(CmpOp0, Opcode, CCValid))
++      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
++    if (CmpOp0.getOpcode() == ISD::INTRINSIC_WO_CHAIN &&
++        CmpOp0.getResNo() == CmpOp0->getNumValues() - 1 &&
++        isIntrinsicWithCC(CmpOp0, Opcode, CCValid))
++      return getIntrinsicCmp(DAG, Opcode, CmpOp0, CCValid, Constant, Cond);
++  }
+   Comparison C(CmpOp0, CmpOp1);
+   C.CCMask = CCMaskForCondCode(Cond);
+   if (C.Op0.getValueType().isFloatingPoint()) {
+@@ -1614,6 +2074,20 @@ static Comparison getCmp(SelectionDAG &D
+ 
+ // Emit the comparison instruction described by C.
+ static SDValue emitCmp(SelectionDAG &DAG, SDLoc DL, Comparison &C) {
++  if (!C.Op1.getNode()) {
++    SDValue Op;
++    switch (C.Op0.getOpcode()) {
++    case ISD::INTRINSIC_W_CHAIN:
++      Op = emitIntrinsicWithChainAndGlue(DAG, C.Op0, C.Opcode);
++      break;
++    case ISD::INTRINSIC_WO_CHAIN:
++      Op = emitIntrinsicWithGlue(DAG, C.Op0, C.Opcode);
++      break;
++    default:
++      llvm_unreachable("Invalid comparison operands");
++    }
++    return SDValue(Op.getNode(), Op->getNumValues() - 1);
++  }
+   if (C.Opcode == SystemZISD::ICMP)
+     return DAG.getNode(SystemZISD::ICMP, DL, MVT::Glue, C.Op0, C.Op1,
+                        DAG.getConstant(C.ICmpType, MVT::i32));
+@@ -1682,12 +2156,142 @@ static SDValue emitSETCC(SelectionDAG &D
+   return Result;
+ }
+ 
++// Return the SystemZISD vector comparison operation for CC, or 0 if it cannot
++// be done directly. IsFP is true if CC is for a floating-point rather than
++// integer comparison.
++static unsigned getVectorComparison(ISD::CondCode CC, bool IsFP) {
++  switch (CC) {
++  case ISD::SETOEQ:
++  case ISD::SETEQ:
++    return IsFP ? SystemZISD::VFCMPE : SystemZISD::VICMPE;
++
++  case ISD::SETOGE:
++  case ISD::SETGE:
++    return IsFP ? SystemZISD::VFCMPHE : 0;
++
++  case ISD::SETOGT:
++  case ISD::SETGT:
++    return IsFP ? SystemZISD::VFCMPH : SystemZISD::VICMPH;
++
++  case ISD::SETUGT:
++    return IsFP ? 0 : SystemZISD::VICMPHL;
++
++  default:
++    return 0;
++  }
++}
++
++// Return the SystemZISD vector comparison operation for CC or its inverse,
++// or 0 if neither can be done directly. Indicate in Invert whether the
++// result is for the inverse of CC. IsFP is true if CC is for a
++// floating-point rather than integer comparison.
++static unsigned getVectorComparisonOrInvert(ISD::CondCode CC, bool IsFP,
++                                            bool &Invert) {
++  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
++    Invert = false;
++    return Opcode;
++  }
++
++  CC = ISD::getSetCCInverse(CC, !IsFP);
++  if (unsigned Opcode = getVectorComparison(CC, IsFP)) {
++    Invert = true;
++    return Opcode;
++  }
++
++  return 0;
++}
++
++// Return a v2f64 that contains the extended form of elements Start and Start+1
++// of v4f32 value Op.
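
The CC-mask arithmetic in getIntrinsicCmp above packs the four possible
condition codes into a 4-bit mask, with bit 3 standing for CC==0 and bit 0
for CC==3. A minimal, compilable model of the SETEQ and SETLT cases (the
function names are illustrative; the constant 0xF plays the role of CCValid):

#include <cassert>

unsigned maskForCCEqual(unsigned CC) {    // ISD::SETEQ case
  return CC < 4 ? 1u << (3 - CC) : 0;
}
unsigned maskForCCLess(unsigned CC) {     // ISD::SETLT / SETULT case
  return CC < 4 ? (0xFu << (4 - CC)) & 0xFu : 0xFu;
}

int main() {
  assert(maskForCCEqual(0) == 0x8);  // only CC==0 satisfies "CC == 0"
  assert(maskForCCEqual(3) == 0x1);  // only CC==3 satisfies "CC == 3"
  assert(maskForCCLess(2) == 0xC);   // CC==0 or CC==1 satisfy "CC < 2"
  assert(maskForCCLess(0) == 0x0);   // "CC < 0" is always false
  return 0;
}
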
++static SDValue expandV4F32ToV2F64(SelectionDAG &DAG, int Start, SDLoc DL, ++ SDValue Op) { ++ int Mask[] = { Start, -1, Start + 1, -1 }; ++ Op = DAG.getVectorShuffle(MVT::v4f32, DL, Op, DAG.getUNDEF(MVT::v4f32), Mask); ++ return DAG.getNode(SystemZISD::VEXTEND, DL, MVT::v2f64, Op); ++} ++ ++// Build a comparison of vectors CmpOp0 and CmpOp1 using opcode Opcode, ++// producing a result of type VT. ++static SDValue getVectorCmp(SelectionDAG &DAG, unsigned Opcode, SDLoc DL, ++ EVT VT, SDValue CmpOp0, SDValue CmpOp1) { ++ // There is no hardware support for v4f32, so extend the vector into ++ // two v2f64s and compare those. ++ if (CmpOp0.getValueType() == MVT::v4f32) { ++ SDValue H0 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp0); ++ SDValue L0 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp0); ++ SDValue H1 = expandV4F32ToV2F64(DAG, 0, DL, CmpOp1); ++ SDValue L1 = expandV4F32ToV2F64(DAG, 2, DL, CmpOp1); ++ SDValue HRes = DAG.getNode(Opcode, DL, MVT::v2i64, H0, H1); ++ SDValue LRes = DAG.getNode(Opcode, DL, MVT::v2i64, L0, L1); ++ return DAG.getNode(SystemZISD::PACK, DL, VT, HRes, LRes); ++ } ++ return DAG.getNode(Opcode, DL, VT, CmpOp0, CmpOp1); ++} ++ ++// Lower a vector comparison of type CC between CmpOp0 and CmpOp1, producing ++// an integer mask of type VT. ++static SDValue lowerVectorSETCC(SelectionDAG &DAG, SDLoc DL, EVT VT, ++ ISD::CondCode CC, SDValue CmpOp0, ++ SDValue CmpOp1) { ++ bool IsFP = CmpOp0.getValueType().isFloatingPoint(); ++ bool Invert = false; ++ SDValue Cmp; ++ switch (CC) { ++ // Handle tests for order using (or (ogt y x) (oge x y)). ++ case ISD::SETUO: ++ Invert = true; ++ case ISD::SETO: { ++ assert(IsFP && "Unexpected integer comparison"); ++ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); ++ SDValue GE = getVectorCmp(DAG, SystemZISD::VFCMPHE, DL, VT, CmpOp0, CmpOp1); ++ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GE); ++ break; ++ } ++ ++ // Handle <> tests using (or (ogt y x) (ogt x y)). ++ case ISD::SETUEQ: ++ Invert = true; ++ case ISD::SETONE: { ++ assert(IsFP && "Unexpected integer comparison"); ++ SDValue LT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp1, CmpOp0); ++ SDValue GT = getVectorCmp(DAG, SystemZISD::VFCMPH, DL, VT, CmpOp0, CmpOp1); ++ Cmp = DAG.getNode(ISD::OR, DL, VT, LT, GT); ++ break; ++ } ++ ++ // Otherwise a single comparison is enough. It doesn't really ++ // matter whether we try the inversion or the swap first, since ++ // there are no cases where both work. 
++ default: ++ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) ++ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp0, CmpOp1); ++ else { ++ CC = ISD::getSetCCSwappedOperands(CC); ++ if (unsigned Opcode = getVectorComparisonOrInvert(CC, IsFP, Invert)) ++ Cmp = getVectorCmp(DAG, Opcode, DL, VT, CmpOp1, CmpOp0); ++ else ++ llvm_unreachable("Unhandled comparison"); ++ } ++ break; ++ } ++ if (Invert) { ++ SDValue Mask = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, ++ DAG.getConstant(65535, MVT::i32)); ++ Mask = DAG.getNode(ISD::BITCAST, DL, VT, Mask); ++ Cmp = DAG.getNode(ISD::XOR, DL, VT, Cmp, Mask); ++ } ++ return Cmp; ++} ++ + SDValue SystemZTargetLowering::lowerSETCC(SDValue Op, + SelectionDAG &DAG) const { + SDValue CmpOp0 = Op.getOperand(0); + SDValue CmpOp1 = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + SDLoc DL(Op); ++ EVT VT = Op.getValueType(); ++ if (VT.isVector()) ++ return lowerVectorSETCC(DAG, DL, VT, CC, CmpOp0, CmpOp1); + + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + SDValue Glue = emitCmp(DAG, DL, C); +@@ -1695,7 +2299,6 @@ SDValue SystemZTargetLowering::lowerSETC + } + + SDValue SystemZTargetLowering::lowerBR_CC(SDValue Op, SelectionDAG &DAG) const { +- SDValue Chain = Op.getOperand(0); + ISD::CondCode CC = cast(Op.getOperand(1))->get(); + SDValue CmpOp0 = Op.getOperand(2); + SDValue CmpOp1 = Op.getOperand(3); +@@ -1705,7 +2308,7 @@ SDValue SystemZTargetLowering::lowerBR_C + Comparison C(getCmp(DAG, CmpOp0, CmpOp1, CC)); + SDValue Glue = emitCmp(DAG, DL, C); + return DAG.getNode(SystemZISD::BR_CCMASK, DL, Op.getValueType(), +- Chain, DAG.getConstant(C.CCValid, MVT::i32), ++ Op.getOperand(0), DAG.getConstant(C.CCValid, MVT::i32), + DAG.getConstant(C.CCMask, MVT::i32), Dest, Glue); + } + +@@ -1831,6 +2434,52 @@ SDValue SystemZTargetLowering::lowerGlob + return Result; + } + ++SDValue SystemZTargetLowering::lowerTLSGetOffset(GlobalAddressSDNode *Node, ++ SelectionDAG &DAG, ++ unsigned Opcode, ++ SDValue GOTOffset) const { ++ SDLoc DL(Node); ++ EVT PtrVT = getPointerTy(); ++ SDValue Chain = DAG.getEntryNode(); ++ SDValue Glue; ++ ++ // __tls_get_offset takes the GOT offset in %r2 and the GOT in %r12. ++ SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT); ++ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R12D, GOT, Glue); ++ Glue = Chain.getValue(1); ++ Chain = DAG.getCopyToReg(Chain, DL, SystemZ::R2D, GOTOffset, Glue); ++ Glue = Chain.getValue(1); ++ ++ // The first call operand is the chain and the second is the TLS symbol. ++ SmallVector Ops; ++ Ops.push_back(Chain); ++ Ops.push_back(DAG.getTargetGlobalAddress(Node->getGlobal(), DL, ++ Node->getValueType(0), ++ 0, 0)); ++ ++ // Add argument registers to the end of the list so that they are ++ // known live into the call. ++ Ops.push_back(DAG.getRegister(SystemZ::R2D, PtrVT)); ++ Ops.push_back(DAG.getRegister(SystemZ::R12D, PtrVT)); ++ ++ // Add a register mask operand representing the call-preserved registers. ++ const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); ++ const uint32_t *Mask = TRI->getCallPreservedMask(CallingConv::C); ++ assert(Mask && "Missing call preserved mask for calling convention"); ++ Ops.push_back(DAG.getRegisterMask(Mask)); ++ ++ // Glue the call to the argument copies. ++ Ops.push_back(Glue); ++ ++ // Emit the call. ++ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); ++ Chain = DAG.getNode(Opcode, DL, NodeTys, Ops); ++ Glue = Chain.getValue(1); ++ ++ // Copy the return value from %r2. 
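
For orientation, the call being assembled in lowerTLSGetOffset follows the
s390x TLS ABI: the GOT offset of the tls_index goes in %r2, the GOT base in
%r12, and the value returned in %r2 is the variable's offset from the thread
pointer. Reduced to C++ data flow (the extern declaration reflects the
documented ABI prototype; the wrapper name is illustrative):

// Provided by glibc; %r12 must hold the GOT base when this is called.
extern "C" unsigned long __tls_get_offset(unsigned long GotOffset);

// The dynamic TLS models then compute: address = thread pointer + offset.
unsigned long tlsAddress(unsigned long TP, unsigned long GotOffset) {
  return TP + __tls_get_offset(GotOffset);
}
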
++ return DAG.getCopyFromReg(Chain, DL, SystemZ::R2D, PtrVT, Glue); ++} ++ + SDValue SystemZTargetLowering::lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const { + SDLoc DL(Node); +@@ -1838,9 +2487,6 @@ SDValue SystemZTargetLowering::lowerGlob + EVT PtrVT = getPointerTy(); + TLSModel::Model model = DAG.getTarget().getTLSModel(GV); + +- if (model != TLSModel::LocalExec) +- llvm_unreachable("only local-exec TLS mode supported"); +- + // The high part of the thread pointer is in access register 0. + SDValue TPHi = DAG.getNode(SystemZISD::EXTRACT_ACCESS, DL, MVT::i32, + DAG.getConstant(0, MVT::i32)); +@@ -1856,15 +2502,79 @@ SDValue SystemZTargetLowering::lowerGlob + DAG.getConstant(32, PtrVT)); + SDValue TP = DAG.getNode(ISD::OR, DL, PtrVT, TPHiShifted, TPLo); + +- // Get the offset of GA from the thread pointer. +- SystemZConstantPoolValue *CPV = +- SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); +- +- // Force the offset into the constant pool and load it from there. +- SDValue CPAddr = DAG.getConstantPool(CPV, PtrVT, 8); +- SDValue Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), +- CPAddr, MachinePointerInfo::getConstantPool(), +- false, false, false, 0); ++ // Get the offset of GA from the thread pointer, based on the TLS model. ++ SDValue Offset; ++ switch (model) { ++ case TLSModel::GeneralDynamic: { ++ // Load the GOT offset of the tls_index (module ID / per-symbol offset). ++ SystemZConstantPoolValue *CPV = ++ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSGD); ++ ++ Offset = DAG.getConstantPool(CPV, PtrVT, 8); ++ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ++ Offset, MachinePointerInfo::getConstantPool(), ++ false, false, false, 0); ++ ++ // Call __tls_get_offset to retrieve the offset. ++ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_GDCALL, Offset); ++ break; ++ } ++ ++ case TLSModel::LocalDynamic: { ++ // Load the GOT offset of the module ID. ++ SystemZConstantPoolValue *CPV = ++ SystemZConstantPoolValue::Create(GV, SystemZCP::TLSLDM); ++ ++ Offset = DAG.getConstantPool(CPV, PtrVT, 8); ++ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ++ Offset, MachinePointerInfo::getConstantPool(), ++ false, false, false, 0); ++ ++ // Call __tls_get_offset to retrieve the module base offset. ++ Offset = lowerTLSGetOffset(Node, DAG, SystemZISD::TLS_LDCALL, Offset); ++ ++ // Note: The SystemZLDCleanupPass will remove redundant computations ++ // of the module base offset. Count total number of local-dynamic ++ // accesses to trigger execution of that pass. ++ SystemZMachineFunctionInfo* MFI = ++ DAG.getMachineFunction().getInfo(); ++ MFI->incNumLocalDynamicTLSAccesses(); ++ ++ // Add the per-symbol offset. ++ CPV = SystemZConstantPoolValue::Create(GV, SystemZCP::DTPOFF); ++ ++ SDValue DTPOffset = DAG.getConstantPool(CPV, PtrVT, 8); ++ DTPOffset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ++ DTPOffset, MachinePointerInfo::getConstantPool(), ++ false, false, false, 0); ++ ++ Offset = DAG.getNode(ISD::ADD, DL, PtrVT, Offset, DTPOffset); ++ break; ++ } ++ ++ case TLSModel::InitialExec: { ++ // Load the offset from the GOT. ++ Offset = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, ++ SystemZII::MO_INDNTPOFF); ++ Offset = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Offset); ++ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ++ Offset, MachinePointerInfo::getGOT(), ++ false, false, false, 0); ++ break; ++ } ++ ++ case TLSModel::LocalExec: { ++ // Force the offset into the constant pool and load it from there. 
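
In the local-exec case handled here, the variable's offset from the thread
pointer (NTPOFF) is a constant resolved at link time, so the whole address
computation reduces to the access-register reassembly at the top of the
function plus one addition. A stand-alone sketch (function name illustrative):

#include <cstdint>

// Reassemble the thread pointer from access registers a0 (high half) and
// a1 (low half), as the EXTRACT_ACCESS/SHL/OR sequence above does, then
// add the constant offset loaded from the constant pool.
uint64_t localExecAddress(uint32_t A0, uint32_t A1, uint64_t NTPOff) {
  uint64_t TP = (uint64_t(A0) << 32) | A1;
  return TP + NTPOff;
}
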
++ SystemZConstantPoolValue *CPV = ++ SystemZConstantPoolValue::Create(GV, SystemZCP::NTPOFF); ++ ++ Offset = DAG.getConstantPool(CPV, PtrVT, 8); ++ Offset = DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), ++ Offset, MachinePointerInfo::getConstantPool(), ++ false, false, false, 0); ++ break; ++ } ++ } + + // Add the base and offset together. + return DAG.getNode(ISD::ADD, DL, PtrVT, TP, Offset); +@@ -1916,6 +2626,13 @@ SDValue SystemZTargetLowering::lowerBITC + EVT InVT = In.getValueType(); + EVT ResVT = Op.getValueType(); + ++ // Convert loads directly. This is normally done by DAGCombiner, ++ // but we need this case for bitcasts that are created during lowering ++ // and which are then lowered themselves. ++ if (auto *LoadN = dyn_cast(In)) ++ return DAG.getLoad(ResVT, DL, LoadN->getChain(), LoadN->getBasePtr(), ++ LoadN->getMemOperand()); ++ + if (InVT == MVT::i32 && ResVT == MVT::f32) { + SDValue In64; + if (Subtarget.hasHighWord()) { +@@ -1929,12 +2646,12 @@ SDValue SystemZTargetLowering::lowerBITC + DAG.getConstant(32, MVT::i64)); + } + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::f64, In64); +- return DAG.getTargetExtractSubreg(SystemZ::subreg_h32, ++ return DAG.getTargetExtractSubreg(SystemZ::subreg_r32, + DL, MVT::f32, Out64); + } + if (InVT == MVT::f32 && ResVT == MVT::i32) { + SDNode *U64 = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f64); +- SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_h32, DL, ++ SDValue In64 = DAG.getTargetInsertSubreg(SystemZ::subreg_r32, DL, + MVT::f64, SDValue(U64, 0), In); + SDValue Out64 = DAG.getNode(ISD::BITCAST, DL, MVT::i64, In64); + if (Subtarget.hasHighWord()) +@@ -2187,6 +2904,80 @@ SDValue SystemZTargetLowering::lowerOR(S + MVT::i64, HighOp, Low32); + } + ++SDValue SystemZTargetLowering::lowerCTPOP(SDValue Op, ++ SelectionDAG &DAG) const { ++ EVT VT = Op.getValueType(); ++ SDLoc DL(Op); ++ Op = Op.getOperand(0); ++ ++ // Handle vector types via VPOPCT. ++ if (VT.isVector()) { ++ Op = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Op); ++ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::v16i8, Op); ++ switch (VT.getVectorElementType().getSizeInBits()) { ++ case 8: ++ break; ++ case 16: { ++ Op = DAG.getNode(ISD::BITCAST, DL, VT, Op); ++ SDValue Shift = DAG.getConstant(8, MVT::i32); ++ SDValue Tmp = DAG.getNode(SystemZISD::VSHL_BY_SCALAR, DL, VT, Op, Shift); ++ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); ++ Op = DAG.getNode(SystemZISD::VSRL_BY_SCALAR, DL, VT, Op, Shift); ++ break; ++ } ++ case 32: { ++ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, ++ DAG.getConstant(0, MVT::i32)); ++ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); ++ break; ++ } ++ case 64: { ++ SDValue Tmp = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, ++ DAG.getConstant(0, MVT::i32)); ++ Op = DAG.getNode(SystemZISD::VSUM, DL, MVT::v4i32, Op, Tmp); ++ Op = DAG.getNode(SystemZISD::VSUM, DL, VT, Op, Tmp); ++ break; ++ } ++ default: ++ llvm_unreachable("Unexpected type"); ++ } ++ return Op; ++ } ++ ++ // Get the known-zero mask for the operand. ++ APInt KnownZero, KnownOne; ++ DAG.computeKnownBits(Op, KnownZero, KnownOne); ++ unsigned NumSignificantBits = (~KnownZero).getActiveBits(); ++ if (NumSignificantBits == 0) ++ return DAG.getConstant(0, VT); ++ ++ // Skip known-zero high parts of the operand. ++ int64_t OrigBitSize = VT.getSizeInBits(); ++ int64_t BitSize = (int64_t)1 << Log2_32_Ceil(NumSignificantBits); ++ BitSize = std::min(BitSize, OrigBitSize); ++ ++ // The POPCNT instruction counts the number of bits in each byte. 
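
The scalar expansion that follows can be checked in isolation: model the
per-byte POPCNT, then fold the byte counts with the same shift-and-add tree,
shown here for a full 64-bit operand where BitSize == OrigBitSize == 64 and
no masking step is needed. A minimal model assuming GCC/Clang builtins:

#include <cassert>
#include <cstdint>

// Per-byte bit counts, as the z196 POPCNT instruction produces them.
uint64_t popcntPerByte(uint64_t X) {
  uint64_t R = 0;
  for (int B = 0; B < 8; ++B)
    R |= uint64_t(__builtin_popcountll((X >> (8 * B)) & 0xff)) << (8 * B);
  return R;
}

// The SHL/ADD tree from lowerCTPOP: partial sums accumulate in the high
// byte, which the final shift extracts. No byte can exceed 64, so the
// per-byte additions never carry.
uint64_t popcnt64(uint64_t X) {
  uint64_t Op = popcntPerByte(X);
  for (int I = 32; I >= 8; I /= 2)
    Op += Op << I;
  return Op >> 56;
}

int main() {
  const uint64_t Tests[] = { 0, 1, ~0ull, 0x8040201008040201ull };
  for (uint64_t X : Tests)
    assert(popcnt64(X) == uint64_t(__builtin_popcountll(X)));
  return 0;
}
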
++ Op = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op); ++ Op = DAG.getNode(SystemZISD::POPCNT, DL, MVT::i64, Op); ++ Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op); ++ ++ // Add up per-byte counts in a binary tree. All bits of Op at ++ // position larger than BitSize remain zero throughout. ++ for (int64_t I = BitSize / 2; I >= 8; I = I / 2) { ++ SDValue Tmp = DAG.getNode(ISD::SHL, DL, VT, Op, DAG.getConstant(I, VT)); ++ if (BitSize != OrigBitSize) ++ Tmp = DAG.getNode(ISD::AND, DL, VT, Tmp, ++ DAG.getConstant(((uint64_t)1 << BitSize) - 1, VT)); ++ Op = DAG.getNode(ISD::ADD, DL, VT, Op, Tmp); ++ } ++ ++ // Extract overall result from high byte. ++ if (BitSize > 8) ++ Op = DAG.getNode(ISD::SRL, DL, VT, Op, DAG.getConstant(BitSize - 8, VT)); ++ ++ return Op; ++} ++ + // Op is an atomic load. Lower it into a normal volatile load. + SDValue SystemZTargetLowering::lowerATOMIC_LOAD(SDValue Op, + SelectionDAG &DAG) const { +@@ -2400,6 +3191,1069 @@ SDValue SystemZTargetLowering::lowerPREF + Node->getMemoryVT(), Node->getMemOperand()); + } + ++// Return an i32 that contains the value of CC immediately after After, ++// whose final operand must be MVT::Glue. ++static SDValue getCCResult(SelectionDAG &DAG, SDNode *After) { ++ SDValue Glue = SDValue(After, After->getNumValues() - 1); ++ SDValue IPM = DAG.getNode(SystemZISD::IPM, SDLoc(After), MVT::i32, Glue); ++ return DAG.getNode(ISD::SRL, SDLoc(After), MVT::i32, IPM, ++ DAG.getConstant(SystemZ::IPM_CC, MVT::i32)); ++} ++ ++SDValue ++SystemZTargetLowering::lowerINTRINSIC_W_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ unsigned Opcode, CCValid; ++ if (isIntrinsicWithCCAndChain(Op, Opcode, CCValid)) { ++ assert(Op->getNumValues() == 2 && "Expected only CC result and chain"); ++ SDValue Glued = emitIntrinsicWithChainAndGlue(DAG, Op, Opcode); ++ SDValue CC = getCCResult(DAG, Glued.getNode()); ++ DAG.ReplaceAllUsesOfValueWith(SDValue(Op.getNode(), 0), CC); ++ return SDValue(); ++ } ++ ++ return SDValue(); ++} ++ ++SDValue ++SystemZTargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op, ++ SelectionDAG &DAG) const { ++ unsigned Opcode, CCValid; ++ if (isIntrinsicWithCC(Op, Opcode, CCValid)) { ++ SDValue Glued = emitIntrinsicWithGlue(DAG, Op, Opcode); ++ SDValue CC = getCCResult(DAG, Glued.getNode()); ++ if (Op->getNumValues() == 1) ++ return CC; ++ assert(Op->getNumValues() == 2 && "Expected a CC and non-CC result"); ++ return DAG.getNode(ISD::MERGE_VALUES, SDLoc(Op), Op->getVTList(), ++ Glued, CC); ++ } ++ ++ unsigned Id = cast(Op.getOperand(0))->getZExtValue(); ++ switch (Id) { ++ case Intrinsic::s390_vpdi: ++ return DAG.getNode(SystemZISD::PERMUTE_DWORDS, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); ++ ++ case Intrinsic::s390_vperm: ++ return DAG.getNode(SystemZISD::PERMUTE, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1), Op.getOperand(2), Op.getOperand(3)); ++ ++ case Intrinsic::s390_vuphb: ++ case Intrinsic::s390_vuphh: ++ case Intrinsic::s390_vuphf: ++ return DAG.getNode(SystemZISD::UNPACK_HIGH, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1)); ++ ++ case Intrinsic::s390_vuplhb: ++ case Intrinsic::s390_vuplhh: ++ case Intrinsic::s390_vuplhf: ++ return DAG.getNode(SystemZISD::UNPACKL_HIGH, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1)); ++ ++ case Intrinsic::s390_vuplb: ++ case Intrinsic::s390_vuplhw: ++ case Intrinsic::s390_vuplf: ++ return DAG.getNode(SystemZISD::UNPACK_LOW, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1)); ++ ++ case Intrinsic::s390_vupllb: ++ case Intrinsic::s390_vupllh: ++ 
case Intrinsic::s390_vupllf: ++ return DAG.getNode(SystemZISD::UNPACKL_LOW, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1)); ++ ++ case Intrinsic::s390_vsumb: ++ case Intrinsic::s390_vsumh: ++ case Intrinsic::s390_vsumgh: ++ case Intrinsic::s390_vsumgf: ++ case Intrinsic::s390_vsumqf: ++ case Intrinsic::s390_vsumqg: ++ return DAG.getNode(SystemZISD::VSUM, SDLoc(Op), Op.getValueType(), ++ Op.getOperand(1), Op.getOperand(2)); ++ } ++ ++ return SDValue(); ++} ++ ++namespace { ++// Says that SystemZISD operation Opcode can be used to perform the equivalent ++// of a VPERM with permute vector Bytes. If Opcode takes three operands, ++// Operand is the constant third operand, otherwise it is the number of ++// bytes in each element of the result. ++struct Permute { ++ unsigned Opcode; ++ unsigned Operand; ++ unsigned char Bytes[SystemZ::VectorBytes]; ++}; ++} ++ ++static const Permute PermuteForms[] = { ++ // VMRHG ++ { SystemZISD::MERGE_HIGH, 8, ++ { 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23 } }, ++ // VMRHF ++ { SystemZISD::MERGE_HIGH, 4, ++ { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } }, ++ // VMRHH ++ { SystemZISD::MERGE_HIGH, 2, ++ { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } }, ++ // VMRHB ++ { SystemZISD::MERGE_HIGH, 1, ++ { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } }, ++ // VMRLG ++ { SystemZISD::MERGE_LOW, 8, ++ { 8, 9, 10, 11, 12, 13, 14, 15, 24, 25, 26, 27, 28, 29, 30, 31 } }, ++ // VMRLF ++ { SystemZISD::MERGE_LOW, 4, ++ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }, ++ // VMRLH ++ { SystemZISD::MERGE_LOW, 2, ++ { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } }, ++ // VMRLB ++ { SystemZISD::MERGE_LOW, 1, ++ { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } }, ++ // VPKG ++ { SystemZISD::PACK, 4, ++ { 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, 28, 29, 30, 31 } }, ++ // VPKF ++ { SystemZISD::PACK, 2, ++ { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } }, ++ // VPKH ++ { SystemZISD::PACK, 1, ++ { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } }, ++ // VPDI V1, V2, 4 (low half of V1, high half of V2) ++ { SystemZISD::PERMUTE_DWORDS, 4, ++ { 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23 } }, ++ // VPDI V1, V2, 1 (high half of V1, low half of V2) ++ { SystemZISD::PERMUTE_DWORDS, 1, ++ { 0, 1, 2, 3, 4, 5, 6, 7, 24, 25, 26, 27, 28, 29, 30, 31 } } ++}; ++ ++// Called after matching a vector shuffle against a particular pattern. ++// Both the original shuffle and the pattern have two vector operands. ++// OpNos[0] is the operand of the original shuffle that should be used for ++// operand 0 of the pattern, or -1 if operand 0 of the pattern can be anything. ++// OpNos[1] is the same for operand 1 of the pattern. Resolve these -1s and ++// set OpNo0 and OpNo1 to the shuffle operands that should actually be used ++// for operands 0 and 1 of the pattern. ++static bool chooseShuffleOpNos(int *OpNos, unsigned &OpNo0, unsigned &OpNo1) { ++ if (OpNos[0] < 0) { ++ if (OpNos[1] < 0) ++ return false; ++ OpNo0 = OpNo1 = OpNos[1]; ++ } else if (OpNos[1] < 0) { ++ OpNo0 = OpNo1 = OpNos[0]; ++ } else { ++ OpNo0 = OpNos[0]; ++ OpNo1 = OpNos[1]; ++ } ++ return true; ++} ++ ++// Bytes is a VPERM-like permute vector, except that -1 is used for ++// undefined bytes. Return true if the VPERM can be implemented using P. ++// When returning true set OpNo0 to the VPERM operand that should be ++// used for operand 0 of P and likewise OpNo1 for operand 1 of P. 
++// ++// For example, if swapping the VPERM operands allows P to match, OpNo0 ++// will be 1 and OpNo1 will be 0. If instead Bytes only refers to one ++// operand, but rewriting it to use two duplicated operands allows it to ++// match P, then OpNo0 and OpNo1 will be the same. ++static bool matchPermute(const SmallVectorImpl &Bytes, const Permute &P, ++ unsigned &OpNo0, unsigned &OpNo1) { ++ int OpNos[] = { -1, -1 }; ++ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) { ++ int Elt = Bytes[I]; ++ if (Elt >= 0) { ++ // Make sure that the two permute vectors use the same suboperand ++ // byte number. Only the operand numbers (the high bits) are ++ // allowed to differ. ++ if ((Elt ^ P.Bytes[I]) & (SystemZ::VectorBytes - 1)) ++ return false; ++ int ModelOpNo = P.Bytes[I] / SystemZ::VectorBytes; ++ int RealOpNo = unsigned(Elt) / SystemZ::VectorBytes; ++ // Make sure that the operand mappings are consistent with previous ++ // elements. ++ if (OpNos[ModelOpNo] == 1 - RealOpNo) ++ return false; ++ OpNos[ModelOpNo] = RealOpNo; ++ } ++ } ++ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); ++} ++ ++// As above, but search for a matching permute. ++static const Permute *matchPermute(const SmallVectorImpl &Bytes, ++ unsigned &OpNo0, unsigned &OpNo1) { ++ for (auto &P : PermuteForms) ++ if (matchPermute(Bytes, P, OpNo0, OpNo1)) ++ return &P; ++ return nullptr; ++} ++ ++// Bytes is a VPERM-like permute vector, except that -1 is used for ++// undefined bytes. This permute is an operand of an outer permute. ++// See whether redistributing the -1 bytes gives a shuffle that can be ++// implemented using P. If so, set Transform to a VPERM-like permute vector ++// that, when applied to the result of P, gives the original permute in Bytes. ++static bool matchDoublePermute(const SmallVectorImpl &Bytes, ++ const Permute &P, ++ SmallVectorImpl &Transform) { ++ unsigned To = 0; ++ for (unsigned From = 0; From < SystemZ::VectorBytes; ++From) { ++ int Elt = Bytes[From]; ++ if (Elt < 0) ++ // Byte number From of the result is undefined. ++ Transform[From] = -1; ++ else { ++ while (P.Bytes[To] != Elt) { ++ To += 1; ++ if (To == SystemZ::VectorBytes) ++ return false; ++ } ++ Transform[From] = To; ++ } ++ } ++ return true; ++} ++ ++// As above, but search for a matching permute. ++static const Permute *matchDoublePermute(const SmallVectorImpl &Bytes, ++ SmallVectorImpl &Transform) { ++ for (auto &P : PermuteForms) ++ if (matchDoublePermute(Bytes, P, Transform)) ++ return &P; ++ return nullptr; ++} ++ ++// Convert the mask of the given VECTOR_SHUFFLE into a byte-level mask, ++// as if it had type vNi8. ++static void getVPermMask(ShuffleVectorSDNode *VSN, ++ SmallVectorImpl &Bytes) { ++ EVT VT = VSN->getValueType(0); ++ unsigned NumElements = VT.getVectorNumElements(); ++ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); ++ Bytes.resize(NumElements * BytesPerElement, -1); ++ for (unsigned I = 0; I < NumElements; ++I) { ++ int Index = VSN->getMaskElt(I); ++ if (Index >= 0) ++ for (unsigned J = 0; J < BytesPerElement; ++J) ++ Bytes[I * BytesPerElement + J] = Index * BytesPerElement + J; ++ } ++} ++ ++// Bytes is a VPERM-like permute vector, except that -1 is used for ++// undefined bytes. See whether bytes [Start, Start + BytesPerElement) of ++// the result come from a contiguous sequence of bytes from one input. ++// Set Base to the selector for the first byte if so. 
++static bool getShuffleInput(const SmallVectorImpl &Bytes, unsigned Start, ++ unsigned BytesPerElement, int &Base) { ++ Base = -1; ++ for (unsigned I = 0; I < BytesPerElement; ++I) { ++ if (Bytes[Start + I] >= 0) { ++ unsigned Elem = Bytes[Start + I]; ++ if (Base < 0) { ++ Base = Elem - I; ++ // Make sure the bytes would come from one input operand. ++ if (unsigned(Base) % Bytes.size() + BytesPerElement > Bytes.size()) ++ return false; ++ } else if (unsigned(Base) != Elem - I) ++ return false; ++ } ++ } ++ return true; ++} ++ ++// Bytes is a VPERM-like permute vector, except that -1 is used for ++// undefined bytes. Return true if it can be performed using VSLDI. ++// When returning true, set StartIndex to the shift amount and OpNo0 ++// and OpNo1 to the VPERM operands that should be used as the first ++// and second shift operand respectively. ++static bool isShlDoublePermute(const SmallVectorImpl &Bytes, ++ unsigned &StartIndex, unsigned &OpNo0, ++ unsigned &OpNo1) { ++ int OpNos[] = { -1, -1 }; ++ int Shift = -1; ++ for (unsigned I = 0; I < 16; ++I) { ++ int Index = Bytes[I]; ++ if (Index >= 0) { ++ int ExpectedShift = (Index - I) % SystemZ::VectorBytes; ++ int ModelOpNo = unsigned(ExpectedShift + I) / SystemZ::VectorBytes; ++ int RealOpNo = unsigned(Index) / SystemZ::VectorBytes; ++ if (Shift < 0) ++ Shift = ExpectedShift; ++ else if (Shift != ExpectedShift) ++ return false; ++ // Make sure that the operand mappings are consistent with previous ++ // elements. ++ if (OpNos[ModelOpNo] == 1 - RealOpNo) ++ return false; ++ OpNos[ModelOpNo] = RealOpNo; ++ } ++ } ++ StartIndex = Shift; ++ return chooseShuffleOpNos(OpNos, OpNo0, OpNo1); ++} ++ ++// Create a node that performs P on operands Op0 and Op1, casting the ++// operands to the appropriate type. The type of the result is determined by P. ++static SDValue getPermuteNode(SelectionDAG &DAG, SDLoc DL, ++ const Permute &P, SDValue Op0, SDValue Op1) { ++ // VPDI (PERMUTE_DWORDS) always operates on v2i64s. The input ++ // elements of a PACK are twice as wide as the outputs. ++ unsigned InBytes = (P.Opcode == SystemZISD::PERMUTE_DWORDS ? 8 : ++ P.Opcode == SystemZISD::PACK ? P.Operand * 2 : ++ P.Operand); ++ // Cast both operands to the appropriate type. ++ MVT InVT = MVT::getVectorVT(MVT::getIntegerVT(InBytes * 8), ++ SystemZ::VectorBytes / InBytes); ++ Op0 = DAG.getNode(ISD::BITCAST, DL, InVT, Op0); ++ Op1 = DAG.getNode(ISD::BITCAST, DL, InVT, Op1); ++ SDValue Op; ++ if (P.Opcode == SystemZISD::PERMUTE_DWORDS) { ++ SDValue Op2 = DAG.getConstant(P.Operand, MVT::i32); ++ Op = DAG.getNode(SystemZISD::PERMUTE_DWORDS, DL, InVT, Op0, Op1, Op2); ++ } else if (P.Opcode == SystemZISD::PACK) { ++ MVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(P.Operand * 8), ++ SystemZ::VectorBytes / P.Operand); ++ Op = DAG.getNode(SystemZISD::PACK, DL, OutVT, Op0, Op1); ++ } else { ++ Op = DAG.getNode(P.Opcode, DL, InVT, Op0, Op1); ++ } ++ return Op; ++} ++ ++// Bytes is a VPERM-like permute vector, except that -1 is used for ++// undefined bytes. Implement it on operands Ops[0] and Ops[1] using ++// VSLDI or VPERM. ++static SDValue getGeneralPermuteNode(SelectionDAG &DAG, SDLoc DL, SDValue *Ops, ++ const SmallVectorImpl &Bytes) { ++ for (unsigned I = 0; I < 2; ++I) ++ Ops[I] = DAG.getNode(ISD::BITCAST, DL, MVT::v16i8, Ops[I]); ++ ++ // First see whether VSLDI can be used. 
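
For reference, the VSLDI pattern that isShlDoublePermute matches selects a
window of 16 consecutive bytes out of the 32-byte concatenation of the two
operands. A stand-alone model, assuming a shift amount of 0..15 (names are
illustrative, not part of the patch):

#include <array>
#include <cstdint>

// Model of VECTOR SHIFT LEFT DOUBLE BY BYTE: result byte I is byte
// Shift + I of the concatenation A || B.
std::array<uint8_t, 16> vsldi(const std::array<uint8_t, 16> &A,
                              const std::array<uint8_t, 16> &B,
                              unsigned Shift) {
  std::array<uint8_t, 16> R{};
  for (unsigned I = 0; I < 16; ++I)
    R[I] = (Shift + I < 16) ? A[Shift + I] : B[Shift + I - 16];
  return R;
}
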
++ unsigned StartIndex, OpNo0, OpNo1; ++ if (isShlDoublePermute(Bytes, StartIndex, OpNo0, OpNo1)) ++ return DAG.getNode(SystemZISD::SHL_DOUBLE, DL, MVT::v16i8, Ops[OpNo0], ++ Ops[OpNo1], DAG.getConstant(StartIndex, MVT::i32)); ++ ++ // Fall back on VPERM. Construct an SDNode for the permute vector. ++ SDValue IndexNodes[SystemZ::VectorBytes]; ++ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) ++ if (Bytes[I] >= 0) ++ IndexNodes[I] = DAG.getConstant(Bytes[I], MVT::i32); ++ else ++ IndexNodes[I] = DAG.getUNDEF(MVT::i32); ++ SDValue Op2 = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, IndexNodes); ++ return DAG.getNode(SystemZISD::PERMUTE, DL, MVT::v16i8, Ops[0], Ops[1], Op2); ++} ++ ++namespace { ++// Describes a general N-operand vector shuffle. ++struct GeneralShuffle { ++ GeneralShuffle(EVT vt) : VT(vt) {} ++ void addUndef(); ++ void add(SDValue, unsigned); ++ SDValue getNode(SelectionDAG &, SDLoc); ++ ++ // The operands of the shuffle. ++ SmallVector Ops; ++ ++ // Index I is -1 if byte I of the result is undefined. Otherwise the ++ // result comes from byte Bytes[I] % SystemZ::VectorBytes of operand ++ // Bytes[I] / SystemZ::VectorBytes. ++ SmallVector Bytes; ++ ++ // The type of the shuffle result. ++ EVT VT; ++}; ++} ++ ++// Add an extra undefined element to the shuffle. ++void GeneralShuffle::addUndef() { ++ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); ++ for (unsigned I = 0; I < BytesPerElement; ++I) ++ Bytes.push_back(-1); ++} ++ ++// Add an extra element to the shuffle, taking it from element Elem of Op. ++// A null Op indicates a vector input whose value will be calculated later; ++// there is at most one such input per shuffle and it always has the same ++// type as the result. ++void GeneralShuffle::add(SDValue Op, unsigned Elem) { ++ unsigned BytesPerElement = VT.getVectorElementType().getStoreSize(); ++ ++ // The source vector can have wider elements than the result, ++ // either through an explicit TRUNCATE or because of type legalization. ++ // We want the least significant part. ++ EVT FromVT = Op.getNode() ? Op.getValueType() : VT; ++ unsigned FromBytesPerElement = FromVT.getVectorElementType().getStoreSize(); ++ assert(FromBytesPerElement >= BytesPerElement && ++ "Invalid EXTRACT_VECTOR_ELT"); ++ unsigned Byte = ((Elem * FromBytesPerElement) % SystemZ::VectorBytes + ++ (FromBytesPerElement - BytesPerElement)); ++ ++ // Look through things like shuffles and bitcasts. ++ while (Op.getNode()) { ++ if (Op.getOpcode() == ISD::BITCAST) ++ Op = Op.getOperand(0); ++ else if (Op.getOpcode() == ISD::VECTOR_SHUFFLE && Op.hasOneUse()) { ++ // See whether the bytes we need come from a contiguous part of one ++ // operand. ++ SmallVector OpBytes; ++ getVPermMask(cast(Op), OpBytes); ++ int NewByte; ++ if (!getShuffleInput(OpBytes, Byte, BytesPerElement, NewByte)) ++ break; ++ if (NewByte < 0) { ++ addUndef(); ++ return; ++ } ++ Op = Op.getOperand(unsigned(NewByte) / SystemZ::VectorBytes); ++ Byte = unsigned(NewByte) % SystemZ::VectorBytes; ++ } else if (Op.getOpcode() == ISD::UNDEF) { ++ addUndef(); ++ return; ++ } else ++ break; ++ } ++ ++ // Make sure that the source of the extraction is in Ops. ++ unsigned OpNo = 0; ++ for (; OpNo < Ops.size(); ++OpNo) ++ if (Ops[OpNo] == Op) ++ break; ++ if (OpNo == Ops.size()) ++ Ops.push_back(Op); ++ ++ // Add the element to Bytes. ++ unsigned Base = OpNo * SystemZ::VectorBytes + Byte; ++ for (unsigned I = 0; I < BytesPerElement; ++I) ++ Bytes.push_back(Base + I); ++} ++ ++// Return SDNodes for the completed shuffle. 
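
The Bytes encoding used throughout GeneralShuffle can be modelled directly:
selector value S picks byte S % 16 of operand S / 16, and -1 leaves the
result byte undefined, which is also how the VPERM fallback interprets its
permute vector. A stand-alone sketch of applying such a selector (the helper
name is illustrative):

#include <cstdint>
#include <vector>

std::vector<uint8_t> applyBytes(const std::vector<std::vector<uint8_t>> &Ops,
                                const std::vector<int> &Bytes) {
  std::vector<uint8_t> Result(Bytes.size(), 0);
  for (size_t I = 0; I < Bytes.size(); ++I)
    if (Bytes[I] >= 0)
      Result[I] = Ops[Bytes[I] / 16][Bytes[I] % 16];
  return Result;
}
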
++SDValue GeneralShuffle::getNode(SelectionDAG &DAG, SDLoc DL) { ++ assert(Bytes.size() == SystemZ::VectorBytes && "Incomplete vector"); ++ ++ if (Ops.size() == 0) ++ return DAG.getUNDEF(VT); ++ ++ // Make sure that there are at least two shuffle operands. ++ if (Ops.size() == 1) ++ Ops.push_back(DAG.getUNDEF(MVT::v16i8)); ++ ++ // Create a tree of shuffles, deferring root node until after the loop. ++ // Try to redistribute the undefined elements of non-root nodes so that ++ // the non-root shuffles match something like a pack or merge, then adjust ++ // the parent node's permute vector to compensate for the new order. ++ // Among other things, this copes with vectors like <2 x i16> that were ++ // padded with undefined elements during type legalization. ++ // ++ // In the best case this redistribution will lead to the whole tree ++ // using packs and merges. It should rarely be a loss in other cases. ++ unsigned Stride = 1; ++ for (; Stride * 2 < Ops.size(); Stride *= 2) { ++ for (unsigned I = 0; I < Ops.size() - Stride; I += Stride * 2) { ++ SDValue SubOps[] = { Ops[I], Ops[I + Stride] }; ++ ++ // Create a mask for just these two operands. ++ SmallVector NewBytes(SystemZ::VectorBytes); ++ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { ++ unsigned OpNo = unsigned(Bytes[J]) / SystemZ::VectorBytes; ++ unsigned Byte = unsigned(Bytes[J]) % SystemZ::VectorBytes; ++ if (OpNo == I) ++ NewBytes[J] = Byte; ++ else if (OpNo == I + Stride) ++ NewBytes[J] = SystemZ::VectorBytes + Byte; ++ else ++ NewBytes[J] = -1; ++ } ++ // See if it would be better to reorganize NewMask to avoid using VPERM. ++ SmallVector NewBytesMap(SystemZ::VectorBytes); ++ if (const Permute *P = matchDoublePermute(NewBytes, NewBytesMap)) { ++ Ops[I] = getPermuteNode(DAG, DL, *P, SubOps[0], SubOps[1]); ++ // Applying NewBytesMap to Ops[I] gets back to NewBytes. ++ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) { ++ if (NewBytes[J] >= 0) { ++ assert(unsigned(NewBytesMap[J]) < SystemZ::VectorBytes && ++ "Invalid double permute"); ++ Bytes[J] = I * SystemZ::VectorBytes + NewBytesMap[J]; ++ } else ++ assert(NewBytesMap[J] < 0 && "Invalid double permute"); ++ } ++ } else { ++ // Just use NewBytes on the operands. ++ Ops[I] = getGeneralPermuteNode(DAG, DL, SubOps, NewBytes); ++ for (unsigned J = 0; J < SystemZ::VectorBytes; ++J) ++ if (NewBytes[J] >= 0) ++ Bytes[J] = I * SystemZ::VectorBytes + J; ++ } ++ } ++ } ++ ++ // Now we just have 2 inputs. Put the second operand in Ops[1]. ++ if (Stride > 1) { ++ Ops[1] = Ops[Stride]; ++ for (unsigned I = 0; I < SystemZ::VectorBytes; ++I) ++ if (Bytes[I] >= int(SystemZ::VectorBytes)) ++ Bytes[I] -= (Stride - 1) * SystemZ::VectorBytes; ++ } ++ ++ // Look for an instruction that can do the permute without resorting ++ // to VPERM. ++ unsigned OpNo0, OpNo1; ++ SDValue Op; ++ if (const Permute *P = matchPermute(Bytes, OpNo0, OpNo1)) ++ Op = getPermuteNode(DAG, DL, *P, Ops[OpNo0], Ops[OpNo1]); ++ else ++ Op = getGeneralPermuteNode(DAG, DL, &Ops[0], Bytes); ++ return DAG.getNode(ISD::BITCAST, DL, VT, Op); ++} ++ ++// Return true if the given BUILD_VECTOR is a scalar-to-vector conversion. ++static bool isScalarToVector(SDValue Op) { ++ for (unsigned I = 1, E = Op.getNumOperands(); I != E; ++I) ++ if (Op.getOperand(I).getOpcode() != ISD::UNDEF) ++ return false; ++ return true; ++} ++ ++// Return a vector of type VT that contains Value in the first element. ++// The other elements don't matter. 
++static SDValue buildScalarToVector(SelectionDAG &DAG, SDLoc DL, EVT VT, ++ SDValue Value) { ++ // If we have a constant, replicate it to all elements and let the ++ // BUILD_VECTOR lowering take care of it. ++ if (Value.getOpcode() == ISD::Constant || ++ Value.getOpcode() == ISD::ConstantFP) { ++ SmallVector Ops(VT.getVectorNumElements(), Value); ++ return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops); ++ } ++ if (Value.getOpcode() == ISD::UNDEF) ++ return DAG.getUNDEF(VT); ++ return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); ++} ++ ++// Return a vector of type VT in which Op0 is in element 0 and Op1 is in ++// element 1. Used for cases in which replication is cheap. ++static SDValue buildMergeScalars(SelectionDAG &DAG, SDLoc DL, EVT VT, ++ SDValue Op0, SDValue Op1) { ++ if (Op0.getOpcode() == ISD::UNDEF) { ++ if (Op1.getOpcode() == ISD::UNDEF) ++ return DAG.getUNDEF(VT); ++ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op1); ++ } ++ if (Op1.getOpcode() == ISD::UNDEF) ++ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0); ++ return DAG.getNode(SystemZISD::MERGE_HIGH, DL, VT, ++ buildScalarToVector(DAG, DL, VT, Op0), ++ buildScalarToVector(DAG, DL, VT, Op1)); ++} ++ ++// Extend GPR scalars Op0 and Op1 to doublewords and return a v2i64 ++// vector for them. ++static SDValue joinDwords(SelectionDAG &DAG, SDLoc DL, SDValue Op0, ++ SDValue Op1) { ++ if (Op0.getOpcode() == ISD::UNDEF && Op1.getOpcode() == ISD::UNDEF) ++ return DAG.getUNDEF(MVT::v2i64); ++ // If one of the two inputs is undefined then replicate the other one, ++ // in order to avoid using another register unnecessarily. ++ if (Op0.getOpcode() == ISD::UNDEF) ++ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); ++ else if (Op1.getOpcode() == ISD::UNDEF) ++ Op0 = Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); ++ else { ++ Op0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0); ++ Op1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op1); ++ } ++ return DAG.getNode(SystemZISD::JOIN_DWORDS, DL, MVT::v2i64, Op0, Op1); ++} ++ ++// Try to represent constant BUILD_VECTOR node BVN using a ++// SystemZISD::BYTE_MASK-style mask. Store the mask value in Mask ++// on success. ++static bool tryBuildVectorByteMask(BuildVectorSDNode *BVN, uint64_t &Mask) { ++ EVT ElemVT = BVN->getValueType(0).getVectorElementType(); ++ unsigned BytesPerElement = ElemVT.getStoreSize(); ++ for (unsigned I = 0, E = BVN->getNumOperands(); I != E; ++I) { ++ SDValue Op = BVN->getOperand(I); ++ if (Op.getOpcode() != ISD::UNDEF) { ++ uint64_t Value; ++ if (Op.getOpcode() == ISD::Constant) ++ Value = dyn_cast(Op)->getZExtValue(); ++ else if (Op.getOpcode() == ISD::ConstantFP) ++ Value = (dyn_cast(Op)->getValueAPF().bitcastToAPInt() ++ .getZExtValue()); ++ else ++ return false; ++ for (unsigned J = 0; J < BytesPerElement; ++J) { ++ uint64_t Byte = (Value >> (J * 8)) & 0xff; ++ if (Byte == 0xff) ++ Mask |= 1 << ((E - I - 1) * BytesPerElement + J); ++ else if (Byte != 0) ++ return false; ++ } ++ } ++ } ++ return true; ++} ++ ++// Try to load a vector constant in which BitsPerElement-bit value Value ++// is replicated to fill the vector. VT is the type of the resulting ++// constant, which may have elements of a different size from BitsPerElement. ++// Return the SDValue of the constant on success, otherwise return ++// an empty value. 
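
The byte-mask representation that tryBuildVectorByteMask builds above can be
expanded back into a vector constant as follows, assuming the ISA's
left-to-right bit numbering for the VGBM immediate (so byte 0 of the vector
is controlled by bit 15 of the 16-bit value). A stand-alone sketch:

#include <array>
#include <cstdint>

// Expand a BYTE_MASK (VGBM) immediate: each mask bit selects an all-ones
// or all-zeros byte of the 16-byte result.
std::array<uint8_t, 16> expandByteMask(uint16_t Mask) {
  std::array<uint8_t, 16> R{};
  for (unsigned I = 0; I < 16; ++I)
    R[I] = (Mask & (1u << (15 - I))) ? 0xff : 0x00;
  return R;
}
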
++static SDValue tryBuildVectorReplicate(SelectionDAG &DAG, ++ const SystemZInstrInfo *TII, ++ SDLoc DL, EVT VT, uint64_t Value, ++ unsigned BitsPerElement) { ++ // Signed 16-bit values can be replicated using VREPI. ++ int64_t SignedValue = SignExtend64(Value, BitsPerElement); ++ if (isInt<16>(SignedValue)) { ++ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), ++ SystemZ::VectorBits / BitsPerElement); ++ SDValue Op = DAG.getNode(SystemZISD::REPLICATE, DL, VecVT, ++ DAG.getConstant(SignedValue, MVT::i32)); ++ return DAG.getNode(ISD::BITCAST, DL, VT, Op); ++ } ++ // See whether rotating the constant left some N places gives a value that ++ // is one less than a power of 2 (i.e. all zeros followed by all ones). ++ // If so we can use VGM. ++ unsigned Start, End; ++ if (TII->isRxSBGMask(Value, BitsPerElement, Start, End)) { ++ // isRxSBGMask returns the bit numbers for a full 64-bit value, ++ // with 0 denoting 1 << 63 and 63 denoting 1. Convert them to ++ // bit numbers for an BitsPerElement value, so that 0 denotes ++ // 1 << (BitsPerElement-1). ++ Start -= 64 - BitsPerElement; ++ End -= 64 - BitsPerElement; ++ MVT VecVT = MVT::getVectorVT(MVT::getIntegerVT(BitsPerElement), ++ SystemZ::VectorBits / BitsPerElement); ++ SDValue Op = DAG.getNode(SystemZISD::ROTATE_MASK, DL, VecVT, ++ DAG.getConstant(Start, MVT::i32), ++ DAG.getConstant(End, MVT::i32)); ++ return DAG.getNode(ISD::BITCAST, DL, VT, Op); ++ } ++ return SDValue(); ++} ++ ++// If a BUILD_VECTOR contains some EXTRACT_VECTOR_ELTs, it's usually ++// better to use VECTOR_SHUFFLEs on them, only using BUILD_VECTOR for ++// the non-EXTRACT_VECTOR_ELT elements. See if the given BUILD_VECTOR ++// would benefit from this representation and return it if so. ++static SDValue tryBuildVectorShuffle(SelectionDAG &DAG, ++ BuildVectorSDNode *BVN) { ++ EVT VT = BVN->getValueType(0); ++ unsigned NumElements = VT.getVectorNumElements(); ++ ++ // Represent the BUILD_VECTOR as an N-operand VECTOR_SHUFFLE-like operation ++ // on byte vectors. If there are non-EXTRACT_VECTOR_ELT elements that still ++ // need a BUILD_VECTOR, add an additional placeholder operand for that ++ // BUILD_VECTOR and store its operands in ResidueOps. ++ GeneralShuffle GS(VT); ++ SmallVector ResidueOps; ++ bool FoundOne = false; ++ for (unsigned I = 0; I < NumElements; ++I) { ++ SDValue Op = BVN->getOperand(I); ++ if (Op.getOpcode() == ISD::TRUNCATE) ++ Op = Op.getOperand(0); ++ if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && ++ Op.getOperand(1).getOpcode() == ISD::Constant) { ++ unsigned Elem = cast(Op.getOperand(1))->getZExtValue(); ++ GS.add(Op.getOperand(0), Elem); ++ FoundOne = true; ++ } else if (Op.getOpcode() == ISD::UNDEF) { ++ GS.addUndef(); ++ } else { ++ GS.add(SDValue(), ResidueOps.size()); ++ ResidueOps.push_back(Op); ++ } ++ } ++ ++ // Nothing to do if there are no EXTRACT_VECTOR_ELTs. ++ if (!FoundOne) ++ return SDValue(); ++ ++ // Create the BUILD_VECTOR for the remaining elements, if any. ++ if (!ResidueOps.empty()) { ++ while (ResidueOps.size() < NumElements) ++ ResidueOps.push_back(DAG.getUNDEF(VT.getVectorElementType())); ++ for (auto &Op : GS.Ops) { ++ if (!Op.getNode()) { ++ Op = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BVN), VT, ResidueOps); ++ break; ++ } ++ } ++ } ++ return GS.getNode(DAG, SDLoc(BVN)); ++} ++ ++// Combine GPR scalar values Elems into a vector of type VT. ++static SDValue buildVector(SelectionDAG &DAG, SDLoc DL, EVT VT, ++ SmallVectorImpl &Elems) { ++ // See whether there is a single replicated value. 
++ SDValue Single; ++ unsigned int NumElements = Elems.size(); ++ unsigned int Count = 0; ++ for (auto Elem : Elems) { ++ if (Elem.getOpcode() != ISD::UNDEF) { ++ if (!Single.getNode()) ++ Single = Elem; ++ else if (Elem != Single) { ++ Single = SDValue(); ++ break; ++ } ++ Count += 1; ++ } ++ } ++ // There are three cases here: ++ // ++ // - if the only defined element is a loaded one, the best sequence ++ // is a replicating load. ++ // ++ // - otherwise, if the only defined element is an i64 value, we will ++ // end up with the same VLVGP sequence regardless of whether we short-cut ++ // for replication or fall through to the later code. ++ // ++ // - otherwise, if the only defined element is an i32 or smaller value, ++ // we would need 2 instructions to replicate it: VLVGP followed by VREPx. ++ // This is only a win if the single defined element is used more than once. ++ // In other cases we're better off using a single VLVGx. ++ if (Single.getNode() && (Count > 1 || Single.getOpcode() == ISD::LOAD)) ++ return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Single); ++ ++ // The best way of building a v2i64 from two i64s is to use VLVGP. ++ if (VT == MVT::v2i64) ++ return joinDwords(DAG, DL, Elems[0], Elems[1]); ++ ++ // Use a 64-bit merge high to combine two doubles. ++ if (VT == MVT::v2f64) ++ return buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); ++ ++ // Build v4f32 values directly from the FPRs: ++ // ++ // ++ // V V VMRHF ++ // ++ // V VMRHG ++ // ++ if (VT == MVT::v4f32) { ++ SDValue Op01 = buildMergeScalars(DAG, DL, VT, Elems[0], Elems[1]); ++ SDValue Op23 = buildMergeScalars(DAG, DL, VT, Elems[2], Elems[3]); ++ // Avoid unnecessary undefs by reusing the other operand. ++ if (Op01.getOpcode() == ISD::UNDEF) ++ Op01 = Op23; ++ else if (Op23.getOpcode() == ISD::UNDEF) ++ Op23 = Op01; ++ // Merging identical replications is a no-op. ++ if (Op01.getOpcode() == SystemZISD::REPLICATE && Op01 == Op23) ++ return Op01; ++ Op01 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op01); ++ Op23 = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Op23); ++ SDValue Op = DAG.getNode(SystemZISD::MERGE_HIGH, ++ DL, MVT::v2i64, Op01, Op23); ++ return DAG.getNode(ISD::BITCAST, DL, VT, Op); ++ } ++ ++ // Collect the constant terms. ++ SmallVector Constants(NumElements, SDValue()); ++ SmallVector Done(NumElements, false); ++ ++ unsigned NumConstants = 0; ++ for (unsigned I = 0; I < NumElements; ++I) { ++ SDValue Elem = Elems[I]; ++ if (Elem.getOpcode() == ISD::Constant || ++ Elem.getOpcode() == ISD::ConstantFP) { ++ NumConstants += 1; ++ Constants[I] = Elem; ++ Done[I] = true; ++ } ++ } ++ // If there was at least one constant, fill in the other elements of ++ // Constants with undefs to get a full vector constant and use that ++ // as the starting point. ++ SDValue Result; ++ if (NumConstants > 0) { ++ for (unsigned I = 0; I < NumElements; ++I) ++ if (!Constants[I].getNode()) ++ Constants[I] = DAG.getUNDEF(Elems[I].getValueType()); ++ Result = DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Constants); ++ } else { ++ // Otherwise try to use VLVGP to start the sequence in order to ++ // avoid a false dependency on any previous contents of the vector ++ // register. This only makes sense if one of the associated elements ++ // is defined. ++ unsigned I1 = NumElements / 2 - 1; ++ unsigned I2 = NumElements - 1; ++ bool Def1 = (Elems[I1].getOpcode() != ISD::UNDEF); ++ bool Def2 = (Elems[I2].getOpcode() != ISD::UNDEF); ++ if (Def1 || Def2) { ++ SDValue Elem1 = Elems[Def1 ? I1 : I2]; ++ SDValue Elem2 = Elems[Def2 ? 
I2 : I1]; ++ Result = DAG.getNode(ISD::BITCAST, DL, VT, ++ joinDwords(DAG, DL, Elem1, Elem2)); ++ Done[I1] = true; ++ Done[I2] = true; ++ } else ++ Result = DAG.getUNDEF(VT); ++ } ++ ++ // Use VLVGx to insert the other elements. ++ for (unsigned I = 0; I < NumElements; ++I) ++ if (!Done[I] && Elems[I].getOpcode() != ISD::UNDEF) ++ Result = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Result, Elems[I], ++ DAG.getConstant(I, MVT::i32)); ++ return Result; ++} ++ ++SDValue SystemZTargetLowering::lowerBUILD_VECTOR(SDValue Op, ++ SelectionDAG &DAG) const { ++ const SystemZInstrInfo *TII = ++ static_cast(Subtarget.getInstrInfo()); ++ auto *BVN = cast(Op.getNode()); ++ SDLoc DL(Op); ++ EVT VT = Op.getValueType(); ++ ++ if (BVN->isConstant()) { ++ // Try using VECTOR GENERATE BYTE MASK. This is the architecturally- ++ // preferred way of creating all-zero and all-one vectors so give it ++ // priority over other methods below. ++ uint64_t Mask = 0; ++ if (tryBuildVectorByteMask(BVN, Mask)) { ++ SDValue Op = DAG.getNode(SystemZISD::BYTE_MASK, DL, MVT::v16i8, ++ DAG.getConstant(Mask, MVT::i32)); ++ return DAG.getNode(ISD::BITCAST, DL, VT, Op); ++ } ++ ++ // Try using some form of replication. ++ APInt SplatBits, SplatUndef; ++ unsigned SplatBitSize; ++ bool HasAnyUndefs; ++ if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs, ++ 8, true) && ++ SplatBitSize <= 64) { ++ // First try assuming that any undefined bits above the highest set bit ++ // and below the lowest set bit are 1s. This increases the likelihood of ++ // being able to use a sign-extended element value in VECTOR REPLICATE ++ // IMMEDIATE or a wraparound mask in VECTOR GENERATE MASK. ++ uint64_t SplatBitsZ = SplatBits.getZExtValue(); ++ uint64_t SplatUndefZ = SplatUndef.getZExtValue(); ++ uint64_t Lower = (SplatUndefZ ++ & ((uint64_t(1) << findFirstSet(SplatBitsZ)) - 1)); ++ uint64_t Upper = (SplatUndefZ ++ & ~((uint64_t(1) << findLastSet(SplatBitsZ)) - 1)); ++ uint64_t Value = SplatBitsZ | Upper | Lower; ++ SDValue Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, ++ SplatBitSize); ++ if (Op.getNode()) ++ return Op; ++ ++ // Now try assuming that any undefined bits between the first and ++ // last defined set bits are set. This increases the chances of ++ // using a non-wraparound mask. ++ uint64_t Middle = SplatUndefZ & ~Upper & ~Lower; ++ Value = SplatBitsZ | Middle; ++ Op = tryBuildVectorReplicate(DAG, TII, DL, VT, Value, SplatBitSize); ++ if (Op.getNode()) ++ return Op; ++ } ++ ++ // Fall back to loading it from memory. ++ return SDValue(); ++ } ++ ++ // See if we should use shuffles to construct the vector from other vectors. ++ SDValue Res = tryBuildVectorShuffle(DAG, BVN); ++ if (Res.getNode()) ++ return Res; ++ ++ // Detect SCALAR_TO_VECTOR conversions. ++ if (isOperationLegal(ISD::SCALAR_TO_VECTOR, VT) && isScalarToVector(Op)) ++ return buildScalarToVector(DAG, DL, VT, Op.getOperand(0)); ++ ++ // Otherwise use buildVector to build the vector up from GPRs. 
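
The Lower/Upper trick in lowerBUILD_VECTOR above is self-contained enough to
test on its own: undefined splat bits below the lowest defined set bit and
above the highest are assumed to be 1s, which makes a sign-extendable VREPI
immediate or a wraparound VGM mask more likely. A sketch using GCC/Clang
bit-scan builtins in place of LLVM's findFirstSet/findLastSet:

#include <cstdint>

uint64_t widenSplat(uint64_t Bits, uint64_t Undef) {
  if (Bits == 0)
    return 0; // defensive; the all-zero case is handled by the byte mask
  unsigned First = __builtin_ctzll(Bits);
  unsigned Last = 63 - __builtin_clzll(Bits);
  uint64_t Lower = Undef & ((uint64_t(1) << First) - 1);
  uint64_t Upper = Undef & ~((uint64_t(1) << Last) - 1);
  return Bits | Upper | Lower;
}
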
++SDValue SystemZTargetLowering::lowerVECTOR_SHUFFLE(SDValue Op,
++                                                   SelectionDAG &DAG) const {
++  auto *VSN = cast<ShuffleVectorSDNode>(Op.getNode());
++  SDLoc DL(Op);
++  EVT VT = Op.getValueType();
++  unsigned NumElements = VT.getVectorNumElements();
++
++  if (VSN->isSplat()) {
++    SDValue Op0 = Op.getOperand(0);
++    unsigned Index = VSN->getSplatIndex();
++    assert(Index < VT.getVectorNumElements() &&
++           "Splat index should be defined and in first operand");
++    // See whether the value we're splatting is directly available as a scalar.
++    if ((Index == 0 && Op0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
++        Op0.getOpcode() == ISD::BUILD_VECTOR)
++      return DAG.getNode(SystemZISD::REPLICATE, DL, VT, Op0.getOperand(Index));
++    // Otherwise keep it as a vector-to-vector operation.
++    return DAG.getNode(SystemZISD::SPLAT, DL, VT, Op.getOperand(0),
++                       DAG.getConstant(Index, MVT::i32));
++  }
++
++  GeneralShuffle GS(VT);
++  for (unsigned I = 0; I < NumElements; ++I) {
++    int Elt = VSN->getMaskElt(I);
++    if (Elt < 0)
++      GS.addUndef();
++    else
++      GS.add(Op.getOperand(unsigned(Elt) / NumElements),
++             unsigned(Elt) % NumElements);
++  }
++  return GS.getNode(DAG, SDLoc(VSN));
++}
++
++SDValue SystemZTargetLowering::lowerSCALAR_TO_VECTOR(SDValue Op,
++                                                     SelectionDAG &DAG) const {
++  SDLoc DL(Op);
++  // Just insert the scalar into element 0 of an undefined vector.
++  return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL,
++                     Op.getValueType(), DAG.getUNDEF(Op.getValueType()),
++                     Op.getOperand(0), DAG.getConstant(0, MVT::i32));
++}
++
++SDValue SystemZTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
++                                                      SelectionDAG &DAG) const {
++  // Handle insertions of floating-point values.
++  SDLoc DL(Op);
++  SDValue Op0 = Op.getOperand(0);
++  SDValue Op1 = Op.getOperand(1);
++  SDValue Op2 = Op.getOperand(2);
++  EVT VT = Op.getValueType();
++
++  // Insertions into constant indices of a v2f64 can be done using VPDI.
++  // However, if the inserted value is a bitcast or a constant then it's
++  // better to use GPRs, as below.
++  if (VT == MVT::v2f64 &&
++      Op1.getOpcode() != ISD::BITCAST &&
++      Op1.getOpcode() != ISD::ConstantFP &&
++      Op2.getOpcode() == ISD::Constant) {
++    uint64_t Index = dyn_cast<ConstantSDNode>(Op2)->getZExtValue();
++    unsigned Mask = VT.getVectorNumElements() - 1;
++    if (Index <= Mask)
++      return Op;
++  }
++
++  // Otherwise bitcast to the equivalent integer form and insert via a GPR.
++  MVT IntVT = MVT::getIntegerVT(VT.getVectorElementType().getSizeInBits());
++  MVT IntVecVT = MVT::getVectorVT(IntVT, VT.getVectorNumElements());
++  SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntVecVT,
++                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0),
++                            DAG.getNode(ISD::BITCAST, DL, IntVT, Op1), Op2);
++  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
++}
++
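The GeneralShuffle loop in lowerVECTOR_SHUFFLE above decodes each mask element into an (operand, element) pair, since VECTOR_SHUFFLE mask values index the concatenation of both source vectors. A minimal sketch of that split (standalone; not part of the patch):

    #include <cassert>
    #include <utility>

    // Mask values 0..N-1 name elements of the first source vector,
    // N..2N-1 elements of the second.
    std::pair<unsigned, unsigned> splitMaskIndex(unsigned Elt,
                                                 unsigned NumElements) {
      return {Elt / NumElements,   // which source operand
              Elt % NumElements};  // element within that operand
    }

    int main() {
      // For v4i32 (NumElements == 4), mask value 6 is element 2 of operand 1.
      assert(splitMaskIndex(6, 4) == std::make_pair(1u, 2u));
      return 0;
    }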
++SDValue
++SystemZTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
++                                               SelectionDAG &DAG) const {
++  // Handle extractions of floating-point values.
++  SDLoc DL(Op);
++  SDValue Op0 = Op.getOperand(0);
++  SDValue Op1 = Op.getOperand(1);
++  EVT VT = Op.getValueType();
++  EVT VecVT = Op0.getValueType();
++
++  // Extractions of constant indices can be done directly.
++  if (auto *CIndexN = dyn_cast<ConstantSDNode>(Op1)) {
++    uint64_t Index = CIndexN->getZExtValue();
++    unsigned Mask = VecVT.getVectorNumElements() - 1;
++    if (Index <= Mask)
++      return Op;
++  }
++
++  // Otherwise bitcast to the equivalent integer form and extract via a GPR.
++  MVT IntVT = MVT::getIntegerVT(VT.getSizeInBits());
++  MVT IntVecVT = MVT::getVectorVT(IntVT, VecVT.getVectorNumElements());
++  SDValue Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, IntVT,
++                            DAG.getNode(ISD::BITCAST, DL, IntVecVT, Op0), Op1);
++  return DAG.getNode(ISD::BITCAST, DL, VT, Res);
++}
++
++SDValue
++SystemZTargetLowering::lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG,
++                                              unsigned UnpackHigh) const {
++  SDValue PackedOp = Op.getOperand(0);
++  EVT OutVT = Op.getValueType();
++  EVT InVT = PackedOp.getValueType();
++  unsigned ToBits = OutVT.getVectorElementType().getSizeInBits();
++  unsigned FromBits = InVT.getVectorElementType().getSizeInBits();
++  do {
++    FromBits *= 2;
++    EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(FromBits),
++                                 SystemZ::VectorBits / FromBits);
++    PackedOp = DAG.getNode(UnpackHigh, SDLoc(PackedOp), OutVT, PackedOp);
++  } while (FromBits != ToBits);
++  return PackedOp;
++}
++
++SDValue SystemZTargetLowering::lowerShift(SDValue Op, SelectionDAG &DAG,
++                                          unsigned ByScalar) const {
++  // Look for cases where a vector shift can use the *_BY_SCALAR form.
++  SDValue Op0 = Op.getOperand(0);
++  SDValue Op1 = Op.getOperand(1);
++  SDLoc DL(Op);
++  EVT VT = Op.getValueType();
++  unsigned ElemBitSize = VT.getVectorElementType().getSizeInBits();
++
++  // See whether the shift vector is a splat represented as BUILD_VECTOR.
++  if (auto *BVN = dyn_cast<BuildVectorSDNode>(Op1)) {
++    APInt SplatBits, SplatUndef;
++    unsigned SplatBitSize;
++    bool HasAnyUndefs;
++    // Check for constant splats.  Use ElemBitSize as the minimum element
++    // width and reject splats that need wider elements.
++    if (BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs,
++                             ElemBitSize, true) &&
++        SplatBitSize == ElemBitSize) {
++      SDValue Shift = DAG.getConstant(SplatBits.getZExtValue() & 0xfff,
++                                      MVT::i32);
++      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
++    }
++    // Check for variable splats.
++    BitVector UndefElements;
++    SDValue Splat = BVN->getSplatValue(&UndefElements);
++    if (Splat) {
++      // Since i32 is the smallest legal type, we either need a no-op
++      // or a truncation.
++      SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Splat);
++      return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
++    }
++  }
++
++  // See whether the shift vector is a splat represented as SHUFFLE_VECTOR,
++  // and the shift amount is directly available in a GPR.
++  if (auto *VSN = dyn_cast<ShuffleVectorSDNode>(Op1)) {
++    if (VSN->isSplat()) {
++      SDValue VSNOp0 = VSN->getOperand(0);
++      unsigned Index = VSN->getSplatIndex();
++      assert(Index < VT.getVectorNumElements() &&
++             "Splat index should be defined and in first operand");
++      if ((Index == 0 && VSNOp0.getOpcode() == ISD::SCALAR_TO_VECTOR) ||
++          VSNOp0.getOpcode() == ISD::BUILD_VECTOR) {
++        // Since i32 is the smallest legal type, we either need a no-op
++        // or a truncation.
++        SDValue Shift = DAG.getNode(ISD::TRUNCATE, DL, MVT::i32,
++                                    VSNOp0.getOperand(Index));
++        return DAG.getNode(ByScalar, DL, VT, Op0, Shift);
++      }
++    }
++  }
++
++  // Otherwise just treat the current form as legal.
++ return Op; ++} ++ + SDValue SystemZTargetLowering::LowerOperation(SDValue Op, + SelectionDAG &DAG) const { + switch (Op.getOpcode()) { +@@ -2437,6 +4291,14 @@ SDValue SystemZTargetLowering::LowerOper + return lowerUDIVREM(Op, DAG); + case ISD::OR: + return lowerOR(Op, DAG); ++ case ISD::CTPOP: ++ return lowerCTPOP(Op, DAG); ++ case ISD::CTLZ_ZERO_UNDEF: ++ return DAG.getNode(ISD::CTLZ, SDLoc(Op), ++ Op.getValueType(), Op.getOperand(0)); ++ case ISD::CTTZ_ZERO_UNDEF: ++ return DAG.getNode(ISD::CTTZ, SDLoc(Op), ++ Op.getValueType(), Op.getOperand(0)); + case ISD::ATOMIC_SWAP: + return lowerATOMIC_LOAD_OP(Op, DAG, SystemZISD::ATOMIC_SWAPW); + case ISD::ATOMIC_STORE: +@@ -2471,6 +4333,30 @@ SDValue SystemZTargetLowering::LowerOper + return lowerSTACKRESTORE(Op, DAG); + case ISD::PREFETCH: + return lowerPREFETCH(Op, DAG); ++ case ISD::INTRINSIC_W_CHAIN: ++ return lowerINTRINSIC_W_CHAIN(Op, DAG); ++ case ISD::INTRINSIC_WO_CHAIN: ++ return lowerINTRINSIC_WO_CHAIN(Op, DAG); ++ case ISD::BUILD_VECTOR: ++ return lowerBUILD_VECTOR(Op, DAG); ++ case ISD::VECTOR_SHUFFLE: ++ return lowerVECTOR_SHUFFLE(Op, DAG); ++ case ISD::SCALAR_TO_VECTOR: ++ return lowerSCALAR_TO_VECTOR(Op, DAG); ++ case ISD::INSERT_VECTOR_ELT: ++ return lowerINSERT_VECTOR_ELT(Op, DAG); ++ case ISD::EXTRACT_VECTOR_ELT: ++ return lowerEXTRACT_VECTOR_ELT(Op, DAG); ++ case ISD::SIGN_EXTEND_VECTOR_INREG: ++ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACK_HIGH); ++ case ISD::ZERO_EXTEND_VECTOR_INREG: ++ return lowerExtendVectorInreg(Op, DAG, SystemZISD::UNPACKL_HIGH); ++ case ISD::SHL: ++ return lowerShift(Op, DAG, SystemZISD::VSHL_BY_SCALAR); ++ case ISD::SRL: ++ return lowerShift(Op, DAG, SystemZISD::VSRL_BY_SCALAR); ++ case ISD::SRA: ++ return lowerShift(Op, DAG, SystemZISD::VSRA_BY_SCALAR); + default: + llvm_unreachable("Unexpected node to lower"); + } +@@ -2482,6 +4368,8 @@ const char *SystemZTargetLowering::getTa + OPCODE(RET_FLAG); + OPCODE(CALL); + OPCODE(SIBCALL); ++ OPCODE(TLS_GDCALL); ++ OPCODE(TLS_LDCALL); + OPCODE(PCREL_WRAPPER); + OPCODE(PCREL_OFFSET); + OPCODE(IABS); +@@ -2492,7 +4380,9 @@ const char *SystemZTargetLowering::getTa + OPCODE(SELECT_CCMASK); + OPCODE(ADJDYNALLOC); + OPCODE(EXTRACT_ACCESS); ++ OPCODE(POPCNT); + OPCODE(UMUL_LOHI64); ++ OPCODE(SDIVREM32); + OPCODE(SDIVREM64); + OPCODE(UDIVREM32); + OPCODE(UDIVREM64); +@@ -2506,11 +4396,60 @@ const char *SystemZTargetLowering::getTa + OPCODE(XC_LOOP); + OPCODE(CLC); + OPCODE(CLC_LOOP); +- OPCODE(STRCMP); + OPCODE(STPCPY); ++ OPCODE(STRCMP); + OPCODE(SEARCH_STRING); + OPCODE(IPM); + OPCODE(SERIALIZE); ++ OPCODE(TBEGIN); ++ OPCODE(TBEGIN_NOFLOAT); ++ OPCODE(TEND); ++ OPCODE(BYTE_MASK); ++ OPCODE(ROTATE_MASK); ++ OPCODE(REPLICATE); ++ OPCODE(JOIN_DWORDS); ++ OPCODE(SPLAT); ++ OPCODE(MERGE_HIGH); ++ OPCODE(MERGE_LOW); ++ OPCODE(SHL_DOUBLE); ++ OPCODE(PERMUTE_DWORDS); ++ OPCODE(PERMUTE); ++ OPCODE(PACK); ++ OPCODE(PACKS_CC); ++ OPCODE(PACKLS_CC); ++ OPCODE(UNPACK_HIGH); ++ OPCODE(UNPACKL_HIGH); ++ OPCODE(UNPACK_LOW); ++ OPCODE(UNPACKL_LOW); ++ OPCODE(VSHL_BY_SCALAR); ++ OPCODE(VSRL_BY_SCALAR); ++ OPCODE(VSRA_BY_SCALAR); ++ OPCODE(VSUM); ++ OPCODE(VICMPE); ++ OPCODE(VICMPH); ++ OPCODE(VICMPHL); ++ OPCODE(VICMPES); ++ OPCODE(VICMPHS); ++ OPCODE(VICMPHLS); ++ OPCODE(VFCMPE); ++ OPCODE(VFCMPH); ++ OPCODE(VFCMPHE); ++ OPCODE(VFCMPES); ++ OPCODE(VFCMPHS); ++ OPCODE(VFCMPHES); ++ OPCODE(VFTCI); ++ OPCODE(VEXTEND); ++ OPCODE(VROUND); ++ OPCODE(VTM); ++ OPCODE(VFAE_CC); ++ OPCODE(VFAEZ_CC); ++ OPCODE(VFEE_CC); ++ OPCODE(VFEEZ_CC); ++ OPCODE(VFENE_CC); ++ 
OPCODE(VFENEZ_CC);
++    OPCODE(VISTR_CC);
++    OPCODE(VSTRC_CC);
++    OPCODE(VSTRCZ_CC);
+     OPCODE(ATOMIC_SWAPW);
+     OPCODE(ATOMIC_LOADW_ADD);
+     OPCODE(ATOMIC_LOADW_SUB);
+@@ -2529,6 +4468,157 @@ const char *SystemZTargetLowering::getTa
+ #undef OPCODE
+ }
+ 
++// Return true if VT is a vector whose elements are a whole number of bytes
++// in width.
++static bool canTreatAsByteVector(EVT VT) {
++  return VT.isVector() && VT.getVectorElementType().getSizeInBits() % 8 == 0;
++}
++
++// Try to simplify an EXTRACT_VECTOR_ELT from a vector of type VecVT
++// producing a result of type ResVT.  Op is a possibly bitcast version
++// of the input vector and Index is the index (based on type VecVT) that
++// should be extracted.  Return the new extraction if a simplification
++// was possible or if Force is true.
++SDValue SystemZTargetLowering::combineExtract(SDLoc DL, EVT ResVT, EVT VecVT,
++                                              SDValue Op, unsigned Index,
++                                              DAGCombinerInfo &DCI,
++                                              bool Force) const {
++  SelectionDAG &DAG = DCI.DAG;
++
++  // The number of bytes being extracted.
++  unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
++
++  for (;;) {
++    unsigned Opcode = Op.getOpcode();
++    if (Opcode == ISD::BITCAST)
++      // Look through bitcasts.
++      Op = Op.getOperand(0);
++    else if (Opcode == ISD::VECTOR_SHUFFLE &&
++             canTreatAsByteVector(Op.getValueType())) {
++      // Get a VPERM-like permute mask and see whether the bytes covered
++      // by the extracted element are a contiguous sequence from one
++      // source operand.
++      SmallVector<int, SystemZ::VectorBytes> Bytes;
++      getVPermMask(cast<ShuffleVectorSDNode>(Op), Bytes);
++      int First;
++      if (!getShuffleInput(Bytes, Index * BytesPerElement,
++                           BytesPerElement, First))
++        break;
++      if (First < 0)
++        return DAG.getUNDEF(ResVT);
++      // Make sure the contiguous sequence starts at a multiple of the
++      // original element size.
++      unsigned Byte = unsigned(First) % Bytes.size();
++      if (Byte % BytesPerElement != 0)
++        break;
++      // We can get the extracted value directly from an input.
++      Index = Byte / BytesPerElement;
++      Op = Op.getOperand(unsigned(First) / Bytes.size());
++      Force = true;
++    } else if (Opcode == ISD::BUILD_VECTOR &&
++               canTreatAsByteVector(Op.getValueType())) {
++      // We can only optimize this case if the BUILD_VECTOR elements are
++      // at least as wide as the extracted value.
++      EVT OpVT = Op.getValueType();
++      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
++      if (OpBytesPerElement < BytesPerElement)
++        break;
++      // Make sure that the least-significant bit of the extracted value
++      // is the least significant bit of an input.
++      unsigned End = (Index + 1) * BytesPerElement;
++      if (End % OpBytesPerElement != 0)
++        break;
++      // We're extracting the low part of one operand of the BUILD_VECTOR.
++      Op = Op.getOperand(End / OpBytesPerElement - 1);
++      if (!Op.getValueType().isInteger()) {
++        EVT VT = MVT::getIntegerVT(Op.getValueType().getSizeInBits());
++        Op = DAG.getNode(ISD::BITCAST, DL, VT, Op);
++        DCI.AddToWorklist(Op.getNode());
++      }
++      EVT VT = MVT::getIntegerVT(ResVT.getSizeInBits());
++      Op = DAG.getNode(ISD::TRUNCATE, DL, VT, Op);
++      if (VT != ResVT) {
++        DCI.AddToWorklist(Op.getNode());
++        Op = DAG.getNode(ISD::BITCAST, DL, ResVT, Op);
++      }
++      return Op;
++    } else if ((Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
++                Opcode == ISD::ZERO_EXTEND_VECTOR_INREG ||
++                Opcode == ISD::ANY_EXTEND_VECTOR_INREG) &&
++               canTreatAsByteVector(Op.getValueType()) &&
++               canTreatAsByteVector(Op.getOperand(0).getValueType())) {
++      // Make sure that only the unextended bits are significant.
++      EVT ExtVT = Op.getValueType();
++      EVT OpVT = Op.getOperand(0).getValueType();
++      unsigned ExtBytesPerElement = ExtVT.getVectorElementType().getStoreSize();
++      unsigned OpBytesPerElement = OpVT.getVectorElementType().getStoreSize();
++      unsigned Byte = Index * BytesPerElement;
++      unsigned SubByte = Byte % ExtBytesPerElement;
++      unsigned MinSubByte = ExtBytesPerElement - OpBytesPerElement;
++      if (SubByte < MinSubByte ||
++          SubByte + BytesPerElement > ExtBytesPerElement)
++        break;
++      // Get the byte offset of the unextended element
++      Byte = Byte / ExtBytesPerElement * OpBytesPerElement;
++      // ...then add the byte offset relative to that element.
++      Byte += SubByte - MinSubByte;
++      if (Byte % BytesPerElement != 0)
++        break;
++      Op = Op.getOperand(0);
++      Index = Byte / BytesPerElement;
++      Force = true;
++    } else
++      break;
++  }
++  if (Force) {
++    if (Op.getValueType() != VecVT) {
++      Op = DAG.getNode(ISD::BITCAST, DL, VecVT, Op);
++      DCI.AddToWorklist(Op.getNode());
++    }
++    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ResVT, Op,
++                       DAG.getConstant(Index, MVT::i32));
++  }
++  return SDValue();
++}
++
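combineExtract's shuffle case depends on getShuffleInput proving that the extracted bytes form one contiguous run from a single source. A simplified standalone version of that test (the real helper also tolerates undefined bytes and spans both operands; this sketch treats undef as failure):

    #include <vector>

    // Bytes[I] is the source byte index feeding result byte I, or -1 if
    // undefined.  Returns true if result bytes [Start, Start+Size) come from
    // consecutive source bytes, writing the first source byte to First.
    bool isContiguousRun(const std::vector<int> &Bytes, unsigned Start,
                         unsigned Size, int &First) {
      First = Bytes[Start];
      if (First < 0)
        return false; // simplified: treat an undefined byte as failure
      for (unsigned I = 1; I < Size; ++I)
        if (Bytes[Start + I] != First + int(I))
          return false; // not consecutive: cannot extract directly
      return true;
    }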
++// Optimize vector operations in scalar value Op on the basis that Op
++// is truncated to TruncVT.
++SDValue
++SystemZTargetLowering::combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op,
++                                              DAGCombinerInfo &DCI) const {
++  // If we have (trunc (extract_vector_elt X, Y)), try to turn it into
++  // (extract_vector_elt (bitcast X), Y'), where (bitcast X) has elements
++  // of type TruncVT.
++  if (Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
++      TruncVT.getSizeInBits() % 8 == 0) {
++    SDValue Vec = Op.getOperand(0);
++    EVT VecVT = Vec.getValueType();
++    if (canTreatAsByteVector(VecVT)) {
++      if (auto *IndexN = dyn_cast<ConstantSDNode>(Op.getOperand(1))) {
++        unsigned BytesPerElement = VecVT.getVectorElementType().getStoreSize();
++        unsigned TruncBytes = TruncVT.getStoreSize();
++        if (BytesPerElement % TruncBytes == 0) {
++          // Calculate the value of Y' in the above description.  We are
++          // splitting the original elements into Scale equal-sized pieces
++          // and for truncation purposes want the last (least-significant)
++          // of these pieces for IndexN.  This is easiest to do by calculating
++          // the start index of the following element and then subtracting 1.
++          unsigned Scale = BytesPerElement / TruncBytes;
++          unsigned NewIndex = (IndexN->getZExtValue() + 1) * Scale - 1;
++
++          // Defer the creation of the bitcast from X to combineExtract,
++          // which might be able to optimize the extraction.
++          VecVT = MVT::getVectorVT(MVT::getIntegerVT(TruncBytes * 8),
++                                   VecVT.getStoreSize() / TruncBytes);
++          EVT ResVT = (TruncBytes < 4 ? MVT::i32 : TruncVT);
++          return combineExtract(DL, ResVT, VecVT, Vec, NewIndex, DCI, true);
++        }
++      }
++    }
++  }
++  return SDValue();
++}
++
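A worked example of the Y' calculation above, under the assumption of SystemZ's big-endian element layout:

    #include <cassert>

    // Truncating element Index keeps the last (least-significant) of the
    // Scale equal-sized pieces that the element splits into.
    unsigned truncatedIndex(unsigned Index, unsigned BytesPerElement,
                            unsigned TruncBytes) {
      unsigned Scale = BytesPerElement / TruncBytes;
      return (Index + 1) * Scale - 1;
    }

    int main() {
      // v4i32 element 1 truncated to i8: the element occupies bytes 4..7 of
      // the v16i8 view, and the least-significant piece is byte 7.
      assert(truncatedIndex(1, 4, 1) == 7);
      return 0;
    }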
+ SDValue SystemZTargetLowering::PerformDAGCombine(SDNode *N,
+                                                  DAGCombinerInfo &DCI) const {
+   SelectionDAG &DAG = DCI.DAG;
+@@ -2559,6 +4649,114 @@ SDValue SystemZTargetLowering::PerformDA
+       }
+     }
+   }
++  if (Opcode == SystemZISD::MERGE_HIGH ||
++      Opcode == SystemZISD::MERGE_LOW) {
++    SDValue Op0 = N->getOperand(0);
++    SDValue Op1 = N->getOperand(1);
++    if (Op0.getOpcode() == ISD::BITCAST)
++      Op0 = Op0.getOperand(0);
++    if (Op0.getOpcode() == SystemZISD::BYTE_MASK &&
++        cast<ConstantSDNode>(Op0.getOperand(0))->getZExtValue() == 0) {
++      // (z_merge_* 0, 0) -> 0.  This is mostly useful for using VLLEZF
++      // for v4f32.
++      if (Op1 == N->getOperand(0))
++        return Op1;
++      // (z_merge_? 0, X) -> (z_unpackl_? 0, X).
++      EVT VT = Op1.getValueType();
++      unsigned ElemBytes = VT.getVectorElementType().getStoreSize();
++      if (ElemBytes <= 4) {
++        Opcode = (Opcode == SystemZISD::MERGE_HIGH ?
++                  SystemZISD::UNPACKL_HIGH : SystemZISD::UNPACKL_LOW);
++        EVT InVT = VT.changeVectorElementTypeToInteger();
++        EVT OutVT = MVT::getVectorVT(MVT::getIntegerVT(ElemBytes * 16),
++                                     SystemZ::VectorBytes / ElemBytes / 2);
++        if (VT != InVT) {
++          Op1 = DAG.getNode(ISD::BITCAST, SDLoc(N), InVT, Op1);
++          DCI.AddToWorklist(Op1.getNode());
++        }
++        SDValue Op = DAG.getNode(Opcode, SDLoc(N), OutVT, Op1);
++        DCI.AddToWorklist(Op.getNode());
++        return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
++      }
++    }
++  }
++  // If we have (truncstoreiN (extract_vector_elt X, Y), Z) then it is better
++  // for the extraction to be done on a vMiN value, so that we can use VSTE.
++  // If X has wider elements then convert it to:
++  // (truncstoreiN (extract_vector_elt (bitcast X), Y2), Z).
++  if (Opcode == ISD::STORE) {
++    auto *SN = cast<StoreSDNode>(N);
++    EVT MemVT = SN->getMemoryVT();
++    if (MemVT.isInteger()) {
++      SDValue Value = combineTruncateExtract(SDLoc(N), MemVT,
++                                             SN->getValue(), DCI);
++      if (Value.getNode()) {
++        DCI.AddToWorklist(Value.getNode());
++
++        // Rewrite the store with the new form of stored value.
++        return DAG.getTruncStore(SN->getChain(), SDLoc(SN), Value,
++                                 SN->getBasePtr(), SN->getMemoryVT(),
++                                 SN->getMemOperand());
++      }
++    }
++  }
++  // Try to simplify a vector extraction.
++  if (Opcode == ISD::EXTRACT_VECTOR_ELT) {
++    if (auto *IndexN = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
++      SDValue Op0 = N->getOperand(0);
++      EVT VecVT = Op0.getValueType();
++      return combineExtract(SDLoc(N), N->getValueType(0), VecVT, Op0,
++                            IndexN->getZExtValue(), DCI, false);
++    }
++  }
++  // (join_dwords X, X) == (replicate X)
++  if (Opcode == SystemZISD::JOIN_DWORDS &&
++      N->getOperand(0) == N->getOperand(1))
++    return DAG.getNode(SystemZISD::REPLICATE, SDLoc(N), N->getValueType(0),
++                       N->getOperand(0));
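The (z_merge_? 0, X) -> (z_unpackl_? 0, X) rewrite above relies on merge interleaving a zero element in front of each element of X, which for the high half is exactly a zero-extending unpack. An element-level sketch for 2-byte elements (plain arrays stand in for vector registers; big-endian order assumed, as on SystemZ):

    #include <array>
    #include <cassert>
    #include <cstdint>

    // Merge-high on v8i16: result alternates elements 0..3 of A and B.
    std::array<uint16_t, 8> mergeHigh(const std::array<uint16_t, 8> &A,
                                      const std::array<uint16_t, 8> &B) {
      return {A[0], B[0], A[1], B[1], A[2], B[2], A[3], B[3]};
    }

    int main() {
      std::array<uint16_t, 8> Zero{}, X{1, 2, 3, 4, 5, 6, 7, 8};
      auto R = mergeHigh(Zero, X);
      // Each (0, X[i]) pair is the 32-bit zero extension of X[i] on a
      // big-endian machine, matching the unpack-logical-high semantics.
      assert(R[0] == 0 && R[1] == 1 && R[2] == 0 && R[3] == 2);
      return 0;
    }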
++  // (fround (extract_vector_elt X 0))
++  // (fround (extract_vector_elt X 1)) ->
++  // (extract_vector_elt (VROUND X) 0)
++  // (extract_vector_elt (VROUND X) 1)
++  //
++  // This is a special case since the target doesn't really support v2f32s.
++  if (Opcode == ISD::FP_ROUND) {
++    SDValue Op0 = N->getOperand(0);
++    if (N->getValueType(0) == MVT::f32 &&
++        Op0.hasOneUse() &&
++        Op0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
++        Op0.getOperand(0).getValueType() == MVT::v2f64 &&
++        Op0.getOperand(1).getOpcode() == ISD::Constant &&
++        cast<ConstantSDNode>(Op0.getOperand(1))->getZExtValue() == 0) {
++      SDValue Vec = Op0.getOperand(0);
++      for (auto *U : Vec->uses()) {
++        if (U != Op0.getNode() &&
++            U->hasOneUse() &&
++            U->getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
++            U->getOperand(0) == Vec &&
++            U->getOperand(1).getOpcode() == ISD::Constant &&
++            cast<ConstantSDNode>(U->getOperand(1))->getZExtValue() == 1) {
++          SDValue OtherRound = SDValue(*U->use_begin(), 0);
++          if (OtherRound.getOpcode() == ISD::FP_ROUND &&
++              OtherRound.getOperand(0) == SDValue(U, 0) &&
++              OtherRound.getValueType() == MVT::f32) {
++            SDValue VRound = DAG.getNode(SystemZISD::VROUND, SDLoc(N),
++                                         MVT::v4f32, Vec);
++            DCI.AddToWorklist(VRound.getNode());
++            SDValue Extract1 =
++              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(U), MVT::f32,
++                          VRound, DAG.getConstant(2, MVT::i32));
++            DCI.AddToWorklist(Extract1.getNode());
++            DAG.ReplaceAllUsesOfValueWith(OtherRound, Extract1);
++            SDValue Extract0 =
++              DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(Op0), MVT::f32,
++                          VRound, DAG.getConstant(0, MVT::i32));
++            return Extract0;
++          }
++        }
++      }
++    }
++  }
+   return SDValue();
+ }
+ 
+@@ -3338,6 +5536,57 @@ SystemZTargetLowering::emitStringWrapper
+   return DoneMBB;
+ }
+ 
++// Update TBEGIN instruction with final opcode and register clobbers.
++MachineBasicBlock *
++SystemZTargetLowering::emitTransactionBegin(MachineInstr *MI,
++                                            MachineBasicBlock *MBB,
++                                            unsigned Opcode,
++                                            bool NoFloat) const {
++  MachineFunction &MF = *MBB->getParent();
++  const TargetFrameLowering *TFI = Subtarget.getFrameLowering();
++  const SystemZInstrInfo *TII = Subtarget.getInstrInfo();
++
++  // Update opcode.
++  MI->setDesc(TII->get(Opcode));
++
++  // We cannot handle a TBEGIN that clobbers the stack or frame pointer.
++  // Make sure to add the corresponding GRSM bits if they are missing.
++  uint64_t Control = MI->getOperand(2).getImm();
++  static const unsigned GPRControlBit[16] = {
++    0x8000, 0x8000, 0x4000, 0x4000, 0x2000, 0x2000, 0x1000, 0x1000,
++    0x0800, 0x0800, 0x0400, 0x0400, 0x0200, 0x0200, 0x0100, 0x0100
++  };
++  Control |= GPRControlBit[15];
++  if (TFI->hasFP(MF))
++    Control |= GPRControlBit[11];
++  MI->getOperand(2).setImm(Control);
++
++  // Add GPR clobbers.
++  for (int I = 0; I < 16; I++) {
++    if ((Control & GPRControlBit[I]) == 0) {
++      unsigned Reg = SystemZMC::GR64Regs[I];
++      MI->addOperand(MachineOperand::CreateReg(Reg, true, true));
++    }
++  }
++
++  // Add FPR/VR clobbers.
++ if (!NoFloat && (Control & 4) != 0) { ++ if (Subtarget.hasVector()) { ++ for (int I = 0; I < 32; I++) { ++ unsigned Reg = SystemZMC::VR128Regs[I]; ++ MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); ++ } ++ } else { ++ for (int I = 0; I < 16; I++) { ++ unsigned Reg = SystemZMC::FP64Regs[I]; ++ MI->addOperand(MachineOperand::CreateReg(Reg, true, true)); ++ } ++ } ++ } ++ ++ return MBB; ++} ++ + MachineBasicBlock *SystemZTargetLowering:: + EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const { + switch (MI->getOpcode()) { +@@ -3579,6 +5828,12 @@ EmitInstrWithCustomInserter(MachineInstr + return emitStringWrapper(MI, MBB, SystemZ::MVST); + case SystemZ::SRSTLoop: + return emitStringWrapper(MI, MBB, SystemZ::SRST); ++ case SystemZ::TBEGIN: ++ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, false); ++ case SystemZ::TBEGIN_nofloat: ++ return emitTransactionBegin(MI, MBB, SystemZ::TBEGIN, true); ++ case SystemZ::TBEGINC: ++ return emitTransactionBegin(MI, MBB, SystemZ::TBEGINC, true); + default: + llvm_unreachable("Unexpected instr type to insert"); + } +Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h ++++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h +@@ -34,6 +34,11 @@ enum { + CALL, + SIBCALL, + ++ // TLS calls. Like regular calls, except operand 1 is the TLS symbol. ++ // (The call target is implicitly __tls_get_offset.) ++ TLS_GDCALL, ++ TLS_LDCALL, ++ + // Wraps a TargetGlobalAddress that should be loaded using PC-relative + // accesses (LARL). Operand 0 is the address. + PCREL_WRAPPER, +@@ -82,6 +87,9 @@ enum { + // the number of the register. + EXTRACT_ACCESS, + ++ // Count number of bits set in operand 0 per byte. ++ POPCNT, ++ + // Wrappers around the ISD opcodes of the same name. The output and + // first input operands are GR128s. The trailing numbers are the + // widths of the second operand in bits. +@@ -138,6 +146,135 @@ enum { + // Perform a serialization operation. (BCR 15,0 or BCR 14,0.) + SERIALIZE, + ++ // Transaction begin. The first operand is the chain, the second ++ // the TDB pointer, and the third the immediate control field. ++ // Returns chain and glue. ++ TBEGIN, ++ TBEGIN_NOFLOAT, ++ ++ // Transaction end. Just the chain operand. Returns chain and glue. ++ TEND, ++ ++ // Create a vector constant by filling byte N of the result with bit ++ // 15-N of the single operand. ++ BYTE_MASK, ++ ++ // Create a vector constant by replicating an element-sized RISBG-style mask. ++ // The first operand specifies the starting set bit and the second operand ++ // specifies the ending set bit. Both operands count from the MSB of the ++ // element. ++ ROTATE_MASK, ++ ++ // Replicate a GPR scalar value into all elements of a vector. ++ REPLICATE, ++ ++ // Create a vector from two i64 GPRs. ++ JOIN_DWORDS, ++ ++ // Replicate one element of a vector into all elements. The first operand ++ // is the vector and the second is the index of the element to replicate. ++ SPLAT, ++ ++ // Interleave elements from the high half of operand 0 and the high half ++ // of operand 1. ++ MERGE_HIGH, ++ ++ // Likewise for the low halves. ++ MERGE_LOW, ++ ++ // Concatenate the vectors in the first two operands, shift them left ++ // by the third operand, and take the first half of the result. 
++  SHL_DOUBLE,
++
++  // Take one element of the first v2i64 operand and one element of the
++  // second v2i64 operand and concatenate them to form a v2i64 result.
++  // The third operand is a 4-bit value of the form 0A0B, where A and B
++  // are the element selectors for the first and second operands
++  // respectively.
++  PERMUTE_DWORDS,
++
++  // Perform a general vector permute on vector operands 0 and 1.
++  // Each byte of operand 2 controls the corresponding byte of the result,
++  // in the same way as a byte-level VECTOR_SHUFFLE mask.
++  PERMUTE,
++
++  // Pack vector operands 0 and 1 into a single vector with half-sized
++  // elements.
++  PACK,
++
++  // Likewise, but saturate the result and set CC.  PACKS_CC does signed
++  // saturation and PACKLS_CC does unsigned saturation.
++  PACKS_CC,
++  PACKLS_CC,
++
++  // Unpack the first half of vector operand 0 into double-sized elements.
++  // UNPACK_HIGH sign-extends and UNPACKL_HIGH zero-extends.
++  UNPACK_HIGH,
++  UNPACKL_HIGH,
++
++  // Likewise for the second half.
++  UNPACK_LOW,
++  UNPACKL_LOW,
++
++  // Shift each element of vector operand 0 by the number of bits specified
++  // by scalar operand 1.
++  VSHL_BY_SCALAR,
++  VSRL_BY_SCALAR,
++  VSRA_BY_SCALAR,
++
++  // For each element of the output type, sum across all sub-elements of
++  // operand 0 belonging to the corresponding element, and add in the
++  // rightmost sub-element of the corresponding element of operand 1.
++  VSUM,
++
++  // Compare integer vector operands 0 and 1 to produce the usual 0/-1
++  // vector result.  VICMPE is for equality, VICMPH for "signed greater than"
++  // and VICMPHL for "unsigned greater than".
++  VICMPE,
++  VICMPH,
++  VICMPHL,
++
++  // Likewise, but also set the condition codes on the result.
++  VICMPES,
++  VICMPHS,
++  VICMPHLS,
++
++  // Compare floating-point vector operands 0 and 1 to produce the usual 0/-1
++  // vector result.  VFCMPE is for "ordered and equal", VFCMPH for "ordered and
++  // greater than" and VFCMPHE for "ordered and greater than or equal to".
++  VFCMPE,
++  VFCMPH,
++  VFCMPHE,
++
++  // Likewise, but also set the condition codes on the result.
++  VFCMPES,
++  VFCMPHS,
++  VFCMPHES,
++
++  // Test floating-point data class for vectors.
++  VFTCI,
++
++  // Extend the even f32 elements of vector operand 0 to produce a vector
++  // of f64 elements.
++  VEXTEND,
++
++  // Round the f64 elements of vector operand 0 to f32s and store them in the
++  // even elements of the result.
++  VROUND,
++
++  // AND the two vector operands together and set CC based on the result.
++  VTM,
++
++  // String operations that set CC as a side-effect.
++  VFAE_CC,
++  VFAEZ_CC,
++  VFEE_CC,
++  VFEEZ_CC,
++  VFENE_CC,
++  VFENEZ_CC,
++  VISTR_CC,
++  VSTRC_CC,
++  VSTRCZ_CC,
++
+   // Wrappers around the inner loop of an 8- or 16-bit ATOMIC_SWAP or
+   // ATOMIC_LOAD_<op>.
+   //
+@@ -204,9 +341,33 @@ public:
+   MVT getScalarShiftAmountTy(EVT LHSTy) const override {
+     return MVT::i32;
+   }
++  MVT getVectorIdxTy() const override {
++    // Only the lower 12 bits of an element index are used, so we don't
++    // want to clobber the upper 32 bits of a GPR unnecessarily.
++    return MVT::i32;
++  }
++  TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(EVT VT)
++    const override {
++    // Widen subvectors to the full width rather than promoting integer
++    // elements.  This is better because:
++    //
++    // (a) it means that we can handle the ABI for passing and returning
++    //     sub-128 vectors without having to handle them as legal types.
++ // ++ // (b) we don't have instructions to extend on load and truncate on store, ++ // so promoting the integers is less efficient. ++ // ++ // (c) there are no multiplication instructions for the widest integer ++ // type (v2i64). ++ if (VT.getVectorElementType().getSizeInBits() % 8 == 0) ++ return TypeWidenVector; ++ return TargetLoweringBase::getPreferredVectorAction(VT); ++ } + EVT getSetCCResultType(LLVMContext &, EVT) const override; + bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + bool isFPImmLegal(const APFloat &Imm, EVT VT) const override; ++ bool isLegalICmpImmediate(int64_t Imm) const override; ++ bool isLegalAddImmediate(int64_t Imm) const override; + bool isLegalAddressingMode(const AddrMode &AM, Type *Ty) const override; + bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, + unsigned Align, +@@ -257,6 +418,9 @@ private: + SDValue lowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerGlobalAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; ++ SDValue lowerTLSGetOffset(GlobalAddressSDNode *Node, ++ SelectionDAG &DAG, unsigned Opcode, ++ SDValue GOTOffset) const; + SDValue lowerGlobalTLSAddress(GlobalAddressSDNode *Node, + SelectionDAG &DAG) const; + SDValue lowerBlockAddress(BlockAddressSDNode *Node, +@@ -272,6 +436,7 @@ private: + SDValue lowerUDIVREM(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerBITCAST(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerCTPOP(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_STORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerATOMIC_LOAD_OP(SDValue Op, SelectionDAG &DAG, +@@ -282,6 +447,22 @@ private: + SDValue lowerSTACKSAVE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerSTACKRESTORE(SDValue Op, SelectionDAG &DAG) const; + SDValue lowerPREFETCH(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerSCALAR_TO_VECTOR(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const; ++ SDValue lowerExtendVectorInreg(SDValue Op, SelectionDAG &DAG, ++ unsigned UnpackHigh) const; ++ SDValue lowerShift(SDValue Op, SelectionDAG &DAG, unsigned ByScalar) const; ++ ++ SDValue combineExtract(SDLoc DL, EVT ElemVT, EVT VecVT, SDValue OrigOp, ++ unsigned Index, DAGCombinerInfo &DCI, ++ bool Force) const; ++ SDValue combineTruncateExtract(SDLoc DL, EVT TruncVT, SDValue Op, ++ DAGCombinerInfo &DCI) const; + + // If the last instruction before MBBI in MBB was some form of COMPARE, + // try to replace it with a COMPARE AND BRANCH just before MBBI. 
+@@ -319,6 +500,10 @@ private:
+   MachineBasicBlock *emitStringWrapper(MachineInstr *MI,
+                                        MachineBasicBlock *BB,
+                                        unsigned Opcode) const;
++  MachineBasicBlock *emitTransactionBegin(MachineInstr *MI,
++                                          MachineBasicBlock *MBB,
++                                          unsigned Opcode,
++                                          bool NoFloat) const;
+ };
+ } // end namespace llvm
+ 
+Index: llvm-36/lib/Target/SystemZ/SystemZInstrFP.td
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFP.td
++++ llvm-36/lib/Target/SystemZ/SystemZInstrFP.td
+@@ -46,9 +46,14 @@ let Defs = [CC], CCValues = 0xF, Compare
+   defm LTDBR : LoadAndTestRRE<"ltdb", 0xB312, FP64>;
+   defm LTXBR : LoadAndTestRRE<"ltxb", 0xB342, FP128>;
+ }
+-defm : CompareZeroFP<LTEBRCompare, FP32>;
+-defm : CompareZeroFP<LTDBRCompare, FP64>;
+-defm : CompareZeroFP<LTXBRCompare, FP128>;
++// Note that the comparison against zero operation is not available if we
++// have vector support, since load-and-test instructions will partially
++// clobber the target (vector) register.
++let Predicates = [FeatureNoVector] in {
++  defm : CompareZeroFP<LTEBRCompare, FP32>;
++  defm : CompareZeroFP<LTDBRCompare, FP64>;
++  defm : CompareZeroFP<LTXBRCompare, FP128>;
++}
+ 
+ // Moves between 64-bit integer and floating-point registers.
+ def LGDR : UnaryRRE<"lgd", 0xB3CD, bitconvert, GR64, FP64>;
+@@ -98,6 +103,9 @@ let canFoldAsLoad = 1, SimpleBDXLoad = 1
+   defm LE : UnaryRXPair<"le", 0x78, 0xED64, load, FP32, 4>;
+   defm LD : UnaryRXPair<"ld", 0x68, 0xED65, load, FP64, 8>;
+ 
++  // For z13 we prefer LDE over LE to avoid partial register dependencies.
++  def LDE32 : UnaryRXE<"lde", 0xED24, null_frag, FP32, 4>;
++
+   // These instructions are split after register allocation, so we don't
+   // want a custom inserter.
+   let Has20BitOffset = 1, HasIndex = 1, Is128Bit = 1 in {
+@@ -141,7 +149,7 @@ def LDXBRA : UnaryRRF4<"ldxbra", 0xB345,
+                        Requires<[FeatureFPExtension]>;
+ 
+ def : Pat<(f32 (fround FP128:$src)),
+-          (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hh32)>;
++          (EXTRACT_SUBREG (LEXBR FP128:$src), subreg_hr32)>;
+ def : Pat<(f64 (fround FP128:$src)),
+           (EXTRACT_SUBREG (LDXBR FP128:$src), subreg_h64)>;
+ 
+@@ -345,13 +353,13 @@ def MDB : BinaryRXE<"mdb", 0xED1C, fmu
+ def MDEBR : BinaryRRE<"mdeb", 0xB30C, null_frag, FP64, FP32>;
+ def : Pat<(fmul (f64 (fextend FP32:$src1)), (f64 (fextend FP32:$src2))),
+           (MDEBR (INSERT_SUBREG (f64 (IMPLICIT_DEF)),
+-                                FP32:$src1, subreg_h32), FP32:$src2)>;
++                                FP32:$src1, subreg_r32), FP32:$src2)>;
+ 
+ // f64 multiplication of an FP32 register and an f32 memory.
+ def MDEB : BinaryRXE<"mdeb", 0xED0C, null_frag, FP64, load, 4>;
+ def : Pat<(fmul (f64 (fextend FP32:$src1)),
+                 (f64 (extloadf32 bdxaddr12only:$addr))),
+-          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_h32),
++          (MDEB (INSERT_SUBREG (f64 (IMPLICIT_DEF)), FP32:$src1, subreg_r32),
+             bdxaddr12only:$addr)>;
+ 
+ // f128 multiplication of two FP64 registers.
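The instruction-format classes added in the next file all split a 5-bit vector register number across a 4-bit register field and one bit of the RXB area (instruction bits 11-8). A sketch of that packing for the V1 field of the VRI-a layout, using the bit positions from the class definitions that follow (the helper name is hypothetical; the 48-bit image is kept in a 64-bit integer):

    #include <cassert>
    #include <cstdint>

    uint64_t encodeVRIaRegister(uint64_t Inst, unsigned V1) {
      assert(V1 < 32 && "vector register numbers are five bits");
      Inst |= uint64_t(V1 & 0xF) << 36; // Inst{39-36} = V1{3-0}
      Inst |= uint64_t(V1 >> 4) << 11;  // Inst{11}    = V1{4}
      return Inst;
    }

This split is what lets the vector formats address all 32 vector registers while keeping the classic 4-bit register fields of the base instruction formats.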
+Index: llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZInstrFormats.td
++++ llvm-36/lib/Target/SystemZ/SystemZInstrFormats.td
+@@ -142,10 +142,13 @@ def getThreeOperandOpcode : InstrMapping
+ // Formats are specified using operand field declarations of the form:
+ //
+ //   bits<4>  Rn   : register input or output for operand n
++//   bits<5>  Vn   : vector register input or output for operand n
+ //   bits<m>  In   : immediate value of width m for operand n
+ //   bits<4>  BDn  : address operand n, which has a base and a displacement
+ //   bits<20> XBDn : address operand n, which has an index, a base and a
+ //                   displacement
++//   bits<21> VBDn : address operand n, which has a vector index, a base and a
++//                   displacement
+ //   bits<4>  Xn   : index register for address operand n
+ //   bits<4>  Mn   : mode value for operand n
+ //
+@@ -339,11 +342,13 @@ class InstRXE<bits<16> op, dag outs, dag
+ 
+   bits<4> R1;
+   bits<20> XBD2;
++  bits<4> M3;
+ 
+   let Inst{47-40} = op{15-8};
+   let Inst{39-36} = R1;
+   let Inst{35-16} = XBD2;
+-  let Inst{15-8} = 0;
++  let Inst{15-12} = M3;
++  let Inst{11-8} = 0;
+   let Inst{7-0} = op{7-0};
+ 
+   let HasIndex = 1;
+@@ -473,6 +478,393 @@ class InstSS<bits<8> op, dag outs, dag i
+   let Inst{15-0} = BD2;
+ }
+ 
++class InstS<bits<16> op, dag outs, dag ins, string asmstr, list<dag> pattern>
++  : InstSystemZ<4, outs, ins, asmstr, pattern> {
++  field bits<32> Inst;
++  field bits<32> SoftFail = 0;
++
++  bits<16> BD2;
++
++  let Inst{31-16} = op;
++  let Inst{15-0} = BD2;
++}
++
++class InstVRIa<bits<16> op, dag outs, dag ins, string asmstr,
++               list<dag> pattern>
++  : InstSystemZ<6, outs, ins, asmstr, pattern> {
++  field bits<48> Inst;
++  field bits<48> SoftFail = 0;
++
++  bits<5> V1;
++  bits<16> I2;
++  bits<4> M3;
++
++  let Inst{47-40} = op{15-8};
++  let Inst{39-36} = V1{3-0};
++  let Inst{35-32} = 0;
++  let Inst{31-16} = I2;
++  let Inst{15-12} = M3;
++  let Inst{11} = V1{4};
++  let Inst{10-8} = 0;
++  let Inst{7-0} = op{7-0};
++}
++
++class InstVRIb<bits<16> op, dag outs, dag ins, string asmstr,
++               list<dag> pattern>
++  : InstSystemZ<6, outs, ins, asmstr, pattern> {
++  field bits<48> Inst;
++  field bits<48> SoftFail = 0;
++
++  bits<5> V1;
++  bits<8> I2;
++  bits<8> I3;
++  bits<4> M4;
++
++  let Inst{47-40} = op{15-8};
++  let Inst{39-36} = V1{3-0};
++  let Inst{35-32} = 0;
++  let Inst{31-24} = I2;
++  let Inst{23-16} = I3;
++  let Inst{15-12} = M4;
++  let Inst{11} = V1{4};
++  let Inst{10-8} = 0;
++  let Inst{7-0} = op{7-0};
++}
++
++class InstVRIc<bits<16> op, dag outs, dag ins, string asmstr,
++               list<dag> pattern>
++  : InstSystemZ<6, outs, ins, asmstr, pattern> {
++  field bits<48> Inst;
++  field bits<48> SoftFail = 0;
++
++  bits<5> V1;
++  bits<5> V3;
++  bits<16> I2;
++  bits<4> M4;
++
++  let Inst{47-40} = op{15-8};
++  let Inst{39-36} = V1{3-0};
++  let Inst{35-32} = V3{3-0};
++  let Inst{31-16} = I2;
++  let Inst{15-12} = M4;
++  let Inst{11} = V1{4};
++  let Inst{10} = V3{4};
++  let Inst{9-8} = 0;
++  let Inst{7-0} = op{7-0};
++}
++
++class InstVRId<bits<16> op, dag outs, dag ins, string asmstr,
++               list<dag> pattern>
++  : InstSystemZ<6, outs, ins, asmstr, pattern> {
++  field bits<48> Inst;
++  field bits<48> SoftFail = 0;
++
++  bits<5> V1;
++  bits<5> V2;
++  bits<5> V3;
++  bits<8> I4;
++  bits<4> M5;
++
++  let Inst{47-40} = op{15-8};
++  let Inst{39-36} = V1{3-0};
++  let Inst{35-32} = V2{3-0};
++  let Inst{31-28} = V3{3-0};
++  let Inst{27-24} = 0;
++  let Inst{23-16} = I4;
++  let Inst{15-12} = M5;
++  let Inst{11} = V1{4};
++  let Inst{10} = V2{4};
++  let Inst{9} = V3{4};
++  let Inst{8} = 0;
++  let Inst{7-0} = op{7-0};
++} ++ ++class InstVRIe op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<12> I3; ++ bits<4> M4; ++ bits<4> M5; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-20} = I3; ++ let Inst{19-16} = M5; ++ let Inst{15-12} = M4; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++// Depending on the instruction mnemonic, certain bits may be or-ed into ++// the M4 value provided as explicit operand. These are passed as m4or. ++class InstVRRa op, dag outs, dag ins, string asmstr, list pattern, ++ bits<4> m4or = 0> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<4> M3; ++ bits<4> M4; ++ bits<4> M5; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-24} = 0; ++ let Inst{23-20} = M5; ++ let Inst{19} = !if (!eq (m4or{3}, 1), 1, M4{3}); ++ let Inst{18} = !if (!eq (m4or{2}, 1), 1, M4{2}); ++ let Inst{17} = !if (!eq (m4or{1}, 1), 1, M4{1}); ++ let Inst{16} = !if (!eq (m4or{0}, 1), 1, M4{0}); ++ let Inst{15-12} = M3; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++// Depending on the instruction mnemonic, certain bits may be or-ed into ++// the M5 value provided as explicit operand. These are passed as m5or. ++class InstVRRb op, dag outs, dag ins, string asmstr, list pattern, ++ bits<4> m5or = 0> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<5> V3; ++ bits<4> M4; ++ bits<4> M5; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-28} = V3{3-0}; ++ let Inst{27-24} = 0; ++ let Inst{23} = !if (!eq (m5or{3}, 1), 1, M5{3}); ++ let Inst{22} = !if (!eq (m5or{2}, 1), 1, M5{2}); ++ let Inst{21} = !if (!eq (m5or{1}, 1), 1, M5{1}); ++ let Inst{20} = !if (!eq (m5or{0}, 1), 1, M5{0}); ++ let Inst{19-16} = 0; ++ let Inst{15-12} = M4; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9} = V3{4}; ++ let Inst{8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRRc op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<5> V3; ++ bits<4> M4; ++ bits<4> M5; ++ bits<4> M6; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-28} = V3{3-0}; ++ let Inst{27-24} = 0; ++ let Inst{23-20} = M6; ++ let Inst{19-16} = M5; ++ let Inst{15-12} = M4; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9} = V3{4}; ++ let Inst{8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++// Depending on the instruction mnemonic, certain bits may be or-ed into ++// the M6 value provided as explicit operand. These are passed as m6or. 
++class InstVRRd op, dag outs, dag ins, string asmstr, list pattern, ++ bits<4> m6or = 0> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<5> V3; ++ bits<5> V4; ++ bits<4> M5; ++ bits<4> M6; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-28} = V3{3-0}; ++ let Inst{27-24} = M5; ++ let Inst{23} = !if (!eq (m6or{3}, 1), 1, M6{3}); ++ let Inst{22} = !if (!eq (m6or{2}, 1), 1, M6{2}); ++ let Inst{21} = !if (!eq (m6or{1}, 1), 1, M6{1}); ++ let Inst{20} = !if (!eq (m6or{0}, 1), 1, M6{0}); ++ let Inst{19-16} = 0; ++ let Inst{15-12} = V4{3-0}; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9} = V3{4}; ++ let Inst{8} = V4{4}; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRRe op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<5> V2; ++ bits<5> V3; ++ bits<5> V4; ++ bits<4> M5; ++ bits<4> M6; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V2{3-0}; ++ let Inst{31-28} = V3{3-0}; ++ let Inst{27-24} = M6; ++ let Inst{23-20} = 0; ++ let Inst{19-16} = M5; ++ let Inst{15-12} = V4{3-0}; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V2{4}; ++ let Inst{9} = V3{4}; ++ let Inst{8} = V4{4}; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRRf op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<4> R2; ++ bits<4> R3; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = R2; ++ let Inst{31-28} = R3; ++ let Inst{27-12} = 0; ++ let Inst{11} = V1{4}; ++ let Inst{10-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRSa op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<16> BD2; ++ bits<5> V3; ++ bits<4> M4; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = V3{3-0}; ++ let Inst{31-16} = BD2; ++ let Inst{15-12} = M4; ++ let Inst{11} = V1{4}; ++ let Inst{10} = V3{4}; ++ let Inst{9-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRSb op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<16> BD2; ++ bits<4> R3; ++ bits<4> M4; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-32} = R3; ++ let Inst{31-16} = BD2; ++ let Inst{15-12} = M4; ++ let Inst{11} = V1{4}; ++ let Inst{10-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRSc op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<4> R1; ++ bits<16> BD2; ++ bits<5> V3; ++ bits<4> M4; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = R1; ++ let Inst{35-32} = V3{3-0}; ++ let Inst{31-16} = BD2; ++ let Inst{15-12} = M4; ++ let Inst{11} = 0; ++ let Inst{10} = V3{4}; ++ let Inst{9-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRV op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<21> VBD2; 
++ bits<4> M3; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-16} = VBD2{19-0}; ++ let Inst{15-12} = M3; ++ let Inst{11} = V1{4}; ++ let Inst{10} = VBD2{20}; ++ let Inst{9-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ ++class InstVRX op, dag outs, dag ins, string asmstr, list pattern> ++ : InstSystemZ<6, outs, ins, asmstr, pattern> { ++ field bits<48> Inst; ++ field bits<48> SoftFail = 0; ++ ++ bits<5> V1; ++ bits<20> XBD2; ++ bits<4> M3; ++ ++ let Inst{47-40} = op{15-8}; ++ let Inst{39-36} = V1{3-0}; ++ let Inst{35-16} = XBD2; ++ let Inst{15-12} = M3; ++ let Inst{11} = V1{4}; ++ let Inst{10-8} = 0; ++ let Inst{7-0} = op{7-0}; ++} ++ + //===----------------------------------------------------------------------===// + // Instruction definitions with semantics + //===----------------------------------------------------------------------===// +@@ -492,12 +884,6 @@ class InstSS op, dag outs, dag i + // form of the source register in the destination register and + // branches on the result. + // +-// Store: +-// One register or immediate input operand and one address input operand. +-// The instruction stores the first operand to the address. +-// +-// This category is used for both pure and truncating stores. +-// + // LoadMultiple: + // One address input operand and two explicit output operands. + // The instruction loads a range of registers from the address, +@@ -510,18 +896,35 @@ class InstSS op, dag outs, dag i + // with the explicit operands giving the first and last register + // to store. Other stored registers are added as implicit uses. + // ++// StoreLength: ++// One value operand, one length operand and one address operand. ++// The instruction stores the value operand to the address but ++// doesn't write more than the number of bytes specified by the ++// length operand. ++// + // Unary: + // One register output operand and one input operand. + // ++// Store: ++// One address operand and one other input operand. The instruction ++// stores to the address. ++// + // Binary: + // One register output operand and two input operands. + // ++// StoreBinary: ++// One address operand and two other input operands. The instruction ++// stores to the address. ++// + // Compare: + // Two input operands and an implicit CC output operand. + // + // Ternary: + // One register output operand and three input operands. + // ++// Quaternary: ++// One register output operand and four input operands. ++// + // LoadAndOp: + // One output operand and two input operands, one of which is an address. + // The instruction both reads from and writes to the address. 
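The StoreLength category described above (used for VSTL-style instructions) stores a value but never writes past the number of bytes given by the length operand. A rough model in plain C++, under the assumption that the length is expressed in bytes and capped at the 16-byte vector size:

    #include <algorithm>
    #include <cstdint>
    #include <cstring>

    // Store the leftmost bytes of a 16-byte vector, but never more than
    // Length bytes and never more than the vector itself.
    void storeLength(const uint8_t (&Vec)[16], uint8_t *Mem, unsigned Length) {
      std::memcpy(Mem, Vec, std::min<unsigned>(Length, 16));
    }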
+@@ -556,6 +959,12 @@ class InherentRRE opcode, bits<16> value> ++ : InstVRIa { ++ let I2 = value; ++ let M3 = 0; ++} ++ + class BranchUnaryRI opcode, RegisterOperand cls> + : InstRI { +@@ -571,6 +980,13 @@ class LoadMultipleRSY opcode> ++ : InstVRSa { ++ let M4 = 0; ++ let mayLoad = 1; ++} ++ + class StoreRILPC opcode, SDPatternOperator operator, + RegisterOperand cls> + : InstRIL opcode, SDPatternOperator operator, ++ TypedReg tr, bits<5> bytes, bits<4> type = 0> ++ : InstVRX { ++ let M3 = type; ++ let mayStore = 1; ++ let AccessBytes = bytes; ++} ++ ++class StoreLengthVRSb opcode, ++ SDPatternOperator operator, bits<5> bytes> ++ : InstVRSb { ++ let M4 = 0; ++ let mayStore = 1; ++ let AccessBytes = bytes; ++} ++ + class StoreMultipleRSY opcode, RegisterOperand cls> + : InstRSY { + let mayStore = 1; + } + ++class StoreMultipleVRSa opcode> ++ : InstVRSa { ++ let M4 = 0; ++ let mayStore = 1; ++} ++ + // StoreSI* instructions are used to store an integer to memory, but the + // addresses are more restricted than for normal stores. If we are in the + // situation of having to force either the address into a register or the +@@ -857,6 +1300,7 @@ class UnaryRXE + let OpType = "mem"; + let mayLoad = 1; + let AccessBytes = bytes; ++ let M3 = 0; + } + + class UnaryRXY opcode, SDPatternOperator operator, +@@ -883,6 +1327,46 @@ multiclass UnaryRXPair opcode, SDPatternOperator operator, ++ TypedReg tr, Immediate imm, bits<4> type = 0> ++ : InstVRIa { ++ let M3 = type; ++} ++ ++class UnaryVRRa opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m4 = 0, ++ bits<4> m5 = 0> ++ : InstVRRa { ++ let M3 = type; ++ let M4 = m4; ++ let M5 = m5; ++} ++ ++multiclass UnaryVRRaSPair opcode, ++ SDPatternOperator operator, ++ SDPatternOperator operator_cc, TypedReg tr1, ++ TypedReg tr2, bits<4> type, bits<4> modifier = 0, ++ bits<4> modifier_cc = 1> { ++ def "" : UnaryVRRa; ++ let Defs = [CC] in ++ def S : UnaryVRRa; ++} ++ ++class UnaryVRX opcode, SDPatternOperator operator, ++ TypedReg tr, bits<5> bytes, bits<4> type = 0> ++ : InstVRX { ++ let M3 = type; ++ let mayLoad = 1; ++ let AccessBytes = bytes; ++} ++ + class BinaryRR opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR opcode, SDPatternOperator operator, +@@ -1094,6 +1579,148 @@ multiclass BinarySIPair opcode, SDPatternOperator operator, ++ TypedReg tr, bits<4> type> ++ : InstVRIb { ++ let M4 = type; ++} ++ ++class BinaryVRIc opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type> ++ : InstVRIc { ++ let M4 = type; ++} ++ ++class BinaryVRIe opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m5> ++ : InstVRIe { ++ let M4 = type; ++ let M5 = m5; ++} ++ ++class BinaryVRRa opcode> ++ : InstVRRa { ++ let M4 = 0; ++ let M5 = 0; ++} ++ ++class BinaryVRRb opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type = 0, ++ bits<4> modifier = 0> ++ : InstVRRb { ++ let M4 = type; ++ let M5 = modifier; ++} ++ ++// Declare a pair of instructions, one which sets CC and one which doesn't. ++// The CC-setting form ends with "S" and sets the low bit of M5. 
++multiclass BinaryVRRbSPair opcode, ++ SDPatternOperator operator, ++ SDPatternOperator operator_cc, TypedReg tr1, ++ TypedReg tr2, bits<4> type, ++ bits<4> modifier = 0, bits<4> modifier_cc = 1> { ++ def "" : BinaryVRRb; ++ let Defs = [CC] in ++ def S : BinaryVRRb; ++} ++ ++class BinaryVRRc opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type = 0, bits<4> m5 = 0, ++ bits<4> m6 = 0> ++ : InstVRRc { ++ let M4 = type; ++ let M5 = m5; ++ let M6 = m6; ++} ++ ++multiclass BinaryVRRcSPair opcode, ++ SDPatternOperator operator, ++ SDPatternOperator operator_cc, TypedReg tr1, ++ TypedReg tr2, bits<4> type, bits<4> m5, ++ bits<4> modifier = 0, bits<4> modifier_cc = 1> { ++ def "" : BinaryVRRc; ++ let Defs = [CC] in ++ def S : BinaryVRRc; ++} ++ ++class BinaryVRRf opcode, SDPatternOperator operator, ++ TypedReg tr> ++ : InstVRRf; ++ ++class BinaryVRSa opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type> ++ : InstVRSa { ++ let M4 = type; ++} ++ ++class BinaryVRSb opcode, SDPatternOperator operator, ++ bits<5> bytes> ++ : InstVRSb { ++ let M4 = 0; ++ let mayLoad = 1; ++ let AccessBytes = bytes; ++} ++ ++class BinaryVRSc opcode, SDPatternOperator operator, ++ TypedReg tr, bits<4> type> ++ : InstVRSc { ++ let M4 = type; ++} ++ ++class BinaryVRX opcode, SDPatternOperator operator, ++ TypedReg tr, bits<5> bytes> ++ : InstVRX { ++ let mayLoad = 1; ++ let AccessBytes = bytes; ++} ++ ++class StoreBinaryVRV opcode, bits<5> bytes, ++ Immediate index> ++ : InstVRV { ++ let mayStore = 1; ++ let AccessBytes = bytes; ++} ++ ++class StoreBinaryVRX opcode, ++ SDPatternOperator operator, TypedReg tr, bits<5> bytes, ++ Immediate index> ++ : InstVRX { ++ let mayStore = 1; ++ let AccessBytes = bytes; ++} ++ + class CompareRR opcode, SDPatternOperator operator, + RegisterOperand cls1, RegisterOperand cls2> + : InstRR opcode, SDPatternOperator operator, +@@ -1235,6 +1863,17 @@ multiclass CompareSIPair opcode, SDPatternOperator operator, ++ TypedReg tr, bits<4> type> ++ : InstVRRa { ++ let isCompare = 1; ++ let M3 = type; ++ let M4 = 0; ++ let M5 = 0; ++} ++ + class TernaryRRD opcode, + SDPatternOperator operator, RegisterOperand cls> + : InstRRD opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, Immediate imm, Immediate index> ++ : InstVRIa { ++ let Constraints = "$V1 = $V1src"; ++ let DisableEncoding = "$V1src"; ++} ++ ++class TernaryVRId opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type> ++ : InstVRId { ++ let M5 = type; ++} ++ ++class TernaryVRRa opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type, bits<4> m4or> ++ : InstVRRa { ++ let M3 = type; ++} ++ ++class TernaryVRRb opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type, ++ SDPatternOperator m5mask, bits<4> m5or> ++ : InstVRRb { ++ let M4 = type; ++} ++ ++multiclass TernaryVRRbSPair opcode, ++ SDPatternOperator operator, ++ SDPatternOperator operator_cc, TypedReg tr1, ++ TypedReg tr2, bits<4> type, bits<4> m5or> { ++ def "" : TernaryVRRb; ++ def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, ++ tr2.op:$V3, 0)>; ++ let Defs = [CC] in ++ def S : TernaryVRRb; ++ def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, ++ tr2.op:$V3, 0)>; ++} ++ ++class TernaryVRRc opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2> ++ : InstVRRc { ++ let M5 = 0; ++ let M6 = 0; ++} ++ ++class TernaryVRRd opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type = 0> ++ : InstVRRd { ++ let M5 = type; ++ let 
M6 = 0; ++} ++ ++class TernaryVRRe opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> m5 = 0, bits<4> type = 0> ++ : InstVRRe { ++ let M5 = m5; ++ let M6 = type; ++} ++ ++class TernaryVRSb opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, RegisterOperand cls, bits<4> type> ++ : InstVRSb { ++ let Constraints = "$V1 = $V1src"; ++ let DisableEncoding = "$V1src"; ++ let M4 = type; ++} ++ ++class TernaryVRV opcode, bits<5> bytes, ++ Immediate index> ++ : InstVRV { ++ let Constraints = "$V1 = $V1src"; ++ let DisableEncoding = "$V1src"; ++ let mayLoad = 1; ++ let AccessBytes = bytes; ++} ++ ++class TernaryVRX opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<5> bytes, Immediate index> ++ : InstVRX { ++ let Constraints = "$V1 = $V1src"; ++ let DisableEncoding = "$V1src"; ++ let mayLoad = 1; ++ let AccessBytes = bytes; ++} ++ ++class QuaternaryVRId opcode, SDPatternOperator operator, ++ TypedReg tr1, TypedReg tr2, bits<4> type> ++ : InstVRId { ++ let Constraints = "$V1 = $V1src"; ++ let DisableEncoding = "$V1src"; ++ let M5 = type; ++} ++ ++class QuaternaryVRRd opcode, ++ SDPatternOperator operator, TypedReg tr1, TypedReg tr2, ++ bits<4> type, SDPatternOperator m6mask, bits<4> m6or> ++ : InstVRRd { ++ let M5 = type; ++} ++ ++multiclass QuaternaryVRRdSPair opcode, ++ SDPatternOperator operator, ++ SDPatternOperator operator_cc, TypedReg tr1, ++ TypedReg tr2, bits<4> type, bits<4> m6or> { ++ def "" : QuaternaryVRRd; ++ def : InstAlias(NAME) tr1.op:$V1, tr2.op:$V2, ++ tr2.op:$V3, tr2.op:$V4, 0)>; ++ let Defs = [CC] in ++ def S : QuaternaryVRRd; ++ def : InstAlias(NAME#"S") tr1.op:$V1, tr2.op:$V2, ++ tr2.op:$V3, tr2.op:$V4, 0)>; ++} ++ + class LoadAndOpRSY opcode, SDPatternOperator operator, + RegisterOperand cls, AddressingMode mode = bdaddr20only> + : InstRSY opcode, + RegisterOperand cls> { + def "" : UnaryRRE; +- let isCodeGenOnly = 1 in ++ let isCodeGenOnly = 1, Predicates = [FeatureNoVector] in + def Compare : CompareRRE; + } + +@@ -1577,6 +2401,26 @@ class Alias ++ : Alias<6, (outs cls1:$src1), (ins cls2:$src2), []>; ++ ++// An alias of a UnaryVRR*, but with different register sizes. ++class UnaryAliasVRR ++ : Alias<6, (outs tr1.op:$V1), (ins tr2.op:$V2), ++ [(set tr1.op:$V1, (tr1.vt (operator (tr2.vt tr2.op:$V2))))]>; ++ ++// An alias of a UnaryVRX, but with different register sizes. ++class UnaryAliasVRX ++ : Alias<6, (outs tr.op:$V1), (ins mode:$XBD2), ++ [(set tr.op:$V1, (tr.vt (operator mode:$XBD2)))]>; ++ ++// An alias of a StoreVRX, but with different register sizes. ++class StoreAliasVRX ++ : Alias<6, (outs), (ins tr.op:$V1, mode:$XBD2), ++ [(operator (tr.vt tr.op:$V1), mode:$XBD2)]>; ++ + // An alias of a BinaryRI, but with different register sizes. + class BinaryAliasRI +@@ -1593,6 +2437,10 @@ class BinaryAliasRIL ++ : Alias<6, (outs VR128:$V1), (ins cls:$R2, cls:$R3), []>; ++ + // An alias of a CompareRI, but with different register sizes. 
+ class CompareAliasRI +Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.cpp +@@ -578,6 +578,12 @@ SystemZInstrInfo::copyPhysReg(MachineBas + Opcode = SystemZ::LDR; + else if (SystemZ::FP128BitRegClass.contains(DestReg, SrcReg)) + Opcode = SystemZ::LXR; ++ else if (SystemZ::VR32BitRegClass.contains(DestReg, SrcReg)) ++ Opcode = SystemZ::VLR32; ++ else if (SystemZ::VR64BitRegClass.contains(DestReg, SrcReg)) ++ Opcode = SystemZ::VLR64; ++ else if (SystemZ::VR128BitRegClass.contains(DestReg, SrcReg)) ++ Opcode = SystemZ::VLR; + else + llvm_unreachable("Impossible reg-to-reg copy"); + +@@ -723,9 +729,12 @@ SystemZInstrInfo::convertToThreeAddress( + unsigned Start, End; + if (isRxSBGMask(Imm, And.RegSize, Start, End)) { + unsigned NewOpcode; +- if (And.RegSize == 64) ++ if (And.RegSize == 64) { + NewOpcode = SystemZ::RISBG; +- else { ++ // Prefer RISBGN if available, since it does not clobber CC. ++ if (STI.hasMiscellaneousExtensions()) ++ NewOpcode = SystemZ::RISBGN; ++ } else { + NewOpcode = SystemZ::RISBMux; + Start &= 31; + End &= 31; +@@ -1114,6 +1123,16 @@ void SystemZInstrInfo::getLoadStoreOpcod + } else if (RC == &SystemZ::FP128BitRegClass) { + LoadOpcode = SystemZ::LX; + StoreOpcode = SystemZ::STX; ++ } else if (RC == &SystemZ::VR32BitRegClass) { ++ LoadOpcode = SystemZ::VL32; ++ StoreOpcode = SystemZ::VST32; ++ } else if (RC == &SystemZ::VR64BitRegClass) { ++ LoadOpcode = SystemZ::VL64; ++ StoreOpcode = SystemZ::VST64; ++ } else if (RC == &SystemZ::VF128BitRegClass || ++ RC == &SystemZ::VR128BitRegClass) { ++ LoadOpcode = SystemZ::VL; ++ StoreOpcode = SystemZ::VST; + } else + llvm_unreachable("Unsupported regclass to load or store"); + } +@@ -1147,17 +1166,22 @@ unsigned SystemZInstrInfo::getOpcodeForO + + unsigned SystemZInstrInfo::getLoadAndTest(unsigned Opcode) const { + switch (Opcode) { +- case SystemZ::L: return SystemZ::LT; +- case SystemZ::LY: return SystemZ::LT; +- case SystemZ::LG: return SystemZ::LTG; +- case SystemZ::LGF: return SystemZ::LTGF; +- case SystemZ::LR: return SystemZ::LTR; +- case SystemZ::LGFR: return SystemZ::LTGFR; +- case SystemZ::LGR: return SystemZ::LTGR; +- case SystemZ::LER: return SystemZ::LTEBR; +- case SystemZ::LDR: return SystemZ::LTDBR; +- case SystemZ::LXR: return SystemZ::LTXBR; +- default: return 0; ++ case SystemZ::L: return SystemZ::LT; ++ case SystemZ::LY: return SystemZ::LT; ++ case SystemZ::LG: return SystemZ::LTG; ++ case SystemZ::LGF: return SystemZ::LTGF; ++ case SystemZ::LR: return SystemZ::LTR; ++ case SystemZ::LGFR: return SystemZ::LTGFR; ++ case SystemZ::LGR: return SystemZ::LTGR; ++ case SystemZ::LER: return SystemZ::LTEBR; ++ case SystemZ::LDR: return SystemZ::LTDBR; ++ case SystemZ::LXR: return SystemZ::LTXBR; ++ // On zEC12 we prefer to use RISBGN. But if there is a chance to ++ // actually use the condition code, we may turn it back into RISGB. ++ // Note that RISBG is not really a "load-and-test" instruction, ++ // but sets the same condition code values, so is OK to use here. ++ case SystemZ::RISBGN: return SystemZ::RISBG; ++ default: return 0; + } + } + +@@ -1178,6 +1202,7 @@ static bool isStringOfOnes(uint64_t Mask + bool SystemZInstrInfo::isRxSBGMask(uint64_t Mask, unsigned BitSize, + unsigned &Start, unsigned &End) const { + // Reject trivial all-zero masks. 
++ Mask &= allOnes(BitSize); + if (Mask == 0) + return false; + +Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.h ++++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.h +@@ -56,10 +56,13 @@ static inline unsigned getCompareZeroCCM + // SystemZ MachineOperand target flags. + enum { + // Masks out the bits for the access model. +- MO_SYMBOL_MODIFIER = (1 << 0), ++ MO_SYMBOL_MODIFIER = (3 << 0), + + // @GOT (aka @GOTENT) +- MO_GOT = (1 << 0) ++ MO_GOT = (1 << 0), ++ ++ // @INDNTPOFF ++ MO_INDNTPOFF = (2 << 0) + }; + // Classifies a branch. + enum BranchType { +Index: llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZInstrInfo.td ++++ llvm-36/lib/Target/SystemZ/SystemZInstrInfo.td +@@ -249,11 +249,21 @@ let isCall = 1, isTerminator = 1, isRetu + def CallBR : Alias<2, (outs), (ins), [(z_sibcall R1D)]>; + } + ++// TLS calls. These will be lowered into a call to __tls_get_offset, ++// with an extra relocation specifying the TLS symbol. ++let isCall = 1, Defs = [R14D, CC] in { ++ def TLS_GDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), ++ [(z_tls_gdcall tglobaltlsaddr:$I2)]>; ++ def TLS_LDCALL : Alias<6, (outs), (ins tlssym:$I2, variable_ops), ++ [(z_tls_ldcall tglobaltlsaddr:$I2)]>; ++} ++ + // Define the general form of the call instructions for the asm parser. + // These instructions don't hard-code %r14 as the return address register. +-def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16:$I2), ++// Allow an optional TLS marker symbol to generate TLS call relocations. ++def BRAS : InstRI<0xA75, (outs), (ins GR64:$R1, brtarget16tls:$I2), + "bras\t$R1, $I2", []>; +-def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32:$I2), ++def BRASL : InstRIL<0xC05, (outs), (ins GR64:$R1, brtarget32tls:$I2), + "brasl\t$R1, $I2", []>; + def BASR : InstRR<0x0D, (outs), (ins GR64:$R1, ADDR64:$R2), + "basr\t$R1, $R2", []>; +@@ -587,6 +597,12 @@ let hasSideEffects = 0, isAsCheapAsAMove + [(set GR64:$R1, pcrel32:$I2)]>; + } + ++// Load the Global Offset Table address. This will be lowered into a ++// larl $R1, _GLOBAL_OFFSET_TABLE_ ++// instruction. ++def GOT : Alias<6, (outs GR64:$R1), (ins), ++ [(set GR64:$R1, (global_offset_table))]>; ++ + //===----------------------------------------------------------------------===// + // Absolute and Negation + //===----------------------------------------------------------------------===// +@@ -1045,6 +1061,10 @@ let Defs = [CC] in { + def RISBG : RotateSelectRIEf<"risbg", 0xEC55, GR64, GR64>; + } + ++// On zEC12 we have a variant of RISBG that does not set CC. ++let Predicates = [FeatureMiscellaneousExtensions] in ++ def RISBGN : RotateSelectRIEf<"risbgn", 0xEC59, GR64, GR64>; ++ + // Forms of RISBG that only affect one word of the destination register. + // They do not set CC. 
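As background for the RISBGN definition above (and the high-word RISBG forms introduced by the comment just before this aside), a minimal C++ sketch of the value a rotate-then-insert-selected-bits instruction computes. The helper names are hypothetical, SystemZ's MSB-is-bit-0 numbering is assumed, and the I4 zero bit (which zeroes rather than preserves the unselected bits) is not modeled:

#include <cstdint>

// All-ones in MSB-numbered bit positions Start..End, wrapping when
// Start > End (hypothetical helper; mirrors the masks isRxSBGMask accepts).
static uint64_t selectMask(unsigned Start, unsigned End) {
  auto Ones = [](unsigned N) { return N >= 64 ? ~0ULL : ~(~0ULL << N); };
  if (Start <= End)
    return Ones(End - Start + 1) << (63 - End);
  return ~(Ones(Start - End - 1) << (64 - Start));
}

// Sketch of RISBG/RISBGN: rotate Src left by Rotate (0..63), then replace
// bits Start..End of Dst with the corresponding bits of the rotated value.
static uint64_t risbgValue(uint64_t Dst, uint64_t Src, unsigned Start,
                           unsigned End, unsigned Rotate) {
  uint64_t Rotated = (Src << Rotate) | (Src >> ((64 - Rotate) & 63));
  uint64_t Mask = selectMask(Start, End);
  return (Rotated & Mask) | (Dst & ~Mask);
}

RISBGN computes exactly the same value; as the patch notes, the only difference is that it leaves CC untouched, which is why convertToThreeAddress prefers it when the condition code is not consumed.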
+ let Predicates = [FeatureHighWord] in { +@@ -1342,6 +1362,60 @@ let Defs = [CC] in { + } + + //===----------------------------------------------------------------------===// ++// Transactional execution ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureTransactionalExecution] in { ++ // Transaction Begin ++ let hasSideEffects = 1, mayStore = 1, ++ usesCustomInserter = 1, Defs = [CC] in { ++ def TBEGIN : InstSIL<0xE560, ++ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), ++ "tbegin\t$BD1, $I2", ++ [(z_tbegin bdaddr12only:$BD1, imm32zx16:$I2)]>; ++ def TBEGIN_nofloat : Pseudo<(outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), ++ [(z_tbegin_nofloat bdaddr12only:$BD1, ++ imm32zx16:$I2)]>; ++ def TBEGINC : InstSIL<0xE561, ++ (outs), (ins bdaddr12only:$BD1, imm32zx16:$I2), ++ "tbeginc\t$BD1, $I2", ++ [(int_s390_tbeginc bdaddr12only:$BD1, ++ imm32zx16:$I2)]>; ++ } ++ ++ // Transaction End ++ let hasSideEffects = 1, Defs = [CC], BD2 = 0 in ++ def TEND : InstS<0xB2F8, (outs), (ins), "tend", [(z_tend)]>; ++ ++ // Transaction Abort ++ let hasSideEffects = 1, isTerminator = 1, isBarrier = 1 in ++ def TABORT : InstS<0xB2FC, (outs), (ins bdaddr12only:$BD2), ++ "tabort\t$BD2", ++ [(int_s390_tabort bdaddr12only:$BD2)]>; ++ ++ // Nontransactional Store ++ let hasSideEffects = 1 in ++ def NTSTG : StoreRXY<"ntstg", 0xE325, int_s390_ntstg, GR64, 8>; ++ ++ // Extract Transaction Nesting Depth ++ let hasSideEffects = 1 in ++ def ETND : InherentRRE<"etnd", 0xB2EC, GR32, (int_s390_etnd)>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Processor assist ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureProcessorAssist] in { ++ let hasSideEffects = 1, R4 = 0 in ++ def PPA : InstRRF<0xB2E8, (outs), (ins GR64:$R1, GR64:$R2, imm32zx4:$R3), ++ "ppa\t$R1, $R2, $R3", []>; ++ def : Pat<(int_s390_ppa_txassist GR32:$src), ++ (PPA (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32), ++ 0, 1)>; ++} ++ ++//===----------------------------------------------------------------------===// + // Miscellaneous Instructions. + //===----------------------------------------------------------------------===// + +@@ -1366,6 +1440,13 @@ let Defs = [CC] in { + def : Pat<(ctlz GR64:$src), + (EXTRACT_SUBREG (FLOGR GR64:$src), subreg_h64)>; + ++// Population count. Counts bits set per byte. ++let Predicates = [FeaturePopulationCount], Defs = [CC] in { ++ def POPCNT : InstRRE<0xB9E1, (outs GR64:$R1), (ins GR64:$R2), ++ "popcnt\t$R1, $R2", ++ [(set GR64:$R1, (z_popcnt GR64:$R2))]>; ++} ++ + // Use subregs to populate the "don't care" bits in a 32-bit to 64-bit anyext. + def : Pat<(i64 (anyext GR32:$src)), + (INSERT_SUBREG (i64 (IMPLICIT_DEF)), GR32:$src, subreg_l32)>; +Index: llvm-36/lib/Target/SystemZ/SystemZInstrVector.td +=================================================================== +--- /dev/null ++++ llvm-36/lib/Target/SystemZ/SystemZInstrVector.td +@@ -0,0 +1,1097 @@ ++//==- SystemZInstrVector.td - SystemZ Vector instructions ------*- tblgen-*-==// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. 
++// ++//===----------------------------------------------------------------------===// ++ ++//===----------------------------------------------------------------------===// ++// Move instructions ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Register move. ++ def VLR : UnaryVRRa<"vlr", 0xE756, null_frag, v128any, v128any>; ++ def VLR32 : UnaryAliasVRR; ++ def VLR64 : UnaryAliasVRR; ++ ++ // Load GR from VR element. ++ def VLGVB : BinaryVRSc<"vlgvb", 0xE721, null_frag, v128b, 0>; ++ def VLGVH : BinaryVRSc<"vlgvh", 0xE721, null_frag, v128h, 1>; ++ def VLGVF : BinaryVRSc<"vlgvf", 0xE721, null_frag, v128f, 2>; ++ def VLGVG : BinaryVRSc<"vlgvg", 0xE721, z_vector_extract, v128g, 3>; ++ ++ // Load VR element from GR. ++ def VLVGB : TernaryVRSb<"vlvgb", 0xE722, z_vector_insert, ++ v128b, v128b, GR32, 0>; ++ def VLVGH : TernaryVRSb<"vlvgh", 0xE722, z_vector_insert, ++ v128h, v128h, GR32, 1>; ++ def VLVGF : TernaryVRSb<"vlvgf", 0xE722, z_vector_insert, ++ v128f, v128f, GR32, 2>; ++ def VLVGG : TernaryVRSb<"vlvgg", 0xE722, z_vector_insert, ++ v128g, v128g, GR64, 3>; ++ ++ // Load VR from GRs disjoint. ++ def VLVGP : BinaryVRRf<"vlvgp", 0xE762, z_join_dwords, v128g>; ++ def VLVGP32 : BinaryAliasVRRf; ++} ++ ++// Extractions always assign to the full GR64, even if the element would ++// fit in the lower 32 bits. Sub-i64 extracts therefore need to take a ++// subreg of the result. ++class VectorExtractSubreg ++ : Pat<(i32 (z_vector_extract (type VR128:$vec), shift12only:$index)), ++ (EXTRACT_SUBREG (insn VR128:$vec, shift12only:$index), subreg_l32)>; ++ ++def : VectorExtractSubreg; ++def : VectorExtractSubreg; ++def : VectorExtractSubreg; ++ ++//===----------------------------------------------------------------------===// ++// Immediate instructions ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Generate byte mask. ++ def VZERO : InherentVRIa<"vzero", 0xE744, 0>; ++ def VONE : InherentVRIa<"vone", 0xE744, 0xffff>; ++ def VGBM : UnaryVRIa<"vgbm", 0xE744, z_byte_mask, v128b, imm32zx16>; ++ ++ // Generate mask. ++ def VGMB : BinaryVRIb<"vgmb", 0xE746, z_rotate_mask, v128b, 0>; ++ def VGMH : BinaryVRIb<"vgmh", 0xE746, z_rotate_mask, v128h, 1>; ++ def VGMF : BinaryVRIb<"vgmf", 0xE746, z_rotate_mask, v128f, 2>; ++ def VGMG : BinaryVRIb<"vgmg", 0xE746, z_rotate_mask, v128g, 3>; ++ ++ // Load element immediate. ++ // ++ // We want these instructions to be used ahead of VLVG* where possible. ++ // However, VLVG* takes a variable BD-format index whereas VLEI takes ++ // a plain immediate index. This means that VLVG* has an extra "base" ++ // register operand and is 3 units more complex. Bumping the complexity ++ // of the VLEI* instructions by 4 means that they are strictly better ++ // than VLVG* in cases where both forms match. ++ let AddedComplexity = 4 in { ++ def VLEIB : TernaryVRIa<"vleib", 0xE740, z_vector_insert, ++ v128b, v128b, imm32sx16trunc, imm32zx4>; ++ def VLEIH : TernaryVRIa<"vleih", 0xE741, z_vector_insert, ++ v128h, v128h, imm32sx16trunc, imm32zx3>; ++ def VLEIF : TernaryVRIa<"vleif", 0xE743, z_vector_insert, ++ v128f, v128f, imm32sx16, imm32zx2>; ++ def VLEIG : TernaryVRIa<"vleig", 0xE742, z_vector_insert, ++ v128g, v128g, imm64sx16, imm32zx1>; ++ } ++ ++ // Replicate immediate. 
++ def VREPIB : UnaryVRIa<"vrepib", 0xE745, z_replicate, v128b, imm32sx16, 0>; ++ def VREPIH : UnaryVRIa<"vrepih", 0xE745, z_replicate, v128h, imm32sx16, 1>; ++ def VREPIF : UnaryVRIa<"vrepif", 0xE745, z_replicate, v128f, imm32sx16, 2>; ++ def VREPIG : UnaryVRIa<"vrepig", 0xE745, z_replicate, v128g, imm32sx16, 3>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Loads ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Load. ++ def VL : UnaryVRX<"vl", 0xE706, null_frag, v128any, 16>; ++ ++ // Load to block boundary. The number of loaded bytes is only known ++ // at run time. The instruction is really polymorphic, but v128b matches ++ // the return type of the associated intrinsic. ++ def VLBB : BinaryVRX<"vlbb", 0xE707, int_s390_vlbb, v128b, 0>; ++ ++ // Load count to block boundary. ++ let Defs = [CC] in ++ def LCBB : InstRXE<0xE727, (outs GR32:$R1), ++ (ins bdxaddr12only:$XBD2, imm32zx4:$M3), ++ "lcbb\t$R1, $XBD2, $M3", ++ [(set GR32:$R1, (int_s390_lcbb bdxaddr12only:$XBD2, ++ imm32zx4:$M3))]>; ++ ++ // Load with length. The number of loaded bytes is only known at run time. ++ def VLL : BinaryVRSb<"vll", 0xE737, int_s390_vll, 0>; ++ ++ // Load multiple. ++ def VLM : LoadMultipleVRSa<"vlm", 0xE736>; ++ ++ // Load and replicate ++ def VLREPB : UnaryVRX<"vlrepb", 0xE705, z_replicate_loadi8, v128b, 1, 0>; ++ def VLREPH : UnaryVRX<"vlreph", 0xE705, z_replicate_loadi16, v128h, 2, 1>; ++ def VLREPF : UnaryVRX<"vlrepf", 0xE705, z_replicate_loadi32, v128f, 4, 2>; ++ def VLREPG : UnaryVRX<"vlrepg", 0xE705, z_replicate_loadi64, v128g, 8, 3>; ++ def : Pat<(v4f32 (z_replicate_loadf32 bdxaddr12only:$addr)), ++ (VLREPF bdxaddr12only:$addr)>; ++ def : Pat<(v2f64 (z_replicate_loadf64 bdxaddr12only:$addr)), ++ (VLREPG bdxaddr12only:$addr)>; ++ ++ // Use VLREP to load subvectors. These patterns use "12pair" because ++ // LEY and LDY offer full 20-bit displacement fields. It's often better ++ // to use those instructions rather than force a 20-bit displacement ++ // into a GPR temporary. ++ def VL32 : UnaryAliasVRX; ++ def VL64 : UnaryAliasVRX; ++ ++ // Load logical element and zero. ++ def VLLEZB : UnaryVRX<"vllezb", 0xE704, z_vllezi8, v128b, 1, 0>; ++ def VLLEZH : UnaryVRX<"vllezh", 0xE704, z_vllezi16, v128h, 2, 1>; ++ def VLLEZF : UnaryVRX<"vllezf", 0xE704, z_vllezi32, v128f, 4, 2>; ++ def VLLEZG : UnaryVRX<"vllezg", 0xE704, z_vllezi64, v128g, 8, 3>; ++ def : Pat<(v4f32 (z_vllezf32 bdxaddr12only:$addr)), ++ (VLLEZF bdxaddr12only:$addr)>; ++ def : Pat<(v2f64 (z_vllezf64 bdxaddr12only:$addr)), ++ (VLLEZG bdxaddr12only:$addr)>; ++ ++ // Load element. ++ def VLEB : TernaryVRX<"vleb", 0xE700, z_vlei8, v128b, v128b, 1, imm32zx4>; ++ def VLEH : TernaryVRX<"vleh", 0xE701, z_vlei16, v128h, v128h, 2, imm32zx3>; ++ def VLEF : TernaryVRX<"vlef", 0xE703, z_vlei32, v128f, v128f, 4, imm32zx2>; ++ def VLEG : TernaryVRX<"vleg", 0xE702, z_vlei64, v128g, v128g, 8, imm32zx1>; ++ def : Pat<(z_vlef32 (v4f32 VR128:$val), bdxaddr12only:$addr, imm32zx2:$index), ++ (VLEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; ++ def : Pat<(z_vlef64 (v2f64 VR128:$val), bdxaddr12only:$addr, imm32zx1:$index), ++ (VLEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; ++ ++ // Gather element. ++ def VGEF : TernaryVRV<"vgef", 0xE713, 4, imm32zx2>; ++ def VGEG : TernaryVRV<"vgeg", 0xE712, 8, imm32zx1>; ++} ++ ++// Use replicating loads if we're inserting a single element into an ++// undefined vector. 
This avoids a false dependency on the previous ++// register contents. ++multiclass ReplicatePeephole { ++ def : Pat<(vectype (z_vector_insert ++ (undef), (scalartype (load bdxaddr12only:$addr)), 0)), ++ (vlrep bdxaddr12only:$addr)>; ++ def : Pat<(vectype (scalar_to_vector ++ (scalartype (load bdxaddr12only:$addr)))), ++ (vlrep bdxaddr12only:$addr)>; ++} ++defm : ReplicatePeephole; ++defm : ReplicatePeephole; ++defm : ReplicatePeephole; ++defm : ReplicatePeephole; ++defm : ReplicatePeephole; ++defm : ReplicatePeephole; ++ ++//===----------------------------------------------------------------------===// ++// Stores ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Store. ++ def VST : StoreVRX<"vst", 0xE70E, null_frag, v128any, 16>; ++ ++ // Store with length. The number of stored bytes is only known at run time. ++ def VSTL : StoreLengthVRSb<"vstl", 0xE73F, int_s390_vstl, 0>; ++ ++ // Store multiple. ++ def VSTM : StoreMultipleVRSa<"vstm", 0xE73E>; ++ ++ // Store element. ++ def VSTEB : StoreBinaryVRX<"vsteb", 0xE708, z_vstei8, v128b, 1, imm32zx4>; ++ def VSTEH : StoreBinaryVRX<"vsteh", 0xE709, z_vstei16, v128h, 2, imm32zx3>; ++ def VSTEF : StoreBinaryVRX<"vstef", 0xE70B, z_vstei32, v128f, 4, imm32zx2>; ++ def VSTEG : StoreBinaryVRX<"vsteg", 0xE70A, z_vstei64, v128g, 8, imm32zx1>; ++ def : Pat<(z_vstef32 (v4f32 VR128:$val), bdxaddr12only:$addr, ++ imm32zx2:$index), ++ (VSTEF VR128:$val, bdxaddr12only:$addr, imm32zx2:$index)>; ++ def : Pat<(z_vstef64 (v2f64 VR128:$val), bdxaddr12only:$addr, ++ imm32zx1:$index), ++ (VSTEG VR128:$val, bdxaddr12only:$addr, imm32zx1:$index)>; ++ ++ // Use VSTE to store subvectors. These patterns use "12pair" because ++ // STEY and STDY offer full 20-bit displacement fields. It's often better ++ // to use those instructions rather than force a 20-bit displacement ++ // into a GPR temporary. ++ def VST32 : StoreAliasVRX; ++ def VST64 : StoreAliasVRX; ++ ++ // Scatter element. ++ def VSCEF : StoreBinaryVRV<"vscef", 0xE71B, 4, imm32zx2>; ++ def VSCEG : StoreBinaryVRV<"vsceg", 0xE71A, 8, imm32zx1>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Selects and permutes ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Merge high. ++ def VMRHB : BinaryVRRc<"vmrhb", 0xE761, z_merge_high, v128b, v128b, 0>; ++ def VMRHH : BinaryVRRc<"vmrhh", 0xE761, z_merge_high, v128h, v128h, 1>; ++ def VMRHF : BinaryVRRc<"vmrhf", 0xE761, z_merge_high, v128f, v128f, 2>; ++ def VMRHG : BinaryVRRc<"vmrhg", 0xE761, z_merge_high, v128g, v128g, 3>; ++ def : BinaryRRWithType; ++ def : BinaryRRWithType; ++ ++ // Merge low. ++ def VMRLB : BinaryVRRc<"vmrlb", 0xE760, z_merge_low, v128b, v128b, 0>; ++ def VMRLH : BinaryVRRc<"vmrlh", 0xE760, z_merge_low, v128h, v128h, 1>; ++ def VMRLF : BinaryVRRc<"vmrlf", 0xE760, z_merge_low, v128f, v128f, 2>; ++ def VMRLG : BinaryVRRc<"vmrlg", 0xE760, z_merge_low, v128g, v128g, 3>; ++ def : BinaryRRWithType; ++ def : BinaryRRWithType; ++ ++ // Permute. ++ def VPERM : TernaryVRRe<"vperm", 0xE78C, z_permute, v128b, v128b>; ++ ++ // Permute doubleword immediate. ++ def VPDI : TernaryVRRc<"vpdi", 0xE784, z_permute_dwords, v128g, v128g>; ++ ++ // Replicate. 
++ def VREPB : BinaryVRIc<"vrepb", 0xE74D, z_splat, v128b, v128b, 0>; ++ def VREPH : BinaryVRIc<"vreph", 0xE74D, z_splat, v128h, v128h, 1>; ++ def VREPF : BinaryVRIc<"vrepf", 0xE74D, z_splat, v128f, v128f, 2>; ++ def VREPG : BinaryVRIc<"vrepg", 0xE74D, z_splat, v128g, v128g, 3>; ++ def : Pat<(v4f32 (z_splat VR128:$vec, imm32zx16:$index)), ++ (VREPF VR128:$vec, imm32zx16:$index)>; ++ def : Pat<(v2f64 (z_splat VR128:$vec, imm32zx16:$index)), ++ (VREPG VR128:$vec, imm32zx16:$index)>; ++ ++ // Select. ++ def VSEL : TernaryVRRe<"vsel", 0xE78D, null_frag, v128any, v128any>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Widening and narrowing ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Pack ++ def VPKH : BinaryVRRc<"vpkh", 0xE794, z_pack, v128b, v128h, 1>; ++ def VPKF : BinaryVRRc<"vpkf", 0xE794, z_pack, v128h, v128f, 2>; ++ def VPKG : BinaryVRRc<"vpkg", 0xE794, z_pack, v128f, v128g, 3>; ++ ++ // Pack saturate. ++ defm VPKSH : BinaryVRRbSPair<"vpksh", 0xE797, int_s390_vpksh, z_packs_cc, ++ v128b, v128h, 1>; ++ defm VPKSF : BinaryVRRbSPair<"vpksf", 0xE797, int_s390_vpksf, z_packs_cc, ++ v128h, v128f, 2>; ++ defm VPKSG : BinaryVRRbSPair<"vpksg", 0xE797, int_s390_vpksg, z_packs_cc, ++ v128f, v128g, 3>; ++ ++ // Pack saturate logical. ++ defm VPKLSH : BinaryVRRbSPair<"vpklsh", 0xE795, int_s390_vpklsh, z_packls_cc, ++ v128b, v128h, 1>; ++ defm VPKLSF : BinaryVRRbSPair<"vpklsf", 0xE795, int_s390_vpklsf, z_packls_cc, ++ v128h, v128f, 2>; ++ defm VPKLSG : BinaryVRRbSPair<"vpklsg", 0xE795, int_s390_vpklsg, z_packls_cc, ++ v128f, v128g, 3>; ++ ++ // Sign-extend to doubleword. ++ def VSEGB : UnaryVRRa<"vsegb", 0xE75F, z_vsei8, v128g, v128g, 0>; ++ def VSEGH : UnaryVRRa<"vsegh", 0xE75F, z_vsei16, v128g, v128g, 1>; ++ def VSEGF : UnaryVRRa<"vsegf", 0xE75F, z_vsei32, v128g, v128g, 2>; ++ def : Pat<(z_vsei8_by_parts (v16i8 VR128:$src)), (VSEGB VR128:$src)>; ++ def : Pat<(z_vsei16_by_parts (v8i16 VR128:$src)), (VSEGH VR128:$src)>; ++ def : Pat<(z_vsei32_by_parts (v4i32 VR128:$src)), (VSEGF VR128:$src)>; ++ ++ // Unpack high. ++ def VUPHB : UnaryVRRa<"vuphb", 0xE7D7, z_unpack_high, v128h, v128b, 0>; ++ def VUPHH : UnaryVRRa<"vuphh", 0xE7D7, z_unpack_high, v128f, v128h, 1>; ++ def VUPHF : UnaryVRRa<"vuphf", 0xE7D7, z_unpack_high, v128g, v128f, 2>; ++ ++ // Unpack logical high. ++ def VUPLHB : UnaryVRRa<"vuplhb", 0xE7D5, z_unpackl_high, v128h, v128b, 0>; ++ def VUPLHH : UnaryVRRa<"vuplhh", 0xE7D5, z_unpackl_high, v128f, v128h, 1>; ++ def VUPLHF : UnaryVRRa<"vuplhf", 0xE7D5, z_unpackl_high, v128g, v128f, 2>; ++ ++ // Unpack low. ++ def VUPLB : UnaryVRRa<"vuplb", 0xE7D6, z_unpack_low, v128h, v128b, 0>; ++ def VUPLHW : UnaryVRRa<"vuplhw", 0xE7D6, z_unpack_low, v128f, v128h, 1>; ++ def VUPLF : UnaryVRRa<"vuplf", 0xE7D6, z_unpack_low, v128g, v128f, 2>; ++ ++ // Unpack logical low. ++ def VUPLLB : UnaryVRRa<"vupllb", 0xE7D4, z_unpackl_low, v128h, v128b, 0>; ++ def VUPLLH : UnaryVRRa<"vupllh", 0xE7D4, z_unpackl_low, v128f, v128h, 1>; ++ def VUPLLF : UnaryVRRa<"vupllf", 0xE7D4, z_unpackl_low, v128g, v128f, 2>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Instantiating generic operations for specific types. 
++//===----------------------------------------------------------------------===// ++ ++multiclass GenericVectorOps { ++ let Predicates = [FeatureVector] in { ++ def : Pat<(type (load bdxaddr12only:$addr)), ++ (VL bdxaddr12only:$addr)>; ++ def : Pat<(store (type VR128:$src), bdxaddr12only:$addr), ++ (VST VR128:$src, bdxaddr12only:$addr)>; ++ def : Pat<(type (vselect (inttype VR128:$x), VR128:$y, VR128:$z)), ++ (VSEL VR128:$y, VR128:$z, VR128:$x)>; ++ def : Pat<(type (vselect (inttype (z_vnot VR128:$x)), VR128:$y, VR128:$z)), ++ (VSEL VR128:$z, VR128:$y, VR128:$x)>; ++ } ++} ++ ++defm : GenericVectorOps; ++defm : GenericVectorOps; ++defm : GenericVectorOps; ++defm : GenericVectorOps; ++defm : GenericVectorOps; ++defm : GenericVectorOps; ++ ++//===----------------------------------------------------------------------===// ++// Integer arithmetic ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Add. ++ def VAB : BinaryVRRc<"vab", 0xE7F3, add, v128b, v128b, 0>; ++ def VAH : BinaryVRRc<"vah", 0xE7F3, add, v128h, v128h, 1>; ++ def VAF : BinaryVRRc<"vaf", 0xE7F3, add, v128f, v128f, 2>; ++ def VAG : BinaryVRRc<"vag", 0xE7F3, add, v128g, v128g, 3>; ++ def VAQ : BinaryVRRc<"vaq", 0xE7F3, int_s390_vaq, v128q, v128q, 4>; ++ ++ // Add compute carry. ++ def VACCB : BinaryVRRc<"vaccb", 0xE7F1, int_s390_vaccb, v128b, v128b, 0>; ++ def VACCH : BinaryVRRc<"vacch", 0xE7F1, int_s390_vacch, v128h, v128h, 1>; ++ def VACCF : BinaryVRRc<"vaccf", 0xE7F1, int_s390_vaccf, v128f, v128f, 2>; ++ def VACCG : BinaryVRRc<"vaccg", 0xE7F1, int_s390_vaccg, v128g, v128g, 3>; ++ def VACCQ : BinaryVRRc<"vaccq", 0xE7F1, int_s390_vaccq, v128q, v128q, 4>; ++ ++ // Add with carry. ++ def VACQ : TernaryVRRd<"vacq", 0xE7BB, int_s390_vacq, v128q, v128q, 4>; ++ ++ // Add with carry compute carry. ++ def VACCCQ : TernaryVRRd<"vacccq", 0xE7B9, int_s390_vacccq, v128q, v128q, 4>; ++ ++ // And. ++ def VN : BinaryVRRc<"vn", 0xE768, null_frag, v128any, v128any>; ++ ++ // And with complement. ++ def VNC : BinaryVRRc<"vnc", 0xE769, null_frag, v128any, v128any>; ++ ++ // Average. ++ def VAVGB : BinaryVRRc<"vavgb", 0xE7F2, int_s390_vavgb, v128b, v128b, 0>; ++ def VAVGH : BinaryVRRc<"vavgh", 0xE7F2, int_s390_vavgh, v128h, v128h, 1>; ++ def VAVGF : BinaryVRRc<"vavgf", 0xE7F2, int_s390_vavgf, v128f, v128f, 2>; ++ def VAVGG : BinaryVRRc<"vavgg", 0xE7F2, int_s390_vavgg, v128g, v128g, 3>; ++ ++ // Average logical. ++ def VAVGLB : BinaryVRRc<"vavglb", 0xE7F0, int_s390_vavglb, v128b, v128b, 0>; ++ def VAVGLH : BinaryVRRc<"vavglh", 0xE7F0, int_s390_vavglh, v128h, v128h, 1>; ++ def VAVGLF : BinaryVRRc<"vavglf", 0xE7F0, int_s390_vavglf, v128f, v128f, 2>; ++ def VAVGLG : BinaryVRRc<"vavglg", 0xE7F0, int_s390_vavglg, v128g, v128g, 3>; ++ ++ // Checksum. ++ def VCKSM : BinaryVRRc<"vcksm", 0xE766, int_s390_vcksm, v128f, v128f>; ++ ++ // Count leading zeros. ++ def VCLZB : UnaryVRRa<"vclzb", 0xE753, ctlz, v128b, v128b, 0>; ++ def VCLZH : UnaryVRRa<"vclzh", 0xE753, ctlz, v128h, v128h, 1>; ++ def VCLZF : UnaryVRRa<"vclzf", 0xE753, ctlz, v128f, v128f, 2>; ++ def VCLZG : UnaryVRRa<"vclzg", 0xE753, ctlz, v128g, v128g, 3>; ++ ++ // Count trailing zeros. ++ def VCTZB : UnaryVRRa<"vctzb", 0xE752, cttz, v128b, v128b, 0>; ++ def VCTZH : UnaryVRRa<"vctzh", 0xE752, cttz, v128h, v128h, 1>; ++ def VCTZF : UnaryVRRa<"vctzf", 0xE752, cttz, v128f, v128f, 2>; ++ def VCTZG : UnaryVRRa<"vctzg", 0xE752, cttz, v128g, v128g, 3>; ++ ++ // Exclusive or. 
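All the vselect patterns in GenericVectorOps above funnel into VSEL, which is a purely bitwise select; the element-wise behaviour falls out because the vector comparisons produce all-ones or all-zeros per element. A one-lane C++ model of the operand order used by (VSEL y, z, mask), shown before the exclusive-or definition that the comment above introduces (helper name made up):

#include <cstdint>

// Bitwise select: bits of Y where Mask is 1, bits of Z where Mask is 0.
static uint64_t vselLane(uint64_t Y, uint64_t Z, uint64_t Mask) {
  return (Y & Mask) | (Z & ~Mask);
}

This is also why the second GenericVectorOps pattern can fold a complemented mask simply by swapping the first two operands.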
++ def VX : BinaryVRRc<"vx", 0xE76D, null_frag, v128any, v128any>; ++ ++ // Galois field multiply sum. ++ def VGFMB : BinaryVRRc<"vgfmb", 0xE7B4, int_s390_vgfmb, v128h, v128b, 0>; ++ def VGFMH : BinaryVRRc<"vgfmh", 0xE7B4, int_s390_vgfmh, v128f, v128h, 1>; ++ def VGFMF : BinaryVRRc<"vgfmf", 0xE7B4, int_s390_vgfmf, v128g, v128f, 2>; ++ def VGFMG : BinaryVRRc<"vgfmg", 0xE7B4, int_s390_vgfmg, v128q, v128g, 3>; ++ ++ // Galois field multiply sum and accumulate. ++ def VGFMAB : TernaryVRRd<"vgfmab", 0xE7BC, int_s390_vgfmab, v128h, v128b, 0>; ++ def VGFMAH : TernaryVRRd<"vgfmah", 0xE7BC, int_s390_vgfmah, v128f, v128h, 1>; ++ def VGFMAF : TernaryVRRd<"vgfmaf", 0xE7BC, int_s390_vgfmaf, v128g, v128f, 2>; ++ def VGFMAG : TernaryVRRd<"vgfmag", 0xE7BC, int_s390_vgfmag, v128q, v128g, 3>; ++ ++ // Load complement. ++ def VLCB : UnaryVRRa<"vlcb", 0xE7DE, z_vneg, v128b, v128b, 0>; ++ def VLCH : UnaryVRRa<"vlch", 0xE7DE, z_vneg, v128h, v128h, 1>; ++ def VLCF : UnaryVRRa<"vlcf", 0xE7DE, z_vneg, v128f, v128f, 2>; ++ def VLCG : UnaryVRRa<"vlcg", 0xE7DE, z_vneg, v128g, v128g, 3>; ++ ++ // Load positive. ++ def VLPB : UnaryVRRa<"vlpb", 0xE7DF, z_viabs8, v128b, v128b, 0>; ++ def VLPH : UnaryVRRa<"vlph", 0xE7DF, z_viabs16, v128h, v128h, 1>; ++ def VLPF : UnaryVRRa<"vlpf", 0xE7DF, z_viabs32, v128f, v128f, 2>; ++ def VLPG : UnaryVRRa<"vlpg", 0xE7DF, z_viabs64, v128g, v128g, 3>; ++ ++ // Maximum. ++ def VMXB : BinaryVRRc<"vmxb", 0xE7FF, null_frag, v128b, v128b, 0>; ++ def VMXH : BinaryVRRc<"vmxh", 0xE7FF, null_frag, v128h, v128h, 1>; ++ def VMXF : BinaryVRRc<"vmxf", 0xE7FF, null_frag, v128f, v128f, 2>; ++ def VMXG : BinaryVRRc<"vmxg", 0xE7FF, null_frag, v128g, v128g, 3>; ++ ++ // Maximum logical. ++ def VMXLB : BinaryVRRc<"vmxlb", 0xE7FD, null_frag, v128b, v128b, 0>; ++ def VMXLH : BinaryVRRc<"vmxlh", 0xE7FD, null_frag, v128h, v128h, 1>; ++ def VMXLF : BinaryVRRc<"vmxlf", 0xE7FD, null_frag, v128f, v128f, 2>; ++ def VMXLG : BinaryVRRc<"vmxlg", 0xE7FD, null_frag, v128g, v128g, 3>; ++ ++ // Minimum. ++ def VMNB : BinaryVRRc<"vmnb", 0xE7FE, null_frag, v128b, v128b, 0>; ++ def VMNH : BinaryVRRc<"vmnh", 0xE7FE, null_frag, v128h, v128h, 1>; ++ def VMNF : BinaryVRRc<"vmnf", 0xE7FE, null_frag, v128f, v128f, 2>; ++ def VMNG : BinaryVRRc<"vmng", 0xE7FE, null_frag, v128g, v128g, 3>; ++ ++ // Minimum logical. ++ def VMNLB : BinaryVRRc<"vmnlb", 0xE7FC, null_frag, v128b, v128b, 0>; ++ def VMNLH : BinaryVRRc<"vmnlh", 0xE7FC, null_frag, v128h, v128h, 1>; ++ def VMNLF : BinaryVRRc<"vmnlf", 0xE7FC, null_frag, v128f, v128f, 2>; ++ def VMNLG : BinaryVRRc<"vmnlg", 0xE7FC, null_frag, v128g, v128g, 3>; ++ ++ // Multiply and add low. ++ def VMALB : TernaryVRRd<"vmalb", 0xE7AA, z_muladd, v128b, v128b, 0>; ++ def VMALHW : TernaryVRRd<"vmalhw", 0xE7AA, z_muladd, v128h, v128h, 1>; ++ def VMALF : TernaryVRRd<"vmalf", 0xE7AA, z_muladd, v128f, v128f, 2>; ++ ++ // Multiply and add high. ++ def VMAHB : TernaryVRRd<"vmahb", 0xE7AB, int_s390_vmahb, v128b, v128b, 0>; ++ def VMAHH : TernaryVRRd<"vmahh", 0xE7AB, int_s390_vmahh, v128h, v128h, 1>; ++ def VMAHF : TernaryVRRd<"vmahf", 0xE7AB, int_s390_vmahf, v128f, v128f, 2>; ++ ++ // Multiply and add logical high. ++ def VMALHB : TernaryVRRd<"vmalhb", 0xE7A9, int_s390_vmalhb, v128b, v128b, 0>; ++ def VMALHH : TernaryVRRd<"vmalhh", 0xE7A9, int_s390_vmalhh, v128h, v128h, 1>; ++ def VMALHF : TernaryVRRd<"vmalhf", 0xE7A9, int_s390_vmalhf, v128f, v128f, 2>; ++ ++ // Multiply and add even. 
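The "even" and "odd" multiply families that begin below are widening: result element i is the double-width product of input elements 2i (even forms) or 2i+1 (odd forms), counting elements from the left. A hedged C++ model of the VMEF case, v4i32 inputs to a v2i64 result (function name invented):

#include <array>
#include <cstdint>

// Multiply even fullword: sign-extend lanes 0 and 2 of each input and
// multiply them into the two 64-bit result lanes.
static std::array<int64_t, 2> vmefModel(const std::array<int32_t, 4> &A,
                                        const std::array<int32_t, 4> &B) {
  return {int64_t(A[0]) * int64_t(B[0]), int64_t(A[2]) * int64_t(B[2])};
}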
++ def VMAEB : TernaryVRRd<"vmaeb", 0xE7AE, int_s390_vmaeb, v128h, v128b, 0>; ++ def VMAEH : TernaryVRRd<"vmaeh", 0xE7AE, int_s390_vmaeh, v128f, v128h, 1>; ++ def VMAEF : TernaryVRRd<"vmaef", 0xE7AE, int_s390_vmaef, v128g, v128f, 2>; ++ ++ // Multiply and add logical even. ++ def VMALEB : TernaryVRRd<"vmaleb", 0xE7AC, int_s390_vmaleb, v128h, v128b, 0>; ++ def VMALEH : TernaryVRRd<"vmaleh", 0xE7AC, int_s390_vmaleh, v128f, v128h, 1>; ++ def VMALEF : TernaryVRRd<"vmalef", 0xE7AC, int_s390_vmalef, v128g, v128f, 2>; ++ ++ // Multiply and add odd. ++ def VMAOB : TernaryVRRd<"vmaob", 0xE7AF, int_s390_vmaob, v128h, v128b, 0>; ++ def VMAOH : TernaryVRRd<"vmaoh", 0xE7AF, int_s390_vmaoh, v128f, v128h, 1>; ++ def VMAOF : TernaryVRRd<"vmaof", 0xE7AF, int_s390_vmaof, v128g, v128f, 2>; ++ ++ // Multiply and add logical odd. ++ def VMALOB : TernaryVRRd<"vmalob", 0xE7AD, int_s390_vmalob, v128h, v128b, 0>; ++ def VMALOH : TernaryVRRd<"vmaloh", 0xE7AD, int_s390_vmaloh, v128f, v128h, 1>; ++ def VMALOF : TernaryVRRd<"vmalof", 0xE7AD, int_s390_vmalof, v128g, v128f, 2>; ++ ++ // Multiply high. ++ def VMHB : BinaryVRRc<"vmhb", 0xE7A3, int_s390_vmhb, v128b, v128b, 0>; ++ def VMHH : BinaryVRRc<"vmhh", 0xE7A3, int_s390_vmhh, v128h, v128h, 1>; ++ def VMHF : BinaryVRRc<"vmhf", 0xE7A3, int_s390_vmhf, v128f, v128f, 2>; ++ ++ // Multiply logical high. ++ def VMLHB : BinaryVRRc<"vmlhb", 0xE7A1, int_s390_vmlhb, v128b, v128b, 0>; ++ def VMLHH : BinaryVRRc<"vmlhh", 0xE7A1, int_s390_vmlhh, v128h, v128h, 1>; ++ def VMLHF : BinaryVRRc<"vmlhf", 0xE7A1, int_s390_vmlhf, v128f, v128f, 2>; ++ ++ // Multiply low. ++ def VMLB : BinaryVRRc<"vmlb", 0xE7A2, mul, v128b, v128b, 0>; ++ def VMLHW : BinaryVRRc<"vmlhw", 0xE7A2, mul, v128h, v128h, 1>; ++ def VMLF : BinaryVRRc<"vmlf", 0xE7A2, mul, v128f, v128f, 2>; ++ ++ // Multiply even. ++ def VMEB : BinaryVRRc<"vmeb", 0xE7A6, int_s390_vmeb, v128h, v128b, 0>; ++ def VMEH : BinaryVRRc<"vmeh", 0xE7A6, int_s390_vmeh, v128f, v128h, 1>; ++ def VMEF : BinaryVRRc<"vmef", 0xE7A6, int_s390_vmef, v128g, v128f, 2>; ++ ++ // Multiply logical even. ++ def VMLEB : BinaryVRRc<"vmleb", 0xE7A4, int_s390_vmleb, v128h, v128b, 0>; ++ def VMLEH : BinaryVRRc<"vmleh", 0xE7A4, int_s390_vmleh, v128f, v128h, 1>; ++ def VMLEF : BinaryVRRc<"vmlef", 0xE7A4, int_s390_vmlef, v128g, v128f, 2>; ++ ++ // Multiply odd. ++ def VMOB : BinaryVRRc<"vmob", 0xE7A7, int_s390_vmob, v128h, v128b, 0>; ++ def VMOH : BinaryVRRc<"vmoh", 0xE7A7, int_s390_vmoh, v128f, v128h, 1>; ++ def VMOF : BinaryVRRc<"vmof", 0xE7A7, int_s390_vmof, v128g, v128f, 2>; ++ ++ // Multiply logical odd. ++ def VMLOB : BinaryVRRc<"vmlob", 0xE7A5, int_s390_vmlob, v128h, v128b, 0>; ++ def VMLOH : BinaryVRRc<"vmloh", 0xE7A5, int_s390_vmloh, v128f, v128h, 1>; ++ def VMLOF : BinaryVRRc<"vmlof", 0xE7A5, int_s390_vmlof, v128g, v128f, 2>; ++ ++ // Nor. ++ def VNO : BinaryVRRc<"vno", 0xE76B, null_frag, v128any, v128any>; ++ ++ // Or. ++ def VO : BinaryVRRc<"vo", 0xE76A, null_frag, v128any, v128any>; ++ ++ // Population count. ++ def VPOPCT : BinaryVRRa<"vpopct", 0xE750>; ++ def : Pat<(v16i8 (z_popcnt VR128:$x)), (VPOPCT VR128:$x, 0)>; ++ ++ // Element rotate left logical (with vector shift amount). 
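VPOPCT above is defined only with an M3 operand of 0, counting bits within each byte, which is why the lone z_popcnt pattern is for v16i8. A per-lane C++ sketch (assumed helper), placed before the element-rotate definitions that follow:

#include <cstdint>

// Population count of a single byte lane, as VPOPCT produces 16 times over.
static uint8_t popcntByte(uint8_t V) {
  uint8_t N = 0;
  for (; V; V &= V - 1) // clear the lowest set bit each iteration
    ++N;
  return N;
}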
++ def VERLLVB : BinaryVRRc<"verllvb", 0xE773, int_s390_verllvb, ++ v128b, v128b, 0>; ++ def VERLLVH : BinaryVRRc<"verllvh", 0xE773, int_s390_verllvh, ++ v128h, v128h, 1>; ++ def VERLLVF : BinaryVRRc<"verllvf", 0xE773, int_s390_verllvf, ++ v128f, v128f, 2>; ++ def VERLLVG : BinaryVRRc<"verllvg", 0xE773, int_s390_verllvg, ++ v128g, v128g, 3>; ++ ++ // Element rotate left logical (with scalar shift amount). ++ def VERLLB : BinaryVRSa<"verllb", 0xE733, int_s390_verllb, v128b, v128b, 0>; ++ def VERLLH : BinaryVRSa<"verllh", 0xE733, int_s390_verllh, v128h, v128h, 1>; ++ def VERLLF : BinaryVRSa<"verllf", 0xE733, int_s390_verllf, v128f, v128f, 2>; ++ def VERLLG : BinaryVRSa<"verllg", 0xE733, int_s390_verllg, v128g, v128g, 3>; ++ ++ // Element rotate and insert under mask. ++ def VERIMB : QuaternaryVRId<"verimb", 0xE772, int_s390_verimb, v128b, v128b, 0>; ++ def VERIMH : QuaternaryVRId<"verimh", 0xE772, int_s390_verimh, v128h, v128h, 1>; ++ def VERIMF : QuaternaryVRId<"verimf", 0xE772, int_s390_verimf, v128f, v128f, 2>; ++ def VERIMG : QuaternaryVRId<"verimg", 0xE772, int_s390_verimg, v128g, v128g, 3>; ++ ++ // Element shift left (with vector shift amount). ++ def VESLVB : BinaryVRRc<"veslvb", 0xE770, z_vshl, v128b, v128b, 0>; ++ def VESLVH : BinaryVRRc<"veslvh", 0xE770, z_vshl, v128h, v128h, 1>; ++ def VESLVF : BinaryVRRc<"veslvf", 0xE770, z_vshl, v128f, v128f, 2>; ++ def VESLVG : BinaryVRRc<"veslvg", 0xE770, z_vshl, v128g, v128g, 3>; ++ ++ // Element shift left (with scalar shift amount). ++ def VESLB : BinaryVRSa<"veslb", 0xE730, z_vshl_by_scalar, v128b, v128b, 0>; ++ def VESLH : BinaryVRSa<"veslh", 0xE730, z_vshl_by_scalar, v128h, v128h, 1>; ++ def VESLF : BinaryVRSa<"veslf", 0xE730, z_vshl_by_scalar, v128f, v128f, 2>; ++ def VESLG : BinaryVRSa<"veslg", 0xE730, z_vshl_by_scalar, v128g, v128g, 3>; ++ ++ // Element shift right arithmetic (with vector shift amount). ++ def VESRAVB : BinaryVRRc<"vesravb", 0xE77A, z_vsra, v128b, v128b, 0>; ++ def VESRAVH : BinaryVRRc<"vesravh", 0xE77A, z_vsra, v128h, v128h, 1>; ++ def VESRAVF : BinaryVRRc<"vesravf", 0xE77A, z_vsra, v128f, v128f, 2>; ++ def VESRAVG : BinaryVRRc<"vesravg", 0xE77A, z_vsra, v128g, v128g, 3>; ++ ++ // Element shift right arithmetic (with scalar shift amount). ++ def VESRAB : BinaryVRSa<"vesrab", 0xE73A, z_vsra_by_scalar, v128b, v128b, 0>; ++ def VESRAH : BinaryVRSa<"vesrah", 0xE73A, z_vsra_by_scalar, v128h, v128h, 1>; ++ def VESRAF : BinaryVRSa<"vesraf", 0xE73A, z_vsra_by_scalar, v128f, v128f, 2>; ++ def VESRAG : BinaryVRSa<"vesrag", 0xE73A, z_vsra_by_scalar, v128g, v128g, 3>; ++ ++ // Element shift right logical (with vector shift amount). ++ def VESRLVB : BinaryVRRc<"vesrlvb", 0xE778, z_vsrl, v128b, v128b, 0>; ++ def VESRLVH : BinaryVRRc<"vesrlvh", 0xE778, z_vsrl, v128h, v128h, 1>; ++ def VESRLVF : BinaryVRRc<"vesrlvf", 0xE778, z_vsrl, v128f, v128f, 2>; ++ def VESRLVG : BinaryVRRc<"vesrlvg", 0xE778, z_vsrl, v128g, v128g, 3>; ++ ++ // Element shift right logical (with scalar shift amount). ++ def VESRLB : BinaryVRSa<"vesrlb", 0xE738, z_vsrl_by_scalar, v128b, v128b, 0>; ++ def VESRLH : BinaryVRSa<"vesrlh", 0xE738, z_vsrl_by_scalar, v128h, v128h, 1>; ++ def VESRLF : BinaryVRSa<"vesrlf", 0xE738, z_vsrl_by_scalar, v128f, v128f, 2>; ++ def VESRLG : BinaryVRSa<"vesrlg", 0xE738, z_vsrl_by_scalar, v128g, v128g, 3>; ++ ++ // Shift left. ++ def VSL : BinaryVRRc<"vsl", 0xE774, int_s390_vsl, v128b, v128b>; ++ ++ // Shift left by byte. ++ def VSLB : BinaryVRRc<"vslb", 0xE775, int_s390_vslb, v128b, v128b>; ++ ++ // Shift left double by byte. 
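The shift-left-double definition that follows treats its two inputs as one 32-byte value and extracts a 16-byte window; a C++ sketch of the semantics assumed here (names invented, Shift in 0..15):

#include <array>
#include <cstdint>

// VSLDB model: bytes Shift..Shift+15 of the concatenation A:B.
static std::array<uint8_t, 16> vsldbModel(const std::array<uint8_t, 16> &A,
                                          const std::array<uint8_t, 16> &B,
                                          unsigned Shift) {
  std::array<uint8_t, 16> R{};
  for (unsigned I = 0; I != 16; ++I)
    R[I] = Shift + I < 16 ? A[Shift + I] : B[Shift + I - 16];
  return R;
}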
++ def VSLDB : TernaryVRId<"vsldb", 0xE777, z_shl_double, v128b, v128b, 0>; ++ def : Pat<(int_s390_vsldb VR128:$x, VR128:$y, imm32zx8:$z), ++ (VSLDB VR128:$x, VR128:$y, imm32zx8:$z)>; ++ ++ // Shift right arithmetic. ++ def VSRA : BinaryVRRc<"vsra", 0xE77E, int_s390_vsra, v128b, v128b>; ++ ++ // Shift right arithmetic by byte. ++ def VSRAB : BinaryVRRc<"vsrab", 0xE77F, int_s390_vsrab, v128b, v128b>; ++ ++ // Shift right logical. ++ def VSRL : BinaryVRRc<"vsrl", 0xE77C, int_s390_vsrl, v128b, v128b>; ++ ++ // Shift right logical by byte. ++ def VSRLB : BinaryVRRc<"vsrlb", 0xE77D, int_s390_vsrlb, v128b, v128b>; ++ ++ // Subtract. ++ def VSB : BinaryVRRc<"vsb", 0xE7F7, sub, v128b, v128b, 0>; ++ def VSH : BinaryVRRc<"vsh", 0xE7F7, sub, v128h, v128h, 1>; ++ def VSF : BinaryVRRc<"vsf", 0xE7F7, sub, v128f, v128f, 2>; ++ def VSG : BinaryVRRc<"vsg", 0xE7F7, sub, v128g, v128g, 3>; ++ def VSQ : BinaryVRRc<"vsq", 0xE7F7, int_s390_vsq, v128q, v128q, 4>; ++ ++ // Subtract compute borrow indication. ++ def VSCBIB : BinaryVRRc<"vscbib", 0xE7F5, int_s390_vscbib, v128b, v128b, 0>; ++ def VSCBIH : BinaryVRRc<"vscbih", 0xE7F5, int_s390_vscbih, v128h, v128h, 1>; ++ def VSCBIF : BinaryVRRc<"vscbif", 0xE7F5, int_s390_vscbif, v128f, v128f, 2>; ++ def VSCBIG : BinaryVRRc<"vscbig", 0xE7F5, int_s390_vscbig, v128g, v128g, 3>; ++ def VSCBIQ : BinaryVRRc<"vscbiq", 0xE7F5, int_s390_vscbiq, v128q, v128q, 4>; ++ ++ // Subtract with borrow indication. ++ def VSBIQ : TernaryVRRd<"vsbiq", 0xE7BF, int_s390_vsbiq, v128q, v128q, 4>; ++ ++ // Subtract with borrow compute borrow indication. ++ def VSBCBIQ : TernaryVRRd<"vsbcbiq", 0xE7BD, int_s390_vsbcbiq, ++ v128q, v128q, 4>; ++ ++ // Sum across doubleword. ++ def VSUMGH : BinaryVRRc<"vsumgh", 0xE765, z_vsum, v128g, v128h, 1>; ++ def VSUMGF : BinaryVRRc<"vsumgf", 0xE765, z_vsum, v128g, v128f, 2>; ++ ++ // Sum across quadword. ++ def VSUMQF : BinaryVRRc<"vsumqf", 0xE767, z_vsum, v128q, v128f, 2>; ++ def VSUMQG : BinaryVRRc<"vsumqg", 0xE767, z_vsum, v128q, v128g, 3>; ++ ++ // Sum across word. ++ def VSUMB : BinaryVRRc<"vsumb", 0xE764, z_vsum, v128f, v128b, 0>; ++ def VSUMH : BinaryVRRc<"vsumh", 0xE764, z_vsum, v128f, v128h, 1>; ++} ++ ++// Instantiate the bitwise ops for type TYPE. ++multiclass BitwiseVectorOps { ++ let Predicates = [FeatureVector] in { ++ def : Pat<(type (and VR128:$x, VR128:$y)), (VN VR128:$x, VR128:$y)>; ++ def : Pat<(type (and VR128:$x, (z_vnot VR128:$y))), ++ (VNC VR128:$x, VR128:$y)>; ++ def : Pat<(type (or VR128:$x, VR128:$y)), (VO VR128:$x, VR128:$y)>; ++ def : Pat<(type (xor VR128:$x, VR128:$y)), (VX VR128:$x, VR128:$y)>; ++ def : Pat<(type (or (and VR128:$x, VR128:$z), ++ (and VR128:$y, (z_vnot VR128:$z)))), ++ (VSEL VR128:$x, VR128:$y, VR128:$z)>; ++ def : Pat<(type (z_vnot (or VR128:$x, VR128:$y))), ++ (VNO VR128:$x, VR128:$y)>; ++ def : Pat<(type (z_vnot VR128:$x)), (VNO VR128:$x, VR128:$x)>; ++ } ++} ++ ++defm : BitwiseVectorOps; ++defm : BitwiseVectorOps; ++defm : BitwiseVectorOps; ++defm : BitwiseVectorOps; ++ ++// Instantiate additional patterns for absolute-related expressions on ++// type TYPE. LC is the negate instruction for TYPE and LP is the absolute ++// instruction. 
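The multiclass below matches, among other forms, the classic branch-free absolute-value idiom built from an arithmetic shift. A scalar C++ sketch of that idiom (hypothetical helper; lane arithmetic wraps on INT_MIN, so unsigned math keeps the model well defined):

#include <cstdint>

// SignMask is all ones when X < 0 and all zeros otherwise, so the OR picks
// either -X or X, the same shape as the (or (and (z_vsra_by_scalar ...),
// ...) ...) patterns matched below.
static int32_t absIdiom(int32_t X) {
  uint32_t U = static_cast<uint32_t>(X);
  uint32_t SignMask = static_cast<uint32_t>(X >> 31); // arithmetic shift
  return static_cast<int32_t>((SignMask & (0u - U)) | (~SignMask & U));
}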
++multiclass IntegerAbsoluteVectorOps { ++ let Predicates = [FeatureVector] in { ++ def : Pat<(type (vselect (type (z_vicmph_zero VR128:$x)), ++ (z_vneg VR128:$x), VR128:$x)), ++ (lc (lp VR128:$x))>; ++ def : Pat<(type (vselect (type (z_vnot (z_vicmph_zero VR128:$x))), ++ VR128:$x, (z_vneg VR128:$x))), ++ (lc (lp VR128:$x))>; ++ def : Pat<(type (vselect (type (z_vicmpl_zero VR128:$x)), ++ VR128:$x, (z_vneg VR128:$x))), ++ (lc (lp VR128:$x))>; ++ def : Pat<(type (vselect (type (z_vnot (z_vicmpl_zero VR128:$x))), ++ (z_vneg VR128:$x), VR128:$x)), ++ (lc (lp VR128:$x))>; ++ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), ++ (z_vneg VR128:$x)), ++ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), ++ VR128:$x))), ++ (lp VR128:$x)>; ++ def : Pat<(type (or (and (z_vsra_by_scalar VR128:$x, (i32 shift)), ++ VR128:$x), ++ (and (z_vnot (z_vsra_by_scalar VR128:$x, (i32 shift))), ++ (z_vneg VR128:$x)))), ++ (lc (lp VR128:$x))>; ++ } ++} ++ ++defm : IntegerAbsoluteVectorOps; ++defm : IntegerAbsoluteVectorOps; ++defm : IntegerAbsoluteVectorOps; ++defm : IntegerAbsoluteVectorOps; ++ ++// Instantiate minimum- and maximum-related patterns for TYPE. CMPH is the ++// signed or unsigned "set if greater than" comparison instruction and ++// MIN and MAX are the associated minimum and maximum instructions. ++multiclass IntegerMinMaxVectorOps { ++ let Predicates = [FeatureVector] in { ++ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$x, VR128:$y)), ++ (max VR128:$x, VR128:$y)>; ++ def : Pat<(type (vselect (cmph VR128:$x, VR128:$y), VR128:$y, VR128:$x)), ++ (min VR128:$x, VR128:$y)>; ++ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), ++ VR128:$x, VR128:$y)), ++ (min VR128:$x, VR128:$y)>; ++ def : Pat<(type (vselect (z_vnot (cmph VR128:$x, VR128:$y)), ++ VR128:$y, VR128:$x)), ++ (max VR128:$x, VR128:$y)>; ++ } ++} ++ ++// Signed min/max. ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++ ++// Unsigned min/max. ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++defm : IntegerMinMaxVectorOps; ++ ++//===----------------------------------------------------------------------===// ++// Integer comparison ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Element compare. ++ let Defs = [CC] in { ++ def VECB : CompareVRRa<"vecb", 0xE7DB, null_frag, v128b, 0>; ++ def VECH : CompareVRRa<"vech", 0xE7DB, null_frag, v128h, 1>; ++ def VECF : CompareVRRa<"vecf", 0xE7DB, null_frag, v128f, 2>; ++ def VECG : CompareVRRa<"vecg", 0xE7DB, null_frag, v128g, 3>; ++ } ++ ++ // Element compare logical. ++ let Defs = [CC] in { ++ def VECLB : CompareVRRa<"veclb", 0xE7D9, null_frag, v128b, 0>; ++ def VECLH : CompareVRRa<"veclh", 0xE7D9, null_frag, v128h, 1>; ++ def VECLF : CompareVRRa<"veclf", 0xE7D9, null_frag, v128f, 2>; ++ def VECLG : CompareVRRa<"veclg", 0xE7D9, null_frag, v128g, 3>; ++ } ++ ++ // Compare equal. ++ defm VCEQB : BinaryVRRbSPair<"vceqb", 0xE7F8, z_vicmpe, z_vicmpes, ++ v128b, v128b, 0>; ++ defm VCEQH : BinaryVRRbSPair<"vceqh", 0xE7F8, z_vicmpe, z_vicmpes, ++ v128h, v128h, 1>; ++ defm VCEQF : BinaryVRRbSPair<"vceqf", 0xE7F8, z_vicmpe, z_vicmpes, ++ v128f, v128f, 2>; ++ defm VCEQG : BinaryVRRbSPair<"vceqg", 0xE7F8, z_vicmpe, z_vicmpes, ++ v128g, v128g, 3>; ++ ++ // Compare high. 
++ defm VCHB : BinaryVRRbSPair<"vchb", 0xE7FB, z_vicmph, z_vicmphs, ++ v128b, v128b, 0>; ++ defm VCHH : BinaryVRRbSPair<"vchh", 0xE7FB, z_vicmph, z_vicmphs, ++ v128h, v128h, 1>; ++ defm VCHF : BinaryVRRbSPair<"vchf", 0xE7FB, z_vicmph, z_vicmphs, ++ v128f, v128f, 2>; ++ defm VCHG : BinaryVRRbSPair<"vchg", 0xE7FB, z_vicmph, z_vicmphs, ++ v128g, v128g, 3>; ++ ++ // Compare high logical. ++ defm VCHLB : BinaryVRRbSPair<"vchlb", 0xE7F9, z_vicmphl, z_vicmphls, ++ v128b, v128b, 0>; ++ defm VCHLH : BinaryVRRbSPair<"vchlh", 0xE7F9, z_vicmphl, z_vicmphls, ++ v128h, v128h, 1>; ++ defm VCHLF : BinaryVRRbSPair<"vchlf", 0xE7F9, z_vicmphl, z_vicmphls, ++ v128f, v128f, 2>; ++ defm VCHLG : BinaryVRRbSPair<"vchlg", 0xE7F9, z_vicmphl, z_vicmphls, ++ v128g, v128g, 3>; ++ ++ // Test under mask. ++ let Defs = [CC] in ++ def VTM : CompareVRRa<"vtm", 0xE7D8, z_vtm, v128b, 0>; ++} ++ ++//===----------------------------------------------------------------------===// ++// Floating-point arithmetic ++//===----------------------------------------------------------------------===// ++ ++// See comments in SystemZInstrFP.td for the suppression flags and ++// rounding modes. ++multiclass VectorRounding { ++ def : FPConversion; ++ def : FPConversion; ++ def : FPConversion; ++ def : FPConversion; ++ def : FPConversion; ++ def : FPConversion; ++} ++ ++let Predicates = [FeatureVector] in { ++ // Add. ++ def VFADB : BinaryVRRc<"vfadb", 0xE7E3, fadd, v128db, v128db, 3, 0>; ++ def WFADB : BinaryVRRc<"wfadb", 0xE7E3, fadd, v64db, v64db, 3, 8>; ++ ++ // Convert from fixed 64-bit. ++ def VCDGB : TernaryVRRa<"vcdgb", 0xE7C3, null_frag, v128db, v128g, 3, 0>; ++ def WCDGB : TernaryVRRa<"wcdgb", 0xE7C3, null_frag, v64db, v64g, 3, 8>; ++ def : FPConversion; ++ ++ // Convert from logical 64-bit. ++ def VCDLGB : TernaryVRRa<"vcdlgb", 0xE7C1, null_frag, v128db, v128g, 3, 0>; ++ def WCDLGB : TernaryVRRa<"wcdlgb", 0xE7C1, null_frag, v64db, v64g, 3, 8>; ++ def : FPConversion; ++ ++ // Convert to fixed 64-bit. ++ def VCGDB : TernaryVRRa<"vcgdb", 0xE7C2, null_frag, v128g, v128db, 3, 0>; ++ def WCGDB : TernaryVRRa<"wcgdb", 0xE7C2, null_frag, v64g, v64db, 3, 8>; ++ // Rounding mode should agree with SystemZInstrFP.td. ++ def : FPConversion; ++ ++ // Convert to logical 64-bit. ++ def VCLGDB : TernaryVRRa<"vclgdb", 0xE7C0, null_frag, v128g, v128db, 3, 0>; ++ def WCLGDB : TernaryVRRa<"wclgdb", 0xE7C0, null_frag, v64g, v64db, 3, 8>; ++ // Rounding mode should agree with SystemZInstrFP.td. ++ def : FPConversion; ++ ++ // Divide. ++ def VFDDB : BinaryVRRc<"vfddb", 0xE7E5, fdiv, v128db, v128db, 3, 0>; ++ def WFDDB : BinaryVRRc<"wfddb", 0xE7E5, fdiv, v64db, v64db, 3, 8>; ++ ++ // Load FP integer. ++ def VFIDB : TernaryVRRa<"vfidb", 0xE7C7, int_s390_vfidb, v128db, v128db, 3, 0>; ++ def WFIDB : TernaryVRRa<"wfidb", 0xE7C7, null_frag, v64db, v64db, 3, 8>; ++ defm : VectorRounding; ++ defm : VectorRounding; ++ ++ // Load lengthened. ++ def VLDEB : UnaryVRRa<"vldeb", 0xE7C4, z_vextend, v128db, v128eb, 2, 0>; ++ def WLDEB : UnaryVRRa<"wldeb", 0xE7C4, fextend, v64db, v32eb, 2, 8>; ++ ++ // Load rounded, ++ def VLEDB : TernaryVRRa<"vledb", 0xE7C5, null_frag, v128eb, v128db, 3, 0>; ++ def WLEDB : TernaryVRRa<"wledb", 0xE7C5, null_frag, v32eb, v64db, 3, 8>; ++ def : Pat<(v4f32 (z_vround (v2f64 VR128:$src))), (VLEDB VR128:$src, 0, 0)>; ++ def : FPConversion; ++ ++ // Multiply. ++ def VFMDB : BinaryVRRc<"vfmdb", 0xE7E7, fmul, v128db, v128db, 3, 0>; ++ def WFMDB : BinaryVRRc<"wfmdb", 0xE7E7, fmul, v64db, v64db, 3, 8>; ++ ++ // Multiply and add. 
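Before the fused multiply definitions that follow: VFMADB/WFMADB select LLVM's fma node, a multiply-add with a single rounding step, and the subtract forms simply negate the addend. A scalar C++ analogue using the standard library:

#include <cmath>

// One rounding for A*B+C, unlike the two roundings of (A*B)+C.
double fmaModel(double A, double B, double C) { return std::fma(A, B, C); }

// "Multiply and subtract" is the same operation with the addend negated.
double fmsModel(double A, double B, double C) { return std::fma(A, B, -C); }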
++ def VFMADB : TernaryVRRe<"vfmadb", 0xE78F, fma, v128db, v128db, 0, 3>; ++ def WFMADB : TernaryVRRe<"wfmadb", 0xE78F, fma, v64db, v64db, 8, 3>; ++ ++ // Multiply and subtract. ++ def VFMSDB : TernaryVRRe<"vfmsdb", 0xE78E, fms, v128db, v128db, 0, 3>; ++ def WFMSDB : TernaryVRRe<"wfmsdb", 0xE78E, fms, v64db, v64db, 8, 3>; ++ ++ // Load complement, ++ def VFLCDB : UnaryVRRa<"vflcdb", 0xE7CC, fneg, v128db, v128db, 3, 0, 0>; ++ def WFLCDB : UnaryVRRa<"wflcdb", 0xE7CC, fneg, v64db, v64db, 3, 8, 0>; ++ ++ // Load negative. ++ def VFLNDB : UnaryVRRa<"vflndb", 0xE7CC, fnabs, v128db, v128db, 3, 0, 1>; ++ def WFLNDB : UnaryVRRa<"wflndb", 0xE7CC, fnabs, v64db, v64db, 3, 8, 1>; ++ ++ // Load positive. ++ def VFLPDB : UnaryVRRa<"vflpdb", 0xE7CC, fabs, v128db, v128db, 3, 0, 2>; ++ def WFLPDB : UnaryVRRa<"wflpdb", 0xE7CC, fabs, v64db, v64db, 3, 8, 2>; ++ ++ // Square root. ++ def VFSQDB : UnaryVRRa<"vfsqdb", 0xE7CE, fsqrt, v128db, v128db, 3, 0>; ++ def WFSQDB : UnaryVRRa<"wfsqdb", 0xE7CE, fsqrt, v64db, v64db, 3, 8>; ++ ++ // Subtract. ++ def VFSDB : BinaryVRRc<"vfsdb", 0xE7E2, fsub, v128db, v128db, 3, 0>; ++ def WFSDB : BinaryVRRc<"wfsdb", 0xE7E2, fsub, v64db, v64db, 3, 8>; ++ ++ // Test data class immediate. ++ let Defs = [CC] in { ++ def VFTCIDB : BinaryVRIe<"vftcidb", 0xE74A, z_vftci, v128g, v128db, 3, 0>; ++ def WFTCIDB : BinaryVRIe<"wftcidb", 0xE74A, null_frag, v64g, v64db, 3, 8>; ++ } ++} ++ ++//===----------------------------------------------------------------------===// ++// Floating-point comparison ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ // Compare scalar. ++ let Defs = [CC] in ++ def WFCDB : CompareVRRa<"wfcdb", 0xE7CB, z_fcmp, v64db, 3>; ++ ++ // Compare and signal scalar. ++ let Defs = [CC] in ++ def WFKDB : CompareVRRa<"wfkdb", 0xE7CA, null_frag, v64db, 3>; ++ ++ // Compare equal. ++ defm VFCEDB : BinaryVRRcSPair<"vfcedb", 0xE7E8, z_vfcmpe, z_vfcmpes, ++ v128g, v128db, 3, 0>; ++ defm WFCEDB : BinaryVRRcSPair<"wfcedb", 0xE7E8, null_frag, null_frag, ++ v64g, v64db, 3, 8>; ++ ++ // Compare high. ++ defm VFCHDB : BinaryVRRcSPair<"vfchdb", 0xE7EB, z_vfcmph, z_vfcmphs, ++ v128g, v128db, 3, 0>; ++ defm WFCHDB : BinaryVRRcSPair<"wfchdb", 0xE7EB, null_frag, null_frag, ++ v64g, v64db, 3, 8>; ++ ++ // Compare high or equal. 
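An aside on the scalar compares defined at the top of this block, before the compare-high-or-equal pair that closes it: WFCDB is the quiet comparison, while WFKDB ("compare and signal") also raises the IEEE invalid exception for quiet NaN operands. Standard C++ exposes the same distinction (a sketch, assuming the default floating-point environment):

#include <cmath>

// Quiet predicate: no FE_INVALID is raised for quiet NaNs (like WFCDB).
bool quietLess(double A, double B) { return std::isless(A, B); }

// Built-in relationals are signaling predicates: a quiet NaN operand
// raises FE_INVALID (the behaviour WFKDB provides).
bool signalingLess(double A, double B) { return A < B; }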
++  defm VFCHEDB : BinaryVRRcSPair<"vfchedb", 0xE7EA, z_vfcmphe, z_vfcmphes,
++                                 v128g, v128db, 3, 0>;
++  defm WFCHEDB : BinaryVRRcSPair<"wfchedb", 0xE7EA, null_frag, null_frag,
++                                 v64g, v64db, 3, 8>;
++}
++
++//===----------------------------------------------------------------------===//
++// Conversions
++//===----------------------------------------------------------------------===//
++
++def : Pat<(v16i8 (bitconvert (v8i16 VR128:$src))), (v16i8 VR128:$src)>;
++def : Pat<(v16i8 (bitconvert (v4i32 VR128:$src))), (v16i8 VR128:$src)>;
++def : Pat<(v16i8 (bitconvert (v2i64 VR128:$src))), (v16i8 VR128:$src)>;
++def : Pat<(v16i8 (bitconvert (v4f32 VR128:$src))), (v16i8 VR128:$src)>;
++def : Pat<(v16i8 (bitconvert (v2f64 VR128:$src))), (v16i8 VR128:$src)>;
++
++def : Pat<(v8i16 (bitconvert (v16i8 VR128:$src))), (v8i16 VR128:$src)>;
++def : Pat<(v8i16 (bitconvert (v4i32 VR128:$src))), (v8i16 VR128:$src)>;
++def : Pat<(v8i16 (bitconvert (v2i64 VR128:$src))), (v8i16 VR128:$src)>;
++def : Pat<(v8i16 (bitconvert (v4f32 VR128:$src))), (v8i16 VR128:$src)>;
++def : Pat<(v8i16 (bitconvert (v2f64 VR128:$src))), (v8i16 VR128:$src)>;
++
++def : Pat<(v4i32 (bitconvert (v16i8 VR128:$src))), (v4i32 VR128:$src)>;
++def : Pat<(v4i32 (bitconvert (v8i16 VR128:$src))), (v4i32 VR128:$src)>;
++def : Pat<(v4i32 (bitconvert (v2i64 VR128:$src))), (v4i32 VR128:$src)>;
++def : Pat<(v4i32 (bitconvert (v4f32 VR128:$src))), (v4i32 VR128:$src)>;
++def : Pat<(v4i32 (bitconvert (v2f64 VR128:$src))), (v4i32 VR128:$src)>;
++
++def : Pat<(v2i64 (bitconvert (v16i8 VR128:$src))), (v2i64 VR128:$src)>;
++def : Pat<(v2i64 (bitconvert (v8i16 VR128:$src))), (v2i64 VR128:$src)>;
++def : Pat<(v2i64 (bitconvert (v4i32 VR128:$src))), (v2i64 VR128:$src)>;
++def : Pat<(v2i64 (bitconvert (v4f32 VR128:$src))), (v2i64 VR128:$src)>;
++def : Pat<(v2i64 (bitconvert (v2f64 VR128:$src))), (v2i64 VR128:$src)>;
++
++def : Pat<(v4f32 (bitconvert (v16i8 VR128:$src))), (v4f32 VR128:$src)>;
++def : Pat<(v4f32 (bitconvert (v8i16 VR128:$src))), (v4f32 VR128:$src)>;
++def : Pat<(v4f32 (bitconvert (v4i32 VR128:$src))), (v4f32 VR128:$src)>;
++def : Pat<(v4f32 (bitconvert (v2i64 VR128:$src))), (v4f32 VR128:$src)>;
++def : Pat<(v4f32 (bitconvert (v2f64 VR128:$src))), (v4f32 VR128:$src)>;
++
++def : Pat<(v2f64 (bitconvert (v16i8 VR128:$src))), (v2f64 VR128:$src)>;
++def : Pat<(v2f64 (bitconvert (v8i16 VR128:$src))), (v2f64 VR128:$src)>;
++def : Pat<(v2f64 (bitconvert (v4i32 VR128:$src))), (v2f64 VR128:$src)>;
++def : Pat<(v2f64 (bitconvert (v2i64 VR128:$src))), (v2f64 VR128:$src)>;
++def : Pat<(v2f64 (bitconvert (v4f32 VR128:$src))), (v2f64 VR128:$src)>;
++
++//===----------------------------------------------------------------------===//
++// Replicating scalars
++//===----------------------------------------------------------------------===//
++
++// Define patterns for replicating a scalar GR32 into a vector of type TYPE.
++// INDEX is the element that holds the scalar after VLVGP32, i.e. 8 divided
++// by the element size in bytes, minus 1.
++class VectorReplicateScalar<ValueType type, Instruction insn, bits<16> index>
++  : Pat<(type (z_replicate GR32:$scalar)),
++        (insn (VLVGP32 GR32:$scalar, GR32:$scalar), index)>;
++
++def : VectorReplicateScalar<v16i8, VREPB, 7>;
++def : VectorReplicateScalar<v8i16, VREPH, 3>;
++def : VectorReplicateScalar<v4i32, VREPF, 1>;
++
++// i64 replications are just a single instruction.
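All the bitconvert patterns above intentionally select to the unmodified input register: a 128-bit vector bitcast changes only the type, never the bits, so it costs nothing. The scalar C++ equivalent of such a reinterpretation, for intuition (hypothetical helper; the single-instruction v2i64 replication mentioned above follows right after this aside):

#include <cstdint>
#include <cstring>

// Reinterpret the bits of a double as an integer with no value conversion,
// the scalar analogue of a no-op vector bitconvert.
static uint64_t reinterpretBits(double D) {
  uint64_t U;
  std::memcpy(&U, &D, sizeof U);
  return U;
}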
++def : Pat<(v2i64 (z_replicate GR64:$scalar)), ++ (VLVGP GR64:$scalar, GR64:$scalar)>; ++ ++//===----------------------------------------------------------------------===// ++// Floating-point insertion and extraction ++//===----------------------------------------------------------------------===// ++ ++// Moving 32-bit values between GPRs and FPRs can be done using VLVGF ++// and VLGVF. ++def LEFR : UnaryAliasVRS; ++def LFER : UnaryAliasVRS; ++def : Pat<(f32 (bitconvert (i32 GR32:$src))), (LEFR GR32:$src)>; ++def : Pat<(i32 (bitconvert (f32 VR32:$src))), ++ (EXTRACT_SUBREG (LFER VR32:$src), subreg_l32)>; ++ ++// Floating-point values are stored in element 0 of the corresponding ++// vector register. Scalar to vector conversion is just a subreg and ++// scalar replication can just replicate element 0 of the vector register. ++multiclass ScalarToVectorFP { ++ def : Pat<(vt (scalar_to_vector cls:$scalar)), ++ (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, subreg)>; ++ def : Pat<(vt (z_replicate cls:$scalar)), ++ (vrep (INSERT_SUBREG (vt (IMPLICIT_DEF)), cls:$scalar, ++ subreg), 0)>; ++} ++defm : ScalarToVectorFP; ++defm : ScalarToVectorFP; ++ ++// Match v2f64 insertions. The AddedComplexity counters the 3 added by ++// TableGen for the base register operand in VLVG-based integer insertions ++// and ensures that this version is strictly better. ++let AddedComplexity = 4 in { ++ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 0), ++ (VPDI (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, ++ subreg_r64), VR128:$vec, 1)>; ++ def : Pat<(z_vector_insert (v2f64 VR128:$vec), FP64:$elt, 1), ++ (VPDI VR128:$vec, (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FP64:$elt, ++ subreg_r64), 0)>; ++} ++ ++// We extract floating-point element X by replicating (for elements other ++// than 0) and then taking a high subreg. The AddedComplexity counters the ++// 3 added by TableGen for the base register operand in VLGV-based integer ++// extractions and ensures that this version is strictly better. 
++let AddedComplexity = 4 in { ++ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), 0)), ++ (EXTRACT_SUBREG VR128:$vec, subreg_r32)>; ++ def : Pat<(f32 (z_vector_extract (v4f32 VR128:$vec), imm32zx2:$index)), ++ (EXTRACT_SUBREG (VREPF VR128:$vec, imm32zx2:$index), subreg_r32)>; ++ ++ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), 0)), ++ (EXTRACT_SUBREG VR128:$vec, subreg_r64)>; ++ def : Pat<(f64 (z_vector_extract (v2f64 VR128:$vec), imm32zx1:$index)), ++ (EXTRACT_SUBREG (VREPG VR128:$vec, imm32zx1:$index), subreg_r64)>; ++} ++ ++//===----------------------------------------------------------------------===// ++// String instructions ++//===----------------------------------------------------------------------===// ++ ++let Predicates = [FeatureVector] in { ++ defm VFAEB : TernaryVRRbSPair<"vfaeb", 0xE782, int_s390_vfaeb, z_vfae_cc, ++ v128b, v128b, 0, 0>; ++ defm VFAEH : TernaryVRRbSPair<"vfaeh", 0xE782, int_s390_vfaeh, z_vfae_cc, ++ v128h, v128h, 1, 0>; ++ defm VFAEF : TernaryVRRbSPair<"vfaef", 0xE782, int_s390_vfaef, z_vfae_cc, ++ v128f, v128f, 2, 0>; ++ defm VFAEZB : TernaryVRRbSPair<"vfaezb", 0xE782, int_s390_vfaezb, z_vfaez_cc, ++ v128b, v128b, 0, 2>; ++ defm VFAEZH : TernaryVRRbSPair<"vfaezh", 0xE782, int_s390_vfaezh, z_vfaez_cc, ++ v128h, v128h, 1, 2>; ++ defm VFAEZF : TernaryVRRbSPair<"vfaezf", 0xE782, int_s390_vfaezf, z_vfaez_cc, ++ v128f, v128f, 2, 2>; ++ ++ defm VFEEB : BinaryVRRbSPair<"vfeeb", 0xE780, int_s390_vfeeb, z_vfee_cc, ++ v128b, v128b, 0, 0, 1>; ++ defm VFEEH : BinaryVRRbSPair<"vfeeh", 0xE780, int_s390_vfeeh, z_vfee_cc, ++ v128h, v128h, 1, 0, 1>; ++ defm VFEEF : BinaryVRRbSPair<"vfeef", 0xE780, int_s390_vfeef, z_vfee_cc, ++ v128f, v128f, 2, 0, 1>; ++ defm VFEEZB : BinaryVRRbSPair<"vfeezb", 0xE780, int_s390_vfeezb, z_vfeez_cc, ++ v128b, v128b, 0, 2, 3>; ++ defm VFEEZH : BinaryVRRbSPair<"vfeezh", 0xE780, int_s390_vfeezh, z_vfeez_cc, ++ v128h, v128h, 1, 2, 3>; ++ defm VFEEZF : BinaryVRRbSPair<"vfeezf", 0xE780, int_s390_vfeezf, z_vfeez_cc, ++ v128f, v128f, 2, 2, 3>; ++ ++ defm VFENEB : BinaryVRRbSPair<"vfeneb", 0xE781, int_s390_vfeneb, z_vfene_cc, ++ v128b, v128b, 0, 0, 1>; ++ defm VFENEH : BinaryVRRbSPair<"vfeneh", 0xE781, int_s390_vfeneh, z_vfene_cc, ++ v128h, v128h, 1, 0, 1>; ++ defm VFENEF : BinaryVRRbSPair<"vfenef", 0xE781, int_s390_vfenef, z_vfene_cc, ++ v128f, v128f, 2, 0, 1>; ++ defm VFENEZB : BinaryVRRbSPair<"vfenezb", 0xE781, int_s390_vfenezb, ++ z_vfenez_cc, v128b, v128b, 0, 2, 3>; ++ defm VFENEZH : BinaryVRRbSPair<"vfenezh", 0xE781, int_s390_vfenezh, ++ z_vfenez_cc, v128h, v128h, 1, 2, 3>; ++ defm VFENEZF : BinaryVRRbSPair<"vfenezf", 0xE781, int_s390_vfenezf, ++ z_vfenez_cc, v128f, v128f, 2, 2, 3>; ++ ++ defm VISTRB : UnaryVRRaSPair<"vistrb", 0xE75C, int_s390_vistrb, z_vistr_cc, ++ v128b, v128b, 0>; ++ defm VISTRH : UnaryVRRaSPair<"vistrh", 0xE75C, int_s390_vistrh, z_vistr_cc, ++ v128h, v128h, 1>; ++ defm VISTRF : UnaryVRRaSPair<"vistrf", 0xE75C, int_s390_vistrf, z_vistr_cc, ++ v128f, v128f, 2>; ++ ++ defm VSTRCB : QuaternaryVRRdSPair<"vstrcb", 0xE78A, int_s390_vstrcb, ++ z_vstrc_cc, v128b, v128b, 0, 0>; ++ defm VSTRCH : QuaternaryVRRdSPair<"vstrch", 0xE78A, int_s390_vstrch, ++ z_vstrc_cc, v128h, v128h, 1, 0>; ++ defm VSTRCF : QuaternaryVRRdSPair<"vstrcf", 0xE78A, int_s390_vstrcf, ++ z_vstrc_cc, v128f, v128f, 2, 0>; ++ defm VSTRCZB : QuaternaryVRRdSPair<"vstrczb", 0xE78A, int_s390_vstrczb, ++ z_vstrcz_cc, v128b, v128b, 0, 2>; ++ defm VSTRCZH : QuaternaryVRRdSPair<"vstrczh", 0xE78A, int_s390_vstrczh, ++ z_vstrcz_cc, v128h, v128h, 1, 2>; ++ defm VSTRCZF 
: QuaternaryVRRdSPair<"vstrczf", 0xE78A, int_s390_vstrczf,
++                                    z_vstrcz_cc, v128f, v128f, 2, 2>;
++}
+Index: llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp
+===================================================================
+--- /dev/null
++++ llvm-36/lib/Target/SystemZ/SystemZLDCleanup.cpp
+@@ -0,0 +1,143 @@
++//===-- SystemZLDCleanup.cpp - Clean up local-dynamic TLS accesses --------===//
++//
++// The LLVM Compiler Infrastructure
++//
++// This file is distributed under the University of Illinois Open Source
++// License. See LICENSE.TXT for details.
++//
++//===----------------------------------------------------------------------===//
++//
++// This pass combines multiple accesses to local-dynamic TLS variables so that
++// the TLS base address for the module is only fetched once per execution path
++// through the function.
++//
++//===----------------------------------------------------------------------===//
++
++#include "SystemZTargetMachine.h"
++#include "SystemZMachineFunctionInfo.h"
++#include "llvm/CodeGen/MachineDominators.h"
++#include "llvm/CodeGen/MachineFunctionPass.h"
++#include "llvm/CodeGen/MachineInstrBuilder.h"
++#include "llvm/CodeGen/MachineRegisterInfo.h"
++#include "llvm/Target/TargetInstrInfo.h"
++#include "llvm/Target/TargetMachine.h"
++#include "llvm/Target/TargetRegisterInfo.h"
++
++using namespace llvm;
++
++namespace {
++
++class SystemZLDCleanup : public MachineFunctionPass {
++public:
++  static char ID;
++  SystemZLDCleanup(const SystemZTargetMachine &tm)
++    : MachineFunctionPass(ID), TII(nullptr), MF(nullptr) {}
++
++  const char *getPassName() const override {
++    return "SystemZ Local Dynamic TLS Access Clean-up";
++  }
++
++  bool runOnMachineFunction(MachineFunction &MF) override;
++  void getAnalysisUsage(AnalysisUsage &AU) const override;
++
++private:
++  bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg);
++  MachineInstr *ReplaceTLSCall(MachineInstr *I, unsigned TLSBaseAddrReg);
++  MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg);
++
++  const SystemZInstrInfo *TII;
++  MachineFunction *MF;
++};
++
++char SystemZLDCleanup::ID = 0;
++
++} // end anonymous namespace
++
++FunctionPass *llvm::createSystemZLDCleanupPass(SystemZTargetMachine &TM) {
++  return new SystemZLDCleanup(TM);
++}
++
++void SystemZLDCleanup::getAnalysisUsage(AnalysisUsage &AU) const {
++  AU.setPreservesCFG();
++  AU.addRequired<MachineDominatorTree>();
++  MachineFunctionPass::getAnalysisUsage(AU);
++}
++
++bool SystemZLDCleanup::runOnMachineFunction(MachineFunction &F) {
++  TII = static_cast<const SystemZInstrInfo *>(F.getSubtarget().getInstrInfo());
++  MF = &F;
++
++  SystemZMachineFunctionInfo *MFI = F.getInfo<SystemZMachineFunctionInfo>();
++  if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
++    // No point folding accesses if there aren't at least two.
++    return false;
++  }
++
++  MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
++  return VisitNode(DT->getRootNode(), 0);
++}
++
++// Visit the dominator subtree rooted at Node in pre-order.
++// If TLSBaseAddrReg is non-null, then use that to replace any
++// TLS_LDCALL instructions. Otherwise, create the register
++// when the first such instruction is seen, and then use it
++// as we encounter more instructions.
++bool SystemZLDCleanup::VisitNode(MachineDomTreeNode *Node,
++                                 unsigned TLSBaseAddrReg) {
++  MachineBasicBlock *BB = Node->getBlock();
++  bool Changed = false;
++
++  // Traverse the current block.
++  for (auto I = BB->begin(), E = BB->end(); I != E; ++I) {
++    switch (I->getOpcode()) {
++    case SystemZ::TLS_LDCALL:
++      if (TLSBaseAddrReg)
++        I = ReplaceTLSCall(I, TLSBaseAddrReg);
++      else
++        I = SetRegister(I, &TLSBaseAddrReg);
++      Changed = true;
++      break;
++    default:
++      break;
++    }
++  }
++
++  // Visit the children of this block in the dominator tree.
++  for (auto I = Node->begin(), E = Node->end(); I != E; ++I)
++    Changed |= VisitNode(*I, TLSBaseAddrReg);
++
++  return Changed;
++}
++
++// Replace the TLS_LDCALL instruction I with a copy from TLSBaseAddrReg,
++// returning the new instruction.
++MachineInstr *SystemZLDCleanup::ReplaceTLSCall(MachineInstr *I,
++                                               unsigned TLSBaseAddrReg) {
++  // Insert a Copy from TLSBaseAddrReg to R2.
++  MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
++                               TII->get(TargetOpcode::COPY), SystemZ::R2D)
++                           .addReg(TLSBaseAddrReg);
++
++  // Erase the TLS_LDCALL instruction.
++  I->eraseFromParent();
++
++  return Copy;
++}
++
++// Create a virtual register in *TLSBaseAddrReg, and populate it by
++// inserting a copy instruction after I. Returns the new instruction.
++MachineInstr *SystemZLDCleanup::SetRegister(MachineInstr *I,
++                                            unsigned *TLSBaseAddrReg) {
++  // Create a virtual register for the TLS base address.
++  MachineRegisterInfo &RegInfo = MF->getRegInfo();
++  *TLSBaseAddrReg = RegInfo.createVirtualRegister(&SystemZ::GR64BitRegClass);
++
++  // Insert a copy from R2 to TLSBaseAddrReg.
++  MachineInstr *Next = I->getNextNode();
++  MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
++                               TII->get(TargetOpcode::COPY), *TLSBaseAddrReg)
++                           .addReg(SystemZ::R2D);
++
++  return Copy;
++}
++
+Index: llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZMCInstLower.cpp
++++ llvm-36/lib/Target/SystemZ/SystemZMCInstLower.cpp
+@@ -22,6 +22,8 @@ static MCSymbolRefExpr::VariantKind getV
+     return MCSymbolRefExpr::VK_None;
+   case SystemZII::MO_GOT:
+     return MCSymbolRefExpr::VK_GOT;
++  case SystemZII::MO_INDNTPOFF:
++    return MCSymbolRefExpr::VK_INDNTPOFF;
+   }
+   llvm_unreachable("Unrecognised MO_ACCESS_MODEL");
+ }
+Index: llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
++++ llvm-36/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+@@ -23,11 +23,13 @@ class SystemZMachineFunctionInfo : publi
+   unsigned VarArgsFrameIndex;
+   unsigned RegSaveFrameIndex;
+   bool ManipulatesSP;
++  unsigned NumLocalDynamics;
+
+ public:
+   explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+     : LowSavedGPR(0), HighSavedGPR(0), VarArgsFirstGPR(0), VarArgsFirstFPR(0),
+-      VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false) {}
++      VarArgsFrameIndex(0), RegSaveFrameIndex(0), ManipulatesSP(false),
++      NumLocalDynamics(0) {}
+
+   // Get and set the first call-saved GPR that should be saved and restored
+   // by this function. This is 0 if no GPRs need to be saved or restored.
+@@ -61,6 +63,10 @@ public:
+   // e.g. through STACKSAVE or STACKRESTORE.
+   bool getManipulatesSP() const { return ManipulatesSP; }
+   void setManipulatesSP(bool MSP) { ManipulatesSP = MSP; }
++
++  // Count number of local-dynamic TLS symbols used.
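The NumLocalDynamics counter above is what gates this pass: lowering bumps it once per local-dynamic access, and SystemZLDCleanup only does work once it reaches two. For illustration, a minimal C++ translation unit that produces two such accesses when built with -fPIC (a hypothetical example, not taken from the patch):

// Hypothetical: with -fPIC, file-local thread_local variables use the
// local-dynamic TLS model, so each access below lowers to a TLS_LDCALL
// (a call to __tls_get_offset on SystemZ).
static thread_local int Counter = 0;
static thread_local int Limit = 100;

int remaining() { return Limit - Counter; }

Before the pass runs, remaining() fetches the TLS base twice; afterwards the dominating TLS_LDCALL survives and the second access becomes a plain COPY from the cached virtual register.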
++ unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; } ++ void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; } + }; + + } // end namespace llvm +Index: llvm-36/lib/Target/SystemZ/SystemZOperands.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZOperands.td ++++ llvm-36/lib/Target/SystemZ/SystemZOperands.td +@@ -16,6 +16,11 @@ class ImmediateAsmOperand + let Name = name; + let RenderMethod = "addImmOperands"; + } ++class ImmediateTLSAsmOperand ++ : AsmOperandClass { ++ let Name = name; ++ let RenderMethod = "addImmTLSOperands"; ++} + + // Constructs both a DAG pattern and instruction operand for an immediate + // of type VT. PRED returns true if a node is acceptable and XFORM returns +@@ -34,6 +39,11 @@ class PCRelAsmOperand : Imm + let PredicateMethod = "isImm"; + let ParserMethod = "parsePCRel"##size; + } ++class PCRelTLSAsmOperand ++ : ImmediateTLSAsmOperand<"PCRelTLS"##size> { ++ let PredicateMethod = "isImmTLS"; ++ let ParserMethod = "parsePCRelTLS"##size; ++} + + // Constructs an operand for a PC-relative address with address type VT. + // ASMOP is the associated asm operand. +@@ -41,6 +51,10 @@ class PCRelOperand : Operand { ++ let PrintMethod = "printPCRelTLSOperand"; ++ let ParserMatchClass = asmop; ++} + + // Constructs both a DAG pattern and instruction operand for a PC-relative + // address with address size VT. SELF is the name of the operand and +@@ -64,6 +78,22 @@ class AddressAsmOperand ++ : Operand("i"##bitsize)> { ++ let PrintMethod = "print"##format##"Operand"; ++ let EncoderMethod = "get"##format##dispsize##length##"Encoding"; ++ let DecoderMethod = ++ "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; ++ let MIOperandInfo = operands; ++ let ParserMatchClass = ++ !cast(format##bitsize##"Disp"##dispsize##length); ++} ++ + // Constructs both a DAG pattern and instruction operand for an addressing mode. + // FORMAT, BITSIZE, DISPSIZE and LENGTH are the parameters to an associated + // AddressAsmOperand. OPERANDS is a list of NUMOPS individual operands +@@ -79,15 +109,7 @@ class AddressingMode("i"##bitsize), numops, + "select"##seltype##dispsize##suffix##length, + [add, sub, or, frameindex, z_adjdynalloc]>, +- Operand("i"##bitsize)> { +- let PrintMethod = "print"##format##"Operand"; +- let EncoderMethod = "get"##format##dispsize##length##"Encoding"; +- let DecoderMethod = +- "decode"##format##bitsize##"Disp"##dispsize##length##"Operand"; +- let MIOperandInfo = operands; +- let ParserMatchClass = +- !cast(format##bitsize##"Disp"##dispsize##length); +-} ++ AddressOperand; + + // An addressing mode with a base and displacement but no index. + class BDMode +@@ -111,6 +133,13 @@ class BDLMode("disp"##dispsize##"imm"##bitsize), + !cast("imm"##bitsize))>; + ++// An addressing mode with a base, displacement and a vector index. ++class BDVMode ++ : AddressOperand("ADDR"##bitsize), ++ !cast("disp"##dispsize##"imm"##bitsize), ++ !cast("VR128"))>; ++ + //===----------------------------------------------------------------------===// + // Extracting immediate operands from nodes + // These all create MVT::i64 nodes to ensure the value is not sign-extended +@@ -163,6 +192,16 @@ def UIMM8 : SDNodeXFormgetTargetConstant(uint8_t(N->getZExtValue()), MVT::i64); + }]>; + ++// Truncate an immediate to a 8-bit unsigned quantity and mask off low bit. 
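In plain C++ terms, the UIMM8EVEN transform defined next does the following (an illustrative sketch, not part of the patch):

#include <cstdint>

// Keep the low 8 bits of the value and clear bit 0 so the result is even.
constexpr std::uint64_t uimm8even(std::uint64_t V) { return V & 0xfe; }
static_assert(uimm8even(0x1ff) == 0xfe, "truncated to 8 bits, low bit cleared");

The even result matters for the imm32zx4even operand further down, whose comment notes that evenness is enforced during code generation only.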
++def UIMM8EVEN : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xfe, MVT::i64); ++}]>; ++ ++// Truncate an immediate to a 12-bit unsigned quantity. ++def UIMM12 : SDNodeXFormgetTargetConstant(N->getZExtValue() & 0xfff, MVT::i64); ++}]>; ++ + // Truncate an immediate to a 16-bit signed quantity. + def SIMM16 : SDNodeXFormgetTargetConstant(int16_t(N->getZExtValue()), MVT::i64); +@@ -192,10 +231,14 @@ def NEGIMM32 : SDNodeXForm; ++def U2Imm : ImmediateAsmOperand<"U2Imm">; ++def U3Imm : ImmediateAsmOperand<"U3Imm">; + def U4Imm : ImmediateAsmOperand<"U4Imm">; + def U6Imm : ImmediateAsmOperand<"U6Imm">; + def S8Imm : ImmediateAsmOperand<"S8Imm">; + def U8Imm : ImmediateAsmOperand<"U8Imm">; ++def U12Imm : ImmediateAsmOperand<"U12Imm">; + def S16Imm : ImmediateAsmOperand<"S16Imm">; + def U16Imm : ImmediateAsmOperand<"U16Imm">; + def S32Imm : ImmediateAsmOperand<"S32Imm">; +@@ -226,10 +269,28 @@ def imm32lh16c : Immediate; + + // Short immediates ++def imm32zx1 : Immediate(N->getZExtValue()); ++}], NOOP_SDNodeXForm, "U1Imm">; ++ ++def imm32zx2 : Immediate(N->getZExtValue()); ++}], NOOP_SDNodeXForm, "U2Imm">; ++ ++def imm32zx3 : Immediate(N->getZExtValue()); ++}], NOOP_SDNodeXForm, "U3Imm">; ++ + def imm32zx4 : Immediate(N->getZExtValue()); + }], NOOP_SDNodeXForm, "U4Imm">; + ++// Note: this enforces an even value during code generation only. ++// When used from the assembler, any 4-bit value is allowed. ++def imm32zx4even : Immediate(N->getZExtValue()); ++}], UIMM8EVEN, "U4Imm">; ++ + def imm32zx6 : Immediate(N->getZExtValue()); + }], NOOP_SDNodeXForm, "U6Imm">; +@@ -244,6 +305,10 @@ def imm32zx8 : Immediate; + ++def imm32zx12 : Immediate(N->getZExtValue()); ++}], UIMM12, "U12Imm">; ++ + def imm32sx16 : Immediate(N->getSExtValue()); + }], SIMM16, "S16Imm">; +@@ -370,6 +435,8 @@ def fpimmneg0 : PatLeaf<(fpimm), [{ retu + // PC-relative asm operands. + def PCRel16 : PCRelAsmOperand<"16">; + def PCRel32 : PCRelAsmOperand<"32">; ++def PCRelTLS16 : PCRelTLSAsmOperand<"16">; ++def PCRelTLS32 : PCRelTLSAsmOperand<"32">; + + // PC-relative offsets of a basic block. The offset is sign-extended + // and multiplied by 2. +@@ -382,6 +449,20 @@ def brtarget32 : PCRelOperand { } ++def brtarget16tls : PCRelTLSOperand { ++ let MIOperandInfo = (ops brtarget16:$func, tlssym:$sym); ++ let EncoderMethod = "getPC16DBLTLSEncoding"; ++ let DecoderMethod = "decodePC16DBLOperand"; ++} ++def brtarget32tls : PCRelTLSOperand { ++ let MIOperandInfo = (ops brtarget32:$func, tlssym:$sym); ++ let EncoderMethod = "getPC32DBLTLSEncoding"; ++ let DecoderMethod = "decodePC32DBLOperand"; ++} ++ + // A PC-relative offset of a global value. The offset is sign-extended + // and multiplied by 2. + def pcrel32 : PCRelAddress { +@@ -408,6 +489,7 @@ def BDAddr64Disp20 : AddressAsmOper + def BDXAddr64Disp12 : AddressAsmOperand<"BDXAddr", "64", "12">; + def BDXAddr64Disp20 : AddressAsmOperand<"BDXAddr", "64", "20">; + def BDLAddr64Disp12Len8 : AddressAsmOperand<"BDLAddr", "64", "12", "Len8">; ++def BDVAddr64Disp12 : AddressAsmOperand<"BDVAddr", "64", "12">; + + // DAG patterns and operands for addressing modes. 
Each mode has + // the form [] where: +@@ -420,6 +502,7 @@ def BDLAddr64Disp12Len8 : AddressAsmOper + // laaddr : like bdxaddr, but used for Load Address operations + // dynalloc : base + displacement + index + ADJDYNALLOC + // bdladdr : base + displacement with a length field ++// bdvaddr : base + displacement with a vector index + // + // is one of: + // 12 : the displacement is an unsigned 12-bit value +@@ -452,6 +535,7 @@ def dynalloc12only : BDXMode<"DynAllo + def laaddr12pair : BDXMode<"LAAddr", "64", "12", "Pair">; + def laaddr20pair : BDXMode<"LAAddr", "64", "20", "Pair">; + def bdladdr12onlylen8 : BDLMode<"BDLAddr", "64", "12", "Only", "8">; ++def bdvaddr12only : BDVMode< "64", "12">; + + //===----------------------------------------------------------------------===// + // Miscellaneous +Index: llvm-36/lib/Target/SystemZ/SystemZOperators.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZOperators.td ++++ llvm-36/lib/Target/SystemZ/SystemZOperators.td +@@ -79,6 +79,64 @@ def SDT_ZI32Intrinsic : SDTypeProf + def SDT_ZPrefetch : SDTypeProfile<0, 2, + [SDTCisVT<0, i32>, + SDTCisPtrTy<1>]>; ++def SDT_ZTBegin : SDTypeProfile<0, 2, ++ [SDTCisPtrTy<0>, ++ SDTCisVT<1, i32>]>; ++def SDT_ZInsertVectorElt : SDTypeProfile<1, 3, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<3, i32>]>; ++def SDT_ZExtractVectorElt : SDTypeProfile<1, 2, ++ [SDTCisVec<1>, ++ SDTCisVT<2, i32>]>; ++def SDT_ZReplicate : SDTypeProfile<1, 1, ++ [SDTCisVec<0>]>; ++def SDT_ZVecUnaryConv : SDTypeProfile<1, 1, ++ [SDTCisVec<0>, ++ SDTCisVec<1>]>; ++def SDT_ZVecUnary : SDTypeProfile<1, 1, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>]>; ++def SDT_ZVecBinary : SDTypeProfile<1, 2, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>]>; ++def SDT_ZVecBinaryInt : SDTypeProfile<1, 2, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisVT<2, i32>]>; ++def SDT_ZVecBinaryConv : SDTypeProfile<1, 2, ++ [SDTCisVec<0>, ++ SDTCisVec<1>, ++ SDTCisSameAs<1, 2>]>; ++def SDT_ZVecBinaryConvInt : SDTypeProfile<1, 2, ++ [SDTCisVec<0>, ++ SDTCisVec<1>, ++ SDTCisVT<2, i32>]>; ++def SDT_ZRotateMask : SDTypeProfile<1, 2, ++ [SDTCisVec<0>, ++ SDTCisVT<1, i32>, ++ SDTCisVT<2, i32>]>; ++def SDT_ZJoinDwords : SDTypeProfile<1, 2, ++ [SDTCisVT<0, v2i64>, ++ SDTCisVT<1, i64>, ++ SDTCisVT<2, i64>]>; ++def SDT_ZVecTernary : SDTypeProfile<1, 3, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisSameAs<0, 3>]>; ++def SDT_ZVecTernaryInt : SDTypeProfile<1, 3, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisVT<3, i32>]>; ++def SDT_ZVecQuaternaryInt : SDTypeProfile<1, 4, ++ [SDTCisVec<0>, ++ SDTCisSameAs<0, 1>, ++ SDTCisSameAs<0, 2>, ++ SDTCisSameAs<0, 3>, ++ SDTCisVT<4, i32>]>; + + //===----------------------------------------------------------------------===// + // Node definitions +@@ -90,6 +148,7 @@ def callseq_start : SDNode<"ISD::C + def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_CallSeqEnd, + [SDNPHasChain, SDNPSideEffect, SDNPOptInGlue, + SDNPOutGlue]>; ++def global_offset_table : SDNode<"ISD::GLOBAL_OFFSET_TABLE", SDTPtrLeaf>; + + // Nodes for SystemZISD::*. See SystemZISelLowering.h for more details. 
+ def z_retflag : SDNode<"SystemZISD::RET_FLAG", SDTNone, +@@ -100,6 +159,12 @@ def z_call : SDNode<"System + def z_sibcall : SDNode<"SystemZISD::SIBCALL", SDT_ZCall, + [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue, + SDNPVariadic]>; ++def z_tls_gdcall : SDNode<"SystemZISD::TLS_GDCALL", SDT_ZCall, ++ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, ++ SDNPVariadic]>; ++def z_tls_ldcall : SDNode<"SystemZISD::TLS_LDCALL", SDT_ZCall, ++ [SDNPHasChain, SDNPInGlue, SDNPOutGlue, ++ SDNPVariadic]>; + def z_pcrel_wrapper : SDNode<"SystemZISD::PCREL_WRAPPER", SDT_ZWrapPtr, []>; + def z_pcrel_offset : SDNode<"SystemZISD::PCREL_OFFSET", + SDT_ZWrapOffset, []>; +@@ -114,6 +179,7 @@ def z_select_ccmask : SDNode<"System + def z_adjdynalloc : SDNode<"SystemZISD::ADJDYNALLOC", SDT_ZAdjDynAlloc>; + def z_extract_access : SDNode<"SystemZISD::EXTRACT_ACCESS", + SDT_ZExtractAccess>; ++def z_popcnt : SDNode<"SystemZISD::POPCNT", SDTIntUnaryOp>; + def z_umul_lohi64 : SDNode<"SystemZISD::UMUL_LOHI64", SDT_ZGR128Binary64>; + def z_sdivrem32 : SDNode<"SystemZISD::SDIVREM32", SDT_ZGR128Binary32>; + def z_sdivrem64 : SDNode<"SystemZISD::SDIVREM64", SDT_ZGR128Binary64>; +@@ -123,6 +189,80 @@ def z_udivrem64 : SDNode<"System + def z_serialize : SDNode<"SystemZISD::SERIALIZE", SDTNone, + [SDNPHasChain, SDNPMayStore]>; + ++// Defined because the index is an i32 rather than a pointer. ++def z_vector_insert : SDNode<"ISD::INSERT_VECTOR_ELT", ++ SDT_ZInsertVectorElt>; ++def z_vector_extract : SDNode<"ISD::EXTRACT_VECTOR_ELT", ++ SDT_ZExtractVectorElt>; ++def z_byte_mask : SDNode<"SystemZISD::BYTE_MASK", SDT_ZReplicate>; ++def z_rotate_mask : SDNode<"SystemZISD::ROTATE_MASK", SDT_ZRotateMask>; ++def z_replicate : SDNode<"SystemZISD::REPLICATE", SDT_ZReplicate>; ++def z_join_dwords : SDNode<"SystemZISD::JOIN_DWORDS", SDT_ZJoinDwords>; ++def z_splat : SDNode<"SystemZISD::SPLAT", SDT_ZVecBinaryInt>; ++def z_merge_high : SDNode<"SystemZISD::MERGE_HIGH", SDT_ZVecBinary>; ++def z_merge_low : SDNode<"SystemZISD::MERGE_LOW", SDT_ZVecBinary>; ++def z_shl_double : SDNode<"SystemZISD::SHL_DOUBLE", SDT_ZVecTernaryInt>; ++def z_permute_dwords : SDNode<"SystemZISD::PERMUTE_DWORDS", ++ SDT_ZVecTernaryInt>; ++def z_permute : SDNode<"SystemZISD::PERMUTE", SDT_ZVecTernary>; ++def z_pack : SDNode<"SystemZISD::PACK", SDT_ZVecBinaryConv>; ++def z_packs_cc : SDNode<"SystemZISD::PACKS_CC", SDT_ZVecBinaryConv, ++ [SDNPOutGlue]>; ++def z_packls_cc : SDNode<"SystemZISD::PACKLS_CC", SDT_ZVecBinaryConv, ++ [SDNPOutGlue]>; ++def z_unpack_high : SDNode<"SystemZISD::UNPACK_HIGH", SDT_ZVecUnaryConv>; ++def z_unpackl_high : SDNode<"SystemZISD::UNPACKL_HIGH", SDT_ZVecUnaryConv>; ++def z_unpack_low : SDNode<"SystemZISD::UNPACK_LOW", SDT_ZVecUnaryConv>; ++def z_unpackl_low : SDNode<"SystemZISD::UNPACKL_LOW", SDT_ZVecUnaryConv>; ++def z_vshl_by_scalar : SDNode<"SystemZISD::VSHL_BY_SCALAR", ++ SDT_ZVecBinaryInt>; ++def z_vsrl_by_scalar : SDNode<"SystemZISD::VSRL_BY_SCALAR", ++ SDT_ZVecBinaryInt>; ++def z_vsra_by_scalar : SDNode<"SystemZISD::VSRA_BY_SCALAR", ++ SDT_ZVecBinaryInt>; ++def z_vsum : SDNode<"SystemZISD::VSUM", SDT_ZVecBinaryConv>; ++def z_vicmpe : SDNode<"SystemZISD::VICMPE", SDT_ZVecBinary>; ++def z_vicmph : SDNode<"SystemZISD::VICMPH", SDT_ZVecBinary>; ++def z_vicmphl : SDNode<"SystemZISD::VICMPHL", SDT_ZVecBinary>; ++def z_vicmpes : SDNode<"SystemZISD::VICMPES", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vicmphs : SDNode<"SystemZISD::VICMPHS", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vicmphls : SDNode<"SystemZISD::VICMPHLS", SDT_ZVecBinary, ++ 
[SDNPOutGlue]>; ++def z_vfcmpe : SDNode<"SystemZISD::VFCMPE", SDT_ZVecBinaryConv>; ++def z_vfcmph : SDNode<"SystemZISD::VFCMPH", SDT_ZVecBinaryConv>; ++def z_vfcmphe : SDNode<"SystemZISD::VFCMPHE", SDT_ZVecBinaryConv>; ++def z_vfcmpes : SDNode<"SystemZISD::VFCMPES", SDT_ZVecBinaryConv, ++ [SDNPOutGlue]>; ++def z_vfcmphs : SDNode<"SystemZISD::VFCMPHS", SDT_ZVecBinaryConv, ++ [SDNPOutGlue]>; ++def z_vfcmphes : SDNode<"SystemZISD::VFCMPHES", SDT_ZVecBinaryConv, ++ [SDNPOutGlue]>; ++def z_vextend : SDNode<"SystemZISD::VEXTEND", SDT_ZVecUnaryConv>; ++def z_vround : SDNode<"SystemZISD::VROUND", SDT_ZVecUnaryConv>; ++def z_vtm : SDNode<"SystemZISD::VTM", SDT_ZCmp, [SDNPOutGlue]>; ++def z_vfae_cc : SDNode<"SystemZISD::VFAE_CC", SDT_ZVecTernaryInt, ++ [SDNPOutGlue]>; ++def z_vfaez_cc : SDNode<"SystemZISD::VFAEZ_CC", SDT_ZVecTernaryInt, ++ [SDNPOutGlue]>; ++def z_vfee_cc : SDNode<"SystemZISD::VFEE_CC", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vfeez_cc : SDNode<"SystemZISD::VFEEZ_CC", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vfene_cc : SDNode<"SystemZISD::VFENE_CC", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vfenez_cc : SDNode<"SystemZISD::VFENEZ_CC", SDT_ZVecBinary, ++ [SDNPOutGlue]>; ++def z_vistr_cc : SDNode<"SystemZISD::VISTR_CC", SDT_ZVecUnary, ++ [SDNPOutGlue]>; ++def z_vstrc_cc : SDNode<"SystemZISD::VSTRC_CC", SDT_ZVecQuaternaryInt, ++ [SDNPOutGlue]>; ++def z_vstrcz_cc : SDNode<"SystemZISD::VSTRCZ_CC", ++ SDT_ZVecQuaternaryInt, [SDNPOutGlue]>; ++def z_vftci : SDNode<"SystemZISD::VFTCI", SDT_ZVecBinaryConvInt, ++ [SDNPOutGlue]>; ++ + class AtomicWOp + : SDNode<"SystemZISD::"##name, profile, + [SDNPHasChain, SDNPMayStore, SDNPMayLoad, SDNPMemOperand]>; +@@ -172,6 +312,19 @@ def z_prefetch : SDNode<"System + [SDNPHasChain, SDNPMayLoad, SDNPMayStore, + SDNPMemOperand]>; + ++def z_tbegin : SDNode<"SystemZISD::TBEGIN", SDT_ZTBegin, ++ [SDNPHasChain, SDNPOutGlue, SDNPMayStore, ++ SDNPSideEffect]>; ++def z_tbegin_nofloat : SDNode<"SystemZISD::TBEGIN_NOFLOAT", SDT_ZTBegin, ++ [SDNPHasChain, SDNPOutGlue, SDNPMayStore, ++ SDNPSideEffect]>; ++def z_tend : SDNode<"SystemZISD::TEND", SDTNone, ++ [SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>; ++ ++def z_vshl : SDNode<"ISD::SHL", SDT_ZVecBinary>; ++def z_vsra : SDNode<"ISD::SRA", SDT_ZVecBinary>; ++def z_vsrl : SDNode<"ISD::SRL", SDT_ZVecBinary>; ++ + //===----------------------------------------------------------------------===// + // Pattern fragments + //===----------------------------------------------------------------------===// +@@ -195,11 +348,21 @@ def sext8 : PatFrag<(ops node:$src), (s + def sext16 : PatFrag<(ops node:$src), (sext_inreg node:$src, i16)>; + def sext32 : PatFrag<(ops node:$src), (sext (i32 node:$src))>; + ++// Match extensions of an i32 to an i64, followed by an in-register sign ++// extension from a sub-i32 value. ++def sext8dbl : PatFrag<(ops node:$src), (sext8 (anyext node:$src))>; ++def sext16dbl : PatFrag<(ops node:$src), (sext16 (anyext node:$src))>; ++ + // Register zero-extend operations. Sub-32-bit values are represented as i32s. + def zext8 : PatFrag<(ops node:$src), (and node:$src, 0xff)>; + def zext16 : PatFrag<(ops node:$src), (and node:$src, 0xffff)>; + def zext32 : PatFrag<(ops node:$src), (zext (i32 node:$src))>; + ++// Match extensions of an i32 to an i64, followed by an AND of the low ++// i8 or i16 part. ++def zext8dbl : PatFrag<(ops node:$src), (zext8 (anyext node:$src))>; ++def zext16dbl : PatFrag<(ops node:$src), (zext16 (anyext node:$src))>; ++ + // Typed floating-point loads. 
+ def loadf32 : PatFrag<(ops node:$src), (f32 (load node:$src))>; + def loadf64 : PatFrag<(ops node:$src), (f64 (load node:$src))>; +@@ -363,6 +526,14 @@ def z_iabs64 : PatFrag<(ops node:$src), + def z_inegabs32 : PatFrag<(ops node:$src), (ineg (z_iabs32 node:$src))>; + def z_inegabs64 : PatFrag<(ops node:$src), (ineg (z_iabs64 node:$src))>; + ++// Integer multiply-and-add ++def z_muladd : PatFrag<(ops node:$src1, node:$src2, node:$src3), ++ (add (mul node:$src1, node:$src2), node:$src3)>; ++ ++// Fused multiply-subtract, using the natural operand order. ++def fms : PatFrag<(ops node:$src1, node:$src2, node:$src3), ++ (fma node:$src1, node:$src2, (fneg node:$src3))>; ++ + // Fused multiply-add and multiply-subtract, but with the order of the + // operands matching SystemZ's MA and MS instructions. + def z_fma : PatFrag<(ops node:$src1, node:$src2, node:$src3), +@@ -383,3 +554,110 @@ class loadu + : PatFrag<(ops node:$value, node:$addr), + (store (operator node:$value), node:$addr)>; ++ ++// Vector representation of all-zeros and all-ones. ++def z_vzero : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 0))))>; ++def z_vones : PatFrag<(ops), (bitconvert (v16i8 (z_byte_mask (i32 65535))))>; ++ ++// Load a scalar and replicate it in all elements of a vector. ++class z_replicate_load ++ : PatFrag<(ops node:$addr), ++ (z_replicate (scalartype (load node:$addr)))>; ++def z_replicate_loadi8 : z_replicate_load; ++def z_replicate_loadi16 : z_replicate_load; ++def z_replicate_loadi32 : z_replicate_load; ++def z_replicate_loadi64 : z_replicate_load; ++def z_replicate_loadf32 : z_replicate_load; ++def z_replicate_loadf64 : z_replicate_load; ++ ++// Load a scalar and insert it into a single element of a vector. ++class z_vle ++ : PatFrag<(ops node:$vec, node:$addr, node:$index), ++ (z_vector_insert node:$vec, (scalartype (load node:$addr)), ++ node:$index)>; ++def z_vlei8 : z_vle; ++def z_vlei16 : z_vle; ++def z_vlei32 : z_vle; ++def z_vlei64 : z_vle; ++def z_vlef32 : z_vle; ++def z_vlef64 : z_vle; ++ ++// Load a scalar and insert it into the low element of the high i64 of a ++// zeroed vector. ++class z_vllez ++ : PatFrag<(ops node:$addr), ++ (z_vector_insert (z_vzero), ++ (scalartype (load node:$addr)), (i32 index))>; ++def z_vllezi8 : z_vllez; ++def z_vllezi16 : z_vllez; ++def z_vllezi32 : z_vllez; ++def z_vllezi64 : PatFrag<(ops node:$addr), ++ (z_join_dwords (i64 (load node:$addr)), (i64 0))>; ++// We use high merges to form a v4f32 from four f32s. Propagating zero ++// into all elements but index 1 gives this expression. ++def z_vllezf32 : PatFrag<(ops node:$addr), ++ (bitconvert ++ (z_merge_high ++ (v2i64 ++ (z_unpackl_high ++ (v4i32 ++ (bitconvert ++ (v4f32 (scalar_to_vector ++ (f32 (load node:$addr)))))))), ++ (v2i64 (z_vzero))))>; ++def z_vllezf64 : PatFrag<(ops node:$addr), ++ (z_merge_high ++ (scalar_to_vector (f64 (load node:$addr))), ++ (z_vzero))>; ++ ++// Store one element of a vector. ++class z_vste ++ : PatFrag<(ops node:$vec, node:$addr, node:$index), ++ (store (scalartype (z_vector_extract node:$vec, node:$index)), ++ node:$addr)>; ++def z_vstei8 : z_vste; ++def z_vstei16 : z_vste; ++def z_vstei32 : z_vste; ++def z_vstei64 : z_vste; ++def z_vstef32 : z_vste; ++def z_vstef64 : z_vste; ++ ++// Arithmetic negation on vectors. ++def z_vneg : PatFrag<(ops node:$x), (sub (z_vzero), node:$x)>; ++ ++// Bitwise negation on vectors. ++def z_vnot : PatFrag<(ops node:$x), (xor node:$x, (z_vones))>; ++ ++// Signed "integer greater than zero" on vectors. 
++def z_vicmph_zero : PatFrag<(ops node:$x), (z_vicmph node:$x, (z_vzero))>; ++ ++// Signed "integer less than zero" on vectors. ++def z_vicmpl_zero : PatFrag<(ops node:$x), (z_vicmph (z_vzero), node:$x)>; ++ ++// Integer absolute on vectors. ++class z_viabs ++ : PatFrag<(ops node:$src), ++ (xor (add node:$src, (z_vsra_by_scalar node:$src, (i32 shift))), ++ (z_vsra_by_scalar node:$src, (i32 shift)))>; ++def z_viabs8 : z_viabs<7>; ++def z_viabs16 : z_viabs<15>; ++def z_viabs32 : z_viabs<31>; ++def z_viabs64 : z_viabs<63>; ++ ++// Sign-extend the i64 elements of a vector. ++class z_vse ++ : PatFrag<(ops node:$src), ++ (z_vsra_by_scalar (z_vshl_by_scalar node:$src, shift), shift)>; ++def z_vsei8 : z_vse<56>; ++def z_vsei16 : z_vse<48>; ++def z_vsei32 : z_vse<32>; ++ ++// ...and again with the extensions being done on individual i64 scalars. ++class z_vse_by_parts ++ : PatFrag<(ops node:$src), ++ (z_join_dwords ++ (operator (z_vector_extract node:$src, index1)), ++ (operator (z_vector_extract node:$src, index2)))>; ++def z_vsei8_by_parts : z_vse_by_parts; ++def z_vsei16_by_parts : z_vse_by_parts; ++def z_vsei32_by_parts : z_vse_by_parts; +Index: llvm-36/lib/Target/SystemZ/SystemZPatterns.td +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZPatterns.td ++++ llvm-36/lib/Target/SystemZ/SystemZPatterns.td +@@ -153,3 +153,17 @@ multiclass CompareZeroFP; + } ++ ++// Use INSN for performing binary operation OPERATION of type VT ++// on registers of class CLS. ++class BinaryRRWithType ++ : Pat<(vt (operator cls:$x, cls:$y)), (insn cls:$x, cls:$y)>; ++ ++// Use INSN to perform conversion operation OPERATOR, with the input being ++// TR2 and the output being TR1. SUPPRESS is 4 to suppress inexact conditions ++// and 0 to allow them. MODE is the rounding mode to use. 
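A brief aside on the z_viabs patterns above: they spell out the classic branchless absolute value, (x + (x >> n)) ^ (x >> n), where the arithmetic shift smears the sign bit across the word. A hedged C++ rendering of the 64-bit case (not from the patch):

// S is all-ones when X is negative, all-zeros otherwise; adding S and then
// XOR-ing with S negates X exactly when it was negative.
inline long long viabs64(long long X) {
  long long S = X >> 63;  // arithmetic shift replicates the sign bit
  return (X + S) ^ S;
}

The vector classes do the same per element via z_vsra_by_scalar with shift amounts 7, 15, 31, and 63.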
++class FPConversion<Instruction insn, SDPatternOperator operator, TypedReg tr1,
++                   TypedReg tr2, bits<3> suppress, bits<4> mode>
++  : Pat<(tr1.vt (operator (tr2.vt tr2.op:$vec))),
++        (insn tr2.op:$vec, suppress, mode)>;
+Index: llvm-36/lib/Target/SystemZ/SystemZProcessors.td
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZProcessors.td
++++ llvm-36/lib/Target/SystemZ/SystemZProcessors.td
+@@ -39,6 +39,11 @@ def FeatureFPExtension : SystemZFeature<
+   "Assume that the floating-point extension facility is installed"
+ >;
+
++def FeaturePopulationCount : SystemZFeature<
++  "population-count", "PopulationCount",
++  "Assume that the population-count facility is installed"
++>;
++
+ def FeatureFastSerialization : SystemZFeature<
+   "fast-serialization", "FastSerialization",
+   "Assume that the fast-serialization facility is installed"
+@@ -50,13 +55,42 @@ def FeatureInterlockedAccess1 : SystemZF
+ >;
+ def FeatureNoInterlockedAccess1 : SystemZMissingFeature<"InterlockedAccess1">;
+
++def FeatureMiscellaneousExtensions : SystemZFeature<
++  "miscellaneous-extensions", "MiscellaneousExtensions",
++  "Assume that the miscellaneous-extensions facility is installed"
++>;
++
++def FeatureTransactionalExecution : SystemZFeature<
++  "transactional-execution", "TransactionalExecution",
++  "Assume that the transactional-execution facility is installed"
++>;
++
++def FeatureProcessorAssist : SystemZFeature<
++  "processor-assist", "ProcessorAssist",
++  "Assume that the processor-assist facility is installed"
++>;
++
++def FeatureVector : SystemZFeature<
++  "vector", "Vector",
++  "Assume that the vector facility is installed"
++>;
++def FeatureNoVector : SystemZMissingFeature<"Vector">;
++
+ def : Processor<"generic", NoItineraries, []>;
+ def : Processor<"z10", NoItineraries, []>;
+ def : Processor<"z196", NoItineraries,
+                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+-                 FeatureFPExtension, FeatureFastSerialization,
+-                 FeatureInterlockedAccess1]>;
++                 FeatureFPExtension, FeaturePopulationCount,
++                 FeatureFastSerialization, FeatureInterlockedAccess1]>;
+ def : Processor<"zEC12", NoItineraries,
+                 [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
+-                 FeatureFPExtension, FeatureFastSerialization,
+-                 FeatureInterlockedAccess1]>;
++                 FeatureFPExtension, FeaturePopulationCount,
++                 FeatureFastSerialization, FeatureInterlockedAccess1,
++                 FeatureMiscellaneousExtensions,
++                 FeatureTransactionalExecution, FeatureProcessorAssist]>;
++def : Processor<"z13", NoItineraries,
++                [FeatureDistinctOps, FeatureLoadStoreOnCond, FeatureHighWord,
++                 FeatureFPExtension, FeaturePopulationCount,
++                 FeatureFastSerialization, FeatureInterlockedAccess1,
++                 FeatureTransactionalExecution, FeatureProcessorAssist,
++                 FeatureVector]>;
+Index: llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZRegisterInfo.td
++++ llvm-36/lib/Target/SystemZ/SystemZRegisterInfo.td
+@@ -25,20 +25,24 @@ def subreg_l32 : SubRegIndex<32, 0>;
+ def subreg_h32 : SubRegIndex<32, 32>; // Also acts as subreg_lh32.
+ def subreg_l64 : SubRegIndex<64, 0>;
+ def subreg_h64 : SubRegIndex<64, 64>;
++def subreg_r32 : SubRegIndex<32, 32>; // Reinterpret a wider reg as 32 bits.
++def subreg_r64 : SubRegIndex<64, 64>; // Reinterpret a wider reg as 64 bits.
+ def subreg_hh32 : ComposedSubRegIndex; + def subreg_hl32 : ComposedSubRegIndex; ++def subreg_hr32 : ComposedSubRegIndex; + } + +-// Define a register class that contains values of type TYPE and an ++// Define a register class that contains values of types TYPES and an + // associated operand called NAME. SIZE is the size and alignment + // of the registers and REGLIST is the list of individual registers. +-multiclass SystemZRegClass { ++multiclass SystemZRegClass types, int size, ++ dag regList> { + def AsmOperand : AsmOperandClass { + let Name = name; + let ParserMethod = "parse"##name; + let RenderMethod = "addRegOperands"; + } +- def Bit : RegisterClass<"SystemZ", [type], size, regList> { ++ def Bit : RegisterClass<"SystemZ", types, size, regList> { + let Size = size; + } + def "" : RegisterOperand(name##"Bit")> { +@@ -84,16 +88,19 @@ foreach I = [0, 2, 4, 6, 8, 10, 12, 14] + + /// Allocate the callee-saved R6-R13 backwards. That way they can be saved + /// together with R14 and R15 in one prolog instruction. +-defm GR32 : SystemZRegClass<"GR32", i32, 32, (add (sequence "R%uL", 0, 5), +- (sequence "R%uL", 15, 6))>; +-defm GRH32 : SystemZRegClass<"GRH32", i32, 32, (add (sequence "R%uH", 0, 5), +- (sequence "R%uH", 15, 6))>; +-defm GR64 : SystemZRegClass<"GR64", i64, 64, (add (sequence "R%uD", 0, 5), +- (sequence "R%uD", 15, 6))>; ++defm GR32 : SystemZRegClass<"GR32", [i32], 32, ++ (add (sequence "R%uL", 0, 5), ++ (sequence "R%uL", 15, 6))>; ++defm GRH32 : SystemZRegClass<"GRH32", [i32], 32, ++ (add (sequence "R%uH", 0, 5), ++ (sequence "R%uH", 15, 6))>; ++defm GR64 : SystemZRegClass<"GR64", [i64], 64, ++ (add (sequence "R%uD", 0, 5), ++ (sequence "R%uD", 15, 6))>; + + // Combine the low and high GR32s into a single class. This can only be + // used for virtual registers if the high-word facility is available. +-defm GRX32 : SystemZRegClass<"GRX32", i32, 32, ++defm GRX32 : SystemZRegClass<"GRX32", [i32], 32, + (add (sequence "R%uL", 0, 5), + (sequence "R%uH", 0, 5), + R15L, R15H, R14L, R14H, R13L, R13H, +@@ -102,18 +109,17 @@ defm GRX32 : SystemZRegClass<"GRX32", i3 + + // The architecture doesn't really have any i128 support, so model the + // register pairs as untyped instead. +-defm GR128 : SystemZRegClass<"GR128", untyped, 128, (add R0Q, R2Q, R4Q, +- R12Q, R10Q, R8Q, R6Q, +- R14Q)>; ++defm GR128 : SystemZRegClass<"GR128", [untyped], 128, ++ (add R0Q, R2Q, R4Q, R12Q, R10Q, R8Q, R6Q, R14Q)>; + + // Base and index registers. Everything except R0, which in an address + // context evaluates as 0. +-defm ADDR32 : SystemZRegClass<"ADDR32", i32, 32, (sub GR32Bit, R0L)>; +-defm ADDR64 : SystemZRegClass<"ADDR64", i64, 64, (sub GR64Bit, R0D)>; ++defm ADDR32 : SystemZRegClass<"ADDR32", [i32], 32, (sub GR32Bit, R0L)>; ++defm ADDR64 : SystemZRegClass<"ADDR64", [i64], 64, (sub GR64Bit, R0D)>; + + // Not used directly, but needs to exist for ADDR32 and ADDR64 subregs + // of a GR128. 
+-defm ADDR128 : SystemZRegClass<"ADDR128", untyped, 128, (sub GR128Bit, R0Q)>; ++defm ADDR128 : SystemZRegClass<"ADDR128", [untyped], 128, (sub GR128Bit, R0Q)>; + + //===----------------------------------------------------------------------===// + // Floating-point registers +@@ -142,16 +148,36 @@ def F11Dwarf : DwarfMapping<29>; + def F13Dwarf : DwarfMapping<30>; + def F15Dwarf : DwarfMapping<31>; + +-// Lower 32 bits of one of the 16 64-bit floating-point registers ++def F16Dwarf : DwarfMapping<68>; ++def F18Dwarf : DwarfMapping<69>; ++def F20Dwarf : DwarfMapping<70>; ++def F22Dwarf : DwarfMapping<71>; ++ ++def F17Dwarf : DwarfMapping<72>; ++def F19Dwarf : DwarfMapping<73>; ++def F21Dwarf : DwarfMapping<74>; ++def F23Dwarf : DwarfMapping<75>; ++ ++def F24Dwarf : DwarfMapping<76>; ++def F26Dwarf : DwarfMapping<77>; ++def F28Dwarf : DwarfMapping<78>; ++def F30Dwarf : DwarfMapping<79>; ++ ++def F25Dwarf : DwarfMapping<80>; ++def F27Dwarf : DwarfMapping<81>; ++def F29Dwarf : DwarfMapping<82>; ++def F31Dwarf : DwarfMapping<83>; ++ ++// Upper 32 bits of one of the floating-point registers + class FPR32 num, string n> : SystemZReg { + let HWEncoding = num; + } + +-// One of the 16 64-bit floating-point registers +-class FPR64 num, string n, FPR32 low> +- : SystemZRegWithSubregs { ++// One of the floating-point registers. ++class FPR64 num, string n, FPR32 high> ++ : SystemZRegWithSubregs { + let HWEncoding = num; +- let SubRegIndices = [subreg_h32]; ++ let SubRegIndices = [subreg_r32]; + } + + // 8 pairs of FPR64s, with a one-register gap inbetween. +@@ -161,12 +187,17 @@ class FPR128 num, string n, FPR + let SubRegIndices = [subreg_l64, subreg_h64]; + } + +-// Floating-point registers ++// Floating-point registers. Registers 16-31 require the vector facility. + foreach I = 0-15 in { + def F#I#S : FPR32; + def F#I#D : FPR64("F"#I#"S")>, + DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; + } ++foreach I = 16-31 in { ++ def F#I#S : FPR32; ++ def F#I#D : FPR64("F"#I#"S")>, ++ DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; ++} + + foreach I = [0, 1, 4, 5, 8, 9, 12, 13] in { + def F#I#Q : FPR128("F"#!add(I, 2)#"D"), +@@ -175,10 +206,74 @@ foreach I = [0, 1, 4, 5, 8, 9, 12, 13] i + + // There's no store-multiple instruction for FPRs, so we're not fussy + // about the order in which call-saved registers are allocated. +-defm FP32 : SystemZRegClass<"FP32", f32, 32, (sequence "F%uS", 0, 15)>; +-defm FP64 : SystemZRegClass<"FP64", f64, 64, (sequence "F%uD", 0, 15)>; +-defm FP128 : SystemZRegClass<"FP128", f128, 128, (add F0Q, F1Q, F4Q, F5Q, +- F8Q, F9Q, F12Q, F13Q)>; ++defm FP32 : SystemZRegClass<"FP32", [f32], 32, (sequence "F%uS", 0, 15)>; ++defm FP64 : SystemZRegClass<"FP64", [f64], 64, (sequence "F%uD", 0, 15)>; ++defm FP128 : SystemZRegClass<"FP128", [f128], 128, ++ (add F0Q, F1Q, F4Q, F5Q, F8Q, F9Q, F12Q, F13Q)>; ++ ++//===----------------------------------------------------------------------===// ++// Vector registers ++//===----------------------------------------------------------------------===// ++ ++// A full 128-bit vector register, with an FPR64 as its high part. ++class VR128 num, string n, FPR64 high> ++ : SystemZRegWithSubregs { ++ let HWEncoding = num; ++ let SubRegIndices = [subreg_r64]; ++} ++ ++// Full vector registers. ++foreach I = 0-31 in { ++ def V#I : VR128("F"#I#"D")>, ++ DwarfRegNum<[!cast("F"#I#"Dwarf").Id]>; ++} ++ ++// Class used to store 32-bit values in the first element of a vector ++// register. f32 scalars are used for the WLEDB and WLDEB instructions. 
++defm VR32 : SystemZRegClass<"VR32", [f32, v4i8, v2i16], 32, ++ (add (sequence "F%uS", 0, 7), ++ (sequence "F%uS", 16, 31), ++ (sequence "F%uS", 8, 15))>; ++ ++// Class used to store 64-bit values in the upper half of a vector register. ++// The vector facility also includes scalar f64 instructions that operate ++// on the full vector register set. ++defm VR64 : SystemZRegClass<"VR64", [f64, v8i8, v4i16, v2i32, v2f32], 64, ++ (add (sequence "F%uD", 0, 7), ++ (sequence "F%uD", 16, 31), ++ (sequence "F%uD", 8, 15))>; ++ ++// The subset of vector registers that can be used for floating-point ++// operations too. ++defm VF128 : SystemZRegClass<"VF128", ++ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, ++ (sequence "V%u", 0, 15)>; ++ ++// All vector registers. ++defm VR128 : SystemZRegClass<"VR128", ++ [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], 128, ++ (add (sequence "V%u", 0, 7), ++ (sequence "V%u", 16, 31), ++ (sequence "V%u", 8, 15))>; ++ ++// Attaches a ValueType to a register operand, to make the instruction ++// definitions easier. ++class TypedReg { ++ ValueType vt = vtin; ++ RegisterOperand op = opin; ++} ++ ++def v32eb : TypedReg; ++def v64g : TypedReg; ++def v64db : TypedReg; ++def v128b : TypedReg; ++def v128h : TypedReg; ++def v128f : TypedReg; ++def v128g : TypedReg; ++def v128q : TypedReg; ++def v128eb : TypedReg; ++def v128db : TypedReg; ++def v128any : TypedReg; + + //===----------------------------------------------------------------------===// + // Other registers +Index: llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZShortenInst.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZShortenInst.cpp +@@ -15,6 +15,7 @@ + + #include "SystemZTargetMachine.h" + #include "llvm/CodeGen/MachineFunctionPass.h" ++#include "llvm/CodeGen/MachineInstrBuilder.h" + + using namespace llvm; + +@@ -36,6 +37,10 @@ public: + private: + bool shortenIIF(MachineInstr &MI, unsigned *GPRMap, unsigned LiveOther, + unsigned LLIxL, unsigned LLIxH); ++ bool shortenOn0(MachineInstr &MI, unsigned Opcode); ++ bool shortenOn01(MachineInstr &MI, unsigned Opcode); ++ bool shortenOn001(MachineInstr &MI, unsigned Opcode); ++ bool shortenFPConv(MachineInstr &MI, unsigned Opcode); + + const SystemZInstrInfo *TII; + +@@ -97,6 +102,64 @@ bool SystemZShortenInst::shortenIIF(Mach + return false; + } + ++// Change MI's opcode to Opcode if register operand 0 has a 4-bit encoding. ++bool SystemZShortenInst::shortenOn0(MachineInstr &MI, unsigned Opcode) { ++ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16) { ++ MI.setDesc(TII->get(Opcode)); ++ return true; ++ } ++ return false; ++} ++ ++// Change MI's opcode to Opcode if register operands 0 and 1 have a ++// 4-bit encoding. ++bool SystemZShortenInst::shortenOn01(MachineInstr &MI, unsigned Opcode) { ++ if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 && ++ SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) { ++ MI.setDesc(TII->get(Opcode)); ++ return true; ++ } ++ return false; ++} ++ ++// Change MI's opcode to Opcode if register operands 0, 1 and 2 have a ++// 4-bit encoding and if operands 0 and 1 are tied. 
++bool SystemZShortenInst::shortenOn001(MachineInstr &MI, unsigned Opcode) {
++  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
++      MI.getOperand(1).getReg() == MI.getOperand(0).getReg() &&
++      SystemZMC::getFirstReg(MI.getOperand(2).getReg()) < 16) {
++    MI.setDesc(TII->get(Opcode));
++    return true;
++  }
++  return false;
++}
++
++// MI is a vector-style conversion instruction with the operand order:
++// destination, source, exact-suppress, rounding-mode. If both registers
++// have a 4-bit encoding then change it to Opcode, which has operand order:
++// destination, rounding-mode, source, exact-suppress.
++bool SystemZShortenInst::shortenFPConv(MachineInstr &MI, unsigned Opcode) {
++  if (SystemZMC::getFirstReg(MI.getOperand(0).getReg()) < 16 &&
++      SystemZMC::getFirstReg(MI.getOperand(1).getReg()) < 16) {
++    MachineOperand Dest(MI.getOperand(0));
++    MachineOperand Src(MI.getOperand(1));
++    MachineOperand Suppress(MI.getOperand(2));
++    MachineOperand Mode(MI.getOperand(3));
++    MI.RemoveOperand(3);
++    MI.RemoveOperand(2);
++    MI.RemoveOperand(1);
++    MI.RemoveOperand(0);
++    MI.setDesc(TII->get(Opcode));
++    MachineInstrBuilder(*MI.getParent()->getParent(), &MI)
++        .addOperand(Dest)
++        .addOperand(Mode)
++        .addOperand(Src)
++        .addOperand(Suppress);
++    return true;
++  }
++  return false;
++}
++
+ // Process all instructions in MBB. Return true if something changed.
+ bool SystemZShortenInst::processBlock(MachineBasicBlock &MBB) {
+   bool Changed = false;
+@@ -117,13 +180,83 @@ bool SystemZShortenInst::processBlock(Ma
+   // Iterate backwards through the block looking for instructions to change.
+   for (auto MBBI = MBB.rbegin(), MBBE = MBB.rend(); MBBI != MBBE; ++MBBI) {
+     MachineInstr &MI = *MBBI;
+-    unsigned Opcode = MI.getOpcode();
+-    if (Opcode == SystemZ::IILF)
++    switch (MI.getOpcode()) {
++    case SystemZ::IILF:
+       Changed |= shortenIIF(MI, LowGPRs, LiveHigh, SystemZ::LLILL,
+                             SystemZ::LLILH);
+-    else if (Opcode == SystemZ::IIHF)
++      break;
++
++    case SystemZ::IIHF:
+       Changed |= shortenIIF(MI, HighGPRs, LiveLow, SystemZ::LLIHL,
+                             SystemZ::LLIHH);
++      break;
++
++    case SystemZ::WFADB:
++      Changed |= shortenOn001(MI, SystemZ::ADBR);
++      break;
++
++    case SystemZ::WFDDB:
++      Changed |= shortenOn001(MI, SystemZ::DDBR);
++      break;
++
++    case SystemZ::WFIDB:
++      Changed |= shortenFPConv(MI, SystemZ::FIDBRA);
++      break;
++
++    case SystemZ::WLDEB:
++      Changed |= shortenOn01(MI, SystemZ::LDEBR);
++      break;
++
++    case SystemZ::WLEDB:
++      Changed |= shortenFPConv(MI, SystemZ::LEDBRA);
++      break;
++
++    case SystemZ::WFMDB:
++      Changed |= shortenOn001(MI, SystemZ::MDBR);
++      break;
++
++    case SystemZ::WFLCDB:
++      Changed |= shortenOn01(MI, SystemZ::LCDBR);
++      break;
++
++    case SystemZ::WFLNDB:
++      Changed |= shortenOn01(MI, SystemZ::LNDBR);
++      break;
++
++    case SystemZ::WFLPDB:
++      Changed |= shortenOn01(MI, SystemZ::LPDBR);
++      break;
++
++    case SystemZ::WFSQDB:
++      Changed |= shortenOn01(MI, SystemZ::SQDBR);
++      break;
++
++    case SystemZ::WFSDB:
++      Changed |= shortenOn001(MI, SystemZ::SDBR);
++      break;
++
++    case SystemZ::WFCDB:
++      Changed |= shortenOn01(MI, SystemZ::CDBR);
++      break;
++
++    case SystemZ::VL32:
++      // For z13 we prefer LDE over LE to avoid partial register dependencies.
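// Illustrative aside (not part of the patch): the reordering shortenFPConv
// performs, e.g. for WFIDB when both registers have 4-bit encodings:
//   before: WFIDB  %f1, %f2, suppress, mode   (dest, src, suppress, mode)
//   after:  FIDBRA %f1, mode, %f2, suppress   (dest, mode, src, suppress)
// The getFirstReg(...) < 16 checks above are what guarantee that the short
// non-vector form can encode the registers at all.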
++      Changed |= shortenOn0(MI, SystemZ::LDE32);
++      break;
++
++    case SystemZ::VST32:
++      Changed |= shortenOn0(MI, SystemZ::STE);
++      break;
++
++    case SystemZ::VL64:
++      Changed |= shortenOn0(MI, SystemZ::LD);
++      break;
++
++    case SystemZ::VST64:
++      Changed |= shortenOn0(MI, SystemZ::STD);
++      break;
++    }
++
+     unsigned UsedLow = 0;
+     unsigned UsedHigh = 0;
+     for (auto MOI = MI.operands_begin(), MOE = MI.operands_end();
+Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp
+===================================================================
+--- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.cpp
++++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.cpp
+@@ -10,7 +10,6 @@
+ #include "SystemZSubtarget.h"
+ #include "MCTargetDesc/SystemZMCTargetDesc.h"
+ #include "llvm/IR/GlobalValue.h"
+-#include "llvm/Support/Host.h"
+
+ using namespace llvm;
+
+@@ -23,15 +22,69 @@ using namespace llvm;
+
+ // Pin the vtable to this file.
+ void SystemZSubtarget::anchor() {}
+
++// Determine whether we use the vector ABI.
++static bool UsesVectorABI(StringRef CPU, StringRef FS) {
++  // We use the vector ABI whenever the vector facility is available.
++  // This is the case by default if CPU is z13 or later, and can be
++  // overridden via "[+-]vector" feature string elements.
++  bool VectorABI = true;
++  if (CPU.empty() || CPU == "generic" ||
++      CPU == "z10" || CPU == "z196" || CPU == "zEC12")
++    VectorABI = false;
++
++  SmallVector<StringRef, 8> Features;
++  FS.split(Features, ",", -1, false /* KeepEmpty */);
++  for (auto &Feature : Features) {
++    if (Feature == "vector" || Feature == "+vector")
++      VectorABI = true;
++    if (Feature == "-vector")
++      VectorABI = false;
++  }
++
++  return VectorABI;
++}
++
++static std::string computeDataLayout(StringRef TT, StringRef CPU,
++                                     StringRef FS) {
++  const Triple Triple(TT);
++  bool VectorABI = UsesVectorABI(CPU, FS);
++  std::string Ret = "";
++
++  // Big endian.
++  Ret += "E";
++
++  // Data mangling.
++  Ret += DataLayout::getManglingComponent(Triple);
++
++  // Make sure that global data has at least 16 bits of alignment by
++  // default, so that we can refer to it using LARL. We don't have any
++  // special requirements for stack variables though.
++  Ret += "-i1:8:16-i8:8:16";
++
++  // 64-bit integers are naturally aligned.
++  Ret += "-i64:64";
++
++  // 128-bit floats are aligned only to 64 bits.
++  Ret += "-f128:64";
++
++  // When using the vector ABI, 128-bit vectors are also aligned to 64 bits.
++  if (VectorABI)
++    Ret += "-v128:64";
++
++  // We prefer 16 bits of alignment for all globals; see above.
++  Ret += "-a:8:16";
++
++  // Integer registers are 32 or 64 bits.
++  Ret += "-n32:64";
++
++  return Ret;
++}
++
+ SystemZSubtarget &
+ SystemZSubtarget::initializeSubtargetDependencies(StringRef CPU, StringRef FS) {
+   std::string CPUName = CPU;
+   if (CPUName.empty())
+     CPUName = "generic";
+-#if defined(__linux__) && defined(__s390x__)
+-  if (CPUName == "generic")
+-    CPUName = sys::getHostCPUName();
+-#endif
+   // Parse features string.
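Following computeDataLayout above step by step, the string it builds for an s390x ELF triple with the vector ABI in effect is, for illustration:

// Derived by hand from the concatenation above (a sketch, not in the patch):
const char *VectorABIDataLayout =
    "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64";

Without the vector facility the "-v128:64" component is simply absent, reproducing the hard-coded layout string that the next hunk removes from the constructor.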
+ ParseSubtargetFeatures(CPUName, FS); + return *this; +@@ -43,12 +96,12 @@ SystemZSubtarget::SystemZSubtarget(const + const TargetMachine &TM) + : SystemZGenSubtargetInfo(TT, CPU, FS), HasDistinctOps(false), + HasLoadStoreOnCond(false), HasHighWord(false), HasFPExtension(false), +- HasFastSerialization(false), HasInterlockedAccess1(false), ++ HasPopulationCount(false), HasFastSerialization(false), ++ HasInterlockedAccess1(false), HasMiscellaneousExtensions(false), ++ HasTransactionalExecution(false), HasProcessorAssist(false), ++ HasVector(false), + TargetTriple(TT), +- // Make sure that global data has at least 16 bits of alignment by +- // default, so that we can refer to it using LARL. We don't have any +- // special requirements for stack variables though. +- DL("E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-a:8:16-n32:64"), ++ DL(computeDataLayout(TT, CPU, FS)), + InstrInfo(initializeSubtargetDependencies(CPU, FS)), TLInfo(TM), + TSInfo(DL), FrameLowering() {} + +Index: llvm-36/lib/Target/SystemZ/SystemZSubtarget.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZSubtarget.h ++++ llvm-36/lib/Target/SystemZ/SystemZSubtarget.h +@@ -38,8 +38,13 @@ protected: + bool HasLoadStoreOnCond; + bool HasHighWord; + bool HasFPExtension; ++ bool HasPopulationCount; + bool HasFastSerialization; + bool HasInterlockedAccess1; ++ bool HasMiscellaneousExtensions; ++ bool HasTransactionalExecution; ++ bool HasProcessorAssist; ++ bool HasVector; + + private: + Triple TargetTriple; +@@ -88,12 +93,29 @@ public: + // Return true if the target has the floating-point extension facility. + bool hasFPExtension() const { return HasFPExtension; } + ++ // Return true if the target has the population-count facility. ++ bool hasPopulationCount() const { return HasPopulationCount; } ++ + // Return true if the target has the fast-serialization facility. + bool hasFastSerialization() const { return HasFastSerialization; } + + // Return true if the target has interlocked-access facility 1. + bool hasInterlockedAccess1() const { return HasInterlockedAccess1; } + ++ // Return true if the target has the miscellaneous-extensions facility. ++ bool hasMiscellaneousExtensions() const { ++ return HasMiscellaneousExtensions; ++ } ++ ++ // Return true if the target has the transactional-execution facility. ++ bool hasTransactionalExecution() const { return HasTransactionalExecution; } ++ ++ // Return true if the target has the processor-assist facility. ++ bool hasProcessorAssist() const { return HasProcessorAssist; } ++ ++ // Return true if the target has the vector facility. ++ bool hasVector() const { return HasVector; } ++ + // Return true if GV can be accessed using LARL for reloc model RM + // and code model CM. 
+ bool isPC32DBLSymbol(const GlobalValue *GV, Reloc::Model RM, +Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.cpp ++++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.cpp +@@ -9,6 +9,7 @@ + + #include "SystemZTargetMachine.h" + #include "llvm/CodeGen/Passes.h" ++#include "llvm/PassManager.h" + #include "llvm/Support/TargetRegistry.h" + #include "llvm/Transforms/Scalar.h" + #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" +@@ -57,6 +58,10 @@ void SystemZPassConfig::addIRPasses() { + + bool SystemZPassConfig::addInstSelector() { + addPass(createSystemZISelDag(getSystemZTargetMachine(), getOptLevel())); ++ ++ if (getOptLevel() != CodeGenOpt::None) ++ addPass(createSystemZLDCleanupPass(getSystemZTargetMachine())); ++ + return false; + } + +@@ -100,3 +105,12 @@ void SystemZPassConfig::addPreEmitPass() + TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { + return new SystemZPassConfig(this, PM); + } ++ ++void SystemZTargetMachine::addAnalysisPasses(PassManagerBase &PM) { ++ // Add first the target-independent BasicTTI pass, then our SystemZ pass. ++ // This allows the SystemZ pass to delegate to the target independent layer ++ // when appropriate. ++ PM.add(createBasicTargetTransformInfoPass(this)); ++ PM.add(createSystemZTargetTransformInfoPass(this)); ++} ++ +Index: llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h +=================================================================== +--- llvm-36.orig/lib/Target/SystemZ/SystemZTargetMachine.h ++++ llvm-36/lib/Target/SystemZ/SystemZTargetMachine.h +@@ -39,6 +39,7 @@ public: + } + // Override LLVMTargetMachine + TargetPassConfig *createPassConfig(PassManagerBase &PM) override; ++ void addAnalysisPasses(PassManagerBase &PM) override; + TargetLoweringObjectFile *getObjFileLowering() const override { + return TLOF.get(); + } +Index: llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +=================================================================== +--- /dev/null ++++ llvm-36/lib/Target/SystemZ/SystemZTargetTransformInfo.cpp +@@ -0,0 +1,334 @@ ++//===-- SystemZTargetTransformInfo.cpp - SystemZ-specific TTI -------------===// ++// ++// The LLVM Compiler Infrastructure ++// ++// This file is distributed under the University of Illinois Open Source ++// License. See LICENSE.TXT for details. ++// ++//===----------------------------------------------------------------------===// ++// ++// This file implements a TargetTransformInfo analysis pass specific to the ++// SystemZ target machine. It uses the target's detailed information to provide ++// more precise answers to certain TTI queries, while letting the target ++// independent and default TTI implementations handle the rest. ++// ++//===----------------------------------------------------------------------===// ++ ++#include "SystemZTargetMachine.h" ++#include "llvm/Analysis/TargetTransformInfo.h" ++#include "llvm/IR/IntrinsicInst.h" ++#include "llvm/Support/Debug.h" ++#include "llvm/Target/CostTable.h" ++#include "llvm/Target/TargetLowering.h" ++using namespace llvm; ++ ++#define DEBUG_TYPE "systemztti" ++ ++// Declare the pass initialization routine locally as target-specific passes ++// don't have a target-wide initialization entry point, and so we rely on the ++// pass constructor initialization. 
++namespace llvm { ++void initializeSystemZTTIPass(PassRegistry &); ++} ++ ++namespace { ++ ++class SystemZTTI : public ImmutablePass, public TargetTransformInfo { ++ const SystemZSubtarget *ST; ++ const SystemZTargetLowering *TLI; ++ ++public: ++ SystemZTTI() : ImmutablePass(ID), ST(0), TLI(0) { ++ llvm_unreachable("This pass cannot be directly constructed"); ++ } ++ ++ SystemZTTI(const SystemZTargetMachine *TM) ++ : ImmutablePass(ID), ST(TM->getSubtargetImpl()), ++ TLI(TM->getSubtargetImpl()->getTargetLowering()) { ++ initializeSystemZTTIPass(*PassRegistry::getPassRegistry()); ++ } ++ ++ void initializePass() override { ++ pushTTIStack(this); ++ } ++ ++ void getAnalysisUsage(AnalysisUsage &AU) const override { ++ TargetTransformInfo::getAnalysisUsage(AU); ++ } ++ ++ // Pass identification. ++ static char ID; ++ ++ // Provide necessary pointer adjustments for the two base classes. ++ void *getAdjustedAnalysisPointer(const void *ID) override { ++ if (ID == &TargetTransformInfo::ID) ++ return (TargetTransformInfo*)this; ++ return this; ++ } ++ ++ /// \name Scalar TTI Implementations ++ /// @{ ++ ++ unsigned getIntImmCost(const APInt &Imm, Type *Ty); ++ ++ unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, ++ Type *Ty); ++ unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, ++ Type *Ty); ++ ++ PopcntSupportKind getPopcntSupport(unsigned TyWidth); ++ ++ /// @} ++ ++ /// \name Vector TTI Implementations ++ /// @{ ++ ++ unsigned getNumberOfRegisters(bool Vector); ++ unsigned getRegisterBitWidth(bool Vector); ++ ++ /// @} ++}; ++ ++} // end anonymous namespace ++ ++INITIALIZE_AG_PASS(SystemZTTI, TargetTransformInfo, "systemztti", ++ "SystemZ Target Transform Info", true, true, false) ++char SystemZTTI::ID = 0; ++ ++ImmutablePass * ++llvm::createSystemZTargetTransformInfoPass(const SystemZTargetMachine *TM) { ++ return new SystemZTTI(TM); ++} ++ ++ ++//===----------------------------------------------------------------------===// ++// ++// SystemZ cost model. ++// ++//===----------------------------------------------------------------------===// ++ ++unsigned SystemZTTI::getIntImmCost(const APInt &Imm, Type *Ty) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented yet. ++ if (BitSize > 64) ++ return TCC_Free; ++ ++ if (Imm == 0) ++ return TCC_Free; ++ ++ if (Imm.getBitWidth() <= 64) { ++ // Constants loaded via lgfi. ++ if (isInt<32>(Imm.getSExtValue())) ++ return TCC_Basic; ++ // Constants loaded via llilf. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TCC_Basic; ++ // Constants loaded via llihf: ++ if ((Imm.getZExtValue() & 0xffffffff) == 0) ++ return TCC_Basic; ++ ++ return 2 * TCC_Basic; ++ } ++ ++ return 4 * TCC_Basic; ++} ++ ++unsigned SystemZTTI::getIntImmCost(unsigned Opcode, unsigned Idx, ++ const APInt &Imm, Type *Ty) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented yet. 
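To make the tiers above concrete, a few classifications that follow from the branches of getIntImmCost (worked examples, not from the patch):

// 0x000000007fffffff -> TCC_Basic      (fits lgfi: signed 32-bit)
// 0x00000000fffffffe -> TCC_Basic      (fits llilf: unsigned 32-bit)
// 0x7fffffff00000000 -> TCC_Basic      (fits llihf: low 32 bits all zero)
// 0x0000000123456789 -> 2 * TCC_Basic  (needs a two-instruction sequence)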
++ if (BitSize > 64) ++ return TCC_Free; ++ ++ switch (Opcode) { ++ default: ++ return TCC_Free; ++ case Instruction::GetElementPtr: ++ // Always hoist the base address of a GetElementPtr. This prevents the ++ // creation of new constants for every base constant that gets constant ++ // folded with the offset. ++ if (Idx == 0) ++ return 2 * TCC_Basic; ++ return TCC_Free; ++ case Instruction::Store: ++ if (Idx == 0 && Imm.getBitWidth() <= 64) { ++ // Any 8-bit immediate store can by implemented via mvi. ++ if (BitSize == 8) ++ return TCC_Free; ++ // 16-bit immediate values can be stored via mvhhi/mvhi/mvghi. ++ if (isInt<16>(Imm.getSExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::ICmp: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // Comparisons against signed 32-bit immediates implemented via cgfi. ++ if (isInt<32>(Imm.getSExtValue())) ++ return TCC_Free; ++ // Comparisons against unsigned 32-bit immediates implemented via clgfi. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::Add: ++ case Instruction::Sub: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // We use algfi/slgfi to add/subtract 32-bit unsigned immediates. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TCC_Free; ++ // Or their negation, by swapping addition vs. subtraction. ++ if (isUInt<32>(-Imm.getSExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::Mul: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // We use msgfi to multiply by 32-bit signed immediates. ++ if (isInt<32>(Imm.getSExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::Or: ++ case Instruction::Xor: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // Masks supported by oilf/xilf. ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TCC_Free; ++ // Masks supported by oihf/xihf. ++ if ((Imm.getZExtValue() & 0xffffffff) == 0) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::And: ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ // Any 32-bit AND operation can by implemented via nilf. ++ if (BitSize <= 32) ++ return TCC_Free; ++ // 64-bit masks supported by nilf. ++ if (isUInt<32>(~Imm.getZExtValue())) ++ return TCC_Free; ++ // 64-bit masks supported by nilh. ++ if ((Imm.getZExtValue() & 0xffffffff) == 0xffffffff) ++ return TCC_Free; ++ // Some 64-bit AND operations can be implemented via risbg. ++ const SystemZInstrInfo *TII = ST->getInstrInfo(); ++ unsigned Start, End; ++ if (TII->isRxSBGMask(Imm.getZExtValue(), BitSize, Start, End)) ++ return TCC_Free; ++ } ++ break; ++ case Instruction::Shl: ++ case Instruction::LShr: ++ case Instruction::AShr: ++ // Always return TCC_Free for the shift value of a shift instruction. ++ if (Idx == 1) ++ return TCC_Free; ++ break; ++ case Instruction::UDiv: ++ case Instruction::SDiv: ++ case Instruction::URem: ++ case Instruction::SRem: ++ case Instruction::Trunc: ++ case Instruction::ZExt: ++ case Instruction::SExt: ++ case Instruction::IntToPtr: ++ case Instruction::PtrToInt: ++ case Instruction::BitCast: ++ case Instruction::PHI: ++ case Instruction::Call: ++ case Instruction::Select: ++ case Instruction::Ret: ++ case Instruction::Load: ++ break; ++ } ++ ++ return SystemZTTI::getIntImmCost(Imm, Ty); ++} ++ ++unsigned SystemZTTI::getIntImmCost(Intrinsic::ID IID, unsigned Idx, ++ const APInt &Imm, Type *Ty) { ++ assert(Ty->isIntegerTy()); ++ ++ unsigned BitSize = Ty->getPrimitiveSizeInBits(); ++ // There is no cost model for constants with a bit size of 0. 
Return TCC_Free ++ // here, so that constant hoisting will ignore this constant. ++ if (BitSize == 0) ++ return TCC_Free; ++ // No cost model for operations on integers larger than 64 bit implemented yet. ++ if (BitSize > 64) ++ return TCC_Free; ++ ++ switch (IID) { ++ default: ++ return TCC_Free; ++ case Intrinsic::sadd_with_overflow: ++ case Intrinsic::uadd_with_overflow: ++ case Intrinsic::ssub_with_overflow: ++ case Intrinsic::usub_with_overflow: ++ // These get expanded to include a normal addition/subtraction. ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ if (isUInt<32>(Imm.getZExtValue())) ++ return TCC_Free; ++ if (isUInt<32>(-Imm.getSExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Intrinsic::smul_with_overflow: ++ case Intrinsic::umul_with_overflow: ++ // These get expanded to include a normal multiplication. ++ if (Idx == 1 && Imm.getBitWidth() <= 64) { ++ if (isInt<32>(Imm.getSExtValue())) ++ return TCC_Free; ++ } ++ break; ++ case Intrinsic::experimental_stackmap: ++ if ((Idx < 2) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) ++ return TCC_Free; ++ break; ++ case Intrinsic::experimental_patchpoint_void: ++ case Intrinsic::experimental_patchpoint_i64: ++ if ((Idx < 4) || (Imm.getBitWidth() <= 64 && isInt<64>(Imm.getSExtValue()))) ++ return TCC_Free; ++ break; ++ } ++ return SystemZTTI::getIntImmCost(Imm, Ty); ++} ++ ++SystemZTTI::PopcntSupportKind ++SystemZTTI::getPopcntSupport(unsigned TyWidth) { ++ assert(isPowerOf2_32(TyWidth) && "Type width must be power of 2"); ++ if (ST->hasPopulationCount() && TyWidth <= 64) ++ return PSK_FastHardware; ++ return PSK_Software; ++} ++ ++unsigned SystemZTTI::getNumberOfRegisters(bool Vector) { ++ if (!Vector) ++ // Discount the stack pointer. Also leave out %r0, since it can't ++ // be used in an address. 
++ return 14; ++ if (ST->hasVector()) ++ return 32; ++ return 0; ++} ++ ++unsigned SystemZTTI::getRegisterBitWidth(bool Vector) { ++ if (!Vector) ++ return 64; ++ if (ST->hasVector()) ++ return 128; ++ return 0; ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/ctpop-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/ctpop-01.ll +@@ -0,0 +1,96 @@ ++; Test population-count instruction ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s ++ ++declare i32 @llvm.ctpop.i32(i32 %a) ++declare i64 @llvm.ctpop.i64(i64 %a) ++ ++define i32 @f1(i32 %a) { ++; CHECK-LABEL: f1: ++; CHECK: popcnt %r0, %r2 ++; CHECK: sllk %r1, %r0, 16 ++; CHECK: ar %r1, %r0 ++; CHECK: sllk %r2, %r1, 8 ++; CHECK: ar %r2, %r1 ++; CHECK: srl %r2, 24 ++; CHECK: br %r14 ++ ++ %popcnt = call i32 @llvm.ctpop.i32(i32 %a) ++ ret i32 %popcnt ++} ++ ++define i32 @f2(i32 %a) { ++; CHECK-LABEL: f2: ++; CHECK: llhr %r0, %r2 ++; CHECK: popcnt %r0, %r0 ++; CHECK: risblg %r2, %r0, 16, 151, 8 ++; CHECK: ar %r2, %r0 ++; CHECK: srl %r2, 8 ++; CHECK: br %r14 ++ %and = and i32 %a, 65535 ++ %popcnt = call i32 @llvm.ctpop.i32(i32 %and) ++ ret i32 %popcnt ++} ++ ++define i32 @f3(i32 %a) { ++; CHECK-LABEL: f3: ++; CHECK: llcr %r0, %r2 ++; CHECK: popcnt %r2, %r0 ++; CHECK: br %r14 ++ %and = and i32 %a, 255 ++ %popcnt = call i32 @llvm.ctpop.i32(i32 %and) ++ ret i32 %popcnt ++} ++ ++define i64 @f4(i64 %a) { ++; CHECK-LABEL: f4: ++; CHECK: popcnt %r0, %r2 ++; CHECK: sllg %r1, %r0, 32 ++; CHECK: agr %r1, %r0 ++; CHECK: sllg %r0, %r1, 16 ++; CHECK: agr %r0, %r1 ++; CHECK: sllg %r1, %r0, 8 ++; CHECK: agr %r1, %r0 ++; CHECK: srlg %r2, %r1, 56 ++; CHECK: br %r14 ++ %popcnt = call i64 @llvm.ctpop.i64(i64 %a) ++ ret i64 %popcnt ++} ++ ++define i64 @f5(i64 %a) { ++; CHECK-LABEL: f5: ++; CHECK: llgfr %r0, %r2 ++; CHECK: popcnt %r0, %r0 ++; CHECK: sllg %r1, %r0, 16 ++; CHECK: algfr %r0, %r1 ++; CHECK: sllg %r1, %r0, 8 ++; CHECK: algfr %r0, %r1 ++; CHECK: srlg %r2, %r0, 24 ++ %and = and i64 %a, 4294967295 ++ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) ++ ret i64 %popcnt ++} ++ ++define i64 @f6(i64 %a) { ++; CHECK-LABEL: f6: ++; CHECK: llghr %r0, %r2 ++; CHECK: popcnt %r0, %r0 ++; CHECK: risbg %r1, %r0, 48, 183, 8 ++; CHECK: agr %r1, %r0 ++; CHECK: srlg %r2, %r1, 8 ++; CHECK: br %r14 ++ %and = and i64 %a, 65535 ++ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) ++ ret i64 %popcnt ++} ++ ++define i64 @f7(i64 %a) { ++; CHECK-LABEL: f7: ++; CHECK: llgcr %r0, %r2 ++; CHECK: popcnt %r2, %r0 ++; CHECK: br %r14 ++ %and = and i64 %a, 255 ++ %popcnt = call i64 @llvm.ctpop.i64(i64 %and) ++ ret i64 %popcnt ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-01.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-abs-01.ll +@@ -1,6 +1,7 @@ + ; Test floating-point absolute. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test f32. + declare float @llvm.fabs.f32(float %f) +Index: llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-abs-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-abs-02.ll +@@ -1,6 +1,7 @@ + ; Test negated floating-point absolute. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test f32. + declare float @llvm.fabs.f32(float %f) +Index: llvm-36/test/CodeGen/SystemZ/fp-add-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-add-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-add-02.ll +@@ -1,7 +1,8 @@ + ; Test 64-bit floating-point addition. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s +- ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + declare double @foo() + + ; Check register addition. +@@ -76,7 +77,7 @@ define double @f6(double %f1, double *%b + define double @f7(double *%ptr0) { + ; CHECK-LABEL: f7: + ; CHECK: brasl %r14, foo@PLT +-; CHECK: adb %f0, 160(%r15) ++; CHECK-SCALAR: adb %f0, 160(%r15) + ; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 +Index: llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-cmp-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-cmp-02.ll +@@ -1,7 +1,10 @@ + ; Test 64-bit floating-point comparison. The tests assume a z10 implementation + ; of select, using conditional branches rather than LOCGR. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + + declare double @foo() + +@@ -9,8 +12,9 @@ declare double @foo() + define i64 @f1(i64 %a, i64 %b, double %f1, double %f2) { + ; CHECK-LABEL: f1: + ; CHECK: cdbr %f0, %f2 +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %cond = fcmp oeq double %f1, %f2 + %res = select i1 %cond, i64 %a, i64 %b +@@ -21,8 +25,9 @@ define i64 @f1(i64 %a, i64 %b, double %f + define i64 @f2(i64 %a, i64 %b, double %f1, double *%ptr) { + ; CHECK-LABEL: f2: + ; CHECK: cdb %f0, 0(%r4) +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %f2 = load double *%ptr + %cond = fcmp oeq double %f1, %f2 +@@ -34,8 +39,9 @@ define i64 @f2(i64 %a, i64 %b, double %f + define i64 @f3(i64 %a, i64 %b, double %f1, double *%base) { + ; CHECK-LABEL: f3: + ; CHECK: cdb %f0, 4088(%r4) +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 511 + %f2 = load double *%ptr +@@ -50,8 +56,9 @@ define i64 @f4(i64 %a, i64 %b, double %f + ; CHECK-LABEL: f4: + ; CHECK: aghi %r4, 4096 + ; CHECK: cdb %f0, 0(%r4) +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 512 + %f2 = load double *%ptr +@@ -65,8 +72,9 @@ define i64 @f5(i64 %a, i64 %b, double %f + ; CHECK-LABEL: f5: + ; CHECK: aghi %r4, -8 + ; CHECK: 
cdb %f0, 0(%r4) +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %ptr = getelementptr double *%base, i64 -1 + %f2 = load double *%ptr +@@ -80,8 +88,9 @@ define i64 @f6(i64 %a, i64 %b, double %f + ; CHECK-LABEL: f6: + ; CHECK: sllg %r1, %r5, 3 + ; CHECK: cdb %f0, 800(%r1,%r4) +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %ptr1 = getelementptr double *%base, i64 %index + %ptr2 = getelementptr double *%ptr1, i64 100 +@@ -95,7 +104,7 @@ define i64 @f6(i64 %a, i64 %b, double %f + define double @f7(double *%ptr0) { + ; CHECK-LABEL: f7: + ; CHECK: brasl %r14, foo@PLT +-; CHECK: cdb {{%f[0-9]+}}, 160(%r15) ++; CHECK-SCALAR: cdb {{%f[0-9]+}}, 160(%r15) + ; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 +@@ -152,9 +161,12 @@ define double @f7(double *%ptr0) { + ; Check comparison with zero. + define i64 @f8(i64 %a, i64 %b, double %f) { + ; CHECK-LABEL: f8: +-; CHECK: ltdbr %f0, %f0 +-; CHECK-NEXT: je +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR: ltdbr %f0, %f0 ++; CHECK-SCALAR-NEXT: je ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR: lzdr %f1 ++; CHECK-VECTOR-NEXT: cdbr %f0, %f1 ++; CHECK-VECTOR-NEXT: locgrne %r2, %r3 + ; CHECK: br %r14 + %cond = fcmp oeq double %f, 0.0 + %res = select i1 %cond, i64 %a, i64 %b +@@ -165,8 +177,9 @@ define i64 @f8(i64 %a, i64 %b, double %f + define i64 @f9(i64 %a, i64 %b, double %f2, double *%ptr) { + ; CHECK-LABEL: f9: + ; CHECK: cdb %f0, 0(%r4) +-; CHECK-NEXT: jl {{\.L.*}} +-; CHECK: lgr %r2, %r3 ++; CHECK-SCALAR-NEXT: jl ++; CHECK-SCALAR: lgr %r2, %r3 ++; CHECK-VECTOR-NEXT: locgrnl %r2, %r3 + ; CHECK: br %r14 + %f1 = load double *%ptr + %cond = fcmp ogt double %f1, %f2 +Index: llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-01.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-conv-01.ll +@@ -1,11 +1,15 @@ + ; Test floating-point truncations. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + + ; Test f64->f32. + define float @f1(double %d1, double %d2) { + ; CHECK-LABEL: f1: +-; CHECK: ledbr %f0, %f2 ++; CHECK-SCALAR: ledbr %f0, %f2 ++; CHECK-VECTOR: ledbra %f0, 0, %f2, 0 + ; CHECK: br %r14 + %res = fptrunc double %d2 to float + ret float %res +@@ -50,8 +54,10 @@ define double @f4(fp128 *%ptr) { + define void @f5(double *%dst, fp128 *%ptr, double %d1, double %d2) { + ; CHECK-LABEL: f5: + ; CHECK: ldxbr %f1, %f1 +-; CHECK: adbr %f1, %f2 +-; CHECK: std %f1, 0(%r2) ++; CHECK-SCALAR: adbr %f1, %f2 ++; CHECK-SCALAR: std %f1, 0(%r2) ++; CHECK-VECTOR: wfadb [[REG:%f[0-9]+]], %f1, %f2 ++; CHECK-VECTOR: std [[REG]], 0(%r2) + ; CHECK: br %r14 + %val = load fp128 *%ptr + %conv = fptrunc fp128 %val to double +Index: llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-conv-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-conv-02.ll +@@ -1,6 +1,8 @@ + ; Test extensions of f32 to f64. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Check register extension. + define double @f1(float %val) { +@@ -74,7 +76,7 @@ define double @f6(float *%base, i64 %ind + ; to use LDEB if possible. + define void @f7(double *%ptr1, float *%ptr2) { + ; CHECK-LABEL: f7: +-; CHECK: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) ++; CHECK-SCALAR: ldeb {{%f[0-9]+}}, 16{{[04]}}(%r15) + ; CHECK: br %r14 + %val0 = load volatile float *%ptr2 + %val1 = load volatile float *%ptr2 +Index: llvm-36/test/CodeGen/SystemZ/fp-div-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-div-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-div-02.ll +@@ -1,6 +1,8 @@ + ; Test 64-bit floating-point division. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + declare double @foo() + +@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b + define double @f7(double *%ptr0) { + ; CHECK-LABEL: f7: + ; CHECK: brasl %r14, foo@PLT +-; CHECK: ddb %f0, 160(%r15) ++; CHECK-SCALAR: ddb %f0, 160(%r15) + ; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 +Index: llvm-36/test/CodeGen/SystemZ/fp-move-01.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-move-01.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-move-01.ll +@@ -1,11 +1,13 @@ + ; Test moves between FPRs. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test f32 moves. + define float @f1(float %a, float %b) { + ; CHECK-LABEL: f1: + ; CHECK: ler %f0, %f2 ++; CHECK: br %r14 + ret float %b + } + +@@ -13,6 +15,7 @@ define float @f1(float %a, float %b) { + define double @f2(double %a, double %b) { + ; CHECK-LABEL: f2: + ; CHECK: ldr %f0, %f2 ++; CHECK: br %r14 + ret double %b + } + +@@ -22,6 +25,7 @@ define void @f3(fp128 *%x) { + ; CHECK-LABEL: f3: + ; CHECK: lxr + ; CHECK: axbr ++; CHECK: br %r14 + %val = load volatile fp128 *%x + %sum = fadd fp128 %val, %val + store volatile fp128 %sum, fp128 *%x +Index: llvm-36/test/CodeGen/SystemZ/fp-move-04.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-move-04.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-move-04.ll +@@ -1,6 +1,7 @@ + ; Test 64-bit floating-point loads. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test the low end of the LD range. + define double @f1(double *%src) { +Index: llvm-36/test/CodeGen/SystemZ/fp-move-07.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-move-07.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-move-07.ll +@@ -1,6 +1,7 @@ + ; Test 64-bit floating-point stores. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test the low end of the STD range. + define void @f1(double *%src, double %val) { +Index: llvm-36/test/CodeGen/SystemZ/fp-move-09.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-move-09.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-move-09.ll +@@ -1,4 +1,4 @@ +-; Test moves between FPRs and GPRs for z196 and above. ++; Test moves between FPRs and GPRs for z196 and zEC12. + ; + ; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s + +Index: llvm-36/test/CodeGen/SystemZ/fp-move-10.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/fp-move-10.ll +@@ -0,0 +1,61 @@ ++; Test moves between FPRs and GPRs for z13 and above. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Check that moves from i32s to floats use a low GR32 and vector operation. ++define float @f1(i16 *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: llh [[REG:%r[0-5]]], 0(%r2) ++; CHECK: oilh [[REG]], 16256 ++; CHECK: vlvgf %v0, [[REG]], 0 ++; CHECK: br %r14 ++ %base = load i16 *%ptr ++ %ext = zext i16 %base to i32 ++ %full = or i32 %ext, 1065353216 ++ %res = bitcast i32 %full to float ++ ret float %res ++} ++ ++; Check that moves from floats to i32s use a low GR32 and vector operation. ++define void @f2(float %val, i8 *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: stc [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %res = bitcast float %val to i32 ++ %trunc = trunc i32 %res to i8 ++ store i8 %trunc, i8 *%ptr ++ ret void ++} ++ ++; Like f2, but with a conditional store. ++define void @f3(float %val, i8 *%ptr, i32 %which) { ++; CHECK-LABEL: f3: ++; CHECK-DAG: cijlh %r3, 0, ++; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: stc [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %int = bitcast float %val to i32 ++ %trunc = trunc i32 %int to i8 ++ %old = load i8 *%ptr ++ %cmp = icmp eq i32 %which, 0 ++ %res = select i1 %cmp, i8 %trunc, i8 %old ++ store i8 %res, i8 *%ptr ++ ret void ++} ++ ++; ...and again with 16-bit memory. ++define void @f4(float %val, i16 *%ptr, i32 %which) { ++; CHECK-LABEL: f4: ++; CHECK-DAG: cijlh %r3, 0, ++; CHECK-DAG: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: sth [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %int = bitcast float %val to i32 ++ %trunc = trunc i32 %int to i16 ++ %old = load i16 *%ptr ++ %cmp = icmp eq i32 %which, 0 ++ %res = select i1 %cmp, i16 %trunc, i16 %old ++ store i16 %res, i16 *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/fp-move-11.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/fp-move-11.ll +@@ -0,0 +1,110 @@ ++; Test 32-bit floating-point loads for z13. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test that we use LDE instead of LE - low end of the LE range. ++define float @f1(float *%src) { ++; CHECK-LABEL: f1: ++; CHECK: lde %f0, 0(%r2) ++; CHECK: br %r14 ++ %val = load float *%src ++ ret float %val ++} ++ ++; Test that we use LDE instead of LE - high end of the LE range. 
++define float @f2(float *%src) { ++; CHECK-LABEL: f2: ++; CHECK: lde %f0, 4092(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 1023 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the next word up, which should use LEY instead of LDE. ++define float @f3(float *%src) { ++; CHECK-LABEL: f3: ++; CHECK: ley %f0, 4096(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 1024 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the high end of the aligned LEY range. ++define float @f4(float *%src) { ++; CHECK-LABEL: f4: ++; CHECK: ley %f0, 524284(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 131071 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the next word up, which needs separate address logic. ++; Other sequences besides this one would be OK. ++define float @f5(float *%src) { ++; CHECK-LABEL: f5: ++; CHECK: agfi %r2, 524288 ++; CHECK: lde %f0, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 131072 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the high end of the negative aligned LEY range. ++define float @f6(float *%src) { ++; CHECK-LABEL: f6: ++; CHECK: ley %f0, -4(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 -1 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the low end of the LEY range. ++define float @f7(float *%src) { ++; CHECK-LABEL: f7: ++; CHECK: ley %f0, -524288(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 -131072 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check the next word down, which needs separate address logic. ++; Other sequences besides this one would be OK. ++define float @f8(float *%src) { ++; CHECK-LABEL: f8: ++; CHECK: agfi %r2, -524292 ++; CHECK: lde %f0, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%src, i64 -131073 ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check that LDE allows an index. ++define float @f9(i64 %src, i64 %index) { ++; CHECK-LABEL: f9: ++; CHECK: lde %f0, 4092({{%r3,%r2|%r2,%r3}}) ++; CHECK: br %r14 ++ %add1 = add i64 %src, %index ++ %add2 = add i64 %add1, 4092 ++ %ptr = inttoptr i64 %add2 to float * ++ %val = load float *%ptr ++ ret float %val ++} ++ ++; Check that LEY allows an index. ++define float @f10(i64 %src, i64 %index) { ++; CHECK-LABEL: f10: ++; CHECK: ley %f0, 4096({{%r3,%r2|%r2,%r3}}) ++; CHECK: br %r14 ++ %add1 = add i64 %src, %index ++ %add2 = add i64 %add1, 4096 ++ %ptr = inttoptr i64 %add2 to float * ++ %val = load float *%ptr ++ ret float %val ++} +Index: llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-03.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-mul-03.ll +@@ -1,6 +1,8 @@ + ; Test multiplication of two f64s, producing an f64 result. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + declare double @foo() + +@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b + define double @f7(double *%ptr0) { + ; CHECK-LABEL: f7: + ; CHECK: brasl %r14, foo@PLT +-; CHECK: mdb %f0, 160(%r15) ++; CHECK-SCALAR: mdb %f0, 160(%r15) + ; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 +Index: llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-07.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-mul-07.ll +@@ -1,11 +1,15 @@ +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + + declare double @llvm.fma.f64(double %f1, double %f2, double %f3) + + define double @f1(double %f1, double %f2, double %acc) { + ; CHECK-LABEL: f1: +-; CHECK: madbr %f4, %f0, %f2 +-; CHECK: ldr %f0, %f4 ++; CHECK-SCALAR: madbr %f4, %f0, %f2 ++; CHECK-SCALAR: ldr %f0, %f4 ++; CHECK-VECTOR: wfmadb %f0, %f0, %f2, %f4 + ; CHECK: br %r14 + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %acc) + ret double %res +Index: llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-mul-09.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-mul-09.ll +@@ -1,11 +1,15 @@ +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + + declare double @llvm.fma.f64(double %f1, double %f2, double %f3) + + define double @f1(double %f1, double %f2, double %acc) { + ; CHECK-LABEL: f1: +-; CHECK: msdbr %f4, %f0, %f2 +-; CHECK: ldr %f0, %f4 ++; CHECK-SCALAR: msdbr %f4, %f0, %f2 ++; CHECK-SCALAR: ldr %f0, %f4 ++; CHECK-VECTOR: wfmsdb %f0, %f0, %f2, %f4 + ; CHECK: br %r14 + %negacc = fsub double -0.0, %acc + %res = call double @llvm.fma.f64 (double %f1, double %f2, double %negacc) +Index: llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-neg-01.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-neg-01.ll +@@ -1,6 +1,7 @@ + ; Test floating-point negation. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + ; Test f32. + define float @f1(float %f) { +Index: llvm-36/test/CodeGen/SystemZ/fp-round-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-round-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-round-02.ll +@@ -1,6 +1,9 @@ + ; Test rounding functions for z196 and above. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-VECTOR %s + + ; Test rint for f32. + declare float @llvm.rint.f32(float %f) +@@ -16,7 +19,8 @@ define float @f1(float %f) { + declare double @llvm.rint.f64(double %f) + define double @f2(double %f) { + ; CHECK-LABEL: f2: +-; CHECK: fidbr %f0, 0, %f0 ++; CHECK-SCALAR: fidbr %f0, 0, %f0 ++; CHECK-VECTOR: fidbra %f0, 0, %f0, 0 + ; CHECK: br %r14 + %res = call double @llvm.rint.f64(double %f) + ret double %res +Index: llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-sqrt-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-sqrt-02.ll +@@ -1,6 +1,8 @@ + ; Test 64-bit square root. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + declare double @llvm.sqrt.f64(double %f) + declare double @sqrt(double) +@@ -77,7 +79,7 @@ define double @f6(double *%base, i64 %in + ; to use SQDB if possible. + define void @f7(double *%ptr) { + ; CHECK-LABEL: f7: +-; CHECK: sqdb {{%f[0-9]+}}, 160(%r15) ++; CHECK-SCALAR: sqdb {{%f[0-9]+}}, 160(%r15) + ; CHECK: br %r14 + %val0 = load volatile double *%ptr + %val1 = load volatile double *%ptr +Index: llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/fp-sub-02.ll ++++ llvm-36/test/CodeGen/SystemZ/fp-sub-02.ll +@@ -1,6 +1,8 @@ + ; Test 64-bit floating-point subtraction. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 \ ++; RUN: | FileCheck -check-prefix=CHECK -check-prefix=CHECK-SCALAR %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s + + declare double @foo() + +@@ -76,7 +78,7 @@ define double @f6(double %f1, double *%b + define double @f7(double *%ptr0) { + ; CHECK-LABEL: f7: + ; CHECK: brasl %r14, foo@PLT +-; CHECK: sdb %f0, 16{{[04]}}(%r15) ++; CHECK-SCALAR: sdb %f0, 16{{[04]}}(%r15) + ; CHECK: br %r14 + %ptr1 = getelementptr double *%ptr0, i64 2 + %ptr2 = getelementptr double *%ptr0, i64 4 +Index: llvm-36/test/CodeGen/SystemZ/frame-03.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/frame-03.ll ++++ llvm-36/test/CodeGen/SystemZ/frame-03.ll +@@ -2,7 +2,7 @@ + ; uses a different register class, but the set of saved and restored + ; registers should be the same. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + + ; This function should require all FPRs, but no other spill slots. + ; We need to save and restore 8 of the 16 FPRs, so the frame size +Index: llvm-36/test/CodeGen/SystemZ/frame-07.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/frame-07.ll ++++ llvm-36/test/CodeGen/SystemZ/frame-07.ll +@@ -1,7 +1,7 @@ + ; Test the saving and restoring of FPRs in large frames. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck -check-prefix=CHECK-NOFP %s +-; RUN: llc < %s -mtriple=s390x-linux-gnu -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck -check-prefix=CHECK-NOFP %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -disable-fp-elim | FileCheck -check-prefix=CHECK-FP %s + + ; Test a frame size that requires some FPRs to be saved and loaded using + ; the 20-bit STDY and LDY while others can use the 12-bit STD and LD. +Index: llvm-36/test/CodeGen/SystemZ/frame-17.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/frame-17.ll ++++ llvm-36/test/CodeGen/SystemZ/frame-17.ll +@@ -1,6 +1,6 @@ + ; Test spilling of FPRs. + ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | FileCheck %s + + ; We need to save and restore 8 of the 16 FPRs and allocate an additional + ; 4-byte spill slot, rounded to 8 bytes. The frame size should be exactly +Index: llvm-36/test/CodeGen/SystemZ/frame-19.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/frame-19.ll +@@ -0,0 +1,314 @@ ++; Test spilling of vector registers. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; We need to allocate a 16-byte spill slot and save the 8 call-saved FPRs. ++; The frame size should be exactly 160 + 16 + 8 * 8 = 240. ++define void @f1(<16 x i8> *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: aghi %r15, -240 ++; CHECK-DAG: std %f8, ++; CHECK-DAG: std %f9, ++; CHECK-DAG: std %f10, ++; CHECK-DAG: std %f11, ++; CHECK-DAG: std %f12, ++; CHECK-DAG: std %f13, ++; CHECK-DAG: std %f14, ++; CHECK-DAG: std %f15, ++; CHECK: vst {{%v[0-9]+}}, 160(%r15) ++; CHECK: vl {{%v[0-9]+}}, 160(%r15) ++; CHECK-DAG: ld %f8, ++; CHECK-DAG: ld %f9, ++; CHECK-DAG: ld %f10, ++; CHECK-DAG: ld %f11, ++; CHECK-DAG: ld %f12, ++; CHECK-DAG: ld %f13, ++; CHECK-DAG: ld %f14, ++; CHECK-DAG: ld %f15, ++; CHECK: aghi %r15, 240 ++; CHECK: br %r14 ++ %v0 = load volatile <16 x i8> *%ptr ++ %v1 = load volatile <16 x i8> *%ptr ++ %v2 = load volatile <16 x i8> *%ptr ++ %v3 = load volatile <16 x i8> *%ptr ++ %v4 = load volatile <16 x i8> *%ptr ++ %v5 = load volatile <16 x i8> *%ptr ++ %v6 = load volatile <16 x i8> *%ptr ++ %v7 = load volatile <16 x i8> *%ptr ++ %v8 = load volatile <16 x i8> *%ptr ++ %v9 = load volatile <16 x i8> *%ptr ++ %v10 = load volatile <16 x i8> *%ptr ++ %v11 = load volatile <16 x i8> *%ptr ++ %v12 = load volatile <16 x i8> *%ptr ++ %v13 = load volatile <16 x i8> *%ptr ++ %v14 = load volatile <16 x i8> *%ptr ++ %v15 = load volatile <16 x i8> *%ptr ++ %v16 = load volatile <16 x i8> *%ptr ++ %v17 = load volatile <16 x i8> *%ptr ++ %v18 = load volatile <16 x i8> *%ptr ++ %v19 = load volatile <16 x i8> *%ptr ++ %v20 = load volatile <16 x i8> *%ptr ++ %v21 = load volatile <16 x i8> *%ptr ++ %v22 = load volatile <16 x i8> *%ptr ++ %v23 = load volatile <16 x i8> *%ptr ++ %v24 = load volatile <16 x i8> *%ptr ++ %v25 = load volatile <16 x i8> *%ptr ++ %v26 = load volatile <16 x i8> *%ptr ++ %v27 = load volatile <16 x i8> *%ptr ++ %v28 = load volatile <16 x i8> *%ptr ++ %v29 = load volatile <16 x i8> *%ptr ++ %v30 = load volatile <16 x i8> *%ptr ++ %v31 = load volatile <16 x i8> *%ptr ++ %vx = load volatile <16 x i8> *%ptr ++ store volatile <16 x i8> %vx, <16 x i8> *%ptr ++ store volatile <16 x i8> %v31, <16 x i8> *%ptr ++ store volatile <16 x 
i8> %v30, <16 x i8> *%ptr ++ store volatile <16 x i8> %v29, <16 x i8> *%ptr ++ store volatile <16 x i8> %v28, <16 x i8> *%ptr ++ store volatile <16 x i8> %v27, <16 x i8> *%ptr ++ store volatile <16 x i8> %v26, <16 x i8> *%ptr ++ store volatile <16 x i8> %v25, <16 x i8> *%ptr ++ store volatile <16 x i8> %v24, <16 x i8> *%ptr ++ store volatile <16 x i8> %v23, <16 x i8> *%ptr ++ store volatile <16 x i8> %v22, <16 x i8> *%ptr ++ store volatile <16 x i8> %v21, <16 x i8> *%ptr ++ store volatile <16 x i8> %v20, <16 x i8> *%ptr ++ store volatile <16 x i8> %v19, <16 x i8> *%ptr ++ store volatile <16 x i8> %v18, <16 x i8> *%ptr ++ store volatile <16 x i8> %v17, <16 x i8> *%ptr ++ store volatile <16 x i8> %v16, <16 x i8> *%ptr ++ store volatile <16 x i8> %v15, <16 x i8> *%ptr ++ store volatile <16 x i8> %v14, <16 x i8> *%ptr ++ store volatile <16 x i8> %v13, <16 x i8> *%ptr ++ store volatile <16 x i8> %v12, <16 x i8> *%ptr ++ store volatile <16 x i8> %v11, <16 x i8> *%ptr ++ store volatile <16 x i8> %v10, <16 x i8> *%ptr ++ store volatile <16 x i8> %v9, <16 x i8> *%ptr ++ store volatile <16 x i8> %v8, <16 x i8> *%ptr ++ store volatile <16 x i8> %v7, <16 x i8> *%ptr ++ store volatile <16 x i8> %v6, <16 x i8> *%ptr ++ store volatile <16 x i8> %v5, <16 x i8> *%ptr ++ store volatile <16 x i8> %v4, <16 x i8> *%ptr ++ store volatile <16 x i8> %v3, <16 x i8> *%ptr ++ store volatile <16 x i8> %v2, <16 x i8> *%ptr ++ store volatile <16 x i8> %v1, <16 x i8> *%ptr ++ store volatile <16 x i8> %v0, <16 x i8> *%ptr ++ ret void ++} ++ ++; Like f1, but no 16-byte slot should be needed. ++define void @f2(<16 x i8> *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: aghi %r15, -224 ++; CHECK-DAG: std %f8, ++; CHECK-DAG: std %f9, ++; CHECK-DAG: std %f10, ++; CHECK-DAG: std %f11, ++; CHECK-DAG: std %f12, ++; CHECK-DAG: std %f13, ++; CHECK-DAG: std %f14, ++; CHECK-DAG: std %f15, ++; CHECK-NOT: vst {{.*}}(%r15) ++; CHECK-NOT: vl {{.*}}(%r15) ++; CHECK-DAG: ld %f8, ++; CHECK-DAG: ld %f9, ++; CHECK-DAG: ld %f10, ++; CHECK-DAG: ld %f11, ++; CHECK-DAG: ld %f12, ++; CHECK-DAG: ld %f13, ++; CHECK-DAG: ld %f14, ++; CHECK-DAG: ld %f15, ++; CHECK: aghi %r15, 224 ++; CHECK: br %r14 ++ %v0 = load volatile <16 x i8> *%ptr ++ %v1 = load volatile <16 x i8> *%ptr ++ %v2 = load volatile <16 x i8> *%ptr ++ %v3 = load volatile <16 x i8> *%ptr ++ %v4 = load volatile <16 x i8> *%ptr ++ %v5 = load volatile <16 x i8> *%ptr ++ %v6 = load volatile <16 x i8> *%ptr ++ %v7 = load volatile <16 x i8> *%ptr ++ %v8 = load volatile <16 x i8> *%ptr ++ %v9 = load volatile <16 x i8> *%ptr ++ %v10 = load volatile <16 x i8> *%ptr ++ %v11 = load volatile <16 x i8> *%ptr ++ %v12 = load volatile <16 x i8> *%ptr ++ %v13 = load volatile <16 x i8> *%ptr ++ %v14 = load volatile <16 x i8> *%ptr ++ %v15 = load volatile <16 x i8> *%ptr ++ %v16 = load volatile <16 x i8> *%ptr ++ %v17 = load volatile <16 x i8> *%ptr ++ %v18 = load volatile <16 x i8> *%ptr ++ %v19 = load volatile <16 x i8> *%ptr ++ %v20 = load volatile <16 x i8> *%ptr ++ %v21 = load volatile <16 x i8> *%ptr ++ %v22 = load volatile <16 x i8> *%ptr ++ %v23 = load volatile <16 x i8> *%ptr ++ %v24 = load volatile <16 x i8> *%ptr ++ %v25 = load volatile <16 x i8> *%ptr ++ %v26 = load volatile <16 x i8> *%ptr ++ %v27 = load volatile <16 x i8> *%ptr ++ %v28 = load volatile <16 x i8> *%ptr ++ %v29 = load volatile <16 x i8> *%ptr ++ %v30 = load volatile <16 x i8> *%ptr ++ %v31 = load volatile <16 x i8> *%ptr ++ store volatile <16 x i8> %v31, <16 x i8> *%ptr ++ store volatile <16 x i8> %v30, <16 x i8> *%ptr ++ store volatile 
<16 x i8> %v29, <16 x i8> *%ptr ++ store volatile <16 x i8> %v28, <16 x i8> *%ptr ++ store volatile <16 x i8> %v27, <16 x i8> *%ptr ++ store volatile <16 x i8> %v26, <16 x i8> *%ptr ++ store volatile <16 x i8> %v25, <16 x i8> *%ptr ++ store volatile <16 x i8> %v24, <16 x i8> *%ptr ++ store volatile <16 x i8> %v23, <16 x i8> *%ptr ++ store volatile <16 x i8> %v22, <16 x i8> *%ptr ++ store volatile <16 x i8> %v21, <16 x i8> *%ptr ++ store volatile <16 x i8> %v20, <16 x i8> *%ptr ++ store volatile <16 x i8> %v19, <16 x i8> *%ptr ++ store volatile <16 x i8> %v18, <16 x i8> *%ptr ++ store volatile <16 x i8> %v17, <16 x i8> *%ptr ++ store volatile <16 x i8> %v16, <16 x i8> *%ptr ++ store volatile <16 x i8> %v15, <16 x i8> *%ptr ++ store volatile <16 x i8> %v14, <16 x i8> *%ptr ++ store volatile <16 x i8> %v13, <16 x i8> *%ptr ++ store volatile <16 x i8> %v12, <16 x i8> *%ptr ++ store volatile <16 x i8> %v11, <16 x i8> *%ptr ++ store volatile <16 x i8> %v10, <16 x i8> *%ptr ++ store volatile <16 x i8> %v9, <16 x i8> *%ptr ++ store volatile <16 x i8> %v8, <16 x i8> *%ptr ++ store volatile <16 x i8> %v7, <16 x i8> *%ptr ++ store volatile <16 x i8> %v6, <16 x i8> *%ptr ++ store volatile <16 x i8> %v5, <16 x i8> *%ptr ++ store volatile <16 x i8> %v4, <16 x i8> *%ptr ++ store volatile <16 x i8> %v3, <16 x i8> *%ptr ++ store volatile <16 x i8> %v2, <16 x i8> *%ptr ++ store volatile <16 x i8> %v1, <16 x i8> *%ptr ++ store volatile <16 x i8> %v0, <16 x i8> *%ptr ++ ret void ++} ++ ++; Like f2, but only %f8 should be saved. ++define void @f3(<16 x i8> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: aghi %r15, -168 ++; CHECK-DAG: std %f8, ++; CHECK-NOT: vst {{.*}}(%r15) ++; CHECK-NOT: vl {{.*}}(%r15) ++; CHECK-NOT: %v9 ++; CHECK-NOT: %v10 ++; CHECK-NOT: %v11 ++; CHECK-NOT: %v12 ++; CHECK-NOT: %v13 ++; CHECK-NOT: %v14 ++; CHECK-NOT: %v15 ++; CHECK-DAG: ld %f8, ++; CHECK: aghi %r15, 168 ++; CHECK: br %r14 ++ %v0 = load volatile <16 x i8> *%ptr ++ %v1 = load volatile <16 x i8> *%ptr ++ %v2 = load volatile <16 x i8> *%ptr ++ %v3 = load volatile <16 x i8> *%ptr ++ %v4 = load volatile <16 x i8> *%ptr ++ %v5 = load volatile <16 x i8> *%ptr ++ %v6 = load volatile <16 x i8> *%ptr ++ %v7 = load volatile <16 x i8> *%ptr ++ %v8 = load volatile <16 x i8> *%ptr ++ %v16 = load volatile <16 x i8> *%ptr ++ %v17 = load volatile <16 x i8> *%ptr ++ %v18 = load volatile <16 x i8> *%ptr ++ %v19 = load volatile <16 x i8> *%ptr ++ %v20 = load volatile <16 x i8> *%ptr ++ %v21 = load volatile <16 x i8> *%ptr ++ %v22 = load volatile <16 x i8> *%ptr ++ %v23 = load volatile <16 x i8> *%ptr ++ %v24 = load volatile <16 x i8> *%ptr ++ %v25 = load volatile <16 x i8> *%ptr ++ %v26 = load volatile <16 x i8> *%ptr ++ %v27 = load volatile <16 x i8> *%ptr ++ %v28 = load volatile <16 x i8> *%ptr ++ %v29 = load volatile <16 x i8> *%ptr ++ %v30 = load volatile <16 x i8> *%ptr ++ %v31 = load volatile <16 x i8> *%ptr ++ store volatile <16 x i8> %v31, <16 x i8> *%ptr ++ store volatile <16 x i8> %v30, <16 x i8> *%ptr ++ store volatile <16 x i8> %v29, <16 x i8> *%ptr ++ store volatile <16 x i8> %v28, <16 x i8> *%ptr ++ store volatile <16 x i8> %v27, <16 x i8> *%ptr ++ store volatile <16 x i8> %v26, <16 x i8> *%ptr ++ store volatile <16 x i8> %v25, <16 x i8> *%ptr ++ store volatile <16 x i8> %v24, <16 x i8> *%ptr ++ store volatile <16 x i8> %v23, <16 x i8> *%ptr ++ store volatile <16 x i8> %v22, <16 x i8> *%ptr ++ store volatile <16 x i8> %v21, <16 x i8> *%ptr ++ store volatile <16 x i8> %v20, <16 x i8> *%ptr ++ store volatile <16 x i8> %v19, <16 x i8> *%ptr 
++ store volatile <16 x i8> %v18, <16 x i8> *%ptr ++ store volatile <16 x i8> %v17, <16 x i8> *%ptr ++ store volatile <16 x i8> %v16, <16 x i8> *%ptr ++ store volatile <16 x i8> %v8, <16 x i8> *%ptr ++ store volatile <16 x i8> %v7, <16 x i8> *%ptr ++ store volatile <16 x i8> %v6, <16 x i8> *%ptr ++ store volatile <16 x i8> %v5, <16 x i8> *%ptr ++ store volatile <16 x i8> %v4, <16 x i8> *%ptr ++ store volatile <16 x i8> %v3, <16 x i8> *%ptr ++ store volatile <16 x i8> %v2, <16 x i8> *%ptr ++ store volatile <16 x i8> %v1, <16 x i8> *%ptr ++ store volatile <16 x i8> %v0, <16 x i8> *%ptr ++ ret void ++} ++ ++; Like f2, but no registers should be saved. ++define void @f4(<16 x i8> *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK-NOT: %r15 ++; CHECK: br %r14 ++ %v0 = load volatile <16 x i8> *%ptr ++ %v1 = load volatile <16 x i8> *%ptr ++ %v2 = load volatile <16 x i8> *%ptr ++ %v3 = load volatile <16 x i8> *%ptr ++ %v4 = load volatile <16 x i8> *%ptr ++ %v5 = load volatile <16 x i8> *%ptr ++ %v6 = load volatile <16 x i8> *%ptr ++ %v7 = load volatile <16 x i8> *%ptr ++ %v16 = load volatile <16 x i8> *%ptr ++ %v17 = load volatile <16 x i8> *%ptr ++ %v18 = load volatile <16 x i8> *%ptr ++ %v19 = load volatile <16 x i8> *%ptr ++ %v20 = load volatile <16 x i8> *%ptr ++ %v21 = load volatile <16 x i8> *%ptr ++ %v22 = load volatile <16 x i8> *%ptr ++ %v23 = load volatile <16 x i8> *%ptr ++ %v24 = load volatile <16 x i8> *%ptr ++ %v25 = load volatile <16 x i8> *%ptr ++ %v26 = load volatile <16 x i8> *%ptr ++ %v27 = load volatile <16 x i8> *%ptr ++ %v28 = load volatile <16 x i8> *%ptr ++ %v29 = load volatile <16 x i8> *%ptr ++ %v30 = load volatile <16 x i8> *%ptr ++ %v31 = load volatile <16 x i8> *%ptr ++ store volatile <16 x i8> %v31, <16 x i8> *%ptr ++ store volatile <16 x i8> %v30, <16 x i8> *%ptr ++ store volatile <16 x i8> %v29, <16 x i8> *%ptr ++ store volatile <16 x i8> %v28, <16 x i8> *%ptr ++ store volatile <16 x i8> %v27, <16 x i8> *%ptr ++ store volatile <16 x i8> %v26, <16 x i8> *%ptr ++ store volatile <16 x i8> %v25, <16 x i8> *%ptr ++ store volatile <16 x i8> %v24, <16 x i8> *%ptr ++ store volatile <16 x i8> %v23, <16 x i8> *%ptr ++ store volatile <16 x i8> %v22, <16 x i8> *%ptr ++ store volatile <16 x i8> %v21, <16 x i8> *%ptr ++ store volatile <16 x i8> %v20, <16 x i8> *%ptr ++ store volatile <16 x i8> %v19, <16 x i8> *%ptr ++ store volatile <16 x i8> %v18, <16 x i8> *%ptr ++ store volatile <16 x i8> %v17, <16 x i8> *%ptr ++ store volatile <16 x i8> %v16, <16 x i8> *%ptr ++ store volatile <16 x i8> %v7, <16 x i8> *%ptr ++ store volatile <16 x i8> %v6, <16 x i8> *%ptr ++ store volatile <16 x i8> %v5, <16 x i8> *%ptr ++ store volatile <16 x i8> %v4, <16 x i8> *%ptr ++ store volatile <16 x i8> %v3, <16 x i8> *%ptr ++ store volatile <16 x i8> %v2, <16 x i8> *%ptr ++ store volatile <16 x i8> %v1, <16 x i8> *%ptr ++ store volatile <16 x i8> %v0, <16 x i8> *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/frame-20.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/frame-20.ll +@@ -0,0 +1,445 @@ ++; Like frame-03.ll, but for z13. In this case we have 16 more registers ++; available. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; This function should require all FPRs, but no other spill slots. ++; We need to save and restore 8 of the 16 FPRs, so the frame size ++; should be exactly 160 + 8 * 8 = 224. The CFA offset is 160 ++; (the caller-allocated part of the frame) + 224. 
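++;
++; As a worked version of the arithmetic above: the call-saved FPRs are
++; stored top-down from the new %r15, so %f8 lands at 160 + 7 * 8 = 216
++; and %f15 at 160, and the CFA offset checked below is
++; 160 (caller-allocated) + 224 (this frame) = 384.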
++define void @f1(double *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: aghi %r15, -224 ++; CHECK: .cfi_def_cfa_offset 384 ++; CHECK: std %f8, 216(%r15) ++; CHECK: std %f9, 208(%r15) ++; CHECK: std %f10, 200(%r15) ++; CHECK: std %f11, 192(%r15) ++; CHECK: std %f12, 184(%r15) ++; CHECK: std %f13, 176(%r15) ++; CHECK: std %f14, 168(%r15) ++; CHECK: std %f15, 160(%r15) ++; CHECK: .cfi_offset %f8, -168 ++; CHECK: .cfi_offset %f9, -176 ++; CHECK: .cfi_offset %f10, -184 ++; CHECK: .cfi_offset %f11, -192 ++; CHECK: .cfi_offset %f12, -200 ++; CHECK: .cfi_offset %f13, -208 ++; CHECK: .cfi_offset %f14, -216 ++; CHECK: .cfi_offset %f15, -224 ++; CHECK-DAG: ld %f0, 0(%r2) ++; CHECK-DAG: ld %f7, 0(%r2) ++; CHECK-DAG: ld %f8, 0(%r2) ++; CHECK-DAG: ld %f15, 0(%r2) ++; CHECK-DAG: vlrepg %v16, 0(%r2) ++; CHECK-DAG: vlrepg %v23, 0(%r2) ++; CHECK-DAG: vlrepg %v24, 0(%r2) ++; CHECK-DAG: vlrepg %v31, 0(%r2) ++; CHECK: ld %f8, 216(%r15) ++; CHECK: ld %f9, 208(%r15) ++; CHECK: ld %f10, 200(%r15) ++; CHECK: ld %f11, 192(%r15) ++; CHECK: ld %f12, 184(%r15) ++; CHECK: ld %f13, 176(%r15) ++; CHECK: ld %f14, 168(%r15) ++; CHECK: ld %f15, 160(%r15) ++; CHECK: aghi %r15, 224 ++; CHECK: br %r14 ++ %l0 = load volatile double *%ptr ++ %l1 = load volatile double *%ptr ++ %l2 = load volatile double *%ptr ++ %l3 = load volatile double *%ptr ++ %l4 = load volatile double *%ptr ++ %l5 = load volatile double *%ptr ++ %l6 = load volatile double *%ptr ++ %l7 = load volatile double *%ptr ++ %l8 = load volatile double *%ptr ++ %l9 = load volatile double *%ptr ++ %l10 = load volatile double *%ptr ++ %l11 = load volatile double *%ptr ++ %l12 = load volatile double *%ptr ++ %l13 = load volatile double *%ptr ++ %l14 = load volatile double *%ptr ++ %l15 = load volatile double *%ptr ++ %l16 = load volatile double *%ptr ++ %l17 = load volatile double *%ptr ++ %l18 = load volatile double *%ptr ++ %l19 = load volatile double *%ptr ++ %l20 = load volatile double *%ptr ++ %l21 = load volatile double *%ptr ++ %l22 = load volatile double *%ptr ++ %l23 = load volatile double *%ptr ++ %l24 = load volatile double *%ptr ++ %l25 = load volatile double *%ptr ++ %l26 = load volatile double *%ptr ++ %l27 = load volatile double *%ptr ++ %l28 = load volatile double *%ptr ++ %l29 = load volatile double *%ptr ++ %l30 = load volatile double *%ptr ++ %l31 = load volatile double *%ptr ++ %acc0 = fsub double %l0, %l0 ++ %acc1 = fsub double %l1, %acc0 ++ %acc2 = fsub double %l2, %acc1 ++ %acc3 = fsub double %l3, %acc2 ++ %acc4 = fsub double %l4, %acc3 ++ %acc5 = fsub double %l5, %acc4 ++ %acc6 = fsub double %l6, %acc5 ++ %acc7 = fsub double %l7, %acc6 ++ %acc8 = fsub double %l8, %acc7 ++ %acc9 = fsub double %l9, %acc8 ++ %acc10 = fsub double %l10, %acc9 ++ %acc11 = fsub double %l11, %acc10 ++ %acc12 = fsub double %l12, %acc11 ++ %acc13 = fsub double %l13, %acc12 ++ %acc14 = fsub double %l14, %acc13 ++ %acc15 = fsub double %l15, %acc14 ++ %acc16 = fsub double %l16, %acc15 ++ %acc17 = fsub double %l17, %acc16 ++ %acc18 = fsub double %l18, %acc17 ++ %acc19 = fsub double %l19, %acc18 ++ %acc20 = fsub double %l20, %acc19 ++ %acc21 = fsub double %l21, %acc20 ++ %acc22 = fsub double %l22, %acc21 ++ %acc23 = fsub double %l23, %acc22 ++ %acc24 = fsub double %l24, %acc23 ++ %acc25 = fsub double %l25, %acc24 ++ %acc26 = fsub double %l26, %acc25 ++ %acc27 = fsub double %l27, %acc26 ++ %acc28 = fsub double %l28, %acc27 ++ %acc29 = fsub double %l29, %acc28 ++ %acc30 = fsub double %l30, %acc29 ++ %acc31 = fsub double %l31, %acc30 ++ store volatile double %acc0, double *%ptr ++ 
store volatile double %acc1, double *%ptr ++ store volatile double %acc2, double *%ptr ++ store volatile double %acc3, double *%ptr ++ store volatile double %acc4, double *%ptr ++ store volatile double %acc5, double *%ptr ++ store volatile double %acc6, double *%ptr ++ store volatile double %acc7, double *%ptr ++ store volatile double %acc8, double *%ptr ++ store volatile double %acc9, double *%ptr ++ store volatile double %acc10, double *%ptr ++ store volatile double %acc11, double *%ptr ++ store volatile double %acc12, double *%ptr ++ store volatile double %acc13, double *%ptr ++ store volatile double %acc14, double *%ptr ++ store volatile double %acc15, double *%ptr ++ store volatile double %acc16, double *%ptr ++ store volatile double %acc17, double *%ptr ++ store volatile double %acc18, double *%ptr ++ store volatile double %acc19, double *%ptr ++ store volatile double %acc20, double *%ptr ++ store volatile double %acc21, double *%ptr ++ store volatile double %acc22, double *%ptr ++ store volatile double %acc23, double *%ptr ++ store volatile double %acc24, double *%ptr ++ store volatile double %acc25, double *%ptr ++ store volatile double %acc26, double *%ptr ++ store volatile double %acc27, double *%ptr ++ store volatile double %acc28, double *%ptr ++ store volatile double %acc29, double *%ptr ++ store volatile double %acc30, double *%ptr ++ store volatile double %acc31, double *%ptr ++ ret void ++} ++ ++; Like f1, but requires one fewer FPR. We allocate in numerical order, ++; so %f15 is the one that gets dropped. ++define void @f2(double *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: aghi %r15, -216 ++; CHECK: .cfi_def_cfa_offset 376 ++; CHECK: std %f8, 208(%r15) ++; CHECK: std %f9, 200(%r15) ++; CHECK: std %f10, 192(%r15) ++; CHECK: std %f11, 184(%r15) ++; CHECK: std %f12, 176(%r15) ++; CHECK: std %f13, 168(%r15) ++; CHECK: std %f14, 160(%r15) ++; CHECK: .cfi_offset %f8, -168 ++; CHECK: .cfi_offset %f9, -176 ++; CHECK: .cfi_offset %f10, -184 ++; CHECK: .cfi_offset %f11, -192 ++; CHECK: .cfi_offset %f12, -200 ++; CHECK: .cfi_offset %f13, -208 ++; CHECK: .cfi_offset %f14, -216 ++; CHECK-NOT: %v15 ++; CHECK-NOT: %f15 ++; CHECK: ld %f8, 208(%r15) ++; CHECK: ld %f9, 200(%r15) ++; CHECK: ld %f10, 192(%r15) ++; CHECK: ld %f11, 184(%r15) ++; CHECK: ld %f12, 176(%r15) ++; CHECK: ld %f13, 168(%r15) ++; CHECK: ld %f14, 160(%r15) ++; CHECK: aghi %r15, 216 ++; CHECK: br %r14 ++ %l0 = load volatile double *%ptr ++ %l1 = load volatile double *%ptr ++ %l2 = load volatile double *%ptr ++ %l3 = load volatile double *%ptr ++ %l4 = load volatile double *%ptr ++ %l5 = load volatile double *%ptr ++ %l6 = load volatile double *%ptr ++ %l7 = load volatile double *%ptr ++ %l8 = load volatile double *%ptr ++ %l9 = load volatile double *%ptr ++ %l10 = load volatile double *%ptr ++ %l11 = load volatile double *%ptr ++ %l12 = load volatile double *%ptr ++ %l13 = load volatile double *%ptr ++ %l14 = load volatile double *%ptr ++ %l16 = load volatile double *%ptr ++ %l17 = load volatile double *%ptr ++ %l18 = load volatile double *%ptr ++ %l19 = load volatile double *%ptr ++ %l20 = load volatile double *%ptr ++ %l21 = load volatile double *%ptr ++ %l22 = load volatile double *%ptr ++ %l23 = load volatile double *%ptr ++ %l24 = load volatile double *%ptr ++ %l25 = load volatile double *%ptr ++ %l26 = load volatile double *%ptr ++ %l27 = load volatile double *%ptr ++ %l28 = load volatile double *%ptr ++ %l29 = load volatile double *%ptr ++ %l30 = load volatile double *%ptr ++ %l31 = load volatile double *%ptr ++ %acc0 
= fsub double %l0, %l0 ++ %acc1 = fsub double %l1, %acc0 ++ %acc2 = fsub double %l2, %acc1 ++ %acc3 = fsub double %l3, %acc2 ++ %acc4 = fsub double %l4, %acc3 ++ %acc5 = fsub double %l5, %acc4 ++ %acc6 = fsub double %l6, %acc5 ++ %acc7 = fsub double %l7, %acc6 ++ %acc8 = fsub double %l8, %acc7 ++ %acc9 = fsub double %l9, %acc8 ++ %acc10 = fsub double %l10, %acc9 ++ %acc11 = fsub double %l11, %acc10 ++ %acc12 = fsub double %l12, %acc11 ++ %acc13 = fsub double %l13, %acc12 ++ %acc14 = fsub double %l14, %acc13 ++ %acc16 = fsub double %l16, %acc14 ++ %acc17 = fsub double %l17, %acc16 ++ %acc18 = fsub double %l18, %acc17 ++ %acc19 = fsub double %l19, %acc18 ++ %acc20 = fsub double %l20, %acc19 ++ %acc21 = fsub double %l21, %acc20 ++ %acc22 = fsub double %l22, %acc21 ++ %acc23 = fsub double %l23, %acc22 ++ %acc24 = fsub double %l24, %acc23 ++ %acc25 = fsub double %l25, %acc24 ++ %acc26 = fsub double %l26, %acc25 ++ %acc27 = fsub double %l27, %acc26 ++ %acc28 = fsub double %l28, %acc27 ++ %acc29 = fsub double %l29, %acc28 ++ %acc30 = fsub double %l30, %acc29 ++ %acc31 = fsub double %l31, %acc30 ++ store volatile double %acc0, double *%ptr ++ store volatile double %acc1, double *%ptr ++ store volatile double %acc2, double *%ptr ++ store volatile double %acc3, double *%ptr ++ store volatile double %acc4, double *%ptr ++ store volatile double %acc5, double *%ptr ++ store volatile double %acc6, double *%ptr ++ store volatile double %acc7, double *%ptr ++ store volatile double %acc8, double *%ptr ++ store volatile double %acc9, double *%ptr ++ store volatile double %acc10, double *%ptr ++ store volatile double %acc11, double *%ptr ++ store volatile double %acc12, double *%ptr ++ store volatile double %acc13, double *%ptr ++ store volatile double %acc14, double *%ptr ++ store volatile double %acc16, double *%ptr ++ store volatile double %acc17, double *%ptr ++ store volatile double %acc18, double *%ptr ++ store volatile double %acc19, double *%ptr ++ store volatile double %acc20, double *%ptr ++ store volatile double %acc21, double *%ptr ++ store volatile double %acc22, double *%ptr ++ store volatile double %acc23, double *%ptr ++ store volatile double %acc24, double *%ptr ++ store volatile double %acc25, double *%ptr ++ store volatile double %acc26, double *%ptr ++ store volatile double %acc27, double *%ptr ++ store volatile double %acc28, double *%ptr ++ store volatile double %acc29, double *%ptr ++ store volatile double %acc30, double *%ptr ++ store volatile double %acc31, double *%ptr ++ ret void ++} ++ ++; Like f1, but should require only one call-saved FPR. 
++define void @f3(double *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: aghi %r15, -168 ++; CHECK: .cfi_def_cfa_offset 328 ++; CHECK: std %f8, 160(%r15) ++; CHECK: .cfi_offset %f8, -168 ++; CHECK-NOT: {{%[fv]9}} ++; CHECK-NOT: {{%[fv]1[0-5]}} ++; CHECK: ld %f8, 160(%r15) ++; CHECK: aghi %r15, 168 ++; CHECK: br %r14 ++ %l0 = load volatile double *%ptr ++ %l1 = load volatile double *%ptr ++ %l2 = load volatile double *%ptr ++ %l3 = load volatile double *%ptr ++ %l4 = load volatile double *%ptr ++ %l5 = load volatile double *%ptr ++ %l6 = load volatile double *%ptr ++ %l7 = load volatile double *%ptr ++ %l8 = load volatile double *%ptr ++ %l16 = load volatile double *%ptr ++ %l17 = load volatile double *%ptr ++ %l18 = load volatile double *%ptr ++ %l19 = load volatile double *%ptr ++ %l20 = load volatile double *%ptr ++ %l21 = load volatile double *%ptr ++ %l22 = load volatile double *%ptr ++ %l23 = load volatile double *%ptr ++ %l24 = load volatile double *%ptr ++ %l25 = load volatile double *%ptr ++ %l26 = load volatile double *%ptr ++ %l27 = load volatile double *%ptr ++ %l28 = load volatile double *%ptr ++ %l29 = load volatile double *%ptr ++ %l30 = load volatile double *%ptr ++ %l31 = load volatile double *%ptr ++ %acc0 = fsub double %l0, %l0 ++ %acc1 = fsub double %l1, %acc0 ++ %acc2 = fsub double %l2, %acc1 ++ %acc3 = fsub double %l3, %acc2 ++ %acc4 = fsub double %l4, %acc3 ++ %acc5 = fsub double %l5, %acc4 ++ %acc6 = fsub double %l6, %acc5 ++ %acc7 = fsub double %l7, %acc6 ++ %acc8 = fsub double %l8, %acc7 ++ %acc16 = fsub double %l16, %acc8 ++ %acc17 = fsub double %l17, %acc16 ++ %acc18 = fsub double %l18, %acc17 ++ %acc19 = fsub double %l19, %acc18 ++ %acc20 = fsub double %l20, %acc19 ++ %acc21 = fsub double %l21, %acc20 ++ %acc22 = fsub double %l22, %acc21 ++ %acc23 = fsub double %l23, %acc22 ++ %acc24 = fsub double %l24, %acc23 ++ %acc25 = fsub double %l25, %acc24 ++ %acc26 = fsub double %l26, %acc25 ++ %acc27 = fsub double %l27, %acc26 ++ %acc28 = fsub double %l28, %acc27 ++ %acc29 = fsub double %l29, %acc28 ++ %acc30 = fsub double %l30, %acc29 ++ %acc31 = fsub double %l31, %acc30 ++ store volatile double %acc0, double *%ptr ++ store volatile double %acc1, double *%ptr ++ store volatile double %acc2, double *%ptr ++ store volatile double %acc3, double *%ptr ++ store volatile double %acc4, double *%ptr ++ store volatile double %acc5, double *%ptr ++ store volatile double %acc6, double *%ptr ++ store volatile double %acc7, double *%ptr ++ store volatile double %acc8, double *%ptr ++ store volatile double %acc16, double *%ptr ++ store volatile double %acc17, double *%ptr ++ store volatile double %acc18, double *%ptr ++ store volatile double %acc19, double *%ptr ++ store volatile double %acc20, double *%ptr ++ store volatile double %acc21, double *%ptr ++ store volatile double %acc22, double *%ptr ++ store volatile double %acc23, double *%ptr ++ store volatile double %acc24, double *%ptr ++ store volatile double %acc25, double *%ptr ++ store volatile double %acc26, double *%ptr ++ store volatile double %acc27, double *%ptr ++ store volatile double %acc28, double *%ptr ++ store volatile double %acc29, double *%ptr ++ store volatile double %acc30, double *%ptr ++ store volatile double %acc31, double *%ptr ++ ret void ++} ++ ++; This function should use all call-clobbered FPRs and vector registers ++; but no call-saved ones. It shouldn't need to create a frame. 
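++;
++; Spelling out the register budget (the standard s390x ABI split these
++; tests assume): %f0-%f7 and, on z13, %v16-%v31 are call-clobbered, while
++; %f8-%f15 are call-saved. f4 keeps only the 24 values %l0-%l7 and
++; %l16-%l31 live, which fit exactly in the 24 call-clobbered registers,
++; so nothing needs saving and no frame is required.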
++define void @f4(double *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK-NOT: %r15 ++; CHECK-NOT: {{%[fv][89]}} ++; CHECK-NOT: {{%[fv]1[0-5]}} ++; CHECK: br %r14 ++ %l0 = load volatile double *%ptr ++ %l1 = load volatile double *%ptr ++ %l2 = load volatile double *%ptr ++ %l3 = load volatile double *%ptr ++ %l4 = load volatile double *%ptr ++ %l5 = load volatile double *%ptr ++ %l6 = load volatile double *%ptr ++ %l7 = load volatile double *%ptr ++ %l16 = load volatile double *%ptr ++ %l17 = load volatile double *%ptr ++ %l18 = load volatile double *%ptr ++ %l19 = load volatile double *%ptr ++ %l20 = load volatile double *%ptr ++ %l21 = load volatile double *%ptr ++ %l22 = load volatile double *%ptr ++ %l23 = load volatile double *%ptr ++ %l24 = load volatile double *%ptr ++ %l25 = load volatile double *%ptr ++ %l26 = load volatile double *%ptr ++ %l27 = load volatile double *%ptr ++ %l28 = load volatile double *%ptr ++ %l29 = load volatile double *%ptr ++ %l30 = load volatile double *%ptr ++ %l31 = load volatile double *%ptr ++ %acc0 = fsub double %l0, %l0 ++ %acc1 = fsub double %l1, %acc0 ++ %acc2 = fsub double %l2, %acc1 ++ %acc3 = fsub double %l3, %acc2 ++ %acc4 = fsub double %l4, %acc3 ++ %acc5 = fsub double %l5, %acc4 ++ %acc6 = fsub double %l6, %acc5 ++ %acc7 = fsub double %l7, %acc6 ++ %acc16 = fsub double %l16, %acc7 ++ %acc17 = fsub double %l17, %acc16 ++ %acc18 = fsub double %l18, %acc17 ++ %acc19 = fsub double %l19, %acc18 ++ %acc20 = fsub double %l20, %acc19 ++ %acc21 = fsub double %l21, %acc20 ++ %acc22 = fsub double %l22, %acc21 ++ %acc23 = fsub double %l23, %acc22 ++ %acc24 = fsub double %l24, %acc23 ++ %acc25 = fsub double %l25, %acc24 ++ %acc26 = fsub double %l26, %acc25 ++ %acc27 = fsub double %l27, %acc26 ++ %acc28 = fsub double %l28, %acc27 ++ %acc29 = fsub double %l29, %acc28 ++ %acc30 = fsub double %l30, %acc29 ++ %acc31 = fsub double %l31, %acc30 ++ store volatile double %acc0, double *%ptr ++ store volatile double %acc1, double *%ptr ++ store volatile double %acc2, double *%ptr ++ store volatile double %acc3, double *%ptr ++ store volatile double %acc4, double *%ptr ++ store volatile double %acc5, double *%ptr ++ store volatile double %acc6, double *%ptr ++ store volatile double %acc7, double *%ptr ++ store volatile double %acc16, double *%ptr ++ store volatile double %acc17, double *%ptr ++ store volatile double %acc18, double *%ptr ++ store volatile double %acc19, double *%ptr ++ store volatile double %acc20, double *%ptr ++ store volatile double %acc21, double *%ptr ++ store volatile double %acc22, double *%ptr ++ store volatile double %acc23, double *%ptr ++ store volatile double %acc24, double *%ptr ++ store volatile double %acc25, double *%ptr ++ store volatile double %acc26, double *%ptr ++ store volatile double %acc27, double *%ptr ++ store volatile double %acc28, double *%ptr ++ store volatile double %acc29, double *%ptr ++ store volatile double %acc30, double *%ptr ++ store volatile double %acc31, double *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/htm-intrinsics.ll +@@ -0,0 +1,352 @@ ++; Test transactional-execution intrinsics. 
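++;
++; A rough usage sketch (illustrative only, not itself exercised by the
++; checks below): transactional code brackets its body as
++;   %cc = call i32 @llvm.s390.tbegin(i8 *null, i32 65292)
++;   ...transactional work...
++;   %cc2 = call i32 @llvm.s390.tend()
++; and branches on the returned condition code, where 0 means the
++; transaction started or ended successfully and 2 is the abort value
++; that the CC-handling tests below compare against.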
++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s ++ ++declare i32 @llvm.s390.tbegin(i8 *, i32) ++declare i32 @llvm.s390.tbegin.nofloat(i8 *, i32) ++declare void @llvm.s390.tbeginc(i8 *, i32) ++declare i32 @llvm.s390.tend() ++declare void @llvm.s390.tabort(i64) ++declare void @llvm.s390.ntstg(i64, i64 *) ++declare i32 @llvm.s390.etnd() ++declare void @llvm.s390.ppa.txassist(i32) ++ ++; TBEGIN. ++define void @test_tbegin() { ++; CHECK-LABEL: test_tbegin: ++; CHECK-NOT: stmg ++; CHECK: std %f8, ++; CHECK: std %f9, ++; CHECK: std %f10, ++; CHECK: std %f11, ++; CHECK: std %f12, ++; CHECK: std %f13, ++; CHECK: std %f14, ++; CHECK: std %f15, ++; CHECK: tbegin 0, 65292 ++; CHECK: ld %f8, ++; CHECK: ld %f9, ++; CHECK: ld %f10, ++; CHECK: ld %f11, ++; CHECK: ld %f12, ++; CHECK: ld %f13, ++; CHECK: ld %f14, ++; CHECK: ld %f15, ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin(i8 *null, i32 65292) ++ ret void ++} ++ ++; TBEGIN (nofloat). ++define void @test_tbegin_nofloat1() { ++; CHECK-LABEL: test_tbegin_nofloat1: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65292 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) ++ ret void ++} ++ ++; TBEGIN (nofloat) with integer CC return value. ++define i32 @test_tbegin_nofloat2() { ++; CHECK-LABEL: test_tbegin_nofloat2: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65292 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) ++ ret i32 %res ++} ++ ++; TBEGIN (nofloat) with implicit CC check. ++define void @test_tbegin_nofloat3(i32 *%ptr) { ++; CHECK-LABEL: test_tbegin_nofloat3: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65292 ++; CHECK: jnh {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) ++ %cmp = icmp eq i32 %res, 2 ++ br i1 %cmp, label %if.then, label %if.end ++ ++if.then: ; preds = %entry ++ store i32 0, i32* %ptr, align 4 ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ ret void ++} ++ ++; TBEGIN (nofloat) with dual CC use. ++define i32 @test_tbegin_nofloat4(i32 %pad, i32 *%ptr) { ++; CHECK-LABEL: test_tbegin_nofloat4: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65292 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: cijlh %r2, 2, {{\.L*}} ++; CHECK: mvhi 0(%r3), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65292) ++ %cmp = icmp eq i32 %res, 2 ++ br i1 %cmp, label %if.then, label %if.end ++ ++if.then: ; preds = %entry ++ store i32 0, i32* %ptr, align 4 ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ ret i32 %res ++} ++ ++; TBEGIN (nofloat) with register. ++define void @test_tbegin_nofloat5(i8 *%ptr) { ++; CHECK-LABEL: test_tbegin_nofloat5: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0(%r2), 65292 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *%ptr, i32 65292) ++ ret void ++} ++ ++; TBEGIN (nofloat) with GRSM 0x0f00. ++define void @test_tbegin_nofloat6() { ++; CHECK-LABEL: test_tbegin_nofloat6: ++; CHECK: stmg %r6, %r15, ++; CHECK-NOT: std ++; CHECK: tbegin 0, 3840 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 3840) ++ ret void ++} ++ ++; TBEGIN (nofloat) with GRSM 0xf100. 
++define void @test_tbegin_nofloat7() { ++; CHECK-LABEL: test_tbegin_nofloat7: ++; CHECK: stmg %r8, %r15, ++; CHECK-NOT: std ++; CHECK: tbegin 0, 61696 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 61696) ++ ret void ++} ++ ++; TBEGIN (nofloat) with GRSM 0xfe00 -- stack pointer added automatically. ++define void @test_tbegin_nofloat8() { ++; CHECK-LABEL: test_tbegin_nofloat8: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65280 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 65024) ++ ret void ++} ++ ++; TBEGIN (nofloat) with GRSM 0xfb00 -- no frame pointer needed. ++define void @test_tbegin_nofloat9() { ++; CHECK-LABEL: test_tbegin_nofloat9: ++; CHECK: stmg %r10, %r15, ++; CHECK-NOT: std ++; CHECK: tbegin 0, 64256 ++; CHECK: br %r14 ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256) ++ ret void ++} ++ ++; TBEGIN (nofloat) with GRSM 0xfb00 -- frame pointer added automatically. ++define void @test_tbegin_nofloat10(i64 %n) { ++; CHECK-LABEL: test_tbegin_nofloat10: ++; CHECK: stmg %r11, %r15, ++; CHECK-NOT: std ++; CHECK: tbegin 0, 65280 ++; CHECK: br %r14 ++ %buf = alloca i8, i64 %n ++ call i32 @llvm.s390.tbegin.nofloat(i8 *null, i32 64256) ++ ret void ++} ++ ++; TBEGINC. ++define void @test_tbeginc() { ++; CHECK-LABEL: test_tbeginc: ++; CHECK-NOT: stmg ++; CHECK-NOT: std ++; CHECK: tbeginc 0, 65288 ++; CHECK: br %r14 ++ call void @llvm.s390.tbeginc(i8 *null, i32 65288) ++ ret void ++} ++ ++; TEND with integer CC return value. ++define i32 @test_tend1() { ++; CHECK-LABEL: test_tend1: ++; CHECK: tend ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tend() ++ ret i32 %res ++} ++ ++; TEND with implicit CC check. ++define void @test_tend3(i32 *%ptr) { ++; CHECK-LABEL: test_tend3: ++; CHECK: tend ++; CHECK: je {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tend() ++ %cmp = icmp eq i32 %res, 2 ++ br i1 %cmp, label %if.then, label %if.end ++ ++if.then: ; preds = %entry ++ store i32 0, i32* %ptr, align 4 ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ ret void ++} ++ ++; TEND with dual CC use. ++define i32 @test_tend2(i32 %pad, i32 *%ptr) { ++; CHECK-LABEL: test_tend2: ++; CHECK: tend ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: cijlh %r2, 2, {{\.L*}} ++; CHECK: mvhi 0(%r3), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.tend() ++ %cmp = icmp eq i32 %res, 2 ++ br i1 %cmp, label %if.then, label %if.end ++ ++if.then: ; preds = %entry ++ store i32 0, i32* %ptr, align 4 ++ br label %if.end ++ ++if.end: ; preds = %if.then, %entry ++ ret i32 %res ++} ++ ++; TABORT with register only. ++define void @test_tabort1(i64 %val) { ++; CHECK-LABEL: test_tabort1: ++; CHECK: tabort 0(%r2) ++; CHECK: br %r14 ++ call void @llvm.s390.tabort(i64 %val) ++ ret void ++} ++ ++; TABORT with immediate only. ++define void @test_tabort2(i64 %val) { ++; CHECK-LABEL: test_tabort2: ++; CHECK: tabort 1234 ++; CHECK: br %r14 ++ call void @llvm.s390.tabort(i64 1234) ++ ret void ++} ++ ++; TABORT with register + immediate. ++define void @test_tabort3(i64 %val) { ++; CHECK-LABEL: test_tabort3: ++; CHECK: tabort 1234(%r2) ++; CHECK: br %r14 ++ %sum = add i64 %val, 1234 ++ call void @llvm.s390.tabort(i64 %sum) ++ ret void ++} ++ ++; TABORT with out-of-range immediate. 
++define void @test_tabort4(i64 %val) {
++; CHECK-LABEL: test_tabort4:
++; CHECK: tabort 0({{%r[1-5]}})
++; CHECK: br %r14
++ call void @llvm.s390.tabort(i64 4096)
++ ret void
++}
++
++; NTSTG with base pointer only.
++define void @test_ntstg1(i64 *%ptr, i64 %val) {
++; CHECK-LABEL: test_ntstg1:
++; CHECK: ntstg %r3, 0(%r2)
++; CHECK: br %r14
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; NTSTG with base and index.
++; Check that the scaled index is used directly in the address.
++define void @test_ntstg2(i64 *%base, i64 %index, i64 %val) {
++; CHECK-LABEL: test_ntstg2:
++; CHECK: sllg [[REG:%r[1-5]]], %r3, 3
++; CHECK: ntstg %r4, 0([[REG]],%r2)
++; CHECK: br %r14
++ %ptr = getelementptr i64 *%base, i64 %index
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; NTSTG with the highest in-range displacement.
++define void @test_ntstg3(i64 *%base, i64 %val) {
++; CHECK-LABEL: test_ntstg3:
++; CHECK: ntstg %r3, 524280(%r2)
++; CHECK: br %r14
++ %ptr = getelementptr i64 *%base, i64 65535
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; NTSTG with an out-of-range positive displacement.
++define void @test_ntstg4(i64 *%base, i64 %val) {
++; CHECK-LABEL: test_ntstg4:
++; CHECK: ntstg %r3, 0({{%r[1-5]}})
++; CHECK: br %r14
++ %ptr = getelementptr i64 *%base, i64 65536
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; NTSTG with the lowest in-range displacement.
++define void @test_ntstg5(i64 *%base, i64 %val) {
++; CHECK-LABEL: test_ntstg5:
++; CHECK: ntstg %r3, -524288(%r2)
++; CHECK: br %r14
++ %ptr = getelementptr i64 *%base, i64 -65536
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; NTSTG with an out-of-range negative displacement.
++define void @test_ntstg6(i64 *%base, i64 %val) {
++; CHECK-LABEL: test_ntstg6:
++; CHECK: ntstg %r3, 0({{%r[1-5]}})
++; CHECK: br %r14
++ %ptr = getelementptr i64 *%base, i64 -65537
++ call void @llvm.s390.ntstg(i64 %val, i64 *%ptr)
++ ret void
++}
++
++; ETND.
++define i32 @test_etnd() {
++; CHECK-LABEL: test_etnd:
++; CHECK: etnd %r2
++; CHECK: br %r14
++ %res = call i32 @llvm.s390.etnd()
++ ret i32 %res
++}
++
++; PPA (Transaction-Abort Assist)
++define void @test_ppa_txassist(i32 %val) {
++; CHECK-LABEL: test_ppa_txassist:
++; CHECK: ppa %r2, 0, 1
++; CHECK: br %r14
++ call void @llvm.s390.ppa.txassist(i32 %val)
++ ret void
++}
++
+Index: llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll
+===================================================================
+--- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-12.ll
++++ llvm-36/test/CodeGen/SystemZ/int-cmp-12.ll
+@@ -49,13 +49,24 @@ define double @f4(double %a, double %b,
+ ret double %res
+ }
+ 
+-; Check the next value up, which must use a register comparison.
++; Check the next value up, which can use a shifted comparison.
+ define double @f5(double %a, double %b, i64 %i1) {
+ ; CHECK-LABEL: f5:
+-; CHECK: clgrjl %r2,
++; CHECK: srlg [[REG:%r[0-5]]], %r2, 32
++; CHECK: cgije [[REG]], 0
+ ; CHECK: ldr %f0, %f2
+ ; CHECK: br %r14
+ %cond = icmp ult i64 %i1, 4294967296
+ %res = select i1 %cond, double %a, double %b
+ ret double %res
+ }
++; Check the next value up, which must use a register comparison.
++define double @f6(double %a, double %b, i64 %i1) { ++; CHECK-LABEL: f6: ++; CHECK: clgrjl %r2, ++; CHECK: ldr %f0, %f2 ++; CHECK: br %r14 ++ %cond = icmp ult i64 %i1, 4294967297 ++ %res = select i1 %cond, double %a, double %b ++ ret double %res ++} +Index: llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/int-cmp-47.ll ++++ llvm-36/test/CodeGen/SystemZ/int-cmp-47.ll +@@ -309,7 +309,8 @@ exit: + define void @f17(i64 %a) { + ; CHECK-LABEL: f17: + ; CHECK-NOT: tmhh +-; CHECK: llihh {{%r[0-5]}}, 49151 ++; CHECK: srlg [[REG:%r[0-5]]], %r2, 48 ++; CHECK: cgfi [[REG]], 49151 + ; CHECK-NOT: tmhh + ; CHECK: br %r14 + entry: +Index: llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/int-cmp-50.ll +@@ -0,0 +1,30 @@ ++; Verify that we do not crash on always-true conditions ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 -O0 ++; ++; This test was compiled using clang -O0 from the following source code: ++; ++; int test(unsigned long x) ++; { ++; return x >= 0 && x <= 15; ++; } ++ ++define signext i32 @test(i64 %x) { ++entry: ++ %x.addr = alloca i64, align 8 ++ store i64 %x, i64* %x.addr, align 8 ++ %0 = load i64 *%x.addr, align 8 ++ %cmp = icmp uge i64 %0, 0 ++ br i1 %cmp, label %land.rhs, label %land.end ++ ++land.rhs: ; preds = %entry ++ %1 = load i64 *%x.addr, align 8 ++ %cmp1 = icmp ule i64 %1, 15 ++ br label %land.end ++ ++land.end: ; preds = %land.rhs, %entry ++ %2 = phi i1 [ false, %entry ], [ %cmp1, %land.rhs ] ++ %land.ext = zext i1 %2 to i32 ++ ret i32 %land.ext ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/risbg-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/risbg-03.ll +@@ -0,0 +1,30 @@ ++; Test use of RISBG vs RISBGN on zEC12. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | FileCheck %s ++ ++; On zEC12, we generally prefer RISBGN. ++define i64 @f1(i64 %a, i64 %b) { ++; CHECK-LABEL: f1: ++; CHECK: risbgn %r2, %r3, 60, 62, 0 ++; CHECK: br %r14 ++ %anda = and i64 %a, -15 ++ %andb = and i64 %b, 14 ++ %or = or i64 %anda, %andb ++ ret i64 %or ++} ++ ++; But we may fall back to RISBG if we can use the condition code. ++define i64 @f2(i64 %a, i64 %b, i32* %c) { ++; CHECK-LABEL: f2: ++; CHECK: risbg %r2, %r3, 60, 62, 0 ++; CHECK-NEXT: ipm ++; CHECK: br %r14 ++ %anda = and i64 %a, -15 ++ %andb = and i64 %b, 14 ++ %or = or i64 %anda, %andb ++ %cmp = icmp sgt i64 %or, 0 ++ %conv = zext i1 %cmp to i32 ++ store i32 %conv, i32* %c, align 4 ++ ret i64 %or ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/tls-01.ll +=================================================================== +--- llvm-36.orig/test/CodeGen/SystemZ/tls-01.ll ++++ llvm-36/test/CodeGen/SystemZ/tls-01.ll +@@ -1,7 +1,7 @@ +-; Test initial-exec TLS accesses. ++; Test local-exec TLS accesses. 
+ ; +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN +-; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-MAIN ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu | FileCheck %s -check-prefix=CHECK-CP + + @x = thread_local global i32 0 + +Index: llvm-36/test/CodeGen/SystemZ/tls-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/tls-02.ll +@@ -0,0 +1,18 @@ ++; Test initial-exec TLS accesses. ++; ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN ++ ++@x = thread_local(initialexec) global i32 0 ++ ++; The offset must be loaded from the GOT. This TLS access model does ++; not use literal pool constants. ++define i32 *@foo() { ++; CHECK-MAIN-LABEL: foo: ++; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 ++; CHECK-MAIN: sllg %r2, [[HIGH]], 32 ++; CHECK-MAIN: ear %r2, %a1 ++; CHECK-MAIN: larl %r1, x@INDNTPOFF ++; CHECK-MAIN: ag %r2, 0(%r1) ++; CHECK-MAIN: br %r14 ++ ret i32 *@x ++} +Index: llvm-36/test/CodeGen/SystemZ/tls-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/tls-03.ll +@@ -0,0 +1,23 @@ ++; Test general-dynamic TLS accesses. ++; ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP ++ ++@x = thread_local global i32 0 ++ ++; Call __tls_get_offset to retrieve the symbol's TLS offset. ++define i32 *@foo() { ++; CHECK-CP: .LCP{{.*}}: ++; CHECK-CP: .quad x@TLSGD ++; ++; CHECK-MAIN-LABEL: foo: ++; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_ ++; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}} ++; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_gdcall:x ++; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0 ++; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32 ++; CHECK-MAIN: ear [[TP]], %a1 ++; CHECK-MAIN: agr %r2, [[TP]] ++; CHECK-MAIN: br %r14 ++ ret i32 *@x ++} +Index: llvm-36/test/CodeGen/SystemZ/tls-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/tls-04.ll +@@ -0,0 +1,28 @@ ++; Test local-dynamic TLS accesses. ++; ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-MAIN ++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | FileCheck %s -check-prefix=CHECK-CP ++ ++@x = thread_local(localdynamic) global i32 0 ++ ++; Call __tls_get_offset to retrieve the module's TLS base offset. ++; Add the per-symbol offset and the thread pointer. 
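++; Concretely, the sequence checked below does this in three steps: a
++; single __tls_get_offset call via the TLSLDM constant to obtain the
++; module base, an ag of the symbol's DTPOFF constant, and finally an
++; agr of the thread pointer reassembled from access registers %a0/%a1.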
++define i32 *@foo() {
++; CHECK-CP: .LCP{{.*}}_0:
++; CHECK-CP: .quad x@TLSLDM
++; CHECK-CP: .LCP{{.*}}_1:
++; CHECK-CP: .quad x@DTPOFF
++;
++; CHECK-MAIN-LABEL: foo:
++; CHECK-MAIN-DAG: larl %r12, _GLOBAL_OFFSET_TABLE_
++; CHECK-MAIN-DAG: lgrl %r2, .LCP{{.*}}_0
++; CHECK-MAIN: brasl %r14, __tls_get_offset@PLT:tls_ldcall:x
++; CHECK-MAIN: larl %r1, .LCP{{.*}}_1
++; CHECK-MAIN: ag %r2, 0(%r1)
++; CHECK-MAIN: ear [[HIGH:%r[0-5]]], %a0
++; CHECK-MAIN: sllg [[TP:%r[0-5]]], [[HIGH]], 32
++; CHECK-MAIN: ear [[TP]], %a1
++; CHECK-MAIN: agr %r2, [[TP]]
++; CHECK-MAIN: br %r14
++ ret i32 *@x
++}
+Index: llvm-36/test/CodeGen/SystemZ/tls-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/tls-05.ll
+@@ -0,0 +1,15 @@
++; Test general-dynamic TLS access optimizations.
++;
++; If we access the same TLS variable twice, there should only be
++; a single call to __tls_get_offset.
++;
++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
++
++@x = thread_local global i32 0
++
++define i32 @foo() {
++ %val = load i32* @x
++ %inc = add nsw i32 %val, 1
++ store i32 %inc, i32* @x
++ ret i32 %val
++}
+Index: llvm-36/test/CodeGen/SystemZ/tls-06.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/tls-06.ll
+@@ -0,0 +1,17 @@
++; Test general-dynamic TLS access optimizations.
++;
++; If we access two different TLS variables, we need two calls to
++; __tls_get_offset, but should load _GLOBAL_OFFSET_TABLE_ only once.
++;
++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 2
++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "_GLOBAL_OFFSET_TABLE_" | count 1
++
++@x = thread_local global i32 0
++@y = thread_local global i32 0
++
++define i32 @foo() {
++ %valx = load i32* @x
++ %valy = load i32* @y
++ %add = add nsw i32 %valx, %valy
++ ret i32 %add
++}
+Index: llvm-36/test/CodeGen/SystemZ/tls-07.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/tls-07.ll
+@@ -0,0 +1,16 @@
++; Test local-dynamic TLS access optimizations.
++;
++; If we access two different local-dynamic TLS variables, we only
++; need a single call to __tls_get_offset.
++;
++; RUN: llc < %s -mcpu=z10 -mtriple=s390x-linux-gnu -relocation-model=pic | grep "__tls_get_offset" | count 1
++
++@x = thread_local(localdynamic) global i32 0
++@y = thread_local(localdynamic) global i32 0
++
++define i32 @foo() {
++ %valx = load i32* @x
++ %valy = load i32* @y
++ %add = add nsw i32 %valx, %valy
++ ret i32 %add
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abi-align.ll
+@@ -0,0 +1,49 @@
++; Verify that we use the vector ABI datalayout if and only if
++; the vector facility is present.
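++; (With the vector facility enabled, the vector ABI datalayout gives
++; <2 x i64> 8-byte alignment, so the vector member of %struct.S below
++; sits at offset 8 and is loaded with vl from 8(%r2); without it, the
++; default 16-byte alignment places the member at offset 16, hence the
++; agsi updates at offsets 16 and 24.)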
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=generic | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z10 | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z196 | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=zEC12 | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \
++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
++
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector | \
++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector | \
++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,vector | \
++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector,+vector | \
++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=-vector | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=vector,-vector | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mattr=+vector,-vector | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 -mattr=-vector | \
++; RUN: FileCheck -check-prefix=CHECK-NOVECTOR %s
++
++%struct.S = type { i8, <2 x i64> }
++
++define void @test(%struct.S* %s) nounwind {
++; CHECK-VECTOR-LABEL: @test
++; CHECK-VECTOR: vl %v0, 8(%r2)
++; CHECK-NOVECTOR-LABEL: @test
++; CHECK-NOVECTOR-DAG: agsi 16(%r2), 1
++; CHECK-NOVECTOR-DAG: agsi 24(%r2), 1
++ %ptr = getelementptr %struct.S* %s, i64 0, i32 1
++ %vec = load <2 x i64>* %ptr
++ %add = add <2 x i64> %vec, <i64 1, i64 1>
++ store <2 x i64> %add, <2 x i64>* %ptr
++ ret void
++}
++
+Index: llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abs-01.ll
+@@ -0,0 +1,146 @@
++; Test v16i8 absolute.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test with slt.
++define <16 x i8> @f1(<16 x i8> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vlpb %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp slt <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
++ ret <16 x i8> %ret
++}
++
++; Test with sle.
++define <16 x i8> @f2(<16 x i8> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vlpb %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sle <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
++ ret <16 x i8> %ret
++}
++
++; Test with sgt.
++define <16 x i8> @f3(<16 x i8> %val) {
++; CHECK-LABEL: f3:
++; CHECK: vlpb %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sgt <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
++ ret <16 x i8> %ret
++}
++
++; Test with sge.
++define <16 x i8> @f4(<16 x i8> %val) {
++; CHECK-LABEL: f4:
++; CHECK: vlpb %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sge <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
++ ret <16 x i8> %ret
++}
++
++; Test that negative absolute uses VLPB too. There is no vector equivalent
++; of LOAD NEGATIVE.
++define <16 x i8> @f5(<16 x i8> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %abs = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
++ %ret = sub <16 x i8> zeroinitializer, %abs
++ ret <16 x i8> %ret
++}
++
++; Try another form of negative absolute (slt version).
++define <16 x i8> @f6(<16 x i8> %val) {
++; CHECK-LABEL: f6:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
++ ret <16 x i8> %ret
++}
++
++; Test with sle.
++define <16 x i8> @f7(<16 x i8> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sle <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %val, <16 x i8> %neg
++ ret <16 x i8> %ret
++}
++
++; Test with sgt.
++define <16 x i8> @f8(<16 x i8> %val) {
++; CHECK-LABEL: f8:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sgt <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
++ ret <16 x i8> %ret
++}
++
++; Test with sge.
++define <16 x i8> @f9(<16 x i8> %val) {
++; CHECK-LABEL: f9:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sge <16 x i8> %val, zeroinitializer
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %ret = select <16 x i1> %cmp, <16 x i8> %neg, <16 x i8> %val
++ ret <16 x i8> %ret
++}
++
++; Test with an SRA-based boolean vector.
++define <16 x i8> @f10(<16 x i8> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vlpb %v24, %v24
++; CHECK: br %r14
++ %shr = ashr <16 x i8> %val,
++ <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
++ i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %and1 = and <16 x i8> %shr, %neg
++ %not = xor <16 x i8> %shr,
++ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
++ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
++ %and2 = and <16 x i8> %not, %val
++ %ret = or <16 x i8> %and1, %and2
++ ret <16 x i8> %ret
++}
++
++; ...and again in reverse
++define <16 x i8> @f11(<16 x i8> %val) {
++; CHECK-LABEL: f11:
++; CHECK: vlpb [[REG:%v[0-9]+]], %v24
++; CHECK: vlcb %v24, [[REG]]
++; CHECK: br %r14
++ %shr = ashr <16 x i8> %val,
++ <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
++ i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
++ %and1 = and <16 x i8> %shr, %val
++ %not = xor <16 x i8> %shr,
++ <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
++ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
++ %neg = sub <16 x i8> zeroinitializer, %val
++ %and2 = and <16 x i8> %not, %neg
++ %ret = or <16 x i8> %and1, %and2
++ ret <16 x i8> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abs-02.ll
+@@ -0,0 +1,142 @@
++; Test v8i16 absolute.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test with slt.
++define <8 x i16> @f1(<8 x i16> %val) { ++; CHECK-LABEL: f1: ++; CHECK: vlph %v24, %v24 ++; CHECK: br %r14 ++ %cmp = icmp slt <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val ++ ret <8 x i16> %ret ++} ++ ++; Test with sle. ++define <8 x i16> @f2(<8 x i16> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vlph %v24, %v24 ++; CHECK: br %r14 ++ %cmp = icmp sle <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val ++ ret <8 x i16> %ret ++} ++ ++; Test with sgt. ++define <8 x i16> @f3(<8 x i16> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vlph %v24, %v24 ++; CHECK: br %r14 ++ %cmp = icmp sgt <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg ++ ret <8 x i16> %ret ++} ++ ++; Test with sge. ++define <8 x i16> @f4(<8 x i16> %val) { ++; CHECK-LABEL: f4: ++; CHECK: vlph %v24, %v24 ++; CHECK: br %r14 ++ %cmp = icmp sge <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg ++ ret <8 x i16> %ret ++} ++ ++; Test that negative absolute uses VLPH too. There is no vector equivalent ++; of LOAD NEGATIVE. ++define <8 x i16> @f5(<8 x i16> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vlph [[REG:%v[0-9]+]], %v24 ++; CHECK: vlch %v24, [[REG]] ++; CHECK: br %r14 ++ %cmp = icmp slt <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %abs = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val ++ %ret = sub <8 x i16> zeroinitializer, %abs ++ ret <8 x i16> %ret ++} ++ ++; Try another form of negative absolute (slt version). ++define <8 x i16> @f6(<8 x i16> %val) { ++; CHECK-LABEL: f6: ++; CHECK: vlph [[REG:%v[0-9]+]], %v24 ++; CHECK: vlch %v24, [[REG]] ++; CHECK: br %r14 ++ %cmp = icmp slt <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg ++ ret <8 x i16> %ret ++} ++ ++; Test with sle. ++define <8 x i16> @f7(<8 x i16> %val) { ++; CHECK-LABEL: f7: ++; CHECK: vlph [[REG:%v[0-9]+]], %v24 ++; CHECK: vlch %v24, [[REG]] ++; CHECK: br %r14 ++ %cmp = icmp sle <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %val, <8 x i16> %neg ++ ret <8 x i16> %ret ++} ++ ++; Test with sgt. ++define <8 x i16> @f8(<8 x i16> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vlph [[REG:%v[0-9]+]], %v24 ++; CHECK: vlch %v24, [[REG]] ++; CHECK: br %r14 ++ %cmp = icmp sgt <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val ++ ret <8 x i16> %ret ++} ++ ++; Test with sge. ++define <8 x i16> @f9(<8 x i16> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vlph [[REG:%v[0-9]+]], %v24 ++; CHECK: vlch %v24, [[REG]] ++; CHECK: br %r14 ++ %cmp = icmp sge <8 x i16> %val, zeroinitializer ++ %neg = sub <8 x i16> zeroinitializer, %val ++ %ret = select <8 x i1> %cmp, <8 x i16> %neg, <8 x i16> %val ++ ret <8 x i16> %ret ++} ++ ++; Test with an SRA-based boolean vector. 
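++; In this form the boolean vector is materialized as x >>s 15, which
++; yields all-ones lanes exactly where x is negative, so the pattern is
++;   abs(x) = ((x >>s 15) & -x) | (~(x >>s 15) & x)
++; and the backend is still expected to fold it into a single VLPH.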
++define <8 x i16> @f10(<8 x i16> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vlph %v24, %v24
++; CHECK: br %r14
++ %shr = ashr <8 x i16> %val,
++ <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
++ %neg = sub <8 x i16> zeroinitializer, %val
++ %and1 = and <8 x i16> %shr, %neg
++ %not = xor <8 x i16> %shr,
++ <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
++ %and2 = and <8 x i16> %not, %val
++ %ret = or <8 x i16> %and1, %and2
++ ret <8 x i16> %ret
++}
++
++; ...and again in reverse
++define <8 x i16> @f11(<8 x i16> %val) {
++; CHECK-LABEL: f11:
++; CHECK: vlph [[REG:%v[0-9]+]], %v24
++; CHECK: vlch %v24, [[REG]]
++; CHECK: br %r14
++ %shr = ashr <8 x i16> %val,
++ <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
++ %and1 = and <8 x i16> %shr, %val
++ %not = xor <8 x i16> %shr,
++ <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
++ %neg = sub <8 x i16> zeroinitializer, %val
++ %and2 = and <8 x i16> %not, %neg
++ %ret = or <8 x i16> %and1, %and2
++ ret <8 x i16> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abs-03.ll
+@@ -0,0 +1,138 @@
++; Test v4i32 absolute.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test with slt.
++define <4 x i32> @f1(<4 x i32> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vlpf %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp slt <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
++ ret <4 x i32> %ret
++}
++
++; Test with sle.
++define <4 x i32> @f2(<4 x i32> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vlpf %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sle <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
++ ret <4 x i32> %ret
++}
++
++; Test with sgt.
++define <4 x i32> @f3(<4 x i32> %val) {
++; CHECK-LABEL: f3:
++; CHECK: vlpf %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sgt <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
++ ret <4 x i32> %ret
++}
++
++; Test with sge.
++define <4 x i32> @f4(<4 x i32> %val) {
++; CHECK-LABEL: f4:
++; CHECK: vlpf %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sge <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
++ ret <4 x i32> %ret
++}
++
++; Test that negative absolute uses VLPF too. There is no vector equivalent
++; of LOAD NEGATIVE.
++define <4 x i32> @f5(<4 x i32> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %abs = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
++ %ret = sub <4 x i32> zeroinitializer, %abs
++ ret <4 x i32> %ret
++}
++
++; Try another form of negative absolute (slt version).
++define <4 x i32> @f6(<4 x i32> %val) {
++; CHECK-LABEL: f6:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
++ ret <4 x i32> %ret
++}
++
++; Test with sle.
++define <4 x i32> @f7(<4 x i32> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sle <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %val, <4 x i32> %neg
++ ret <4 x i32> %ret
++}
++
++; Test with sgt.
++define <4 x i32> @f8(<4 x i32> %val) {
++; CHECK-LABEL: f8:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sgt <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
++ ret <4 x i32> %ret
++}
++
++; Test with sge.
++define <4 x i32> @f9(<4 x i32> %val) {
++; CHECK-LABEL: f9:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sge <4 x i32> %val, zeroinitializer
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %ret = select <4 x i1> %cmp, <4 x i32> %neg, <4 x i32> %val
++ ret <4 x i32> %ret
++}
++
++; Test with an SRA-based boolean vector.
++define <4 x i32> @f10(<4 x i32> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vlpf %v24, %v24
++; CHECK: br %r14
++ %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %and1 = and <4 x i32> %shr, %neg
++ %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
++ %and2 = and <4 x i32> %not, %val
++ %ret = or <4 x i32> %and1, %and2
++ ret <4 x i32> %ret
++}
++
++; ...and again in reverse
++define <4 x i32> @f11(<4 x i32> %val) {
++; CHECK-LABEL: f11:
++; CHECK: vlpf [[REG:%v[0-9]+]], %v24
++; CHECK: vlcf %v24, [[REG]]
++; CHECK: br %r14
++ %shr = ashr <4 x i32> %val, <i32 31, i32 31, i32 31, i32 31>
++ %and1 = and <4 x i32> %shr, %val
++ %not = xor <4 x i32> %shr, <i32 -1, i32 -1, i32 -1, i32 -1>
++ %neg = sub <4 x i32> zeroinitializer, %val
++ %and2 = and <4 x i32> %not, %neg
++ %ret = or <4 x i32> %and1, %and2
++ ret <4 x i32> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abs-04.ll
+@@ -0,0 +1,138 @@
++; Test v2i64 absolute.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test with slt.
++define <2 x i64> @f1(<2 x i64> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vlpg %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp slt <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
++ ret <2 x i64> %ret
++}
++
++; Test with sle.
++define <2 x i64> @f2(<2 x i64> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vlpg %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sle <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
++ ret <2 x i64> %ret
++}
++
++; Test with sgt.
++define <2 x i64> @f3(<2 x i64> %val) {
++; CHECK-LABEL: f3:
++; CHECK: vlpg %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sgt <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
++ ret <2 x i64> %ret
++}
++
++; Test with sge.
++define <2 x i64> @f4(<2 x i64> %val) {
++; CHECK-LABEL: f4:
++; CHECK: vlpg %v24, %v24
++; CHECK: br %r14
++ %cmp = icmp sge <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
++ ret <2 x i64> %ret
++}
++
++; Test that negative absolute uses VLPG too. There is no vector equivalent
++; of LOAD NEGATIVE.
++define <2 x i64> @f5(<2 x i64> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %abs = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
++ %ret = sub <2 x i64> zeroinitializer, %abs
++ ret <2 x i64> %ret
++}
++
++; Try another form of negative absolute (slt version).
++define <2 x i64> @f6(<2 x i64> %val) {
++; CHECK-LABEL: f6:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp slt <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
++ ret <2 x i64> %ret
++}
++
++; Test with sle.
++define <2 x i64> @f7(<2 x i64> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sle <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %val, <2 x i64> %neg
++ ret <2 x i64> %ret
++}
++
++; Test with sgt.
++define <2 x i64> @f8(<2 x i64> %val) {
++; CHECK-LABEL: f8:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sgt <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
++ ret <2 x i64> %ret
++}
++
++; Test with sge.
++define <2 x i64> @f9(<2 x i64> %val) {
++; CHECK-LABEL: f9:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %cmp = icmp sge <2 x i64> %val, zeroinitializer
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %ret = select <2 x i1> %cmp, <2 x i64> %neg, <2 x i64> %val
++ ret <2 x i64> %ret
++}
++
++; Test with an SRA-based boolean vector.
++define <2 x i64> @f10(<2 x i64> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vlpg %v24, %v24
++; CHECK: br %r14
++ %shr = ashr <2 x i64> %val, <i64 63, i64 63>
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %and1 = and <2 x i64> %shr, %neg
++ %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
++ %and2 = and <2 x i64> %not, %val
++ %ret = or <2 x i64> %and1, %and2
++ ret <2 x i64> %ret
++}
++
++; ...and again in reverse
++define <2 x i64> @f11(<2 x i64> %val) {
++; CHECK-LABEL: f11:
++; CHECK: vlpg [[REG:%v[0-9]+]], %v24
++; CHECK: vlcg %v24, [[REG]]
++; CHECK: br %r14
++ %shr = ashr <2 x i64> %val, <i64 63, i64 63>
++ %and1 = and <2 x i64> %shr, %val
++ %not = xor <2 x i64> %shr, <i64 -1, i64 -1>
++ %neg = sub <2 x i64> zeroinitializer, %val
++ %and2 = and <2 x i64> %not, %neg
++ %ret = or <2 x i64> %and1, %and2
++ ret <2 x i64> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-abs-05.ll
+@@ -0,0 +1,46 @@
++; Test f64 and v2f64 absolute.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++declare double @llvm.fabs.f64(double)
++declare <2 x double> @llvm.fabs.v2f64(<2 x double>)
++
++; Test a plain absolute.
++define <2 x double> @f1(<2 x double> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vflpdb %v24, %v24
++; CHECK: br %r14
++ %ret = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
++ ret <2 x double> %ret
++}
++
++; Test a negative absolute.
++define <2 x double> @f2(<2 x double> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vflndb %v24, %v24
++; CHECK: br %r14
++ %abs = call <2 x double> @llvm.fabs.v2f64(<2 x double> %val)
++ %ret = fsub <2 x double> <double -0.0, double -0.0>, %abs
++ ret <2 x double> %ret
++}
++
++; Test an f64 absolute that uses vector registers.
++define double @f3(<2 x double> %val) {
++; CHECK-LABEL: f3:
++; CHECK: wflpdb %f0, %v24
++; CHECK: br %r14
++ %scalar = extractelement <2 x double> %val, i32 0
++ %ret = call double @llvm.fabs.f64(double %scalar)
++ ret double %ret
++}
++
++; Test an f64 negative absolute that uses vector registers.
++define double @f4(<2 x double> %val) {
++; CHECK-LABEL: f4:
++; CHECK: wflndb %f0, %v24
++; CHECK: br %r14
++ %scalar = extractelement <2 x double> %val, i32 0
++ %abs = call double @llvm.fabs.f64(double %scalar)
++ %ret = fsub double -0.0, %abs
++ ret double %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-add-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-add-01.ll
+@@ -0,0 +1,60 @@
++; Test vector addition.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 addition.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vab %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = add <16 x i8> %val1, %val2
++ ret <16 x i8> %ret
++}
++
++; Test a v8i16 addition.
++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vah %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = add <8 x i16> %val1, %val2
++ ret <8 x i16> %ret
++}
++
++; Test a v4i32 addition.
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vaf %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = add <4 x i32> %val1, %val2
++ ret <4 x i32> %ret
++}
++
++; Test a v2i64 addition.
++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vag %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = add <2 x i64> %val1, %val2
++ ret <2 x i64> %ret
++}
++
++; Test a v2f64 addition.
++define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
++ <2 x double> %val2) {
++; CHECK-LABEL: f5:
++; CHECK: vfadb %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = fadd <2 x double> %val1, %val2
++ ret <2 x double> %ret
++}
++
++; Test an f64 addition that uses vector registers.
++define double @f6(<2 x double> %val1, <2 x double> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: wfadb %f0, %v24, %v26
++; CHECK: br %r14
++ %scalar1 = extractelement <2 x double> %val1, i32 0
++ %scalar2 = extractelement <2 x double> %val2, i32 0
++ %ret = fadd double %scalar1, %scalar2
++ ret double %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-and-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-and-01.ll
+@@ -0,0 +1,39 @@
++; Test vector AND.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 AND.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vn %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = and <16 x i8> %val1, %val2
++ ret <16 x i8> %ret
++}
++
++; Test a v8i16 AND.
++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vn %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = and <8 x i16> %val1, %val2
++ ret <8 x i16> %ret
++}
++
++; Test a v4i32 AND.
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vn %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = and <4 x i32> %val1, %val2
++ ret <4 x i32> %ret
++}
++
++; Test a v2i64 AND.
++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vn %v24, %v26, %v28
++; CHECK: br %r14
++ %ret = and <2 x i64> %val1, %val2
++ ret <2 x i64> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-and-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-and-02.ll
+@@ -0,0 +1,91 @@
++; Test vector AND-NOT.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 AND-NOT.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vnc %v24, %v26, %v28
++; CHECK: br %r14
++ %not = xor <16 x i8> %val2, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
++ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
++ %ret = and <16 x i8> %val1, %not
++ ret <16 x i8> %ret
++}
++
++; ...and again with the reverse.
++define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vnc %v24, %v28, %v26
++; CHECK: br %r14
++ %not = xor <16 x i8> %val1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1,
++ i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
++ %ret = and <16 x i8> %not, %val2
++ ret <16 x i8> %ret
++}
++
++; Test a v8i16 AND-NOT.
++define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vnc %v24, %v26, %v28
++; CHECK: br %r14
++ %not = xor <8 x i16> %val2, <i16 -1, i16 -1, i16 -1, i16 -1,
++ i16 -1, i16 -1, i16 -1, i16 -1>
++ %ret = and <8 x i16> %val1, %not
++ ret <8 x i16> %ret
++}
++
++; ...and again with the reverse.
++define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vnc %v24, %v28, %v26
++; CHECK: br %r14
++ %not = xor <8 x i16> %val1, <i16 -1, i16 -1, i16 -1, i16 -1,
++ i16 -1, i16 -1, i16 -1, i16 -1>
++ %ret = and <8 x i16> %not, %val2
++ ret <8 x i16> %ret
++}
++
++; Test a v4i32 AND-NOT.
++define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f5:
++; CHECK: vnc %v24, %v26, %v28
++; CHECK: br %r14
++ %not = xor <4 x i32> %val2, <i32 -1, i32 -1, i32 -1, i32 -1>
++ %ret = and <4 x i32> %val1, %not
++ ret <4 x i32> %ret
++}
++
++; ...and again with the reverse.
++define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: vnc %v24, %v28, %v26
++; CHECK: br %r14
++ %not = xor <4 x i32> %val1, <i32 -1, i32 -1, i32 -1, i32 -1>
++ %ret = and <4 x i32> %not, %val2
++ ret <4 x i32> %ret
++}
++
++; Test a v2i64 AND-NOT.
++define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f7:
++; CHECK: vnc %v24, %v26, %v28
++; CHECK: br %r14
++ %not = xor <2 x i64> %val2, <i64 -1, i64 -1>
++ %ret = and <2 x i64> %val1, %not
++ ret <2 x i64> %ret
++}
++
++; ...and again with the reverse.
++define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f8:
++; CHECK: vnc %v24, %v28, %v26
++; CHECK: br %r14
++ %not = xor <2 x i64> %val1, <i64 -1, i64 -1>
++ %ret = and <2 x i64> %not, %val2
++ ret <2 x i64> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-and-03.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-and-03.ll
+@@ -0,0 +1,113 @@
++; Test vector zero extensions, which need to be implemented as ANDs.
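++;
++; The AND masks come either from a VREPI* splat of 1 (for the vNi1
++; cases) or from VGBM, whose 16-bit immediate selects 0xff or 0x00 per
++; byte: for example 21845 (0x5555) keeps the low byte of each halfword
++; and 13107 (0x3333) keeps the low halfword of each word.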
++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i1->v16i8 extension. ++define <16 x i8> @f1(<16 x i8> %val) { ++; CHECK-LABEL: f1: ++; CHECK: vrepib [[REG:%v[0-9]+]], 1 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <16 x i8> %val to <16 x i1> ++ %ret = zext <16 x i1> %trunc to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i1->v8i16 extension. ++define <8 x i16> @f2(<8 x i16> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vrepih [[REG:%v[0-9]+]], 1 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <8 x i16> %val to <8 x i1> ++ %ret = zext <8 x i1> %trunc to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i8->v8i16 extension. ++define <8 x i16> @f3(<8 x i16> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vgbm [[REG:%v[0-9]+]], 21845 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <8 x i16> %val to <8 x i8> ++ %ret = zext <8 x i8> %trunc to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i1->v4i32 extension. ++define <4 x i32> @f4(<4 x i32> %val) { ++; CHECK-LABEL: f4: ++; CHECK: vrepif [[REG:%v[0-9]+]], 1 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i1> ++ %ret = zext <4 x i1> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i8->v4i32 extension. ++define <4 x i32> @f5(<4 x i32> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vgbm [[REG:%v[0-9]+]], 4369 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i8> ++ %ret = zext <4 x i8> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i16->v4i32 extension. ++define <4 x i32> @f6(<4 x i32> %val) { ++; CHECK-LABEL: f6: ++; CHECK: vgbm [[REG:%v[0-9]+]], 13107 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i16> ++ %ret = zext <4 x i16> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i1->v2i64 extension. ++define <2 x i64> @f7(<2 x i64> %val) { ++; CHECK-LABEL: f7: ++; CHECK: vrepig [[REG:%v[0-9]+]], 1 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i1> ++ %ret = zext <2 x i1> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i8->v2i64 extension. ++define <2 x i64> @f8(<2 x i64> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vgbm [[REG:%v[0-9]+]], 257 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i8> ++ %ret = zext <2 x i8> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i16->v2i64 extension. ++define <2 x i64> @f9(<2 x i64> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vgbm [[REG:%v[0-9]+]], 771 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i16> ++ %ret = zext <2 x i16> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i32->v2i64 extension. ++define <2 x i64> @f10(<2 x i64> %val) { ++; CHECK-LABEL: f10: ++; CHECK: vgbm [[REG:%v[0-9]+]], 3855 ++; CHECK: vn %v24, %v24, [[REG]] ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i32> ++ %ret = zext <2 x i32> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-args-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-args-01.ll +@@ -0,0 +1,48 @@ ++; Test the handling of named vector arguments. 
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
++
++; This routine has 6 integer arguments, which fill up r2-r6 and
++; the stack slot at offset 160, and 10 vector arguments, which
++; fill up v24-v31 and the two double-wide stack slots at 168
++; and 184.
++declare void @bar(i64, i64, i64, i64, i64, i64,
++ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
++ <4 x i32>, <4 x i32>, <4 x i32>, <4 x i32>,
++ <4 x i32>, <4 x i32>)
++
++define void @foo() {
++; CHECK-VEC-LABEL: foo:
++; CHECK-VEC-DAG: vrepif %v24, 1
++; CHECK-VEC-DAG: vrepif %v26, 2
++; CHECK-VEC-DAG: vrepif %v28, 3
++; CHECK-VEC-DAG: vrepif %v30, 4
++; CHECK-VEC-DAG: vrepif %v25, 5
++; CHECK-VEC-DAG: vrepif %v27, 6
++; CHECK-VEC-DAG: vrepif %v29, 7
++; CHECK-VEC-DAG: vrepif %v31, 8
++; CHECK-VEC: brasl %r14, bar@PLT
++;
++; CHECK-STACK-LABEL: foo:
++; CHECK-STACK: aghi %r15, -200
++; CHECK-STACK-DAG: mvghi 160(%r15), 6
++; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 9
++; CHECK-STACK-DAG: vst [[REG1]], 168(%r15)
++; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 10
++; CHECK-STACK-DAG: vst [[REG2]], 184(%r15)
++; CHECK-STACK: brasl %r14, bar@PLT
++
++ call void @bar (i64 1, i64 2, i64 3, i64 4, i64 5, i64 6,
++ <4 x i32> <i32 1, i32 1, i32 1, i32 1>,
++ <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
++ <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
++ <4 x i32> <i32 4, i32 4, i32 4, i32 4>,
++ <4 x i32> <i32 5, i32 5, i32 5, i32 5>,
++ <4 x i32> <i32 6, i32 6, i32 6, i32 6>,
++ <4 x i32> <i32 7, i32 7, i32 7, i32 7>,
++ <4 x i32> <i32 8, i32 8, i32 8, i32 8>,
++ <4 x i32> <i32 9, i32 9, i32 9, i32 9>,
++ <4 x i32> <i32 10, i32 10, i32 10, i32 10>)
++ ret void
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-02.ll
+@@ -0,0 +1,31 @@
++; Test the handling of unnamed vector arguments.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
++
++; This routine is called with two named vector arguments (passed
++; in %v24 and %v26) and two unnamed vector arguments (passed
++; in the double-wide stack slots at 160 and 176).
++declare void @bar(<4 x i32>, <4 x i32>, ...)
++
++define void @foo() {
++; CHECK-VEC-LABEL: foo:
++; CHECK-VEC-DAG: vrepif %v24, 1
++; CHECK-VEC-DAG: vrepif %v26, 2
++; CHECK-VEC: brasl %r14, bar@PLT
++;
++; CHECK-STACK-LABEL: foo:
++; CHECK-STACK: aghi %r15, -192
++; CHECK-STACK-DAG: vrepif [[REG1:%v[0-9]+]], 3
++; CHECK-STACK-DAG: vst [[REG1]], 160(%r15)
++; CHECK-STACK-DAG: vrepif [[REG2:%v[0-9]+]], 4
++; CHECK-STACK-DAG: vst [[REG2]], 176(%r15)
++; CHECK-STACK: brasl %r14, bar@PLT
++
++ call void (<4 x i32>, <4 x i32>, ...)* @bar
++ (<4 x i32> <i32 1, i32 1, i32 1, i32 1>,
++ <4 x i32> <i32 2, i32 2, i32 2, i32 2>,
++ <4 x i32> <i32 3, i32 3, i32 3, i32 3>,
++ <4 x i32> <i32 4, i32 4, i32 4, i32 4>)
++ ret void
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-03.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-03.ll
+@@ -0,0 +1,30 @@
++; Test the handling of incoming vector arguments.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; This routine has 10 vector arguments, which fill up %v24-%v31 and
++; the two double-wide stack slots at 160 and 176.
++define <4 x i32> @foo(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4,
++ <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8,
++ <4 x i32> %v9, <4 x i32> %v10) {
++; CHECK-LABEL: foo:
++; CHECK: vl [[REG1:%v[0-9]+]], 176(%r15)
++; CHECK: vsf %v24, %v26, [[REG1]]
++; CHECK: br %r14
++ %y = sub <4 x i32> %v2, %v10
++ ret <4 x i32> %y
++}
++
++; This routine has 10 vector arguments, which fill up %v24-%v31 and
++; the two single-wide stack slots at 160 and 168.
++define <4 x i8> @bar(<4 x i8> %v1, <4 x i8> %v2, <4 x i8> %v3, <4 x i8> %v4,
++ <4 x i8> %v5, <4 x i8> %v6, <4 x i8> %v7, <4 x i8> %v8,
++ <4 x i8> %v9, <4 x i8> %v10) {
++; CHECK-LABEL: bar:
++; CHECK: vlrepg [[REG1:%v[0-9]+]], 168(%r15)
++; CHECK: vsb %v24, %v26, [[REG1]]
++; CHECK: br %r14
++ %y = sub <4 x i8> %v2, %v10
++ ret <4 x i8> %y
++}
++
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-04.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-04.ll
+@@ -0,0 +1,50 @@
++; Test the handling of named short vector arguments.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
++
++; This routine has 12 vector arguments, which fill up %v24-%v31
++; and the four single-wide stack slots starting at 160.
++declare void @bar(<1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
++ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>,
++ <1 x i8>, <2 x i8>, <4 x i8>, <8 x i8>)
++
++define void @foo() {
++; CHECK-VEC-LABEL: foo:
++; CHECK-VEC-DAG: vrepib %v24, 1
++; CHECK-VEC-DAG: vrepib %v26, 2
++; CHECK-VEC-DAG: vrepib %v28, 3
++; CHECK-VEC-DAG: vrepib %v30, 4
++; CHECK-VEC-DAG: vrepib %v25, 5
++; CHECK-VEC-DAG: vrepib %v27, 6
++; CHECK-VEC-DAG: vrepib %v29, 7
++; CHECK-VEC-DAG: vrepib %v31, 8
++; CHECK-VEC: brasl %r14, bar@PLT
++;
++; CHECK-STACK-LABEL: foo:
++; CHECK-STACK: aghi %r15, -192
++; CHECK-STACK-DAG: llihh [[REG1:%r[0-9]+]], 2304
++; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
++; CHECK-STACK-DAG: llihh [[REG2:%r[0-9]+]], 2570
++; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
++; CHECK-STACK-DAG: llihf [[REG3:%r[0-9]+]], 185273099
++; CHECK-STACK-DAG: stg [[REG3]], 176(%r15)
++; CHECK-STACK-DAG: llihf [[REG4:%r[0-9]+]], 202116108
++; CHECK-STACK-DAG: oilf [[REG4]], 202116108
++; CHECK-STACK-DAG: stg [[REG4]], 184(%r15)
++; CHECK-STACK: brasl %r14, bar@PLT
++
++ call void @bar (<1 x i8> <i8 1>,
++ <2 x i8> <i8 2, i8 2>,
++ <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
++ <8 x i8> <i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4, i8 4>,
++ <1 x i8> <i8 5>,
++ <2 x i8> <i8 6, i8 6>,
++ <4 x i8> <i8 7, i8 7, i8 7, i8 7>,
++ <8 x i8> <i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8, i8 8>,
++ <1 x i8> <i8 9>,
++ <2 x i8> <i8 10, i8 10>,
++ <4 x i8> <i8 11, i8 11, i8 11, i8 11>,
++ <8 x i8> <i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12, i8 12>)
++ ret void
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-05.ll
+@@ -0,0 +1,32 @@
++; Test the handling of unnamed short vector arguments.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-VEC
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s -check-prefix=CHECK-STACK
++
++; This routine is called with two named vector arguments (passed
++; in %v24 and %v26) and two unnamed vector arguments (passed
++; in the single-wide stack slots at 160 and 168).
++declare void @bar(<4 x i8>, <4 x i8>, ...)
++
++define void @foo() {
++; CHECK-VEC-LABEL: foo:
++; CHECK-VEC-DAG: vrepib %v24, 1
++; CHECK-VEC-DAG: vrepib %v26, 2
++; CHECK-VEC: brasl %r14, bar@PLT
++;
++; CHECK-STACK-LABEL: foo:
++; CHECK-STACK: aghi %r15, -176
++; CHECK-STACK-DAG: llihf [[REG1:%r[0-9]+]], 50529027
++; CHECK-STACK-DAG: stg [[REG1]], 160(%r15)
++; CHECK-STACK-DAG: llihf [[REG2:%r[0-9]+]], 67372036
++; CHECK-STACK-DAG: stg [[REG2]], 168(%r15)
++; CHECK-STACK: brasl %r14, bar@PLT
++
++  call void (<4 x i8>, <4 x i8>, ...)* @bar
++              (<4 x i8> <i8 1, i8 1, i8 1, i8 1>,
++               <4 x i8> <i8 2, i8 2, i8 2, i8 2>,
++               <4 x i8> <i8 3, i8 3, i8 3, i8 3>,
++               <4 x i8> <i8 4, i8 4, i8 4, i8 4>)
++  ret void
++}
++
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-01.ll
+@@ -0,0 +1,9 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++define void @foo(<1 x i128>) {
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-02.ll
+@@ -0,0 +1,9 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++define <1 x i128> @foo() {
++  ret <1 x i128> <i128 0>
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-03.ll
+@@ -0,0 +1,12 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++declare void @bar(<1 x i128>)
++
++define void @foo() {
++  call void @bar (<1 x i128> <i128 0>)
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-04.ll
+@@ -0,0 +1,12 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++declare <1 x i128> @bar()
++
++define void @foo() {
++  %res = call <1 x i128> @bar ()
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-05.ll
+@@ -0,0 +1,9 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++define void @foo(<1 x fp128>) {
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-06.ll
+@@ -0,0 +1,9 @@
++; Verify that we detect unsupported single-element vector types.
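++; (<1 x i128> and <1 x fp128> would need a full 128-bit vector register
++; but have no element type the z13 vector facility defines, so argument
++; and return lowering presumably reports this fatal error instead of
++; silently miscompiling; the same check appears to guard all eight of
++; these error tests.)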
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++define <1 x fp128> @foo() {
++  ret <1 x fp128> <fp128 0xL00000000000000000000000000000000>
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-07.ll
+@@ -0,0 +1,12 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++declare void @bar(<1 x fp128>)
++
++define void @foo() {
++  call void @bar (<1 x fp128> <fp128 0xL00000000000000000000000000000000>)
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-args-error-08.ll
+@@ -0,0 +1,12 @@
++; Verify that we detect unsupported single-element vector types.
++
++; RUN: not llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 2>&1 | FileCheck %s
++
++declare <1 x fp128> @bar()
++
++define void @foo() {
++  %res = call <1 x fp128> @bar ()
++  ret void
++}
++
++; CHECK: LLVM ERROR: Unsupported vector argument or return type
+Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-01.ll
+@@ -0,0 +1,228 @@
++; Test v16i8 comparisons.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test eq.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vceqb %v24, %v26, %v28
++; CHECK-NEXT: br %r14
++  %cmp = icmp eq <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test ne.
++define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vceqb [[REG:%v[0-9]+]], %v26, %v28
++; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
++; CHECK-NEXT: br %r14
++  %cmp = icmp ne <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test sgt.
++define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vchb %v24, %v26, %v28
++; CHECK-NEXT: br %r14
++  %cmp = icmp sgt <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test sge.
++define <16 x i8> @f4(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vchb [[REG:%v[0-9]+]], %v28, %v26
++; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
++; CHECK-NEXT: br %r14
++  %cmp = icmp sge <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test sle.
++define <16 x i8> @f5(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f5:
++; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v28
++; CHECK-NEXT: vno %v24, [[REG]], [[REG]]
++; CHECK-NEXT: br %r14
++  %cmp = icmp sle <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test slt.
++define <16 x i8> @f6(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: vchb %v24, %v28, %v26
++; CHECK-NEXT: br %r14
++  %cmp = icmp slt <16 x i8> %val1, %val2
++  %ret = sext <16 x i1> %cmp to <16 x i8>
++  ret <16 x i8> %ret
++}
++
++; Test ugt.
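++; As with the signed tests above, but using the logical (unsigned)
++; compare vchlb; the operand swapping for the different orderings is
++; unchanged.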
++define <16 x i8> @f7(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vchlb %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <16 x i8> %val1, %val2 ++ %ret = sext <16 x i1> %cmp to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test uge. ++define <16 x i8> @f8(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <16 x i8> %val1, %val2 ++ %ret = sext <16 x i1> %cmp to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test ule. ++define <16 x i8> @f9(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <16 x i8> %val1, %val2 ++ %ret = sext <16 x i1> %cmp to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test ult. ++define <16 x i8> @f10(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vchlb %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <16 x i8> %val1, %val2 ++ %ret = sext <16 x i1> %cmp to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test eq selects. ++define <16 x i8> @f11(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f11: ++; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test ne selects. ++define <16 x i8> @f12(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f12: ++; CHECK: vceqb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test sgt selects. ++define <16 x i8> @f13(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f13: ++; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test sge selects. ++define <16 x i8> @f14(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f14: ++; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test sle selects. ++define <16 x i8> @f15(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vchb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test slt selects. 
++define <16 x i8> @f16(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f16: ++; CHECK: vchb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test ugt selects. ++define <16 x i8> @f17(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test uge selects. ++define <16 x i8> @f18(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test ule selects. ++define <16 x i8> @f19(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} ++ ++; Test ult selects. ++define <16 x i8> @f20(<16 x i8> %val1, <16 x i8> %val2, ++ <16 x i8> %val3, <16 x i8> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vchlb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val3, <16 x i8> %val4 ++ ret <16 x i8> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-02.ll +@@ -0,0 +1,228 @@ ++; Test v8i16 comparisons. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test eq. ++define <8 x i16> @f1(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vceqh %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test ne. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vceqh [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test sgt. ++define <8 x i16> @f3(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vchh %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test sge. ++define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vchh [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test sle. 
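++; There is no integer compare-high-or-equal instruction, so sle is
++; checked as the vno complement of sgt with the original operand order,
++; as in the v16i8 version above.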
++define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test slt. ++define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vchh %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test ugt. ++define <8 x i16> @f7(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vchlh %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test uge. ++define <8 x i16> @f8(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test ule. ++define <8 x i16> @f9(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test ult. ++define <8 x i16> @f10(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vchlh %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <8 x i16> %val1, %val2 ++ %ret = sext <8 x i1> %cmp to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test eq selects. ++define <8 x i16> @f11(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f11: ++; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test ne selects. ++define <8 x i16> @f12(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f12: ++; CHECK: vceqh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test sgt selects. ++define <8 x i16> @f13(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f13: ++; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test sge selects. ++define <8 x i16> @f14(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f14: ++; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test sle selects. 
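++; For predicates with no direct compare, the select form avoids the vno:
++; the opposite predicate is tested and the two data operands of vsel are
++; swapped instead, as in f12 and f14 above.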
++define <8 x i16> @f15(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vchh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test slt selects. ++define <8 x i16> @f16(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f16: ++; CHECK: vchh [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test ugt selects. ++define <8 x i16> @f17(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test uge selects. ++define <8 x i16> @f18(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test ule selects. ++define <8 x i16> @f19(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} ++ ++; Test ult selects. ++define <8 x i16> @f20(<8 x i16> %val1, <8 x i16> %val2, ++ <8 x i16> %val3, <8 x i16> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vchlh [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val3, <8 x i16> %val4 ++ ret <8 x i16> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-03.ll +@@ -0,0 +1,228 @@ ++; Test v4i32 comparisons. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test eq. ++define <4 x i32> @f1(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vceqf %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ne. ++define <4 x i32> @f2(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vceqf [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test sgt. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vchf %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test sge. 
++define <4 x i32> @f4(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vchf [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test sle. ++define <4 x i32> @f5(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test slt. ++define <4 x i32> @f6(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vchf %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ugt. ++define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vchlf %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test uge. ++define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ule. ++define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ult. ++define <4 x i32> @f10(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vchlf %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <4 x i32> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test eq selects. ++define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f11: ++; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test ne selects. ++define <4 x i32> @f12(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f12: ++; CHECK: vceqf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test sgt selects. ++define <4 x i32> @f13(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f13: ++; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test sge selects. 
++define <4 x i32> @f14(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f14: ++; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test sle selects. ++define <4 x i32> @f15(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vchf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test slt selects. ++define <4 x i32> @f16(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f16: ++; CHECK: vchf [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test ugt selects. ++define <4 x i32> @f17(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test uge selects. ++define <4 x i32> @f18(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test ule selects. ++define <4 x i32> @f19(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} ++ ++; Test ult selects. ++define <4 x i32> @f20(<4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> %val3, <4 x i32> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vchlf [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val3, <4 x i32> %val4 ++ ret <4 x i32> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-04.ll +@@ -0,0 +1,228 @@ ++; Test v2i64 comparisons. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test eq. ++define <2 x i64> @f1(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vceqg %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ne. 
++define <2 x i64> @f2(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vceqg [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test sgt. ++define <2 x i64> @f3(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vchg %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test sge. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vchg [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test sle. ++define <2 x i64> @f5(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test slt. ++define <2 x i64> @f6(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vchg %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ugt. ++define <2 x i64> @f7(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vchlg %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test uge. ++define <2 x i64> @f8(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ule. ++define <2 x i64> @f9(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ult. ++define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vchlg %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <2 x i64> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test eq selects. ++define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f11: ++; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp eq <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test ne selects. 
++define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f12: ++; CHECK: vceqg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ne <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test sgt selects. ++define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f13: ++; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sgt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test sge selects. ++define <2 x i64> @f14(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f14: ++; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sge <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test sle selects. ++define <2 x i64> @f15(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vchg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp sle <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test slt selects. ++define <2 x i64> @f16(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f16: ++; CHECK: vchg [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp slt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test ugt selects. ++define <2 x i64> @f17(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ugt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test uge selects. ++define <2 x i64> @f18(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp uge <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test ule selects. ++define <2 x i64> @f19(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ule <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} ++ ++; Test ult selects. 
++define <2 x i64> @f20(<2 x i64> %val1, <2 x i64> %val2, ++ <2 x i64> %val3, <2 x i64> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vchlg [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = icmp ult <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val3, <2 x i64> %val4 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-05.ll +@@ -0,0 +1,472 @@ ++; Test v4f32 comparisons. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test oeq. ++define <4 x i32> @f1(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f1: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oeq <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test one. ++define <4 x i32> @f2(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f2: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] ++; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] ++; CHECK: vo %v24, [[RES1]], [[RES0]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp one <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ogt. ++define <4 x i32> @f3(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f3: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ogt <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test oge. 
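++; (z13 has vector floating-point compares only for f64, so each v4f32
++; compare here splits the high and low float pairs with vmrhf/vmrlf,
++; widens them to v2f64 with vldeb, compares the halves, and packs the
++; two results back together with vpkg, as the DAG checks show.)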
++define <4 x i32> @f4(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f4: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oge <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ole. ++define <4 x i32> @f5(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f5: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ole <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test olt. ++define <4 x i32> @f6(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f6: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK: vpkg %v24, [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp olt <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ueq. 
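++; ueq is checked as the vno complement of one, that is NOT (ogt OR olt),
++; reusing the same pair of vfchdb/vpkg sequences as in f2.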
++define <4 x i32> @f7(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f7: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchdb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] ++; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] ++; CHECK: vno %v24, [[RES1]], [[RES0]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ueq <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test une. ++define <4 x i32> @f8(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f8: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfcedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfcedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: vno %v24, [[RES]], [[RES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp une <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ugt. ++define <4 x i32> @f9(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f9: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: vno %v24, [[RES]], [[RES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ugt <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test uge. 
++define <4 x i32> @f10(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f10: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: vno %v24, [[RES]], [[RES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uge <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ule. ++define <4 x i32> @f11(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f11: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchdb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchdb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: vno %v24, [[RES]], [[RES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ule <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ult. ++define <4 x i32> @f12(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f12: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchedb [[LOWRES:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK: vpkg [[RES:%v[0-9]+]], [[HIGHRES]], [[LOWRES]] ++; CHECK-NEXT: vno %v24, [[RES]], [[RES]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ult <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test ord. 
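++; ord is checked as (oge OR olt): a vfchedb pack plus a reversed vfchdb
++; pack combined with vo, and uno below is the vno complement of the same
++; pair.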
++define <4 x i32> @f13(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f13: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] ++; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] ++; CHECK: vo %v24, [[RES1]], [[RES0]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ord <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test uno. ++define <4 x i32> @f14(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f14: ++; CHECK-DAG: vmrhf [[HIGH0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrlf [[LOW0E:%v[0-9]+]], %v24, %v24 ++; CHECK-DAG: vmrhf [[HIGH1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vmrlf [[LOW1E:%v[0-9]+]], %v26, %v26 ++; CHECK-DAG: vldeb [[HIGH0D:%v[0-9]+]], [[HIGH0E]] ++; CHECK-DAG: vldeb [[HIGH1D:%v[0-9]+]], [[HIGH1E]] ++; CHECK-DAG: vldeb [[LOW0D:%v[0-9]+]], [[LOW0E]] ++; CHECK-DAG: vldeb [[LOW1D:%v[0-9]+]], [[LOW1E]] ++; CHECK-DAG: vfchedb [[HIGHRES0:%v[0-9]+]], [[HIGH0D]], [[HIGH1D]] ++; CHECK-DAG: vfchedb [[LOWRES0:%v[0-9]+]], [[LOW0D]], [[LOW1D]] ++; CHECK-DAG: vfchdb [[HIGHRES1:%v[0-9]+]], [[HIGH1D]], [[HIGH0D]] ++; CHECK-DAG: vfchdb [[LOWRES1:%v[0-9]+]], [[LOW1D]], [[LOW0D]] ++; CHECK-DAG: vpkg [[RES0:%v[0-9]+]], [[HIGHRES0]], [[LOWRES0]] ++; CHECK-DAG: vpkg [[RES1:%v[0-9]+]], [[HIGHRES1]], [[LOWRES1]] ++; CHECK: vno %v24, [[RES1]], [[RES0]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uno <4 x float> %val1, %val2 ++ %ret = sext <4 x i1> %cmp to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test oeq selects. ++define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oeq <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test one selects. ++define <4 x float> @f16(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f16: ++; CHECK: vo [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp one <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ogt selects. ++define <4 x float> @f17(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ogt <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test oge selects. 
++define <4 x float> @f18(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oge <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ole selects. ++define <4 x float> @f19(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ole <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test olt selects. ++define <4 x float> @f20(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp olt <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ueq selects. ++define <4 x float> @f21(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f21: ++; CHECK: vo [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ueq <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test une selects. ++define <4 x float> @f22(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f22: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp une <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ugt selects. ++define <4 x float> @f23(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f23: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ugt <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test uge selects. ++define <4 x float> @f24(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f24: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uge <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ule selects. ++define <4 x float> @f25(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f25: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ule <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ult selects. 
++define <4 x float> @f26(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f26: ++; CHECK: vpkg [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ult <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test ord selects. ++define <4 x float> @f27(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f27: ++; CHECK: vo [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ord <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} ++ ++; Test uno selects. ++define <4 x float> @f28(<4 x float> %val1, <4 x float> %val2, ++ <4 x float> %val3, <4 x float> %val4) { ++; CHECK-LABEL: f28: ++; CHECK: vo [[REG:%v[0-9]+]], ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uno <4 x float> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x float> %val3, <4 x float> %val4 ++ ret <4 x float> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cmp-06.ll +@@ -0,0 +1,349 @@ ++; Test f64 and v2f64 comparisons. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test oeq. ++define <2 x i64> @f1(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vfcedb %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oeq <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test one. ++define <2 x i64> @f2(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f2: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 ++; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 ++; CHECK: vo %v24, [[REG1]], [[REG2]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp one <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ogt. ++define <2 x i64> @f3(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vfchdb %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ogt <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test oge. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vfchedb %v24, %v26, %v28 ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oge <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ole. ++define <2 x i64> @f5(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vfchedb %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ole <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test olt. ++define <2 x i64> @f6(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vfchdb %v24, %v28, %v26 ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp olt <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ueq. 
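++; With f64 elements no widening is needed, so ueq here is simply the vno
++; complement of the two direct vfchdb compares, mirroring the unpacked
++; v4f32 sequence in vec-cmp-05.ll.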
++define <2 x i64> @f7(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f7: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 ++; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v26, %v28 ++; CHECK: vno %v24, [[REG1]], [[REG2]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ueq <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test une. ++define <2 x i64> @f8(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vfcedb [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp une <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ugt. ++define <2 x i64> @f9(<2 x i64> %dummy, <2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ugt <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test uge. ++define <2 x i64> @f10(<2 x i64> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v28, %v26 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uge <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ule. ++define <2 x i64> @f11(<2 x i64> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ule <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ult. ++define <2 x i64> @f12(<2 x i64> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v28 ++; CHECK-NEXT: vno %v24, [[REG]], [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ult <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test ord. ++define <2 x i64> @f13(<2 x i64> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f13: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 ++; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 ++; CHECK: vo %v24, [[REG1]], [[REG2]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ord <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test uno. ++define <2 x i64> @f14(<2 x i64> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f14: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v28, %v26 ++; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v26, %v28 ++; CHECK: vno %v24, [[REG1]], [[REG2]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uno <2 x double> %val1, %val2 ++ %ret = sext <2 x i1> %cmp to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test oeq selects. ++define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f15: ++; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oeq <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test one selects. 
++define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f16: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 ++; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 ++; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp one <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ogt selects. ++define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f17: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ogt <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test oge selects. ++define <2 x double> @f18(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f18: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp oge <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ole selects. ++define <2 x double> @f19(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f19: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ole <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test olt selects. ++define <2 x double> @f20(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f20: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp olt <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ueq selects. ++define <2 x double> @f21(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f21: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 ++; CHECK-DAG: vfchdb [[REG2:%v[0-9]+]], %v24, %v26 ++; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ueq <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test une selects. ++define <2 x double> @f22(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f22: ++; CHECK: vfcedb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp une <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ugt selects. 
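A note before the remaining select tests: an unordered condition never needs an explicit complement when feeding a select, because the backend can emit the inverse ordered compare and swap the VSEL operands instead; f22 above checks VFCEDB followed by a VSEL whose operands are reversed. A hedged sketch of the equivalence, not part of the patch (function names mine):

    ; These two functions compute the same result.
    define <2 x double> @une_select(<2 x double> %a, <2 x double> %b,
                                    <2 x double> %x, <2 x double> %y) {
      %cmp = fcmp une <2 x double> %a, %b
      %ret = select <2 x i1> %cmp, <2 x double> %x, <2 x double> %y
      ret <2 x double> %ret
    }
    define <2 x double> @oeq_select_swapped(<2 x double> %a, <2 x double> %b,
                                            <2 x double> %x, <2 x double> %y) {
      %cmp = fcmp oeq <2 x double> %a, %b
      %ret = select <2 x i1> %cmp, <2 x double> %y, <2 x double> %x
      ret <2 x double> %ret
    }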
++define <2 x double> @f23(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f23: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ugt <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test uge selects. ++define <2 x double> @f24(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f24: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v26, %v24 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uge <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ule selects. ++define <2 x double> @f25(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f25: ++; CHECK: vfchdb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ule <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ult selects. ++define <2 x double> @f26(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f26: ++; CHECK: vfchedb [[REG:%v[0-9]+]], %v24, %v26 ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ult <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test ord selects. ++define <2 x double> @f27(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f27: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 ++; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 ++; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] ++; CHECK-NEXT: vsel %v24, %v28, %v30, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp ord <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test uno selects. ++define <2 x double> @f28(<2 x double> %val1, <2 x double> %val2, ++ <2 x double> %val3, <2 x double> %val4) { ++; CHECK-LABEL: f28: ++; CHECK-DAG: vfchdb [[REG1:%v[0-9]+]], %v26, %v24 ++; CHECK-DAG: vfchedb [[REG2:%v[0-9]+]], %v24, %v26 ++; CHECK: vo [[REG:%v[0-9]+]], [[REG1]], [[REG2]] ++; CHECK-NEXT: vsel %v24, %v30, %v28, [[REG]] ++; CHECK-NEXT: br %r14 ++ %cmp = fcmp uno <2 x double> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x double> %val3, <2 x double> %val4 ++ ret <2 x double> %ret ++} ++ ++; Test an f64 comparison that uses vector registers. ++define i64 @f29(i64 %a, i64 %b, double %f1, <2 x double> %vec) { ++; CHECK-LABEL: f29: ++; CHECK: wfcdb %f0, %v24 ++; CHECK-NEXT: locgrne %r2, %r3 ++; CHECK: br %r14 ++ %f2 = extractelement <2 x double> %vec, i32 0 ++ %cond = fcmp oeq double %f1, %f2 ++ %res = select i1 %cond, i64 %a, i64 %b ++ ret i64 %res ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-combine-01.ll +@@ -0,0 +1,155 @@ ++; Test various target-specific DAG combiner patterns. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Check that an extraction followed by a truncation is effectively treated ++; as a bitcast. 
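The point of this combine: on big-endian SystemZ, the low byte of i32 element k is byte 4*k+3 of the same register viewed as <16 x i8>, so the trunc+store pairs in f1 below can become single VSTEBs from lanes 3 and 15. A hedged sketch of that lane arithmetic, not part of the patch (function name mine):

    ; Extracting element 0 and truncating equals a byte extract at lane 3.
    define i8 @low_byte_of_elem0(<4 x i32> %v) {
      %bytes = bitcast <4 x i32> %v to <16 x i8>
      %b = extractelement <16 x i8> %bytes, i32 3
      ret i8 %b
    }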
++define void @f1(<4 x i32> %v1, <4 x i32> %v2, i8 *%ptr1, i8 *%ptr2) {
++; CHECK-LABEL: f1:
++; CHECK: vaf [[REG:%v[0-9]+]], %v24, %v26
++; CHECK-DAG: vsteb [[REG]], 0(%r2), 3
++; CHECK-DAG: vsteb [[REG]], 0(%r3), 15
++; CHECK: br %r14
++  %add = add <4 x i32> %v1, %v2
++  %elem1 = extractelement <4 x i32> %add, i32 0
++  %elem2 = extractelement <4 x i32> %add, i32 3
++  %trunc1 = trunc i32 %elem1 to i8
++  %trunc2 = trunc i32 %elem2 to i8
++  store i8 %trunc1, i8 *%ptr1
++  store i8 %trunc2, i8 *%ptr2
++  ret void
++}
++
++; Test a case where a pack-type shuffle can be eliminated.
++define i16 @f2(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
++; CHECK-LABEL: f2:
++; CHECK-NOT: vpk
++; CHECK-DAG: vaf [[REG1:%v[0-9]+]], %v24, %v26
++; CHECK-DAG: vaf [[REG2:%v[0-9]+]], %v26, %v28
++; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG1]], 3
++; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG2]], 7
++; CHECK: br %r14
++  %add1 = add <4 x i32> %v1, %v2
++  %add2 = add <4 x i32> %v2, %v3
++  %shuffle = shufflevector <4 x i32> %add1, <4 x i32> %add2,
++                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
++  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
++  %elem1 = extractelement <8 x i16> %bitcast, i32 1
++  %elem2 = extractelement <8 x i16> %bitcast, i32 7
++  %res = add i16 %elem1, %elem2
++  ret i16 %res
++}
++
++; ...and again in a case where there's also a splat and a bitcast.
++define i16 @f3(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
++; CHECK-LABEL: f3:
++; CHECK-NOT: vrepg
++; CHECK-NOT: vpk
++; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
++; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
++; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
++; CHECK: br %r14
++  %add = add <4 x i32> %v1, %v2
++  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
++                         <2 x i32> zeroinitializer
++  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
++  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
++                           <4 x i32> <i32 1, i32 3, i32 5, i32 7>
++  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
++  %elem1 = extractelement <8 x i16> %bitcast, i32 2
++  %elem2 = extractelement <8 x i16> %bitcast, i32 7
++  %res = add i16 %elem1, %elem2
++  ret i16 %res
++}
++
++; ...and again with a merge low instead of a pack.
++define i16 @f4(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
++; CHECK-LABEL: f4:
++; CHECK-NOT: vrepg
++; CHECK-NOT: vmr
++; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
++; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 6
++; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
++; CHECK: br %r14
++  %add = add <4 x i32> %v1, %v2
++  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
++                         <2 x i32> zeroinitializer
++  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
++  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
++                           <4 x i32> <i32 2, i32 6, i32 3, i32 7>
++  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
++  %elem1 = extractelement <8 x i16> %bitcast, i32 4
++  %elem2 = extractelement <8 x i16> %bitcast, i32 7
++  %res = add i16 %elem1, %elem2
++  ret i16 %res
++}
++
++; ...and again with a merge high.
++define i16 @f5(<4 x i32> %v1, <4 x i32> %v2, <2 x i64> %v3) {
++; CHECK-LABEL: f5:
++; CHECK-NOT: vrepg
++; CHECK-NOT: vmr
++; CHECK-DAG: vaf [[REG:%v[0-9]+]], %v24, %v26
++; CHECK-DAG: vlgvh {{%r[0-5]}}, [[REG]], 2
++; CHECK-DAG: vlgvh {{%r[0-5]}}, %v28, 3
++; CHECK: br %r14
++  %add = add <4 x i32> %v1, %v2
++  %splat = shufflevector <2 x i64> %v3, <2 x i64> undef,
++                         <2 x i32> zeroinitializer
++  %splatcast = bitcast <2 x i64> %splat to <4 x i32>
++  %shuffle = shufflevector <4 x i32> %add, <4 x i32> %splatcast,
++                           <4 x i32> <i32 0, i32 4, i32 1, i32 5>
++  %bitcast = bitcast <4 x i32> %shuffle to <8 x i16>
++  %elem1 = extractelement <8 x i16> %bitcast, i32 4
++  %elem2 = extractelement <8 x i16> %bitcast, i32 7
++  %res = add i16 %elem1, %elem2
++  ret i16 %res
++}
++
++; Test a case where an unpack high can be eliminated from the usual
++; load-extend sequence.
++define void @f6(<8 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
++; CHECK-LABEL: f6:
++; CHECK: vlrepg [[REG:%v[0-9]+]], 0(%r2)
++; CHECK-NOT: vup
++; CHECK-DAG: vsteb [[REG]], 0(%r3), 1
++; CHECK-DAG: vsteb [[REG]], 0(%r4), 2
++; CHECK-DAG: vsteb [[REG]], 0(%r5), 7
++; CHECK: br %r14
++  %vec = load <8 x i8> *%ptr1
++  %ext = sext <8 x i8> %vec to <8 x i16>
++  %elem1 = extractelement <8 x i16> %ext, i32 1
++  %elem2 = extractelement <8 x i16> %ext, i32 2
++  %elem3 = extractelement <8 x i16> %ext, i32 7
++  %trunc1 = trunc i16 %elem1 to i8
++  %trunc2 = trunc i16 %elem2 to i8
++  %trunc3 = trunc i16 %elem3 to i8
++  store i8 %trunc1, i8 *%ptr2
++  store i8 %trunc2, i8 *%ptr3
++  store i8 %trunc3, i8 *%ptr4
++  ret void
++}
++
++; ...and again with a bitcast in between.
++define void @f7(<4 x i8> *%ptr1, i8 *%ptr2, i8 *%ptr3, i8 *%ptr4) {
++; CHECK-LABEL: f7:
++; CHECK: vlrepf [[REG:%v[0-9]+]], 0(%r2)
++; CHECK-NOT: vup
++; CHECK-DAG: vsteb [[REG]], 0(%r3), 0
++; CHECK-DAG: vsteb [[REG]], 0(%r4), 1
++; CHECK-DAG: vsteb [[REG]], 0(%r5), 3
++; CHECK: br %r14
++  %vec = load <4 x i8> *%ptr1
++  %ext = sext <4 x i8> %vec to <4 x i32>
++  %bitcast = bitcast <4 x i32> %ext to <8 x i16>
++  %elem1 = extractelement <8 x i16> %bitcast, i32 1
++  %elem2 = extractelement <8 x i16> %bitcast, i32 3
++  %elem3 = extractelement <8 x i16> %bitcast, i32 7
++  %trunc1 = trunc i16 %elem1 to i8
++  %trunc2 = trunc i16 %elem2 to i8
++  %trunc3 = trunc i16 %elem3 to i8
++  store i8 %trunc1, i8 *%ptr2
++  store i8 %trunc2, i8 *%ptr3
++  store i8 %trunc3, i8 *%ptr4
++  ret void
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-combine-02.ll
+@@ -0,0 +1,433 @@
++; Test various representations of pack-like operations.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; One way of writing a <4 x i32> -> <8 x i16> pack.
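The f1 and f2 forms below spell the pack out through scalar extracts and shuffles; at the vector level the same operation is simply a truncate of each element followed by concatenation. A hedged sketch, not part of the patch (function name mine):

    ; A <4 x i32> -> <8 x i16> pack via vector trunc: keep the low
    ; halfword of every word, then concatenate the two halves.
    define <8 x i16> @pack_via_trunc(<4 x i32> %a, <4 x i32> %b) {
      %ta = trunc <4 x i32> %a to <4 x i16>
      %tb = trunc <4 x i32> %b to <4 x i16>
      %r = shufflevector <4 x i16> %ta, <4 x i16> %tb,
                         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                                    i32 4, i32 5, i32 6, i32 7>
      ret <8 x i16> %r
    }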
++define <8 x i16> @f1(<4 x i32> %val0, <4 x i32> %val1) { ++; CHECK-LABEL: f1: ++; CHECK: vpkf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %elem0 = extractelement <4 x i32> %val0, i32 0 ++ %elem1 = extractelement <4 x i32> %val0, i32 1 ++ %elem2 = extractelement <4 x i32> %val0, i32 2 ++ %elem3 = extractelement <4 x i32> %val0, i32 3 ++ %elem4 = extractelement <4 x i32> %val1, i32 0 ++ %elem5 = extractelement <4 x i32> %val1, i32 1 ++ %elem6 = extractelement <4 x i32> %val1, i32 2 ++ %elem7 = extractelement <4 x i32> %val1, i32 3 ++ %hboth0 = bitcast i32 %elem0 to <2 x i16> ++ %hboth1 = bitcast i32 %elem1 to <2 x i16> ++ %hboth2 = bitcast i32 %elem2 to <2 x i16> ++ %hboth3 = bitcast i32 %elem3 to <2 x i16> ++ %hboth4 = bitcast i32 %elem4 to <2 x i16> ++ %hboth5 = bitcast i32 %elem5 to <2 x i16> ++ %hboth6 = bitcast i32 %elem6 to <2 x i16> ++ %hboth7 = bitcast i32 %elem7 to <2 x i16> ++ %hlow0 = shufflevector <2 x i16> %hboth0, <2 x i16> %hboth1, ++ <2 x i32> ++ %hlow1 = shufflevector <2 x i16> %hboth2, <2 x i16> %hboth3, ++ <2 x i32> ++ %hlow2 = shufflevector <2 x i16> %hboth4, <2 x i16> %hboth5, ++ <2 x i32> ++ %hlow3 = shufflevector <2 x i16> %hboth6, <2 x i16> %hboth7, ++ <2 x i32> ++ %join0 = shufflevector <2 x i16> %hlow0, <2 x i16> %hlow1, ++ <4 x i32> ++ %join1 = shufflevector <2 x i16> %hlow2, <2 x i16> %hlow3, ++ <4 x i32> ++ %ret = shufflevector <4 x i16> %join0, <4 x i16> %join1, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; A different way of writing a <4 x i32> -> <8 x i16> pack. ++define <8 x i16> @f2(<4 x i32> %val0, <4 x i32> %val1) { ++; CHECK-LABEL: f2: ++; CHECK: vpkf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %elem0 = extractelement <4 x i32> %val0, i32 0 ++ %elem1 = extractelement <4 x i32> %val0, i32 1 ++ %elem2 = extractelement <4 x i32> %val0, i32 2 ++ %elem3 = extractelement <4 x i32> %val0, i32 3 ++ %elem4 = extractelement <4 x i32> %val1, i32 0 ++ %elem5 = extractelement <4 x i32> %val1, i32 1 ++ %elem6 = extractelement <4 x i32> %val1, i32 2 ++ %elem7 = extractelement <4 x i32> %val1, i32 3 ++ %wvec0 = insertelement <4 x i32> undef, i32 %elem0, i32 0 ++ %wvec1 = insertelement <4 x i32> undef, i32 %elem1, i32 0 ++ %wvec2 = insertelement <4 x i32> undef, i32 %elem2, i32 0 ++ %wvec3 = insertelement <4 x i32> undef, i32 %elem3, i32 0 ++ %wvec4 = insertelement <4 x i32> undef, i32 %elem4, i32 0 ++ %wvec5 = insertelement <4 x i32> undef, i32 %elem5, i32 0 ++ %wvec6 = insertelement <4 x i32> undef, i32 %elem6, i32 0 ++ %wvec7 = insertelement <4 x i32> undef, i32 %elem7, i32 0 ++ %hvec0 = bitcast <4 x i32> %wvec0 to <8 x i16> ++ %hvec1 = bitcast <4 x i32> %wvec1 to <8 x i16> ++ %hvec2 = bitcast <4 x i32> %wvec2 to <8 x i16> ++ %hvec3 = bitcast <4 x i32> %wvec3 to <8 x i16> ++ %hvec4 = bitcast <4 x i32> %wvec4 to <8 x i16> ++ %hvec5 = bitcast <4 x i32> %wvec5 to <8 x i16> ++ %hvec6 = bitcast <4 x i32> %wvec6 to <8 x i16> ++ %hvec7 = bitcast <4 x i32> %wvec7 to <8 x i16> ++ %hlow0 = shufflevector <8 x i16> %hvec0, <8 x i16> %hvec1, ++ <8 x i32> ++ %hlow1 = shufflevector <8 x i16> %hvec2, <8 x i16> %hvec3, ++ <8 x i32> ++ %hlow2 = shufflevector <8 x i16> %hvec4, <8 x i16> %hvec5, ++ <8 x i32> ++ %hlow3 = shufflevector <8 x i16> %hvec6, <8 x i16> %hvec7, ++ <8 x i32> ++ %join0 = shufflevector <8 x i16> %hlow0, <8 x i16> %hlow1, ++ <8 x i32> ++ %join1 = shufflevector <8 x i16> %hlow2, <8 x i16> %hlow3, ++ <8 x i32> ++ %ret = shufflevector <8 x i16> %join0, <8 x i16> %join1, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; A direct pack operation. 
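f3 below is the canonical form; its shuffle mask (lost in this copy of the patch) selects the odd-numbered, i.e. low big-endian, halfwords of both operands. A hedged reconstruction inferred from the VPKF expectation, not part of the patch (function name mine):

    ; Direct pack: bitcast both inputs and keep the low halfwords.
    define <8 x i16> @direct_pack(<4 x i32> %val0, <4 x i32> %val1) {
      %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16>
      %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16>
      %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1,
                           <8 x i32> <i32 1, i32 3, i32 5, i32 7,
                                      i32 9, i32 11, i32 13, i32 15>
      ret <8 x i16> %ret
    }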
++define <8 x i16> @f3(<4 x i32> %val0, <4 x i32> %val1) { ++; CHECK-LABEL: f3: ++; CHECK: vpkf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> ++ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> ++ %ret = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; One way of writing a <4 x i32> -> <16 x i8> pack. It doesn't matter ++; whether the first pack is VPKF or VPKH since the even bytes of the ++; result are discarded. ++define <16 x i8> @f4(<4 x i32> %val0, <4 x i32> %val1, ++ <4 x i32> %val2, <4 x i32> %val3) { ++; CHECK-LABEL: f4: ++; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26 ++; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30 ++; CHECK: vpkh %v24, [[REG1]], [[REG2]] ++; CHECK: br %r14 ++ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> ++ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> ++ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16> ++ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16> ++ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, ++ <8 x i32> ++ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3, ++ <8 x i32> ++ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8> ++ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8> ++ %ret = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Check the same operation, but with elements being extracted from the result. ++define void @f5(<4 x i32> %val0, <4 x i32> %val1, ++ <4 x i32> %val2, <4 x i32> %val3, ++ i8 *%base) { ++; CHECK-LABEL: f5: ++; CHECK-DAG: vsteb %v24, 0(%r2), 11 ++; CHECK-DAG: vsteb %v26, 1(%r2), 15 ++; CHECK-DAG: vsteb %v28, 2(%r2), 3 ++; CHECK-DAG: vsteb %v30, 3(%r2), 7 ++; CHECK: br %r14 ++ %bitcast0 = bitcast <4 x i32> %val0 to <8 x i16> ++ %bitcast1 = bitcast <4 x i32> %val1 to <8 x i16> ++ %bitcast2 = bitcast <4 x i32> %val2 to <8 x i16> ++ %bitcast3 = bitcast <4 x i32> %val3 to <8 x i16> ++ %join0 = shufflevector <8 x i16> %bitcast0, <8 x i16> %bitcast1, ++ <8 x i32> ++ %join1 = shufflevector <8 x i16> %bitcast2, <8 x i16> %bitcast3, ++ <8 x i32> ++ %bitcast4 = bitcast <8 x i16> %join0 to <16 x i8> ++ %bitcast5 = bitcast <8 x i16> %join1 to <16 x i8> ++ %vec = shufflevector <16 x i8> %bitcast4, <16 x i8> %bitcast5, ++ <16 x i32> ++ ++ %ptr0 = getelementptr i8 *%base, i64 0 ++ %ptr1 = getelementptr i8 *%base, i64 1 ++ %ptr2 = getelementptr i8 *%base, i64 2 ++ %ptr3 = getelementptr i8 *%base, i64 3 ++ ++ %byte0 = extractelement <16 x i8> %vec, i32 2 ++ %byte1 = extractelement <16 x i8> %vec, i32 7 ++ %byte2 = extractelement <16 x i8> %vec, i32 8 ++ %byte3 = extractelement <16 x i8> %vec, i32 13 ++ ++ store i8 %byte0, i8 *%ptr0 ++ store i8 %byte1, i8 *%ptr1 ++ store i8 %byte2, i8 *%ptr2 ++ store i8 %byte3, i8 *%ptr3 ++ ++ ret void ++} ++ ++; A different way of writing a <4 x i32> -> <16 x i8> pack. 
++define <16 x i8> @f6(<4 x i32> %val0, <4 x i32> %val1, ++ <4 x i32> %val2, <4 x i32> %val3) { ++; CHECK-LABEL: f6: ++; CHECK-DAG: vpk{{[hf]}} [[REG1:%v[0-9]+]], %v24, %v26 ++; CHECK-DAG: vpk{{[hf]}} [[REG2:%v[0-9]+]], %v28, %v30 ++; CHECK: vpkh %v24, [[REG1]], [[REG2]] ++; CHECK: br %r14 ++ %elem0 = extractelement <4 x i32> %val0, i32 0 ++ %elem1 = extractelement <4 x i32> %val0, i32 1 ++ %elem2 = extractelement <4 x i32> %val0, i32 2 ++ %elem3 = extractelement <4 x i32> %val0, i32 3 ++ %elem4 = extractelement <4 x i32> %val1, i32 0 ++ %elem5 = extractelement <4 x i32> %val1, i32 1 ++ %elem6 = extractelement <4 x i32> %val1, i32 2 ++ %elem7 = extractelement <4 x i32> %val1, i32 3 ++ %elem8 = extractelement <4 x i32> %val2, i32 0 ++ %elem9 = extractelement <4 x i32> %val2, i32 1 ++ %elem10 = extractelement <4 x i32> %val2, i32 2 ++ %elem11 = extractelement <4 x i32> %val2, i32 3 ++ %elem12 = extractelement <4 x i32> %val3, i32 0 ++ %elem13 = extractelement <4 x i32> %val3, i32 1 ++ %elem14 = extractelement <4 x i32> %val3, i32 2 ++ %elem15 = extractelement <4 x i32> %val3, i32 3 ++ %bitcast0 = bitcast i32 %elem0 to <2 x i16> ++ %bitcast1 = bitcast i32 %elem1 to <2 x i16> ++ %bitcast2 = bitcast i32 %elem2 to <2 x i16> ++ %bitcast3 = bitcast i32 %elem3 to <2 x i16> ++ %bitcast4 = bitcast i32 %elem4 to <2 x i16> ++ %bitcast5 = bitcast i32 %elem5 to <2 x i16> ++ %bitcast6 = bitcast i32 %elem6 to <2 x i16> ++ %bitcast7 = bitcast i32 %elem7 to <2 x i16> ++ %bitcast8 = bitcast i32 %elem8 to <2 x i16> ++ %bitcast9 = bitcast i32 %elem9 to <2 x i16> ++ %bitcast10 = bitcast i32 %elem10 to <2 x i16> ++ %bitcast11 = bitcast i32 %elem11 to <2 x i16> ++ %bitcast12 = bitcast i32 %elem12 to <2 x i16> ++ %bitcast13 = bitcast i32 %elem13 to <2 x i16> ++ %bitcast14 = bitcast i32 %elem14 to <2 x i16> ++ %bitcast15 = bitcast i32 %elem15 to <2 x i16> ++ %low0 = shufflevector <2 x i16> %bitcast0, <2 x i16> %bitcast1, ++ <2 x i32> ++ %low1 = shufflevector <2 x i16> %bitcast2, <2 x i16> %bitcast3, ++ <2 x i32> ++ %low2 = shufflevector <2 x i16> %bitcast4, <2 x i16> %bitcast5, ++ <2 x i32> ++ %low3 = shufflevector <2 x i16> %bitcast6, <2 x i16> %bitcast7, ++ <2 x i32> ++ %low4 = shufflevector <2 x i16> %bitcast8, <2 x i16> %bitcast9, ++ <2 x i32> ++ %low5 = shufflevector <2 x i16> %bitcast10, <2 x i16> %bitcast11, ++ <2 x i32> ++ %low6 = shufflevector <2 x i16> %bitcast12, <2 x i16> %bitcast13, ++ <2 x i32> ++ %low7 = shufflevector <2 x i16> %bitcast14, <2 x i16> %bitcast15, ++ <2 x i32> ++ %bytes0 = bitcast <2 x i16> %low0 to <4 x i8> ++ %bytes1 = bitcast <2 x i16> %low1 to <4 x i8> ++ %bytes2 = bitcast <2 x i16> %low2 to <4 x i8> ++ %bytes3 = bitcast <2 x i16> %low3 to <4 x i8> ++ %bytes4 = bitcast <2 x i16> %low4 to <4 x i8> ++ %bytes5 = bitcast <2 x i16> %low5 to <4 x i8> ++ %bytes6 = bitcast <2 x i16> %low6 to <4 x i8> ++ %bytes7 = bitcast <2 x i16> %low7 to <4 x i8> ++ %blow0 = shufflevector <4 x i8> %bytes0, <4 x i8> %bytes1, ++ <4 x i32> ++ %blow1 = shufflevector <4 x i8> %bytes2, <4 x i8> %bytes3, ++ <4 x i32> ++ %blow2 = shufflevector <4 x i8> %bytes4, <4 x i8> %bytes5, ++ <4 x i32> ++ %blow3 = shufflevector <4 x i8> %bytes6, <4 x i8> %bytes7, ++ <4 x i32> ++ %join0 = shufflevector <4 x i8> %blow0, <4 x i8> %blow1, ++ <8 x i32> ++ %join1 = shufflevector <4 x i8> %blow2, <4 x i8> %blow3, ++ <8 x i32> ++ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; One way of writing a <2 x i64> -> <16 x i8> pack. 
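Each VPK step halves the element width, so reducing eight <2 x i64> inputs to one <16 x i8> takes three rounds, matching the REG1 through REG6 tree in f7's CHECK lines below. One round written at the vector level, as a hedged sketch that is not part of the patch (function name mine):

    ; One pack round: <2 x i64> pairs pack to <4 x i32> (VPKG keeps the
    ; low word of each doubleword); two more rounds reach <16 x i8>.
    define <4 x i32> @pack_round(<2 x i64> %a, <2 x i64> %b) {
      %ba = bitcast <2 x i64> %a to <4 x i32>
      %bb = bitcast <2 x i64> %b to <4 x i32>
      %r = shufflevector <4 x i32> %ba, <4 x i32> %bb,
                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
      ret <4 x i32> %r
    }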
++define <16 x i8> @f7(<2 x i64> %val0, <2 x i64> %val1, ++ <2 x i64> %val2, <2 x i64> %val3, ++ <2 x i64> %val4, <2 x i64> %val5, ++ <2 x i64> %val6, <2 x i64> %val7) { ++; CHECK-LABEL: f7: ++; CHECK-DAG: vpk{{[hfg]}} [[REG1:%v[0-9]+]], %v24, %v26 ++; CHECK-DAG: vpk{{[hfg]}} [[REG2:%v[0-9]+]], %v28, %v30 ++; CHECK-DAG: vpk{{[hfg]}} [[REG3:%v[0-9]+]], %v25, %v27 ++; CHECK-DAG: vpk{{[hfg]}} [[REG4:%v[0-9]+]], %v29, %v31 ++; CHECK-DAG: vpk{{[hf]}} [[REG5:%v[0-9]+]], [[REG1]], [[REG2]] ++; CHECK-DAG: vpk{{[hf]}} [[REG6:%v[0-9]+]], [[REG3]], [[REG4]] ++; CHECK: vpkh %v24, [[REG5]], [[REG6]] ++; CHECK: br %r14 ++ %elem0 = extractelement <2 x i64> %val0, i32 0 ++ %elem1 = extractelement <2 x i64> %val0, i32 1 ++ %elem2 = extractelement <2 x i64> %val1, i32 0 ++ %elem3 = extractelement <2 x i64> %val1, i32 1 ++ %elem4 = extractelement <2 x i64> %val2, i32 0 ++ %elem5 = extractelement <2 x i64> %val2, i32 1 ++ %elem6 = extractelement <2 x i64> %val3, i32 0 ++ %elem7 = extractelement <2 x i64> %val3, i32 1 ++ %elem8 = extractelement <2 x i64> %val4, i32 0 ++ %elem9 = extractelement <2 x i64> %val4, i32 1 ++ %elem10 = extractelement <2 x i64> %val5, i32 0 ++ %elem11 = extractelement <2 x i64> %val5, i32 1 ++ %elem12 = extractelement <2 x i64> %val6, i32 0 ++ %elem13 = extractelement <2 x i64> %val6, i32 1 ++ %elem14 = extractelement <2 x i64> %val7, i32 0 ++ %elem15 = extractelement <2 x i64> %val7, i32 1 ++ %bitcast0 = bitcast i64 %elem0 to <2 x i32> ++ %bitcast1 = bitcast i64 %elem1 to <2 x i32> ++ %bitcast2 = bitcast i64 %elem2 to <2 x i32> ++ %bitcast3 = bitcast i64 %elem3 to <2 x i32> ++ %bitcast4 = bitcast i64 %elem4 to <2 x i32> ++ %bitcast5 = bitcast i64 %elem5 to <2 x i32> ++ %bitcast6 = bitcast i64 %elem6 to <2 x i32> ++ %bitcast7 = bitcast i64 %elem7 to <2 x i32> ++ %bitcast8 = bitcast i64 %elem8 to <2 x i32> ++ %bitcast9 = bitcast i64 %elem9 to <2 x i32> ++ %bitcast10 = bitcast i64 %elem10 to <2 x i32> ++ %bitcast11 = bitcast i64 %elem11 to <2 x i32> ++ %bitcast12 = bitcast i64 %elem12 to <2 x i32> ++ %bitcast13 = bitcast i64 %elem13 to <2 x i32> ++ %bitcast14 = bitcast i64 %elem14 to <2 x i32> ++ %bitcast15 = bitcast i64 %elem15 to <2 x i32> ++ %low0 = shufflevector <2 x i32> %bitcast0, <2 x i32> %bitcast1, ++ <2 x i32> ++ %low1 = shufflevector <2 x i32> %bitcast2, <2 x i32> %bitcast3, ++ <2 x i32> ++ %low2 = shufflevector <2 x i32> %bitcast4, <2 x i32> %bitcast5, ++ <2 x i32> ++ %low3 = shufflevector <2 x i32> %bitcast6, <2 x i32> %bitcast7, ++ <2 x i32> ++ %low4 = shufflevector <2 x i32> %bitcast8, <2 x i32> %bitcast9, ++ <2 x i32> ++ %low5 = shufflevector <2 x i32> %bitcast10, <2 x i32> %bitcast11, ++ <2 x i32> ++ %low6 = shufflevector <2 x i32> %bitcast12, <2 x i32> %bitcast13, ++ <2 x i32> ++ %low7 = shufflevector <2 x i32> %bitcast14, <2 x i32> %bitcast15, ++ <2 x i32> ++ %half0 = bitcast <2 x i32> %low0 to <4 x i16> ++ %half1 = bitcast <2 x i32> %low1 to <4 x i16> ++ %half2 = bitcast <2 x i32> %low2 to <4 x i16> ++ %half3 = bitcast <2 x i32> %low3 to <4 x i16> ++ %half4 = bitcast <2 x i32> %low4 to <4 x i16> ++ %half5 = bitcast <2 x i32> %low5 to <4 x i16> ++ %half6 = bitcast <2 x i32> %low6 to <4 x i16> ++ %half7 = bitcast <2 x i32> %low7 to <4 x i16> ++ %hlow0 = shufflevector <4 x i16> %half0, <4 x i16> %half1, ++ <4 x i32> ++ %hlow1 = shufflevector <4 x i16> %half2, <4 x i16> %half3, ++ <4 x i32> ++ %hlow2 = shufflevector <4 x i16> %half4, <4 x i16> %half5, ++ <4 x i32> ++ %hlow3 = shufflevector <4 x i16> %half6, <4 x i16> %half7, ++ <4 x i32> ++ %bytes0 = bitcast <4 x i16> 
%hlow0 to <8 x i8> ++ %bytes1 = bitcast <4 x i16> %hlow1 to <8 x i8> ++ %bytes2 = bitcast <4 x i16> %hlow2 to <8 x i8> ++ %bytes3 = bitcast <4 x i16> %hlow3 to <8 x i8> ++ %join0 = shufflevector <8 x i8> %bytes0, <8 x i8> %bytes1, ++ <8 x i32> ++ %join1 = shufflevector <8 x i8> %bytes2, <8 x i8> %bytes3, ++ <8 x i32> ++ %ret = shufflevector <8 x i8> %join0, <8 x i8> %join1, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a <2 x i64> -> <4 x f32> pack in which only individual elements are ++; needed. ++define float @f8(i64 %scalar0, i64 %scalar1, i64 %scalar2, i64 %scalar3) { ++; CHECK-LABEL: f8: ++; CHECK-NOT: vperm ++; CHECK-NOT: vpk ++; CHECK-NOT: vmrh ++; CHECK: aebr {{%f[0-7]}}, ++; CHECK: aebr {{%f[0-7]}}, ++; CHECK: meebr %f0, ++; CHECK: br %r14 ++ %vec0 = insertelement <2 x i64> undef, i64 %scalar0, i32 0 ++ %vec1 = insertelement <2 x i64> undef, i64 %scalar1, i32 0 ++ %vec2 = insertelement <2 x i64> undef, i64 %scalar2, i32 0 ++ %vec3 = insertelement <2 x i64> undef, i64 %scalar3, i32 0 ++ %join0 = shufflevector <2 x i64> %vec0, <2 x i64> %vec1, ++ <2 x i32> ++ %join1 = shufflevector <2 x i64> %vec2, <2 x i64> %vec3, ++ <2 x i32> ++ %bitcast0 = bitcast <2 x i64> %join0 to <4 x float> ++ %bitcast1 = bitcast <2 x i64> %join1 to <4 x float> ++ %pack = shufflevector <4 x float> %bitcast0, <4 x float> %bitcast1, ++ <4 x i32> ++ %elt0 = extractelement <4 x float> %pack, i32 0 ++ %elt1 = extractelement <4 x float> %pack, i32 1 ++ %elt2 = extractelement <4 x float> %pack, i32 2 ++ %elt3 = extractelement <4 x float> %pack, i32 3 ++ %add0 = fadd float %elt0, %elt2 ++ %add1 = fadd float %elt1, %elt3 ++ %ret = fmul float %add0, %add1 ++ ret float %ret ++} ++ ++; Test a <2 x f64> -> <4 x i32> pack in which only individual elements are ++; needed. ++define i32 @f9(double %scalar0, double %scalar1, double %scalar2, ++ double %scalar3) { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vperm ++; CHECK-NOT: vpk ++; CHECK-NOT: vmrh ++; CHECK: ar {{%r[0-5]}}, ++; CHECK: ar {{%r[0-5]}}, ++; CHECK: or %r2, ++; CHECK: br %r14 ++ %vec0 = insertelement <2 x double> undef, double %scalar0, i32 0 ++ %vec1 = insertelement <2 x double> undef, double %scalar1, i32 0 ++ %vec2 = insertelement <2 x double> undef, double %scalar2, i32 0 ++ %vec3 = insertelement <2 x double> undef, double %scalar3, i32 0 ++ %join0 = shufflevector <2 x double> %vec0, <2 x double> %vec1, ++ <2 x i32> ++ %join1 = shufflevector <2 x double> %vec2, <2 x double> %vec3, ++ <2 x i32> ++ %bitcast0 = bitcast <2 x double> %join0 to <4 x i32> ++ %bitcast1 = bitcast <2 x double> %join1 to <4 x i32> ++ %pack = shufflevector <4 x i32> %bitcast0, <4 x i32> %bitcast1, ++ <4 x i32> ++ %elt0 = extractelement <4 x i32> %pack, i32 0 ++ %elt1 = extractelement <4 x i32> %pack, i32 1 ++ %elt2 = extractelement <4 x i32> %pack, i32 2 ++ %elt3 = extractelement <4 x i32> %pack, i32 3 ++ %add0 = add i32 %elt0, %elt2 ++ %add1 = add i32 %elt1, %elt3 ++ %ret = or i32 %add0, %add1 ++ ret i32 %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-01.ll +@@ -0,0 +1,103 @@ ++; Test vector byte masks, v16i8 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <16 x i8> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <16 x i8> zeroinitializer ++} ++ ++; Test an all-ones vector. 
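The vector literals in this file's returns did not survive this copy of the patch, but the fully specified ones can be recovered from the comments: VGBM sets byte i of the result to 0xff exactly when bit 15-i of its immediate is set. A hedged sketch of what f3's return value (mask 0x8c75, vgbm 35957) presumably looks like, not part of the patch (function name mine):

    define <16 x i8> @mask_8c75() {
      ret <16 x i8> <i8 -1, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 0, i8 0,
                     i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 -1, i8 0, i8 -1>
    }

The undef-containing variant (f4) cannot be recovered the same way, since the comment does not say which zero bytes were written as undef.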
++define <16 x i8> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a mixed vector (mask 0x8c75). ++define <16 x i8> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 35957 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test that undefs are treated as zero. ++define <16 x i8> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 35957 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <16 x i8> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test an all-zeros v2i8 that gets promoted to v16i8. ++define <2 x i8> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x i8> zeroinitializer ++} ++ ++; Test a mixed v2i8 that gets promoted to v16i8 (mask 0x8000). ++define <2 x i8> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgbm %v24, 32768 ++; CHECK: br %r14 ++ ret <2 x i8> ++} ++ ++; Test an all-zeros v4i8 that gets promoted to v16i8. ++define <4 x i8> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <4 x i8> zeroinitializer ++} ++ ++; Test a mixed v4i8 that gets promoted to v16i8 (mask 0x9000). ++define <4 x i8> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgbm %v24, 36864 ++; CHECK: br %r14 ++ ret <4 x i8> ++} ++ ++; Test an all-zeros v8i8 that gets promoted to v16i8. ++define <8 x i8> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <8 x i8> zeroinitializer ++} ++ ++; Test a mixed v8i8 that gets promoted to v16i8 (mask 0xE500). ++define <8 x i8> @f11() { ++; CHECK-LABEL: f11: ++; CHECK: vgbm %v24, 58624 ++; CHECK: br %r14 ++ ret <8 x i8> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-02.ll +@@ -0,0 +1,79 @@ ++; Test vector byte masks, v8i16 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <8 x i16> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <8 x i16> zeroinitializer ++} ++ ++; Test an all-ones vector. ++define <8 x i16> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a mixed vector (mask 0x8c76). ++define <8 x i16> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 35958 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test that undefs are treated as zero. ++define <8 x i16> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 35958 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <8 x i16> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test an all-zeros v2i16 that gets promoted to v8i16. ++define <2 x i16> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x i16> zeroinitializer ++} ++ ++; Test a mixed v2i16 that gets promoted to v8i16 (mask 0xc000). ++define <2 x i16> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgbm %v24, 49152 ++; CHECK: br %r14 ++ ret <2 x i16> ++} ++ ++; Test an all-zeros v4i16 that gets promoted to v8i16. ++define <4 x i16> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <4 x i16> zeroinitializer ++} ++ ++; Test a mixed v4i16 that gets promoted to v8i16 (mask 0x7200). 
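For the promoted subvector tests, the constant occupies the leftmost bytes of the register, so mask 0x7200 pins down f9's halfwords completely: bytes 1 to 3 and byte 6 are 0xff. A hedged reconstruction, not part of the patch (function name mine):

    define <4 x i16> @mask_7200() {
      ; vgbm %v24, 29184: halfwords 0x00ff, 0xffff, 0x0000, 0xff00
      ret <4 x i16> <i16 255, i16 -1, i16 0, i16 -256>
    }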
++define <4 x i16> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgbm %v24, 29184 ++; CHECK: br %r14 ++ ret <4 x i16> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-03.ll +@@ -0,0 +1,59 @@ ++; Test vector byte masks, v4i32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <4 x i32> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <4 x i32> zeroinitializer ++} ++ ++; Test an all-ones vector. ++define <4 x i32> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a mixed vector (mask 0x8c76). ++define <4 x i32> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 35958 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test that undefs are treated as zero (mask 0x8076). ++define <4 x i32> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 32886 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <4 x i32> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test an all-zeros v2i32 that gets promoted to v4i32. ++define <2 x i32> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x i32> zeroinitializer ++} ++ ++; Test a mixed v2i32 that gets promoted to v4i32 (mask 0xae00). ++define <2 x i32> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgbm %v24, 44544 ++; CHECK: br %r14 ++ ret <2 x i32> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-04.ll +@@ -0,0 +1,43 @@ ++; Test vector byte masks, v2i64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <2 x i64> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x i64> zeroinitializer ++} ++ ++; Test an all-ones vector. ++define <2 x i64> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a mixed vector (mask 0x8c76). ++define <2 x i64> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 35958 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test that undefs are treated as zero (mask 0x8c00). ++define <2 x i64> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 35840 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <2 x i64> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <2 x i64> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-05.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-05.ll +@@ -0,0 +1,63 @@ ++; Test vector byte masks, v4f32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <4 x float> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <4 x float> zeroinitializer ++} ++ ++; Test an all-ones vector. ++define <4 x float> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a mixed vector (mask 0xc731). 
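Floating-point byte masks follow the same VGBM rule; mask 0xc731 yields the four words 0xffff0000, 0x00ffffff, 0x0000ffff and 0x000000ff. Written with a constant-expression bitcast as a hedged sketch (the test itself presumably spells these as hexadecimal float literals; function name mine):

    define <4 x float> @mask_c731() {
      ; vgbm %v24, 50993, viewed as four float bit patterns
      ret <4 x float> bitcast (<4 x i32> <i32 -65536, i32 16777215,
                                          i32 65535, i32 255> to <4 x float>)
    }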
++define <4 x float> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 50993 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test that undefs are treated as zero (mask 0xc031). ++define <4 x float> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 49201 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <4 x float> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test an all-zeros v2f32 that gets promoted to v4f32. ++define <2 x float> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x float> zeroinitializer ++} ++ ++; Test a mixed v2f32 that gets promoted to v4f32 (mask 0xc700). ++define <2 x float> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgbm %v24, 50944 ++; CHECK: br %r14 ++ ret <2 x float> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-06.ll +@@ -0,0 +1,43 @@ ++; Test vector byte masks, v2f64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test an all-zeros vector. ++define <2 x double> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK: br %r14 ++ ret <2 x double> zeroinitializer ++} ++ ++; Test an all-ones vector. ++define <2 x double> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 65535 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a mixed vector (mask 0x8c76). ++define <2 x double> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 35958 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test that undefs are treated as zero (mask 0x8c00). ++define <2 x double> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgbm %v24, 35840 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test that we don't use VGBM if one of the bytes is not 0 or 0xff. ++define <2 x double> @f5() { ++; CHECK-LABEL: f5: ++; CHECK-NOT: vgbm ++; CHECK: br %r14 ++ ret <2 x double> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-07.ll +@@ -0,0 +1,229 @@ ++; Test vector replicates, v16i8 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <16 x i8> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. ++define <16 x i8> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. ++define <16 x i8> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <16 x i8> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <16 x i8> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. 
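VREPI replicates a sign-extended 16-bit immediate at the chosen element width, so the halfword case that follows fills the register with 0xfffe; -1 is not "useful" here because an all-ones register is already covered by VGBM 65535. A hedged sketch of the v16i8 value f6 presumably returns, not part of the patch (function name mine):

    define <16 x i8> @repih_minus2() {
      ; vrepih %v24, -2: halfword 0xfffe, i.e. bytes 0xff, 0xfe repeated
      ret <16 x i8> <i8 -1, i8 -2, i8 -1, i8 -2, i8 -1, i8 -2, i8 -1, i8 -2,
                     i8 -1, i8 -2, i8 -1, i8 -2, i8 -1, i8 -2, i8 -1, i8 -2>
    }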
++define <16 x i8> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <16 x i8> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. ++define <16 x i8> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <16 x i8> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <16 x i8> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <16 x i8> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. ++define <16 x i8> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <16 x i8> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <16 x i8> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <16 x i8> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. ++define <16 x i8> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <16 x i8> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <16 x i8> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Repeat f14 with undefs optimistically treated as 0. ++define <16 x i8> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Repeat f18 with undefs optimistically treated as -1. ++define <16 x i8> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <16 x i8> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-08.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-08.ll +@@ -0,0 +1,189 @@ ++; Test vector replicates, v8i16 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <8 x i16> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. 
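In this v8i16 file the same byte replicates read as halfwords: vrepib -55 fills every byte with 0xc9, hence every halfword with 0xc9c9. A hedged sketch of the constant f2 presumably returns, not part of the patch (function name mine):

    define <8 x i16> @repib_minus55() {
      ; vrepib %v24, -55: halfword 0xc9c9 == -13879
      ret <8 x i16> <i16 -13879, i16 -13879, i16 -13879, i16 -13879,
                     i16 -13879, i16 -13879, i16 -13879, i16 -13879>
    }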
++define <8 x i16> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. ++define <8 x i16> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <8 x i16> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <8 x i16> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. ++define <8 x i16> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <8 x i16> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. ++define <8 x i16> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <8 x i16> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <8 x i16> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <8 x i16> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. ++define <8 x i16> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <8 x i16> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <8 x i16> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <8 x i16> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. ++define <8 x i16> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <8 x i16> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <8 x i16> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Repeat f14 with undefs optimistically treated as 0. ++define <8 x i16> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Repeat f18 with undefs optimistically treated as -1. 
++define <8 x i16> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <8 x i16> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-09.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-09.ll +@@ -0,0 +1,169 @@ ++; Test vector replicates, v4i32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <4 x i32> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. ++define <4 x i32> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. ++define <4 x i32> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <4 x i32> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <4 x i32> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. ++define <4 x i32> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <4 x i32> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. ++define <4 x i32> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <4 x i32> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <4 x i32> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <4 x i32> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. ++define <4 x i32> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <4 x i32> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <4 x i32> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <4 x i32> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. 
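The replicate immediate is a signed 16-bit field, which is why 32767 and -32768 bracket the usable range throughout these files. Viewed as v4i32, a doubleword replicate of -32768 is the word pair <-1, -32768>. A hedged reconstruction of the f16 that follows, not part of the patch (function name mine):

    define <4 x i32> @repig_minus32768() {
      ; vrepig %v24, -32768: doubleword 0xffffffffffff8000
      ret <4 x i32> <i32 -1, i32 -32768, i32 -1, i32 -32768>
    }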
++define <4 x i32> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <4 x i32> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <4 x i32> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Repeat f14 with undefs optimistically treated as 0, 32767. ++define <4 x i32> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Repeat f18 with undefs optimistically treated as -2, -1. ++define <4 x i32> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <4 x i32> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-10.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-10.ll +@@ -0,0 +1,169 @@ ++; Test vector replicates, v2i64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <2 x i64> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. ++define <2 x i64> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. ++define <2 x i64> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <2 x i64> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <2 x i64> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. ++define <2 x i64> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <2 x i64> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. ++define <2 x i64> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <2 x i64> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <2 x i64> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <2 x i64> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. 
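In v2i64 terms a word-granularity replicate of -2 doubles up to 0xfffffffefffffffe per element. A hedged sketch of what the f12 below presumably returns, not part of the patch (function name mine):

    define <2 x i64> @repif_minus2() {
      ; vrepif %v24, -2: each doubleword is 0xfffffffefffffffe
      ret <2 x i64> <i64 -4294967298, i64 -4294967298>
    }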
++define <2 x i64> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <2 x i64> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <2 x i64> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <2 x i64> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. ++define <2 x i64> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <2 x i64> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <2 x i64> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Repeat f14 with undefs optimistically treated as 32767. ++define <2 x i64> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Repeat f18 with undefs optimistically treated as -2. ++define <2 x i64> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <2 x i64> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-11.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-11.ll +@@ -0,0 +1,189 @@ ++; Test vector replicates, v4f32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <4 x float> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. ++define <4 x float> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. ++define <4 x float> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <4 x float> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <4 x float> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. ++define <4 x float> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <4 x float> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. 
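For the float files the replicates are pure bit patterns: vrepif 32767 makes every word 0x00007fff, a tiny subnormal. A hedged sketch using a constant-expression bitcast (the test itself presumably uses hexadecimal float literals; function name mine):

    define <4 x float> @repif_32767() {
      ; vrepif %v24, 32767: each word holds the bit pattern 0x00007fff
      ret <4 x float> bitcast (<4 x i32> <i32 32767, i32 32767,
                                          i32 32767, i32 32767> to <4 x float>)
    }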
++define <4 x float> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <4 x float> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <4 x float> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <4 x float> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. ++define <4 x float> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <4 x float> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <4 x float> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <4 x float> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. ++define <4 x float> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <4 x float> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <4 x float> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Repeat f14 with undefs optimistically treated as 0, 32767. ++define <4 x float> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Repeat f18 with undefs optimistically treated as -2, -1. ++define <4 x float> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <4 x float> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-12.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-12.ll +@@ -0,0 +1,169 @@ ++; Test vector replicates, v2f64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a byte-granularity replicate with the lowest useful value. ++define <2 x double> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vrepib %v24, 1 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a byte-granularity replicate with an arbitrary value. ++define <2 x double> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vrepib %v24, -55 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a byte-granularity replicate with the highest useful value. 
++define <2 x double> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vrepib %v24, -2 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a halfword-granularity replicate with the lowest useful value. ++define <2 x double> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vrepih %v24, 1 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a halfword-granularity replicate with an arbitrary value. ++define <2 x double> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vrepih %v24, 25650 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a halfword-granularity replicate with the highest useful value. ++define <2 x double> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vrepih %v24, -2 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the lowest useful positive value. ++define <2 x double> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vrepif %v24, 1 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the highest in-range value. ++define <2 x double> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vrepif %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the next highest value. ++; This cannot use VREPIF. ++define <2 x double> @f9() { ++; CHECK-LABEL: f9: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the lowest in-range value. ++define <2 x double> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vrepif %v24, -32768 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the next lowest value. ++; This cannot use VREPIF. ++define <2 x double> @f11() { ++; CHECK-LABEL: f11: ++; CHECK-NOT: vrepif ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with the highest useful negative value. ++define <2 x double> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vrepif %v24, -2 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the lowest useful positive ++; value. ++define <2 x double> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vrepig %v24, 1 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the highest in-range value. ++define <2 x double> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the next highest value. ++; This cannot use VREPIG. ++define <2 x double> @f15() { ++; CHECK-LABEL: f15: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the lowest in-range value. ++define <2 x double> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vrepig %v24, -32768 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the next lowest value. ++; This cannot use VREPIG. ++define <2 x double> @f17() { ++; CHECK-LABEL: f17: ++; CHECK-NOT: vrepig ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the highest useful negative ++; value. ++define <2 x double> @f18() { ++; CHECK-LABEL: f18: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Repeat f14 with undefs optimistically treated as 32767. ++define <2 x double> @f19() { ++; CHECK-LABEL: f19: ++; CHECK: vrepig %v24, 32767 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Repeat f18 with undefs optimistically treated as -2. 
++define <2 x double> @f20() { ++; CHECK-LABEL: f20: ++; CHECK: vrepig %v24, -2 ++; CHECK: br %r14 ++ ret <2 x double> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-13.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-13.ll +@@ -0,0 +1,193 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v16i8 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. ++define <16 x i8> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <16 x i8> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <16 x i8> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <16 x i8> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 12, 17 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <16 x i8> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 17, 15 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <16 x i8> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. ++define <16 x i8> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <16 x i8> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <16 x i8> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <16 x i8> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 18, 0 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Retest f1 with arbitrary undefs instead of 0s. ++define <16 x i8> @f11() { ++; CHECK-LABEL: f11: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Try a case where we want consistent undefs to be treated as 0. ++define <16 x i8> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vgmf %v24, 15, 23 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; ...and again with the lower bits of the replicated constant. ++define <16 x i8> @f13() { ++; CHECK-LABEL: f13: ++; CHECK: vgmf %v24, 15, 22 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Try a case where we want consistent undefs to be treated as -1. ++define <16 x i8> @f14() { ++; CHECK-LABEL: f14: ++; CHECK: vgmf %v24, 28, 8 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; ...and again with the lower bits of the replicated constant. ++define <16 x i8> @f15() { ++; CHECK-LABEL: f15: ++; CHECK: vgmf %v24, 18, 3 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Repeat f9 with arbitrary undefs. 
++define <16 x i8> @f16() { ++; CHECK-LABEL: f16: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <16 x i8> ++} ++ ++; Try a case where we want some consistent undefs to be treated as 0 ++; and some to be treated as 255. ++define <16 x i8> @f17() { ++; CHECK-LABEL: f17: ++; CHECK: vgmg %v24, 23, 35 ++; CHECK: br %r14 ++ ret <16 x i8> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-14.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-14.ll +@@ -0,0 +1,113 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v8i16 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. ++define <8 x i16> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <8 x i16> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <8 x i16> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <8 x i16> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 12, 17 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <8 x i16> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 17, 15 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <8 x i16> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. ++define <8 x i16> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <8 x i16> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <8 x i16> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <8 x i16> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 18, 0 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; Retest f1 with arbitrary undefs instead of 0s. ++define <8 x i16> @f11() { ++; CHECK-LABEL: f11: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <8 x i16> ++} ++ ++; ...likewise f9. ++define <8 x i16> @f12() { ++; CHECK-LABEL: f12: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <8 x i16> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-15.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-15.ll +@@ -0,0 +1,85 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v4i32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. 
++define <4 x i32> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <4 x i32> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <4 x i32> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <4 x i32> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 12, 17 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <4 x i32> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 17, 15 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <4 x i32> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. ++define <4 x i32> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <4 x i32> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <4 x i32> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <4 x i32> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <4 x i32> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 18, 0 ++; CHECK: br %r14 ++ ret <4 x i32> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-16.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-16.ll +@@ -0,0 +1,85 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v2i64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. ++define <2 x i64> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <2 x i64> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <2 x i64> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <2 x i64> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 12, 17 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <2 x i64> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 17, 15 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <2 x i64> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. 
++define <2 x i64> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <2 x i64> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <2 x i64> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 31, 42 ++; CHECK: br %r14 ++ ret <2 x i64> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <2 x i64> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 18, 0 ++; CHECK: br %r14 ++ ret <2 x i64> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-17.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-17.ll +@@ -0,0 +1,95 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v4f32 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. ++define <4 x float> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <4 x float> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <4 x float> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <4 x float> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 2, 8 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <4 x float> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 9, 1 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <4 x float> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. ++define <4 x float> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <4 x float> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <4 x float> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 34, 41 ++; CHECK: br %r14 ++ ret <4 x float> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <4 x float> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 32, 0 ++; CHECK: br %r14 ++ ret <4 x float> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-const-18.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-const-18.ll +@@ -0,0 +1,85 @@ ++; Test vector replicates that use VECTOR GENERATE MASK, v2f64 version. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a word-granularity replicate with the lowest value that cannot use ++; VREPIF. 
++define <2 x double> @f1() { ++; CHECK-LABEL: f1: ++; CHECK: vgmf %v24, 16, 16 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate that has the lower 17 bits set. ++define <2 x double> @f2() { ++; CHECK-LABEL: f2: ++; CHECK: vgmf %v24, 15, 31 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate that has the upper 15 bits set. ++define <2 x double> @f3() { ++; CHECK-LABEL: f3: ++; CHECK: vgmf %v24, 0, 14 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate that has middle bits set. ++define <2 x double> @f4() { ++; CHECK-LABEL: f4: ++; CHECK: vgmf %v24, 2, 11 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a word-granularity replicate with a wrap-around mask. ++define <2 x double> @f5() { ++; CHECK-LABEL: f5: ++; CHECK: vgmf %v24, 17, 15 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with the lowest value that cannot ++; use VREPIG. ++define <2 x double> @f6() { ++; CHECK-LABEL: f6: ++; CHECK: vgmg %v24, 48, 48 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate that has the lower 22 bits set. ++define <2 x double> @f7() { ++; CHECK-LABEL: f7: ++; CHECK: vgmg %v24, 42, 63 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate that has the upper 45 bits set. ++define <2 x double> @f8() { ++; CHECK-LABEL: f8: ++; CHECK: vgmg %v24, 0, 44 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate that has middle bits set. ++define <2 x double> @f9() { ++; CHECK-LABEL: f9: ++; CHECK: vgmg %v24, 2, 11 ++; CHECK: br %r14 ++ ret <2 x double> ++} ++ ++; Test a doubleword-granularity replicate with a wrap-around mask. ++define <2 x double> @f10() { ++; CHECK-LABEL: f10: ++; CHECK: vgmg %v24, 10, 0 ++; CHECK: br %r14 ++ ret <2 x double> ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-conv-01.ll +@@ -0,0 +1,95 @@ ++; Test conversions between integer and float elements. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test conversion of f64s to signed i64s. ++define <2 x i64> @f1(<2 x double> %doubles) { ++; CHECK-LABEL: f1: ++; CHECK: vcgdb %v24, %v24, 0, 5 ++; CHECK: br %r14 ++ %dwords = fptosi <2 x double> %doubles to <2 x i64> ++ ret <2 x i64> %dwords ++} ++ ++; Test conversion of f64s to unsigned i64s. ++define <2 x i64> @f2(<2 x double> %doubles) { ++; CHECK-LABEL: f2: ++; CHECK: vclgdb %v24, %v24, 0, 5 ++; CHECK: br %r14 ++ %dwords = fptoui <2 x double> %doubles to <2 x i64> ++ ret <2 x i64> %dwords ++} ++ ++; Test conversion of signed i64s to f64s. ++define <2 x double> @f3(<2 x i64> %dwords) { ++; CHECK-LABEL: f3: ++; CHECK: vcdgb %v24, %v24, 0, 0 ++; CHECK: br %r14 ++ %doubles = sitofp <2 x i64> %dwords to <2 x double> ++ ret <2 x double> %doubles ++} ++ ++; Test conversion of unsigned i64s to f64s. ++define <2 x double> @f4(<2 x i64> %dwords) { ++; CHECK-LABEL: f4: ++; CHECK: vcdlgb %v24, %v24, 0, 0 ++; CHECK: br %r14 ++ %doubles = uitofp <2 x i64> %dwords to <2 x double> ++ ret <2 x double> %doubles ++} ++ ++; Test conversion of f64s to signed i32s, which must compile. 
++define void @f5(<2 x double> %doubles, <2 x i32> *%ptr) { ++ %words = fptosi <2 x double> %doubles to <2 x i32> ++ store <2 x i32> %words, <2 x i32> *%ptr ++ ret void ++} ++ ++; Test conversion of f64s to unsigned i32s, which must compile. ++define void @f6(<2 x double> %doubles, <2 x i32> *%ptr) { ++ %words = fptoui <2 x double> %doubles to <2 x i32> ++ store <2 x i32> %words, <2 x i32> *%ptr ++ ret void ++} ++ ++; Test conversion of signed i32s to f64s, which must compile. ++define <2 x double> @f7(<2 x i32> *%ptr) { ++ %words = load <2 x i32> *%ptr ++ %doubles = sitofp <2 x i32> %words to <2 x double> ++ ret <2 x double> %doubles ++} ++ ++; Test conversion of unsigned i32s to f64s, which must compile. ++define <2 x double> @f8(<2 x i32> *%ptr) { ++ %words = load <2 x i32> *%ptr ++ %doubles = uitofp <2 x i32> %words to <2 x double> ++ ret <2 x double> %doubles ++} ++ ++; Test conversion of f32s to signed i64s, which must compile. ++define <2 x i64> @f9(<2 x float> *%ptr) { ++ %floats = load <2 x float> *%ptr ++ %dwords = fptosi <2 x float> %floats to <2 x i64> ++ ret <2 x i64> %dwords ++} ++ ++; Test conversion of f32s to unsigned i64s, which must compile. ++define <2 x i64> @f10(<2 x float> *%ptr) { ++ %floats = load <2 x float> *%ptr ++ %dwords = fptoui <2 x float> %floats to <2 x i64> ++ ret <2 x i64> %dwords ++} ++ ++; Test conversion of signed i64s to f32, which must compile. ++define void @f11(<2 x i64> %dwords, <2 x float> *%ptr) { ++ %floats = sitofp <2 x i64> %dwords to <2 x float> ++ store <2 x float> %floats, <2 x float> *%ptr ++ ret void ++} ++ ++; Test conversion of unsigned i64s to f32, which must compile. ++define void @f12(<2 x i64> %dwords, <2 x float> *%ptr) { ++ %floats = uitofp <2 x i64> %dwords to <2 x float> ++ store <2 x float> %floats, <2 x float> *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-conv-02.ll +@@ -0,0 +1,33 @@ ++; Test conversions between different-sized float elements. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test cases where both elements of a v2f64 are converted to f32s. ++define void @f1(<2 x double> %val, <2 x float> *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vledb {{%v[0-9]+}}, %v24, 0, 0 ++; CHECK: br %r14 ++ %res = fptrunc <2 x double> %val to <2 x float> ++ store <2 x float> %res, <2 x float> *%ptr ++ ret void ++} ++ ++; Test conversion of an f64 in a vector register to an f32. ++define float @f2(<2 x double> %vec) { ++; CHECK-LABEL: f2: ++; CHECK: wledb %f0, %v24 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %vec, i32 0 ++ %ret = fptrunc double %scalar to float ++ ret float %ret ++} ++ ++; Test conversion of an f32 in a vector register to an f64. 
++define double @f3(<4 x float> %vec) { ++; CHECK-LABEL: f3: ++; CHECK: wldeb %f0, %v24 ++; CHECK: br %r14 ++ %scalar = extractelement <4 x float> %vec, i32 0 ++ %ret = fpext float %scalar to double ++ ret double %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-ctlz-01.ll +@@ -0,0 +1,81 @@ ++; Test vector count leading zeros ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %src, i1 %is_zero_undef) ++declare <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %src, i1 %is_zero_undef) ++declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %src, i1 %is_zero_undef) ++declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %src, i1 %is_zero_undef) ++ ++define <16 x i8> @f1(<16 x i8> %a) { ++; CHECK-LABEL: f1: ++; CHECK: vclzb %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 false) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @f2(<16 x i8> %a) { ++; CHECK-LABEL: f2: ++; CHECK: vclzb %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <16 x i8> @llvm.ctlz.v16i8(<16 x i8> %a, i1 true) ++ ret <16 x i8> %res ++} ++ ++define <8 x i16> @f3(<8 x i16> %a) { ++; CHECK-LABEL: f3: ++; CHECK: vclzh %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 false) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @f4(<8 x i16> %a) { ++; CHECK-LABEL: f4: ++; CHECK: vclzh %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <8 x i16> @llvm.ctlz.v8i16(<8 x i16> %a, i1 true) ++ ret <8 x i16> %res ++} ++ ++define <4 x i32> @f5(<4 x i32> %a) { ++; CHECK-LABEL: f5: ++; CHECK: vclzf %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @f6(<4 x i32> %a) { ++; CHECK-LABEL: f6: ++; CHECK: vclzf %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 true) ++ ret <4 x i32> %res ++} ++ ++define <2 x i64> @f7(<2 x i64> %a) { ++; CHECK-LABEL: f7: ++; CHECK: vclzg %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @f8(<2 x i64> %a) { ++; CHECK-LABEL: f8: ++; CHECK: vclzg %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 true) ++ ret <2 x i64> %res ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-ctpop-01.ll +@@ -0,0 +1,53 @@ ++; Test vector population-count instruction ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) ++declare <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) ++declare <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) ++declare <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) ++ ++define <16 x i8> @f1(<16 x i8> %a) { ++; CHECK-LABEL: f1: ++; CHECK: vpopct %v24, %v24, 0 ++; CHECK: br %r14 ++ ++ %popcnt = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %a) ++ ret <16 x i8> %popcnt ++} ++ ++define <8 x i16> @f2(<8 x i16> %a) { ++; CHECK-LABEL: f2: ++; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 ++; CHECK: veslh [[T2:%v[0-9]+]], [[T1]], 8 ++; CHECK: vah [[T3:%v[0-9]+]], [[T1]], [[T2]] ++; CHECK: vesrlh %v24, [[T3]], 8 ++; CHECK: br %r14 ++ ++ %popcnt = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %a) ++ ret <8 x i16> %popcnt ++} ++ 
++define <4 x i32> @f3(<4 x i32> %a) { ++; CHECK-LABEL: f3: ++; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 ++; CHECK: vgbm [[T2:%v[0-9]+]], 0 ++; CHECK: vsumb %v24, [[T1]], [[T2]] ++; CHECK: br %r14 ++ ++ %popcnt = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> %a) ++ ret <4 x i32> %popcnt ++} ++ ++define <2 x i64> @f4(<2 x i64> %a) { ++; CHECK-LABEL: f4: ++; CHECK: vpopct [[T1:%v[0-9]+]], %v24, 0 ++; CHECK: vgbm [[T2:%v[0-9]+]], 0 ++; CHECK: vsumb [[T3:%v[0-9]+]], [[T1]], [[T2]] ++; CHECK: vsumgf %v24, [[T3]], [[T2]] ++; CHECK: br %r14 ++ ++ %popcnt = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> %a) ++ ret <2 x i64> %popcnt ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-cttz-01.ll +@@ -0,0 +1,81 @@ ++; Test vector count trailing zeros ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare <16 x i8> @llvm.cttz.v16i8(<16 x i8> %src, i1 %is_zero_undef) ++declare <8 x i16> @llvm.cttz.v8i16(<8 x i16> %src, i1 %is_zero_undef) ++declare <4 x i32> @llvm.cttz.v4i32(<4 x i32> %src, i1 %is_zero_undef) ++declare <2 x i64> @llvm.cttz.v2i64(<2 x i64> %src, i1 %is_zero_undef) ++ ++define <16 x i8> @f1(<16 x i8> %a) { ++; CHECK-LABEL: f1: ++; CHECK: vctzb %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 false) ++ ret <16 x i8> %res ++} ++ ++define <16 x i8> @f2(<16 x i8> %a) { ++; CHECK-LABEL: f2: ++; CHECK: vctzb %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <16 x i8> @llvm.cttz.v16i8(<16 x i8> %a, i1 true) ++ ret <16 x i8> %res ++} ++ ++define <8 x i16> @f3(<8 x i16> %a) { ++; CHECK-LABEL: f3: ++; CHECK: vctzh %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 false) ++ ret <8 x i16> %res ++} ++ ++define <8 x i16> @f4(<8 x i16> %a) { ++; CHECK-LABEL: f4: ++; CHECK: vctzh %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <8 x i16> @llvm.cttz.v8i16(<8 x i16> %a, i1 true) ++ ret <8 x i16> %res ++} ++ ++define <4 x i32> @f5(<4 x i32> %a) { ++; CHECK-LABEL: f5: ++; CHECK: vctzf %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 false) ++ ret <4 x i32> %res ++} ++ ++define <4 x i32> @f6(<4 x i32> %a) { ++; CHECK-LABEL: f6: ++; CHECK: vctzf %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <4 x i32> @llvm.cttz.v4i32(<4 x i32> %a, i1 true) ++ ret <4 x i32> %res ++} ++ ++define <2 x i64> @f7(<2 x i64> %a) { ++; CHECK-LABEL: f7: ++; CHECK: vctzg %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 false) ++ ret <2 x i64> %res ++} ++ ++define <2 x i64> @f8(<2 x i64> %a) { ++; CHECK-LABEL: f8: ++; CHECK: vctzg %v24, %v24 ++; CHECK: br %r14 ++ ++ %res = call <2 x i64> @llvm.cttz.v2i64(<2 x i64> %a, i1 true) ++ ret <2 x i64> %res ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/vec-div-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-div-01.ll +@@ -0,0 +1,83 @@ ++; Test vector division. There is no native integer support for this, ++; so the integer cases are really a test of the operation legalization code. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 division. 
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vlvgp [[REG:%v[0-9]+]], ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 0 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 1 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 2 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 3 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 4 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 5 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 6 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 8 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 9 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 10 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 11 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 12 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 13 ++; CHECK-DAG: vlvgb [[REG]], {{%r[0-5]}}, 14 ++; CHECK: br %r14 ++ %ret = sdiv <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 division. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vlvgp [[REG:%v[0-9]+]], ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 0 ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 1 ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 2 ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 4 ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 5 ++; CHECK-DAG: vlvgh [[REG]], {{%r[0-5]}}, 6 ++; CHECK: br %r14 ++ %ret = sdiv <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 division. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vlvgp [[REG:%v[0-9]+]], ++; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 0 ++; CHECK-DAG: vlvgf [[REG]], {{%r[0-5]}}, 2 ++; CHECK: br %r14 ++ %ret = sdiv <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 division. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vlvgp %v24, ++; CHECK: br %r14 ++ %ret = sdiv <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2f64 division. ++define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vfddb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = fdiv <2 x double> %val1, %val2 ++ ret <2 x double> %ret ++} ++ ++; Test an f64 division that uses vector registers. ++define double @f6(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: wfddb %f0, %v24, %v26 ++; CHECK: br %r14 ++ %scalar1 = extractelement <2 x double> %val1, i32 0 ++ %scalar2 = extractelement <2 x double> %val2, i32 0 ++ %ret = fdiv double %scalar1, %scalar2 ++ ret double %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-extract-01.ll +@@ -0,0 +1,13 @@ ++; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a memory copy of a v2i32 (via the constant pool). 
++define void @f1(<2 x i32> *%dest) { ++; CHECK-LABEL: f1: ++; CHECK: lgrl [[REG:%r[0-5]]], {{[._A-Za-z0-9]}} ++; CHECK: stg [[REG]], 0(%r2) ++; CHECK: br %r14 ++ store <2 x i32> , <2 x i32> *%dest ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-extract-02.ll +@@ -0,0 +1,15 @@ ++; Verify ReplaceExtractVectorEltOfLoadWithNarrowedLoad fixes ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a case where a vector extraction can be simplified to a scalar load. ++; The index must be extended from i32 to i64. ++define i32 @f1(<4 x i32> *%ptr, i32 %index) { ++; CHECK-LABEL: f1: ++; CHECK: risbg {{%r[0-5]}}, %r3, 30, 189, 2 ++; CHECK: l %r2, ++; CHECK: br %r14 ++ %vec = load <4 x i32> *%ptr ++ %res = extractelement <4 x i32> %vec, i32 %index ++ ret i32 %res ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-intrinsics.ll +@@ -0,0 +1,3335 @@ ++; Test vector intrinsics. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare i32 @llvm.s390.lcbb(i8 *, i32) ++declare <16 x i8> @llvm.s390.vlbb(i8 *, i32) ++declare <16 x i8> @llvm.s390.vll(i32, i8 *) ++declare <2 x i64> @llvm.s390.vpdi(<2 x i64>, <2 x i64>, i32) ++declare <16 x i8> @llvm.s390.vperm(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vpksh(<8 x i16>, <8 x i16>) ++declare <8 x i16> @llvm.s390.vpksf(<4 x i32>, <4 x i32>) ++declare <4 x i32> @llvm.s390.vpksg(<2 x i64>, <2 x i64>) ++declare {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16>, <8 x i16>) ++declare {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32>, <4 x i32>) ++declare {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vpklsh(<8 x i16>, <8 x i16>) ++declare <8 x i16> @llvm.s390.vpklsf(<4 x i32>, <4 x i32>) ++declare <4 x i32> @llvm.s390.vpklsg(<2 x i64>, <2 x i64>) ++declare {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16>, <8 x i16>) ++declare {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32>, <4 x i32>) ++declare {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64>, <2 x i64>) ++declare void @llvm.s390.vstl(<16 x i8>, i32, i8 *) ++declare <8 x i16> @llvm.s390.vuphb(<16 x i8>) ++declare <4 x i32> @llvm.s390.vuphh(<8 x i16>) ++declare <2 x i64> @llvm.s390.vuphf(<4 x i32>) ++declare <8 x i16> @llvm.s390.vuplhb(<16 x i8>) ++declare <4 x i32> @llvm.s390.vuplhh(<8 x i16>) ++declare <2 x i64> @llvm.s390.vuplhf(<4 x i32>) ++declare <8 x i16> @llvm.s390.vuplb(<16 x i8>) ++declare <4 x i32> @llvm.s390.vuplhw(<8 x i16>) ++declare <2 x i64> @llvm.s390.vuplf(<4 x i32>) ++declare <8 x i16> @llvm.s390.vupllb(<16 x i8>) ++declare <4 x i32> @llvm.s390.vupllh(<8 x i16>) ++declare <2 x i64> @llvm.s390.vupllf(<4 x i32>) ++declare <16 x i8> @llvm.s390.vaccb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vacch(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vaccf(<4 x i32>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vaccg(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vaq(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vacq(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vaccq(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vacccq(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vavgb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vavgh(<8 x i16>, <8 x i16>) ++declare <4 x i32> 
@llvm.s390.vavgf(<4 x i32>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vavgg(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vavglb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vavglh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vavglf(<4 x i32>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vavglg(<2 x i64>, <2 x i64>) ++declare <4 x i32> @llvm.s390.vcksm(<4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vgfmb(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vgfmh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vgfmf(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vgfmg(<2 x i64>, <2 x i64>) ++declare <8 x i16> @llvm.s390.vgfmab(<16 x i8>, <16 x i8>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vgfmah(<8 x i16>, <8 x i16>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vgfmaf(<4 x i32>, <4 x i32>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vgfmag(<2 x i64>, <2 x i64>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vmahb(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vmahh(<8 x i16>, <8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmahf(<4 x i32>, <4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vmalhb(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vmalhh(<8 x i16>, <8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmalhf(<4 x i32>, <4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vmaeb(<16 x i8>, <16 x i8>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmaeh(<8 x i16>, <8 x i16>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vmaef(<4 x i32>, <4 x i32>, <2 x i64>) ++declare <8 x i16> @llvm.s390.vmaleb(<16 x i8>, <16 x i8>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmaleh(<8 x i16>, <8 x i16>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vmalef(<4 x i32>, <4 x i32>, <2 x i64>) ++declare <8 x i16> @llvm.s390.vmaob(<16 x i8>, <16 x i8>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmaoh(<8 x i16>, <8 x i16>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vmaof(<4 x i32>, <4 x i32>, <2 x i64>) ++declare <8 x i16> @llvm.s390.vmalob(<16 x i8>, <16 x i8>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmaloh(<8 x i16>, <8 x i16>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vmalof(<4 x i32>, <4 x i32>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vmhb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vmhh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmhf(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vmlhb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vmlhh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vmlhf(<4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vmeb(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vmeh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vmef(<4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vmleb(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vmleh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vmlef(<4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vmob(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vmoh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vmof(<4 x i32>, <4 x i32>) ++declare <8 x i16> @llvm.s390.vmlob(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vmloh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vmlof(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.verllvb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.verllvh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.verllvf(<4 x i32>, <4 x i32>) ++declare <2 x i64> @llvm.s390.verllvg(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.verllb(<16 x i8>, i32) ++declare <8 x 
i16> @llvm.s390.verllh(<8 x i16>, i32) ++declare <4 x i32> @llvm.s390.verllf(<4 x i32>, i32) ++declare <2 x i64> @llvm.s390.verllg(<2 x i64>, i32) ++declare <16 x i8> @llvm.s390.verimb(<16 x i8>, <16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.s390.verimh(<8 x i16>, <8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.s390.verimf(<4 x i32>, <4 x i32>, <4 x i32>, i32) ++declare <2 x i64> @llvm.s390.verimg(<2 x i64>, <2 x i64>, <2 x i64>, i32) ++declare <16 x i8> @llvm.s390.vsl(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vslb(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsra(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsrab(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsrl(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsrlb(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsldb(<16 x i8>, <16 x i8>, i32) ++declare <16 x i8> @llvm.s390.vscbib(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vscbih(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vscbif(<4 x i32>, <4 x i32>) ++declare <2 x i64> @llvm.s390.vscbig(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vsq(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsbiq(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vscbiq(<16 x i8>, <16 x i8>) ++declare <16 x i8> @llvm.s390.vsbcbiq(<16 x i8>, <16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vsumb(<16 x i8>, <16 x i8>) ++declare <4 x i32> @llvm.s390.vsumh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vsumgh(<8 x i16>, <8 x i16>) ++declare <2 x i64> @llvm.s390.vsumgf(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vsumqf(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vsumqg(<2 x i64>, <2 x i64>) ++declare i32 @llvm.s390.vtm(<16 x i8>, <16 x i8>) ++declare {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32>, <4 x i32>) ++declare {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64>, <2 x i64>) ++declare {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32>, <4 x i32>) ++declare {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64>, <2 x i64>) ++declare {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32>, <4 x i32>) ++declare {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64>, <2 x i64>) ++declare <16 x i8> @llvm.s390.vfaeb(<16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.s390.vfaeh(<8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.s390.vfaef(<4 x i32>, <4 x i32>, i32) ++declare {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8>, <16 x i8>, i32) ++declare {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16>, <8 x i16>, i32) ++declare {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32>, <4 x i32>, i32) ++declare <16 x i8> @llvm.s390.vfaezb(<16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.s390.vfaezh(<8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.s390.vfaezf(<4 x i32>, <4 x i32>, i32) ++declare {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8>, <16 x i8>, i32) ++declare {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16>, <8 x i16>, i32) ++declare {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32>, <4 x i32>, i32) ++declare <16 x i8> @llvm.s390.vfeeb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vfeeh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vfeef(<4 x 
i32>, <4 x i32>) ++declare {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vfeezb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vfeezh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vfeezf(<4 x i32>, <4 x i32>) ++declare {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vfeneb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vfeneh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vfenef(<4 x i32>, <4 x i32>) ++declare {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vfenezb(<16 x i8>, <16 x i8>) ++declare <8 x i16> @llvm.s390.vfenezh(<8 x i16>, <8 x i16>) ++declare <4 x i32> @llvm.s390.vfenezf(<4 x i32>, <4 x i32>) ++declare {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8>, <16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16>, <8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32>, <4 x i32>) ++declare <16 x i8> @llvm.s390.vistrb(<16 x i8>) ++declare <8 x i16> @llvm.s390.vistrh(<8 x i16>) ++declare <4 x i32> @llvm.s390.vistrf(<4 x i32>) ++declare {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8>) ++declare {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16>) ++declare {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32>) ++declare <16 x i8> @llvm.s390.vstrcb(<16 x i8>, <16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.s390.vstrch(<8 x i16>, <8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.s390.vstrcf(<4 x i32>, <4 x i32>, <4 x i32>, i32) ++declare {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8>, <16 x i8>, <16 x i8>, ++ i32) ++declare {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16>, <8 x i16>, <8 x i16>, ++ i32) ++declare {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32>, <4 x i32>, <4 x i32>, ++ i32) ++declare <16 x i8> @llvm.s390.vstrczb(<16 x i8>, <16 x i8>, <16 x i8>, i32) ++declare <8 x i16> @llvm.s390.vstrczh(<8 x i16>, <8 x i16>, <8 x i16>, i32) ++declare <4 x i32> @llvm.s390.vstrczf(<4 x i32>, <4 x i32>, <4 x i32>, i32) ++declare {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8>, <16 x i8>, <16 x i8>, ++ i32) ++declare {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16>, <8 x i16>, <8 x i16>, ++ i32) ++declare {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32>, <4 x i32>, <4 x i32>, ++ i32) ++declare {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double>, <2 x double>) ++declare {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double>, <2 x double>) ++declare {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double>, <2 x double>) ++declare {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double>, i32) ++declare <2 x double> @llvm.s390.vfidb(<2 x double>, i32, i32) ++ ++; LCBB with the lowest M3 operand. ++define i32 @test_lcbb1(i8 *%ptr) { ++; CHECK-LABEL: test_lcbb1: ++; CHECK: lcbb %r2, 0(%r2), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 0) ++ ret i32 %res ++} ++ ++; LCBB with the highest M3 operand. ++define i32 @test_lcbb2(i8 *%ptr) { ++; CHECK-LABEL: test_lcbb2: ++; CHECK: lcbb %r2, 0(%r2), 15 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 15) ++ ret i32 %res ++} ++ ++; LCBB with a displacement and index. 
++define i32 @test_lcbb3(i8 *%base, i64 %index) {
++; CHECK-LABEL: test_lcbb3:
++; CHECK: lcbb %r2, 4095({{%r2,%r3|%r3,%r2}}), 4
++; CHECK: br %r14
++  %add = add i64 %index, 4095
++  %ptr = getelementptr i8 *%base, i64 %add
++  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 4)
++  ret i32 %res
++}
++
++; LCBB with an out-of-range displacement.
++define i32 @test_lcbb4(i8 *%base) {
++; CHECK-LABEL: test_lcbb4:
++; CHECK: lcbb %r2, 0({{%r[1-5]}}), 5
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i64 4096
++  %res = call i32 @llvm.s390.lcbb(i8 *%ptr, i32 5)
++  ret i32 %res
++}
++
++; VLBB with the lowest M3 operand.
++define <16 x i8> @test_vlbb1(i8 *%ptr) {
++; CHECK-LABEL: test_vlbb1:
++; CHECK: vlbb %v24, 0(%r2), 0
++; CHECK: br %r14
++  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 0)
++  ret <16 x i8> %res
++}
++
++; VLBB with the highest M3 operand.
++define <16 x i8> @test_vlbb2(i8 *%ptr) {
++; CHECK-LABEL: test_vlbb2:
++; CHECK: vlbb %v24, 0(%r2), 15
++; CHECK: br %r14
++  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 15)
++  ret <16 x i8> %res
++}
++
++; VLBB with a displacement and index.
++define <16 x i8> @test_vlbb3(i8 *%base, i64 %index) {
++; CHECK-LABEL: test_vlbb3:
++; CHECK: vlbb %v24, 4095({{%r2,%r3|%r3,%r2}}), 4
++; CHECK: br %r14
++  %add = add i64 %index, 4095
++  %ptr = getelementptr i8 *%base, i64 %add
++  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 4)
++  ret <16 x i8> %res
++}
++
++; VLBB with an out-of-range displacement.
++define <16 x i8> @test_vlbb4(i8 *%base) {
++; CHECK-LABEL: test_vlbb4:
++; CHECK: vlbb %v24, 0({{%r[1-5]}}), 5
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i64 4096
++  %res = call <16 x i8> @llvm.s390.vlbb(i8 *%ptr, i32 5)
++  ret <16 x i8> %res
++}
++
++; VLL with the lowest in-range displacement.
++define <16 x i8> @test_vll1(i8 *%ptr, i32 %length) {
++; CHECK-LABEL: test_vll1:
++; CHECK: vll %v24, %r3, 0(%r2)
++; CHECK: br %r14
++  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
++  ret <16 x i8> %res
++}
++
++; VLL with the highest in-range displacement.
++define <16 x i8> @test_vll2(i8 *%base, i32 %length) {
++; CHECK-LABEL: test_vll2:
++; CHECK: vll %v24, %r3, 4095(%r2)
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i64 4095
++  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
++  ret <16 x i8> %res
++}
++
++; VLL with an out-of-range displacement.
++define <16 x i8> @test_vll3(i8 *%base, i32 %length) {
++; CHECK-LABEL: test_vll3:
++; CHECK: vll %v24, %r3, 0({{%r[1-5]}})
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i64 4096
++  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
++  ret <16 x i8> %res
++}
++
++; Check that VLL doesn't allow an index.
++define <16 x i8> @test_vll4(i8 *%base, i64 %index, i32 %length) {
++; CHECK-LABEL: test_vll4:
++; CHECK: vll %v24, %r4, 0({{%r[1-5]}})
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i64 %index
++  %res = call <16 x i8> @llvm.s390.vll(i32 %length, i8 *%ptr)
++  ret <16 x i8> %res
++}
++
++; VPDI taking element 0 from each half.
++define <2 x i64> @test_vpdi1(<2 x i64> %a, <2 x i64> %b) {
++; CHECK-LABEL: test_vpdi1:
++; CHECK: vpdi %v24, %v24, %v26, 0
++; CHECK: br %r14
++  %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 0)
++  ret <2 x i64> %res
++}
++
++; VPDI taking element 1 from each half.
++define <2 x i64> @test_vpdi2(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vpdi2: ++; CHECK: vpdi %v24, %v24, %v26, 10 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vpdi(<2 x i64> %a, <2 x i64> %b, i32 10) ++ ret <2 x i64> %res ++} ++ ++; VPERM. ++define <16 x i8> @test_vperm(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vperm: ++; CHECK: vperm %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vperm(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VPKSH. ++define <16 x i8> @test_vpksh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vpksh: ++; CHECK: vpksh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vpksh(<8 x i16> %a, <8 x i16> %b) ++ ret <16 x i8> %res ++} ++ ++; VPKSF. ++define <8 x i16> @test_vpksf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vpksf: ++; CHECK: vpksf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vpksf(<4 x i32> %a, <4 x i32> %b) ++ ret <8 x i16> %res ++} ++ ++; VPKSG. ++define <4 x i32> @test_vpksg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vpksg: ++; CHECK: vpksg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vpksg(<2 x i64> %a, <2 x i64> %b) ++ ret <4 x i32> %res ++} ++ ++; VPKSHS with no processing of the result. ++define <16 x i8> @test_vpkshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpkshs: ++; CHECK: vpkshs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VPKSHS, storing to %ptr if all values were saturated. ++define <16 x i8> @test_vpkshs_all_store(<8 x i16> %a, <8 x i16> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vpkshs_all_store: ++; CHECK: vpkshs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vpkshs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp uge i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <16 x i8> %res ++} ++ ++; VPKSFS with no processing of the result. ++define <8 x i16> @test_vpksfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpksfs: ++; CHECK: vpksfs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VPKSFS, storing to %ptr if any values were saturated. 
++define <8 x i16> @test_vpksfs_any_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vpksfs_any_store: ++; CHECK: vpksfs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vpksfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp ugt i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <8 x i16> %res ++} ++ ++; VPKSGS with no processing of the result. ++define <4 x i32> @test_vpksgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpksgs: ++; CHECK: vpksgs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VPKSGS, storing to %ptr if no elements were saturated ++define <4 x i32> @test_vpksgs_none_store(<2 x i64> %a, <2 x i64> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vpksgs_none_store: ++; CHECK: vpksgs %v24, %v24, %v26 ++; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vpksgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp sle i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <4 x i32> %res ++} ++ ++; VPKLSH. ++define <16 x i8> @test_vpklsh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vpklsh: ++; CHECK: vpklsh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vpklsh(<8 x i16> %a, <8 x i16> %b) ++ ret <16 x i8> %res ++} ++ ++; VPKLSF. ++define <8 x i16> @test_vpklsf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vpklsf: ++; CHECK: vpklsf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vpklsf(<4 x i32> %a, <4 x i32> %b) ++ ret <8 x i16> %res ++} ++ ++; VPKLSG. ++define <4 x i32> @test_vpklsg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vpklsg: ++; CHECK: vpklsg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vpklsg(<2 x i64> %a, <2 x i64> %b) ++ ret <4 x i32> %res ++} ++ ++; VPKLSHS with no processing of the result. ++define <16 x i8> @test_vpklshs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpklshs: ++; CHECK: vpklshs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VPKLSHS, storing to %ptr if all values were saturated. 
++define <16 x i8> @test_vpklshs_all_store(<8 x i16> %a, <8 x i16> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vpklshs_all_store: ++; CHECK: vpklshs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vpklshs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp eq i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <16 x i8> %res ++} ++ ++; VPKLSFS with no processing of the result. ++define <8 x i16> @test_vpklsfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpklsfs: ++; CHECK: vpklsfs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VPKLSFS, storing to %ptr if any values were saturated. ++define <8 x i16> @test_vpklsfs_any_store(<4 x i32> %a, <4 x i32> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vpklsfs_any_store: ++; CHECK: vpklsfs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vpklsfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp ne i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <8 x i16> %res ++} ++ ++; VPKLSGS with no processing of the result. ++define <4 x i32> @test_vpklsgs(<2 x i64> %a, <2 x i64> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vpklsgs: ++; CHECK: vpklsgs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VPKLSGS, storing to %ptr if no elements were saturated ++define <4 x i32> @test_vpklsgs_none_store(<2 x i64> %a, <2 x i64> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vpklsgs_none_store: ++; CHECK: vpklsgs %v24, %v24, %v26 ++; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vpklsgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp eq i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <4 x i32> %res ++} ++ ++; VSTL with the lowest in-range displacement. ++define void @test_vstl1(<16 x i8> %vec, i8 *%ptr, i32 %length) { ++; CHECK-LABEL: test_vstl1: ++; CHECK: vstl %v24, %r3, 0(%r2) ++; CHECK: br %r14 ++ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) ++ ret void ++} ++ ++; VSTL with the highest in-range displacement. 
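++; (The length operand of VLL and VSTL holds the index of the last byte
++; to transfer, so any value of 15 or more moves the full 16 bytes.)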
++define void @test_vstl2(<16 x i8> %vec, i8 *%base, i32 %length) { ++; CHECK-LABEL: test_vstl2: ++; CHECK: vstl %v24, %r3, 4095(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4095 ++ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) ++ ret void ++} ++ ++; VSTL with an out-of-range displacement. ++define void @test_vstl3(<16 x i8> %vec, i8 *%base, i32 %length) { ++; CHECK-LABEL: test_vstl3: ++; CHECK: vstl %v24, %r3, 0({{%r[1-5]}}) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4096 ++ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) ++ ret void ++} ++ ++; Check that VSTL doesn't allow an index. ++define void @test_vstl4(<16 x i8> %vec, i8 *%base, i64 %index, i32 %length) { ++; CHECK-LABEL: test_vstl4: ++; CHECK: vstl %v24, %r4, 0({{%r[1-5]}}) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 %index ++ call void @llvm.s390.vstl(<16 x i8> %vec, i32 %length, i8 *%ptr) ++ ret void ++} ++ ++; VUPHB. ++define <8 x i16> @test_vuphb(<16 x i8> %a) { ++; CHECK-LABEL: test_vuphb: ++; CHECK: vuphb %v24, %v24 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vuphb(<16 x i8> %a) ++ ret <8 x i16> %res ++} ++ ++; VUPHH. ++define <4 x i32> @test_vuphh(<8 x i16> %a) { ++; CHECK-LABEL: test_vuphh: ++; CHECK: vuphh %v24, %v24 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vuphh(<8 x i16> %a) ++ ret <4 x i32> %res ++} ++ ++; VUPHF. ++define <2 x i64> @test_vuphf(<4 x i32> %a) { ++; CHECK-LABEL: test_vuphf: ++; CHECK: vuphf %v24, %v24 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vuphf(<4 x i32> %a) ++ ret <2 x i64> %res ++} ++ ++; VUPLHB. ++define <8 x i16> @test_vuplhb(<16 x i8> %a) { ++; CHECK-LABEL: test_vuplhb: ++; CHECK: vuplhb %v24, %v24 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vuplhb(<16 x i8> %a) ++ ret <8 x i16> %res ++} ++ ++; VUPLHH. ++define <4 x i32> @test_vuplhh(<8 x i16> %a) { ++; CHECK-LABEL: test_vuplhh: ++; CHECK: vuplhh %v24, %v24 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vuplhh(<8 x i16> %a) ++ ret <4 x i32> %res ++} ++ ++; VUPLHF. ++define <2 x i64> @test_vuplhf(<4 x i32> %a) { ++; CHECK-LABEL: test_vuplhf: ++; CHECK: vuplhf %v24, %v24 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vuplhf(<4 x i32> %a) ++ ret <2 x i64> %res ++} ++ ++; VUPLB. ++define <8 x i16> @test_vuplb(<16 x i8> %a) { ++; CHECK-LABEL: test_vuplb: ++; CHECK: vuplb %v24, %v24 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vuplb(<16 x i8> %a) ++ ret <8 x i16> %res ++} ++ ++; VUPLHW. ++define <4 x i32> @test_vuplhw(<8 x i16> %a) { ++; CHECK-LABEL: test_vuplhw: ++; CHECK: vuplhw %v24, %v24 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vuplhw(<8 x i16> %a) ++ ret <4 x i32> %res ++} ++ ++; VUPLF. ++define <2 x i64> @test_vuplf(<4 x i32> %a) { ++; CHECK-LABEL: test_vuplf: ++; CHECK: vuplf %v24, %v24 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vuplf(<4 x i32> %a) ++ ret <2 x i64> %res ++} ++ ++; VUPLLB. ++define <8 x i16> @test_vupllb(<16 x i8> %a) { ++; CHECK-LABEL: test_vupllb: ++; CHECK: vupllb %v24, %v24 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vupllb(<16 x i8> %a) ++ ret <8 x i16> %res ++} ++ ++; VUPLLH. ++define <4 x i32> @test_vupllh(<8 x i16> %a) { ++; CHECK-LABEL: test_vupllh: ++; CHECK: vupllh %v24, %v24 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vupllh(<8 x i16> %a) ++ ret <4 x i32> %res ++} ++ ++; VUPLLF. 
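++; (Like VUPLLB and VUPLLH above, this zero-extends the low half of the
++; source elements, here widening <4 x i32> to <2 x i64>.)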
++define <2 x i64> @test_vupllf(<4 x i32> %a) { ++; CHECK-LABEL: test_vupllf: ++; CHECK: vupllf %v24, %v24 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vupllf(<4 x i32> %a) ++ ret <2 x i64> %res ++} ++ ++; VACCB. ++define <16 x i8> @test_vaccb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vaccb: ++; CHECK: vaccb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vaccb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VACCH. ++define <8 x i16> @test_vacch(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vacch: ++; CHECK: vacch %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vacch(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VACCF. ++define <4 x i32> @test_vaccf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vaccf: ++; CHECK: vaccf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vaccf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VACCG. ++define <2 x i64> @test_vaccg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vaccg: ++; CHECK: vaccg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vaccg(<2 x i64> %a, <2 x i64> %b) ++ ret <2 x i64> %res ++} ++ ++; VAQ. ++define <16 x i8> @test_vaq(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vaq: ++; CHECK: vaq %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vaq(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VACQ. ++define <16 x i8> @test_vacq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vacq: ++; CHECK: vacq %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vacq(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VACCQ. ++define <16 x i8> @test_vaccq(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vaccq: ++; CHECK: vaccq %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vaccq(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VACCCQ. ++define <16 x i8> @test_vacccq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vacccq: ++; CHECK: vacccq %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vacccq(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VAVGB. ++define <16 x i8> @test_vavgb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vavgb: ++; CHECK: vavgb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vavgb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VAVGH. ++define <8 x i16> @test_vavgh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vavgh: ++; CHECK: vavgh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vavgh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VAVGF. ++define <4 x i32> @test_vavgf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vavgf: ++; CHECK: vavgf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vavgf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VAVGG. ++define <2 x i64> @test_vavgg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vavgg: ++; CHECK: vavgg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vavgg(<2 x i64> %a, <2 x i64> %b) ++ ret <2 x i64> %res ++} ++ ++; VAVGLB. 
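++; (The VAVGL* forms below are the unsigned counterparts of the signed
++; VAVG* averages above.)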
++define <16 x i8> @test_vavglb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vavglb: ++; CHECK: vavglb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vavglb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VAVGLH. ++define <8 x i16> @test_vavglh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vavglh: ++; CHECK: vavglh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vavglh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VAVGLF. ++define <4 x i32> @test_vavglf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vavglf: ++; CHECK: vavglf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vavglf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VAVGLG. ++define <2 x i64> @test_vavglg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vavglg: ++; CHECK: vavglg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vavglg(<2 x i64> %a, <2 x i64> %b) ++ ret <2 x i64> %res ++} ++ ++; VCKSM. ++define <4 x i32> @test_vcksm(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vcksm: ++; CHECK: vcksm %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vcksm(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VGFMB. ++define <8 x i16> @test_vgfmb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vgfmb: ++; CHECK: vgfmb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vgfmb(<16 x i8> %a, <16 x i8> %b) ++ ret <8 x i16> %res ++} ++ ++; VGFMH. ++define <4 x i32> @test_vgfmh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vgfmh: ++; CHECK: vgfmh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vgfmh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VGFMF. ++define <2 x i64> @test_vgfmf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vgfmf: ++; CHECK: vgfmf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vgfmf(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VGFMG. ++define <16 x i8> @test_vgfmg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vgfmg: ++; CHECK: vgfmg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vgfmg(<2 x i64> %a, <2 x i64> %b) ++ ret <16 x i8> %res ++} ++ ++; VGFMAB. ++define <8 x i16> @test_vgfmab(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vgfmab: ++; CHECK: vgfmab %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vgfmab(<16 x i8> %a, <16 x i8> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VGFMAH. ++define <4 x i32> @test_vgfmah(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vgfmah: ++; CHECK: vgfmah %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vgfmah(<8 x i16> %a, <8 x i16> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VGFMAF. ++define <2 x i64> @test_vgfmaf(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_vgfmaf: ++; CHECK: vgfmaf %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vgfmaf(<4 x i32> %a, <4 x i32> %b, ++ <2 x i64> %c) ++ ret <2 x i64> %res ++} ++ ++; VGFMAG. ++define <16 x i8> @test_vgfmag(<2 x i64> %a, <2 x i64> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vgfmag: ++; CHECK: vgfmag %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vgfmag(<2 x i64> %a, <2 x i64> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VMAHB. 
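++; (VMAH* multiply two operands, add the third, and keep the high half of
++; the widened result; the VMALH* forms below are unsigned.)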
++define <16 x i8> @test_vmahb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vmahb: ++; CHECK: vmahb %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vmahb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VMAHH. ++define <8 x i16> @test_vmahh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmahh: ++; CHECK: vmahh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmahh(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMAHF. ++define <4 x i32> @test_vmahf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmahf: ++; CHECK: vmahf %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmahf(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMALHB. ++define <16 x i8> @test_vmalhb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vmalhb: ++; CHECK: vmalhb %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vmalhb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VMALHH. ++define <8 x i16> @test_vmalhh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmalhh: ++; CHECK: vmalhh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmalhh(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMALHF. ++define <4 x i32> @test_vmalhf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmalhf: ++; CHECK: vmalhf %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmalhf(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMAEB. ++define <8 x i16> @test_vmaeb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmaeb: ++; CHECK: vmaeb %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmaeb(<16 x i8> %a, <16 x i8> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMAEH. ++define <4 x i32> @test_vmaeh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmaeh: ++; CHECK: vmaeh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmaeh(<8 x i16> %a, <8 x i16> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMAEF. ++define <2 x i64> @test_vmaef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_vmaef: ++; CHECK: vmaef %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmaef(<4 x i32> %a, <4 x i32> %b, ++ <2 x i64> %c) ++ ret <2 x i64> %res ++} ++ ++; VMALEB. ++define <8 x i16> @test_vmaleb(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmaleb: ++; CHECK: vmaleb %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmaleb(<16 x i8> %a, <16 x i8> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMALEH. ++define <4 x i32> @test_vmaleh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmaleh: ++; CHECK: vmaleh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmaleh(<8 x i16> %a, <8 x i16> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMALEF. ++define <2 x i64> @test_vmalef(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_vmalef: ++; CHECK: vmalef %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmalef(<4 x i32> %a, <4 x i32> %b, ++ <2 x i64> %c) ++ ret <2 x i64> %res ++} ++ ++; VMAOB. 
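++; (VMAO*, like VMAE* above, widens and multiplies alternate elements and
++; adds the third operand, using the odd- rather than even-numbered ones;
++; VMALO* below are the unsigned forms.)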
++define <8 x i16> @test_vmaob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmaob: ++; CHECK: vmaob %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmaob(<16 x i8> %a, <16 x i8> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMAOH. ++define <4 x i32> @test_vmaoh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmaoh: ++; CHECK: vmaoh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmaoh(<8 x i16> %a, <8 x i16> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMAOF. ++define <2 x i64> @test_vmaof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_vmaof: ++; CHECK: vmaof %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmaof(<4 x i32> %a, <4 x i32> %b, ++ <2 x i64> %c) ++ ret <2 x i64> %res ++} ++ ++; VMALOB. ++define <8 x i16> @test_vmalob(<16 x i8> %a, <16 x i8> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vmalob: ++; CHECK: vmalob %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmalob(<16 x i8> %a, <16 x i8> %b, ++ <8 x i16> %c) ++ ret <8 x i16> %res ++} ++ ++; VMALOH. ++define <4 x i32> @test_vmaloh(<8 x i16> %a, <8 x i16> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vmaloh: ++; CHECK: vmaloh %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmaloh(<8 x i16> %a, <8 x i16> %b, ++ <4 x i32> %c) ++ ret <4 x i32> %res ++} ++ ++; VMALOF. ++define <2 x i64> @test_vmalof(<4 x i32> %a, <4 x i32> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_vmalof: ++; CHECK: vmalof %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmalof(<4 x i32> %a, <4 x i32> %b, ++ <2 x i64> %c) ++ ret <2 x i64> %res ++} ++ ++; VMHB. ++define <16 x i8> @test_vmhb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmhb: ++; CHECK: vmhb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vmhb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VMHH. ++define <8 x i16> @test_vmhh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmhh: ++; CHECK: vmhh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmhh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VMHF. ++define <4 x i32> @test_vmhf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmhf: ++; CHECK: vmhf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmhf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VMLHB. ++define <16 x i8> @test_vmlhb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmlhb: ++; CHECK: vmlhb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vmlhb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VMLHH. ++define <8 x i16> @test_vmlhh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmlhh: ++; CHECK: vmlhh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmlhh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VMLHF. ++define <4 x i32> @test_vmlhf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmlhf: ++; CHECK: vmlhf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmlhf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VMEB. ++define <8 x i16> @test_vmeb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmeb: ++; CHECK: vmeb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmeb(<16 x i8> %a, <16 x i8> %b) ++ ret <8 x i16> %res ++} ++ ++; VMEH. 
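++; (VME*/VMO* are the widening even/odd multiplies without an addend; the
++; VMLE*/VMLO* forms are their unsigned counterparts.)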
++define <4 x i32> @test_vmeh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmeh: ++; CHECK: vmeh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmeh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VMEF. ++define <2 x i64> @test_vmef(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmef: ++; CHECK: vmef %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmef(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VMLEB. ++define <8 x i16> @test_vmleb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmleb: ++; CHECK: vmleb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmleb(<16 x i8> %a, <16 x i8> %b) ++ ret <8 x i16> %res ++} ++ ++; VMLEH. ++define <4 x i32> @test_vmleh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmleh: ++; CHECK: vmleh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmleh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VMLEF. ++define <2 x i64> @test_vmlef(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmlef: ++; CHECK: vmlef %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmlef(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VMOB. ++define <8 x i16> @test_vmob(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmob: ++; CHECK: vmob %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmob(<16 x i8> %a, <16 x i8> %b) ++ ret <8 x i16> %res ++} ++ ++; VMOH. ++define <4 x i32> @test_vmoh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmoh: ++; CHECK: vmoh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmoh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VMOF. ++define <2 x i64> @test_vmof(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmof: ++; CHECK: vmof %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmof(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VMLOB. ++define <8 x i16> @test_vmlob(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vmlob: ++; CHECK: vmlob %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vmlob(<16 x i8> %a, <16 x i8> %b) ++ ret <8 x i16> %res ++} ++ ++; VMLOH. ++define <4 x i32> @test_vmloh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vmloh: ++; CHECK: vmloh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vmloh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VMLOF. ++define <2 x i64> @test_vmlof(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vmlof: ++; CHECK: vmlof %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vmlof(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VERLLVB. ++define <16 x i8> @test_verllvb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_verllvb: ++; CHECK: verllvb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verllvb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VERLLVH. ++define <8 x i16> @test_verllvh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_verllvh: ++; CHECK: verllvh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.verllvh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VERLLVF. ++define <4 x i32> @test_verllvf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_verllvf: ++; CHECK: verllvf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.verllvf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VERLLVG. 
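++; (VERLLV* rotate each element by the amount in the corresponding element
++; of the second operand; the VERLL* tests below use a single rotate count
++; given as a base-and-displacement address.)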
++define <2 x i64> @test_verllvg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_verllvg: ++; CHECK: verllvg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.verllvg(<2 x i64> %a, <2 x i64> %b) ++ ret <2 x i64> %res ++} ++ ++; VERLLB. ++define <16 x i8> @test_verllb(<16 x i8> %a, i32 %b) { ++; CHECK-LABEL: test_verllb: ++; CHECK: verllb %v24, %v24, 0(%r2) ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 %b) ++ ret <16 x i8> %res ++} ++ ++; VERLLH. ++define <8 x i16> @test_verllh(<8 x i16> %a, i32 %b) { ++; CHECK-LABEL: test_verllh: ++; CHECK: verllh %v24, %v24, 0(%r2) ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.verllh(<8 x i16> %a, i32 %b) ++ ret <8 x i16> %res ++} ++ ++; VERLLF. ++define <4 x i32> @test_verllf(<4 x i32> %a, i32 %b) { ++; CHECK-LABEL: test_verllf: ++; CHECK: verllf %v24, %v24, 0(%r2) ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.verllf(<4 x i32> %a, i32 %b) ++ ret <4 x i32> %res ++} ++ ++; VERLLG. ++define <2 x i64> @test_verllg(<2 x i64> %a, i32 %b) { ++; CHECK-LABEL: test_verllg: ++; CHECK: verllg %v24, %v24, 0(%r2) ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.verllg(<2 x i64> %a, i32 %b) ++ ret <2 x i64> %res ++} ++ ++; VERLLB with the smallest count. ++define <16 x i8> @test_verllb_1(<16 x i8> %a) { ++; CHECK-LABEL: test_verllb_1: ++; CHECK: verllb %v24, %v24, 1 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VERLLB with the largest count. ++define <16 x i8> @test_verllb_4095(<16 x i8> %a) { ++; CHECK-LABEL: test_verllb_4095: ++; CHECK: verllb %v24, %v24, 4095 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4095) ++ ret <16 x i8> %res ++} ++ ++; VERLLB with the largest count + 1. ++define <16 x i8> @test_verllb_4096(<16 x i8> %a) { ++; CHECK-LABEL: test_verllb_4096: ++; CHECK: lhi [[REG:%r[1-5]]], 4096 ++; CHECK: verllb %v24, %v24, 0([[REG]]) ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verllb(<16 x i8> %a, i32 4096) ++ ret <16 x i8> %res ++} ++ ++; VERIMB. ++define <16 x i8> @test_verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_verimb: ++; CHECK: verimb %v24, %v26, %v28, 1 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VERIMH. ++define <8 x i16> @test_verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_verimh: ++; CHECK: verimh %v24, %v26, %v28, 1 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.verimh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, i32 1) ++ ret <8 x i16> %res ++} ++ ++; VERIMF. ++define <4 x i32> @test_verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_verimf: ++; CHECK: verimf %v24, %v26, %v28, 1 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.verimf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, i32 1) ++ ret <4 x i32> %res ++} ++ ++; VERIMG. ++define <2 x i64> @test_verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) { ++; CHECK-LABEL: test_verimg: ++; CHECK: verimg %v24, %v26, %v28, 1 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.verimg(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c, i32 1) ++ ret <2 x i64> %res ++} ++ ++; VERIMB with a different mask. 
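++; (VERIM rotates the elements of the second vector operand left by the
++; immediate count and inserts the rotated bits into the first operand
++; under control of the mask in the third operand.)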
++define <16 x i8> @test_verimb_254(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_verimb_254: ++; CHECK: verimb %v24, %v26, %v28, 254 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.verimb(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, i32 254) ++ ret <16 x i8> %res ++} ++ ++; VSL. ++define <16 x i8> @test_vsl(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsl: ++; CHECK: vsl %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsl(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSLB. ++define <16 x i8> @test_vslb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vslb: ++; CHECK: vslb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vslb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSRA. ++define <16 x i8> @test_vsra(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsra: ++; CHECK: vsra %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsra(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSRAB. ++define <16 x i8> @test_vsrab(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsrab: ++; CHECK: vsrab %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsrab(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSRL. ++define <16 x i8> @test_vsrl(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsrl: ++; CHECK: vsrl %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsrl(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSRLB. ++define <16 x i8> @test_vsrlb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsrlb: ++; CHECK: vsrlb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsrlb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSLDB with the minimum useful value. ++define <16 x i8> @test_vsldb_1(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsldb_1: ++; CHECK: vsldb %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VSLDB with the maximum value. ++define <16 x i8> @test_vsldb_15(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsldb_15: ++; CHECK: vsldb %v24, %v24, %v26, 15 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsldb(<16 x i8> %a, <16 x i8> %b, i32 15) ++ ret <16 x i8> %res ++} ++ ++; VSCBIB. ++define <16 x i8> @test_vscbib(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vscbib: ++; CHECK: vscbib %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vscbib(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSCBIH. ++define <8 x i16> @test_vscbih(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vscbih: ++; CHECK: vscbih %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vscbih(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VSCBIF. ++define <4 x i32> @test_vscbif(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vscbif: ++; CHECK: vscbif %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vscbif(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VSCBIG. ++define <2 x i64> @test_vscbig(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vscbig: ++; CHECK: vscbig %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vscbig(<2 x i64> %a, <2 x i64> %b) ++ ret <2 x i64> %res ++} ++ ++; VSQ. 
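++; (The quadword arithmetic instructions treat the vector as a single
++; 128-bit integer; <16 x i8> is simply the container type used for such
++; values here.)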
++define <16 x i8> @test_vsq(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsq: ++; CHECK: vsq %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsq(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSBIQ. ++define <16 x i8> @test_vsbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vsbiq: ++; CHECK: vsbiq %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsbiq(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VSCBIQ. ++define <16 x i8> @test_vscbiq(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vscbiq: ++; CHECK: vscbiq %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vscbiq(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VSBCBIQ. ++define <16 x i8> @test_vsbcbiq(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vsbcbiq: ++; CHECK: vsbcbiq %v24, %v24, %v26, %v28 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsbcbiq(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c) ++ ret <16 x i8> %res ++} ++ ++; VSUMB. ++define <4 x i32> @test_vsumb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vsumb: ++; CHECK: vsumb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vsumb(<16 x i8> %a, <16 x i8> %b) ++ ret <4 x i32> %res ++} ++ ++; VSUMH. ++define <4 x i32> @test_vsumh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vsumh: ++; CHECK: vsumh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vsumh(<8 x i16> %a, <8 x i16> %b) ++ ret <4 x i32> %res ++} ++ ++; VSUMGH. ++define <2 x i64> @test_vsumgh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vsumgh: ++; CHECK: vsumgh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vsumgh(<8 x i16> %a, <8 x i16> %b) ++ ret <2 x i64> %res ++} ++ ++; VSUMGF. ++define <2 x i64> @test_vsumgf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vsumgf: ++; CHECK: vsumgf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <2 x i64> @llvm.s390.vsumgf(<4 x i32> %a, <4 x i32> %b) ++ ret <2 x i64> %res ++} ++ ++; VSUMQF. ++define <16 x i8> @test_vsumqf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vsumqf: ++; CHECK: vsumqf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsumqf(<4 x i32> %a, <4 x i32> %b) ++ ret <16 x i8> %res ++} ++ ++; VSUMQG. ++define <16 x i8> @test_vsumqg(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vsumqg: ++; CHECK: vsumqg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vsumqg(<2 x i64> %a, <2 x i64> %b) ++ ret <16 x i8> %res ++} ++ ++; VTM with no processing of the result. ++define i32 @test_vtm(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vtm: ++; CHECK: vtm %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) ++ ret i32 %res ++} ++ ++; VTM, storing to %ptr if all bits are set. ++define void @test_vtm_all_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vtm_all_store: ++; CHECK-NOT: %r ++; CHECK: vtm %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %res = call i32 @llvm.s390.vtm(<16 x i8> %a, <16 x i8> %b) ++ %cmp = icmp sge i32 %res, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret void ++} ++ ++; VCEQBS with no processing of the result. 
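++; (In the CC tests below, IPM places the condition code in bits 2-3 of
++; the top byte of the result, so "srl %r2, 28" leaves the raw CC value
++; 0-3; for the comparisons, CC 0 means all elements matched, CC 1 some,
++; and CC 3 none.)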
++define i32 @test_vceqbs(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vceqbs: ++; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCEQBS, returning 1 if any elements are equal (CC != 3). ++define i32 @test_vceqbs_any_bool(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vceqbs_any_bool: ++; CHECK: vceqbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -536870912 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp ne i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCEQBS, storing to %ptr if any elements are equal. ++define <16 x i8> @test_vceqbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vceqbs_any_store: ++; CHECK-NOT: %r ++; CHECK: vceqbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jo|jnle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vceqbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp ule i32 %cc, 2 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <16 x i8> %res ++} ++ ++; VCEQHS with no processing of the result. ++define i32 @test_vceqhs(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vceqhs: ++; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCEQHS, returning 1 if not all elements are equal. ++define i32 @test_vceqhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vceqhs_notall_bool: ++; CHECK: vceqhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 36 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp sge i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCEQHS, storing to %ptr if not all elements are equal. ++define <8 x i16> @test_vceqhs_notall_store(<8 x i16> %a, <8 x i16> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vceqhs_notall_store: ++; CHECK-NOT: %r ++; CHECK: vceqhs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vceqhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp ugt i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <8 x i16> %res ++} ++ ++; VCEQFS with no processing of the result. ++define i32 @test_vceqfs(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vceqfs: ++; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCEQFS, returning 1 if no elements are equal. 
++define i32 @test_vceqfs_none_bool(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vceqfs_none_bool: ++; CHECK: vceqfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 35 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp eq i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCEQFS, storing to %ptr if no elements are equal. ++define <4 x i32> @test_vceqfs_none_store(<4 x i32> %a, <4 x i32> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vceqfs_none_store: ++; CHECK-NOT: %r ++; CHECK: vceqfs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vceqfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp uge i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <4 x i32> %res ++} ++ ++; VCEQGS with no processing of the result. ++define i32 @test_vceqgs(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vceqgs: ++; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCEQGS returning 1 if all elements are equal (CC == 0). ++define i32 @test_vceqgs_all_bool(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vceqgs_all_bool: ++; CHECK: vceqgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -268435456 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ult i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCEQGS, storing to %ptr if all elements are equal. ++define <2 x i64> @test_vceqgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vceqgs_all_store: ++; CHECK-NOT: %r ++; CHECK: vceqgs %v24, %v24, %v26 ++; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vceqgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp sle i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VCHBS with no processing of the result. ++define i32 @test_vchbs(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vchbs: ++; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHBS, returning 1 if any elements are higher (CC != 3). 
++define i32 @test_vchbs_any_bool(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vchbs_any_bool: ++; CHECK: vchbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -536870912 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp ne i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHBS, storing to %ptr if any elements are higher. ++define <16 x i8> @test_vchbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vchbs_any_store: ++; CHECK-NOT: %r ++; CHECK: vchbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jo|jnle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp ule i32 %cc, 2 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <16 x i8> %res ++} ++ ++; VCHHS with no processing of the result. ++define i32 @test_vchhs(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vchhs: ++; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHHS, returning 1 if not all elements are higher. ++define i32 @test_vchhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vchhs_notall_bool: ++; CHECK: vchhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 36 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp sge i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHHS, storing to %ptr if not all elements are higher. ++define <8 x i16> @test_vchhs_notall_store(<8 x i16> %a, <8 x i16> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vchhs_notall_store: ++; CHECK-NOT: %r ++; CHECK: vchhs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp ugt i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <8 x i16> %res ++} ++ ++; VCHFS with no processing of the result. ++define i32 @test_vchfs(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vchfs: ++; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHFS, returning 1 if no elements are higher. 
++define i32 @test_vchfs_none_bool(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vchfs_none_bool: ++; CHECK: vchfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 35 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp eq i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHFS, storing to %ptr if no elements are higher. ++define <4 x i32> @test_vchfs_none_store(<4 x i32> %a, <4 x i32> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vchfs_none_store: ++; CHECK-NOT: %r ++; CHECK: vchfs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp uge i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <4 x i32> %res ++} ++ ++; VCHGS with no processing of the result. ++define i32 @test_vchgs(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vchgs: ++; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHGS returning 1 if all elements are higher (CC == 0). ++define i32 @test_vchgs_all_bool(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vchgs_all_bool: ++; CHECK: vchgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -268435456 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ult i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHGS, storing to %ptr if all elements are higher. ++define <2 x i64> @test_vchgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vchgs_all_store: ++; CHECK-NOT: %r ++; CHECK: vchgs %v24, %v24, %v26 ++; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp sle i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VCHLBS with no processing of the result. ++define i32 @test_vchlbs(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vchlbs: ++; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHLBS, returning 1 if any elements are higher (CC != 3). 
++define i32 @test_vchlbs_any_bool(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vchlbs_any_bool: ++; CHECK: vchlbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -536870912 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp ne i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHLBS, storing to %ptr if any elements are higher. ++define <16 x i8> @test_vchlbs_any_store(<16 x i8> %a, <16 x i8> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vchlbs_any_store: ++; CHECK-NOT: %r ++; CHECK: vchlbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jo|jnle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vchlbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ %cmp = icmp sle i32 %cc, 2 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <16 x i8> %res ++} ++ ++; VCHLHS with no processing of the result. ++define i32 @test_vchlhs(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vchlhs: ++; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHLHS, returning 1 if not all elements are higher. ++define i32 @test_vchlhs_notall_bool(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vchlhs_notall_bool: ++; CHECK: vchlhs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 36 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp uge i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHLHS, storing to %ptr if not all elements are higher. ++define <8 x i16> @test_vchlhs_notall_store(<8 x i16> %a, <8 x i16> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vchlhs_notall_store: ++; CHECK-NOT: %r ++; CHECK: vchlhs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vchlhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ %cmp = icmp sgt i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <8 x i16> %res ++} ++ ++; VCHLFS with no processing of the result. ++define i32 @test_vchlfs(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vchlfs: ++; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHLFS, returning 1 if no elements are higher. 
++define i32 @test_vchlfs_none_bool(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vchlfs_none_bool: ++; CHECK: vchlfs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 35 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp eq i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHLFS, storing to %ptr if no elements are higher. ++define <4 x i32> @test_vchlfs_none_store(<4 x i32> %a, <4 x i32> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vchlfs_none_store: ++; CHECK-NOT: %r ++; CHECK: vchlfs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vchlfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ %cmp = icmp sge i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <4 x i32> %res ++} ++ ++; VCHLGS with no processing of the result. ++define i32 @test_vchlgs(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vchlgs: ++; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VCHLGS returning 1 if all elements are higher (CC == 0). ++define i32 @test_vchlgs_all_bool(<2 x i64> %a, <2 x i64> %b) { ++; CHECK-LABEL: test_vchlgs_all_bool: ++; CHECK: vchlgs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -268435456 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp slt i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VCHLGS, storing to %ptr if all elements are higher. ++define <2 x i64> @test_vchlgs_all_store(<2 x i64> %a, <2 x i64> %b, i32 *%ptr) { ++; CHECK-LABEL: test_vchlgs_all_store: ++; CHECK-NOT: %r ++; CHECK: vchlgs %v24, %v24, %v26 ++; CHECK-NEXT: {{jnhe|jne}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vchlgs(<2 x i64> %a, <2 x i64> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ule i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VFAEB with !IN !RT. ++define <16 x i8> @test_vfaeb_0(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaeb_0: ++; CHECK: vfaeb %v24, %v24, %v26, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 0) ++ ret <16 x i8> %res ++} ++ ++; VFAEB with !IN RT. ++define <16 x i8> @test_vfaeb_4(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaeb_4: ++; CHECK: vfaeb %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 4) ++ ret <16 x i8> %res ++} ++ ++; VFAEB with IN !RT. ++define <16 x i8> @test_vfaeb_8(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaeb_8: ++; CHECK: vfaeb %v24, %v24, %v26, 8 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 8) ++ ret <16 x i8> %res ++} ++ ++; VFAEB with IN RT. 
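++; (The M5 flag operand combines IN = 8 and RT = 4. The CS bit, value 1,
++; is instead represented by the separate CC-setting intrinsics, so
++; test_vfaeb_1 below expects it to be masked off.)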
++define <16 x i8> @test_vfaeb_12(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaeb_12: ++; CHECK: vfaeb %v24, %v24, %v26, 12 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 12) ++ ret <16 x i8> %res ++} ++ ++; VFAEB with CS -- should be ignored. ++define <16 x i8> @test_vfaeb_1(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaeb_1: ++; CHECK: vfaeb %v24, %v24, %v26, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaeb(<16 x i8> %a, <16 x i8> %b, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VFAEH. ++define <8 x i16> @test_vfaeh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfaeh: ++; CHECK: vfaeh %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfaeh(<8 x i16> %a, <8 x i16> %b, i32 4) ++ ret <8 x i16> %res ++} ++ ++; VFAEF. ++define <4 x i32> @test_vfaef(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfaef: ++; CHECK: vfaef %v24, %v24, %v26, 8 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfaef(<4 x i32> %a, <4 x i32> %b, i32 8) ++ ret <4 x i32> %res ++} ++ ++; VFAEBS. ++define <16 x i8> @test_vfaebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaebs: ++; CHECK: vfaebs %v24, %v24, %v26, 0 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfaebs(<16 x i8> %a, <16 x i8> %b, ++ i32 0) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFAEHS. ++define <8 x i16> @test_vfaehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaehs: ++; CHECK: vfaehs %v24, %v24, %v26, 4 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfaehs(<8 x i16> %a, <8 x i16> %b, ++ i32 4) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFAEFS. ++define <4 x i32> @test_vfaefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaefs: ++; CHECK: vfaefs %v24, %v24, %v26, 8 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfaefs(<4 x i32> %a, <4 x i32> %b, ++ i32 8) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFAEZB with !IN !RT. ++define <16 x i8> @test_vfaezb_0(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaezb_0: ++; CHECK: vfaezb %v24, %v24, %v26, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 0) ++ ret <16 x i8> %res ++} ++ ++; VFAEZB with !IN RT. ++define <16 x i8> @test_vfaezb_4(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaezb_4: ++; CHECK: vfaezb %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 4) ++ ret <16 x i8> %res ++} ++ ++; VFAEZB with IN !RT. ++define <16 x i8> @test_vfaezb_8(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaezb_8: ++; CHECK: vfaezb %v24, %v24, %v26, 8 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 8) ++ ret <16 x i8> %res ++} ++ ++; VFAEZB with IN RT. 
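++; (The VFAEZ* forms additionally search for a zero element; the IN, RT
++; and CS flags are encoded as for VFAE* above.)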
++define <16 x i8> @test_vfaezb_12(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaezb_12: ++; CHECK: vfaezb %v24, %v24, %v26, 12 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 12) ++ ret <16 x i8> %res ++} ++ ++; VFAEZB with CS -- should be ignored. ++define <16 x i8> @test_vfaezb_1(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfaezb_1: ++; CHECK: vfaezb %v24, %v24, %v26, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfaezb(<16 x i8> %a, <16 x i8> %b, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VFAEZH. ++define <8 x i16> @test_vfaezh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfaezh: ++; CHECK: vfaezh %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfaezh(<8 x i16> %a, <8 x i16> %b, i32 4) ++ ret <8 x i16> %res ++} ++ ++; VFAEZF. ++define <4 x i32> @test_vfaezf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfaezf: ++; CHECK: vfaezf %v24, %v24, %v26, 8 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfaezf(<4 x i32> %a, <4 x i32> %b, i32 8) ++ ret <4 x i32> %res ++} ++ ++; VFAEZBS. ++define <16 x i8> @test_vfaezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaezbs: ++; CHECK: vfaezbs %v24, %v24, %v26, 0 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfaezbs(<16 x i8> %a, <16 x i8> %b, ++ i32 0) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFAEZHS. ++define <8 x i16> @test_vfaezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaezhs: ++; CHECK: vfaezhs %v24, %v24, %v26, 4 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfaezhs(<8 x i16> %a, <8 x i16> %b, ++ i32 4) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFAEZFS. ++define <4 x i32> @test_vfaezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfaezfs: ++; CHECK: vfaezfs %v24, %v24, %v26, 8 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfaezfs(<4 x i32> %a, <4 x i32> %b, ++ i32 8) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFEEB. ++define <16 x i8> @test_vfeeb_0(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfeeb_0: ++; CHECK: vfeeb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfeeb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VFEEH. ++define <8 x i16> @test_vfeeh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfeeh: ++; CHECK: vfeeh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfeeh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VFEEF. ++define <4 x i32> @test_vfeef(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfeef: ++; CHECK: vfeef %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfeef(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VFEEBS. 
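++; The CC-setting forms return {result, i32}. The i32 condition code is
++; recovered with the usual idiom: IPM copies CC into bits 28-29 of a GPR
++; and SRL by 28 right-justifies it as a value in the range 0-3.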
++define <16 x i8> @test_vfeebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeebs: ++; CHECK: vfeebs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfeebs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFEEHS. ++define <8 x i16> @test_vfeehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeehs: ++; CHECK: vfeehs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfeehs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFEEFS. ++define <4 x i32> @test_vfeefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeefs: ++; CHECK: vfeefs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfeefs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFEEZB. ++define <16 x i8> @test_vfeezb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfeezb: ++; CHECK: vfeezb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfeezb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VFEEZH. ++define <8 x i16> @test_vfeezh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfeezh: ++; CHECK: vfeezh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfeezh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VFEEZF. ++define <4 x i32> @test_vfeezf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfeezf: ++; CHECK: vfeezf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfeezf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VFEEZBS. ++define <16 x i8> @test_vfeezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeezbs: ++; CHECK: vfeezbs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfeezbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFEEZHS. ++define <8 x i16> @test_vfeezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeezhs: ++; CHECK: vfeezhs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfeezhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFEEZFS. 
++define <4 x i32> @test_vfeezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfeezfs: ++; CHECK: vfeezfs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfeezfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFENEB. ++define <16 x i8> @test_vfeneb_0(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfeneb_0: ++; CHECK: vfeneb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfeneb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VFENEH. ++define <8 x i16> @test_vfeneh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfeneh: ++; CHECK: vfeneh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfeneh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VFENEF. ++define <4 x i32> @test_vfenef(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfenef: ++; CHECK: vfenef %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfenef(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VFENEBS. ++define <16 x i8> @test_vfenebs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenebs: ++; CHECK: vfenebs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfenebs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFENEHS. ++define <8 x i16> @test_vfenehs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenehs: ++; CHECK: vfenehs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfenehs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFENEFS. ++define <4 x i32> @test_vfenefs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenefs: ++; CHECK: vfenefs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfenefs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFENEZB. ++define <16 x i8> @test_vfenezb(<16 x i8> %a, <16 x i8> %b) { ++; CHECK-LABEL: test_vfenezb: ++; CHECK: vfenezb %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vfenezb(<16 x i8> %a, <16 x i8> %b) ++ ret <16 x i8> %res ++} ++ ++; VFENEZH. ++define <8 x i16> @test_vfenezh(<8 x i16> %a, <8 x i16> %b) { ++; CHECK-LABEL: test_vfenezh: ++; CHECK: vfenezh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vfenezh(<8 x i16> %a, <8 x i16> %b) ++ ret <8 x i16> %res ++} ++ ++; VFENEZF. 
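++; VFENE is the counterpart of VFEE, finding the first mismatch rather
++; than the first match; the Z forms again stop early at a zero element.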
++define <4 x i32> @test_vfenezf(<4 x i32> %a, <4 x i32> %b) { ++; CHECK-LABEL: test_vfenezf: ++; CHECK: vfenezf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vfenezf(<4 x i32> %a, <4 x i32> %b) ++ ret <4 x i32> %res ++} ++ ++; VFENEZBS. ++define <16 x i8> @test_vfenezbs(<16 x i8> %a, <16 x i8> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenezbs: ++; CHECK: vfenezbs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vfenezbs(<16 x i8> %a, <16 x i8> %b) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VFENEZHS. ++define <8 x i16> @test_vfenezhs(<8 x i16> %a, <8 x i16> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenezhs: ++; CHECK: vfenezhs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vfenezhs(<8 x i16> %a, <8 x i16> %b) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VFENEZFS. ++define <4 x i32> @test_vfenezfs(<4 x i32> %a, <4 x i32> %b, i32 *%ccptr) { ++; CHECK-LABEL: test_vfenezfs: ++; CHECK: vfenezfs %v24, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vfenezfs(<4 x i32> %a, <4 x i32> %b) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VISTRB. ++define <16 x i8> @test_vistrb(<16 x i8> %a) { ++; CHECK-LABEL: test_vistrb: ++; CHECK: vistrb %v24, %v24 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vistrb(<16 x i8> %a) ++ ret <16 x i8> %res ++} ++ ++; VISTRH. ++define <8 x i16> @test_vistrh(<8 x i16> %a) { ++; CHECK-LABEL: test_vistrh: ++; CHECK: vistrh %v24, %v24 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vistrh(<8 x i16> %a) ++ ret <8 x i16> %res ++} ++ ++; VISTRF. ++define <4 x i32> @test_vistrf(<4 x i32> %a) { ++; CHECK-LABEL: test_vistrf: ++; CHECK: vistrf %v24, %v24 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vistrf(<4 x i32> %a) ++ ret <4 x i32> %res ++} ++ ++; VISTRBS. ++define <16 x i8> @test_vistrbs(<16 x i8> %a, i32 *%ccptr) { ++; CHECK-LABEL: test_vistrbs: ++; CHECK: vistrbs %v24, %v24 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vistrbs(<16 x i8> %a) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VISTRHS. ++define <8 x i16> @test_vistrhs(<8 x i16> %a, i32 *%ccptr) { ++; CHECK-LABEL: test_vistrhs: ++; CHECK: vistrhs %v24, %v24 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vistrhs(<8 x i16> %a) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VISTRFS. 
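++; VISTR copies its operand while forcing every element after the first
++; zero element to zero, isolating a NUL-terminated string. The S forms
++; additionally set CC to report whether a zero element was present.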
++define <4 x i32> @test_vistrfs(<4 x i32> %a, i32 *%ccptr) { ++; CHECK-LABEL: test_vistrfs: ++; CHECK: vistrfs %v24, %v24 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vistrfs(<4 x i32> %a) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VSTRCB with !IN !RT. ++define <16 x i8> @test_vstrcb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrcb_0: ++; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 0) ++ ret <16 x i8> %res ++} ++ ++; VSTRCB with !IN RT. ++define <16 x i8> @test_vstrcb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrcb_4: ++; CHECK: vstrcb %v24, %v24, %v26, %v28, 4 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 4) ++ ret <16 x i8> %res ++} ++ ++; VSTRCB with IN !RT. ++define <16 x i8> @test_vstrcb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrcb_8: ++; CHECK: vstrcb %v24, %v24, %v26, %v28, 8 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 8) ++ ret <16 x i8> %res ++} ++ ++; VSTRCB with IN RT. ++define <16 x i8> @test_vstrcb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrcb_12: ++; CHECK: vstrcb %v24, %v24, %v26, %v28, 12 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 12) ++ ret <16 x i8> %res ++} ++ ++; VSTRCB with CS -- should be ignored. ++define <16 x i8> @test_vstrcb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrcb_1: ++; CHECK: vstrcb %v24, %v24, %v26, %v28, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrcb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VSTRCH. ++define <8 x i16> @test_vstrch(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vstrch: ++; CHECK: vstrch %v24, %v24, %v26, %v28, 4 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vstrch(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c, i32 4) ++ ret <8 x i16> %res ++} ++ ++; VSTRCF. ++define <4 x i32> @test_vstrcf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vstrcf: ++; CHECK: vstrcf %v24, %v24, %v26, %v28, 8 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vstrcf(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c, i32 8) ++ ret <4 x i32> %res ++} ++ ++; VSTRCBS. ++define <16 x i8> @test_vstrcbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrcbs: ++; CHECK: vstrcbs %v24, %v24, %v26, %v28, 0 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vstrcbs(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 0) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VSTRCHS. 
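++; The VSTRC family is a range compare: in effect, each element of the
++; first operand is tested against ranges formed from the second operand
++; under the per-element control bits of the third. The flag operand
++; reuses the VFAE encoding, and the S forms return CC as above.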
++define <8 x i16> @test_vstrchs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrchs: ++; CHECK: vstrchs %v24, %v24, %v26, %v28, 4 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vstrchs(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c, i32 4) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VSTRCFS. ++define <4 x i32> @test_vstrcfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrcfs: ++; CHECK: vstrcfs %v24, %v24, %v26, %v28, 8 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vstrcfs(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c, i32 8) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VSTRCZB with !IN !RT. ++define <16 x i8> @test_vstrczb_0(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrczb_0: ++; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 0) ++ ret <16 x i8> %res ++} ++ ++; VSTRCZB with !IN RT. ++define <16 x i8> @test_vstrczb_4(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrczb_4: ++; CHECK: vstrczb %v24, %v24, %v26, %v28, 4 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 4) ++ ret <16 x i8> %res ++} ++ ++; VSTRCZB with IN !RT. ++define <16 x i8> @test_vstrczb_8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrczb_8: ++; CHECK: vstrczb %v24, %v24, %v26, %v28, 8 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 8) ++ ret <16 x i8> %res ++} ++ ++; VSTRCZB with IN RT. ++define <16 x i8> @test_vstrczb_12(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrczb_12: ++; CHECK: vstrczb %v24, %v24, %v26, %v28, 12 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 12) ++ ret <16 x i8> %res ++} ++ ++; VSTRCZB with CS -- should be ignored. ++define <16 x i8> @test_vstrczb_1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) { ++; CHECK-LABEL: test_vstrczb_1: ++; CHECK: vstrczb %v24, %v24, %v26, %v28, 0 ++; CHECK: br %r14 ++ %res = call <16 x i8> @llvm.s390.vstrczb(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 1) ++ ret <16 x i8> %res ++} ++ ++; VSTRCZH. ++define <8 x i16> @test_vstrczh(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) { ++; CHECK-LABEL: test_vstrczh: ++; CHECK: vstrczh %v24, %v24, %v26, %v28, 4 ++; CHECK: br %r14 ++ %res = call <8 x i16> @llvm.s390.vstrczh(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c, i32 4) ++ ret <8 x i16> %res ++} ++ ++; VSTRCZF. ++define <4 x i32> @test_vstrczf(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) { ++; CHECK-LABEL: test_vstrczf: ++; CHECK: vstrczf %v24, %v24, %v26, %v28, 8 ++; CHECK: br %r14 ++ %res = call <4 x i32> @llvm.s390.vstrczf(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c, i32 8) ++ ret <4 x i32> %res ++} ++ ++; VSTRCZBS. 
++define <16 x i8> @test_vstrczbs(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrczbs: ++; CHECK: vstrczbs %v24, %v24, %v26, %v28, 0 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<16 x i8>, i32} @llvm.s390.vstrczbs(<16 x i8> %a, <16 x i8> %b, ++ <16 x i8> %c, i32 0) ++ %res = extractvalue {<16 x i8>, i32} %call, 0 ++ %cc = extractvalue {<16 x i8>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <16 x i8> %res ++} ++ ++; VSTRCZHS. ++define <8 x i16> @test_vstrczhs(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrczhs: ++; CHECK: vstrczhs %v24, %v24, %v26, %v28, 4 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<8 x i16>, i32} @llvm.s390.vstrczhs(<8 x i16> %a, <8 x i16> %b, ++ <8 x i16> %c, i32 4) ++ %res = extractvalue {<8 x i16>, i32} %call, 0 ++ %cc = extractvalue {<8 x i16>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <8 x i16> %res ++} ++ ++; VSTRCZFS. ++define <4 x i32> @test_vstrczfs(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c, ++ i32 *%ccptr) { ++; CHECK-LABEL: test_vstrczfs: ++; CHECK: vstrczfs %v24, %v24, %v26, %v28, 8 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: srl [[REG]], 28 ++; CHECK: st [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %call = call {<4 x i32>, i32} @llvm.s390.vstrczfs(<4 x i32> %a, <4 x i32> %b, ++ <4 x i32> %c, i32 8) ++ %res = extractvalue {<4 x i32>, i32} %call, 0 ++ %cc = extractvalue {<4 x i32>, i32} %call, 1 ++ store i32 %cc, i32 *%ccptr ++ ret <4 x i32> %res ++} ++ ++; VFCEDBS with no processing of the result. ++define i32 @test_vfcedbs(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfcedbs: ++; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VFCEDBS, returning 1 if any elements are equal (CC != 3). ++define i32 @test_vfcedbs_any_bool(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfcedbs_any_bool: ++; CHECK: vfcedbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: afi %r2, -536870912 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ne i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VFCEDBS, storing to %ptr if any elements are equal. ++define <2 x i64> @test_vfcedbs_any_store(<2 x double> %a, <2 x double> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vfcedbs_any_store: ++; CHECK-NOT: %r ++; CHECK: vfcedbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jo|jnle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfcedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ule i32 %cc, 2 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VFCHDBS with no processing of the result. 
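++; For the CC-setting vector compares, CC0 means the comparison held for
++; all elements, CC1 for some and CC3 for none; CC2 is never produced.
++; A boolean test therefore needs only one extracted bit (RISBLG), or an
++; add-and-shift of the raw IPM value: AFI with -0x10000000 then SRL 31
++; tests CC == 0, while AFI with -0x20000000 tests CC < 2, i.e. CC != 3.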
++define i32 @test_vfchdbs(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfchdbs: ++; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VFCHDBS, returning 1 if not all elements are higher. ++define i32 @test_vfchdbs_notall_bool(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfchdbs_notall_bool: ++; CHECK: vfchdbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 36 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp sge i32 %res, 1 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VFCHDBS, storing to %ptr if not all elements are higher. ++define <2 x i64> @test_vfchdbs_notall_store(<2 x double> %a, <2 x double> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vfchdbs_notall_store: ++; CHECK-NOT: %r ++; CHECK: vfchdbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jhe|je}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchdbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp ugt i32 %cc, 0 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VFCHEDBS with no processing of the result. ++define i32 @test_vfchedbs(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfchedbs: ++; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VFCHEDBS, returning 1 if neither element is higher or equal. ++define i32 @test_vfchedbs_none_bool(<2 x double> %a, <2 x double> %b) { ++; CHECK-LABEL: test_vfchedbs_none_bool: ++; CHECK: vfchedbs {{%v[0-9]+}}, %v24, %v26 ++; CHECK: ipm [[REG:%r[0-5]]] ++; CHECK: risblg %r2, [[REG]], 31, 159, 35 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp eq i32 %res, 3 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VFCHEDBS, storing to %ptr if neither element is higher or equal. ++define <2 x i64> @test_vfchedbs_none_store(<2 x double> %a, <2 x double> %b, ++ i32 *%ptr) { ++; CHECK-LABEL: test_vfchedbs_none_store: ++; CHECK-NOT: %r ++; CHECK: vfchedbs %v24, %v24, %v26 ++; CHECK-NEXT: {{jno|jle}} {{\.L*}} ++; CHECK: mvhi 0(%r2), 0 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vfchedbs(<2 x double> %a, ++ <2 x double> %b) ++ %res = extractvalue {<2 x i64>, i32} %call, 0 ++ %cc = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp uge i32 %cc, 3 ++ br i1 %cmp, label %store, label %exit ++ ++store: ++ store i32 0, i32 *%ptr ++ br label %exit ++ ++exit: ++ ret <2 x i64> %res ++} ++ ++; VFTCIDB with the lowest useful class selector and no processing of the result. 
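++; VFTCI tests each element against a 12-bit class mask, one bit per
++; sign/class combination (zero, normal, subnormal, infinity, QNaN, SNaN),
++; with CC again reporting all/some/none; hence 1 and 4094 are the extreme
++; useful selectors.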
++define i32 @test_vftcidb(<2 x double> %a) { ++; CHECK-LABEL: test_vftcidb: ++; CHECK: vftcidb {{%v[0-9]+}}, %v24, 1 ++; CHECK: ipm %r2 ++; CHECK: srl %r2, 28 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 1) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ ret i32 %res ++} ++ ++; VFTCIDB with the highest useful class selector, returning 1 if all elements ++; have the right class (CC == 0). ++define i32 @test_vftcidb_all_bool(<2 x double> %a) { ++; CHECK-LABEL: test_vftcidb_all_bool: ++; CHECK: vftcidb {{%v[0-9]+}}, %v24, 4094 ++; CHECK: afi %r2, -268435456 ++; CHECK: srl %r2, 31 ++; CHECK: br %r14 ++ %call = call {<2 x i64>, i32} @llvm.s390.vftcidb(<2 x double> %a, i32 4094) ++ %res = extractvalue {<2 x i64>, i32} %call, 1 ++ %cmp = icmp eq i32 %res, 0 ++ %ext = zext i1 %cmp to i32 ++ ret i32 %ext ++} ++ ++; VFIDB with a rounding mode not usable via standard intrinsics. ++define <2 x double> @test_vfidb_0_4(<2 x double> %a) { ++; CHECK-LABEL: test_vfidb_0_4: ++; CHECK: vfidb %v24, %v24, 0, 4 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 0, i32 4) ++ ret <2 x double> %res ++} ++ ++; VFIDB with IEEE-inexact exception suppressed. ++define <2 x double> @test_vfidb_4_0(<2 x double> %a) { ++; CHECK-LABEL: test_vfidb_4_0: ++; CHECK: vfidb %v24, %v24, 4, 0 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.s390.vfidb(<2 x double> %a, i32 4, i32 0) ++ ret <2 x double> %res ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/vec-log-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-log-01.ll +@@ -0,0 +1,15 @@ ++; Test v2f64 logarithm. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare <2 x double> @llvm.log.v2f64(<2 x double>) ++ ++define <2 x double> @f1(<2 x double> %val) { ++; CHECK-LABEL: f1: ++; CHECK: brasl %r14, log@PLT ++; CHECK: brasl %r14, log@PLT ++; CHECK: vmrhg %v24, ++; CHECK: br %r14 ++ %ret = call <2 x double> @llvm.log.v2f64(<2 x double> %val) ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-max-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-max-01.ll +@@ -0,0 +1,83 @@ ++; Test v16i8 maximum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with sle. ++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with sgt. ++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with sge. 
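++; Each of the eight predicate/operand spellings in this file reduces to a
++; single vector max: signed comparisons select VMX and unsigned ones
++; VMXL. Max is commutative, so the CHECK patterns accept either operand
++; order.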
++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmxb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with ult. ++define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with ule. ++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with ugt. ++define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with uge. ++define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmxlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <16 x i8> %val1, %val2 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-max-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-max-02.ll +@@ -0,0 +1,83 @@ ++; Test v8i16 maximum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with sle. ++define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with sgt. ++define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with sge. ++define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmxh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with ult. ++define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with ule. 
++define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with ugt. ++define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with uge. ++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmxlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <8 x i16> %val1, %val2 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-max-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-max-03.ll +@@ -0,0 +1,83 @@ ++; Test v4i32 maximum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with sle. ++define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with sgt. ++define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with sge. ++define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmxf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with ult. ++define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with ule. ++define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with ugt. ++define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with uge. 
++define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmxlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <4 x i32> %val1, %val2 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-max-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-max-04.ll +@@ -0,0 +1,83 @@ ++; Test v2i64 maximum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with sle. ++define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with sgt. ++define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with sge. ++define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmxg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with ult. ++define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with ule. ++define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with ugt. ++define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with uge. ++define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmxlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <2 x i64> %val1, %val2 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-min-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-min-01.ll +@@ -0,0 +1,83 @@ ++; Test v16i8 minimum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. 
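++; These mirror the max tests with the icmp operands swapped, so the same
++; select idiom now matches the signed VMN and unsigned VMNL minimums.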
++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with sle. ++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with sgt. ++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with sge. ++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmnb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with ult. ++define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with ule. ++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val2, <16 x i8> %val1 ++ ret <16 x i8> %ret ++} ++ ++; Test with ugt. ++define <16 x i8> @f7(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test with uge. ++define <16 x i8> @f8(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmnlb %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <16 x i8> %val2, %val1 ++ %ret = select <16 x i1> %cmp, <16 x i8> %val1, <16 x i8> %val2 ++ ret <16 x i8> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-min-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-min-02.ll +@@ -0,0 +1,83 @@ ++; Test v8i16 minimum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with sle. ++define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with sgt. 
++define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with sge. ++define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmnh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with ult. ++define <8 x i16> @f5(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with ule. ++define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val2, <8 x i16> %val1 ++ ret <8 x i16> %ret ++} ++ ++; Test with ugt. ++define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test with uge. ++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmnlh %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <8 x i16> %val2, %val1 ++ %ret = select <8 x i1> %cmp, <8 x i16> %val1, <8 x i16> %val2 ++ ret <8 x i16> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-min-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-min-03.ll +@@ -0,0 +1,83 @@ ++; Test v4i32 minimum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with sle. ++define <4 x i32> @f2(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with sgt. ++define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with sge. ++define <4 x i32> @f4(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmnf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with ult. 
++define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with ule. ++define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val2, <4 x i32> %val1 ++ ret <4 x i32> %ret ++} ++ ++; Test with ugt. ++define <4 x i32> @f7(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test with uge. ++define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmnlf %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <4 x i32> %val2, %val1 ++ %ret = select <4 x i1> %cmp, <4 x i32> %val1, <4 x i32> %val2 ++ ret <4 x i32> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-min-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-min-04.ll +@@ -0,0 +1,83 @@ ++; Test v2i64 minimum. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test with slt. ++define <2 x i64> @f1(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp slt <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with sle. ++define <2 x i64> @f2(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sle <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with sgt. ++define <2 x i64> @f3(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sgt <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with sge. ++define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vmng %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp sge <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with ult. ++define <2 x i64> @f5(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ult <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with ule. ++define <2 x i64> @f6(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ule <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val2, <2 x i64> %val1 ++ ret <2 x i64> %ret ++} ++ ++; Test with ugt. 
++define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp ugt <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test with uge. ++define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vmnlg %v24, {{%v24, %v26|%v26, %v24}} ++; CHECK: br %r14 ++ %cmp = icmp uge <2 x i64> %val2, %val1 ++ %ret = select <2 x i1> %cmp, <2 x i64> %val1, <2 x i64> %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-01.ll +@@ -0,0 +1,107 @@ ++; Test vector register moves. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 moves. ++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <16 x i8> %val2 ++} ++ ++; Test v8i16 moves. ++define <8 x i16> @f2(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <8 x i16> %val2 ++} ++ ++; Test v4i32 moves. ++define <4 x i32> @f3(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <4 x i32> %val2 ++} ++ ++; Test v2i64 moves. ++define <2 x i64> @f4(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x i64> %val2 ++} ++ ++; Test v4f32 moves. ++define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <4 x float> %val2 ++} ++ ++; Test v2f64 moves. ++define <2 x double> @f6(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x double> %val2 ++} ++ ++; Test v2i8 moves. ++define <2 x i8> @f7(<2 x i8> %val1, <2 x i8> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x i8> %val2 ++} ++ ++; Test v4i8 moves. ++define <4 x i8> @f8(<4 x i8> %val1, <4 x i8> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <4 x i8> %val2 ++} ++ ++; Test v8i8 moves. ++define <8 x i8> @f9(<8 x i8> %val1, <8 x i8> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <8 x i8> %val2 ++} ++ ++; Test v2i16 moves. ++define <2 x i16> @f10(<2 x i16> %val1, <2 x i16> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x i16> %val2 ++} ++ ++; Test v4i16 moves. ++define <4 x i16> @f11(<4 x i16> %val1, <4 x i16> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <4 x i16> %val2 ++} ++ ++; Test v2i32 moves. ++define <2 x i32> @f12(<2 x i32> %val1, <2 x i32> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x i32> %val2 ++} ++ ++; Test v2f32 moves. ++define <2 x float> @f13(<2 x float> %val1, <2 x float> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vlr %v24, %v26 ++; CHECK: br %r14 ++ ret <2 x float> %val2 ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-02.ll +@@ -0,0 +1,174 @@ ++; Test vector loads. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 loads. 
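++; VL encodes a 12-bit unsigned displacement, so 4080 (255 * 16) is the
++; highest aligned offset and 4095 the highest byte offset reachable
++; directly; anything larger or negative needs separate address arithmetic
++; (AGHI), and an index register is available. Vectors narrower than 128
++; bits are loaded here with the replicating VLREP loads.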
++define <16 x i8> @f1(<16 x i8> *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <16 x i8> *%ptr ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16 loads. ++define <8 x i16> @f2(<8 x i16> *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <8 x i16> *%ptr ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32 loads. ++define <4 x i32> @f3(<4 x i32> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <4 x i32> *%ptr ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 loads. ++define <2 x i64> @f4(<2 x i64> *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x i64> *%ptr ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32 loads. ++define <4 x float> @f5(<4 x float> *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <4 x float> *%ptr ++ ret <4 x float> %ret ++} ++ ++; Test v2f64 loads. ++define <2 x double> @f6(<2 x double> *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x double> *%ptr ++ ret <2 x double> %ret ++} ++ ++; Test the highest aligned in-range offset. ++define <16 x i8> @f7(<16 x i8> *%base) { ++; CHECK-LABEL: f7: ++; CHECK: vl %v24, 4080(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 255 ++ %ret = load <16 x i8> *%ptr ++ ret <16 x i8> %ret ++} ++ ++; Test the highest unaligned in-range offset. ++define <16 x i8> @f8(i8 *%base) { ++; CHECK-LABEL: f8: ++; CHECK: vl %v24, 4095(%r2) ++; CHECK: br %r14 ++ %addr = getelementptr i8 *%base, i64 4095 ++ %ptr = bitcast i8 *%addr to <16 x i8> * ++ %ret = load <16 x i8> *%ptr, align 1 ++ ret <16 x i8> %ret ++} ++ ++; Test the next offset up, which requires separate address logic, ++define <16 x i8> @f9(<16 x i8> *%base) { ++; CHECK-LABEL: f9: ++; CHECK: aghi %r2, 4096 ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 256 ++ %ret = load <16 x i8> *%ptr ++ ret <16 x i8> %ret ++} ++ ++; Test negative offsets, which also require separate address logic, ++define <16 x i8> @f10(<16 x i8> *%base) { ++; CHECK-LABEL: f10: ++; CHECK: aghi %r2, -16 ++; CHECK: vl %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 -1 ++ %ret = load <16 x i8> *%ptr ++ ret <16 x i8> %ret ++} ++ ++; Check that indexes are allowed. ++define <16 x i8> @f11(i8 *%base, i64 %index) { ++; CHECK-LABEL: f11: ++; CHECK: vl %v24, 0(%r3,%r2) ++; CHECK: br %r14 ++ %addr = getelementptr i8 *%base, i64 %index ++ %ptr = bitcast i8 *%addr to <16 x i8> * ++ %ret = load <16 x i8> *%ptr, align 1 ++ ret <16 x i8> %ret ++} ++ ++; Test v2i8 loads. ++define <2 x i8> @f12(<2 x i8> *%ptr) { ++; CHECK-LABEL: f12: ++; CHECK: vlreph %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x i8> *%ptr ++ ret <2 x i8> %ret ++} ++ ++; Test v4i8 loads. ++define <4 x i8> @f13(<4 x i8> *%ptr) { ++; CHECK-LABEL: f13: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <4 x i8> *%ptr ++ ret <4 x i8> %ret ++} ++ ++; Test v8i8 loads. ++define <8 x i8> @f14(<8 x i8> *%ptr) { ++; CHECK-LABEL: f14: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <8 x i8> *%ptr ++ ret <8 x i8> %ret ++} ++ ++; Test v2i16 loads. ++define <2 x i16> @f15(<2 x i16> *%ptr) { ++; CHECK-LABEL: f15: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x i16> *%ptr ++ ret <2 x i16> %ret ++} ++ ++; Test v4i16 loads. 
++define <4 x i16> @f16(<4 x i16> *%ptr) { ++; CHECK-LABEL: f16: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <4 x i16> *%ptr ++ ret <4 x i16> %ret ++} ++ ++; Test v2i32 loads. ++define <2 x i32> @f17(<2 x i32> *%ptr) { ++; CHECK-LABEL: f17: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x i32> *%ptr ++ ret <2 x i32> %ret ++} ++ ++; Test v2f32 loads. ++define <2 x float> @f18(<2 x float> *%ptr) { ++; CHECK-LABEL: f18: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %ret = load <2 x float> *%ptr ++ ret <2 x float> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-03.ll +@@ -0,0 +1,174 @@ ++; Test vector stores. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 stores. ++define void @f1(<16 x i8> %val, <16 x i8> *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <16 x i8> %val, <16 x i8> *%ptr ++ ret void ++} ++ ++; Test v8i16 stores. ++define void @f2(<8 x i16> %val, <8 x i16> *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <8 x i16> %val, <8 x i16> *%ptr ++ ret void ++} ++ ++; Test v4i32 stores. ++define void @f3(<4 x i32> %val, <4 x i32> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <4 x i32> %val, <4 x i32> *%ptr ++ ret void ++} ++ ++; Test v2i64 stores. ++define void @f4(<2 x i64> %val, <2 x i64> *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <2 x i64> %val, <2 x i64> *%ptr ++ ret void ++} ++ ++; Test v4f32 stores. ++define void @f5(<4 x float> %val, <4 x float> *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <4 x float> %val, <4 x float> *%ptr ++ ret void ++} ++ ++; Test v2f64 stores. ++define void @f6(<2 x double> %val, <2 x double> *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ store <2 x double> %val, <2 x double> *%ptr ++ ret void ++} ++ ++; Test the highest aligned in-range offset. ++define void @f7(<16 x i8> %val, <16 x i8> *%base) { ++; CHECK-LABEL: f7: ++; CHECK: vst %v24, 4080(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 255 ++ store <16 x i8> %val, <16 x i8> *%ptr ++ ret void ++} ++ ++; Test the highest unaligned in-range offset. ++define void @f8(<16 x i8> %val, i8 *%base) { ++; CHECK-LABEL: f8: ++; CHECK: vst %v24, 4095(%r2) ++; CHECK: br %r14 ++ %addr = getelementptr i8 *%base, i64 4095 ++ %ptr = bitcast i8 *%addr to <16 x i8> * ++ store <16 x i8> %val, <16 x i8> *%ptr, align 1 ++ ret void ++} ++ ++; Test the next offset up, which requires separate address logic, ++define void @f9(<16 x i8> %val, <16 x i8> *%base) { ++; CHECK-LABEL: f9: ++; CHECK: aghi %r2, 4096 ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 256 ++ store <16 x i8> %val, <16 x i8> *%ptr ++ ret void ++} ++ ++; Test negative offsets, which also require separate address logic, ++define void @f10(<16 x i8> %val, <16 x i8> *%base) { ++; CHECK-LABEL: f10: ++; CHECK: aghi %r2, -16 ++; CHECK: vst %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr <16 x i8> *%base, i64 -1 ++ store <16 x i8> %val, <16 x i8> *%ptr ++ ret void ++} ++ ++; Check that indexes are allowed. 
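++; (The store tests mirror the loads; vectors narrower than 128 bits are
++; stored with the VSTEH/VSTEF/VSTEG element stores further below.)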
++define void @f11(<16 x i8> %val, i8 *%base, i64 %index) { ++; CHECK-LABEL: f11: ++; CHECK: vst %v24, 0(%r3,%r2) ++; CHECK: br %r14 ++ %addr = getelementptr i8 *%base, i64 %index ++ %ptr = bitcast i8 *%addr to <16 x i8> * ++ store <16 x i8> %val, <16 x i8> *%ptr, align 1 ++ ret void ++} ++ ++; Test v2i8 stores. ++define void @f12(<2 x i8> %val, <2 x i8> *%ptr) { ++; CHECK-LABEL: f12: ++; CHECK: vsteh %v24, 0(%r2), 0 ++; CHECK: br %r14 ++ store <2 x i8> %val, <2 x i8> *%ptr ++ ret void ++} ++ ++; Test v4i8 stores. ++define void @f13(<4 x i8> %val, <4 x i8> *%ptr) { ++; CHECK-LABEL: f13: ++; CHECK: vstef %v24, 0(%r2) ++; CHECK: br %r14 ++ store <4 x i8> %val, <4 x i8> *%ptr ++ ret void ++} ++ ++; Test v8i8 stores. ++define void @f14(<8 x i8> %val, <8 x i8> *%ptr) { ++; CHECK-LABEL: f14: ++; CHECK: vsteg %v24, 0(%r2) ++; CHECK: br %r14 ++ store <8 x i8> %val, <8 x i8> *%ptr ++ ret void ++} ++ ++; Test v2i16 stores. ++define void @f15(<2 x i16> %val, <2 x i16> *%ptr) { ++; CHECK-LABEL: f15: ++; CHECK: vstef %v24, 0(%r2), 0 ++; CHECK: br %r14 ++ store <2 x i16> %val, <2 x i16> *%ptr ++ ret void ++} ++ ++; Test v4i16 stores. ++define void @f16(<4 x i16> %val, <4 x i16> *%ptr) { ++; CHECK-LABEL: f16: ++; CHECK: vsteg %v24, 0(%r2) ++; CHECK: br %r14 ++ store <4 x i16> %val, <4 x i16> *%ptr ++ ret void ++} ++ ++; Test v2i32 stores. ++define void @f17(<2 x i32> %val, <2 x i32> *%ptr) { ++; CHECK-LABEL: f17: ++; CHECK: vsteg %v24, 0(%r2), 0 ++; CHECK: br %r14 ++ store <2 x i32> %val, <2 x i32> *%ptr ++ ret void ++} ++ ++; Test v2f32 stores. ++define void @f18(<2 x float> %val, <2 x float> *%ptr) { ++; CHECK-LABEL: f18: ++; CHECK: vsteg %v24, 0(%r2), 0 ++; CHECK: br %r14 ++ store <2 x float> %val, <2 x float> *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-04.ll +@@ -0,0 +1,179 @@ ++; Test vector insertion of register variables. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into the first element. ++define <16 x i8> @f1(<16 x i8> %val, i8 %element) { ++; CHECK-LABEL: f1: ++; CHECK: vlvgb %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 %element, i32 0 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into the last element. ++define <16 x i8> @f2(<16 x i8> %val, i8 %element) { ++; CHECK-LABEL: f2: ++; CHECK: vlvgb %v24, %r2, 15 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 %element, i32 15 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into a variable element. ++define <16 x i8> @f3(<16 x i8> %val, i8 %element, i32 %index) { ++; CHECK-LABEL: f3: ++; CHECK: vlvgb %v24, %r2, 0(%r3) ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 %element, i32 %index ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16 insertion into the first element. ++define <8 x i16> @f4(<8 x i16> %val, i16 %element) { ++; CHECK-LABEL: f4: ++; CHECK: vlvgh %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 %element, i32 0 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into the last element. ++define <8 x i16> @f5(<8 x i16> %val, i16 %element) { ++; CHECK-LABEL: f5: ++; CHECK: vlvgh %v24, %r2, 7 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 %element, i32 7 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into a variable element. 
++define <8 x i16> @f6(<8 x i16> %val, i16 %element, i32 %index) { ++; CHECK-LABEL: f6: ++; CHECK: vlvgh %v24, %r2, 0(%r3) ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 %element, i32 %index ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32 insertion into the first element. ++define <4 x i32> @f7(<4 x i32> %val, i32 %element) { ++; CHECK-LABEL: f7: ++; CHECK: vlvgf %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 %element, i32 0 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into the last element. ++define <4 x i32> @f8(<4 x i32> %val, i32 %element) { ++; CHECK-LABEL: f8: ++; CHECK: vlvgf %v24, %r2, 3 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 %element, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into a variable element. ++define <4 x i32> @f9(<4 x i32> %val, i32 %element, i32 %index) { ++; CHECK-LABEL: f9: ++; CHECK: vlvgf %v24, %r2, 0(%r3) ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 %element, i32 %index ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 insertion into the first element. ++define <2 x i64> @f10(<2 x i64> %val, i64 %element) { ++; CHECK-LABEL: f10: ++; CHECK: vlvgg %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion into the last element. ++define <2 x i64> @f11(<2 x i64> %val, i64 %element) { ++; CHECK-LABEL: f11: ++; CHECK: vlvgg %v24, %r2, 1 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion into a variable element. ++define <2 x i64> @f12(<2 x i64> %val, i64 %element, i32 %index) { ++; CHECK-LABEL: f12: ++; CHECK: vlvgg %v24, %r2, 0(%r3) ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 %element, i32 %index ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32 insertion into the first element. ++define <4 x float> @f13(<4 x float> %val, float %element) { ++; CHECK-LABEL: f13: ++; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: vlvgf %v24, [[REG]], 0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> %val, float %element, i32 0 ++ ret <4 x float> %ret ++} ++ ++; Test v4f32 insertion into the last element. ++define <4 x float> @f14(<4 x float> %val, float %element) { ++; CHECK-LABEL: f14: ++; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: vlvgf %v24, [[REG]], 3 ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> %val, float %element, i32 3 ++ ret <4 x float> %ret ++} ++ ++; Test v4f32 insertion into a variable element. ++define <4 x float> @f15(<4 x float> %val, float %element, i32 %index) { ++; CHECK-LABEL: f15: ++; CHECK: vlgvf [[REG:%r[0-5]]], %v0, 0 ++; CHECK: vlvgf %v24, [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> %val, float %element, i32 %index ++ ret <4 x float> %ret ++} ++ ++; Test v2f64 insertion into the first element. ++define <2 x double> @f16(<2 x double> %val, double %element) { ++; CHECK-LABEL: f16: ++; CHECK: vpdi %v24, %v0, %v24, 1 ++; CHECK: br %r14 ++ %ret = insertelement <2 x double> %val, double %element, i32 0 ++ ret <2 x double> %ret ++} ++ ++; Test v2f64 insertion into the last element. ++define <2 x double> @f17(<2 x double> %val, double %element) { ++; CHECK-LABEL: f17: ++; CHECK: vpdi %v24, %v24, %v0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x double> %val, double %element, i32 1 ++ ret <2 x double> %ret ++} ++ ++; Test v2f64 insertion into a variable element. 
++define <2 x double> @f18(<2 x double> %val, double %element, i32 %index) {
++; CHECK-LABEL: f18:
++; CHECK: lgdr [[REG:%r[0-5]]], %f0
++; CHECK: vlvgg %v24, [[REG]], 0(%r2)
++; CHECK: br %r14
++  %ret = insertelement <2 x double> %val, double %element, i32 %index
++  ret <2 x double> %ret
++}
++
++; Test v16i8 insertion into a variable element plus one.
++define <16 x i8> @f19(<16 x i8> %val, i8 %element, i32 %index) {
++; CHECK-LABEL: f19:
++; CHECK: vlvgb %v24, %r2, 1(%r3)
++; CHECK: br %r14
++  %add = add i32 %index, 1
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 %add
++  ret <16 x i8> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-move-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-move-05.ll
+@@ -0,0 +1,249 @@
++; Test vector extraction.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test v16i8 extraction of the first element.
++define i8 @f1(<16 x i8> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vlgvb %r2, %v24, 0
++; CHECK: br %r14
++  %ret = extractelement <16 x i8> %val, i32 0
++  ret i8 %ret
++}
++
++; Test v16i8 extraction of the last element.
++define i8 @f2(<16 x i8> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vlgvb %r2, %v24, 15
++; CHECK: br %r14
++  %ret = extractelement <16 x i8> %val, i32 15
++  ret i8 %ret
++}
++
++; Test v16i8 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define i8 @f3(<16 x i8> %val) {
++; CHECK-LABEL: f3:
++; CHECK-NOT: vlgvb %r2, %v24, 100000
++; CHECK: br %r14
++  %ret = extractelement <16 x i8> %val, i32 100000
++  ret i8 %ret
++}
++
++; Test v16i8 extraction of a variable element.
++define i8 @f4(<16 x i8> %val, i32 %index) {
++; CHECK-LABEL: f4:
++; CHECK: vlgvb %r2, %v24, 0(%r2)
++; CHECK: br %r14
++  %ret = extractelement <16 x i8> %val, i32 %index
++  ret i8 %ret
++}
++
++; Test v8i16 extraction of the first element.
++define i16 @f5(<8 x i16> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vlgvh %r2, %v24, 0
++; CHECK: br %r14
++  %ret = extractelement <8 x i16> %val, i32 0
++  ret i16 %ret
++}
++
++; Test v8i16 extraction of the last element.
++define i16 @f6(<8 x i16> %val) {
++; CHECK-LABEL: f6:
++; CHECK: vlgvh %r2, %v24, 7
++; CHECK: br %r14
++  %ret = extractelement <8 x i16> %val, i32 7
++  ret i16 %ret
++}
++
++; Test v8i16 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define i16 @f7(<8 x i16> %val) {
++; CHECK-LABEL: f7:
++; CHECK-NOT: vlgvh %r2, %v24, 100000
++; CHECK: br %r14
++  %ret = extractelement <8 x i16> %val, i32 100000
++  ret i16 %ret
++}
++
++; Test v8i16 extraction of a variable element.
++define i16 @f8(<8 x i16> %val, i32 %index) {
++; CHECK-LABEL: f8:
++; CHECK: vlgvh %r2, %v24, 0(%r2)
++; CHECK: br %r14
++  %ret = extractelement <8 x i16> %val, i32 %index
++  ret i16 %ret
++}
++
++; Test v4i32 extraction of the first element.
++define i32 @f9(<4 x i32> %val) {
++; CHECK-LABEL: f9:
++; CHECK: vlgvf %r2, %v24, 0
++; CHECK: br %r14
++  %ret = extractelement <4 x i32> %val, i32 0
++  ret i32 %ret
++}
++
++; Test v4i32 extraction of the last element.
++define i32 @f10(<4 x i32> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vlgvf %r2, %v24, 3
++; CHECK: br %r14
++  %ret = extractelement <4 x i32> %val, i32 3
++  ret i32 %ret
++}
++
++; Test v4i32 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define i32 @f11(<4 x i32> %val) {
++; CHECK-LABEL: f11:
++; CHECK-NOT: vlgvf %r2, %v24, 100000
++; CHECK: br %r14
++  %ret = extractelement <4 x i32> %val, i32 100000
++  ret i32 %ret
++}
++
++; Test v4i32 extraction of a variable element.
++define i32 @f12(<4 x i32> %val, i32 %index) {
++; CHECK-LABEL: f12:
++; CHECK: vlgvf %r2, %v24, 0(%r2)
++; CHECK: br %r14
++  %ret = extractelement <4 x i32> %val, i32 %index
++  ret i32 %ret
++}
++
++; Test v2i64 extraction of the first element.
++define i64 @f13(<2 x i64> %val) {
++; CHECK-LABEL: f13:
++; CHECK: vlgvg %r2, %v24, 0
++; CHECK: br %r14
++  %ret = extractelement <2 x i64> %val, i32 0
++  ret i64 %ret
++}
++
++; Test v2i64 extraction of the last element.
++define i64 @f14(<2 x i64> %val) {
++; CHECK-LABEL: f14:
++; CHECK: vlgvg %r2, %v24, 1
++; CHECK: br %r14
++  %ret = extractelement <2 x i64> %val, i32 1
++  ret i64 %ret
++}
++
++; Test v2i64 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define i64 @f15(<2 x i64> %val) {
++; CHECK-LABEL: f15:
++; CHECK-NOT: vlgvg %r2, %v24, 100000
++; CHECK: br %r14
++  %ret = extractelement <2 x i64> %val, i32 100000
++  ret i64 %ret
++}
++
++; Test v2i64 extraction of a variable element.
++define i64 @f16(<2 x i64> %val, i32 %index) {
++; CHECK-LABEL: f16:
++; CHECK: vlgvg %r2, %v24, 0(%r2)
++; CHECK: br %r14
++  %ret = extractelement <2 x i64> %val, i32 %index
++  ret i64 %ret
++}
++
++; Test v4f32 extraction of element 0.
++define float @f17(<4 x float> %val) {
++; CHECK-LABEL: f17:
++; CHECK: vlr %v0, %v24
++; CHECK: br %r14
++  %ret = extractelement <4 x float> %val, i32 0
++  ret float %ret
++}
++
++; Test v4f32 extraction of element 1.
++define float @f18(<4 x float> %val) {
++; CHECK-LABEL: f18:
++; CHECK: vrepf %v0, %v24, 1
++; CHECK: br %r14
++  %ret = extractelement <4 x float> %val, i32 1
++  ret float %ret
++}
++
++; Test v4f32 extraction of element 2.
++define float @f19(<4 x float> %val) {
++; CHECK-LABEL: f19:
++; CHECK: vrepf %v0, %v24, 2
++; CHECK: br %r14
++  %ret = extractelement <4 x float> %val, i32 2
++  ret float %ret
++}
++
++; Test v4f32 extraction of element 3.
++define float @f20(<4 x float> %val) {
++; CHECK-LABEL: f20:
++; CHECK: vrepf %v0, %v24, 3
++; CHECK: br %r14
++  %ret = extractelement <4 x float> %val, i32 3
++  ret float %ret
++}
++
++; Test v4f32 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define float @f21(<4 x float> %val) {
++  %ret = extractelement <4 x float> %val, i32 100000
++  ret float %ret
++}
++
++; Test v4f32 extraction of a variable element.
++define float @f22(<4 x float> %val, i32 %index) {
++; CHECK-LABEL: f22:
++; CHECK: vlgvf [[REG:%r[0-5]]], %v24, 0(%r2)
++; CHECK: vlvgf %v0, [[REG]], 0
++; CHECK: br %r14
++  %ret = extractelement <4 x float> %val, i32 %index
++  ret float %ret
++}
++
++; Test v2f64 extraction of the first element.
++define double @f23(<2 x double> %val) {
++; CHECK-LABEL: f23:
++; CHECK: vlr %v0, %v24
++; CHECK: br %r14
++  %ret = extractelement <2 x double> %val, i32 0
++  ret double %ret
++}
++
++; Test v2f64 extraction of the last element.
++define double @f24(<2 x double> %val) {
++; CHECK-LABEL: f24:
++; CHECK: vrepg %v0, %v24, 1
++; CHECK: br %r14
++  %ret = extractelement <2 x double> %val, i32 1
++  ret double %ret
++}
++
++; Test v2f64 extraction of an absurd element number. This must compile,
++; but we don't care what it does.
++define double @f25(<2 x double> %val) { ++ %ret = extractelement <2 x double> %val, i32 100000 ++ ret double %ret ++} ++ ++; Test v2f64 extraction of a variable element. ++define double @f26(<2 x double> %val, i32 %index) { ++; CHECK-LABEL: f26: ++; CHECK: vlgvg [[REG:%r[0-5]]], %v24, 0(%r2) ++; CHECK: ldgr %f0, [[REG]] ++; CHECK: br %r14 ++ %ret = extractelement <2 x double> %val, i32 %index ++ ret double %ret ++} ++ ++; Test v16i8 extraction of a variable element with an offset. ++define i8 @f27(<16 x i8> %val, i32 %index) { ++; CHECK-LABEL: f27: ++; CHECK: vlgvb %r2, %v24, 1(%r2) ++; CHECK: br %r14 ++ %add = add i32 %index, 1 ++ %ret = extractelement <16 x i8> %val, i32 %add ++ ret i8 %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-06.ll +@@ -0,0 +1,13 @@ ++; Test vector builds using VLVGP. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test the basic v2i64 usage. ++define <2 x i64> @f1(i64 %a, i64 %b) { ++; CHECK-LABEL: f1: ++; CHECK: vlvgp %v24, %r2, %r3 ++; CHECK: br %r14 ++ %veca = insertelement <2 x i64> undef, i64 %a, i32 0 ++ %vecb = insertelement <2 x i64> %veca, i64 %b, i32 1 ++ ret <2 x i64> %vecb ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-07.ll +@@ -0,0 +1,57 @@ ++; Test scalar_to_vector expansion. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8. ++define <16 x i8> @f1(i8 %val) { ++; CHECK-LABEL: f1: ++; CHECK: vlvgb %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 0 ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16. ++define <8 x i16> @f2(i16 %val) { ++; CHECK-LABEL: f2: ++; CHECK: vlvgh %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 0 ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32. ++define <4 x i32> @f3(i32 %val) { ++; CHECK-LABEL: f3: ++; CHECK: vlvgf %v24, %r2, 0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 0 ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64. Here we load %val into both halves. ++define <2 x i64> @f4(i64 %val) { ++; CHECK-LABEL: f4: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> undef, i64 %val, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32, which is just a move. ++define <4 x float> @f5(float %val) { ++; CHECK-LABEL: f5: ++; CHECK: vlr %v24, %v0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> undef, float %val, i32 0 ++ ret <4 x float> %ret ++} ++ ++; Likewise v2f64. ++define <2 x double> @f6(double %val) { ++; CHECK-LABEL: f6: ++; CHECK: vlr %v24, %v0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x double> undef, double %val, i32 0 ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-08.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-08.ll +@@ -0,0 +1,444 @@ ++; Test vector insertion of memory values. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into the first element. 
++define <16 x i8> @f1(<16 x i8> %val, i8 *%ptr) {
++; CHECK-LABEL: f1:
++; CHECK: vleb %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load i8 *%ptr
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 0
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 insertion into the last element.
++define <16 x i8> @f2(<16 x i8> %val, i8 *%ptr) {
++; CHECK-LABEL: f2:
++; CHECK: vleb %v24, 0(%r2), 15
++; CHECK: br %r14
++  %element = load i8 *%ptr
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 15
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 insertion with the highest in-range offset.
++define <16 x i8> @f3(<16 x i8> %val, i8 *%base) {
++; CHECK-LABEL: f3:
++; CHECK: vleb %v24, 4095(%r2), 10
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i32 4095
++  %element = load i8 *%ptr
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 10
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 insertion with the first out-of-range offset.
++define <16 x i8> @f4(<16 x i8> %val, i8 *%base) {
++; CHECK-LABEL: f4:
++; CHECK: aghi %r2, 4096
++; CHECK: vleb %v24, 0(%r2), 5
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i32 4096
++  %element = load i8 *%ptr
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 5
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 insertion into a variable element.
++define <16 x i8> @f5(<16 x i8> %val, i8 *%ptr, i32 %index) {
++; CHECK-LABEL: f5:
++; CHECK-NOT: vleb
++; CHECK: br %r14
++  %element = load i8 *%ptr
++  %ret = insertelement <16 x i8> %val, i8 %element, i32 %index
++  ret <16 x i8> %ret
++}
++
++; Test v8i16 insertion into the first element.
++define <8 x i16> @f6(<8 x i16> %val, i16 *%ptr) {
++; CHECK-LABEL: f6:
++; CHECK: vleh %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load i16 *%ptr
++  %ret = insertelement <8 x i16> %val, i16 %element, i32 0
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 insertion into the last element.
++define <8 x i16> @f7(<8 x i16> %val, i16 *%ptr) {
++; CHECK-LABEL: f7:
++; CHECK: vleh %v24, 0(%r2), 7
++; CHECK: br %r14
++  %element = load i16 *%ptr
++  %ret = insertelement <8 x i16> %val, i16 %element, i32 7
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 insertion with the highest in-range offset.
++define <8 x i16> @f8(<8 x i16> %val, i16 *%base) {
++; CHECK-LABEL: f8:
++; CHECK: vleh %v24, 4094(%r2), 5
++; CHECK: br %r14
++  %ptr = getelementptr i16 *%base, i32 2047
++  %element = load i16 *%ptr
++  %ret = insertelement <8 x i16> %val, i16 %element, i32 5
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 insertion with the first out-of-range offset.
++define <8 x i16> @f9(<8 x i16> %val, i16 *%base) {
++; CHECK-LABEL: f9:
++; CHECK: aghi %r2, 4096
++; CHECK: vleh %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i16 *%base, i32 2048
++  %element = load i16 *%ptr
++  %ret = insertelement <8 x i16> %val, i16 %element, i32 1
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 insertion into a variable element.
++define <8 x i16> @f10(<8 x i16> %val, i16 *%ptr, i32 %index) {
++; CHECK-LABEL: f10:
++; CHECK-NOT: vleh
++; CHECK: br %r14
++  %element = load i16 *%ptr
++  %ret = insertelement <8 x i16> %val, i16 %element, i32 %index
++  ret <8 x i16> %ret
++}
++
++; Test v4i32 insertion into the first element.
++define <4 x i32> @f11(<4 x i32> %val, i32 *%ptr) {
++; CHECK-LABEL: f11:
++; CHECK: vlef %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 insertion into the last element.
++define <4 x i32> @f12(<4 x i32> %val, i32 *%ptr) {
++; CHECK-LABEL: f12:
++; CHECK: vlef %v24, 0(%r2), 3
++; CHECK: br %r14
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 3
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 insertion with the highest in-range offset.
++define <4 x i32> @f13(<4 x i32> %val, i32 *%base) {
++; CHECK-LABEL: f13:
++; CHECK: vlef %v24, 4092(%r2), 2
++; CHECK: br %r14
++  %ptr = getelementptr i32 *%base, i32 1023
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 2
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 insertion with the first out-of-range offset.
++define <4 x i32> @f14(<4 x i32> %val, i32 *%base) {
++; CHECK-LABEL: f14:
++; CHECK: aghi %r2, 4096
++; CHECK: vlef %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i32 *%base, i32 1024
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 1
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 insertion into a variable element.
++define <4 x i32> @f15(<4 x i32> %val, i32 *%ptr, i32 %index) {
++; CHECK-LABEL: f15:
++; CHECK-NOT: vlef
++; CHECK: br %r14
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 %index
++  ret <4 x i32> %ret
++}
++
++; Test v2i64 insertion into the first element.
++define <2 x i64> @f16(<2 x i64> %val, i64 *%ptr) {
++; CHECK-LABEL: f16:
++; CHECK: vleg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load i64 *%ptr
++  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 insertion into the last element.
++define <2 x i64> @f17(<2 x i64> %val, i64 *%ptr) {
++; CHECK-LABEL: f17:
++; CHECK: vleg %v24, 0(%r2), 1
++; CHECK: br %r14
++  %element = load i64 *%ptr
++  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 insertion with the highest in-range offset.
++define <2 x i64> @f18(<2 x i64> %val, i64 *%base) {
++; CHECK-LABEL: f18:
++; CHECK: vleg %v24, 4088(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i64 *%base, i32 511
++  %element = load i64 *%ptr
++  %ret = insertelement <2 x i64> %val, i64 %element, i32 1
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 insertion with the first out-of-range offset.
++define <2 x i64> @f19(<2 x i64> %val, i64 *%base) {
++; CHECK-LABEL: f19:
++; CHECK: aghi %r2, 4096
++; CHECK: vleg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %ptr = getelementptr i64 *%base, i32 512
++  %element = load i64 *%ptr
++  %ret = insertelement <2 x i64> %val, i64 %element, i32 0
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 insertion into a variable element.
++define <2 x i64> @f20(<2 x i64> %val, i64 *%ptr, i32 %index) {
++; CHECK-LABEL: f20:
++; CHECK-NOT: vleg
++; CHECK: br %r14
++  %element = load i64 *%ptr
++  %ret = insertelement <2 x i64> %val, i64 %element, i32 %index
++  ret <2 x i64> %ret
++}
++
++; Test v4f32 insertion into the first element.
++define <4 x float> @f21(<4 x float> %val, float *%ptr) {
++; CHECK-LABEL: f21:
++; CHECK: vlef %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load float *%ptr
++  %ret = insertelement <4 x float> %val, float %element, i32 0
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion into the last element.
++define <4 x float> @f22(<4 x float> %val, float *%ptr) {
++; CHECK-LABEL: f22:
++; CHECK: vlef %v24, 0(%r2), 3
++; CHECK: br %r14
++  %element = load float *%ptr
++  %ret = insertelement <4 x float> %val, float %element, i32 3
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion with the highest in-range offset.
++define <4 x float> @f23(<4 x float> %val, float *%base) {
++; CHECK-LABEL: f23:
++; CHECK: vlef %v24, 4092(%r2), 2
++; CHECK: br %r14
++  %ptr = getelementptr float *%base, i32 1023
++  %element = load float *%ptr
++  %ret = insertelement <4 x float> %val, float %element, i32 2
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion with the first out-of-range offset.
++define <4 x float> @f24(<4 x float> %val, float *%base) {
++; CHECK-LABEL: f24:
++; CHECK: aghi %r2, 4096
++; CHECK: vlef %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr float *%base, i32 1024
++  %element = load float *%ptr
++  %ret = insertelement <4 x float> %val, float %element, i32 1
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion into a variable element.
++define <4 x float> @f25(<4 x float> %val, float *%ptr, i32 %index) {
++; CHECK-LABEL: f25:
++; CHECK-NOT: vlef
++; CHECK: br %r14
++  %element = load float *%ptr
++  %ret = insertelement <4 x float> %val, float %element, i32 %index
++  ret <4 x float> %ret
++}
++
++; Test v2f64 insertion into the first element.
++define <2 x double> @f26(<2 x double> %val, double *%ptr) {
++; CHECK-LABEL: f26:
++; CHECK: vleg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = load double *%ptr
++  %ret = insertelement <2 x double> %val, double %element, i32 0
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion into the last element.
++define <2 x double> @f27(<2 x double> %val, double *%ptr) {
++; CHECK-LABEL: f27:
++; CHECK: vleg %v24, 0(%r2), 1
++; CHECK: br %r14
++  %element = load double *%ptr
++  %ret = insertelement <2 x double> %val, double %element, i32 1
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion with the highest in-range offset.
++define <2 x double> @f28(<2 x double> %val, double *%base) {
++; CHECK-LABEL: f28:
++; CHECK: vleg %v24, 4088(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 511
++  %element = load double *%ptr
++  %ret = insertelement <2 x double> %val, double %element, i32 1
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion with the first out-of-range offset.
++define <2 x double> @f29(<2 x double> %val, double *%base) {
++; CHECK-LABEL: f29:
++; CHECK: aghi %r2, 4096
++; CHECK: vleg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 512
++  %element = load double *%ptr
++  %ret = insertelement <2 x double> %val, double %element, i32 0
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion into a variable element.
++define <2 x double> @f30(<2 x double> %val, double *%ptr, i32 %index) {
++; CHECK-LABEL: f30:
++; CHECK-NOT: vleg
++; CHECK: br %r14
++  %element = load double *%ptr
++  %ret = insertelement <2 x double> %val, double %element, i32 %index
++  ret <2 x double> %ret
++}
++
++; Test a v4i32 gather of the first element.
++define <4 x i32> @f31(<4 x i32> %val, <4 x i32> %index, i64 %base) {
++; CHECK-LABEL: f31:
++; CHECK: vgef %v24, 0(%v26,%r2), 0
++; CHECK: br %r14
++  %elem = extractelement <4 x i32> %index, i32 0
++  %ext = zext i32 %elem to i64
++  %add = add i64 %base, %ext
++  %ptr = inttoptr i64 %add to i32 *
++  %element = load i32 *%ptr
++  %ret = insertelement <4 x i32> %val, i32 %element, i32 0
++  ret <4 x i32> %ret
++}
++
++; Test a v4i32 gather of the last element.
++define <4 x i32> @f32(<4 x i32> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f32: ++; CHECK: vgef %v24, 0(%v26,%r2), 3 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 3 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to i32 * ++ %element = load i32 *%ptr ++ %ret = insertelement <4 x i32> %val, i32 %element, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 gather with the highest in-range offset. ++define <4 x i32> @f33(<4 x i32> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f33: ++; CHECK: vgef %v24, 4095(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 1 ++ %ext = zext i32 %elem to i64 ++ %add1 = add i64 %base, %ext ++ %add2 = add i64 %add1, 4095 ++ %ptr = inttoptr i64 %add2 to i32 * ++ %element = load i32 *%ptr ++ %ret = insertelement <4 x i32> %val, i32 %element, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 gather of the first element. ++define <2 x i64> @f34(<2 x i64> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f34: ++; CHECK: vgeg %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 0 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to i64 * ++ %element = load i64 *%ptr ++ %ret = insertelement <2 x i64> %val, i64 %element, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 gather of the last element. ++define <2 x i64> @f35(<2 x i64> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f35: ++; CHECK: vgeg %v24, 0(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 1 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to i64 * ++ %element = load i64 *%ptr ++ %ret = insertelement <2 x i64> %val, i64 %element, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test a v4f32 gather of the first element. ++define <4 x float> @f36(<4 x float> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f36: ++; CHECK: vgef %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 0 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to float * ++ %element = load float *%ptr ++ %ret = insertelement <4 x float> %val, float %element, i32 0 ++ ret <4 x float> %ret ++} ++ ++; Test a v4f32 gather of the last element. ++define <4 x float> @f37(<4 x float> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f37: ++; CHECK: vgef %v24, 0(%v26,%r2), 3 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 3 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to float * ++ %element = load float *%ptr ++ %ret = insertelement <4 x float> %val, float %element, i32 3 ++ ret <4 x float> %ret ++} ++ ++; Test a v2f64 gather of the first element. ++define <2 x double> @f38(<2 x double> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f38: ++; CHECK: vgeg %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 0 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to double * ++ %element = load double *%ptr ++ %ret = insertelement <2 x double> %val, double %element, i32 0 ++ ret <2 x double> %ret ++} ++ ++; Test a v2f64 gather of the last element. 
++define <2 x double> @f39(<2 x double> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f39: ++; CHECK: vgeg %v24, 0(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 1 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to double * ++ %element = load double *%ptr ++ %ret = insertelement <2 x double> %val, double %element, i32 1 ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-09.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-09.ll +@@ -0,0 +1,291 @@ ++; Test vector insertion of constants. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into the first element. ++define <16 x i8> @f1(<16 x i8> %val) { ++; CHECK-LABEL: f1: ++; CHECK: vleib %v24, 0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 0, i32 0 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into the last element. ++define <16 x i8> @f2(<16 x i8> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vleib %v24, 100, 15 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 100, i32 15 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion with the maximum signed value. ++define <16 x i8> @f3(<16 x i8> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vleib %v24, 127, 10 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 127, i32 10 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion with the minimum signed value. ++define <16 x i8> @f4(<16 x i8> %val) { ++; CHECK-LABEL: f4: ++; CHECK: vleib %v24, -128, 11 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 128, i32 11 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion with the maximum unsigned value. ++define <16 x i8> @f5(<16 x i8> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vleib %v24, -1, 12 ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 255, i32 12 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into a variable element. ++define <16 x i8> @f6(<16 x i8> %val, i32 %index) { ++; CHECK-LABEL: f6: ++; CHECK-NOT: vleib ++; CHECK: br %r14 ++ %ret = insertelement <16 x i8> %val, i8 0, i32 %index ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16 insertion into the first element. ++define <8 x i16> @f7(<8 x i16> %val) { ++; CHECK-LABEL: f7: ++; CHECK: vleih %v24, 0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 0, i32 0 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into the last element. ++define <8 x i16> @f8(<8 x i16> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vleih %v24, 0, 7 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 0, i32 7 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion with the maximum signed value. ++define <8 x i16> @f9(<8 x i16> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vleih %v24, 32767, 4 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 32767, i32 4 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion with the minimum signed value. ++define <8 x i16> @f10(<8 x i16> %val) { ++; CHECK-LABEL: f10: ++; CHECK: vleih %v24, -32768, 5 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 32768, i32 5 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion with the maximum unsigned value. ++define <8 x i16> @f11(<8 x i16> %val) { ++; CHECK-LABEL: f11: ++; CHECK: vleih %v24, -1, 6 ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 65535, i32 6 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into a variable element. 
++define <8 x i16> @f12(<8 x i16> %val, i32 %index) { ++; CHECK-LABEL: f12: ++; CHECK-NOT: vleih ++; CHECK: br %r14 ++ %ret = insertelement <8 x i16> %val, i16 0, i32 %index ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32 insertion into the first element. ++define <4 x i32> @f13(<4 x i32> %val) { ++; CHECK-LABEL: f13: ++; CHECK: vleif %v24, 0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 0, i32 0 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into the last element. ++define <4 x i32> @f14(<4 x i32> %val) { ++; CHECK-LABEL: f14: ++; CHECK: vleif %v24, 0, 3 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 0, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion with the maximum value allowed by VLEIF. ++define <4 x i32> @f15(<4 x i32> %val) { ++; CHECK-LABEL: f15: ++; CHECK: vleif %v24, 32767, 1 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 32767, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion with the next value up. ++define <4 x i32> @f16(<4 x i32> %val) { ++; CHECK-LABEL: f16: ++; CHECK-NOT: vleif ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 32768, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion with the minimum value allowed by VLEIF. ++define <4 x i32> @f17(<4 x i32> %val) { ++; CHECK-LABEL: f17: ++; CHECK: vleif %v24, -32768, 2 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 -32768, i32 2 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion with the next value down. ++define <4 x i32> @f18(<4 x i32> %val) { ++; CHECK-LABEL: f18: ++; CHECK-NOT: vleif ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 -32769, i32 2 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into a variable element. ++define <4 x i32> @f19(<4 x i32> %val, i32 %index) { ++; CHECK-LABEL: f19: ++; CHECK-NOT: vleif ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> %val, i32 0, i32 %index ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 insertion into the first element. ++define <2 x i64> @f20(<2 x i64> %val) { ++; CHECK-LABEL: f20: ++; CHECK: vleig %v24, 0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 0, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion into the last element. ++define <2 x i64> @f21(<2 x i64> %val) { ++; CHECK-LABEL: f21: ++; CHECK: vleig %v24, 0, 1 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 0, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion with the maximum value allowed by VLEIG. ++define <2 x i64> @f22(<2 x i64> %val) { ++; CHECK-LABEL: f22: ++; CHECK: vleig %v24, 32767, 1 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 32767, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion with the next value up. ++define <2 x i64> @f23(<2 x i64> %val) { ++; CHECK-LABEL: f23: ++; CHECK-NOT: vleig ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 32768, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion with the minimum value allowed by VLEIG. ++define <2 x i64> @f24(<2 x i64> %val) { ++; CHECK-LABEL: f24: ++; CHECK: vleig %v24, -32768, 0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 -32768, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion with the next value down. ++define <2 x i64> @f25(<2 x i64> %val) { ++; CHECK-LABEL: f25: ++; CHECK-NOT: vleig ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> %val, i64 -32769, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test v2i64 insertion into a variable element. 
++define <2 x i64> @f26(<2 x i64> %val, i32 %index) {
++; CHECK-LABEL: f26:
++; CHECK-NOT: vleig
++; CHECK: br %r14
++  %ret = insertelement <2 x i64> %val, i64 0, i32 %index
++  ret <2 x i64> %ret
++}
++
++; Test v4f32 insertion of 0 into the first element.
++define <4 x float> @f27(<4 x float> %val) {
++; CHECK-LABEL: f27:
++; CHECK: vleif %v24, 0, 0
++; CHECK: br %r14
++  %ret = insertelement <4 x float> %val, float 0.0, i32 0
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion of 0 into the last element.
++define <4 x float> @f28(<4 x float> %val) {
++; CHECK-LABEL: f28:
++; CHECK: vleif %v24, 0, 3
++; CHECK: br %r14
++  %ret = insertelement <4 x float> %val, float 0.0, i32 3
++  ret <4 x float> %ret
++}
++
++; Test v4f32 insertion of a nonzero value.
++define <4 x float> @f29(<4 x float> %val) {
++; CHECK-LABEL: f29:
++; CHECK-NOT: vleif
++; CHECK: br %r14
++  %ret = insertelement <4 x float> %val, float 1.0, i32 1
++  ret <4 x float> %ret
++}
++
++; Test v2f64 insertion of 0 into the first element.
++define <2 x double> @f30(<2 x double> %val) {
++; CHECK-LABEL: f30:
++; CHECK: vleig %v24, 0, 0
++; CHECK: br %r14
++  %ret = insertelement <2 x double> %val, double 0.0, i32 0
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion of 0 into the last element.
++define <2 x double> @f31(<2 x double> %val) {
++; CHECK-LABEL: f31:
++; CHECK: vleig %v24, 0, 1
++; CHECK: br %r14
++  %ret = insertelement <2 x double> %val, double 0.0, i32 1
++  ret <2 x double> %ret
++}
++
++; Test v2f64 insertion of a nonzero value.
++define <2 x double> @f32(<2 x double> %val) {
++; CHECK-LABEL: f32:
++; CHECK-NOT: vleig
++; CHECK: br %r14
++  %ret = insertelement <2 x double> %val, double 1.0, i32 1
++  ret <2 x double> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-move-10.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-move-10.ll
+@@ -0,0 +1,499 @@
++; Test vector extraction to memory.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test v16i8 extraction from the first element.
++define void @f1(<16 x i8> %val, i8 *%ptr) {
++; CHECK-LABEL: f1:
++; CHECK: vsteb %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <16 x i8> %val, i32 0
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v16i8 extraction from the last element.
++define void @f2(<16 x i8> %val, i8 *%ptr) {
++; CHECK-LABEL: f2:
++; CHECK: vsteb %v24, 0(%r2), 15
++; CHECK: br %r14
++  %element = extractelement <16 x i8> %val, i32 15
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v16i8 extraction of an invalid element. This must compile,
++; but we don't care what it does.
++define void @f3(<16 x i8> %val, i8 *%ptr) {
++; CHECK-LABEL: f3:
++; CHECK-NOT: vsteb %v24, 0(%r2), 16
++; CHECK: br %r14
++  %element = extractelement <16 x i8> %val, i32 16
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v16i8 extraction with the highest in-range offset.
++define void @f4(<16 x i8> %val, i8 *%base) {
++; CHECK-LABEL: f4:
++; CHECK: vsteb %v24, 4095(%r2), 10
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i32 4095
++  %element = extractelement <16 x i8> %val, i32 10
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v16i8 extraction with the first out-of-range offset.
++define void @f5(<16 x i8> %val, i8 *%base) {
++; CHECK-LABEL: f5:
++; CHECK: aghi %r2, 4096
++; CHECK: vsteb %v24, 0(%r2), 5
++; CHECK: br %r14
++  %ptr = getelementptr i8 *%base, i32 4096
++  %element = extractelement <16 x i8> %val, i32 5
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v16i8 extraction from a variable element.
++define void @f6(<16 x i8> %val, i8 *%ptr, i32 %index) {
++; CHECK-LABEL: f6:
++; CHECK-NOT: vsteb
++; CHECK: br %r14
++  %element = extractelement <16 x i8> %val, i32 %index
++  store i8 %element, i8 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction from the first element.
++define void @f7(<8 x i16> %val, i16 *%ptr) {
++; CHECK-LABEL: f7:
++; CHECK: vsteh %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <8 x i16> %val, i32 0
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction from the last element.
++define void @f8(<8 x i16> %val, i16 *%ptr) {
++; CHECK-LABEL: f8:
++; CHECK: vsteh %v24, 0(%r2), 7
++; CHECK: br %r14
++  %element = extractelement <8 x i16> %val, i32 7
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction of an invalid element. This must compile,
++; but we don't care what it does.
++define void @f9(<8 x i16> %val, i16 *%ptr) {
++; CHECK-LABEL: f9:
++; CHECK-NOT: vsteh %v24, 0(%r2), 8
++; CHECK: br %r14
++  %element = extractelement <8 x i16> %val, i32 8
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction with the highest in-range offset.
++define void @f10(<8 x i16> %val, i16 *%base) {
++; CHECK-LABEL: f10:
++; CHECK: vsteh %v24, 4094(%r2), 5
++; CHECK: br %r14
++  %ptr = getelementptr i16 *%base, i32 2047
++  %element = extractelement <8 x i16> %val, i32 5
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction with the first out-of-range offset.
++define void @f11(<8 x i16> %val, i16 *%base) {
++; CHECK-LABEL: f11:
++; CHECK: aghi %r2, 4096
++; CHECK: vsteh %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i16 *%base, i32 2048
++  %element = extractelement <8 x i16> %val, i32 1
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v8i16 extraction from a variable element.
++define void @f12(<8 x i16> %val, i16 *%ptr, i32 %index) {
++; CHECK-LABEL: f12:
++; CHECK-NOT: vsteh
++; CHECK: br %r14
++  %element = extractelement <8 x i16> %val, i32 %index
++  store i16 %element, i16 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction from the first element.
++define void @f13(<4 x i32> %val, i32 *%ptr) {
++; CHECK-LABEL: f13:
++; CHECK: vstef %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <4 x i32> %val, i32 0
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction from the last element.
++define void @f14(<4 x i32> %val, i32 *%ptr) {
++; CHECK-LABEL: f14:
++; CHECK: vstef %v24, 0(%r2), 3
++; CHECK: br %r14
++  %element = extractelement <4 x i32> %val, i32 3
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction of an invalid element. This must compile,
++; but we don't care what it does.
++define void @f15(<4 x i32> %val, i32 *%ptr) {
++; CHECK-LABEL: f15:
++; CHECK-NOT: vstef %v24, 0(%r2), 4
++; CHECK: br %r14
++  %element = extractelement <4 x i32> %val, i32 4
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction with the highest in-range offset.
++define void @f16(<4 x i32> %val, i32 *%base) {
++; CHECK-LABEL: f16:
++; CHECK: vstef %v24, 4092(%r2), 2
++; CHECK: br %r14
++  %ptr = getelementptr i32 *%base, i32 1023
++  %element = extractelement <4 x i32> %val, i32 2
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction with the first out-of-range offset.
++define void @f17(<4 x i32> %val, i32 *%base) {
++; CHECK-LABEL: f17:
++; CHECK: aghi %r2, 4096
++; CHECK: vstef %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i32 *%base, i32 1024
++  %element = extractelement <4 x i32> %val, i32 1
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v4i32 extraction from a variable element.
++define void @f18(<4 x i32> %val, i32 *%ptr, i32 %index) {
++; CHECK-LABEL: f18:
++; CHECK-NOT: vstef
++; CHECK: br %r14
++  %element = extractelement <4 x i32> %val, i32 %index
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction from the first element.
++define void @f19(<2 x i64> %val, i64 *%ptr) {
++; CHECK-LABEL: f19:
++; CHECK: vsteg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <2 x i64> %val, i32 0
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction from the last element.
++define void @f20(<2 x i64> %val, i64 *%ptr) {
++; CHECK-LABEL: f20:
++; CHECK: vsteg %v24, 0(%r2), 1
++; CHECK: br %r14
++  %element = extractelement <2 x i64> %val, i32 1
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction of an invalid element. This must compile,
++; but we don't care what it does.
++define void @f21(<2 x i64> %val, i64 *%ptr) {
++; CHECK-LABEL: f21:
++; CHECK-NOT: vsteg %v24, 0(%r2), 2
++; CHECK: br %r14
++  %element = extractelement <2 x i64> %val, i32 2
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction with the highest in-range offset.
++define void @f22(<2 x i64> %val, i64 *%base) {
++; CHECK-LABEL: f22:
++; CHECK: vsteg %v24, 4088(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr i64 *%base, i32 511
++  %element = extractelement <2 x i64> %val, i32 1
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction with the first out-of-range offset.
++define void @f23(<2 x i64> %val, i64 *%base) {
++; CHECK-LABEL: f23:
++; CHECK: aghi %r2, 4096
++; CHECK: vsteg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %ptr = getelementptr i64 *%base, i32 512
++  %element = extractelement <2 x i64> %val, i32 0
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v2i64 extraction from a variable element.
++define void @f24(<2 x i64> %val, i64 *%ptr, i32 %index) {
++; CHECK-LABEL: f24:
++; CHECK-NOT: vsteg
++; CHECK: br %r14
++  %element = extractelement <2 x i64> %val, i32 %index
++  store i64 %element, i64 *%ptr
++  ret void
++}
++
++; Test v4f32 extraction from the first element.
++define void @f25(<4 x float> %val, float *%ptr) {
++; CHECK-LABEL: f25:
++; CHECK: vstef %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <4 x float> %val, i32 0
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v4f32 extraction from the last element.
++define void @f26(<4 x float> %val, float *%ptr) {
++; CHECK-LABEL: f26:
++; CHECK: vstef %v24, 0(%r2), 3
++; CHECK: br %r14
++  %element = extractelement <4 x float> %val, i32 3
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v4f32 extraction of an invalid element. This must compile,
++; but we don't care what it does.
++define void @f27(<4 x float> %val, float *%ptr) {
++; CHECK-LABEL: f27:
++; CHECK-NOT: vstef %v24, 0(%r2), 4
++; CHECK: br %r14
++  %element = extractelement <4 x float> %val, i32 4
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v4f32 extraction with the highest in-range offset.
++define void @f28(<4 x float> %val, float *%base) {
++; CHECK-LABEL: f28:
++; CHECK: vstef %v24, 4092(%r2), 2
++; CHECK: br %r14
++  %ptr = getelementptr float *%base, i32 1023
++  %element = extractelement <4 x float> %val, i32 2
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v4f32 extraction with the first out-of-range offset.
++define void @f29(<4 x float> %val, float *%base) {
++; CHECK-LABEL: f29:
++; CHECK: aghi %r2, 4096
++; CHECK: vstef %v24, 0(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr float *%base, i32 1024
++  %element = extractelement <4 x float> %val, i32 1
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v4f32 extraction from a variable element.
++define void @f30(<4 x float> %val, float *%ptr, i32 %index) {
++; CHECK-LABEL: f30:
++; CHECK-NOT: vstef
++; CHECK: br %r14
++  %element = extractelement <4 x float> %val, i32 %index
++  store float %element, float *%ptr
++  ret void
++}
++
++; Test v2f64 extraction from the first element.
++define void @f32(<2 x double> %val, double *%ptr) {
++; CHECK-LABEL: f32:
++; CHECK: vsteg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %element = extractelement <2 x double> %val, i32 0
++  store double %element, double *%ptr
++  ret void
++}
++
++; Test v2f64 extraction from the last element.
++define void @f33(<2 x double> %val, double *%ptr) {
++; CHECK-LABEL: f33:
++; CHECK: vsteg %v24, 0(%r2), 1
++; CHECK: br %r14
++  %element = extractelement <2 x double> %val, i32 1
++  store double %element, double *%ptr
++  ret void
++}
++
++; Test v2f64 extraction with the highest in-range offset.
++define void @f34(<2 x double> %val, double *%base) {
++; CHECK-LABEL: f34:
++; CHECK: vsteg %v24, 4088(%r2), 1
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 511
++  %element = extractelement <2 x double> %val, i32 1
++  store double %element, double *%ptr
++  ret void
++}
++
++; Test v2f64 extraction with the first out-of-range offset.
++define void @f35(<2 x double> %val, double *%base) {
++; CHECK-LABEL: f35:
++; CHECK: aghi %r2, 4096
++; CHECK: vsteg %v24, 0(%r2), 0
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 512
++  %element = extractelement <2 x double> %val, i32 0
++  store double %element, double *%ptr
++  ret void
++}
++
++; Test v2f64 extraction from a variable element.
++define void @f36(<2 x double> %val, double *%ptr, i32 %index) {
++; CHECK-LABEL: f36:
++; CHECK-NOT: vsteg
++; CHECK: br %r14
++  %element = extractelement <2 x double> %val, i32 %index
++  store double %element, double *%ptr
++  ret void
++}
++
++; Test a v4i32 scatter of the first element.
++define void @f37(<4 x i32> %val, <4 x i32> %index, i64 %base) {
++; CHECK-LABEL: f37:
++; CHECK: vscef %v24, 0(%v26,%r2), 0
++; CHECK: br %r14
++  %elem = extractelement <4 x i32> %index, i32 0
++  %ext = zext i32 %elem to i64
++  %add = add i64 %base, %ext
++  %ptr = inttoptr i64 %add to i32 *
++  %element = extractelement <4 x i32> %val, i32 0
++  store i32 %element, i32 *%ptr
++  ret void
++}
++
++; Test a v4i32 scatter of the last element.
++define void @f38(<4 x i32> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f38: ++; CHECK: vscef %v24, 0(%v26,%r2), 3 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 3 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to i32 * ++ %element = extractelement <4 x i32> %val, i32 3 ++ store i32 %element, i32 *%ptr ++ ret void ++} ++ ++; Test a v4i32 scatter with the highest in-range offset. ++define void @f39(<4 x i32> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f39: ++; CHECK: vscef %v24, 4095(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 1 ++ %ext = zext i32 %elem to i64 ++ %add1 = add i64 %base, %ext ++ %add2 = add i64 %add1, 4095 ++ %ptr = inttoptr i64 %add2 to i32 * ++ %element = extractelement <4 x i32> %val, i32 1 ++ store i32 %element, i32 *%ptr ++ ret void ++} ++ ++; Test a v2i64 scatter of the first element. ++define void @f40(<2 x i64> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f40: ++; CHECK: vsceg %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 0 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to i64 * ++ %element = extractelement <2 x i64> %val, i32 0 ++ store i64 %element, i64 *%ptr ++ ret void ++} ++ ++; Test a v2i64 scatter of the last element. ++define void @f41(<2 x i64> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f41: ++; CHECK: vsceg %v24, 0(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 1 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to i64 * ++ %element = extractelement <2 x i64> %val, i32 1 ++ store i64 %element, i64 *%ptr ++ ret void ++} ++ ++; Test a v4f32 scatter of the first element. ++define void @f42(<4 x float> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f42: ++; CHECK: vscef %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 0 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to float * ++ %element = extractelement <4 x float> %val, i32 0 ++ store float %element, float *%ptr ++ ret void ++} ++ ++; Test a v4f32 scatter of the last element. ++define void @f43(<4 x float> %val, <4 x i32> %index, i64 %base) { ++; CHECK-LABEL: f43: ++; CHECK: vscef %v24, 0(%v26,%r2), 3 ++; CHECK: br %r14 ++ %elem = extractelement <4 x i32> %index, i32 3 ++ %ext = zext i32 %elem to i64 ++ %add = add i64 %base, %ext ++ %ptr = inttoptr i64 %add to float * ++ %element = extractelement <4 x float> %val, i32 3 ++ store float %element, float *%ptr ++ ret void ++} ++ ++; Test a v2f64 scatter of the first element. ++define void @f44(<2 x double> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f44: ++; CHECK: vsceg %v24, 0(%v26,%r2), 0 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 0 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to double * ++ %element = extractelement <2 x double> %val, i32 0 ++ store double %element, double *%ptr ++ ret void ++} ++ ++; Test a v2f64 scatter of the last element. 
++define void @f45(<2 x double> %val, <2 x i64> %index, i64 %base) { ++; CHECK-LABEL: f45: ++; CHECK: vsceg %v24, 0(%v26,%r2), 1 ++; CHECK: br %r14 ++ %elem = extractelement <2 x i64> %index, i32 1 ++ %add = add i64 %base, %elem ++ %ptr = inttoptr i64 %add to double * ++ %element = extractelement <2 x double> %val, i32 1 ++ store double %element, double *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-11.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-11.ll +@@ -0,0 +1,111 @@ ++; Test insertions of register values into a nonzero index of an undef. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into an undef, with an arbitrary index. ++define <16 x i8> @f1(i8 %val) { ++; CHECK-LABEL: f1: ++; CHECK: vlvgb %v24, %r2, 12 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 12 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into an undef, with the first good index for VLVGP. ++define <16 x i8> @f2(i8 %val) { ++; CHECK-LABEL: f2: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into an undef, with the second good index for VLVGP. ++define <16 x i8> @f3(i8 %val) { ++; CHECK-LABEL: f3: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 15 ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16 insertion into an undef, with an arbitrary index. ++define <8 x i16> @f4(i16 %val) { ++; CHECK-LABEL: f4: ++; CHECK: vlvgh %v24, %r2, 5 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 5 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into an undef, with the first good index for VLVGP. ++define <8 x i16> @f5(i16 %val) { ++; CHECK-LABEL: f5: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 3 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into an undef, with the second good index for VLVGP. ++define <8 x i16> @f6(i16 %val) { ++; CHECK-LABEL: f6: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 7 ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32 insertion into an undef, with an arbitrary index. ++define <4 x i32> @f7(i32 %val) { ++; CHECK-LABEL: f7: ++; CHECK: vlvgf %v24, %r2, 2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 2 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into an undef, with the first good index for VLVGP. ++define <4 x i32> @f8(i32 %val) { ++; CHECK-LABEL: f8: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into an undef, with the second good index for VLVGP. ++define <4 x i32> @f9(i32 %val) { ++; CHECK-LABEL: f9: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 insertion into an undef. ++define <2 x i64> @f10(i64 %val) { ++; CHECK-LABEL: f10: ++; CHECK: vlvgp %v24, %r2, %r2 ++; CHECK-NEXT: br %r14 ++ %ret = insertelement <2 x i64> undef, i64 %val, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32 insertion into an undef. 
++define <4 x float> @f11(float %val) { ++; CHECK-LABEL: f11: ++; CHECK: vrepf %v24, %v0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> undef, float %val, i32 2 ++ ret <4 x float> %ret ++} ++ ++; Test v2f64 insertion into an undef. ++define <2 x double> @f12(double %val) { ++; CHECK-LABEL: f12: ++; CHECK: vrepg %v24, %v0, 0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x double> undef, double %val, i32 1 ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-12.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-12.ll +@@ -0,0 +1,123 @@ ++; Test insertions of memory values into a nonzero index of an undef. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into an undef, with an arbitrary index. ++define <16 x i8> @f1(i8 *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vlrepb %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 12 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into an undef, with the first good index for VLVGP. ++define <16 x i8> @f2(i8 *%ptr) { ++; CHECK-LABEL: f2: ++; CHECK: {{vlrepb|vllezb}} %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test v16i8 insertion into an undef, with the second good index for VLVGP. ++define <16 x i8> @f3(i8 *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vlrepb %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> undef, i8 %val, i32 15 ++ ret <16 x i8> %ret ++} ++ ++; Test v8i16 insertion into an undef, with an arbitrary index. ++define <8 x i16> @f4(i16 *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK: vlreph %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i16 *%ptr ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 5 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into an undef, with the first good index for VLVGP. ++define <8 x i16> @f5(i16 *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: {{vlreph|vllezh}} %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i16 *%ptr ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 3 ++ ret <8 x i16> %ret ++} ++ ++; Test v8i16 insertion into an undef, with the second good index for VLVGP. ++define <8 x i16> @f6(i16 *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vlreph %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i16 *%ptr ++ %ret = insertelement <8 x i16> undef, i16 %val, i32 7 ++ ret <8 x i16> %ret ++} ++ ++; Test v4i32 insertion into an undef, with an arbitrary index. ++define <4 x i32> @f7(i32 *%ptr) { ++; CHECK-LABEL: f7: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i32 *%ptr ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 2 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into an undef, with the first good index for VLVGP. ++define <4 x i32> @f8(i32 *%ptr) { ++; CHECK-LABEL: f8: ++; CHECK: {{vlrepf|vllezf}} %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i32 *%ptr ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test v4i32 insertion into an undef, with the second good index for VLVGP. ++define <4 x i32> @f9(i32 *%ptr) { ++; CHECK-LABEL: f9: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i32 *%ptr ++ %ret = insertelement <4 x i32> undef, i32 %val, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 insertion into an undef. 
++define <2 x i64> @f10(i64 *%ptr) { ++; CHECK-LABEL: f10: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK-NEXT: br %r14 ++ %val = load i64 *%ptr ++ %ret = insertelement <2 x i64> undef, i64 %val, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32 insertion into an undef. ++define <4 x float> @f11(float *%ptr) { ++; CHECK-LABEL: f11: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load float *%ptr ++ %ret = insertelement <4 x float> undef, float %val, i32 2 ++ ret <4 x float> %ret ++} ++ ++; Test v2f64 insertion into an undef. ++define <2 x double> @f12(double *%ptr) { ++; CHECK-LABEL: f12: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load double *%ptr ++ %ret = insertelement <2 x double> undef, double %val, i32 1 ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-13.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-13.ll +@@ -0,0 +1,69 @@ ++; Test insertions of register values into 0. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test v16i8 insertion into 0. ++define <16 x i8> @f1(i8 %val1, i8 %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vgbm %v24, 0 ++; CHECK-DAG: vlvgb %v24, %r2, 2 ++; CHECK-DAG: vlvgb %v24, %r3, 12 ++; CHECK: br %r14 ++ %vec1 = insertelement <16 x i8> zeroinitializer, i8 %val1, i32 2 ++ %vec2 = insertelement <16 x i8> %vec1, i8 %val2, i32 12 ++ ret <16 x i8> %vec2 ++} ++ ++; Test v8i16 insertion into 0. ++define <8 x i16> @f2(i16 %val1, i16 %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vgbm %v24, 0 ++; CHECK-DAG: vlvgh %v24, %r2, 3 ++; CHECK-DAG: vlvgh %v24, %r3, 5 ++; CHECK: br %r14 ++ %vec1 = insertelement <8 x i16> zeroinitializer, i16 %val1, i32 3 ++ %vec2 = insertelement <8 x i16> %vec1, i16 %val2, i32 5 ++ ret <8 x i16> %vec2 ++} ++ ++; Test v4i32 insertion into 0. ++define <4 x i32> @f3(i32 %val) { ++; CHECK-LABEL: f3: ++; CHECK: vgbm %v24, 0 ++; CHECK: vlvgf %v24, %r2, 3 ++; CHECK: br %r14 ++ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 3 ++ ret <4 x i32> %ret ++} ++ ++; Test v2i64 insertion into 0. ++define <2 x i64> @f4(i64 %val) { ++; CHECK-LABEL: f4: ++; CHECK: lghi [[REG:%r[0-5]]], 0 ++; CHECK: vlvgp %v24, [[REG]], %r2 ++; CHECK: br %r14 ++ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 1 ++ ret <2 x i64> %ret ++} ++ ++; Test v4f32 insertion into 0. ++define <4 x float> @f5(float %val) { ++; CHECK-LABEL: f5: ++; CHECK-DAG: vuplhf [[REG:%v[0-9]+]], %v0 ++; CHECK-DAG: vgbm [[ZERO:%v[0-9]+]], 0 ++; CHECK: vmrhg %v24, [[ZERO]], [[REG]] ++; CHECK: br %r14 ++ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 3 ++ ret <4 x float> %ret ++} ++ ++; Test v2f64 insertion into 0. ++define <2 x double> @f6(double %val) { ++; CHECK-LABEL: f6: ++; CHECK: vgbm [[REG:%v[0-9]+]], 0 ++; CHECK: vmrhg %v24, [[REG]], %v0 ++; CHECK: br %r14 ++ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 1 ++ ret <2 x double> %ret ++} ++ +Index: llvm-36/test/CodeGen/SystemZ/vec-move-14.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-14.ll +@@ -0,0 +1,96 @@ ++; Test insertions of memory values into 0. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test VLLEZB. 
++define <16 x i8> @f1(i8 *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vllezb %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test VLLEZB with the highest in-range offset. ++define <16 x i8> @f2(i8 *%base) { ++; CHECK-LABEL: f2: ++; CHECK: vllezb %v24, 4095(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4095 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test VLLEZB with the next highest offset. ++define <16 x i8> @f3(i8 *%base) { ++; CHECK-LABEL: f3: ++; CHECK-NOT: vllezb %v24, 4096(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4096 ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test that VLLEZB allows an index. ++define <16 x i8> @f4(i8 *%base, i64 %index) { ++; CHECK-LABEL: f4: ++; CHECK: vllezb %v24, 0({{%r2,%r3|%r3,%r2}}) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 %index ++ %val = load i8 *%ptr ++ %ret = insertelement <16 x i8> zeroinitializer, i8 %val, i32 7 ++ ret <16 x i8> %ret ++} ++ ++; Test VLLEZH. ++define <8 x i16> @f5(i16 *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vllezh %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load i16 *%ptr ++ %ret = insertelement <8 x i16> zeroinitializer, i16 %val, i32 3 ++ ret <8 x i16> %ret ++} ++ ++; Test VLLEZF. ++define <4 x i32> @f6(i32 *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vllezf %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load i32 *%ptr ++ %ret = insertelement <4 x i32> zeroinitializer, i32 %val, i32 1 ++ ret <4 x i32> %ret ++} ++ ++; Test VLLEZG. ++define <2 x i64> @f7(i64 *%ptr) { ++; CHECK-LABEL: f7: ++; CHECK: vllezg %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load i64 *%ptr ++ %ret = insertelement <2 x i64> zeroinitializer, i64 %val, i32 0 ++ ret <2 x i64> %ret ++} ++ ++; Test VLLEZF with a float. ++define <4 x float> @f8(float *%ptr) { ++; CHECK-LABEL: f8: ++; CHECK: vllezf %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load float *%ptr ++ %ret = insertelement <4 x float> zeroinitializer, float %val, i32 1 ++ ret <4 x float> %ret ++} ++ ++; Test VLLEZG with a double. ++define <2 x double> @f9(double *%ptr) { ++; CHECK-LABEL: f9: ++; CHECK: vllezg %v24, 0(%r2) ++; CHECK: br %r14 ++ %val = load double *%ptr ++ %ret = insertelement <2 x double> zeroinitializer, double %val, i32 0 ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-15.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-15.ll +@@ -0,0 +1,105 @@ ++; Test vector sign-extending loads. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i1->v16i8 extension. ++define <16 x i8> @f1(<16 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <16 x i1> *%ptr ++ %ret = sext <16 x i1> %val to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i1->v8i16 extension. ++define <8 x i16> @f2(<8 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <8 x i1> *%ptr ++ %ret = sext <8 x i1> %val to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i8->v8i16 extension. 
++define <8 x i16> @f3(<8 x i8> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphb %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <8 x i8> *%ptr ++ %ret = sext <8 x i8> %val to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i1->v4i32 extension. ++define <4 x i32> @f4(<4 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <4 x i1> *%ptr ++ %ret = sext <4 x i1> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i8->v4i32 extension. ++define <4 x i32> @f5(<4 x i8> *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuphh %v24, [[REG2]] ++; CHECK: br %r14 ++ %val = load <4 x i8> *%ptr ++ %ret = sext <4 x i8> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i16->v4i32 extension. ++define <4 x i32> @f6(<4 x i16> *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphh %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <4 x i16> *%ptr ++ %ret = sext <4 x i16> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i1->v2i64 extension. ++define <2 x i64> @f7(<2 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <2 x i1> *%ptr ++ %ret = sext <2 x i1> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i8->v2i64 extension. ++define <2 x i64> @f8(<2 x i8> *%ptr) { ++; CHECK-LABEL: f8: ++; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphb [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuphh [[REG3:%v[0-9]+]], [[REG2]] ++; CHECK: vuphf %v24, [[REG3]] ++; CHECK: br %r14 ++ %val = load <2 x i8> *%ptr ++ %ret = sext <2 x i8> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i16->v2i64 extension. ++define <2 x i64> @f9(<2 x i16> *%ptr) { ++; CHECK-LABEL: f9: ++; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphh [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuphf %v24, [[REG2]] ++; CHECK: br %r14 ++ %val = load <2 x i16> *%ptr ++ %ret = sext <2 x i16> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i32->v2i64 extension. ++define <2 x i64> @f10(<2 x i32> *%ptr) { ++; CHECK-LABEL: f10: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuphf %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <2 x i32> *%ptr ++ %ret = sext <2 x i32> %val to <2 x i64> ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-16.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-16.ll +@@ -0,0 +1,105 @@ ++; Test vector zero-extending loads. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i1->v16i8 extension. ++define <16 x i8> @f1(<16 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <16 x i1> *%ptr ++ %ret = zext <16 x i1> %val to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i1->v8i16 extension. ++define <8 x i16> @f2(<8 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <8 x i1> *%ptr ++ %ret = zext <8 x i1> %val to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i8->v8i16 extension. ++define <8 x i16> @f3(<8 x i8> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhb %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <8 x i8> *%ptr ++ %ret = zext <8 x i8> %val to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i1->v4i32 extension. ++define <4 x i32> @f4(<4 x i1> *%ptr) { ++; No expected output, but must compile. 
++ %val = load <4 x i1> *%ptr ++ %ret = zext <4 x i1> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i8->v4i32 extension. ++define <4 x i32> @f5(<4 x i8> *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuplhh %v24, [[REG2]] ++; CHECK: br %r14 ++ %val = load <4 x i8> *%ptr ++ %ret = zext <4 x i8> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i16->v4i32 extension. ++define <4 x i32> @f6(<4 x i16> *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhh %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <4 x i16> *%ptr ++ %ret = zext <4 x i16> %val to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i1->v2i64 extension. ++define <2 x i64> @f7(<2 x i1> *%ptr) { ++; No expected output, but must compile. ++ %val = load <2 x i1> *%ptr ++ %ret = zext <2 x i1> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i8->v2i64 extension. ++define <2 x i64> @f8(<2 x i8> *%ptr) { ++; CHECK-LABEL: f8: ++; CHECK: vlreph [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhb [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuplhh [[REG3:%v[0-9]+]], [[REG2]] ++; CHECK: vuplhf %v24, [[REG3]] ++; CHECK: br %r14 ++ %val = load <2 x i8> *%ptr ++ %ret = zext <2 x i8> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i16->v2i64 extension. ++define <2 x i64> @f9(<2 x i16> *%ptr) { ++; CHECK-LABEL: f9: ++; CHECK: vlrepf [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhh [[REG2:%v[0-9]+]], [[REG1]] ++; CHECK: vuplhf %v24, [[REG2]] ++; CHECK: br %r14 ++ %val = load <2 x i16> *%ptr ++ %ret = zext <2 x i16> %val to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i32->v2i64 extension. ++define <2 x i64> @f10(<2 x i32> *%ptr) { ++; CHECK-LABEL: f10: ++; CHECK: vlrepg [[REG1:%v[0-9]+]], 0(%r2) ++; CHECK: vuplhf %v24, [[REG1]] ++; CHECK: br %r14 ++ %val = load <2 x i32> *%ptr ++ %ret = zext <2 x i32> %val to <2 x i64> ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-move-17.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-move-17.ll +@@ -0,0 +1,104 @@ ++; Test vector truncating stores. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8->v16i1 truncation. ++define void @f1(<16 x i8> %val, <16 x i1> *%ptr) { ++; No expected output, but must compile. ++ %trunc = trunc <16 x i8> %val to <16 x i1> ++ store <16 x i1> %trunc, <16 x i1> *%ptr ++ ret void ++} ++ ++; Test a v8i16->v8i1 truncation. ++define void @f2(<8 x i16> %val, <8 x i1> *%ptr) { ++; No expected output, but must compile. ++ %trunc = trunc <8 x i16> %val to <8 x i1> ++ store <8 x i1> %trunc, <8 x i1> *%ptr ++ ret void ++} ++ ++; Test a v8i16->v8i8 truncation. ++define void @f3(<8 x i16> %val, <8 x i8> *%ptr) { ++; CHECK-LABEL: f3: ++; CHECK: vpkh [[REG1:%v[0-9]+]], %v24, %v24 ++; CHECK: vsteg [[REG1]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <8 x i16> %val to <8 x i8> ++ store <8 x i8> %trunc, <8 x i8> *%ptr ++ ret void ++} ++ ++; Test a v4i32->v4i1 truncation. ++define void @f4(<4 x i32> %val, <4 x i1> *%ptr) { ++; No expected output, but must compile. ++ %trunc = trunc <4 x i32> %val to <4 x i1> ++ store <4 x i1> %trunc, <4 x i1> *%ptr ++ ret void ++} ++ ++; Test a v4i32->v4i8 truncation. At the moment we use a VPERM rather than ++; a chain of packs. 
++define void @f5(<4 x i32> %val, <4 x i8> *%ptr) { ++; CHECK-LABEL: f5: ++; CHECK: vperm [[REG:%v[0-9]+]], ++; CHECK: vstef [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i8> ++ store <4 x i8> %trunc, <4 x i8> *%ptr ++ ret void ++} ++ ++; Test a v4i32->v4i16 truncation. ++define void @f6(<4 x i32> %val, <4 x i16> *%ptr) { ++; CHECK-LABEL: f6: ++; CHECK: vpkf [[REG1:%v[0-9]+]], %v24, %v24 ++; CHECK: vsteg [[REG1]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i16> ++ store <4 x i16> %trunc, <4 x i16> *%ptr ++ ret void ++} ++ ++; Test a v2i64->v2i1 truncation. ++define void @f7(<2 x i64> %val, <2 x i1> *%ptr) { ++; No expected output, but must compile. ++ %trunc = trunc <2 x i64> %val to <2 x i1> ++ store <2 x i1> %trunc, <2 x i1> *%ptr ++ ret void ++} ++ ++; Test a v2i64->v2i8 truncation. At the moment we use a VPERM rather than ++; a chain of packs. ++define void @f8(<2 x i64> %val, <2 x i8> *%ptr) { ++; CHECK-LABEL: f8: ++; CHECK: vperm [[REG:%v[0-9]+]], ++; CHECK: vsteh [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i8> ++ store <2 x i8> %trunc, <2 x i8> *%ptr ++ ret void ++} ++ ++; Test a v2i64->v2i16 truncation. At the moment we use a VPERM rather than ++; a chain of packs. ++define void @f9(<2 x i64> %val, <2 x i16> *%ptr) { ++; CHECK-LABEL: f9: ++; CHECK: vperm [[REG:%v[0-9]+]], ++; CHECK: vstef [[REG]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i16> ++ store <2 x i16> %trunc, <2 x i16> *%ptr ++ ret void ++} ++ ++; Test a v2i64->v2i32 truncation. ++define void @f10(<2 x i64> %val, <2 x i32> *%ptr) { ++; CHECK-LABEL: f10: ++; CHECK: vpkg [[REG1:%v[0-9]+]], %v24, %v24 ++; CHECK: vsteg [[REG1]], 0(%r2) ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i32> ++ store <2 x i32> %trunc, <2 x i32> *%ptr ++ ret void ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-mul-01.ll +@@ -0,0 +1,60 @@ ++; Test vector multiplication. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 multiplication. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vmlb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = mul <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 multiplication. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vmlhw %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = mul <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 multiplication. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vmlf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = mul <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 multiplication. There's no vector equivalent. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK-NOT: vmlg ++; CHECK: br %r14 ++ %ret = mul <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2f64 multiplication. ++define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vfmdb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = fmul <2 x double> %val1, %val2 ++ ret <2 x double> %ret ++} ++ ++; Test an f64 multiplication that uses vector registers. 
++define double @f6(<2 x double> %val1, <2 x double> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: wfmdb %f0, %v24, %v26
++; CHECK: br %r14
++  %scalar1 = extractelement <2 x double> %val1, i32 0
++  %scalar2 = extractelement <2 x double> %val2, i32 0
++  %ret = fmul double %scalar1, %scalar2
++  ret double %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-mul-02.ll
+@@ -0,0 +1,63 @@
++; Test vector multiply-and-add.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
++
++; Test a v16i8 multiply-and-add.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2,
++                     <16 x i8> %val3) {
++; CHECK-LABEL: f1:
++; CHECK: vmalb %v24, %v26, %v28, %v30
++; CHECK: br %r14
++  %mul = mul <16 x i8> %val1, %val2
++  %ret = add <16 x i8> %mul, %val3
++  ret <16 x i8> %ret
++}
++
++; Test a v8i16 multiply-and-add.
++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2,
++                     <8 x i16> %val3) {
++; CHECK-LABEL: f2:
++; CHECK: vmalhw %v24, %v26, %v28, %v30
++; CHECK: br %r14
++  %mul = mul <8 x i16> %val1, %val2
++  %ret = add <8 x i16> %mul, %val3
++  ret <8 x i16> %ret
++}
++
++; Test a v4i32 multiply-and-add.
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2,
++                     <4 x i32> %val3) {
++; CHECK-LABEL: f3:
++; CHECK: vmalf %v24, %v26, %v28, %v30
++; CHECK: br %r14
++  %mul = mul <4 x i32> %val1, %val2
++  %ret = add <4 x i32> %mul, %val3
++  ret <4 x i32> %ret
++}
++
++; Test a v2f64 multiply-and-add.
++define <2 x double> @f4(<2 x double> %dummy, <2 x double> %val1,
++                        <2 x double> %val2, <2 x double> %val3) {
++; CHECK-LABEL: f4:
++; CHECK: vfmadb %v24, %v26, %v28, %v30
++; CHECK: br %r14
++  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
++                                            <2 x double> %val2,
++                                            <2 x double> %val3)
++  ret <2 x double> %ret
++}
++
++; Test a v2f64 multiply-and-subtract.
++define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val1,
++                        <2 x double> %val2, <2 x double> %val3) {
++; CHECK-LABEL: f5:
++; CHECK: vfmsdb %v24, %v26, %v28, %v30
++; CHECK: br %r14
++  %negval3 = fsub <2 x double> <double -0.0, double -0.0>, %val3
++  %ret = call <2 x double> @llvm.fma.v2f64 (<2 x double> %val1,
++                                            <2 x double> %val2,
++                                            <2 x double> %negval3)
++  ret <2 x double> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-neg-01.ll
+@@ -0,0 +1,58 @@
++; Test vector negation.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 negation.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vlcb %v24, %v26
++; CHECK: br %r14
++  %ret = sub <16 x i8> zeroinitializer, %val
++  ret <16 x i8> %ret
++}
++
++; Test a v8i16 negation.
++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vlch %v24, %v26
++; CHECK: br %r14
++  %ret = sub <8 x i16> zeroinitializer, %val
++  ret <8 x i16> %ret
++}
++
++; Test a v4i32 negation.
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val) {
++; CHECK-LABEL: f3:
++; CHECK: vlcf %v24, %v26
++; CHECK: br %r14
++  %ret = sub <4 x i32> zeroinitializer, %val
++  ret <4 x i32> %ret
++}
++
++; Test a v2i64 negation.
++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val) {
++; CHECK-LABEL: f4:
++; CHECK: vlcg %v24, %v26
++; CHECK: br %r14
++  %ret = sub <2 x i64> zeroinitializer, %val
++  ret <2 x i64> %ret
++}
++
++; Test a v2f64 negation.
++define <2 x double> @f5(<2 x double> %dummy, <2 x double> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vflcdb %v24, %v26
++; CHECK: br %r14
++  %ret = fsub <2 x double> <double -0.0, double -0.0>, %val
++  ret <2 x double> %ret
++}
++
++; Test an f64 negation that uses vector registers.
++define double @f6(<2 x double> %val) {
++; CHECK-LABEL: f6:
++; CHECK: wflcdb %f0, %v24
++; CHECK: br %r14
++  %scalar = extractelement <2 x double> %val, i32 0
++  %ret = fsub double -0.0, %scalar
++  ret double %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-or-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-or-01.ll
+@@ -0,0 +1,39 @@
++; Test vector OR.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 OR.
++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vo %v24, %v26, %v28
++; CHECK: br %r14
++  %ret = or <16 x i8> %val1, %val2
++  ret <16 x i8> %ret
++}
++
++; Test a v8i16 OR.
++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vo %v24, %v26, %v28
++; CHECK: br %r14
++  %ret = or <8 x i16> %val1, %val2
++  ret <8 x i16> %ret
++}
++
++; Test a v4i32 OR.
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vo %v24, %v26, %v28
++; CHECK: br %r14
++  %ret = or <4 x i32> %val1, %val2
++  ret <4 x i32> %ret
++}
++
++; Test a v2i64 OR.
++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vo %v24, %v26, %v28
++; CHECK: br %r14
++  %ret = or <2 x i64> %val1, %val2
++  ret <2 x i64> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-or-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-or-02.ll
+@@ -0,0 +1,107 @@
++; Test vector (or (and X, Z), (and Y, (not Z))) patterns.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test v16i8.
++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
++; CHECK-LABEL: f1:
++; CHECK: vsel %v24, %v24, %v26, %v28
++; CHECK: br %r14
++  %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1>
++  %and1 = and <16 x i8> %val1, %val3
++  %and2 = and <16 x i8> %val2, %not
++  %ret = or <16 x i8> %and1, %and2
++  ret <16 x i8> %ret
++}
++
++; ...and again with the XOR applied to the other operand of the AND.
++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2, <16 x i8> %val3) {
++; CHECK-LABEL: f2:
++; CHECK: vsel %v24, %v26, %v24, %v28
++; CHECK: br %r14
++  %not = xor <16 x i8> %val3, <i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1,
++                               i8 -1, i8 -1, i8 -1, i8 -1>
++  %and1 = and <16 x i8> %val1, %not
++  %and2 = and <16 x i8> %val2, %val3
++  %ret = or <16 x i8> %and1, %and2
++  ret <16 x i8> %ret
++}
++
++; Test v8i16.
++define <8 x i16> @f3(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
++; CHECK-LABEL: f3:
++; CHECK: vsel %v24, %v24, %v26, %v28
++; CHECK: br %r14
++  %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
++                               i16 -1, i16 -1, i16 -1, i16 -1>
++  %and1 = and <8 x i16> %val1, %val3
++  %and2 = and <8 x i16> %val2, %not
++  %ret = or <8 x i16> %and1, %and2
++  ret <8 x i16> %ret
++}
++
++; ...and again with the XOR applied to the other operand of the AND.
++define <8 x i16> @f4(<8 x i16> %val1, <8 x i16> %val2, <8 x i16> %val3) {
++; CHECK-LABEL: f4:
++; CHECK: vsel %v24, %v26, %v24, %v28
++; CHECK: br %r14
++  %not = xor <8 x i16> %val3, <i16 -1, i16 -1, i16 -1, i16 -1,
++                               i16 -1, i16 -1, i16 -1, i16 -1>
++  %and1 = and <8 x i16> %val1, %not
++  %and2 = and <8 x i16> %val2, %val3
++  %ret = or <8 x i16> %and1, %and2
++  ret <8 x i16> %ret
++}
++
++; Test v4i32.
++define <4 x i32> @f5(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
++; CHECK-LABEL: f5:
++; CHECK: vsel %v24, %v24, %v26, %v28
++; CHECK: br %r14
++  %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
++  %and1 = and <4 x i32> %val1, %val3
++  %and2 = and <4 x i32> %val2, %not
++  %ret = or <4 x i32> %and1, %and2
++  ret <4 x i32> %ret
++}
++
++; ...and again with the XOR applied to the other operand of the AND.
++define <4 x i32> @f6(<4 x i32> %val1, <4 x i32> %val2, <4 x i32> %val3) {
++; CHECK-LABEL: f6:
++; CHECK: vsel %v24, %v26, %v24, %v28
++; CHECK: br %r14
++  %not = xor <4 x i32> %val3, <i32 -1, i32 -1, i32 -1, i32 -1>
++  %and1 = and <4 x i32> %val1, %not
++  %and2 = and <4 x i32> %val2, %val3
++  %ret = or <4 x i32> %and1, %and2
++  ret <4 x i32> %ret
++}
++
++; Test v2i64.
++define <2 x i64> @f7(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
++; CHECK-LABEL: f7:
++; CHECK: vsel %v24, %v24, %v26, %v28
++; CHECK: br %r14
++  %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
++  %and1 = and <2 x i64> %val1, %val3
++  %and2 = and <2 x i64> %val2, %not
++  %ret = or <2 x i64> %and1, %and2
++  ret <2 x i64> %ret
++}
++
++; ...and again with the XOR applied to the other operand of the AND.
++define <2 x i64> @f8(<2 x i64> %val1, <2 x i64> %val2, <2 x i64> %val3) {
++; CHECK-LABEL: f8:
++; CHECK: vsel %v24, %v26, %v24, %v28
++; CHECK: br %r14
++  %not = xor <2 x i64> %val3, <i64 -1, i64 -1>
++  %and1 = and <2 x i64> %val1, %not
++  %and2 = and <2 x i64> %val2, %val3
++  %ret = or <2 x i64> %and1, %and2
++  ret <2 x i64> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-perm-01.ll
+@@ -0,0 +1,175 @@
++; Test vector splat.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test v16i8 splat of the first element.
++define <16 x i8> @f1(<16 x i8> %val) {
++; CHECK-LABEL: f1:
++; CHECK: vrepb %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
++                       <16 x i32> zeroinitializer
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 splat of the last element.
++define <16 x i8> @f2(<16 x i8> %val) {
++; CHECK-LABEL: f2:
++; CHECK: vrepb %v24, %v24, 15
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
++                       <16 x i32> <i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15>
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <16 x i8> @f3(<16 x i8> %val) {
++; CHECK-LABEL: f3:
++; CHECK: vrepb %v24, %v24, 4
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
++                       <16 x i32> <i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20>
++  ret <16 x i8> %ret
++}
++
++; Test v8i16 splat of the first element.
++define <8 x i16> @f4(<8 x i16> %val) {
++; CHECK-LABEL: f4:
++; CHECK: vreph %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
++                       <8 x i32> zeroinitializer
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 splat of the last element.
++define <8 x i16> @f5(<8 x i16> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vreph %v24, %v24, 7
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
++                       <8 x i32> <i32 7, i32 7, i32 7, i32 7,
++                                  i32 7, i32 7, i32 7, i32 7>
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <8 x i16> @f6(<8 x i16> %val) {
++; CHECK-LABEL: f6:
++; CHECK: vreph %v24, %v24, 2
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
++                       <8 x i32> <i32 10, i32 10, i32 10, i32 10,
++                                  i32 10, i32 10, i32 10, i32 10>
++  ret <8 x i16> %ret
++}
++
++; Test v4i32 splat of the first element.
++define <4 x i32> @f7(<4 x i32> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vrepf %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
++                       <4 x i32> zeroinitializer
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 splat of the last element.
++define <4 x i32> @f8(<4 x i32> %val) {
++; CHECK-LABEL: f8:
++; CHECK: vrepf %v24, %v24, 3
++; CHECK: br %r14
++  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
++                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <4 x i32> @f9(<4 x i32> %val) {
++; CHECK-LABEL: f9:
++; CHECK: vrepf %v24, %v24, 1
++; CHECK: br %r14
++  %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
++                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
++  ret <4 x i32> %ret
++}
++
++; Test v2i64 splat of the first element.
++define <2 x i64> @f10(<2 x i64> %val) {
++; CHECK-LABEL: f10:
++; CHECK: vrepg %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 splat of the last element.
++define <2 x i64> @f11(<2 x i64> %val) {
++; CHECK-LABEL: f11:
++; CHECK: vrepg %v24, %v24, 1
++; CHECK: br %r14
++  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
++                       <2 x i32> <i32 1, i32 1>
++  ret <2 x i64> %ret
++}
++
++; Test v4f32 splat of the first element.
++define <4 x float> @f12(<4 x float> %val) {
++; CHECK-LABEL: f12:
++; CHECK: vrepf %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <4 x float> %val, <4 x float> undef,
++                       <4 x i32> zeroinitializer
++  ret <4 x float> %ret
++}
++
++; Test v4f32 splat of the last element.
++define <4 x float> @f13(<4 x float> %val) {
++; CHECK-LABEL: f13:
++; CHECK: vrepf %v24, %v24, 3
++; CHECK: br %r14
++  %ret = shufflevector <4 x float> %val, <4 x float> undef,
++                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
++  ret <4 x float> %ret
++}
++
++; Test v4f32 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <4 x float> @f14(<4 x float> %val) {
++; CHECK-LABEL: f14:
++; CHECK: vrepf %v24, %v24, 1
++; CHECK: br %r14
++  %ret = shufflevector <4 x float> undef, <4 x float> %val,
++                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
++  ret <4 x float> %ret
++}
++
++; Test v2f64 splat of the first element.
++define <2 x double> @f15(<2 x double> %val) {
++; CHECK-LABEL: f15:
++; CHECK: vrepg %v24, %v24, 0
++; CHECK: br %r14
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x double> %ret
++}
++
++; Test v2f64 splat of the last element.
++define <2 x double> @f16(<2 x double> %val) {
++; CHECK-LABEL: f16:
++; CHECK: vrepg %v24, %v24, 1
++; CHECK: br %r14
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> <i32 1, i32 1>
++  ret <2 x double> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-perm-02.ll
+@@ -0,0 +1,200 @@
++; Test replications of a scalar register value, represented as splats.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test v16i8 splat of the first element.
++define <16 x i8> @f1(i8 %scalar) {
++; CHECK-LABEL: f1:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepb %v24, [[REG]], 7
++; CHECK: br %r14
++  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
++  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
++                       <16 x i32> zeroinitializer
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 splat of the last element.
++define <16 x i8> @f2(i8 %scalar) {
++; CHECK-LABEL: f2:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepb %v24, [[REG]], 7
++; CHECK: br %r14
++  %val = insertelement <16 x i8> undef, i8 %scalar, i32 15
++  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
++                       <16 x i32> <i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15,
++                                   i32 15, i32 15, i32 15, i32 15>
++  ret <16 x i8> %ret
++}
++
++; Test v16i8 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <16 x i8> @f3(i8 %scalar) {
++; CHECK-LABEL: f3:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepb %v24, [[REG]], 7
++; CHECK: br %r14
++  %val = insertelement <16 x i8> undef, i8 %scalar, i32 4
++  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
++                       <16 x i32> <i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20,
++                                   i32 20, i32 20, i32 20, i32 20>
++  ret <16 x i8> %ret
++}
++
++; Test v8i16 splat of the first element.
++define <8 x i16> @f4(i16 %scalar) {
++; CHECK-LABEL: f4:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vreph %v24, [[REG]], 3
++; CHECK: br %r14
++  %val = insertelement <8 x i16> undef, i16 %scalar, i32 0
++  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
++                       <8 x i32> zeroinitializer
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 splat of the last element.
++define <8 x i16> @f5(i16 %scalar) {
++; CHECK-LABEL: f5:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vreph %v24, [[REG]], 3
++; CHECK: br %r14
++  %val = insertelement <8 x i16> undef, i16 %scalar, i32 7
++  %ret = shufflevector <8 x i16> %val, <8 x i16> undef,
++                       <8 x i32> <i32 7, i32 7, i32 7, i32 7,
++                                  i32 7, i32 7, i32 7, i32 7>
++  ret <8 x i16> %ret
++}
++
++; Test v8i16 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <8 x i16> @f6(i16 %scalar) {
++; CHECK-LABEL: f6:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vreph %v24, [[REG]], 3
++; CHECK: br %r14
++  %val = insertelement <8 x i16> undef, i16 %scalar, i32 2
++  %ret = shufflevector <8 x i16> undef, <8 x i16> %val,
++                       <8 x i32> <i32 10, i32 10, i32 10, i32 10,
++                                  i32 10, i32 10, i32 10, i32 10>
++  ret <8 x i16> %ret
++}
++
++; Test v4i32 splat of the first element.
++define <4 x i32> @f7(i32 %scalar) {
++; CHECK-LABEL: f7:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepf %v24, [[REG]], 1
++; CHECK: br %r14
++  %val = insertelement <4 x i32> undef, i32 %scalar, i32 0
++  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
++                       <4 x i32> zeroinitializer
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 splat of the last element.
++define <4 x i32> @f8(i32 %scalar) {
++; CHECK-LABEL: f8:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepf %v24, [[REG]], 1
++; CHECK: br %r14
++  %val = insertelement <4 x i32> undef, i32 %scalar, i32 3
++  %ret = shufflevector <4 x i32> %val, <4 x i32> undef,
++                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
++  ret <4 x i32> %ret
++}
++
++; Test v4i32 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <4 x i32> @f9(i32 %scalar) {
++; CHECK-LABEL: f9:
++; CHECK: vlvgp [[REG:%v[0-9]+]], %r2, %r2
++; CHECK: vrepf %v24, [[REG]], 1
++; CHECK: br %r14
++  %val = insertelement <4 x i32> undef, i32 %scalar, i32 1
++  %ret = shufflevector <4 x i32> undef, <4 x i32> %val,
++                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
++  ret <4 x i32> %ret
++}
++
++; Test v2i64 splat of the first element.
++define <2 x i64> @f10(i64 %scalar) {
++; CHECK-LABEL: f10:
++; CHECK: vlvgp %v24, %r2, %r2
++; CHECK: br %r14
++  %val = insertelement <2 x i64> undef, i64 %scalar, i32 0
++  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x i64> %ret
++}
++
++; Test v2i64 splat of the last element.
++define <2 x i64> @f11(i64 %scalar) {
++; CHECK-LABEL: f11:
++; CHECK: vlvgp %v24, %r2, %r2
++; CHECK: br %r14
++  %val = insertelement <2 x i64> undef, i64 %scalar, i32 1
++  %ret = shufflevector <2 x i64> %val, <2 x i64> undef,
++                       <2 x i32> <i32 1, i32 1>
++  ret <2 x i64> %ret
++}
++
++; Test v4f32 splat of the first element.
++define <4 x float> @f12(float %scalar) {
++; CHECK-LABEL: f12:
++; CHECK: vrepf %v24, %v0, 0
++; CHECK: br %r14
++  %val = insertelement <4 x float> undef, float %scalar, i32 0
++  %ret = shufflevector <4 x float> %val, <4 x float> undef,
++                       <4 x i32> zeroinitializer
++  ret <4 x float> %ret
++}
++
++; Test v4f32 splat of the last element.
++define <4 x float> @f13(float %scalar) {
++; CHECK-LABEL: f13:
++; CHECK: vrepf %v24, %v0, 0
++; CHECK: br %r14
++  %val = insertelement <4 x float> undef, float %scalar, i32 3
++  %ret = shufflevector <4 x float> %val, <4 x float> undef,
++                       <4 x i32> <i32 3, i32 3, i32 3, i32 3>
++  ret <4 x float> %ret
++}
++
++; Test v4f32 splat of an arbitrary element, using the second operand of
++; the shufflevector.
++define <4 x float> @f14(float %scalar) {
++; CHECK-LABEL: f14:
++; CHECK: vrepf %v24, %v0, 0
++; CHECK: br %r14
++  %val = insertelement <4 x float> undef, float %scalar, i32 1
++  %ret = shufflevector <4 x float> undef, <4 x float> %val,
++                       <4 x i32> <i32 5, i32 5, i32 5, i32 5>
++  ret <4 x float> %ret
++}
++
++; Test v2f64 splat of the first element.
++define <2 x double> @f15(double %scalar) {
++; CHECK-LABEL: f15:
++; CHECK: vrepg %v24, %v0, 0
++; CHECK: br %r14
++  %val = insertelement <2 x double> undef, double %scalar, i32 0
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x double> %ret
++}
++
++; Test v2f64 splat of the last element.
++define <2 x double> @f16(double %scalar) {
++; CHECK-LABEL: f16:
++; CHECK: vrepg %v24, %v0, 0
++; CHECK: br %r14
++  %val = insertelement <2 x double> undef, double %scalar, i32 1
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> <i32 1, i32 1>
++  ret <2 x double> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-perm-03.ll
+@@ -0,0 +1,251 @@
++; Test replications of a scalar memory value, represented as splats.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a v16i8 replicating load with no offset.
++define <16 x i8> @f1(i8 *%ptr) { ++; CHECK-LABEL: f1: ++; CHECK: vlrepb %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load i8 *%ptr ++ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 replicating load with the maximum in-range offset. ++define <16 x i8> @f2(i8 *%base) { ++; CHECK-LABEL: f2: ++; CHECK: vlrepb %v24, 4095(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4095 ++ %scalar = load i8 *%ptr ++ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 replicating load with the first out-of-range offset. ++define <16 x i8> @f3(i8 *%base) { ++; CHECK-LABEL: f3: ++; CHECK: aghi %r2, 4096 ++; CHECK: vlrepb %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i8 *%base, i64 4096 ++ %scalar = load i8 *%ptr ++ %val = insertelement <16 x i8> undef, i8 %scalar, i32 0 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 replicating load with no offset. ++define <8 x i16> @f4(i16 *%ptr) { ++; CHECK-LABEL: f4: ++; CHECK: vlreph %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load i16 *%ptr ++ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 ++ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 replicating load with the maximum in-range offset. ++define <8 x i16> @f5(i16 *%base) { ++; CHECK-LABEL: f5: ++; CHECK: vlreph %v24, 4094(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i16 *%base, i64 2047 ++ %scalar = load i16 *%ptr ++ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 ++ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 replicating load with the first out-of-range offset. ++define <8 x i16> @f6(i16 *%base) { ++; CHECK-LABEL: f6: ++; CHECK: aghi %r2, 4096 ++; CHECK: vlreph %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i16 *%base, i64 2048 ++ %scalar = load i16 *%ptr ++ %val = insertelement <8 x i16> undef, i16 %scalar, i32 0 ++ %ret = shufflevector <8 x i16> %val, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 replicating load with no offset. ++define <4 x i32> @f7(i32 *%ptr) { ++; CHECK-LABEL: f7: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load i32 *%ptr ++ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 ++ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 replicating load with the maximum in-range offset. ++define <4 x i32> @f8(i32 *%base) { ++; CHECK-LABEL: f8: ++; CHECK: vlrepf %v24, 4092(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i32 *%base, i64 1023 ++ %scalar = load i32 *%ptr ++ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 ++ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 replicating load with the first out-of-range offset. 
++define <4 x i32> @f9(i32 *%base) { ++; CHECK-LABEL: f9: ++; CHECK: aghi %r2, 4096 ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i32 *%base, i64 1024 ++ %scalar = load i32 *%ptr ++ %val = insertelement <4 x i32> undef, i32 %scalar, i32 0 ++ %ret = shufflevector <4 x i32> %val, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 replicating load with no offset. ++define <2 x i64> @f10(i64 *%ptr) { ++; CHECK-LABEL: f10: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load i64 *%ptr ++ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 ++ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 replicating load with the maximum in-range offset. ++define <2 x i64> @f11(i64 *%base) { ++; CHECK-LABEL: f11: ++; CHECK: vlrepg %v24, 4088(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i64 *%base, i32 511 ++ %scalar = load i64 *%ptr ++ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 ++ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 replicating load with the first out-of-range offset. ++define <2 x i64> @f12(i64 *%base) { ++; CHECK-LABEL: f12: ++; CHECK: aghi %r2, 4096 ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr i64 *%base, i32 512 ++ %scalar = load i64 *%ptr ++ %val = insertelement <2 x i64> undef, i64 %scalar, i32 0 ++ %ret = shufflevector <2 x i64> %val, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ ret <2 x i64> %ret ++} ++ ++; Test a v4f32 replicating load with no offset. ++define <4 x float> @f13(float *%ptr) { ++; CHECK-LABEL: f13: ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load float *%ptr ++ %val = insertelement <4 x float> undef, float %scalar, i32 0 ++ %ret = shufflevector <4 x float> %val, <4 x float> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x float> %ret ++} ++ ++; Test a v4f32 replicating load with the maximum in-range offset. ++define <4 x float> @f14(float *%base) { ++; CHECK-LABEL: f14: ++; CHECK: vlrepf %v24, 4092(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%base, i64 1023 ++ %scalar = load float *%ptr ++ %val = insertelement <4 x float> undef, float %scalar, i32 0 ++ %ret = shufflevector <4 x float> %val, <4 x float> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x float> %ret ++} ++ ++; Test a v4f32 replicating load with the first out-of-range offset. ++define <4 x float> @f15(float *%base) { ++; CHECK-LABEL: f15: ++; CHECK: aghi %r2, 4096 ++; CHECK: vlrepf %v24, 0(%r2) ++; CHECK: br %r14 ++ %ptr = getelementptr float *%base, i64 1024 ++ %scalar = load float *%ptr ++ %val = insertelement <4 x float> undef, float %scalar, i32 0 ++ %ret = shufflevector <4 x float> %val, <4 x float> undef, ++ <4 x i32> zeroinitializer ++ ret <4 x float> %ret ++} ++ ++; Test a v2f64 replicating load with no offset. ++define <2 x double> @f16(double *%ptr) { ++; CHECK-LABEL: f16: ++; CHECK: vlrepg %v24, 0(%r2) ++; CHECK: br %r14 ++ %scalar = load double *%ptr ++ %val = insertelement <2 x double> undef, double %scalar, i32 0 ++ %ret = shufflevector <2 x double> %val, <2 x double> undef, ++ <2 x i32> zeroinitializer ++ ret <2 x double> %ret ++} ++ ++; Test a v2f64 replicating load with the maximum in-range offset. 
++define <2 x double> @f17(double *%base) {
++; CHECK-LABEL: f17:
++; CHECK: vlrepg %v24, 4088(%r2)
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 511
++  %scalar = load double *%ptr
++  %val = insertelement <2 x double> undef, double %scalar, i32 0
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x double> %ret
++}
++
++; Test a v2f64 replicating load with the first out-of-range offset.
++define <2 x double> @f18(double *%base) {
++; CHECK-LABEL: f18:
++; CHECK: aghi %r2, 4096
++; CHECK: vlrepg %v24, 0(%r2)
++; CHECK: br %r14
++  %ptr = getelementptr double *%base, i32 512
++  %scalar = load double *%ptr
++  %val = insertelement <2 x double> undef, double %scalar, i32 0
++  %ret = shufflevector <2 x double> %val, <2 x double> undef,
++                       <2 x i32> zeroinitializer
++  ret <2 x double> %ret
++}
++
++; Test a v16i8 replicating load with an index.
++define <16 x i8> @f19(i8 *%base, i64 %index) {
++; CHECK-LABEL: f19:
++; CHECK: vlrepb %v24, 1023(%r3,%r2)
++; CHECK: br %r14
++  %ptr1 = getelementptr i8 *%base, i64 %index
++  %ptr = getelementptr i8 *%ptr1, i64 1023
++  %scalar = load i8 *%ptr
++  %val = insertelement <16 x i8> undef, i8 %scalar, i32 0
++  %ret = shufflevector <16 x i8> %val, <16 x i8> undef,
++                       <16 x i32> zeroinitializer
++  ret <16 x i8> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-perm-04.ll
+@@ -0,0 +1,200 @@
++; Test vector merge high.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a canonical v16i8 merge high.
++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vmrhb %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 0, i32 16, i32 1, i32 17,
++                                   i32 2, i32 18, i32 3, i32 19,
++                                   i32 4, i32 20, i32 5, i32 21,
++                                   i32 6, i32 22, i32 7, i32 23>
++  ret <16 x i8> %ret
++}
++
++; Test a reversed v16i8 merge high.
++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vmrhb %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 16, i32 0, i32 17, i32 1,
++                                   i32 18, i32 2, i32 19, i32 3,
++                                   i32 20, i32 4, i32 21, i32 5,
++                                   i32 22, i32 6, i32 23, i32 7>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge high with only the first operand being used.
++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vmrhb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 0, i32 0, i32 1, i32 1,
++                                   i32 2, i32 2, i32 3, i32 3,
++                                   i32 4, i32 4, i32 5, i32 5,
++                                   i32 6, i32 6, i32 7, i32 7>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge high with only the second operand being used.
++; This is converted into @f3 by target-independent code.
++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vmrhb %v24, %v26, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 16, i32 16, i32 17, i32 17,
++                                   i32 18, i32 18, i32 19, i32 19,
++                                   i32 20, i32 20, i32 21, i32 21,
++                                   i32 22, i32 22, i32 23, i32 23>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge with both operands being the same. This too is
++; converted into @f3 by target-independent code.
++define <16 x i8> @f5(<16 x i8> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vmrhb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
++                       <16 x i32> <i32 0, i32 16, i32 1, i32 17,
++                                   i32 2, i32 18, i32 3, i32 19,
++                                   i32 4, i32 20, i32 5, i32 21,
++                                   i32 6, i32 22, i32 7, i32 23>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge in which some of the indices are don't care.
++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: vmrhb %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 undef, i32 16, i32 1, i32 undef,
++                                   i32 2, i32 18, i32 undef, i32 19,
++                                   i32 4, i32 undef, i32 5, i32 21,
++                                   i32 undef, i32 22, i32 7, i32 undef>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge in which one of the operands is undefined and where
++; indices for that operand are "don't care". Target-independent code
++; converts the indices themselves into "undef"s.
++define <16 x i8> @f7(<16 x i8> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vmrhb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
++                       <16 x i32> <i32 0, i32 16, i32 1, i32 17,
++                                   i32 2, i32 18, i32 3, i32 19,
++                                   i32 4, i32 20, i32 5, i32 21,
++                                   i32 6, i32 22, i32 7, i32 23>
++  ret <16 x i8> %ret
++}
++
++; Test a canonical v8i16 merge high.
++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f8:
++; CHECK: vmrhh %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
++                       <8 x i32> <i32 0, i32 8, i32 1, i32 9,
++                                  i32 2, i32 10, i32 3, i32 11>
++  ret <8 x i16> %ret
++}
++
++; Test a reversed v8i16 merge high.
++define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f9:
++; CHECK: vmrhh %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
++                       <8 x i32> <i32 8, i32 0, i32 9, i32 1,
++                                  i32 10, i32 2, i32 11, i32 3>
++  ret <8 x i16> %ret
++}
++
++; Test a canonical v4i32 merge high.
++define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f10:
++; CHECK: vmrhf %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
++                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
++  ret <4 x i32> %ret
++}
++
++; Test a reversed v4i32 merge high.
++define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) {
++; CHECK-LABEL: f11:
++; CHECK: vmrhf %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2,
++                       <4 x i32> <i32 4, i32 0, i32 5, i32 1>
++  ret <4 x i32> %ret
++}
++
++; Test a canonical v2i64 merge high.
++define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f12:
++; CHECK: vmrhg %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
++                       <2 x i32> <i32 0, i32 2>
++  ret <2 x i64> %ret
++}
++
++; Test a reversed v2i64 merge high.
++define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) {
++; CHECK-LABEL: f13:
++; CHECK: vmrhg %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2,
++                       <2 x i32> <i32 2, i32 0>
++  ret <2 x i64> %ret
++}
++
++; Test a canonical v4f32 merge high.
++define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) {
++; CHECK-LABEL: f14:
++; CHECK: vmrhf %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
++                       <4 x i32> <i32 0, i32 4, i32 1, i32 5>
++  ret <4 x float> %ret
++}
++
++; Test a reversed v4f32 merge high.
++define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) {
++; CHECK-LABEL: f15:
++; CHECK: vmrhf %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <4 x float> %val1, <4 x float> %val2,
++                       <4 x i32> <i32 4, i32 0, i32 5, i32 1>
++  ret <4 x float> %ret
++}
++
++; Test a canonical v2f64 merge high.
++define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) {
++; CHECK-LABEL: f16:
++; CHECK: vmrhg %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
++                       <2 x i32> <i32 0, i32 2>
++  ret <2 x double> %ret
++}
++
++; Test a reversed v2f64 merge high.
++define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) {
++; CHECK-LABEL: f17:
++; CHECK: vmrhg %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <2 x double> %val1, <2 x double> %val2,
++                       <2 x i32> <i32 2, i32 0>
++  ret <2 x double> %ret
++}
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll
+===================================================================
+--- /dev/null
++++ llvm-36/test/CodeGen/SystemZ/vec-perm-05.ll
+@@ -0,0 +1,200 @@
++; Test vector merge low.
++;
++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
++
++; Test a canonical v16i8 merge low.
++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f1:
++; CHECK: vmrlb %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 8, i32 24, i32 9, i32 25,
++                                   i32 10, i32 26, i32 11, i32 27,
++                                   i32 12, i32 28, i32 13, i32 29,
++                                   i32 14, i32 30, i32 15, i32 31>
++  ret <16 x i8> %ret
++}
++
++; Test a reversed v16i8 merge low.
++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f2:
++; CHECK: vmrlb %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 24, i32 8, i32 25, i32 9,
++                                   i32 26, i32 10, i32 27, i32 11,
++                                   i32 28, i32 12, i32 29, i32 13,
++                                   i32 30, i32 14, i32 31, i32 15>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge low with only the first operand being used.
++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f3:
++; CHECK: vmrlb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 8, i32 8, i32 9, i32 9,
++                                   i32 10, i32 10, i32 11, i32 11,
++                                   i32 12, i32 12, i32 13, i32 13,
++                                   i32 14, i32 14, i32 15, i32 15>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge low with only the second operand being used.
++; This is converted into @f3 by target-independent code.
++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f4:
++; CHECK: vmrlb %v24, %v26, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 24, i32 24, i32 25, i32 25,
++                                   i32 26, i32 26, i32 27, i32 27,
++                                   i32 28, i32 28, i32 29, i32 29,
++                                   i32 30, i32 30, i32 31, i32 31>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge with both operands being the same. This too is
++; converted into @f3 by target-independent code.
++define <16 x i8> @f5(<16 x i8> %val) {
++; CHECK-LABEL: f5:
++; CHECK: vmrlb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val, <16 x i8> %val,
++                       <16 x i32> <i32 8, i32 24, i32 9, i32 25,
++                                   i32 10, i32 26, i32 11, i32 27,
++                                   i32 12, i32 28, i32 13, i32 29,
++                                   i32 14, i32 30, i32 15, i32 31>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge in which some of the indices are don't care.
++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) {
++; CHECK-LABEL: f6:
++; CHECK: vmrlb %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
++                       <16 x i32> <i32 undef, i32 24, i32 9, i32 undef,
++                                   i32 10, i32 26, i32 undef, i32 27,
++                                   i32 12, i32 undef, i32 13, i32 29,
++                                   i32 undef, i32 30, i32 15, i32 undef>
++  ret <16 x i8> %ret
++}
++
++; Test a v16i8 merge in which one of the operands is undefined and where
++; indices for that operand are "don't care". Target-independent code
++; converts the indices themselves into "undef"s.
++define <16 x i8> @f7(<16 x i8> %val) {
++; CHECK-LABEL: f7:
++; CHECK: vmrlb %v24, %v24, %v24
++; CHECK: br %r14
++  %ret = shufflevector <16 x i8> undef, <16 x i8> %val,
++                       <16 x i32> <i32 8, i32 24, i32 9, i32 25,
++                                   i32 10, i32 26, i32 11, i32 27,
++                                   i32 12, i32 28, i32 13, i32 29,
++                                   i32 14, i32 30, i32 15, i32 31>
++  ret <16 x i8> %ret
++}
++
++; Test a canonical v8i16 merge low.
++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f8:
++; CHECK: vmrlh %v24, %v24, %v26
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
++                       <8 x i32> <i32 4, i32 12, i32 5, i32 13,
++                                  i32 6, i32 14, i32 7, i32 15>
++  ret <8 x i16> %ret
++}
++
++; Test a reversed v8i16 merge low.
++define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) {
++; CHECK-LABEL: f9:
++; CHECK: vmrlh %v24, %v26, %v24
++; CHECK: br %r14
++  %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2,
++                       <8 x i32> <i32 12, i32 4, i32 13, i32 5,
++                                  i32 14, i32 6, i32 15, i32 7>
++  ret <8 x i16> %ret
++}
++
++; Test a canonical v4i32 merge low.
++define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vmrlf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a reversed v4i32 merge low. ++define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vmrlf %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a canonical v2i64 merge low. ++define <2 x i64> @f12(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vmrlg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, ++ <2 x i32> ++ ret <2 x i64> %ret ++} ++ ++; Test a reversed v2i64 merge low. ++define <2 x i64> @f13(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vmrlg %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, ++ <2 x i32> ++ ret <2 x i64> %ret ++} ++ ++; Test a canonical v4f32 merge low. ++define <4 x float> @f14(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f14: ++; CHECK: vmrlf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a reversed v4f32 merge low. ++define <4 x float> @f15(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f15: ++; CHECK: vmrlf %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a canonical v2f64 merge low. ++define <2 x double> @f16(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f16: ++; CHECK: vmrlg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, ++ <2 x i32> ++ ret <2 x double> %ret ++} ++ ++; Test a reversed v2f64 merge low. ++define <2 x double> @f17(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f17: ++; CHECK: vmrlg %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, ++ <2 x i32> ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-06.ll +@@ -0,0 +1,160 @@ ++; Test vector pack. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a canonical v16i8 pack. ++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vpkh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a reversed v16i8 pack. ++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vpkh %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 pack with only the first operand being used. ++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vpkh %v24, %v24, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 pack with only the second operand being used. ++; This is converted into @f3 by target-independent code. 
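For the doubleword variants the merge mask degenerates to two indices. A sketch of the canonical v2i64 merge low (illustrative name):

define <2 x i64> @merge_low_g_sketch(<2 x i64> %val1, <2 x i64> %val2) {
  ; <i32 1, i32 3> takes the low doubleword of each operand: vmrlg
  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 1, i32 3>
  ret <2 x i64> %ret
}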
++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vpkh %v24, %v26, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 pack with both operands being the same. This too is ++; converted into @f3 by target-independent code. ++define <16 x i8> @f5(<16 x i8> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vpkh %v24, %v24, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> %val, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 pack in which some of the indices are don't care. ++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vpkh %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 pack in which one of the operands is undefined and where ++; indices for that operand are "don't care". Target-independent code ++; converts the indices themselves into "undef"s. ++define <16 x i8> @f7(<16 x i8> %val) { ++; CHECK-LABEL: f7: ++; CHECK: vpkh %v24, %v24, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> undef, <16 x i8> %val, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a canonical v8i16 pack. ++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vpkf %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a reversed v8i16 pack. ++define <8 x i16> @f9(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vpkf %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a canonical v4i32 pack. ++define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vpkg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a reversed v4i32 pack. ++define <4 x i32> @f11(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vpkg %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a canonical v4f32 pack. ++define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vpkg %v24, %v24, %v26 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a reversed v4f32 pack. ++define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vpkg %v24, %v26, %v24 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-07.ll +@@ -0,0 +1,145 @@ ++; Test vector shift left double immediate. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift with the lowest useful shift amount. 
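The pack tests keep the low-order byte of each halfword element, which on big-endian SystemZ means the odd-numbered bytes of both operands. A sketch of the canonical v16i8 pack (illustrative name):

define <16 x i8> @pack_sketch(<16 x i8> %val1, <16 x i8> %val2) {
  ; Odd bytes of %val1 followed by odd bytes of %val2: vpkh
  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
         <16 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15,
                     i32 17, i32 19, i32 21, i32 23, i32 25, i32 27, i32 29, i32 31>
  ret <16 x i8> %ret
}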
++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vsldb %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift with the highest shift amount. ++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vsldb %v24, %v24, %v26, 15 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift in which the operands need to be reversed. ++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vsldb %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift in which the operands need to be duplicated. ++define <16 x i8> @f4(<16 x i8> %val) { ++; CHECK-LABEL: f4: ++; CHECK: vsldb %v24, %v24, %v24, 7 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift in which some of the indices are undefs. ++define <16 x i8> @f5(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f5: ++; CHECK: vsldb %v24, %v24, %v26, 11 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; ...and again with reversed operands. ++define <16 x i8> @f6(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vsldb %v24, %v26, %v24, 13 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift with the lowest useful shift amount. ++define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vsldb %v24, %v24, %v26, 2 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift with the highest useful shift amount. ++define <8 x i16> @f8(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vsldb %v24, %v24, %v26, 14 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift with the lowest useful shift amount. ++define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vsldb %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift with the highest useful shift amount. ++define <4 x i32> @f10(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vsldb %v24, %v24, %v26, 12 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4f32 shift with the lowest useful shift amount. ++define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vsldb %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a v4f32 shift with the highest useful shift amount. ++define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vsldb %v24, %v24, %v26, 12 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; We use VPDI for v2i64 shuffles. 
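A shift left double by N bytes corresponds to a mask whose indices run N..N+15, that is, bytes N-15 of the first operand followed by bytes 0..N-1 of the second. A sketch for N = 1 (illustrative name):

define <16 x i8> @vsldb_sketch(<16 x i8> %val1, <16 x i8> %val2) {
  ; expected: vsldb %v24, %v24, %v26, 1
  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
         <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8,
                     i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i8> %ret
}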
+Index: llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-08.ll +@@ -0,0 +1,170 @@ ++; Test vector permutes using VPDI. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a high1/low2 permute for v16i8. ++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a low2/high1 permute for v16i8. ++define <16 x i8> @f2(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a low1/high2 permute for v16i8. ++define <16 x i8> @f3(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vpdi %v24, %v24, %v26, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a high2/low1 permute for v16i8. ++define <16 x i8> @f4(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vpdi %v24, %v26, %v24, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test reversing two doublewords in a v16i8. ++define <16 x i8> @f5(<16 x i8> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vpdi %v24, %v24, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <16 x i8> %val, <16 x i8> undef, ++ <16 x i32> ++ ret <16 x i8> %ret ++} ++ ++; Test a high1/low2 permute for v8i16. ++define <8 x i16> @f6(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a low2/high1 permute for v8i16. ++define <8 x i16> @f7(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} ++ ++; Test a high1/low2 permute for v4i32. ++define <4 x i32> @f8(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a low2/high1 permute for v4i32. ++define <4 x i32> @f9(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a high1/low2 permute for v2i64. ++define <2 x i64> @f10(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, ++ <2 x i32> ++ ret <2 x i64> %ret ++} ++ ++; Test low2/high1 permute for v2i64. ++define <2 x i64> @f11(<2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, ++ <2 x i32> ++ ret <2 x i64> %ret ++} ++ ++; Test a high1/low2 permute for v4f32. 
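VPDI selects one doubleword from each operand; per the z/Architecture definition, bit 2 of the immediate picks the doubleword of the first operand and bit 0 that of the second. A sketch of the high1/low2 v2i64 case (illustrative name):

define <2 x i64> @vpdi_sketch(<2 x i64> %val1, <2 x i64> %val2) {
  ; high doubleword of %val1, low doubleword of %val2: vpdi ..., 1
  %ret = shufflevector <2 x i64> %val1, <2 x i64> %val2, <2 x i32> <i32 0, i32 3>
  ret <2 x i64> %ret
}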
++define <4 x float> @f12(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a low2/high1 permute for v4f32. ++define <4 x float> @f13(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <4 x float> %val1, <4 x float> %val2, ++ <4 x i32> ++ ret <4 x float> %ret ++} ++ ++; Test a high1/low2 permute for v2f64. ++define <2 x double> @f14(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f14: ++; CHECK: vpdi %v24, %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, ++ <2 x i32> ++ ret <2 x double> %ret ++} ++ ++; Test a low2/high1 permute for v2f64. ++define <2 x double> @f15(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f15: ++; CHECK: vpdi %v24, %v26, %v24, 4 ++; CHECK: br %r14 ++ %ret = shufflevector <2 x double> %val1, <2 x double> %val2, ++ <2 x i32> ++ ret <2 x double> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-09.ll +@@ -0,0 +1,38 @@ ++; Test general vector permute of a v16i8. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-CODE %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s ++ ++define <16 x i8> @f1(<16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-CODE-LABEL: f1: ++; CHECK-CODE: larl [[REG:%r[0-5]]], ++; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) ++; CHECK-CODE: vperm %v24, %v24, %v26, [[MASK]] ++; CHECK-CODE: br %r14 ++; ++; CHECK-VECTOR: .byte 1 ++; CHECK-VECTOR-NEXT: .byte 19 ++; CHECK-VECTOR-NEXT: .byte 6 ++; CHECK-VECTOR-NEXT: .byte 5 ++; CHECK-VECTOR-NEXT: .byte 20 ++; CHECK-VECTOR-NEXT: .byte 22 ++; CHECK-VECTOR-NEXT: .byte 1 ++; CHECK-VECTOR-NEXT: .byte 1 ++; CHECK-VECTOR-NEXT: .byte 25 ++; CHECK-VECTOR-NEXT: .byte 29 ++; CHECK-VECTOR-NEXT: .byte 11 ++; Any byte would be OK here ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .byte 31 ++; CHECK-VECTOR-NEXT: .byte 4 ++; CHECK-VECTOR-NEXT: .byte 15 ++; CHECK-VECTOR-NEXT: .byte 19 ++ %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2, ++ <16 x i32> ++ ret <16 x i8> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-10.ll +@@ -0,0 +1,36 @@ ++; Test general vector permute of a v8i16. 
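When a v16i8 mask fits none of the special patterns, llc materializes the mask in the constant pool and emits VPERM. For byte elements the .byte values are the shuffle indices themselves; for wider elements each index n expands to the run of bytes starting at n times the element size (hence the byte pairs in the v8i16 test that follows). A sketch whose mask is consistent with the CHECK-VECTOR bytes of the v16i8 test above (the undef index is an assumption for the .space slot):

define <16 x i8> @vperm_sketch(<16 x i8> %val1, <16 x i8> %val2) {
  %ret = shufflevector <16 x i8> %val1, <16 x i8> %val2,
         <16 x i32> <i32 1, i32 19, i32 6, i32 5, i32 20, i32 22, i32 1, i32 1,
                     i32 25, i32 29, i32 11, i32 undef, i32 31, i32 4, i32 15, i32 19>
  ret <16 x i8> %ret
}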
++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-CODE %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s ++ ++define <8 x i16> @f1(<8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-CODE-LABEL: f1: ++; CHECK-CODE: larl [[REG:%r[0-5]]], ++; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) ++; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]] ++; CHECK-CODE: br %r14 ++; ++; CHECK-VECTOR: .byte 0 ++; CHECK-VECTOR-NEXT: .byte 1 ++; CHECK-VECTOR-NEXT: .byte 26 ++; CHECK-VECTOR-NEXT: .byte 27 ++; Any 2 bytes would be OK here ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .byte 28 ++; CHECK-VECTOR-NEXT: .byte 29 ++; CHECK-VECTOR-NEXT: .byte 6 ++; CHECK-VECTOR-NEXT: .byte 7 ++; CHECK-VECTOR-NEXT: .byte 14 ++; CHECK-VECTOR-NEXT: .byte 15 ++; CHECK-VECTOR-NEXT: .byte 8 ++; CHECK-VECTOR-NEXT: .byte 9 ++; CHECK-VECTOR-NEXT: .byte 16 ++; CHECK-VECTOR-NEXT: .byte 17 ++ %ret = shufflevector <8 x i16> %val1, <8 x i16> %val2, ++ <8 x i32> ++ ret <8 x i16> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-perm-11.ll +@@ -0,0 +1,35 @@ ++; Test general vector permute of a v4i32. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-CODE %s ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | \ ++; RUN: FileCheck -check-prefix=CHECK-VECTOR %s ++ ++define <4 x i32> @f1(<4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-CODE-LABEL: f1: ++; CHECK-CODE: larl [[REG:%r[0-5]]], ++; CHECK-CODE: vl [[MASK:%v[0-9]+]], 0([[REG]]) ++; CHECK-CODE: vperm %v24, %v26, %v24, [[MASK]] ++; CHECK-CODE: br %r14 ++; ++; CHECK-VECTOR: .byte 4 ++; CHECK-VECTOR-NEXT: .byte 5 ++; CHECK-VECTOR-NEXT: .byte 6 ++; CHECK-VECTOR-NEXT: .byte 7 ++; CHECK-VECTOR-NEXT: .byte 20 ++; CHECK-VECTOR-NEXT: .byte 21 ++; CHECK-VECTOR-NEXT: .byte 22 ++; CHECK-VECTOR-NEXT: .byte 23 ++; Any 4 bytes would be OK here ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .space 1 ++; CHECK-VECTOR-NEXT: .byte 12 ++; CHECK-VECTOR-NEXT: .byte 13 ++; CHECK-VECTOR-NEXT: .byte 14 ++; CHECK-VECTOR-NEXT: .byte 15 ++ %ret = shufflevector <4 x i32> %val1, <4 x i32> %val2, ++ <4 x i32> ++ ret <4 x i32> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-round-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-round-01.ll +@@ -0,0 +1,118 @@ ++; Test v2f64 rounding. 
++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare double @llvm.rint.f64(double) ++declare double @llvm.nearbyint.f64(double) ++declare double @llvm.floor.f64(double) ++declare double @llvm.ceil.f64(double) ++declare double @llvm.trunc.f64(double) ++declare double @llvm.round.f64(double) ++declare <2 x double> @llvm.rint.v2f64(<2 x double>) ++declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) ++declare <2 x double> @llvm.floor.v2f64(<2 x double>) ++declare <2 x double> @llvm.ceil.v2f64(<2 x double>) ++declare <2 x double> @llvm.trunc.v2f64(<2 x double>) ++declare <2 x double> @llvm.round.v2f64(<2 x double>) ++ ++define <2 x double> @f1(<2 x double> %val) { ++; CHECK-LABEL: f1: ++; CHECK: vfidb %v24, %v24, 0, 0 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.rint.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define <2 x double> @f2(<2 x double> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vfidb %v24, %v24, 4, 0 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define <2 x double> @f3(<2 x double> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vfidb %v24, %v24, 4, 7 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define <2 x double> @f4(<2 x double> %val) { ++; CHECK-LABEL: f4: ++; CHECK: vfidb %v24, %v24, 4, 6 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.ceil.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define <2 x double> @f5(<2 x double> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vfidb %v24, %v24, 4, 5 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.trunc.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define <2 x double> @f6(<2 x double> %val) { ++; CHECK-LABEL: f6: ++; CHECK: vfidb %v24, %v24, 4, 1 ++; CHECK: br %r14 ++ %res = call <2 x double> @llvm.round.v2f64(<2 x double> %val) ++ ret <2 x double> %res ++} ++ ++define double @f7(<2 x double> %val) { ++; CHECK-LABEL: f7: ++; CHECK: wfidb %f0, %v24, 0, 0 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.rint.f64(double %scalar) ++ ret double %res ++} ++ ++define double @f8(<2 x double> %val) { ++; CHECK-LABEL: f8: ++; CHECK: wfidb %f0, %v24, 4, 0 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.nearbyint.f64(double %scalar) ++ ret double %res ++} ++ ++define double @f9(<2 x double> %val) { ++; CHECK-LABEL: f9: ++; CHECK: wfidb %f0, %v24, 4, 7 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.floor.f64(double %scalar) ++ ret double %res ++} ++ ++define double @f10(<2 x double> %val) { ++; CHECK-LABEL: f10: ++; CHECK: wfidb %f0, %v24, 4, 6 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.ceil.f64(double %scalar) ++ ret double %res ++} ++ ++define double @f11(<2 x double> %val) { ++; CHECK-LABEL: f11: ++; CHECK: wfidb %f0, %v24, 4, 5 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.trunc.f64(double %scalar) ++ ret double %res ++} ++ ++define double @f12(<2 x double> %val) { ++; CHECK-LABEL: f12: ++; CHECK: wfidb %f0, %v24, 4, 1 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %res = call double @llvm.round.f64(double %scalar) ++ ret double %res ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll 
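Taken together, the vec-round-01.ll checks map the two VFIDB/WFIDB immediates as follows: the first is 4 (suppress inexact exceptions) for everything except rint, and the second selects the rounding mode: 0 = current FPC mode, 1 = to nearest with ties away from zero, 5 = toward zero, 6 = toward +infinity, 7 = toward -infinity. A self-contained sketch of one case (illustrative name, same RUN line assumed):

declare <2 x double> @llvm.floor.v2f64(<2 x double>)

define <2 x double> @floor_sketch(<2 x double> %val) {
  ; expected: vfidb %v24, %v24, 4, 7 (suppress inexact, round toward -infinity)
  %res = call <2 x double> @llvm.floor.v2f64(<2 x double> %val)
  ret <2 x double> %res
}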
+=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-01.ll +@@ -0,0 +1,39 @@ ++; Test vector shift left with vector shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: veslvb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = shl <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: veslvh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = shl <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: veslvf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = shl <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: veslvg %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = shl <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-02.ll +@@ -0,0 +1,39 @@ ++; Test vector arithmetic shift right with vector shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vesravb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = ashr <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vesravh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = ashr <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vesravf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = ashr <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vesravg %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = ashr <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-03.ll +@@ -0,0 +1,39 @@ ++; Test vector logical shift right with vector shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vesrlvb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = lshr <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vesrlvh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = lshr <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift. 
++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vesrlvf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = lshr <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vesrlvg %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = lshr <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-04.ll +@@ -0,0 +1,134 @@ ++; Test vector shift left with scalar shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift by a variable. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { ++; CHECK-LABEL: f1: ++; CHECK: veslb %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i8 ++ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 ++ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ %ret = shl <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the lowest useful constant. ++define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f2: ++; CHECK: veslb %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shl <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the highest useful constant. ++define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f3: ++; CHECK: veslb %v24, %v26, 7 ++; CHECK: br %r14 ++ %ret = shl <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift by a variable. ++define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { ++; CHECK-LABEL: f4: ++; CHECK: veslh %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i16 ++ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 ++ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ %ret = shl <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the lowest useful constant. ++define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f5: ++; CHECK: veslh %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shl <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the highest useful constant. ++define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f6: ++; CHECK: veslh %v24, %v26, 15 ++; CHECK: br %r14 ++ %ret = shl <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift by a variable. ++define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { ++; CHECK-LABEL: f7: ++; CHECK: veslf %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 ++ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ %ret = shl <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the lowest useful constant. ++define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f8: ++; CHECK: veslf %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shl <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the highest useful constant. 
++define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f9: ++; CHECK: veslf %v24, %v26, 31 ++; CHECK: br %r14 ++ %ret = shl <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift by a variable. ++define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { ++; CHECK-LABEL: f10: ++; CHECK: veslg %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %extshift = sext i32 %shift to i64 ++ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 ++ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ %ret = shl <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the lowest useful constant. ++define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f11: ++; CHECK: veslg %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = shl <2 x i64> %val, ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the highest useful constant. ++define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f12: ++; CHECK: veslg %v24, %v26, 63 ++; CHECK: br %r14 ++ %ret = shl <2 x i64> %val, ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-05.ll +@@ -0,0 +1,134 @@ ++; Test vector arithmetic shift right with scalar shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift by a variable. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { ++; CHECK-LABEL: f1: ++; CHECK: vesrab %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i8 ++ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 ++ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ %ret = ashr <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the lowest useful constant. ++define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vesrab %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = ashr <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the highest useful constant. ++define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vesrab %v24, %v26, 7 ++; CHECK: br %r14 ++ %ret = ashr <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift by a variable. ++define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { ++; CHECK-LABEL: f4: ++; CHECK: vesrah %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i16 ++ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 ++ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ %ret = ashr <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the lowest useful constant. ++define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vesrah %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = ashr <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the highest useful constant. ++define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f6: ++; CHECK: vesrah %v24, %v26, 15 ++; CHECK: br %r14 ++ %ret = ashr <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift by a variable. 
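The shift-by-constant tests in these files use a splat vector as the second operand of shl/ashr/lshr. Written out in full, the v16i8 shift by 7 has this shape (illustrative name; with no dummy argument the expected output is veslb %v24, %v24, 7):

define <16 x i8> @shl_splat_sketch(<16 x i8> %val) {
  %ret = shl <16 x i8> %val, <i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7,
                              i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7, i8 7>
  ret <16 x i8> %ret
}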
++define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { ++; CHECK-LABEL: f7: ++; CHECK: vesraf %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 ++ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ %ret = ashr <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the lowest useful constant. ++define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vesraf %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = ashr <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the highest useful constant. ++define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vesraf %v24, %v26, 31 ++; CHECK: br %r14 ++ %ret = ashr <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift by a variable. ++define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { ++; CHECK-LABEL: f10: ++; CHECK: vesrag %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %extshift = sext i32 %shift to i64 ++ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 ++ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ %ret = ashr <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the lowest useful constant. ++define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f11: ++; CHECK: vesrag %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = ashr <2 x i64> %val, ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the highest useful constant. ++define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f12: ++; CHECK: vesrag %v24, %v26, 63 ++; CHECK: br %r14 ++ %ret = ashr <2 x i64> %val, ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-06.ll +@@ -0,0 +1,134 @@ ++; Test vector logical shift right with scalar shift amount. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 shift by a variable. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, i32 %shift) { ++; CHECK-LABEL: f1: ++; CHECK: vesrlb %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i8 ++ %shiftvec = insertelement <16 x i8> undef, i8 %truncshift, i32 0 ++ %val2 = shufflevector <16 x i8> %shiftvec, <16 x i8> undef, ++ <16 x i32> zeroinitializer ++ %ret = lshr <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the lowest useful constant. ++define <16 x i8> @f2(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f2: ++; CHECK: vesrlb %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = lshr <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v16i8 shift by the highest useful constant. ++define <16 x i8> @f3(<16 x i8> %dummy, <16 x i8> %val) { ++; CHECK-LABEL: f3: ++; CHECK: vesrlb %v24, %v26, 7 ++; CHECK: br %r14 ++ %ret = lshr <16 x i8> %val, ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 shift by a variable. 
++define <8 x i16> @f4(<8 x i16> %dummy, <8 x i16> %val1, i32 %shift) { ++; CHECK-LABEL: f4: ++; CHECK: vesrlh %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %truncshift = trunc i32 %shift to i16 ++ %shiftvec = insertelement <8 x i16> undef, i16 %truncshift, i32 0 ++ %val2 = shufflevector <8 x i16> %shiftvec, <8 x i16> undef, ++ <8 x i32> zeroinitializer ++ %ret = lshr <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the lowest useful constant. ++define <8 x i16> @f5(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f5: ++; CHECK: vesrlh %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = lshr <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i16 shift by the highest useful constant. ++define <8 x i16> @f6(<8 x i16> %dummy, <8 x i16> %val) { ++; CHECK-LABEL: f6: ++; CHECK: vesrlh %v24, %v26, 15 ++; CHECK: br %r14 ++ %ret = lshr <8 x i16> %val, ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 shift by a variable. ++define <4 x i32> @f7(<4 x i32> %dummy, <4 x i32> %val1, i32 %shift) { ++; CHECK-LABEL: f7: ++; CHECK: vesrlf %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %shiftvec = insertelement <4 x i32> undef, i32 %shift, i32 0 ++ %val2 = shufflevector <4 x i32> %shiftvec, <4 x i32> undef, ++ <4 x i32> zeroinitializer ++ %ret = lshr <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the lowest useful constant. ++define <4 x i32> @f8(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vesrlf %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = lshr <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i32 shift by the highest useful constant. ++define <4 x i32> @f9(<4 x i32> %dummy, <4 x i32> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vesrlf %v24, %v26, 31 ++; CHECK: br %r14 ++ %ret = lshr <4 x i32> %val, ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 shift by a variable. ++define <2 x i64> @f10(<2 x i64> %dummy, <2 x i64> %val1, i32 %shift) { ++; CHECK-LABEL: f10: ++; CHECK: vesrlg %v24, %v26, 0(%r2) ++; CHECK: br %r14 ++ %extshift = sext i32 %shift to i64 ++ %shiftvec = insertelement <2 x i64> undef, i64 %extshift, i32 0 ++ %val2 = shufflevector <2 x i64> %shiftvec, <2 x i64> undef, ++ <2 x i32> zeroinitializer ++ %ret = lshr <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the lowest useful constant. ++define <2 x i64> @f11(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f11: ++; CHECK: vesrlg %v24, %v26, 1 ++; CHECK: br %r14 ++ %ret = lshr <2 x i64> %val, ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i64 shift by the highest useful constant. ++define <2 x i64> @f12(<2 x i64> %dummy, <2 x i64> %val) { ++; CHECK-LABEL: f12: ++; CHECK: vesrlg %v24, %v26, 63 ++; CHECK: br %r14 ++ %ret = lshr <2 x i64> %val, ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-shift-07.ll +@@ -0,0 +1,182 @@ ++; Test vector sign extensions. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i1->v16i8 extension. ++define <16 x i8> @f1(<16 x i8> %val) { ++; CHECK-LABEL: f1: ++; CHECK: veslb [[REG:%v[0-9]+]], %v24, 7 ++; CHECK: vesrab %v24, [[REG]], 7 ++; CHECK: br %r14 ++ %trunc = trunc <16 x i8> %val to <16 x i1> ++ %ret = sext <16 x i1> %trunc to <16 x i8> ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i1->v8i16 extension. 
++define <8 x i16> @f2(<8 x i16> %val) { ++; CHECK-LABEL: f2: ++; CHECK: veslh [[REG:%v[0-9]+]], %v24, 15 ++; CHECK: vesrah %v24, [[REG]], 15 ++; CHECK: br %r14 ++ %trunc = trunc <8 x i16> %val to <8 x i1> ++ %ret = sext <8 x i1> %trunc to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v8i8->v8i16 extension. ++define <8 x i16> @f3(<8 x i16> %val) { ++; CHECK-LABEL: f3: ++; CHECK: veslh [[REG:%v[0-9]+]], %v24, 8 ++; CHECK: vesrah %v24, [[REG]], 8 ++; CHECK: br %r14 ++ %trunc = trunc <8 x i16> %val to <8 x i8> ++ %ret = sext <8 x i8> %trunc to <8 x i16> ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i1->v4i32 extension. ++define <4 x i32> @f4(<4 x i32> %val) { ++; CHECK-LABEL: f4: ++; CHECK: veslf [[REG:%v[0-9]+]], %v24, 31 ++; CHECK: vesraf %v24, [[REG]], 31 ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i1> ++ %ret = sext <4 x i1> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i8->v4i32 extension. ++define <4 x i32> @f5(<4 x i32> %val) { ++; CHECK-LABEL: f5: ++; CHECK: veslf [[REG:%v[0-9]+]], %v24, 24 ++; CHECK: vesraf %v24, [[REG]], 24 ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i8> ++ %ret = sext <4 x i8> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v4i16->v4i32 extension. ++define <4 x i32> @f6(<4 x i32> %val) { ++; CHECK-LABEL: f6: ++; CHECK: veslf [[REG:%v[0-9]+]], %v24, 16 ++; CHECK: vesraf %v24, [[REG]], 16 ++; CHECK: br %r14 ++ %trunc = trunc <4 x i32> %val to <4 x i16> ++ %ret = sext <4 x i16> %trunc to <4 x i32> ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i1->v2i64 extension. ++define <2 x i64> @f7(<2 x i64> %val) { ++; CHECK-LABEL: f7: ++; CHECK: veslg [[REG:%v[0-9]+]], %v24, 63 ++; CHECK: vesrag %v24, [[REG]], 63 ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i1> ++ %ret = sext <2 x i1> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i8->v2i64 extension. ++define <2 x i64> @f8(<2 x i64> %val) { ++; CHECK-LABEL: f8: ++; CHECK: vsegb %v24, %v24 ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i8> ++ %ret = sext <2 x i8> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i16->v2i64 extension. ++define <2 x i64> @f9(<2 x i64> %val) { ++; CHECK-LABEL: f9: ++; CHECK: vsegh %v24, %v24 ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i16> ++ %ret = sext <2 x i16> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test a v2i32->v2i64 extension. ++define <2 x i64> @f10(<2 x i64> %val) { ++; CHECK-LABEL: f10: ++; CHECK: vsegf %v24, %v24 ++; CHECK: br %r14 ++ %trunc = trunc <2 x i64> %val to <2 x i32> ++ %ret = sext <2 x i32> %trunc to <2 x i64> ++ ret <2 x i64> %ret ++} ++ ++; Test an alternative v2i8->v2i64 extension. ++define <2 x i64> @f11(<2 x i64> %val) { ++; CHECK-LABEL: f11: ++; CHECK: vsegb %v24, %v24 ++; CHECK: br %r14 ++ %shl = shl <2 x i64> %val, ++ %ret = ashr <2 x i64> %shl, ++ ret <2 x i64> %ret ++} ++ ++; Test an alternative v2i16->v2i64 extension. ++define <2 x i64> @f12(<2 x i64> %val) { ++; CHECK-LABEL: f12: ++; CHECK: vsegh %v24, %v24 ++; CHECK: br %r14 ++ %shl = shl <2 x i64> %val, ++ %ret = ashr <2 x i64> %shl, ++ ret <2 x i64> %ret ++} ++ ++; Test an alternative v2i32->v2i64 extension. ++define <2 x i64> @f13(<2 x i64> %val) { ++; CHECK-LABEL: f13: ++; CHECK: vsegf %v24, %v24 ++; CHECK: br %r14 ++ %shl = shl <2 x i64> %val, ++ %ret = ashr <2 x i64> %shl, ++ ret <2 x i64> %ret ++} ++ ++; Test an extraction-based v2i8->v2i64 extension. 
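The "alternative" extension tests express sign extension as a left shift followed by an arithmetic right shift by the same splat amount; 56 re-extends the low byte of each i64 element, so llc can select vsegb. A sketch with the splat constants written out (illustrative name):

define <2 x i64> @sext_shift_sketch(<2 x i64> %val) {
  %shl = shl <2 x i64> %val, <i64 56, i64 56>
  %ret = ashr <2 x i64> %shl, <i64 56, i64 56>
  ret <2 x i64> %ret
}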
++define <2 x i64> @f14(<16 x i8> %val) { ++; CHECK-LABEL: f14: ++; CHECK: vsegb %v24, %v24 ++; CHECK: br %r14 ++ %elt0 = extractelement <16 x i8> %val, i32 7 ++ %elt1 = extractelement <16 x i8> %val, i32 15 ++ %ext0 = sext i8 %elt0 to i64 ++ %ext1 = sext i8 %elt1 to i64 ++ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 ++ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 ++ ret <2 x i64> %vec1 ++} ++ ++; Test an extraction-based v2i16->v2i64 extension. ++define <2 x i64> @f15(<16 x i16> %val) { ++; CHECK-LABEL: f15: ++; CHECK: vsegh %v24, %v24 ++; CHECK: br %r14 ++ %elt0 = extractelement <16 x i16> %val, i32 3 ++ %elt1 = extractelement <16 x i16> %val, i32 7 ++ %ext0 = sext i16 %elt0 to i64 ++ %ext1 = sext i16 %elt1 to i64 ++ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 ++ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 ++ ret <2 x i64> %vec1 ++} ++ ++; Test an extraction-based v2i32->v2i64 extension. ++define <2 x i64> @f16(<16 x i32> %val) { ++; CHECK-LABEL: f16: ++; CHECK: vsegf %v24, %v24 ++; CHECK: br %r14 ++ %elt0 = extractelement <16 x i32> %val, i32 1 ++ %elt1 = extractelement <16 x i32> %val, i32 3 ++ %ext0 = sext i32 %elt0 to i64 ++ %ext1 = sext i32 %elt1 to i64 ++ %vec0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 ++ %vec1 = insertelement <2 x i64> %vec0, i64 %ext1, i32 1 ++ ret <2 x i64> %vec1 ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-sqrt-01.ll +@@ -0,0 +1,23 @@ ++; Test f64 and v2f64 square root. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++declare double @llvm.sqrt.f64(double) ++declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) ++ ++define <2 x double> @f1(<2 x double> %val) { ++; CHECK-LABEL: f1: ++; CHECK: vfsqdb %v24, %v24 ++; CHECK: br %r14 ++ %ret = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %val) ++ ret <2 x double> %ret ++} ++ ++define double @f2(<2 x double> %val) { ++; CHECK-LABEL: f2: ++; CHECK: wfsqdb %f0, %v24 ++; CHECK: br %r14 ++ %scalar = extractelement <2 x double> %val, i32 0 ++ %ret = call double @llvm.sqrt.f64(double %scalar) ++ ret double %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-sub-01.ll +@@ -0,0 +1,148 @@ ++; Test vector subtraction. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 subtraction. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vsb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 subtraction. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vsh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 subtraction. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vsf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 subtraction. 
++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vsg %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} ++ ++; Test a v4f32 subtraction, as an example of an operation that needs to be ++; scalarized and reassembled. At present there's an unnecessary move that ++; could be avoided with smarter ordering. It also isn't important whether ++; the VSLDBs use the result of the VLRs or use %v24 and %v26 directly. ++define <4 x float> @f5(<4 x float> %val1, <4 x float> %val2) { ++; CHECK-LABEL: f5: ++; CHECK-DAG: vlr %v[[A1:[0-5]]], %v24 ++; CHECK-DAG: vlr %v[[A2:[0-5]]], %v26 ++; CHECK-DAG: vrepf %v[[B1:[0-5]]], %v[[A1]], 1 ++; CHECK-DAG: vrepf %v[[B2:[0-5]]], %v[[A2]], 1 ++; CHECK-DAG: vrepf %v[[C1:[0-5]]], %v[[A1]], 2 ++; CHECK-DAG: vrepf %v[[C2:[0-5]]], %v[[A2]], 2 ++; CHECK-DAG: vrepf %v[[D1:[0-5]]], %v[[A1]], 3 ++; CHECK-DAG: vrepf %v[[D2:[0-5]]], %v[[A2]], 3 ++; CHECK-DAG: ler %f[[A1copy:[0-5]]], %f[[A1]] ++; CHECK-DAG: sebr %f[[A1copy]], %f[[A2]] ++; CHECK-DAG: sebr %f[[B1]], %f[[B2]] ++; CHECK-DAG: sebr %f[[C1]], %f[[C2]] ++; CHECK-DAG: sebr %f[[D1]], %f[[D2]] ++; CHECK-DAG: vmrhf [[HIGH:%v[0-9]+]], %v[[A1copy]], %v[[B1]] ++; CHECK-DAG: vmrhf [[LOW:%v[0-9]+]], %v[[C1]], %v[[D1]] ++; CHECK: vmrhg %v24, [[HIGH]], [[LOW]] ++; CHECK: br %r14 ++ %ret = fsub <4 x float> %val1, %val2 ++ ret <4 x float> %ret ++} ++ ++; Test a v2f64 subtraction. ++define <2 x double> @f6(<2 x double> %dummy, <2 x double> %val1, ++ <2 x double> %val2) { ++; CHECK-LABEL: f6: ++; CHECK: vfsdb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = fsub <2 x double> %val1, %val2 ++ ret <2 x double> %ret ++} ++ ++; Test an f64 subtraction that uses vector registers. ++define double @f7(<2 x double> %val1, <2 x double> %val2) { ++; CHECK-LABEL: f7: ++; CHECK: wfsdb %f0, %v24, %v26 ++; CHECK: br %r14 ++ %scalar1 = extractelement <2 x double> %val1, i32 0 ++ %scalar2 = extractelement <2 x double> %val2, i32 0 ++ %ret = fsub double %scalar1, %scalar2 ++ ret double %ret ++} ++ ++; Test a v2i8 subtraction, which gets promoted to v16i8. ++define <2 x i8> @f8(<2 x i8> %dummy, <2 x i8> %val1, <2 x i8> %val2) { ++; CHECK-LABEL: f8: ++; CHECK: vsb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <2 x i8> %val1, %val2 ++ ret <2 x i8> %ret ++} ++ ++; Test a v4i8 subtraction, which gets promoted to v16i8. ++define <4 x i8> @f9(<4 x i8> %dummy, <4 x i8> %val1, <4 x i8> %val2) { ++; CHECK-LABEL: f9: ++; CHECK: vsb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <4 x i8> %val1, %val2 ++ ret <4 x i8> %ret ++} ++ ++; Test a v8i8 subtraction, which gets promoted to v16i8. ++define <8 x i8> @f10(<8 x i8> %dummy, <8 x i8> %val1, <8 x i8> %val2) { ++; CHECK-LABEL: f10: ++; CHECK: vsb %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <8 x i8> %val1, %val2 ++ ret <8 x i8> %ret ++} ++ ++; Test a v2i16 subtraction, which gets promoted to v8i16. ++define <2 x i16> @f11(<2 x i16> %dummy, <2 x i16> %val1, <2 x i16> %val2) { ++; CHECK-LABEL: f11: ++; CHECK: vsh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <2 x i16> %val1, %val2 ++ ret <2 x i16> %ret ++} ++ ++; Test a v4i16 subtraction, which gets promoted to v8i16. ++define <4 x i16> @f12(<4 x i16> %dummy, <4 x i16> %val1, <4 x i16> %val2) { ++; CHECK-LABEL: f12: ++; CHECK: vsh %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <4 x i16> %val1, %val2 ++ ret <4 x i16> %ret ++} ++ ++; Test a v2i32 subtraction, which gets promoted to v4i32. 
++define <2 x i32> @f13(<2 x i32> %dummy, <2 x i32> %val1, <2 x i32> %val2) { ++; CHECK-LABEL: f13: ++; CHECK: vsf %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = sub <2 x i32> %val1, %val2 ++ ret <2 x i32> %ret ++} ++ ++; Test a v2f32 subtraction, which gets promoted to v4f32. ++define <2 x float> @f14(<2 x float> %val1, <2 x float> %val2) { ++; No particular output expected, but must compile. ++ %ret = fsub <2 x float> %val1, %val2 ++ ret <2 x float> %ret ++} +Index: llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll +=================================================================== +--- /dev/null ++++ llvm-36/test/CodeGen/SystemZ/vec-xor-01.ll +@@ -0,0 +1,39 @@ ++; Test vector XOR. ++; ++; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s ++ ++; Test a v16i8 XOR. ++define <16 x i8> @f1(<16 x i8> %dummy, <16 x i8> %val1, <16 x i8> %val2) { ++; CHECK-LABEL: f1: ++; CHECK: vx %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = xor <16 x i8> %val1, %val2 ++ ret <16 x i8> %ret ++} ++ ++; Test a v8i16 XOR. ++define <8 x i16> @f2(<8 x i16> %dummy, <8 x i16> %val1, <8 x i16> %val2) { ++; CHECK-LABEL: f2: ++; CHECK: vx %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = xor <8 x i16> %val1, %val2 ++ ret <8 x i16> %ret ++} ++ ++; Test a v4i32 XOR. ++define <4 x i32> @f3(<4 x i32> %dummy, <4 x i32> %val1, <4 x i32> %val2) { ++; CHECK-LABEL: f3: ++; CHECK: vx %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = xor <4 x i32> %val1, %val2 ++ ret <4 x i32> %ret ++} ++ ++; Test a v2i64 XOR. ++define <2 x i64> @f4(<2 x i64> %dummy, <2 x i64> %val1, <2 x i64> %val2) { ++; CHECK-LABEL: f4: ++; CHECK: vx %v24, %v26, %v28 ++; CHECK: br %r14 ++ %ret = xor <2 x i64> %val1, %val2 ++ ret <2 x i64> %ret ++} +Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13-bad.txt +@@ -0,0 +1,39 @@ ++# Test z13 instructions that don't have PC-relative operands. ++# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 2>&1 \ ++# RUN: | FileCheck %s ++ ++# This would be "vlef %v0, 0, 4", but element 4 is invalid. ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x40 0x03 ++0xe7 0x00 0x00 0x00 0x40 0x03 ++ ++# ...and again with element 15 ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x03 ++0xe7 0x00 0x00 0x00 0xf0 0x03 ++ ++# This would be "vleg %v0, 0, 2", but element 2 is invalid. ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x20 0x02 ++0xe7 0x00 0x00 0x00 0x20 0x02 ++ ++# ...and again with element 15 ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x02 ++0xe7 0x00 0x00 0x00 0xf0 0x02 ++ ++# This would be "vleh %v0, 0, 8", but element 8 is invalid. ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0x80 0x01 ++0xe7 0x00 0x00 0x00 0x80 0x01 ++ ++# ...and again with element 15 ++# ++#CHECK: warning: invalid instruction encoding ++#CHECK-NEXT: 0xe7 0x00 0x00 0x00 0xf0 0x01 ++0xe7 0x00 0x00 0x00 0xf0 0x01 +Index: llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/Disassembler/SystemZ/insns-z13.txt +@@ -0,0 +1,3315 @@ ++# Test z13 instructions that don't have PC-relative operands. 
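The encodings in these disassembler tests follow a fixed field layout for the 0xe7-prefixed vector instructions. A worked decode of one entry from the valid-instruction checks, given purely as annotation (it is not part of either test input):

# vaccf %v18, %v3, %v20  <=  0xe7 0x23 0x40 0x00 0x2a 0xf1
#   byte 0: 0xe7  primary opcode
#   byte 1: 0x23  V1 = 2, V2 = 3 (low four bits of each register number)
#   byte 2: 0x40  V3 = 4
#   byte 4: 0x2a  element-size field 2 (fullword) plus RXB 0b1010, whose
#                 bits supply the high register bits: V1 -> 18, V3 -> 20
#   byte 5: 0xf1  opcode extension
# The element-size field follows the b/h/f/g/q = 0/1/2/3/4 pattern visible
# throughout the checks below.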
++# RUN: llvm-mc --disassemble %s -triple=s390x-linux-gnu -mcpu=z13 \
++# RUN: | FileCheck %s
++
++#CHECK: lcbb %r0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x27
++
++#CHECK: lcbb %r1, 2475(%r7,%r8), 12
++0xe7 0x17 0x89 0xab 0xc0 0x27
++
++#CHECK: lcbb %r15, 4095(%r15,%r15), 15
++0xe7 0xff 0xff 0xff 0xf0 0x27
++
++#CHECK: vab %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf3
++
++#CHECK: vab %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf3
++
++#CHECK: vab %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf3
++
++#CHECK: vaccb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf1
++
++#CHECK: vaccb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf1
++
++#CHECK: vaccb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf1
++
++#CHECK: vacccq %v0, %v0, %v0, %v0
++0xe7 0x00 0x04 0x00 0x00 0xb9
++
++#CHECK: vacccq %v3, %v20, %v5, %v22
++0xe7 0x34 0x54 0x00 0x65 0xb9
++
++#CHECK: vacccq %v31, %v31, %v31, %v31
++0xe7 0xff 0xf4 0x00 0xff 0xb9
++
++#CHECK: vaccf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf1
++
++#CHECK: vaccf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf1
++
++#CHECK: vaccf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf1
++
++#CHECK: vaccg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf1
++
++#CHECK: vaccg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf1
++
++#CHECK: vaccg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf1
++
++#CHECK: vacch %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf1
++
++#CHECK: vacch %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf1
++
++#CHECK: vacch %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf1
++
++#CHECK: vaccq %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x40 0xf1
++
++#CHECK: vaccq %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x4a 0xf1
++
++#CHECK: vaccq %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x4e 0xf1
++
++#CHECK: vacq %v0, %v0, %v0, %v0
++0xe7 0x00 0x04 0x00 0x00 0xbb
++
++#CHECK: vacq %v3, %v20, %v5, %v22
++0xe7 0x34 0x54 0x00 0x65 0xbb
++
++#CHECK: vacq %v31, %v31, %v31, %v31
++0xe7 0xff 0xf4 0x00 0xff 0xbb
++
++#CHECK: vaf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf3
++
++#CHECK: vaf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf3
++
++#CHECK: vaf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf3
++
++#CHECK: vag %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf3
++
++#CHECK: vag %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf3
++
++#CHECK: vag %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf3
++
++#CHECK: vah %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf3
++
++#CHECK: vah %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf3
++
++#CHECK: vah %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf3
++
++#CHECK: vaq %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x40 0xf3
++
++#CHECK: vaq %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x4a 0xf3
++
++#CHECK: vaq %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x4e 0xf3
++
++#CHECK: vavgb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf2
++
++#CHECK: vavgb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf2
++
++#CHECK: vavgb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf2
++
++#CHECK: vavgf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf2
++
++#CHECK: vavgf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf2
++
++#CHECK: vavgf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf2
++
++#CHECK: vavgg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf2
++
++#CHECK: vavgg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf2
++
++#CHECK: vavgg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf2
++
++#CHECK: vavgh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf2
++
++#CHECK: vavgh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf2
++
++#CHECK: vavgh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf2
++
++#CHECK: vavglb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf0
++
++#CHECK: vavglb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf0
++
++#CHECK: vavglb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf0
++
++#CHECK: vavglf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf0
++
++#CHECK: vavglf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf0
++
++#CHECK: vavglf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf0
++
++#CHECK: vavglg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf0
++
++#CHECK: vavglg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf0
++
++#CHECK: vavglg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf0
++
++#CHECK: vavglh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf0
++
++#CHECK: vavglh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf0
++
++#CHECK: vavglh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf0
++
++#CHECK: vcdgb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc3
++
++#CHECK: vcdgb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc3
++
++#CHECK: vcdgb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc3
++
++#CHECK: vcdlgb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc1
++
++#CHECK: vcdlgb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc1
++
++#CHECK: vcdlgb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc1
++
++#CHECK: vceqb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf8
++
++#CHECK: vceqb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf8
++
++#CHECK: vceqbs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x04 0xf8
++
++#CHECK: vceqb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf8
++
++#CHECK: vceqf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf8
++
++#CHECK: vceqf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf8
++
++#CHECK: vceqfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0xf8
++
++#CHECK: vceqf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf8
++
++#CHECK: vceqg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf8
++
++#CHECK: vceqg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf8
++
++#CHECK: vceqgs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x34 0xf8
++
++#CHECK: vceqg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf8
++
++#CHECK: vceqh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf8
++
++#CHECK: vceqh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf8
++
++#CHECK: vceqhs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0xf8
++
++#CHECK: vceqh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf8
++
++#CHECK: vcgdb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc2
++
++#CHECK: vcgdb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc2
++
++#CHECK: vcgdb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc2
++
++#CHECK: vchb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xfb
++
++#CHECK: vchb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xfb
++
++#CHECK: vchbs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x04 0xfb
++
++#CHECK: vchb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xfb
++
++#CHECK: vchf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xfb
++
++#CHECK: vchf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xfb
++
++#CHECK: vchfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0xfb
++
++#CHECK: vchf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xfb
++
++#CHECK: vchg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xfb
++
++#CHECK: vchg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xfb
++
++#CHECK: vchgs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x34 0xfb
++
++#CHECK: vchg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xfb
++
++#CHECK: vchh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xfb
++
++#CHECK: vchh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xfb
++
++#CHECK: vchhs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0xfb
++
++#CHECK: vchh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xfb
++
++#CHECK: vchlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf9
++
++#CHECK: vchlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf9
++
++#CHECK: vchlbs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x04 0xf9
++
++#CHECK: vchlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf9
++
++#CHECK: vchlf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf9
++
++#CHECK: vchlf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf9
++
++#CHECK: vchlfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0xf9
++
++#CHECK: vchlf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf9
++
++#CHECK: vchlg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf9
++
++#CHECK: vchlg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf9
++
++#CHECK: vchlgs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x34 0xf9
++
++#CHECK: vchlg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf9
++
++#CHECK: vchlh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf9
++
++#CHECK: vchlh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf9
++
++#CHECK: vchlhs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0xf9
++
++#CHECK: vchlh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf9
++
++#CHECK: vcksm %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x66
++
++#CHECK: vcksm %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x66
++
++#CHECK: vcksm %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x66
++
++#CHECK: vclgdb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc0
++
++#CHECK: vclgdb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc0
++
++#CHECK: vclgdb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc0
++
++#CHECK: vclzb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x53
++
++#CHECK: vclzb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0x53
++
++#CHECK: vclzb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x53
++
++#CHECK: vclzf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x53
++
++#CHECK: vclzf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0x53
++
++#CHECK: vclzf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0x53
++
++#CHECK: vclzg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x53
++
++#CHECK: vclzg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0x53
++
++#CHECK: vclzg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0x53
++
++#CHECK: vclzh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x53
++
++#CHECK: vclzh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0x53
++
++#CHECK: vclzh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0x53
++
++#CHECK: vctzb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x52
++
++#CHECK: vctzb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0x52
++
++#CHECK: vctzb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x52
++
++#CHECK: vctzf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x52
++
++#CHECK: vctzf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0x52
++
++#CHECK: vctzf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0x52
++
++#CHECK: vctzg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x52
++
++#CHECK: vctzg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0x52
++
++#CHECK: vctzg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0x52
++
++#CHECK: vctzh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x52
++
++#CHECK: vctzh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0x52
++
++#CHECK: vctzh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0x52
++
++#CHECK: vecb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xdb
++
++#CHECK: vecb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xdb
++
++#CHECK: vecb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xdb
++
++#CHECK: vecf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xdb
++
++#CHECK: vecf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xdb
++
++#CHECK: vecf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xdb
++
++#CHECK: vecg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xdb
++
++#CHECK: vecg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xdb
++
++#CHECK: vecg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xdb
++
++#CHECK: vech %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xdb
++
++#CHECK: vech %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xdb
++
++#CHECK: vech %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xdb
++
++#CHECK: veclb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd9
++
++#CHECK: veclb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd9
++
++#CHECK: veclb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd9
++
++#CHECK: veclf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xd9
++
++#CHECK: veclf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xd9
++
++#CHECK: veclf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xd9
++
++#CHECK: veclg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xd9
++
++#CHECK: veclg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xd9
++
++#CHECK: veclg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xd9
++
++#CHECK: veclh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xd9
++
++#CHECK: veclh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xd9
++
++#CHECK: veclh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xd9
++
++#CHECK: verimb %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x72
++
++#CHECK: verimb %v3, %v20, %v5, 103
++0xe7 0x34 0x50 0x67 0x04 0x72
++
++#CHECK: verimb %v31, %v31, %v31, 255
++0xe7 0xff 0xf0 0xff 0x0e 0x72
++
++#CHECK: verimf %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x72
++
++#CHECK: verimf %v3, %v20, %v5, 103
++0xe7 0x34 0x50 0x67 0x24 0x72
++
++#CHECK: verimf %v31, %v31, %v31, 255
++0xe7 0xff 0xf0 0xff 0x2e 0x72
++
++#CHECK: verimg %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x72
++
++#CHECK: verimg %v3, %v20, %v5, 103
++0xe7 0x34 0x50 0x67 0x34 0x72
++
++#CHECK: verimg %v31, %v31, %v31, 255
++0xe7 0xff 0xf0 0xff 0x3e 0x72
++
++#CHECK: verimh %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x72
++
++#CHECK: verimh %v3, %v20, %v5, 103
++0xe7 0x34 0x50 0x67 0x14 0x72
++
++#CHECK: verimh %v31, %v31, %v31, 255
++0xe7 0xff 0xf0 0xff 0x1e 0x72
++
++#CHECK: verllvb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x73
++
++#CHECK: verllvb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x73
++
++#CHECK: verllvb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x73
++
++#CHECK: verllvf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x73
++
++#CHECK: verllvf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x73
++
++#CHECK: verllvf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x73
++
++#CHECK: verllvg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x73
++
++#CHECK: verllvg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x73
++
++#CHECK: verllvg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x73
++
++#CHECK: verllvh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x73
++
++#CHECK: verllvh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x73
++
++#CHECK: verllvh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x73
++
++#CHECK: verllb %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x33
++
++#CHECK: verllb %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x33
++
++#CHECK: verllb %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x33
++
++#CHECK: verllf %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x33
++
++#CHECK: verllf %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x24 0x33
++
++#CHECK: verllf %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x2c 0x33
++
++#CHECK: verllg %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x33
++
++#CHECK: verllg %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x34 0x33
++
++#CHECK: verllg %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x3c 0x33
++
++#CHECK: verllh %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x33
++
++#CHECK: verllh %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x14 0x33
++
++#CHECK: verllh %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x1c 0x33
++
++#CHECK: veslvb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x70
++
++#CHECK: veslvb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x70
++
++#CHECK: veslvb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x70
++
++#CHECK: veslvf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x70
++
++#CHECK: veslvf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x70
++
++#CHECK: veslvf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x70
++
++#CHECK: veslvg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x70
++
++#CHECK: veslvg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x70
++
++#CHECK: veslvg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x70
++
++#CHECK: veslvh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x70
++
++#CHECK: veslvh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x70
++
++#CHECK: veslvh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x70
++
++#CHECK: veslb %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x30
++
++#CHECK: veslb %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x30
++
++#CHECK: veslb %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x30
++
++#CHECK: veslf %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x30
++
++#CHECK: veslf %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x24 0x30
++
++#CHECK: veslf %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x2c 0x30
++
++#CHECK: veslg %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x30
++
++#CHECK: veslg %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x34 0x30
++
++#CHECK: veslg %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x3c 0x30
++
++#CHECK: veslh %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x30
++
++#CHECK: veslh %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x14 0x30
++
++#CHECK: veslh %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x1c 0x30
++
++#CHECK: vesravb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x7a
++
++#CHECK: vesravb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x7a
++
++#CHECK: vesravb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x7a
++
++#CHECK: vesravf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x7a
++
++#CHECK: vesravf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x7a
++
++#CHECK: vesravf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x7a
++
++#CHECK: vesravg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x7a
++
++#CHECK: vesravg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x7a
++
++#CHECK: vesravg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x7a
++
++#CHECK: vesravh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x7a
++
++#CHECK: vesravh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x7a
++
++#CHECK: vesravh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x7a
++
++#CHECK: vesrab %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x3a
++
++#CHECK: vesrab %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x3a
++
++#CHECK: vesrab %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x3a
++
++#CHECK: vesraf %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x3a
++
++#CHECK: vesraf %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x24 0x3a
++
++#CHECK: vesraf %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x2c 0x3a
++
++#CHECK: vesrag %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x3a
++
++#CHECK: vesrag %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x34 0x3a
++
++#CHECK: vesrag %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x3c 0x3a
++
++#CHECK: vesrah %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x3a
++
++#CHECK: vesrah %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x14 0x3a
++
++#CHECK: vesrah %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x1c 0x3a
++
++#CHECK: vesrlvb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x78
++
++#CHECK: vesrlvb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x78
++
++#CHECK: vesrlvb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x78
++
++#CHECK: vesrlvf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x78
++
++#CHECK: vesrlvf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x78
++
++#CHECK: vesrlvf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x78
++
++#CHECK: vesrlvg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x78
++
++#CHECK: vesrlvg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x78
++
++#CHECK: vesrlvg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x78
++
++#CHECK: vesrlvh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x78
++
++#CHECK: vesrlvh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x78
++
++#CHECK: vesrlvh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x78
++
++#CHECK: vesrlb %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x38
++
++#CHECK: vesrlb %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x38
++
++#CHECK: vesrlb %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x38
++
++#CHECK: vesrlf %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x38
++
++#CHECK: vesrlf %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x24 0x38
++
++#CHECK: vesrlf %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x2c 0x38
++
++#CHECK: vesrlg %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x38
++
++#CHECK: vesrlg %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x34 0x38
++
++#CHECK: vesrlg %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x3c 0x38
++
++#CHECK: vesrlh %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x38
++
++#CHECK: vesrlh %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x14 0x38
++
++#CHECK: vesrlh %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x1c 0x38
++
++#CHECK: vfadb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xe3
++
++#CHECK: vfadb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xe3
++
++#CHECK: vfadb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xe3
++
++#CHECK: vfaeb %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x82
++
++#CHECK: vfaeb %v0, %v0, %v0, 12
++0xe7 0x00 0x00 0xc0 0x00 0x82
++
++#CHECK: vfaeb %v18, %v3, %v20, 0
++0xe7 0x23 0x40 0x00 0x0a 0x82
++
++#CHECK: vfaeb %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x40 0x0e 0x82
++
++#CHECK: vfaebs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0x90 0x0e 0x82
++
++#CHECK: vfaezb %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x60 0x0e 0x82
++
++#CHECK: vfaezbs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0xb0 0x0e 0x82
++
++#CHECK: vfaef %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x82
++
++#CHECK: vfaef %v0, %v0, %v0, 12
++0xe7 0x00 0x00 0xc0 0x20 0x82
++
++#CHECK: vfaef %v18, %v3, %v20, 0
++0xe7 0x23 0x40 0x00 0x2a 0x82
++
++#CHECK: vfaef %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x40 0x2e 0x82
++
++#CHECK: vfaefs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0x90 0x2e 0x82
++
++#CHECK: vfaezf %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x60 0x2e 0x82
++
++#CHECK: vfaezfs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0xb0 0x2e 0x82
++
++#CHECK: vfaeh %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x82
++
++#CHECK: vfaeh %v0, %v0, %v0, 12
++0xe7 0x00 0x00 0xc0 0x10 0x82
++
++#CHECK: vfaeh %v18, %v3, %v20, 0
++0xe7 0x23 0x40 0x00 0x1a 0x82
++
++#CHECK: vfaeh %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x40 0x1e 0x82
++
++#CHECK: vfaehs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0x90 0x1e 0x82
++
++#CHECK: vfaezh %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x60 0x1e 0x82
++
++#CHECK: vfaezhs %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0xb0 0x1e 0x82
++
++#CHECK: vfcedb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xe8
++
++#CHECK: vfcedb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xe8
++
++#CHECK: vfcedb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xe8
++
++#CHECK: vfcedbs %v0, %v0, %v0
++0xe7 0x00 0x00 0x10 0x30 0xe8
++
++#CHECK: vfcedbs %v18, %v3, %v20
++0xe7 0x23 0x40 0x10 0x3a 0xe8
++
++#CHECK: vfcedbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x10 0x3e 0xe8
++
++#CHECK: vfchdb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xeb
++
++#CHECK: vfchdb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xeb
++
++#CHECK: vfchdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xeb
++
++#CHECK: vfchdbs %v0, %v0, %v0
++0xe7 0x00 0x00 0x10 0x30 0xeb
++
++#CHECK: vfchdbs %v18, %v3, %v20
++0xe7 0x23 0x40 0x10 0x3a 0xeb
++
++#CHECK: vfchdbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x10 0x3e 0xeb
++
++#CHECK: vfchedb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xea
++
++#CHECK: vfchedb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xea
++
++#CHECK: vfchedb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xea
++
++#CHECK: vfchedbs %v0, %v0, %v0
++0xe7 0x00 0x00 0x10 0x30 0xea
++
++#CHECK: vfchedbs %v18, %v3, %v20
++0xe7 0x23 0x40 0x10 0x3a 0xea
++
++#CHECK: vfchedbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x10 0x3e 0xea
++
++#CHECK: vfddb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xe5
++
++#CHECK: vfddb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xe5
++
++#CHECK: vfddb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xe5
++
++#CHECK: vfeeb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x80
++
++#CHECK: vfeeb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x80
++
++#CHECK: vfeebs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x04 0x80
++
++#CHECK: vfeezb %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x0a 0x80
++
++#CHECK: vfeezbs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x04 0x80
++
++#CHECK: vfeeb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x80
++
++#CHECK: vfeef %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x80
++
++#CHECK: vfeef %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x80
++
++#CHECK: vfeefs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0x80
++
++#CHECK: vfeezf %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x2a 0x80
++
++#CHECK: vfeezfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x24 0x80
++
++#CHECK: vfeef %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x80
++
++#CHECK: vfeeh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x80
++
++#CHECK: vfeeh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x80
++
++#CHECK: vfeehs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0x80
++
++#CHECK: vfeezh %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x1a 0x80
++
++#CHECK: vfeezhs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x14 0x80
++
++#CHECK: vfeeh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x80
++
++#CHECK: vfeneb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x81
++
++#CHECK: vfeneb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x81
++
++#CHECK: vfenebs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x04 0x81
++
++#CHECK: vfenezb %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x0a 0x81
++
++#CHECK: vfenezbs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x04 0x81
++
++#CHECK: vfeneb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x81
++
++#CHECK: vfenef %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x81
++
++#CHECK: vfenef %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x81
++
++#CHECK: vfenefs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0x81
++
++#CHECK: vfenezf %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x2a 0x81
++
++#CHECK: vfenezfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x24 0x81
++
++#CHECK: vfenef %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x81
++
++#CHECK: vfeneh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x81
++
++#CHECK: vfeneh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x81
++
++#CHECK: vfenehs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0x81
++
++#CHECK: vfenezh %v18, %v3, %v20
++0xe7 0x23 0x40 0x20 0x1a 0x81
++
++#CHECK: vfenezhs %v7, %v24, %v9
++0xe7 0x78 0x90 0x30 0x14 0x81
++
++#CHECK: vfeneh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x81
++
++#CHECK: vfidb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc7
++
++#CHECK: vfidb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc7
++
++#CHECK: vfidb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc7
++
++#CHECK: vistrb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x5c
++
++#CHECK: vistrb %v18, %v3
++0xe7 0x23 0x00 0x00 0x08 0x5c
++
++#CHECK: vistrbs %v7, %v24
++0xe7 0x78 0x00 0x10 0x04 0x5c
++
++#CHECK: vistrb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x5c
++
++#CHECK: vistrf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x5c
++
++#CHECK: vistrf %v18, %v3
++0xe7 0x23 0x00 0x00 0x28 0x5c
++
++#CHECK: vistrfs %v7, %v24
++0xe7 0x78 0x00 0x10 0x24 0x5c
++
++#CHECK: vistrf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0x5c
++
++#CHECK: vistrh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x5c
++
++#CHECK: vistrh %v18, %v3
++0xe7 0x23 0x00 0x00 0x18 0x5c
++
++#CHECK: vistrhs %v7, %v24
++0xe7 0x78 0x00 0x10 0x14 0x5c
++
++#CHECK: vistrh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0x5c
++
++#CHECK: vfmadb %v0, %v0, %v0, %v0
++0xe7 0x00 0x03 0x00 0x00 0x8f
++
++#CHECK: vfmadb %v3, %v20, %v5, %v22
++0xe7 0x34 0x53 0x00 0x65 0x8f
++
++#CHECK: vfmadb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf3 0x00 0xff 0x8f
++
++#CHECK: vfmdb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xe7
++
++#CHECK: vfmdb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xe7
++
++#CHECK: vfmdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xe7
++
++#CHECK: vfmsdb %v0, %v0, %v0, %v0
++0xe7 0x00 0x03 0x00 0x00 0x8e
++
++#CHECK: vfmsdb %v3, %v20, %v5, %v22
++0xe7 0x34 0x53 0x00 0x65 0x8e
++
++#CHECK: vfmsdb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf3 0x00 0xff 0x8e
++
++#CHECK: vfsdb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xe2
++
++#CHECK: vfsdb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xe2
++
++#CHECK: vfsdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xe2
++
++#CHECK: vzero %v0
++0xe7 0x00 0x00 0x00 0x00 0x44
++
++#CHECK: vgbm %v0, 1
++0xe7 0x00 0x00 0x01 0x00 0x44
++
++#CHECK: vgbm %v0, 65534
++0xe7 0x00 0xff 0xfe 0x00 0x44
++
++#CHECK: vone %v0
++0xe7 0x00 0xff 0xff 0x00 0x44
++
++#CHECK: vgbm %v17, 4660
++0xe7 0x10 0x12 0x34 0x08 0x44
++
++#CHECK: vone %v31
++0xe7 0xf0 0xff 0xff 0x08 0x44
++
++#CHECK: vgef %v0, 0(%v0), 0
++0xe7 0x00 0x00 0x00 0x00 0x13
++
++#CHECK: vgef %v10, 1000(%v19,%r7), 2
++0xe7 0xa3 0x73 0xe8 0x24 0x13
++
++#CHECK: vgef %v31, 4095(%v31,%r15), 3
++0xe7 0xff 0xff 0xff 0x3c 0x13
++
++#CHECK: vgeg %v0, 0(%v0), 0
++0xe7 0x00 0x00 0x00 0x00 0x12
++
++#CHECK: vgeg %v10, 1000(%v19,%r7), 1
++0xe7 0xa3 0x73 0xe8 0x14 0x12
++
++#CHECK: vgeg %v31, 4095(%v31,%r15), 1
++0xe7 0xff 0xff 0xff 0x1c 0x12
++
++#CHECK: vgfmab %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xbc
++
++#CHECK: vgfmab %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xbc
++
++#CHECK: vgfmab %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xbc
++
++#CHECK: vgfmaf %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xbc
++
++#CHECK: vgfmaf %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xbc
++
++#CHECK: vgfmaf %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xbc
++
++#CHECK: vgfmag %v0, %v0, %v0, %v0
++0xe7 0x00 0x03 0x00 0x00 0xbc
++
++#CHECK: vgfmag %v3, %v20, %v5, %v22
++0xe7 0x34 0x53 0x00 0x65 0xbc
++
++#CHECK: vgfmag %v31, %v31, %v31, %v31
++0xe7 0xff 0xf3 0x00 0xff 0xbc
++
++#CHECK: vgfmah %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xbc
++
++#CHECK: vgfmah %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xbc
++
++#CHECK: vgfmah %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xbc
++
++#CHECK: vgfmb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xb4
++
++#CHECK: vgfmb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xb4
++
++#CHECK: vgfmb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xb4
++
++#CHECK: vgfmf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xb4
++
++#CHECK: vgfmf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xb4
++
++#CHECK: vgfmf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xb4
++
++#CHECK: vgfmg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xb4
++
++#CHECK: vgfmg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xb4
++
++#CHECK: vgfmg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xb4
++
++#CHECK: vgfmh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xb4
++
++#CHECK: vgfmh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xb4
++
++#CHECK: vgfmh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xb4
++
++#CHECK: vgmb %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x46
++
++#CHECK: vgmb %v22, 55, 66
++0xe7 0x60 0x37 0x42 0x08 0x46
++
++#CHECK: vgmb %v31, 255, 255
++0xe7 0xf0 0xff 0xff 0x08 0x46
++
++#CHECK: vgmf %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x20 0x46
++
++#CHECK: vgmf %v22, 55, 66
++0xe7 0x60 0x37 0x42 0x28 0x46
++
++#CHECK: vgmf %v31, 255, 255
++0xe7 0xf0 0xff 0xff 0x28 0x46
++
++#CHECK: vgmg %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0x46
++
++#CHECK: vgmg %v22, 55, 66
++0xe7 0x60 0x37 0x42 0x38 0x46
++
++#CHECK: vgmg %v31, 255, 255
++0xe7 0xf0 0xff 0xff 0x38 0x46
++
++#CHECK: vgmh %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x10 0x46
++
++#CHECK: vgmh %v22, 55, 66
++0xe7 0x60 0x37 0x42 0x18 0x46
++
++#CHECK: vgmh %v31, 255, 255
++0xe7 0xf0 0xff 0xff 0x18 0x46
++
++#CHECK: vl %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x06
++
++#CHECK: vl %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x08 0x06
++
++#CHECK: vl %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x08 0x06
++
++#CHECK: vlbb %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x07
++
++#CHECK: vlbb %v17, 2475(%r7,%r8), 12
++0xe7 0x17 0x89 0xab 0xc8 0x07
++
++#CHECK: vlbb %v31, 4095(%r15,%r15), 15
++0xe7 0xff 0xff 0xff 0xf8 0x07
++
++#CHECK: vlcb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xde
++
++#CHECK: vlcb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xde
++
++#CHECK: vlcb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xde
++
++#CHECK: vlcf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xde
++
++#CHECK: vlcf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xde
++
++#CHECK: vlcf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xde
++
++#CHECK: vlcg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xde
++
++#CHECK: vlcg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xde
++
++#CHECK: vlcg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xde
++
++#CHECK: vlch %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xde
++
++#CHECK: vlch %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xde
++
++#CHECK: vlch %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xde
++
++#CHECK: vldeb %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xc4
++
++#CHECK: vldeb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xc4
++
++#CHECK: vldeb %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xc4
++
++#CHECK: vleb %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x00
++
++#CHECK: vleb %v17, 2475(%r7,%r8), 12
++0xe7 0x17 0x89 0xab 0xc8 0x00
++
++#CHECK: vleb %v31, 4095(%r15,%r15), 15
++0xe7 0xff 0xff 0xff 0xf8 0x00
++
++#CHECK: vledb %v0, %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x30 0xc5
++
++#CHECK: vledb %v19, %v14, 4, 10
++0xe7 0x3e 0x00 0xa4 0x38 0xc5
++
++#CHECK: vledb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xf7 0x3c 0xc5
++
++#CHECK: vlef %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x03
++
++#CHECK: vlef %v17, 2475(%r7,%r8), 2
++0xe7 0x17 0x89 0xab 0x28 0x03
++
++#CHECK: vlef %v31, 4095(%r15,%r15), 3
++0xe7 0xff 0xff 0xff 0x38 0x03
++
++#CHECK: vleg %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x02
++
++#CHECK: vleg %v17, 2475(%r7,%r8), 1
++0xe7 0x17 0x89 0xab 0x18 0x02
++
++#CHECK: vleg %v31, 4095(%r15,%r15), 1
++0xe7 0xff 0xff 0xff 0x18 0x02
++
++#CHECK: vleh %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x01
++
++#CHECK: vleh %v17, 2475(%r7,%r8), 5
++0xe7 0x17 0x89 0xab 0x58 0x01
++
++#CHECK: vleh %v31, 4095(%r15,%r15), 7
++0xe7 0xff 0xff 0xff 0x78 0x01
++
++#CHECK: vleib %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x40
++
++#CHECK: vleib %v23, -30293, 12
++0xe7 0x70 0x89 0xab 0xc8 0x40
++
++#CHECK: vleib %v31, -1, 15
++0xe7 0xf0 0xff 0xff 0xf8 0x40
++
++#CHECK: vleif %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x43
++
++#CHECK: vleif %v23, -30293, 2
++0xe7 0x70 0x89 0xab 0x28 0x43
++
++#CHECK: vleif %v31, -1, 3
++0xe7 0xf0 0xff 0xff 0x38 0x43
++
++#CHECK: vleig %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x42
++
++#CHECK: vleig %v23, -30293, 1
++0xe7 0x70 0x89 0xab 0x18 0x42
++
++#CHECK: vleig %v31, -1, 1
++0xe7 0xf0 0xff 0xff 0x18 0x42
++
++#CHECK: vleih %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x41
++
++#CHECK: vleih %v23, -30293, 5
++0xe7 0x70 0x89 0xab 0x58 0x41
++
++#CHECK: vleih %v31, -1, 7
++0xe7 0xf0 0xff 0xff 0x78 0x41
++
++#CHECK: vflcdb %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xcc
++
++#CHECK: vflcdb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xcc
++
++#CHECK: vflcdb %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xcc
++
++#CHECK: vflndb %v0, %v0
++0xe7 0x00 0x00 0x10 0x30 0xcc
++
++#CHECK: vflndb %v19, %v14
++0xe7 0x3e 0x00 0x10 0x38 0xcc
++
++#CHECK: vflndb %v31, %v31
++0xe7 0xff 0x00 0x10 0x3c 0xcc
++
++#CHECK: vflpdb %v0, %v0
++0xe7 0x00 0x00 0x20 0x30 0xcc
++
++#CHECK: vflpdb %v19, %v14
++0xe7 0x3e 0x00 0x20 0x38 0xcc
++
++#CHECK: vflpdb %v31, %v31
++0xe7 0xff 0x00 0x20 0x3c 0xcc
++
++#CHECK: vlgvb %r0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x21
++
++#CHECK: vlgvb %r2, %v19, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x04 0x21
++
++#CHECK: vlgvb %r15, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x04 0x21
++
++#CHECK: vlgvf %r0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x21
++
++#CHECK: vlgvf %r2, %v19, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x24 0x21
++
++#CHECK: vlgvf %r15, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x24 0x21
++
++#CHECK: vlgvg %r0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x21
++
++#CHECK: vlgvg %r2, %v19, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x34 0x21
++
++#CHECK: vlgvg %r15, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x34 0x21
++
++#CHECK: vlgvh %r0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x21
++
++#CHECK: vlgvh %r2, %v19, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x14 0x21
++
++#CHECK: vlgvh %r15, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x14 0x21
++
++#CHECK: vfsqdb %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xce
++
++#CHECK: vfsqdb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xce
++
++#CHECK: vfsqdb %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xce
++
++#CHECK: vftcidb %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x4a
++
++#CHECK: vftcidb %v19, %v4, 1383
++0xe7 0x34 0x56 0x70 0x38 0x4a
++
++#CHECK: vftcidb %v31, %v31, 4095
++0xe7 0xff 0xff 0xf0 0x3c 0x4a
++
++#CHECK: vll %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x00 0x37
++
++#CHECK: vll %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x08 0x37
++
++#CHECK: vll %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x08 0x37
++
++#CHECK: vllezb %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x04
++
++#CHECK: vllezb %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x08 0x04
++
++#CHECK: vllezb %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x08 0x04
++
++#CHECK: vllezf %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x04
++
++#CHECK: vllezf %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x28 0x04
++
++#CHECK: vllezf %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x28 0x04
++
++#CHECK: vllezg %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x04
++
++#CHECK: vllezg %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x38 0x04
++
++#CHECK: vllezg %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x38 0x04
++
++#CHECK: vllezh %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x04
++
++#CHECK: vllezh %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x18 0x04
++
++#CHECK: vllezh %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x18 0x04
++
++#CHECK: vlm %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x36
++
++#CHECK: vlm %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x36
++
++#CHECK: vlm %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x36
++
++#CHECK: vlpb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xdf
++
++#CHECK: vlpb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xdf
++
++#CHECK: vlpb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xdf
++
++#CHECK: vlpf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xdf
++
++#CHECK: vlpf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xdf
++
++#CHECK: vlpf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xdf
++
++#CHECK: vlpg %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xdf
++
++#CHECK: vlpg %v19, %v14
++0xe7 0x3e 0x00 0x00 0x38 0xdf
++
++#CHECK: vlpg %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xdf
++
++#CHECK: vlph %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xdf
++
++#CHECK: vlph %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xdf
++
++#CHECK: vlph %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xdf
++
++#CHECK: vlr %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x56
++
++#CHECK: vlr %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0x56
++
++#CHECK: vlr %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x56
++
++#CHECK: vlrepb %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x05
++
++#CHECK: vlrepb %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x08 0x05
++
++#CHECK: vlrepb %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x08 0x05
++
++#CHECK: vlrepf %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x05
++
++#CHECK: vlrepf %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x28 0x05
++
++#CHECK: vlrepf %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x28 0x05
++
++#CHECK: vlrepg %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x05
++
++#CHECK: vlrepg %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x38 0x05
++
++#CHECK: vlrepg %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x38 0x05
++
++#CHECK: vlreph %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x05
++
++#CHECK: vlreph %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x18 0x05
++
++#CHECK: vlreph %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x18 0x05
++
++#CHECK: vlvgb %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x00 0x22
++
++#CHECK: vlvgb %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x08 0x22
++
++#CHECK: vlvgb %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x08 0x22
++
++#CHECK: vlvgf %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x20 0x22
++
++#CHECK: vlvgf %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x28 0x22
++
++#CHECK: vlvgf %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x28 0x22
++
++#CHECK: vlvgg %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x30 0x22
++
++#CHECK: vlvgg %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x38 0x22
++
++#CHECK: vlvgg %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x38 0x22
++
++#CHECK: vlvgh %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x10 0x22
++
++#CHECK: vlvgh %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x18 0x22
++
++#CHECK: vlvgh %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x18 0x22
++
++#CHECK: vlvgp %v0, %r0, %r0
++0xe7 0x00 0x00 0x00 0x00 0x62
++
++#CHECK: vlvgp %v18, %r3, %r4
++0xe7 0x23 0x40 0x00 0x08 0x62
++
++#CHECK: vlvgp %v31, %r15, %r15
++0xe7 0xff 0xf0 0x00 0x08 0x62
++
++#CHECK: vmaeb %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xae
++
++#CHECK: vmaeb %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xae
++
++#CHECK: vmaeb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xae
++
++#CHECK: vmaef %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xae
++
++#CHECK: vmaef %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xae
++
++#CHECK: vmaef %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xae
++
++#CHECK: vmaeh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xae
++
++#CHECK: vmaeh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xae
++
++#CHECK: vmaeh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xae
++
++#CHECK: vmahb %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xab
++
++#CHECK: vmahb %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xab
++
++#CHECK: vmahb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xab
++
++#CHECK: vmahf %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xab
++
++#CHECK: vmahf %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xab
++
++#CHECK: vmahf %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xab
++
++#CHECK: vmahh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xab
++
++#CHECK: vmahh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xab
++
++#CHECK: vmahh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xab
++
++#CHECK: vmalb %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xaa
++
++#CHECK: vmalb %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xaa
++
++#CHECK: vmalb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xaa
++
++#CHECK: vmaleb %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xac
++
++#CHECK: vmaleb %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xac
++
++#CHECK: vmaleb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xac
++
++#CHECK: vmalef %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xac
++
++#CHECK: vmalef %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xac
++
++#CHECK: vmalef %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xac
++
++#CHECK: vmaleh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xac
++
++#CHECK: vmaleh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xac
++
++#CHECK: vmaleh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xac
++
++#CHECK: vmalf %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xaa
++
++#CHECK: vmalf %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xaa
++
++#CHECK: vmalf %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xaa
++
++#CHECK: vmalhb %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa9
++
++#CHECK: vmalhb %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xa9
++
++#CHECK: vmalhb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xa9
++
++#CHECK: vmalhf %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xa9
++
++#CHECK: vmalhf %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xa9
++
++#CHECK: vmalhf %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xa9
++
++#CHECK: vmalhh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xa9
++
++#CHECK: vmalhh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xa9
++
++#CHECK: vmalhh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xa9
++
++#CHECK: vmalhw %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xaa
++
++#CHECK: vmalhw %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xaa
++
++#CHECK: vmalhw %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xaa
++
++#CHECK: vmalob %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xad
++
++#CHECK: vmalob %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xad
++
++#CHECK: vmalob %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xad
++
++#CHECK: vmalof %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xad
++
++#CHECK: vmalof %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xad
++
++#CHECK: vmalof %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xad
++
++#CHECK: vmaloh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xad
++
++#CHECK: vmaloh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xad
++
++#CHECK: vmaloh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xad
++
++#CHECK: vmaob %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xaf
++
++#CHECK: vmaob %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0xaf
++
++#CHECK: vmaob %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0xaf
++
++#CHECK: vmaof %v0, %v0, %v0, %v0
++0xe7 0x00 0x02 0x00 0x00 0xaf
++
++#CHECK: vmaof %v3, %v20, %v5, %v22
++0xe7 0x34 0x52 0x00 0x65 0xaf
++
++#CHECK: vmaof %v31, %v31, %v31, %v31
++0xe7 0xff 0xf2 0x00 0xff 0xaf
++
++#CHECK: vmaoh %v0, %v0, %v0, %v0
++0xe7 0x00 0x01 0x00 0x00 0xaf
++
++#CHECK: vmaoh %v3, %v20, %v5, %v22
++0xe7 0x34 0x51 0x00 0x65 0xaf
++
++#CHECK: vmaoh %v31, %v31, %v31, %v31
++0xe7 0xff 0xf1 0x00 0xff 0xaf
++
++#CHECK: vmeb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa6
++
++#CHECK: vmeb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa6
++
++#CHECK: vmeb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa6
++
++#CHECK: vmef %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa6
++
++#CHECK: vmef %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa6
++
++#CHECK: vmef %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa6
++
++#CHECK: vmeh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa6
++
++#CHECK: vmeh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa6
++
++#CHECK: vmeh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa6
++
++#CHECK: vmhb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa3
++
++#CHECK: vmhb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa3
++
++#CHECK: vmhb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa3
++
++#CHECK: vmhf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa3
++
++#CHECK: vmhf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa3
++
++#CHECK: vmhf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa3
++
++#CHECK: vmhh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa3
++
++#CHECK: vmhh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa3
++
++#CHECK: vmhh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa3
++
++#CHECK: vmlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa2
++
++#CHECK: vmlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa2
++
++#CHECK: vmlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa2
++
++#CHECK: vmlf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa2
++
++#CHECK: vmlf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa2
++
++#CHECK: vmlf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa2
++
++#CHECK: vmleb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa4
++
++#CHECK: vmleb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa4
++
++#CHECK: vmleb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa4
++
++#CHECK: vmlef %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa4
++
++#CHECK: vmlef %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa4
++
++#CHECK: vmlef %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa4
++
++#CHECK: vmleh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa4
++
++#CHECK: vmleh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa4
++
++#CHECK: vmleh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa4
++
++#CHECK: vmlhb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa1
++
++#CHECK: vmlhb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa1
++
++#CHECK: vmlhb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa1
++
++#CHECK: vmlhf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa1
++
++#CHECK: vmlhf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa1
++
++#CHECK: vmlhf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa1
++
++#CHECK: vmlhh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa1
++
++#CHECK: vmlhh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa1
++
++#CHECK: vmlhh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa1
++
++#CHECK: vmlhw %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa2
++
++#CHECK: vmlhw %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa2
++
++#CHECK: vmlhw %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa2
++
++#CHECK: vmlob %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa5
++
++#CHECK: vmlob %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa5
++
++#CHECK: vmlob %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa5
++
++#CHECK: vmlof %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa5
++
++#CHECK: vmlof %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa5
++
++#CHECK: vmlof %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa5
++
++#CHECK: vmloh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa5
++
++#CHECK: vmloh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa5
++
++#CHECK: vmloh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa5
++
++#CHECK: vmnb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xfe
++
++#CHECK: vmnb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xfe
++
++#CHECK: vmnb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xfe
++
++#CHECK: vmnf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xfe
++
++#CHECK: vmnf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xfe
++
++#CHECK: vmnf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xfe
++
++#CHECK: vmng %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xfe
++
++#CHECK: vmng %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xfe
++
++#CHECK: vmng %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xfe
++
++#CHECK: vmnh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xfe
++
++#CHECK: vmnh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xfe
++
++#CHECK: vmnh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xfe
++
++#CHECK: vmnlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xfc
++
++#CHECK: vmnlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xfc
++
++#CHECK: vmnlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xfc
++
++#CHECK: vmnlf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xfc
++
++#CHECK: vmnlf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xfc
++
++#CHECK: vmnlf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xfc
++
++#CHECK: vmnlg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xfc
++
++#CHECK: vmnlg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xfc
++
++#CHECK: vmnlg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xfc
++
++#CHECK: vmnlh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xfc
++
++#CHECK: vmnlh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xfc
++
++#CHECK: vmnlh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xfc
++
++#CHECK: vmob %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xa7
++
++#CHECK: vmob %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xa7
++
++#CHECK: vmob %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xa7
++
++#CHECK: vmof %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xa7
++
++#CHECK: vmof %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xa7
++
++#CHECK: vmof %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xa7
++
++#CHECK: vmoh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xa7
++
++#CHECK: vmoh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xa7
++
++#CHECK: vmoh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xa7
++
++#CHECK: vmrhb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x61
++
++#CHECK: vmrhb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x61
++
++#CHECK: vmrhb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x61
++
++#CHECK: vmrhf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x61
++
++#CHECK: vmrhf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x61
++
++#CHECK: vmrhf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x61
++
++#CHECK: vmrhg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x61
++
++#CHECK: vmrhg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x61
++
++#CHECK: vmrhg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x61
++
++#CHECK: vmrhh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x61
++
++#CHECK: vmrhh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x61
++
++#CHECK: vmrhh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x61
++
++#CHECK: vmrlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x60
++
++#CHECK: vmrlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x60
++
++#CHECK: vmrlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x60
++
++#CHECK: vmrlf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x60
++
++#CHECK: vmrlf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x60
++
++#CHECK: vmrlf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x60
++
++#CHECK: vmrlg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x60
++
++#CHECK: vmrlg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x60
++
++#CHECK: vmrlg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x60
++
++#CHECK: vmrlh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x60
++
++#CHECK: vmrlh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x60
++
++#CHECK: vmrlh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x60
++
++#CHECK: vmxb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xff
++
++#CHECK: vmxb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xff
++
++#CHECK: vmxb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xff
++
++#CHECK: vmxf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xff
++
++#CHECK: vmxf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xff
++
++#CHECK: vmxf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xff
++
++#CHECK: vmxg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xff
++
++#CHECK: vmxg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xff
++
++#CHECK: vmxg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xff
++
++#CHECK: vmxh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xff
++
++#CHECK: vmxh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xff
++
++#CHECK: vmxh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xff
++
++#CHECK: vmxlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xfd
++
++#CHECK: vmxlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xfd
++
++#CHECK: vmxlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xfd
++
++#CHECK: vmxlf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xfd
++
++#CHECK: vmxlf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xfd
++
++#CHECK: vmxlf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xfd
++
++#CHECK: vmxlg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xfd
++
++#CHECK: vmxlg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xfd
++
++#CHECK: vmxlg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xfd
++
++#CHECK: vmxlh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xfd
++
++#CHECK: vmxlh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xfd
++
++#CHECK: vmxlh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xfd
++
++#CHECK: vn %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x68
++
++#CHECK: vn %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x68
++
++#CHECK: vn %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x68
++
++#CHECK: vnc %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x69
++
++#CHECK: vnc %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x69
++
++#CHECK: vnc %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x69
++
++#CHECK: vno %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x6b
++
++#CHECK: vno %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x6b
++
++#CHECK: vno %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x6b
++
++#CHECK: vo %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x6a
++
++#CHECK: vo %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x6a
++
++#CHECK: vo %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x6a
++
++#CHECK: vpdi %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x84
++
++#CHECK: vpdi %v3, %v20, %v5, 4
++0xe7 0x34 0x50 0x00 0x44 0x84
++
++#CHECK: vpdi %v31, %v31, %v31, 15
++0xe7 0xff 0xf0 0x00 0xfe 0x84
++
++#CHECK: vperm %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x8c
++
++#CHECK: vperm %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0x8c
++
++#CHECK: vperm %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0x8c
++
++#CHECK: vpkf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x94
++
++#CHECK: vpkf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x94
++
++#CHECK: vpkf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x94
++
++#CHECK: vpkg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x94
++
++#CHECK: vpkg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x94
++
++#CHECK: vpkg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x94
++
++#CHECK: vpkh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x94
++
++#CHECK: vpkh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x94
++
++#CHECK: vpkh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x94
++
++#CHECK: vpklsf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x95
++
++#CHECK: vpklsf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x95
++
++#CHECK: vpklsfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0x95
++
++#CHECK: vpklsf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x95
++
++#CHECK: vpklsg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x95
++
++#CHECK: vpklsg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x95
++
++#CHECK: vpklsgs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x34 0x95
++
++#CHECK: vpklsg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x95
++
++#CHECK: vpklsh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x95
++
++#CHECK: vpklsh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x95
++
++#CHECK: vpklshs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0x95
++
++#CHECK: vpklsh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x95
++
++#CHECK: vpksf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x97
++
++#CHECK: vpksf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x97
++
++#CHECK: vpksfs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x24 0x97
++
++#CHECK: vpksf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x97
++
++#CHECK: vpksg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x97
++
++#CHECK: vpksg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x97
++
++#CHECK: vpksgs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x34 0x97
++
++#CHECK: vpksg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x97
++
++#CHECK: vpksh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x97
++
++#CHECK: vpksh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x97
++
++#CHECK: vpkshs %v7, %v24, %v9
++0xe7 0x78 0x90 0x10 0x14 0x97
++
++#CHECK: vpksh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x97
++
++#CHECK: vpopct %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x50
++
++#CHECK: vpopct %v19, %v14, 0
++0xe7 0x3e 0x00 0x00 0x08 0x50
++
++#CHECK: vpopct %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x50
++
++#CHECK: vrepb %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x4d
++
++#CHECK: vrepb %v19, %v4, 22136
++0xe7 0x34 0x56 0x78 0x08 0x4d
++
++#CHECK: vrepb %v31, %v31, 65535
++0xe7 0xff 0xff 0xff 0x0c 0x4d
++
++#CHECK: vrepf %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x4d
++
++#CHECK: vrepf %v19, %v4, 22136
++0xe7 0x34 0x56 0x78 0x28 0x4d
++
++#CHECK: vrepf %v31, %v31, 65535
++0xe7 0xff 0xff 0xff 0x2c 0x4d
++
++#CHECK: vrepg %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x4d
++
++#CHECK: vrepg %v19, %v4, 22136
++0xe7 0x34 0x56 0x78 0x38 0x4d
++
++#CHECK: vrepg %v31, %v31, 65535
++0xe7 0xff 0xff 0xff 0x3c 0x4d
++
++#CHECK: vreph %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x4d
++
++#CHECK: vreph %v19, %v4, 22136
++0xe7 0x34 0x56 0x78 0x18 0x4d
++
++#CHECK: vreph %v31, %v31, 65535
++0xe7 0xff 0xff 0xff 0x1c 0x4d
++
++#CHECK: vrepib %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x45
++
++#CHECK: vrepib %v23, -30293
++0xe7 0x70 0x89 0xab 0x08 0x45
++
++#CHECK: vrepib %v31, -1
++0xe7 0xf0 0xff 0xff 0x08 0x45
++
++#CHECK: vrepif %v0, 0
++0xe7 0x00 0x00 0x00 0x20 0x45
++
++#CHECK: vrepif %v23, -30293
++0xe7 0x70 0x89 0xab 0x28 0x45
++
++#CHECK: vrepif %v31, -1
++0xe7 0xf0 0xff 0xff 0x28 0x45
++
++#CHECK: vrepig %v0, 0
++0xe7 0x00 0x00 0x00 0x30 0x45
++
++#CHECK: vrepig %v23, -30293
++0xe7 0x70 0x89 0xab 0x38 0x45
++
++#CHECK: vrepig %v31, -1
++0xe7 0xf0 0xff 0xff 0x38 0x45
++
++#CHECK: vrepih %v0, 0
++0xe7 0x00 0x00 0x00 0x10 0x45
++
++#CHECK: vrepih %v23, -30293
++0xe7 0x70 0x89 0xab 0x18 0x45
++
++#CHECK: vrepih %v31, -1
++0xe7 0xf0 0xff 0xff 0x18 0x45
++
++#CHECK: vsb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf7
++
++#CHECK: vsb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf7
++
++#CHECK: vsb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf7
++
++#CHECK: vsbiq %v0, %v0, %v0, %v0
++0xe7 0x00 0x04 0x00 0x00 0xbf
++
++#CHECK: vsbiq %v3, %v20, %v5, %v22
++0xe7 0x34 0x54 0x00 0x65 0xbf
++
++#CHECK: vsbiq %v31, %v31, %v31, %v31
++0xe7 0xff 0xf4 0x00 0xff 0xbf
++
++#CHECK: vsbcbiq %v0, %v0, %v0, %v0
++0xe7 0x00 0x04 0x00 0x00 0xbd
++
++#CHECK: vsbcbiq %v3, %v20, %v5, %v22
++0xe7 0x34 0x54 0x00 0x65 0xbd
++
++#CHECK: vsbcbiq %v31, %v31, %v31, %v31
++0xe7 0xff 0xf4 0x00 0xff 0xbd
++
++#CHECK: vscbib %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xf5
++
++#CHECK: vscbib %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0xf5
++
++#CHECK: vscbib %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0xf5
++
++#CHECK: vscbif %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf5
++
++#CHECK: vscbif %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf5
++
++#CHECK: vscbif %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf5
++
++#CHECK: vscbig %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf5
++
++#CHECK: vscbig %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf5
++
++#CHECK: vscbig %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf5
++
++#CHECK: vscbih %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf5
++
++#CHECK: vscbih %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf5
++
++#CHECK: vscbih %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf5
++
++#CHECK: vscbiq %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x40 0xf5
++
++#CHECK: vscbiq %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x4a 0xf5
++
++#CHECK: vscbiq %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x4e 0xf5
++
++#CHECK: vscef %v0, 0(%v0), 0
++0xe7 0x00 0x00 0x00 0x00 0x1b
++
++#CHECK: vscef %v10, 1000(%v19,%r7), 2
++0xe7 0xa3 0x73 0xe8 0x24 0x1b
++
++#CHECK: vscef %v31, 4095(%v31,%r15), 3
++0xe7 0xff 0xff 0xff 0x3c 0x1b
++
++#CHECK: vsceg %v0, 0(%v0), 0
++0xe7 0x00 0x00 0x00 0x00 0x1a
++
++#CHECK: vsceg %v10, 1000(%v19,%r7), 1
++0xe7 0xa3 0x73 0xe8 0x14 0x1a
++
++#CHECK: vsceg %v31, 4095(%v31,%r15), 1
++0xe7 0xff 0xff 0xff 0x1c 0x1a
++
++#CHECK: vsegb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x5f
++
++#CHECK: vsegb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0x5f
++
++#CHECK: vsegb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0x5f
++
++#CHECK: vsegf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x5f
++
++#CHECK: vsegf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0x5f
++
++#CHECK: vsegf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0x5f
++
++#CHECK: vsegh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x5f
++
++#CHECK: vsegh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0x5f
++
++#CHECK: vsegh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0x5f
++
++#CHECK: vsel %v0, %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x8d
++
++#CHECK: vsel %v3, %v20, %v5, %v22
++0xe7 0x34 0x50 0x00 0x65 0x8d
++
++#CHECK: vsel %v31, %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0xff 0x8d
++
++#CHECK: vsf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xf7
++
++#CHECK: vsf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0xf7
++
++#CHECK: vsf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0xf7
++
++#CHECK: vsg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0xf7
++
++#CHECK: vsg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0xf7
++
++#CHECK: vsg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0xf7
++
++#CHECK: vsh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xf7
++
++#CHECK: vsh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0xf7
++
++#CHECK: vsh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0xf7
++
++#CHECK: vsl %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x74
++
++#CHECK: vsl %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x74
++
++#CHECK: vsl %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x74
++
++#CHECK: vslb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x75
++
++#CHECK: vslb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x75
++
++#CHECK: vslb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x75
++
++#CHECK: vsldb %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x77
++
++#CHECK: vsldb %v3, %v20, %v5, 103
++0xe7 0x34 0x50 0x67 0x04 0x77
++
++#CHECK: vsldb %v31, %v31, %v31, 255
++0xe7 0xff 0xf0 0xff 0x0e 0x77
++
++#CHECK: vsq %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x40 0xf7
++
++#CHECK: vsq %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x4a 0xf7
++
++#CHECK: vsq %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x4e 0xf7
++
++#CHECK: vsra %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x7e
++
++#CHECK: vsra %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x7e
++
++#CHECK: vsra %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x7e
++
++#CHECK: vsrab %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x7f
++
++#CHECK: vsrab %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x7f
++
++#CHECK: vsrab %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x7f
++
++#CHECK: vsrl %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x7c
++
++#CHECK: vsrl %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x7c
++
++#CHECK: vsrl %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x7c
++
++#CHECK: vsrlb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x7d
++
++#CHECK: vsrlb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x7d
++
++#CHECK: vsrlb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x7d
++
++#CHECK: vst %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x0E
++
++#CHECK: vst %v17, 2475(%r7,%r8)
++0xe7 0x17 0x89 0xab 0x08 0x0E
++
++#CHECK: vst %v31, 4095(%r15,%r15)
++0xe7 0xff 0xff 0xff 0x08 0x0E
++
++#CHECK: vsteb %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x08
++
++#CHECK: vsteb %v17, 2475(%r7,%r8), 12
++0xe7 0x17 0x89 0xab 0xc8 0x08
++
++#CHECK: vsteb %v31, 4095(%r15,%r15), 15
++0xe7 0xff 0xff 0xff 0xf8 0x08
++
++#CHECK: vstef %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x0b
++
++#CHECK: vstef %v17, 2475(%r7,%r8), 2
++0xe7 0x17 0x89 0xab 0x28 0x0b
++
++#CHECK: vstef %v31, 4095(%r15,%r15), 3
++0xe7 0xff 0xff 0xff 0x38 0x0b
++
++#CHECK: vsteg %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x0a
++
++#CHECK: vsteg %v17, 2475(%r7,%r8), 1
++0xe7 0x17 0x89 0xab 0x18 0x0a
++
++#CHECK: vsteg %v31, 4095(%r15,%r15), 1
++0xe7 0xff 0xff 0xff 0x18 0x0a
++
++#CHECK: vsteh %v0, 0, 0
++0xe7 0x00 0x00 0x00 0x00 0x09
++
++#CHECK: vsteh %v17, 2475(%r7,%r8), 5
++0xe7 0x17 0x89 0xab 0x58 0x09
++
++#CHECK: vsteh %v31, 4095(%r15,%r15), 7
++0xe7 0xff 0xff 0xff 0x78 0x09
++
++#CHECK: vstl %v0, %r0, 0
++0xe7 0x00 0x00 0x00 0x00 0x3f
++
++#CHECK: vstl %v18, %r3, 1383(%r4)
++0xe7 0x23 0x45 0x67 0x08 0x3f
++
++#CHECK: vstl %v31, %r15, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x08 0x3f
++
++#CHECK: vstm %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x3e
++
++#CHECK: vstm %v12, %v18, 1110(%r3)
++0xe7 0xc2 0x34 0x56 0x04 0x3e
++
++#CHECK: vstm %v31, %v31, 4095(%r15)
++0xe7 0xff 0xff 0xff 0x0c 0x3e
++
++#CHECK: vstrcb %v0, %v0, %v0, %v0, 0
++0xe7 0x00 0x00 0x00 0x00 0x8a
++
++#CHECK: vstrcb %v0, %v0, %v0, %v0, 12
++0xe7 0x00 0x00 0xc0 0x00 0x8a
++
++#CHECK: vstrcb %v18, %v3, %v20, %v5, 0
++0xe7 0x23 0x40 0x00 0x5a 0x8a
++
++#CHECK: vstrcb %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x40 0xff 0x8a
++
++#CHECK: vstrcbs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0x90 0xff 0x8a
++
++#CHECK: vstrczb %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf0 0x60 0xff 0x8a
++
++#CHECK: vstrczbs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf0 0xb0 0xff 0x8a
++
++#CHECK: vstrcf %v0, %v0, %v0, %v0, 0
++0xe7 0x00 0x02 0x00 0x00 0x8a
++
++#CHECK: vstrcf %v0, %v0, %v0, %v0, 12
++0xe7 0x00 0x02 0xc0 0x00 0x8a
++
++#CHECK: vstrcf %v18, %v3, %v20, %v5, 0
++0xe7 0x23 0x42 0x00 0x5a 0x8a
++
++#CHECK: vstrcf %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf2 0x40 0xff 0x8a
++
++#CHECK: vstrcfs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf2 0x90 0xff 0x8a
++
++#CHECK: vstrczf %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf2 0x60 0xff 0x8a
++
++#CHECK: vstrczfs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf2 0xb0 0xff 0x8a
++
++#CHECK: vstrch %v0, %v0, %v0, %v0, 0
++0xe7 0x00 0x01 0x00 0x00 0x8a
++
++#CHECK: vstrch %v0, %v0, %v0, %v0, 12
++0xe7 0x00 0x01 0xc0 0x00 0x8a
++
++#CHECK: vstrch %v18, %v3, %v20, %v5, 0
++0xe7 0x23 0x41 0x00 0x5a 0x8a
++
++#CHECK: vstrch %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf1 0x40 0xff 0x8a
++
++#CHECK: vstrchs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf1 0x90 0xff 0x8a
++
++#CHECK: vstrczh %v31, %v31, %v31, %v31, 4
++0xe7 0xff 0xf1 0x60 0xff 0x8a
++
++#CHECK: vstrczhs %v31, %v31, %v31, %v31, 8
++0xe7 0xff 0xf1 0xb0 0xff 0x8a
++
++#CHECK: vsumgh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x65
++
++#CHECK: vsumgh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x65
++
++#CHECK: vsumgh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x65
++
++#CHECK: vsumgf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x65
++
++#CHECK: vsumgf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x65
++
++#CHECK: vsumgf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x65
++
++#CHECK: vsumqf %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0x67
++
++#CHECK: vsumqf %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x2a 0x67
++
++#CHECK: vsumqf %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x2e 0x67
++
++#CHECK: vsumqg %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x30 0x67
++
++#CHECK: vsumqg %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x3a 0x67
++
++#CHECK: vsumqg %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x3e 0x67
++
++#CHECK: vsumb %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x64
++
++#CHECK: vsumb %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x64
++
++#CHECK: vsumb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x64
++
++#CHECK: vsumh %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0x64
++
++#CHECK: vsumh %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x1a 0x64
++
++#CHECK: vsumh %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x1e 0x64
++
++#CHECK: vtm %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd8
++
++#CHECK: vtm %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd8
++
++#CHECK: vtm %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd8
++
++#CHECK: vuphb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd7
++
++#CHECK: vuphb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd7
++
++#CHECK: vuphb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd7
++
++#CHECK: vuphf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xd7
++
++#CHECK: vuphf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xd7
++
++#CHECK: vuphf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xd7
++
++#CHECK: vuphh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xd7
++
++#CHECK: vuphh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xd7
++
++#CHECK: vuphh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xd7
++
++#CHECK: vuplhb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd5
++
++#CHECK: vuplhb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd5
++
++#CHECK: vuplhb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd5
++
++#CHECK: vuplhf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xd5
++
++#CHECK: vuplhf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xd5
++
++#CHECK: vuplhf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xd5
++
++#CHECK: vuplhh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xd5
++
++#CHECK: vuplhh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xd5
++
++#CHECK: vuplhh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xd5
++
++#CHECK: vuplb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd6
++
++#CHECK: vuplb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd6
++
++#CHECK: vuplb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd6
++
++#CHECK: vuplf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xd6
++
++#CHECK: vuplf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xd6
++
++#CHECK: vuplf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xd6
++
++#CHECK: vuplhw %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xd6
++
++#CHECK: vuplhw %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xd6
++
++#CHECK: vuplhw %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xd6
++
++#CHECK: vupllb %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0xd4
++
++#CHECK: vupllb %v19, %v14
++0xe7 0x3e 0x00 0x00 0x08 0xd4
++
++#CHECK: vupllb %v31, %v31
++0xe7 0xff 0x00 0x00 0x0c 0xd4
++
++#CHECK: vupllf %v0, %v0
++0xe7 0x00 0x00 0x00 0x20 0xd4
++
++#CHECK: vupllf %v19, %v14
++0xe7 0x3e 0x00 0x00 0x28 0xd4
++
++#CHECK: vupllf %v31, %v31
++0xe7 0xff 0x00 0x00 0x2c 0xd4
++
++#CHECK: vupllh %v0, %v0
++0xe7 0x00 0x00 0x00 0x10 0xd4
++
++#CHECK: vupllh %v19, %v14
++0xe7 0x3e 0x00 0x00 0x18 0xd4
++
++#CHECK: vupllh %v31, %v31
++0xe7 0xff 0x00 0x00 0x1c 0xd4
++
++#CHECK: vx %v0, %v0, %v0
++0xe7 0x00 0x00 0x00 0x00 0x6d
++
++#CHECK: vx %v18, %v3, %v20
++0xe7 0x23 0x40 0x00 0x0a 0x6d
++
++#CHECK: vx %v31, %v31, %v31
++0xe7 0xff 0xf0 0x00 0x0e 0x6d
++
++#CHECK: wcdgb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc3
++
++#CHECK: wcdgb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc3
++
++#CHECK: wcdgb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc3
++
++#CHECK: wcdlgb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc1
++
++#CHECK: wcdlgb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc1
++
++#CHECK: wcdlgb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc1
++
++#CHECK: wcgdb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc2
++
++#CHECK: wcgdb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc2
++
++#CHECK: wcgdb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc2
++
++#CHECK: wclgdb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc0
++
++#CHECK: wclgdb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc0
++
++#CHECK: wclgdb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc0
++
++#CHECK: wfadb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xe3
++
++#CHECK: wfadb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xe3
++
++#CHECK: wfadb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xe3
++
++#CHECK: wfcdb %f0, %f0
++0xe7 0x00 0x00 0x00 0x30 0xcb
++
++#CHECK: wfcdb %v19, %f14
++0xe7 0x3e 0x00 0x00 0x38 0xcb
++
++#CHECK: wfcdb %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xcb
++
++#CHECK: wfcedb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xe8
++
++#CHECK: wfcedb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xe8
++
++#CHECK: wfcedb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xe8
++
++#CHECK: wfcedbs %f0, %f0, %f0
++0xe7 0x00 0x00 0x18 0x30 0xe8
++
++#CHECK: wfcedbs %v18, %f3, %v20
++0xe7 0x23 0x40 0x18 0x3a 0xe8
++
++#CHECK: wfcedbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x18 0x3e 0xe8
++
++#CHECK: wfchdb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xeb
++
++#CHECK: wfchdb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xeb
++
++#CHECK: wfchdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xeb
++
++#CHECK: wfchdbs %f0, %f0, %f0
++0xe7 0x00 0x00 0x18 0x30 0xeb
++
++#CHECK: wfchdbs %v18, %f3, %v20
++0xe7 0x23 0x40 0x18 0x3a 0xeb
++
++#CHECK: wfchdbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x18 0x3e 0xeb
++
++#CHECK: wfchedb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xea
++
++#CHECK: wfchedb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xea
++
++#CHECK: wfchedb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xea
++
++#CHECK: wfchedbs %f0, %f0, %f0
++0xe7 0x00 0x00 0x18 0x30 0xea
++
++#CHECK: wfchedbs %v18, %f3, %v20
++0xe7 0x23 0x40 0x18 0x3a 0xea
++
++#CHECK: wfchedbs %v31, %v31, %v31
++0xe7 0xff 0xf0 0x18 0x3e 0xea
++
++#CHECK: wfddb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xe5
++
++#CHECK: wfddb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xe5
++
++#CHECK: wfddb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xe5
++
++#CHECK: wfidb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc7
++
++#CHECK: wfidb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc7
++
++#CHECK: wfidb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc7
++
++#CHECK: wfkdb %f0, %f0
++0xe7 0x00 0x00 0x00 0x30 0xca
++
++#CHECK: wfkdb %v19, %f14
++0xe7 0x3e 0x00 0x00 0x38 0xca
++
++#CHECK: wfkdb %v31, %v31
++0xe7 0xff 0x00 0x00 0x3c 0xca
++
++#CHECK: wflcdb %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xcc
++
++#CHECK: wflcdb %v19, %f14
++0xe7 0x3e 0x00 0x08 0x38 0xcc
++
++#CHECK: wflcdb %v31, %v31
++0xe7 0xff 0x00 0x08 0x3c 0xcc
++
++#CHECK: wflndb %f0, %f0
++0xe7 0x00 0x00 0x18 0x30 0xcc
++
++#CHECK: wflndb %v19, %f14
++0xe7 0x3e 0x00 0x18 0x38 0xcc
++
++#CHECK: wflndb %v31, %v31
++0xe7 0xff 0x00 0x18 0x3c 0xcc
++
++#CHECK: wflpdb %f0, %f0
++0xe7 0x00 0x00 0x28 0x30 0xcc
++
++#CHECK: wflpdb %v19, %f14
++0xe7 0x3e 0x00 0x28 0x38 0xcc
++
++#CHECK: wflpdb %v31, %v31
++0xe7 0xff 0x00 0x28 0x3c 0xcc
++
++#CHECK: wfmadb %f0, %f0, %f0, %f0
++0xe7 0x00 0x03 0x08 0x00 0x8f
++
++#CHECK: wfmadb %f3, %v20, %f5, %v22
++0xe7 0x34 0x53 0x08 0x65 0x8f
++
++#CHECK: wfmadb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf3 0x08 0xff 0x8f
++
++#CHECK: wfmdb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xe7
++
++#CHECK: wfmdb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xe7
++
++#CHECK: wfmdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xe7
++
++#CHECK: wfmsdb %f0, %f0, %f0, %f0
++0xe7 0x00 0x03 0x08 0x00 0x8e
++
++#CHECK: wfmsdb %f3, %v20, %f5, %v22
++0xe7 0x34 0x53 0x08 0x65 0x8e
++
++#CHECK: wfmsdb %v31, %v31, %v31, %v31
++0xe7 0xff 0xf3 0x08 0xff 0x8e
++
++#CHECK: wfsdb %f0, %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xe2
++
++#CHECK: wfsdb %v18, %f3, %v20
++0xe7 0x23 0x40 0x08 0x3a 0xe2
++
++#CHECK: wfsdb %v31, %v31, %v31
++0xe7 0xff 0xf0 0x08 0x3e 0xe2
++
++#CHECK: wfsqdb %f0, %f0
++0xe7 0x00 0x00 0x08 0x30 0xce
++
++#CHECK: wfsqdb %v19, %f14
++0xe7 0x3e 0x00 0x08 0x38 0xce
++
++#CHECK: wfsqdb %v31, %v31
++0xe7 0xff 0x00 0x08 0x3c 0xce
++
++#CHECK: wftcidb %f0, %f0, 0
++0xe7 0x00 0x00 0x08 0x30 0x4a
++
++#CHECK: wftcidb %v19, %f4, 1383
++0xe7 0x34 0x56 0x78 0x38 0x4a
++
++#CHECK: wftcidb %v31, %v31, 4095
++0xe7 0xff 0xff 0xf8 0x3c 0x4a
++
++#CHECK: wldeb %f0, %f0
++0xe7 0x00 0x00 0x08 0x20 0xc4
++
++#CHECK: wldeb %v19, %f14
++0xe7 0x3e 0x00 0x08 0x28 0xc4
++
++#CHECK: wldeb %v31, %v31
++0xe7 0xff 0x00 0x08 0x2c 0xc4
++
++#CHECK: wledb %f0, %f0, 0, 0
++0xe7 0x00 0x00 0x08 0x30 0xc5
++
++#CHECK: wledb %v19, %f14, 4, 10
++0xe7 0x3e 0x00 0xac 0x38 0xc5
++
++#CHECK: wledb %v31, %v31, 7, 15
++0xe7 0xff 0x00 0xff 0x3c 0xc5
+Index: llvm-36/test/MC/Disassembler/SystemZ/insns.txt
+===================================================================
+--- llvm-36.orig/test/MC/Disassembler/SystemZ/insns.txt
++++ llvm-36/test/MC/Disassembler/SystemZ/insns.txt
+@@ -2503,6 +2503,15 @@
+ # CHECK: ear %r15, %a15
+ 0xb2 0x4f 0x00 0xff
+ 
++# CHECK: etnd %r0
++0xb2 0xec 0x00 0x00
++
++# CHECK: etnd %r15
++0xb2 0xec 0x00 0xf0
++
++# CHECK: etnd %r7
++0xb2 0xec 0x00 0x70 ++ + # CHECK: fidbr %f0, 0, %f0 + 0xb3 0x5f 0x00 0x00 + +@@ -6034,6 +6043,36 @@ + # CHECK: ny %r15, 0 + 0xe3 0xf0 0x00 0x00 0x00 0x54 + ++# CHECK: ntstg %r0, -524288 ++0xe3 0x00 0x00 0x00 0x80 0x25 ++ ++# CHECK: ntstg %r0, -1 ++0xe3 0x00 0x0f 0xff 0xff 0x25 ++ ++# CHECK: ntstg %r0, 0 ++0xe3 0x00 0x00 0x00 0x00 0x25 ++ ++# CHECK: ntstg %r0, 1 ++0xe3 0x00 0x00 0x01 0x00 0x25 ++ ++# CHECK: ntstg %r0, 524287 ++0xe3 0x00 0x0f 0xff 0x7f 0x25 ++ ++# CHECK: ntstg %r0, 0(%r1) ++0xe3 0x00 0x10 0x00 0x00 0x25 ++ ++# CHECK: ntstg %r0, 0(%r15) ++0xe3 0x00 0xf0 0x00 0x00 0x25 ++ ++# CHECK: ntstg %r0, 524287(%r1,%r15) ++0xe3 0x01 0xff 0xff 0x7f 0x25 ++ ++# CHECK: ntstg %r0, 524287(%r15,%r1) ++0xe3 0x0f 0x1f 0xff 0x7f 0x25 ++ ++# CHECK: ntstg %r15, 0 ++0xe3 0xf0 0x00 0x00 0x00 0x25 ++ + # CHECK: oc 0(1), 0 + 0xd6 0x00 0x00 0x00 0x00 0x00 + +@@ -6334,6 +6373,33 @@ + # CHECK: pfd 15, 0 + 0xe3 0xf0 0x00 0x00 0x00 0x36 + ++# CHECK: popcnt %r0, %r0 ++0xb9 0xe1 0x00 0x00 ++ ++# CHECK: popcnt %r0, %r15 ++0xb9 0xe1 0x00 0x0f ++ ++# CHECK: popcnt %r15, %r0 ++0xb9 0xe1 0x00 0xf0 ++ ++# CHECK: popcnt %r7, %r8 ++0xb9 0xe1 0x00 0x78 ++ ++# CHECK: ppa %r0, %r0, 0 ++0xb2 0xe8 0x00 0x00 ++ ++# CHECK: ppa %r0, %r0, 15 ++0xb2 0xe8 0xf0 0x00 ++ ++# CHECK: ppa %r0, %r15, 0 ++0xb2 0xe8 0x00 0x0f ++ ++# CHECK: ppa %r4, %r6, 7 ++0xb2 0xe8 0x70 0x46 ++ ++# CHECK: ppa %r15, %r0, 0 ++0xb2 0xe8 0x00 0xf0 ++ + # CHECK: risbg %r0, %r0, 0, 0, 0 + 0xec 0x00 0x00 0x00 0x00 0x55 + +@@ -6355,6 +6421,27 @@ + # CHECK: risbg %r4, %r5, 6, 7, 8 + 0xec 0x45 0x06 0x07 0x08 0x55 + ++# CHECK: risbgn %r0, %r0, 0, 0, 0 ++0xec 0x00 0x00 0x00 0x00 0x59 ++ ++# CHECK: risbgn %r0, %r0, 0, 0, 63 ++0xec 0x00 0x00 0x00 0x3f 0x59 ++ ++# CHECK: risbgn %r0, %r0, 0, 255, 0 ++0xec 0x00 0x00 0xff 0x00 0x59 ++ ++# CHECK: risbgn %r0, %r0, 255, 0, 0 ++0xec 0x00 0xff 0x00 0x00 0x59 ++ ++# CHECK: risbgn %r0, %r15, 0, 0, 0 ++0xec 0x0f 0x00 0x00 0x00 0x59 ++ ++# CHECK: risbgn %r15, %r0, 0, 0, 0 ++0xec 0xf0 0x00 0x00 0x00 0x59 ++ ++# CHECK: risbgn %r4, %r5, 6, 7, 8 ++0xec 0x45 0x06 0x07 0x08 0x59 ++ + # CHECK: risbhg %r0, %r0, 0, 0, 0 + 0xec 0x00 0x00 0x00 0x00 0x5d + +@@ -8029,6 +8116,93 @@ + # CHECK: sy %r15, 0 + 0xe3 0xf0 0x00 0x00 0x00 0x5b + ++# CHECK: tabort 0 ++0xb2 0xfc 0x00 0x00 ++ ++# CHECK: tabort 0(%r1) ++0xb2 0xfc 0x10 0x00 ++ ++# CHECK: tabort 0(%r15) ++0xb2 0xfc 0xf0 0x00 ++ ++# CHECK: tabort 4095 ++0xb2 0xfc 0x0f 0xff ++ ++# CHECK: tabort 4095(%r1) ++0xb2 0xfc 0x1f 0xff ++ ++# CHECK: tabort 4095(%r15) ++0xb2 0xfc 0xff 0xff ++ ++# CHECK: tbegin 0, 0 ++0xe5 0x60 0x00 0x00 0x00 0x00 ++ ++# CHECK: tbegin 4095, 0 ++0xe5 0x60 0x0f 0xff 0x00 0x00 ++ ++# CHECK: tbegin 0, 0 ++0xe5 0x60 0x00 0x00 0x00 0x00 ++ ++# CHECK: tbegin 0, 1 ++0xe5 0x60 0x00 0x00 0x00 0x01 ++ ++# CHECK: tbegin 0, 32767 ++0xe5 0x60 0x00 0x00 0x7f 0xff ++ ++# CHECK: tbegin 0, 32768 ++0xe5 0x60 0x00 0x00 0x80 0x00 ++ ++# CHECK: tbegin 0, 65535 ++0xe5 0x60 0x00 0x00 0xff 0xff ++ ++# CHECK: tbegin 0(%r1), 42 ++0xe5 0x60 0x10 0x00 0x00 0x2a ++ ++# CHECK: tbegin 0(%r15), 42 ++0xe5 0x60 0xf0 0x00 0x00 0x2a ++ ++# CHECK: tbegin 4095(%r1), 42 ++0xe5 0x60 0x1f 0xff 0x00 0x2a ++ ++# CHECK: tbegin 4095(%r15), 42 ++0xe5 0x60 0xff 0xff 0x00 0x2a ++ ++# CHECK: tbeginc 0, 0 ++0xe5 0x61 0x00 0x00 0x00 0x00 ++ ++# CHECK: tbeginc 4095, 0 ++0xe5 0x61 0x0f 0xff 0x00 0x00 ++ ++# CHECK: tbeginc 0, 0 ++0xe5 0x61 0x00 0x00 0x00 0x00 ++ ++# CHECK: tbeginc 0, 1 ++0xe5 0x61 0x00 0x00 0x00 0x01 ++ ++# CHECK: tbeginc 0, 32767 ++0xe5 0x61 0x00 0x00 0x7f 0xff ++ ++# CHECK: tbeginc 0, 32768 ++0xe5 0x61 
0x00 0x00 0x80 0x00 ++ ++# CHECK: tbeginc 0, 65535 ++0xe5 0x61 0x00 0x00 0xff 0xff ++ ++# CHECK: tbeginc 0(%r1), 42 ++0xe5 0x61 0x10 0x00 0x00 0x2a ++ ++# CHECK: tbeginc 0(%r15), 42 ++0xe5 0x61 0xf0 0x00 0x00 0x2a ++ ++# CHECK: tbeginc 4095(%r1), 42 ++0xe5 0x61 0x1f 0xff 0x00 0x2a ++ ++# CHECK: tbeginc 4095(%r15), 42 ++0xe5 0x61 0xff 0xff 0x00 0x2a ++ ++# CHECK: tend ++0xb2 0xf8 0x00 0x00 ++ + # CHECK: tm 0, 0 + 0x91 0x00 0x00 0x00 + +Index: llvm-36/test/MC/SystemZ/fixups.s +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/SystemZ/fixups.s +@@ -0,0 +1,119 @@ ++ ++# RUN: llvm-mc -triple s390x-unknown-unknown --show-encoding %s | FileCheck %s ++ ++# RUN: llvm-mc -triple s390x-unknown-unknown -filetype=obj %s | \ ++# RUN: llvm-readobj -r | FileCheck %s -check-prefix=CHECK-REL ++ ++# CHECK: larl %r14, target # encoding: [0xc0,0xe0,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2 ++ .align 16 ++ larl %r14, target ++ ++# CHECK: larl %r14, target@GOT # encoding: [0xc0,0xe0,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@GOT+2, kind: FK_390_PC32DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_GOTENT target 0x2 ++ .align 16 ++ larl %r14, target@got ++ ++# CHECK: larl %r14, target@INDNTPOFF # encoding: [0xc0,0xe0,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@INDNTPOFF+2, kind: FK_390_PC32DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_TLS_IEENT target 0x2 ++ .align 16 ++ larl %r14, target@indntpoff ++ ++# CHECK: brasl %r14, target # encoding: [0xc0,0xe5,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC32DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PC32DBL target 0x2 ++ .align 16 ++ brasl %r14, target ++ ++# CHECK: brasl %r14, target@PLT # encoding: [0xc0,0xe5,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 ++ .align 16 ++ brasl %r14, target@plt ++ ++# CHECK: brasl %r14, target@PLT:tls_gdcall:sym # encoding: [0xc0,0xe5,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL ++# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSGD, kind: FK_390_TLS_CALL ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0 ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 ++ .align 16 ++ brasl %r14, target@plt:tls_gdcall:sym ++ ++# CHECK: brasl %r14, target@PLT:tls_ldcall:sym # encoding: [0xc0,0xe5,A,A,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC32DBL ++# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0 ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT32DBL target 0x2 ++ .align 16 ++ brasl %r14, target@plt:tls_ldcall:sym ++ ++# CHECK: bras %r14, target # encoding: [0xa7,0xe5,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target+2, kind: FK_390_PC16DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PC16DBL target 0x2 ++ .align 16 ++ bras %r14, target ++ ++# CHECK: bras %r14, target@PLT # encoding: [0xa7,0xe5,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 ++ .align 16 ++ bras %r14, target@plt ++ ++# CHECK: bras %r14, target@PLT:tls_gdcall:sym # encoding: [0xa7,0xe5,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL ++# CHECK-NEXT: # fixup B - offset: 0, 
value: sym@TLSGD, kind: FK_390_TLS_CALL ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GDCALL sym 0x0 ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 ++ .align 16 ++ bras %r14, target@plt:tls_gdcall:sym ++ ++# CHECK: bras %r14, target@PLT:tls_ldcall:sym # encoding: [0xa7,0xe5,A,A] ++# CHECK-NEXT: # fixup A - offset: 2, value: target@PLT+2, kind: FK_390_PC16DBL ++# CHECK-NEXT: # fixup B - offset: 0, value: sym@TLSLDM, kind: FK_390_TLS_CALL ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDCALL sym 0x0 ++# CHECK-REL: 0x{{[0-9A-F]*2}} R_390_PLT16DBL target 0x2 ++ .align 16 ++ bras %r14, target@plt:tls_ldcall:sym ++ ++ ++# Data relocs ++# llvm-mc does not show any "encoding" string for data, so we just check the relocs ++ ++# CHECK-REL: .rela.data ++ .data ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE64 target 0x0 ++ .align 16 ++ .quad target@ntpoff ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO64 target 0x0 ++ .align 16 ++ .quad target@dtpoff ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM64 target 0x0 ++ .align 16 ++ .quad target@tlsldm ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD64 target 0x0 ++ .align 16 ++ .quad target@tlsgd ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LE32 target 0x0 ++ .align 16 ++ .long target@ntpoff ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDO32 target 0x0 ++ .align 16 ++ .long target@dtpoff ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_LDM32 target 0x0 ++ .align 16 ++ .long target@tlsldm ++ ++# CHECK-REL: 0x{{[0-9A-F]*0}} R_390_TLS_GD32 target 0x0 ++ .align 16 ++ .long target@tlsgd ++ +Index: llvm-36/test/MC/SystemZ/insn-bad-z13.s +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/SystemZ/insn-bad-z13.s +@@ -0,0 +1,1201 @@ ++# For z13 only. ++# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=z13 < %s 2> %t ++# RUN: FileCheck < %t %s ++ ++#CHECK: error: invalid operand ++#CHECK: lcbb %r0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: lcbb %r0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: lcbb %r0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: lcbb %r0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: lcbb %r0, 0(%v1,%r2), 0 ++ ++ lcbb %r0, 0, -1 ++ lcbb %r0, 0, 16 ++ lcbb %r0, -1, 0 ++ lcbb %r0, 4096, 0 ++ lcbb %r0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vcdgb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vcdgb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vcdgb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vcdgb %v0, %v0, 16, 0 ++ ++ vcdgb %v0, %v0, 0, -1 ++ vcdgb %v0, %v0, 0, 16 ++ vcdgb %v0, %v0, -1, 0 ++ vcdgb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vcdlgb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vcdlgb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vcdlgb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vcdlgb %v0, %v0, 16, 0 ++ ++ vcdlgb %v0, %v0, 0, -1 ++ vcdlgb %v0, %v0, 0, 16 ++ vcdlgb %v0, %v0, -1, 0 ++ vcdlgb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vcgdb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vcgdb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vcgdb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vcgdb %v0, %v0, 16, 0 ++ ++ vcgdb %v0, %v0, 0, -1 ++ vcgdb %v0, %v0, 0, 16 ++ vcgdb %v0, %v0, -1, 0 ++ vcgdb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vclgdb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vclgdb %v0, %v0, 0, 16 ++#CHECK: error: invalid 
operand ++#CHECK: vclgdb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vclgdb %v0, %v0, 16, 0 ++ ++ vclgdb %v0, %v0, 0, -1 ++ vclgdb %v0, %v0, 0, 16 ++ vclgdb %v0, %v0, -1, 0 ++ vclgdb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: verimb %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verimb %v0, %v0, %v0, 256 ++ ++ verimb %v0, %v0, %v0, -1 ++ verimb %v0, %v0, %v0, 256 ++ ++#CHECK: error: invalid operand ++#CHECK: verimf %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verimf %v0, %v0, %v0, 256 ++ ++ verimf %v0, %v0, %v0, -1 ++ verimf %v0, %v0, %v0, 256 ++ ++#CHECK: error: invalid operand ++#CHECK: verimg %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verimg %v0, %v0, %v0, 256 ++ ++ verimg %v0, %v0, %v0, -1 ++ verimg %v0, %v0, %v0, 256 ++ ++#CHECK: error: invalid operand ++#CHECK: verimh %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verimh %v0, %v0, %v0, 256 ++ ++ verimh %v0, %v0, %v0, -1 ++ verimh %v0, %v0, %v0, 256 ++ ++#CHECK: error: invalid operand ++#CHECK: verllb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verllb %v0, %v0, 4096 ++ ++ verllb %v0, %v0, -1 ++ verllb %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: verllf %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verllf %v0, %v0, 4096 ++ ++ verllf %v0, %v0, -1 ++ verllf %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: verllg %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verllg %v0, %v0, 4096 ++ ++ verllg %v0, %v0, -1 ++ verllg %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: verllh %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: verllh %v0, %v0, 4096 ++ ++ verllh %v0, %v0, -1 ++ verllh %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: veslb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: veslb %v0, %v0, 4096 ++ ++ veslb %v0, %v0, -1 ++ veslb %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: veslf %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: veslf %v0, %v0, 4096 ++ ++ veslf %v0, %v0, -1 ++ veslf %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: veslg %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: veslg %v0, %v0, 4096 ++ ++ veslg %v0, %v0, -1 ++ veslg %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: veslh %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: veslh %v0, %v0, 4096 ++ ++ veslh %v0, %v0, -1 ++ veslh %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrab %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrab %v0, %v0, 4096 ++ ++ vesrab %v0, %v0, -1 ++ vesrab %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesraf %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesraf %v0, %v0, 4096 ++ ++ vesraf %v0, %v0, -1 ++ vesraf %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrag %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrag %v0, %v0, 4096 ++ ++ vesrag %v0, %v0, -1 ++ vesrag %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrah %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrah %v0, %v0, 4096 ++ ++ vesrah %v0, %v0, -1 ++ vesrah %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrlb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrlb %v0, %v0, 4096 ++ ++ vesrlb %v0, %v0, -1 ++ vesrlb %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrlf %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrlf %v0, %v0, 4096 ++ ++ vesrlf %v0, %v0, -1 ++ vesrlf %v0, %v0, 4096 ++ 
++#CHECK: error: invalid operand ++#CHECK: vesrlg %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrlg %v0, %v0, 4096 ++ ++ vesrlg %v0, %v0, -1 ++ vesrlg %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vesrlh %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vesrlh %v0, %v0, 4096 ++ ++ vesrlh %v0, %v0, -1 ++ vesrlh %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaeb %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaeb %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaeb %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaeb %v0, %v0, %v0, 0, 0 ++ ++ vfaeb %v0, %v0, %v0, -1 ++ vfaeb %v0, %v0, %v0, 16 ++ vfaeb %v0, %v0 ++ vfaeb %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaebs %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaebs %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaebs %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaebs %v0, %v0, %v0, 0, 0 ++ ++ vfaebs %v0, %v0, %v0, -1 ++ vfaebs %v0, %v0, %v0, 16 ++ vfaebs %v0, %v0 ++ vfaebs %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaef %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaef %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaef %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaef %v0, %v0, %v0, 0, 0 ++ ++ vfaef %v0, %v0, %v0, -1 ++ vfaef %v0, %v0, %v0, 16 ++ vfaef %v0, %v0 ++ vfaef %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaeh %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaeh %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaeh %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaeh %v0, %v0, %v0, 0, 0 ++ ++ vfaeh %v0, %v0, %v0, -1 ++ vfaeh %v0, %v0, %v0, 16 ++ vfaeh %v0, %v0 ++ vfaeh %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaezh %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaezh %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaezh %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaezh %v0, %v0, %v0, 0, 0 ++ ++ vfaezh %v0, %v0, %v0, -1 ++ vfaezh %v0, %v0, %v0, 16 ++ vfaezh %v0, %v0 ++ vfaezh %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfaezfs %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfaezfs %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vfaezfs %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vfaezfs %v0, %v0, %v0, 0, 0 ++ ++ vfaezfs %v0, %v0, %v0, -1 ++ vfaezfs %v0, %v0, %v0, 16 ++ vfaezfs %v0, %v0 ++ vfaezfs %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vfidb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vfidb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vfidb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vfidb %v0, %v0, 16, 0 ++ ++ vfidb %v0, %v0, 0, -1 ++ vfidb %v0, %v0, 0, 16 ++ vfidb %v0, %v0, -1, 0 ++ vfidb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vftcidb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vftcidb %v0, %v0, 4096 ++ ++ vftcidb %v0, %v0, -1 ++ vftcidb %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vgbm %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vgbm %v0, 0x10000 ++ ++ vgbm %v0, -1 ++ vgbm %v0, 0x10000 ++ ++#CHECK: error: vector index required ++#CHECK: vgef %v0, 0(%r1), 0 ++#CHECK: error: vector index required ++#CHECK: vgef %v0, 0(%r2,%r1), 0 ++#CHECK: error: invalid operand ++#CHECK: vgef %v0, 0(%v0,%r1), -1 ++#CHECK: error: 
invalid operand
++#CHECK: vgef %v0, 0(%v0,%r1), 4
++#CHECK: error: invalid operand
++#CHECK: vgef %v0, -1(%v0,%r1), 0
++#CHECK: error: invalid operand
++#CHECK: vgef %v0, 4096(%v0,%r1), 0
++
++ vgef %v0, 0(%r1), 0
++ vgef %v0, 0(%r2,%r1), 0
++ vgef %v0, 0(%v0,%r1), -1
++ vgef %v0, 0(%v0,%r1), 4
++ vgef %v0, -1(%v0,%r1), 0
++ vgef %v0, 4096(%v0,%r1), 0
++
++#CHECK: error: vector index required
++#CHECK: vgeg %v0, 0(%r1), 0
++#CHECK: error: vector index required
++#CHECK: vgeg %v0, 0(%r2,%r1), 0
++#CHECK: error: invalid operand
++#CHECK: vgeg %v0, 0(%v0,%r1), -1
++#CHECK: error: invalid operand
++#CHECK: vgeg %v0, 0(%v0,%r1), 2
++#CHECK: error: invalid operand
++#CHECK: vgeg %v0, -1(%v0,%r1), 0
++#CHECK: error: invalid operand
++#CHECK: vgeg %v0, 4096(%v0,%r1), 0
++
++ vgeg %v0, 0(%r1), 0
++ vgeg %v0, 0(%r2,%r1), 0
++ vgeg %v0, 0(%v0,%r1), -1
++ vgeg %v0, 0(%v0,%r1), 2
++ vgeg %v0, -1(%v0,%r1), 0
++ vgeg %v0, 4096(%v0,%r1), 0
++
++#CHECK: error: invalid operand
++#CHECK: vgmb %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vgmb %v0, 0, 256
++#CHECK: error: invalid operand
++#CHECK: vgmb %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vgmb %v0, 256, 0
++
++ vgmb %v0, 0, -1
++ vgmb %v0, 0, 256
++ vgmb %v0, -1, 0
++ vgmb %v0, 256, 0
++
++#CHECK: error: invalid operand
++#CHECK: vgmf %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vgmf %v0, 0, 256
++#CHECK: error: invalid operand
++#CHECK: vgmf %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vgmf %v0, 256, 0
++
++ vgmf %v0, 0, -1
++ vgmf %v0, 0, 256
++ vgmf %v0, -1, 0
++ vgmf %v0, 256, 0
++
++#CHECK: error: invalid operand
++#CHECK: vgmg %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vgmg %v0, 0, 256
++#CHECK: error: invalid operand
++#CHECK: vgmg %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vgmg %v0, 256, 0
++
++ vgmg %v0, 0, -1
++ vgmg %v0, 0, 256
++ vgmg %v0, -1, 0
++ vgmg %v0, 256, 0
++
++#CHECK: error: invalid operand
++#CHECK: vgmh %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vgmh %v0, 0, 256
++#CHECK: error: invalid operand
++#CHECK: vgmh %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vgmh %v0, 256, 0
++
++ vgmh %v0, 0, -1
++ vgmh %v0, 0, 256
++ vgmh %v0, -1, 0
++ vgmh %v0, 256, 0
++
++#CHECK: error: invalid operand
++#CHECK: vl %v0, -1
++#CHECK: error: invalid operand
++#CHECK: vl %v0, 4096
++#CHECK: error: invalid use of vector addressing
++#CHECK: vl %v0, 0(%v1,%r2)
++
++ vl %v0, -1
++ vl %v0, 4096
++ vl %v0, 0(%v1,%r2)
++
++#CHECK: error: invalid operand
++#CHECK: vlbb %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vlbb %v0, 0, 16
++#CHECK: error: invalid operand
++#CHECK: vlbb %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vlbb %v0, 4096, 0
++#CHECK: error: invalid use of vector addressing
++#CHECK: vlbb %v0, 0(%v1,%r2), 0
++
++ vlbb %v0, 0, -1
++ vlbb %v0, 0, 16
++ vlbb %v0, -1, 0
++ vlbb %v0, 4096, 0
++ vlbb %v0, 0(%v1,%r2), 0
++
++#CHECK: error: invalid operand
++#CHECK: vleb %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vleb %v0, 0, 16
++#CHECK: error: invalid operand
++#CHECK: vleb %v0, -1, 0
++#CHECK: error: invalid operand
++#CHECK: vleb %v0, 4096, 0
++#CHECK: error: invalid use of vector addressing
++#CHECK: vleb %v0, 0(%v1,%r2), 0
++
++ vleb %v0, 0, -1
++ vleb %v0, 0, 16
++ vleb %v0, -1, 0
++ vleb %v0, 4096, 0
++ vleb %v0, 0(%v1,%r2), 0
++
++#CHECK: error: invalid operand
++#CHECK: vledb %v0, %v0, 0, -1
++#CHECK: error: invalid operand
++#CHECK: vledb %v0, %v0, 0, 16
++#CHECK: error: invalid operand
++#CHECK: vledb %v0, %v0, -1, 0
++#CHECK: error: invalid
operand ++#CHECK: vledb %v0, %v0, 16, 0 ++ ++ vledb %v0, %v0, 0, -1 ++ vledb %v0, %v0, 0, 16 ++ vledb %v0, %v0, -1, 0 ++ vledb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vlef %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlef %v0, 0, 4 ++#CHECK: error: invalid operand ++#CHECK: vlef %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vlef %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vlef %v0, 0(%v1,%r2), 0 ++ ++ vlef %v0, 0, -1 ++ vlef %v0, 0, 4 ++ vlef %v0, -1, 0 ++ vlef %v0, 4096, 0 ++ vlef %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleg %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleg %v0, 0, 2 ++#CHECK: error: invalid operand ++#CHECK: vleg %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vleg %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vleg %v0, 0(%v1,%r2), 0 ++ ++ vleg %v0, 0, -1 ++ vleg %v0, 0, 2 ++ vleg %v0, -1, 0 ++ vleg %v0, 4096, 0 ++ vleg %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleh %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleh %v0, 0, 8 ++#CHECK: error: invalid operand ++#CHECK: vleh %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vleh %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vleh %v0, 0(%v1,%r2), 0 ++ ++ vleh %v0, 0, -1 ++ vleh %v0, 0, 8 ++ vleh %v0, -1, 0 ++ vleh %v0, 4096, 0 ++ vleh %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleib %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleib %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vleib %v0, -32769, 0 ++#CHECK: error: invalid operand ++#CHECK: vleib %v0, 32768, 0 ++ ++ vleib %v0, 0, -1 ++ vleib %v0, 0, 16 ++ vleib %v0, -32769, 0 ++ vleib %v0, 32768, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleif %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleif %v0, 0, 4 ++#CHECK: error: invalid operand ++#CHECK: vleif %v0, -32769, 0 ++#CHECK: error: invalid operand ++#CHECK: vleif %v0, 32768, 0 ++ ++ vleif %v0, 0, -1 ++ vleif %v0, 0, 4 ++ vleif %v0, -32769, 0 ++ vleif %v0, 32768, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleig %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleig %v0, 0, 2 ++#CHECK: error: invalid operand ++#CHECK: vleig %v0, -32769, 0 ++#CHECK: error: invalid operand ++#CHECK: vleig %v0, 32768, 0 ++ ++ vleig %v0, 0, -1 ++ vleig %v0, 0, 2 ++ vleig %v0, -32769, 0 ++ vleig %v0, 32768, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vleih %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vleih %v0, 0, 8 ++#CHECK: error: invalid operand ++#CHECK: vleih %v0, -32769, 0 ++#CHECK: error: invalid operand ++#CHECK: vleih %v0, 32768, 0 ++ ++ vleih %v0, 0, -1 ++ vleih %v0, 0, 8 ++ vleih %v0, -32769, 0 ++ vleih %v0, 32768, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vlgvb %r0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlgvb %r0, %v0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlgvb %r0, %v0, 0(%r0) ++ ++ vlgvb %r0, %v0, -1 ++ vlgvb %r0, %v0, 4096 ++ vlgvb %r0, %v0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlgvf %r0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlgvf %r0, %v0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlgvf %r0, %v0, 0(%r0) ++ ++ vlgvf %r0, %v0, -1 ++ vlgvf %r0, %v0, 4096 ++ vlgvf %r0, %v0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlgvg %r0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlgvg %r0, %v0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlgvg %r0, %v0, 0(%r0) ++ ++ 
vlgvg %r0, %v0, -1 ++ vlgvg %r0, %v0, 4096 ++ vlgvg %r0, %v0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlgvh %r0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlgvh %r0, %v0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlgvh %r0, %v0, 0(%r0) ++ ++ vlgvh %r0, %v0, -1 ++ vlgvh %r0, %v0, 4096 ++ vlgvh %r0, %v0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vll %v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vll %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vll %v0, %r0, 0(%r0) ++ ++ vll %v0, %r0, -1 ++ vll %v0, %r0, 4096 ++ vll %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vllezb %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vllezb %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vllezb %v0, 0(%v1,%r2) ++ ++ vllezb %v0, -1 ++ vllezb %v0, 4096 ++ vllezb %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vllezf %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vllezf %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vllezf %v0, 0(%v1,%r2) ++ ++ vllezf %v0, -1 ++ vllezf %v0, 4096 ++ vllezf %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vllezg %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vllezg %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vllezg %v0, 0(%v1,%r2) ++ ++ vllezg %v0, -1 ++ vllezg %v0, 4096 ++ vllezg %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vllezh %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vllezh %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vllezh %v0, 0(%v1,%r2) ++ ++ vllezh %v0, -1 ++ vllezh %v0, 4096 ++ vllezh %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vlm %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlm %v0, %v0, 4096 ++ ++ vlm %v0, %v0, -1 ++ vlm %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vlrepb %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlrepb %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vlrepb %v0, 0(%v1,%r2) ++ ++ vlrepb %v0, -1 ++ vlrepb %v0, 4096 ++ vlrepb %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vlrepf %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlrepf %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vlrepf %v0, 0(%v1,%r2) ++ ++ vlrepf %v0, -1 ++ vlrepf %v0, 4096 ++ vlrepf %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vlrepg %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlrepg %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vlrepg %v0, 0(%v1,%r2) ++ ++ vlrepg %v0, -1 ++ vlrepg %v0, 4096 ++ vlrepg %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vlreph %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlreph %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vlreph %v0, 0(%v1,%r2) ++ ++ vlreph %v0, -1 ++ vlreph %v0, 4096 ++ vlreph %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vlvgb %v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlvgb %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlvgb %v0, %r0, 0(%r0) ++ ++ vlvgb %v0, %r0, -1 ++ vlvgb %v0, %r0, 4096 ++ vlvgb %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlvgf %v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlvgf %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlvgf %v0, %r0, 0(%r0) ++ ++ vlvgf %v0, %r0, -1 ++ vlvgf %v0, %r0, 4096 ++ vlvgf %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlvgg 
%v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlvgg %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlvgg %v0, %r0, 0(%r0) ++ ++ vlvgg %v0, %r0, -1 ++ vlvgg %v0, %r0, 4096 ++ vlvgg %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vlvgh %v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vlvgh %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vlvgh %v0, %r0, 0(%r0) ++ ++ vlvgh %v0, %r0, -1 ++ vlvgh %v0, %r0, 4096 ++ vlvgh %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vpdi %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vpdi %v0, %v0, %v0, 16 ++ ++ vpdi %v0, %v0, %v0, -1 ++ vpdi %v0, %v0, %v0, 16 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vrepb %v0, %v0, 65536 ++ ++ vrepb %v0, %v0, -1 ++ vrepb %v0, %v0, 65536 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepf %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vrepf %v0, %v0, 65536 ++ ++ vrepf %v0, %v0, -1 ++ vrepf %v0, %v0, 65536 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepg %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vrepg %v0, %v0, 65536 ++ ++ vrepg %v0, %v0, -1 ++ vrepg %v0, %v0, 65536 ++ ++#CHECK: error: invalid operand ++#CHECK: vreph %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vreph %v0, %v0, 65536 ++ ++ vreph %v0, %v0, -1 ++ vreph %v0, %v0, 65536 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepib %v0, -32769 ++#CHECK: error: invalid operand ++#CHECK: vrepib %v0, 32768 ++ ++ vrepib %v0, -32769 ++ vrepib %v0, 32768 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepif %v0, -32769 ++#CHECK: error: invalid operand ++#CHECK: vrepif %v0, 32768 ++ ++ vrepif %v0, -32769 ++ vrepif %v0, 32768 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepig %v0, -32769 ++#CHECK: error: invalid operand ++#CHECK: vrepig %v0, 32768 ++ ++ vrepig %v0, -32769 ++ vrepig %v0, 32768 ++ ++#CHECK: error: invalid operand ++#CHECK: vrepih %v0, -32769 ++#CHECK: error: invalid operand ++#CHECK: vrepih %v0, 32768 ++ ++ vrepih %v0, -32769 ++ vrepih %v0, 32768 ++ ++#CHECK: error: vector index required ++#CHECK: vscef %v0, 0(%r1), 0 ++#CHECK: error: vector index required ++#CHECK: vscef %v0, 0(%r2,%r1), 0 ++#CHECK: error: invalid operand ++#CHECK: vscef %v0, 0(%v0,%r1), -1 ++#CHECK: error: invalid operand ++#CHECK: vscef %v0, 0(%v0,%r1), 4 ++#CHECK: error: invalid operand ++#CHECK: vscef %v0, -1(%v0,%r1), 0 ++#CHECK: error: invalid operand ++#CHECK: vscef %v0, 4096(%v0,%r1), 0 ++ ++ vscef %v0, 0(%r1), 0 ++ vscef %v0, 0(%r2,%r1), 0 ++ vscef %v0, 0(%v0,%r1), -1 ++ vscef %v0, 0(%v0,%r1), 4 ++ vscef %v0, -1(%v0,%r1), 0 ++ vscef %v0, 4096(%v0,%r1), 0 ++ ++#CHECK: error: vector index required ++#CHECK: vsceg %v0, 0(%r1), 0 ++#CHECK: error: vector index required ++#CHECK: vsceg %v0, 0(%r2,%r1), 0 ++#CHECK: error: invalid operand ++#CHECK: vsceg %v0, 0(%v0,%r1), -1 ++#CHECK: error: invalid operand ++#CHECK: vsceg %v0, 0(%v0,%r1), 2 ++#CHECK: error: invalid operand ++#CHECK: vsceg %v0, -1(%v0,%r1), 0 ++#CHECK: error: invalid operand ++#CHECK: vsceg %v0, 4096(%v0,%r1), 0 ++ ++ vsceg %v0, 0(%r1), 0 ++ vsceg %v0, 0(%r2,%r1), 0 ++ vsceg %v0, 0(%v0,%r1), -1 ++ vsceg %v0, 0(%v0,%r1), 2 ++ vsceg %v0, -1(%v0,%r1), 0 ++ vsceg %v0, 4096(%v0,%r1), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vsldb %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vsldb %v0, %v0, %v0, 256 ++ ++ vsldb %v0, %v0, %v0, -1 ++ vsldb %v0, %v0, %v0, 256 ++ ++#CHECK: error: invalid operand ++#CHECK: vst %v0, -1 
++#CHECK: error: invalid operand ++#CHECK: vst %v0, 4096 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vst %v0, 0(%v1,%r2) ++ ++ vst %v0, -1 ++ vst %v0, 4096 ++ vst %v0, 0(%v1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: vsteb %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vsteb %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: vsteb %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vsteb %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vsteb %v0, 0(%v1,%r2), 0 ++ ++ vsteb %v0, 0, -1 ++ vsteb %v0, 0, 16 ++ vsteb %v0, -1, 0 ++ vsteb %v0, 4096, 0 ++ vsteb %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstef %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstef %v0, 0, 4 ++#CHECK: error: invalid operand ++#CHECK: vstef %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vstef %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vstef %v0, 0(%v1,%r2), 0 ++ ++ vstef %v0, 0, -1 ++ vstef %v0, 0, 4 ++ vstef %v0, -1, 0 ++ vstef %v0, 4096, 0 ++ vstef %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vsteg %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vsteg %v0, 0, 2 ++#CHECK: error: invalid operand ++#CHECK: vsteg %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vsteg %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vsteg %v0, 0(%v1,%r2), 0 ++ ++ vsteg %v0, 0, -1 ++ vsteg %v0, 0, 2 ++ vsteg %v0, -1, 0 ++ vsteg %v0, 4096, 0 ++ vsteg %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vsteh %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: vsteh %v0, 0, 8 ++#CHECK: error: invalid operand ++#CHECK: vsteh %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: vsteh %v0, 4096, 0 ++#CHECK: error: invalid use of vector addressing ++#CHECK: vsteh %v0, 0(%v1,%r2), 0 ++ ++ vsteh %v0, 0, -1 ++ vsteh %v0, 0, 8 ++ vsteh %v0, -1, 0 ++ vsteh %v0, 4096, 0 ++ vsteh %v0, 0(%v1,%r2), 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstl %v0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstl %v0, %r0, 4096 ++#CHECK: error: %r0 used in an address ++#CHECK: vstl %v0, %r0, 0(%r0) ++ ++ vstl %v0, %r0, -1 ++ vstl %v0, %r0, 4096 ++ vstl %v0, %r0, 0(%r0) ++ ++#CHECK: error: invalid operand ++#CHECK: vstm %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstm %v0, %v0, 4096 ++ ++ vstm %v0, %v0, -1 ++ vstm %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrcb %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrcb %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrcb %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrcb %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrcb %v0, %v0, %v0, %v0, -1 ++ vstrcb %v0, %v0, %v0, %v0, 16 ++ vstrcb %v0, %v0, %v0 ++ vstrcb %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrcbs %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrcbs %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrcbs %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrcbs %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrcbs %v0, %v0, %v0, %v0, -1 ++ vstrcbs %v0, %v0, %v0, %v0, 16 ++ vstrcbs %v0, %v0, %v0 ++ vstrcbs %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrcf %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrcf %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrcf %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrcf %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrcf %v0, 
%v0, %v0, %v0, -1 ++ vstrcf %v0, %v0, %v0, %v0, 16 ++ vstrcf %v0, %v0, %v0 ++ vstrcf %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrch %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrch %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrch %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrch %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrch %v0, %v0, %v0, %v0, -1 ++ vstrch %v0, %v0, %v0, %v0, 16 ++ vstrch %v0, %v0, %v0 ++ vstrch %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrczh %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrczh %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrczh %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrczh %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrczh %v0, %v0, %v0, %v0, -1 ++ vstrczh %v0, %v0, %v0, %v0, 16 ++ vstrczh %v0, %v0, %v0 ++ vstrczh %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: vstrczfs %v0, %v0, %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: vstrczfs %v0, %v0, %v0, %v0, 16 ++#CHECK: error: too few operands ++#CHECK: vstrczfs %v0, %v0, %v0 ++#CHECK: error: invalid operand ++#CHECK: vstrczfs %v0, %v0, %v0, %v0, 0, 0 ++ ++ vstrczfs %v0, %v0, %v0, %v0, -1 ++ vstrczfs %v0, %v0, %v0, %v0, 16 ++ vstrczfs %v0, %v0, %v0 ++ vstrczfs %v0, %v0, %v0, %v0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wcdgb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wcdgb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: wcdgb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wcdgb %v0, %v0, 16, 0 ++ ++ wcdgb %v0, %v0, 0, -1 ++ wcdgb %v0, %v0, 0, 16 ++ wcdgb %v0, %v0, -1, 0 ++ wcdgb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wcdlgb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wcdlgb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: wcdlgb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wcdlgb %v0, %v0, 16, 0 ++ ++ wcdlgb %v0, %v0, 0, -1 ++ wcdlgb %v0, %v0, 0, 16 ++ wcdlgb %v0, %v0, -1, 0 ++ wcdlgb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wcgdb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wcgdb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: wcgdb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wcgdb %v0, %v0, 16, 0 ++ ++ wcgdb %v0, %v0, 0, -1 ++ wcgdb %v0, %v0, 0, 16 ++ wcgdb %v0, %v0, -1, 0 ++ wcgdb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wclgdb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wclgdb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: wclgdb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wclgdb %v0, %v0, 16, 0 ++ ++ wclgdb %v0, %v0, 0, -1 ++ wclgdb %v0, %v0, 0, 16 ++ wclgdb %v0, %v0, -1, 0 ++ wclgdb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wfidb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wfidb %v0, %v0, 0, 16 ++#CHECK: error: invalid operand ++#CHECK: wfidb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wfidb %v0, %v0, 16, 0 ++ ++ wfidb %v0, %v0, 0, -1 ++ wfidb %v0, %v0, 0, 16 ++ wfidb %v0, %v0, -1, 0 ++ wfidb %v0, %v0, 16, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: wftcidb %v0, %v0, -1 ++#CHECK: error: invalid operand ++#CHECK: wftcidb %v0, %v0, 4096 ++ ++ wftcidb %v0, %v0, -1 ++ wftcidb %v0, %v0, 4096 ++ ++#CHECK: error: invalid operand ++#CHECK: wledb %v0, %v0, 0, -1 ++#CHECK: error: invalid operand ++#CHECK: wledb %v0, %v0, 0, 16 
++#CHECK: error: invalid operand ++#CHECK: wledb %v0, %v0, -1, 0 ++#CHECK: error: invalid operand ++#CHECK: wledb %v0, %v0, 16, 0 ++ ++ wledb %v0, %v0, 0, -1 ++ wledb %v0, %v0, 0, 16 ++ wledb %v0, %v0, -1, 0 ++ wledb %v0, %v0, 16, 0 +Index: llvm-36/test/MC/SystemZ/insn-bad-z196.s +=================================================================== +--- llvm-36.orig/test/MC/SystemZ/insn-bad-z196.s ++++ llvm-36/test/MC/SystemZ/insn-bad-z196.s +@@ -244,6 +244,11 @@ + cxlgbr %f0, 16, %r0, 0 + cxlgbr %f2, 0, %r0, 0 + ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: etnd %r7 ++ ++ etnd %r7 ++ + #CHECK: error: invalid operand + #CHECK: fidbra %f0, 0, %f0, -1 + #CHECK: error: invalid operand +@@ -546,6 +551,21 @@ + locr %r0,%r0,-1 + locr %r0,%r0,16 + ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: ntstg %r0, 524287(%r1,%r15) ++ ++ ntstg %r0, 524287(%r1,%r15) ++ ++#CHECK: error: {{(instruction requires: processor-assist)?}} ++#CHECK: ppa %r4, %r6, 7 ++ ++ ppa %r4, %r6, 7 ++ ++#CHECK: error: {{(instruction requires: miscellaneous-extensions)?}} ++#CHECK: risbgn %r1, %r2, 0, 0, 0 ++ ++ risbgn %r1, %r2, 0, 0, 0 ++ + #CHECK: error: invalid operand + #CHECK: risbhg %r0,%r0,0,0,-1 + #CHECK: error: invalid operand +@@ -685,3 +705,24 @@ + stocg %r0,-524289,1 + stocg %r0,524288,1 + stocg %r0,0(%r1,%r2),1 ++ ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: tabort 4095(%r1) ++ ++ tabort 4095(%r1) ++ ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: tbegin 4095(%r1), 42 ++ ++ tbegin 4095(%r1), 42 ++ ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: tbeginc 4095(%r1), 42 ++ ++ tbeginc 4095(%r1), 42 ++ ++#CHECK: error: {{(instruction requires: transactional-execution)?}} ++#CHECK: tend ++ ++ tend ++ +Index: llvm-36/test/MC/SystemZ/insn-bad-zEC12.s +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/SystemZ/insn-bad-zEC12.s +@@ -0,0 +1,1578 @@ ++# For zEC12 only. 
++# RUN: not llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 < %s 2> %t ++# RUN: FileCheck < %t %s ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: lcbb %r0, 0, 0 ++ ++ lcbb %r0, 0, 0 ++ ++#CHECK: error: invalid operand ++#CHECK: ntstg %r0, -524289 ++#CHECK: error: invalid operand ++#CHECK: ntstg %r0, 524288 ++ ++ ntstg %r0, -524289 ++ ntstg %r0, 524288 ++ ++#CHECK: error: invalid operand ++#CHECK: ppa %r0, %r0, -1 ++#CHECK: error: invalid operand ++#CHECK: ppa %r0, %r0, 16 ++ ++ ppa %r0, %r0, -1 ++ ppa %r0, %r0, 16 ++ ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,0,0,-1 ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,0,0,64 ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,0,-1,0 ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,0,256,0 ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,-1,0,0 ++#CHECK: error: invalid operand ++#CHECK: risbgn %r0,%r0,256,0,0 ++ ++ risbgn %r0,%r0,0,0,-1 ++ risbgn %r0,%r0,0,0,64 ++ risbgn %r0,%r0,0,-1,0 ++ risbgn %r0,%r0,0,256,0 ++ risbgn %r0,%r0,-1,0,0 ++ risbgn %r0,%r0,256,0,0 ++ ++#CHECK: error: invalid operand ++#CHECK: tabort -1 ++#CHECK: error: invalid operand ++#CHECK: tabort 4096 ++#CHECK: error: invalid use of indexed addressing ++#CHECK: tabort 0(%r1,%r2) ++ ++ tabort -1 ++ tabort 4096 ++ tabort 0(%r1,%r2) ++ ++#CHECK: error: invalid operand ++#CHECK: tbegin -1, 0 ++#CHECK: error: invalid operand ++#CHECK: tbegin 4096, 0 ++#CHECK: error: invalid use of indexed addressing ++#CHECK: tbegin 0(%r1,%r2), 0 ++#CHECK: error: invalid operand ++#CHECK: tbegin 0, -1 ++#CHECK: error: invalid operand ++#CHECK: tbegin 0, 65536 ++ ++ tbegin -1, 0 ++ tbegin 4096, 0 ++ tbegin 0(%r1,%r2), 0 ++ tbegin 0, -1 ++ tbegin 0, 65536 ++ ++#CHECK: error: invalid operand ++#CHECK: tbeginc -1, 0 ++#CHECK: error: invalid operand ++#CHECK: tbeginc 4096, 0 ++#CHECK: error: invalid use of indexed addressing ++#CHECK: tbeginc 0(%r1,%r2), 0 ++#CHECK: error: invalid operand ++#CHECK: tbeginc 0, -1 ++#CHECK: error: invalid operand ++#CHECK: tbeginc 0, 65536 ++ ++ tbeginc -1, 0 ++ tbeginc 4096, 0 ++ tbeginc 0(%r1,%r2), 0 ++ tbeginc 0, -1 ++ tbeginc 0, 65536 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vab %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vag %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vah %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaq %v0, %v0, %v0 ++ ++ vab %v0, %v0, %v0 ++ vaf %v0, %v0, %v0 ++ vag %v0, %v0, %v0 ++ vah %v0, %v0, %v0 ++ vaq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaccb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaccf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaccg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vacch %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vaccq %v0, %v0, %v0 ++ ++ vaccb %v0, %v0, %v0 ++ vaccf %v0, %v0, %v0 ++ vaccg %v0, %v0, %v0 ++ vacch %v0, %v0, %v0 ++ vaccq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vacccq %v0, %v0, %v0 ++ ++ vacccq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vacq %v0, %v0, %v0 ++ ++ vacq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavgb %v0, %v0, %v0 ++#CHECK: error: {{(instruction 
requires: vector)?}} ++#CHECK: vavgf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavgg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavgh %v0, %v0, %v0 ++ ++ vavgb %v0, %v0, %v0 ++ vavgf %v0, %v0, %v0 ++ vavgg %v0, %v0, %v0 ++ vavgh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavglb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavglf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavglg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vavglh %v0, %v0, %v0 ++ ++ vavglb %v0, %v0, %v0 ++ vavglf %v0, %v0, %v0 ++ vavglg %v0, %v0, %v0 ++ vavglh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vcdgb %v0, %v0, 0, 0 ++ ++ vcdgb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vcdlgb %v0, %v0, 0, 0 ++ ++ vcdlgb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqfs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vceqgs %v0, %v0, %v0 ++ ++ vceqb %v0, %v0, %v0 ++ vceqf %v0, %v0, %v0 ++ vceqg %v0, %v0, %v0 ++ vceqh %v0, %v0, %v0 ++ vceqbs %v0, %v0, %v0 ++ vceqhs %v0, %v0, %v0 ++ vceqfs %v0, %v0, %v0 ++ vceqgs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vcgdb %v0, %v0, 0, 0 ++ ++ vcgdb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchfs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchgs %v0, %v0, %v0 ++ ++ vchb %v0, %v0, %v0 ++ vchf %v0, %v0, %v0 ++ vchg %v0, %v0, %v0 ++ vchh %v0, %v0, %v0 ++ vchbs %v0, %v0, %v0 ++ vchhs %v0, %v0, %v0 ++ vchfs %v0, %v0, %v0 ++ vchgs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlfs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vchlgs %v0, %v0, %v0 ++ ++ vchlb %v0, %v0, %v0 ++ vchlf %v0, %v0, %v0 ++ vchlg %v0, %v0, %v0 ++ vchlh 
%v0, %v0, %v0 ++ vchlbs %v0, %v0, %v0 ++ vchlhs %v0, %v0, %v0 ++ vchlfs %v0, %v0, %v0 ++ vchlgs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vcksm %v0, %v0, %v0 ++ ++ vcksm %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vclgdb %v0, %v0, 0, 0 ++ ++ vclgdb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vclzb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vclzf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vclzg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vclzh %v0, %v0 ++ ++ vclzb %v0, %v0 ++ vclzf %v0, %v0 ++ vclzg %v0, %v0 ++ vclzh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vctzb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vctzf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vctzg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vctzh %v0, %v0 ++ ++ vctzb %v0, %v0 ++ vctzf %v0, %v0 ++ vctzg %v0, %v0 ++ vctzh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vecb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vecf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vecg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vech %v0, %v0 ++ ++ vecb %v0, %v0 ++ vecf %v0, %v0 ++ vecg %v0, %v0 ++ vech %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verimb %v0, %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verimf %v0, %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verimg %v0, %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verimh %v0, %v0, %v0, 0 ++ ++ verimb %v0, %v0, %v0, 0 ++ verimf %v0, %v0, %v0, 0 ++ verimg %v0, %v0, %v0, 0 ++ verimh %v0, %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veclb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veclf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veclg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veclh %v0, %v0 ++ ++ veclb %v0, %v0 ++ veclf %v0, %v0 ++ veclg %v0, %v0 ++ veclh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllvb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllvf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllvg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllvh %v0, %v0, %v0 ++ ++ verllvb %v0, %v0, %v0 ++ verllvf %v0, %v0, %v0 ++ verllvg %v0, %v0, %v0 ++ verllvh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllb %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllf %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllg %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: verllh %v0, %v0, 0 ++ ++ verllb %v0, %v0, 0 ++ verllf %v0, %v0, 0 ++ verllg %v0, %v0, 0 ++ verllh %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslvb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslvf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslvg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslvh %v0, %v0, 
%v0 ++ ++ veslvb %v0, %v0, %v0 ++ veslvf %v0, %v0, %v0 ++ veslvg %v0, %v0, %v0 ++ veslvh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslb %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslf %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslg %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: veslh %v0, %v0, 0 ++ ++ veslb %v0, %v0, 0 ++ veslf %v0, %v0, 0 ++ veslg %v0, %v0, 0 ++ veslh %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesravb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesravf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesravg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesravh %v0, %v0, %v0 ++ ++ vesravb %v0, %v0, %v0 ++ vesravf %v0, %v0, %v0 ++ vesravg %v0, %v0, %v0 ++ vesravh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrab %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesraf %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrag %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrah %v0, %v0, 0 ++ ++ vesrab %v0, %v0, 0 ++ vesraf %v0, %v0, 0 ++ vesrag %v0, %v0, 0 ++ vesrah %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlvb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlvf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlvg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlvh %v0, %v0, %v0 ++ ++ vesrlvb %v0, %v0, %v0 ++ vesrlvf %v0, %v0, %v0 ++ vesrlvg %v0, %v0, %v0 ++ vesrlvh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlb %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlf %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlg %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vesrlh %v0, %v0, 0 ++ ++ vesrlb %v0, %v0, 0 ++ vesrlf %v0, %v0, 0 ++ vesrlg %v0, %v0, 0 ++ vesrlh %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfadb %v0, %v0, %v0 ++ ++ vfadb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfcedb %v0, %v0, %v0 ++#CHECK: vfcedbs %v0, %v0, %v0 ++ ++ vfcedb %v0, %v0, %v0 ++ vfcedbs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfchdb %v0, %v0, %v0 ++#CHECK: vfchdbs %v0, %v0, %v0 ++ ++ vfchdb %v0, %v0, %v0 ++ vfchdbs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfddb %v0, %v0, %v0 ++ ++ vfddb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaeb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaebs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaeh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaehs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} 
++#CHECK: vfaef %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaefs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfaezfs %v0, %v0, %v0 ++ ++ vfaeb %v0, %v0, %v0 ++ vfaezb %v0, %v0, %v0 ++ vfaebs %v0, %v0, %v0 ++ vfaezbs %v0, %v0, %v0 ++ vfaeh %v0, %v0, %v0 ++ vfaezh %v0, %v0, %v0 ++ vfaehs %v0, %v0, %v0 ++ vfaezhs %v0, %v0, %v0 ++ vfaef %v0, %v0, %v0 ++ vfaezf %v0, %v0, %v0 ++ vfaefs %v0, %v0, %v0 ++ vfaezfs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeeb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeebs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeeh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeehs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeef %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeefs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeezfs %v0, %v0, %v0 ++ ++ vfeeb %v0, %v0, %v0 ++ vfeezb %v0, %v0, %v0 ++ vfeebs %v0, %v0, %v0 ++ vfeezbs %v0, %v0, %v0 ++ vfeeh %v0, %v0, %v0 ++ vfeezh %v0, %v0, %v0 ++ vfeehs %v0, %v0, %v0 ++ vfeezhs %v0, %v0, %v0 ++ vfeef %v0, %v0, %v0 ++ vfeezf %v0, %v0, %v0 ++ vfeefs %v0, %v0, %v0 ++ vfeezfs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeneb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenebs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezbs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfeneh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenehs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezhs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenef %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenefs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfenezfs %v0, %v0, %v0 ++ ++ vfeneb %v0, %v0, %v0 ++ vfenezb %v0, %v0, %v0 ++ vfenebs %v0, %v0, %v0 ++ vfenezbs %v0, %v0, %v0 ++ vfeneh %v0, %v0, %v0 ++ vfenezh %v0, %v0, %v0 ++ vfenehs %v0, %v0, %v0 ++ vfenezhs %v0, %v0, %v0 ++ vfenef %v0, %v0, %v0 ++ vfenezf %v0, %v0, %v0 ++ vfenefs %v0, %v0, %v0 ++ vfenezfs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfidb %v0, %v0, 0, 0 ++ ++ vfidb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrbs %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrh %v0, 
%v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrhs %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vistrfs %v0, %v0 ++ ++ vistrb %v0, %v0 ++ vistrbs %v0, %v0 ++ vistrh %v0, %v0 ++ vistrhs %v0, %v0 ++ vistrf %v0, %v0 ++ vistrfs %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vflcdb %v0, %v0 ++ ++ vflcdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vflndb %v0, %v0 ++ ++ vflndb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vflpdb %v0, %v0 ++ ++ vflpdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfmadb %v0, %v0, %v0, %v0 ++ ++ vfmadb %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfmdb %v0, %v0, %v0 ++ ++ vfmdb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfmsdb %v0, %v0, %v0, %v0 ++ ++ vfmsdb %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfsdb %v0, %v0, %v0 ++ ++ vfsdb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vfsqdb %v0, %v0 ++ ++ vfsqdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vftcidb %v0, %v0, 0 ++ ++ vftcidb %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgbm %v0, 0 ++ ++ vgbm %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgef %v0, 0(%v0, %r1), 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgeg %v0, 0(%v0, %r1), 0 ++ ++ vgef %v0, 0(%v0, %r1), 0 ++ vgeg %v0, 0(%v0, %r1), 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmab %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmaf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmag %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmah %v0, %v0, %v0, %v0 ++ ++ vgfmab %v0, %v0, %v0, %v0 ++ vgfmaf %v0, %v0, %v0, %v0 ++ vgfmag %v0, %v0, %v0, %v0 ++ vgfmah %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgfmh %v0, %v0, %v0 ++ ++ vgfmb %v0, %v0, %v0 ++ vgfmf %v0, %v0, %v0 ++ vgfmg %v0, %v0, %v0 ++ vgfmh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgmb %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgmf %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgmg %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vgmh %v0, 0, 0 ++ ++ vgmb %v0, 0, 0 ++ vgmf %v0, 0, 0 ++ vgmg %v0, 0, 0 ++ vgmh %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vl %v0, 0 ++ ++ vl %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlbb %v0, 0, 0 ++ ++ vlbb %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlcb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlcf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlcg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlch %v0, %v0 ++ ++ vlcb %v0, %v0 ++ vlcf %v0, %v0 ++ vlcg %v0, %v0 
++ vlch %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vldeb %v0, %v0 ++ ++ vldeb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleb %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlef %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleg %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleh %v0, 0, 0 ++ ++ vleb %v0, 0, 0 ++ vlef %v0, 0, 0 ++ vleg %v0, 0, 0 ++ vleh %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vledb %v0, %v0, 0, 0 ++ ++ vledb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleib %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleif %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleig %v0, 0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vleih %v0, 0, 0 ++ ++ vleib %v0, 0, 0 ++ vleif %v0, 0, 0 ++ vleig %v0, 0, 0 ++ vleih %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlgvb %r0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlgvf %r0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlgvg %r0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlgvh %r0, %v0, 0 ++ ++ vlgvb %r0, %v0, 0 ++ vlgvf %r0, %v0, 0 ++ vlgvg %r0, %v0, 0 ++ vlgvh %r0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vll %v0, %r0, 0 ++ ++ vll %v0, %r0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vllezb %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vllezf %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vllezg %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vllezh %v0, 0 ++ ++ vllezb %v0, 0 ++ vllezf %v0, 0 ++ vllezg %v0, 0 ++ vllezh %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlm %v0, %v0, 0 ++ ++ vlm %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlpb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlpf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlpg %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlph %v0, %v0 ++ ++ vlpb %v0, %v0 ++ vlpf %v0, %v0 ++ vlpg %v0, %v0 ++ vlph %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlr %v0, %v0 ++ ++ vlr %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlrepb %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlrepf %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlrepg %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlreph %v0, 0 ++ ++ vlrepb %v0, 0 ++ vlrepf %v0, 0 ++ vlrepg %v0, 0 ++ vlreph %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlvgb %v0, %r0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlvgf %v0, %r0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlvgg %v0, %r0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlvgh %v0, %r0, 0 ++ ++ vlvgb %v0, %r0, 0 ++ vlvgf %v0, %r0, 0 ++ vlvgg %v0, %r0, 0 ++ vlvgh %v0, %r0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vlvgp %v0, %r0, %r0 ++ ++ vlvgp %v0, %r0, %r0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaeb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: 
vector)?}} ++#CHECK: vmaef %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaeh %v0, %v0, %v0, %v0 ++ ++ vmaeb %v0, %v0, %v0, %v0 ++ vmaef %v0, %v0, %v0, %v0 ++ vmaeh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmahb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmahf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmahh %v0, %v0, %v0, %v0 ++ ++ vmahb %v0, %v0, %v0, %v0 ++ vmahf %v0, %v0, %v0, %v0 ++ vmahh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalhw %v0, %v0, %v0, %v0 ++ ++ vmalb %v0, %v0, %v0, %v0 ++ vmalf %v0, %v0, %v0, %v0 ++ vmalhw %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaleb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalef %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaleh %v0, %v0, %v0, %v0 ++ ++ vmaleb %v0, %v0, %v0, %v0 ++ vmalef %v0, %v0, %v0, %v0 ++ vmaleh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalhb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalhf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalhh %v0, %v0, %v0, %v0 ++ ++ vmalhb %v0, %v0, %v0, %v0 ++ vmalhf %v0, %v0, %v0, %v0 ++ vmalhh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalob %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmalof %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaloh %v0, %v0, %v0, %v0 ++ ++ vmalob %v0, %v0, %v0, %v0 ++ vmalof %v0, %v0, %v0, %v0 ++ vmaloh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaob %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaof %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmaoh %v0, %v0, %v0, %v0 ++ ++ vmaob %v0, %v0, %v0, %v0 ++ vmaof %v0, %v0, %v0, %v0 ++ vmaoh %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmeb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmef %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmeh %v0, %v0, %v0 ++ ++ vmeb %v0, %v0, %v0 ++ vmef %v0, %v0, %v0 ++ vmeh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmhb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmhf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmhh %v0, %v0, %v0 ++ ++ vmhb %v0, %v0, %v0 ++ vmhf %v0, %v0, %v0 ++ vmhh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlh %v0, %v0, %v0 ++ ++ vmlb %v0, %v0, %v0 ++ vmlf %v0, %v0, %v0 ++ vmlh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmleb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlef %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: 
vmleh %v0, %v0, %v0 ++ ++ vmleb %v0, %v0, %v0 ++ vmlef %v0, %v0, %v0 ++ vmleh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlhb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlhf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlhh %v0, %v0, %v0 ++ ++ vmlhb %v0, %v0, %v0 ++ vmlhf %v0, %v0, %v0 ++ vmlhh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlob %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmlof %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmloh %v0, %v0, %v0 ++ ++ vmlob %v0, %v0, %v0 ++ vmlof %v0, %v0, %v0 ++ vmloh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmng %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnh %v0, %v0, %v0 ++ ++ vmnb %v0, %v0, %v0 ++ vmnf %v0, %v0, %v0 ++ vmng %v0, %v0, %v0 ++ vmnh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnlb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnlf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnlg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmnlh %v0, %v0, %v0 ++ ++ vmnlb %v0, %v0, %v0 ++ vmnlf %v0, %v0, %v0 ++ vmnlg %v0, %v0, %v0 ++ vmnlh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmob %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmof %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmoh %v0, %v0, %v0 ++ ++ vmob %v0, %v0, %v0 ++ vmof %v0, %v0, %v0 ++ vmoh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrhb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrhf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrhg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrhh %v0, %v0, %v0 ++ ++ vmrhb %v0, %v0, %v0 ++ vmrhf %v0, %v0, %v0 ++ vmrhg %v0, %v0, %v0 ++ vmrhh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrlb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrlf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrlg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmrlh %v0, %v0, %v0 ++ ++ vmrlb %v0, %v0, %v0 ++ vmrlf %v0, %v0, %v0 ++ vmrlg %v0, %v0, %v0 ++ vmrlh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxh %v0, %v0, %v0 ++ ++ vmxb %v0, %v0, %v0 ++ vmxf %v0, %v0, %v0 ++ vmxg %v0, %v0, %v0 ++ vmxh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxlb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxlf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxlg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vmxlh %v0, %v0, %v0 ++ ++ vmxlb %v0, %v0, %v0 
++ vmxlf %v0, %v0, %v0 ++ vmxlg %v0, %v0, %v0 ++ vmxlh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vn %v0, %v0, %v0 ++ ++ vn %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vnc %v0, %v0, %v0 ++ ++ vnc %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vno %v0, %v0, %v0 ++ ++ vno %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vo %v0, %v0, %v0 ++ ++ vo %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vone %v0 ++ ++ vone %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpdi %v0, %v0, %v0, 0 ++ ++ vpdi %v0, %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vperm %v0, %v0, %v0, %v0 ++ ++ vperm %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpkf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpkg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpkh %v0, %v0, %v0 ++ ++ vpkf %v0, %v0, %v0 ++ vpkg %v0, %v0, %v0 ++ vpkh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpksf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpksg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpksh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpksfs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpksgs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpkshs %v0, %v0, %v0 ++ ++ vpksf %v0, %v0, %v0 ++ vpksg %v0, %v0, %v0 ++ vpksh %v0, %v0, %v0 ++ vpksfs %v0, %v0, %v0 ++ vpksgs %v0, %v0, %v0 ++ vpkshs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklsf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklsg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklsh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklsfs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklsgs %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpklshs %v0, %v0, %v0 ++ ++ vpklsf %v0, %v0, %v0 ++ vpklsg %v0, %v0, %v0 ++ vpklsh %v0, %v0, %v0 ++ vpklsfs %v0, %v0, %v0 ++ vpklsgs %v0, %v0, %v0 ++ vpklshs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vpopct %v0, %v0 ++ ++ vpopct %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepb %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepf %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepg %v0, %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vreph %v0, %v0, 0 ++ ++ vrepb %v0, %v0, 0 ++ vrepf %v0, %v0, 0 ++ vrepg %v0, %v0, 0 ++ vreph %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepib %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepif %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepig %v0, 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vrepih %v0, 0 ++ ++ vrepib %v0, 0 ++ vrepif %v0, 0 ++ vrepig %v0, 0 ++ vrepih %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsf %v0, %v0, %v0 ++#CHECK: error: {{(instruction 
requires: vector)?}} ++#CHECK: vsg %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsq %v0, %v0, %v0 ++ ++ vsb %v0, %v0, %v0 ++ vsf %v0, %v0, %v0 ++ vsg %v0, %v0, %v0 ++ vsh %v0, %v0, %v0 ++ vsq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsbcbiq %v0, %v0, %v0 ++ ++ vsbcbiq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsbiq %v0, %v0, %v0 ++ ++ vsbiq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscbib %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscbif %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscbig %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscbih %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscbiq %v0, %v0, %v0 ++ ++ vscbib %v0, %v0, %v0 ++ vscbif %v0, %v0, %v0 ++ vscbig %v0, %v0, %v0 ++ vscbih %v0, %v0, %v0 ++ vscbiq %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vscef %v0, 0(%v0, %r1), 0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsceg %v0, 0(%v0, %r1), 0 ++ ++ vscef %v0, 0(%v0, %r1), 0 ++ vsceg %v0, 0(%v0, %r1), 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsegb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsegf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsegh %v0, %v0 ++ ++ vsegb %v0, %v0 ++ vsegf %v0, %v0 ++ vsegh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsel %v0, %v0, %v0, %v0 ++ ++ vsel %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsl %v0, %v0, %v0 ++ ++ vsl %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vslb %v0, %v0, %v0 ++ ++ vslb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsldb %v0, %v0, %v0, 0 ++ ++ vsldb %v0, %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsra %v0, %v0, %v0 ++ ++ vsra %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsrab %v0, %v0, %v0 ++ ++ vsrab %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsrl %v0, %v0, %v0 ++ ++ vsrl %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsrlb %v0, %v0, %v0 ++ ++ vsrlb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vst %v0, 0 ++ ++ vst %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstl %v0, %r0, 0 ++ ++ vstl %v0, %r0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstm %v0, %v0, 0 ++ ++ vstm %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrcb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczb %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrcbs %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczbs %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrch %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczh %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrchs %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczhs %v0, %v0, %v0, 
%v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrcf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczf %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrcfs %v0, %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vstrczfs %v0, %v0, %v0, %v0 ++ ++ vstrcb %v0, %v0, %v0, %v0 ++ vstrczb %v0, %v0, %v0, %v0 ++ vstrcbs %v0, %v0, %v0, %v0 ++ vstrczbs %v0, %v0, %v0, %v0 ++ vstrch %v0, %v0, %v0, %v0 ++ vstrczh %v0, %v0, %v0, %v0 ++ vstrchs %v0, %v0, %v0, %v0 ++ vstrczhs %v0, %v0, %v0, %v0 ++ vstrcf %v0, %v0, %v0, %v0 ++ vstrczf %v0, %v0, %v0, %v0 ++ vstrcfs %v0, %v0, %v0, %v0 ++ vstrczfs %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumgh %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumgf %v0, %v0, %v0 ++ ++ vsumgh %v0, %v0, %v0 ++ vsumgf %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumqf %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumqg %v0, %v0, %v0 ++ ++ vsumqf %v0, %v0, %v0 ++ vsumqg %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumb %v0, %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vsumh %v0, %v0, %v0 ++ ++ vsumb %v0, %v0, %v0 ++ vsumh %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vtm %v0, %v0 ++ ++ vtm %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuphb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuphf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuphh %v0, %v0 ++ ++ vuphb %v0, %v0 ++ vuphf %v0, %v0 ++ vuphh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplhb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplhf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplhh %v0, %v0 ++ ++ vuplhb %v0, %v0 ++ vuplhf %v0, %v0 ++ vuplhh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vuplhw %v0, %v0 ++ ++ vuplb %v0, %v0 ++ vuplf %v0, %v0 ++ vuplhw %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vupllb %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vupllf %v0, %v0 ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vupllh %v0, %v0 ++ ++ vupllb %v0, %v0 ++ vupllf %v0, %v0 ++ vupllh %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vx %v0, %v0, %v0 ++ ++ vx %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: vzero %v0 ++ ++ vzero %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wcdgb %v0, %v0, 0, 0 ++ ++ wcdgb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wcdlgb %v0, %v0, 0, 0 ++ ++ wcdlgb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wcgdb %v0, %v0, 0, 0 ++ ++ wcgdb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wclgdb %v0, %v0, 0, 0 ++ ++ wclgdb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfadb %v0, %v0, %v0 ++ ++ wfadb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfcdb %v0, %v0 ++ ++ wfcdb %v0, %v0 ++ 
++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfcedb %v0, %v0, %v0 ++#CHECK: wfcedbs %v0, %v0, %v0 ++ ++ wfcedb %v0, %v0, %v0 ++ wfcedbs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfchdb %v0, %v0, %v0 ++#CHECK: wfchdbs %v0, %v0, %v0 ++ ++ wfchdb %v0, %v0, %v0 ++ wfchdbs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfchedb %v0, %v0, %v0 ++#CHECK: wfchedbs %v0, %v0, %v0 ++ ++ wfchedb %v0, %v0, %v0 ++ wfchedbs %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfddb %v0, %v0, %v0 ++ ++ wfddb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfidb %v0, %v0, 0, 0 ++ ++ wfidb %v0, %v0, 0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfkdb %v0, %v0 ++ ++ wfkdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wflcdb %v0, %v0 ++ ++ wflcdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wflndb %v0, %v0 ++ ++ wflndb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wflpdb %v0, %v0 ++ ++ wflpdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfmadb %v0, %v0, %v0, %v0 ++ ++ wfmadb %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfmdb %v0, %v0, %v0 ++ ++ wfmdb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfmsdb %v0, %v0, %v0, %v0 ++ ++ wfmsdb %v0, %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfsdb %v0, %v0, %v0 ++ ++ wfsdb %v0, %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wfsqdb %v0, %v0 ++ ++ wfsqdb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wftcidb %v0, %v0, 0 ++ ++ wftcidb %v0, %v0, 0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wldeb %v0, %v0 ++ ++ wldeb %v0, %v0 ++ ++#CHECK: error: {{(instruction requires: vector)?}} ++#CHECK: wledb %v0, %v0, 0, 0 ++ ++ wledb %v0, %v0, 0, 0 +Index: llvm-36/test/MC/SystemZ/insn-bad.s +=================================================================== +--- llvm-36.orig/test/MC/SystemZ/insn-bad.s ++++ llvm-36/test/MC/SystemZ/insn-bad.s +@@ -2666,6 +2666,11 @@ + pfdrl 1, 1 + pfdrl 1, 0x100000000 + ++#CHECK: error: {{(instruction requires: population-count)?}} ++#CHECK: popcnt %r0, %r0 ++ ++ popcnt %r0, %r0 ++ + #CHECK: error: invalid operand + #CHECK: risbg %r0,%r0,0,0,-1 + #CHECK: error: invalid operand +Index: llvm-36/test/MC/SystemZ/insn-good-z13.s +=================================================================== +--- /dev/null ++++ llvm-36/test/MC/SystemZ/insn-good-z13.s +@@ -0,0 +1,5039 @@ ++# For z13 and above. 
++# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=z13 -show-encoding %s \ ++# RUN: | FileCheck %s ++ ++#CHECK: lcbb %r0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x27] ++#CHECK: lcbb %r0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x27] ++#CHECK: lcbb %r0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x27] ++#CHECK: lcbb %r0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x27] ++#CHECK: lcbb %r0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x27] ++#CHECK: lcbb %r15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x27] ++#CHECK: lcbb %r2, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x80,0x27] ++ ++ lcbb %r0, 0, 0 ++ lcbb %r0, 0, 15 ++ lcbb %r0, 4095, 0 ++ lcbb %r0, 0(%r15), 0 ++ lcbb %r0, 0(%r15,%r1), 0 ++ lcbb %r15, 0, 0 ++ lcbb %r2, 1383(%r3,%r4), 8 ++ ++#CHECK: vab %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf3] ++#CHECK: vab %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf3] ++#CHECK: vab %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf3] ++#CHECK: vab %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf3] ++#CHECK: vab %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf3] ++ ++ vab %v0, %v0, %v0 ++ vab %v0, %v0, %v31 ++ vab %v0, %v31, %v0 ++ vab %v31, %v0, %v0 ++ vab %v18, %v3, %v20 ++ ++#CHECK: vaccb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf1] ++#CHECK: vaccb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf1] ++#CHECK: vaccb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf1] ++#CHECK: vaccb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf1] ++#CHECK: vaccb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf1] ++ ++ vaccb %v0, %v0, %v0 ++ vaccb %v0, %v0, %v31 ++ vaccb %v0, %v31, %v0 ++ vaccb %v31, %v0, %v0 ++ vaccb %v18, %v3, %v20 ++ ++#CHECK: vacccq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xb9] ++#CHECK: vacccq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xb9] ++#CHECK: vacccq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xb9] ++#CHECK: vacccq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xb9] ++#CHECK: vacccq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xb9] ++#CHECK: vacccq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xb9] ++ ++ vacccq %v0, %v0, %v0, %v0 ++ vacccq %v0, %v0, %v0, %v31 ++ vacccq %v0, %v0, %v31, %v0 ++ vacccq %v0, %v31, %v0, %v0 ++ vacccq %v31, %v0, %v0, %v0 ++ vacccq %v13, %v17, %v21, %v25 ++ ++#CHECK: vaccf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf1] ++#CHECK: vaccf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf1] ++#CHECK: vaccf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf1] ++#CHECK: vaccf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf1] ++#CHECK: vaccf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf1] ++ ++ vaccf %v0, %v0, %v0 ++ vaccf %v0, %v0, %v31 ++ vaccf %v0, %v31, %v0 ++ vaccf %v31, %v0, %v0 ++ vaccf %v18, %v3, %v20 ++ ++#CHECK: vaccg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf1] ++#CHECK: vaccg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf1] ++#CHECK: vaccg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf1] ++#CHECK: vaccg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf1] ++#CHECK: vaccg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf1] ++ ++ vaccg %v0, %v0, %v0 ++ vaccg %v0, %v0, %v31 ++ vaccg %v0, %v31, %v0 ++ vaccg %v31, %v0, %v0 ++ vaccg %v18, %v3, %v20 ++ ++#CHECK: vacch %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf1] ++#CHECK: vacch %v0, %v0, %v31 # 
encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf1] ++#CHECK: vacch %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf1] ++#CHECK: vacch %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf1] ++#CHECK: vacch %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf1] ++ ++ vacch %v0, %v0, %v0 ++ vacch %v0, %v0, %v31 ++ vacch %v0, %v31, %v0 ++ vacch %v31, %v0, %v0 ++ vacch %v18, %v3, %v20 ++ ++#CHECK: vaccq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf1] ++#CHECK: vaccq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf1] ++#CHECK: vaccq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf1] ++#CHECK: vaccq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf1] ++#CHECK: vaccq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf1] ++ ++ vaccq %v0, %v0, %v0 ++ vaccq %v0, %v0, %v31 ++ vaccq %v0, %v31, %v0 ++ vaccq %v31, %v0, %v0 ++ vaccq %v18, %v3, %v20 ++ ++#CHECK: vacq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbb] ++#CHECK: vacq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbb] ++#CHECK: vacq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbb] ++#CHECK: vacq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbb] ++#CHECK: vacq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbb] ++#CHECK: vacq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbb] ++ ++ vacq %v0, %v0, %v0, %v0 ++ vacq %v0, %v0, %v0, %v31 ++ vacq %v0, %v0, %v31, %v0 ++ vacq %v0, %v31, %v0, %v0 ++ vacq %v31, %v0, %v0, %v0 ++ vacq %v13, %v17, %v21, %v25 ++ ++#CHECK: vaf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf3] ++#CHECK: vaf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf3] ++#CHECK: vaf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf3] ++#CHECK: vaf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf3] ++#CHECK: vaf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf3] ++ ++ vaf %v0, %v0, %v0 ++ vaf %v0, %v0, %v31 ++ vaf %v0, %v31, %v0 ++ vaf %v31, %v0, %v0 ++ vaf %v18, %v3, %v20 ++ ++#CHECK: vag %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf3] ++#CHECK: vag %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf3] ++#CHECK: vag %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf3] ++#CHECK: vag %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf3] ++#CHECK: vag %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf3] ++ ++ vag %v0, %v0, %v0 ++ vag %v0, %v0, %v31 ++ vag %v0, %v31, %v0 ++ vag %v31, %v0, %v0 ++ vag %v18, %v3, %v20 ++ ++#CHECK: vah %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf3] ++#CHECK: vah %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf3] ++#CHECK: vah %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf3] ++#CHECK: vah %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf3] ++#CHECK: vah %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf3] ++ ++ vah %v0, %v0, %v0 ++ vah %v0, %v0, %v31 ++ vah %v0, %v31, %v0 ++ vah %v31, %v0, %v0 ++ vah %v18, %v3, %v20 ++ ++#CHECK: vaq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf3] ++#CHECK: vaq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf3] ++#CHECK: vaq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf3] ++#CHECK: vaq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf3] ++#CHECK: vaq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf3] ++ ++ vaq %v0, %v0, %v0 ++ vaq %v0, %v0, %v31 ++ vaq %v0, %v31, %v0 ++ vaq %v31, %v0, %v0 ++ vaq %v18, %v3, %v20 ++ ++#CHECK: vavgb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf2] ++#CHECK: 
vavgb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf2] ++#CHECK: vavgb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf2] ++#CHECK: vavgb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf2] ++#CHECK: vavgb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf2] ++ ++ vavgb %v0, %v0, %v0 ++ vavgb %v0, %v0, %v31 ++ vavgb %v0, %v31, %v0 ++ vavgb %v31, %v0, %v0 ++ vavgb %v18, %v3, %v20 ++ ++#CHECK: vavgf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf2] ++#CHECK: vavgf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf2] ++#CHECK: vavgf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf2] ++#CHECK: vavgf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf2] ++#CHECK: vavgf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf2] ++ ++ vavgf %v0, %v0, %v0 ++ vavgf %v0, %v0, %v31 ++ vavgf %v0, %v31, %v0 ++ vavgf %v31, %v0, %v0 ++ vavgf %v18, %v3, %v20 ++ ++#CHECK: vavgg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf2] ++#CHECK: vavgg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf2] ++#CHECK: vavgg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf2] ++#CHECK: vavgg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf2] ++#CHECK: vavgg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf2] ++ ++ vavgg %v0, %v0, %v0 ++ vavgg %v0, %v0, %v31 ++ vavgg %v0, %v31, %v0 ++ vavgg %v31, %v0, %v0 ++ vavgg %v18, %v3, %v20 ++ ++#CHECK: vavgh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf2] ++#CHECK: vavgh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf2] ++#CHECK: vavgh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf2] ++#CHECK: vavgh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf2] ++#CHECK: vavgh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf2] ++ ++ vavgh %v0, %v0, %v0 ++ vavgh %v0, %v0, %v31 ++ vavgh %v0, %v31, %v0 ++ vavgh %v31, %v0, %v0 ++ vavgh %v18, %v3, %v20 ++ ++#CHECK: vavglb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf0] ++#CHECK: vavglb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf0] ++#CHECK: vavglb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf0] ++#CHECK: vavglb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf0] ++#CHECK: vavglb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf0] ++ ++ vavglb %v0, %v0, %v0 ++ vavglb %v0, %v0, %v31 ++ vavglb %v0, %v31, %v0 ++ vavglb %v31, %v0, %v0 ++ vavglb %v18, %v3, %v20 ++ ++#CHECK: vavglf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf0] ++#CHECK: vavglf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf0] ++#CHECK: vavglf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf0] ++#CHECK: vavglf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf0] ++#CHECK: vavglf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf0] ++ ++ vavglf %v0, %v0, %v0 ++ vavglf %v0, %v0, %v31 ++ vavglf %v0, %v31, %v0 ++ vavglf %v31, %v0, %v0 ++ vavglf %v18, %v3, %v20 ++ ++#CHECK: vavglg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf0] ++#CHECK: vavglg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf0] ++#CHECK: vavglg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf0] ++#CHECK: vavglg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf0] ++#CHECK: vavglg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf0] ++ ++ vavglg %v0, %v0, %v0 ++ vavglg %v0, %v0, %v31 ++ vavglg %v0, %v31, %v0 ++ vavglg %v31, %v0, %v0 ++ vavglg %v18, %v3, %v20 ++ ++#CHECK: vavglh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf0] ++#CHECK: vavglh %v0, %v0, 
%v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf0] ++#CHECK: vavglh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf0] ++#CHECK: vavglh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf0] ++#CHECK: vavglh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf0] ++ ++ vavglh %v0, %v0, %v0 ++ vavglh %v0, %v0, %v31 ++ vavglh %v0, %v31, %v0 ++ vavglh %v31, %v0, %v0 ++ vavglh %v18, %v3, %v20 ++ ++#CHECK: vcdgb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc3] ++#CHECK: vcdgb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc3] ++#CHECK: vcdgb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc3] ++#CHECK: vcdgb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] ++#CHECK: vcdgb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc3] ++#CHECK: vcdgb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc3] ++#CHECK: vcdgb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc3] ++ ++ vcdgb %v0, %v0, 0, 0 ++ vcdgb %v0, %v0, 0, 15 ++ vcdgb %v0, %v0, 4, 0 ++ vcdgb %v0, %v0, 12, 0 ++ vcdgb %v0, %v31, 0, 0 ++ vcdgb %v31, %v0, 0, 0 ++ vcdgb %v14, %v17, 4, 10 ++ ++#CHECK: vcdlgb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc1] ++#CHECK: vcdlgb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc1] ++#CHECK: vcdlgb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc1] ++#CHECK: vcdlgb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] ++#CHECK: vcdlgb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc1] ++#CHECK: vcdlgb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc1] ++#CHECK: vcdlgb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc1] ++ ++ vcdlgb %v0, %v0, 0, 0 ++ vcdlgb %v0, %v0, 0, 15 ++ vcdlgb %v0, %v0, 4, 0 ++ vcdlgb %v0, %v0, 12, 0 ++ vcdlgb %v0, %v31, 0, 0 ++ vcdlgb %v31, %v0, 0, 0 ++ vcdlgb %v14, %v17, 4, 10 ++ ++#CHECK: vcksm %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x66] ++#CHECK: vcksm %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x66] ++#CHECK: vcksm %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x66] ++#CHECK: vcksm %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x66] ++#CHECK: vcksm %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x66] ++ ++ vcksm %v0, %v0, %v0 ++ vcksm %v0, %v0, %v31 ++ vcksm %v0, %v31, %v0 ++ vcksm %v31, %v0, %v0 ++ vcksm %v18, %v3, %v20 ++ ++#CHECK: vceqb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf8] ++#CHECK: vceqb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf8] ++#CHECK: vceqb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf8] ++#CHECK: vceqb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf8] ++#CHECK: vceqb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf8] ++#CHECK: vceqbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf8] ++ ++ vceqb %v0, %v0, %v0 ++ vceqb %v0, %v0, %v31 ++ vceqb %v0, %v31, %v0 ++ vceqb %v31, %v0, %v0 ++ vceqb %v18, %v3, %v20 ++ vceqbs %v5, %v22, %v7 ++ ++#CHECK: vceqf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf8] ++#CHECK: vceqf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf8] ++#CHECK: vceqf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf8] ++#CHECK: vceqf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf8] ++#CHECK: vceqf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf8] ++#CHECK: vceqfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf8] ++ ++ vceqf %v0, %v0, %v0 ++ vceqf %v0, %v0, %v31 ++ vceqf %v0, %v31, %v0 ++ vceqf %v31, %v0, %v0 ++ vceqf %v18, %v3, %v20 ++ vceqfs %v5, %v22, %v7 ++ 
++#CHECK: vceqg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf8] ++#CHECK: vceqg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf8] ++#CHECK: vceqg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf8] ++#CHECK: vceqg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf8] ++#CHECK: vceqg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf8] ++#CHECK: vceqgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf8] ++ ++ vceqg %v0, %v0, %v0 ++ vceqg %v0, %v0, %v31 ++ vceqg %v0, %v31, %v0 ++ vceqg %v31, %v0, %v0 ++ vceqg %v18, %v3, %v20 ++ vceqgs %v5, %v22, %v7 ++ ++#CHECK: vceqh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf8] ++#CHECK: vceqh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf8] ++#CHECK: vceqh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf8] ++#CHECK: vceqh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf8] ++#CHECK: vceqh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf8] ++#CHECK: vceqhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf8] ++ ++ vceqh %v0, %v0, %v0 ++ vceqh %v0, %v0, %v31 ++ vceqh %v0, %v31, %v0 ++ vceqh %v31, %v0, %v0 ++ vceqh %v18, %v3, %v20 ++ vceqhs %v5, %v22, %v7 ++ ++#CHECK: vcgdb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc2] ++#CHECK: vcgdb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc2] ++#CHECK: vcgdb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc2] ++#CHECK: vcgdb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] ++#CHECK: vcgdb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc2] ++#CHECK: vcgdb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc2] ++#CHECK: vcgdb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc2] ++ ++ vcgdb %v0, %v0, 0, 0 ++ vcgdb %v0, %v0, 0, 15 ++ vcgdb %v0, %v0, 4, 0 ++ vcgdb %v0, %v0, 12, 0 ++ vcgdb %v0, %v31, 0, 0 ++ vcgdb %v31, %v0, 0, 0 ++ vcgdb %v14, %v17, 4, 10 ++ ++#CHECK: vchb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfb] ++#CHECK: vchb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfb] ++#CHECK: vchb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfb] ++#CHECK: vchb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfb] ++#CHECK: vchb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfb] ++#CHECK: vchbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xfb] ++ ++ vchb %v0, %v0, %v0 ++ vchb %v0, %v0, %v31 ++ vchb %v0, %v31, %v0 ++ vchb %v31, %v0, %v0 ++ vchb %v18, %v3, %v20 ++ vchbs %v5, %v22, %v7 ++ ++#CHECK: vchf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfb] ++#CHECK: vchf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfb] ++#CHECK: vchf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfb] ++#CHECK: vchf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfb] ++#CHECK: vchf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfb] ++#CHECK: vchfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xfb] ++ ++ vchf %v0, %v0, %v0 ++ vchf %v0, %v0, %v31 ++ vchf %v0, %v31, %v0 ++ vchf %v31, %v0, %v0 ++ vchf %v18, %v3, %v20 ++ vchfs %v5, %v22, %v7 ++ ++#CHECK: vchg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfb] ++#CHECK: vchg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfb] ++#CHECK: vchg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfb] ++#CHECK: vchg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfb] ++#CHECK: vchg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfb] ++#CHECK: vchgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xfb] ++ ++ vchg 
%v0, %v0, %v0 ++ vchg %v0, %v0, %v31 ++ vchg %v0, %v31, %v0 ++ vchg %v31, %v0, %v0 ++ vchg %v18, %v3, %v20 ++ vchgs %v5, %v22, %v7 ++ ++#CHECK: vchh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfb] ++#CHECK: vchh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfb] ++#CHECK: vchh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfb] ++#CHECK: vchh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfb] ++#CHECK: vchh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfb] ++#CHECK: vchhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xfb] ++ ++ vchh %v0, %v0, %v0 ++ vchh %v0, %v0, %v31 ++ vchh %v0, %v31, %v0 ++ vchh %v31, %v0, %v0 ++ vchh %v18, %v3, %v20 ++ vchhs %v5, %v22, %v7 ++ ++#CHECK: vchlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf9] ++#CHECK: vchlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf9] ++#CHECK: vchlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf9] ++#CHECK: vchlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf9] ++#CHECK: vchlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf9] ++#CHECK: vchlbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0xf9] ++ ++ vchlb %v0, %v0, %v0 ++ vchlb %v0, %v0, %v31 ++ vchlb %v0, %v31, %v0 ++ vchlb %v31, %v0, %v0 ++ vchlb %v18, %v3, %v20 ++ vchlbs %v5, %v22, %v7 ++ ++#CHECK: vchlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf9] ++#CHECK: vchlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf9] ++#CHECK: vchlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf9] ++#CHECK: vchlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf9] ++#CHECK: vchlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf9] ++#CHECK: vchlfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0xf9] ++ ++ vchlf %v0, %v0, %v0 ++ vchlf %v0, %v0, %v31 ++ vchlf %v0, %v31, %v0 ++ vchlf %v31, %v0, %v0 ++ vchlf %v18, %v3, %v20 ++ vchlfs %v5, %v22, %v7 ++ ++#CHECK: vchlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf9] ++#CHECK: vchlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf9] ++#CHECK: vchlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf9] ++#CHECK: vchlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf9] ++#CHECK: vchlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf9] ++#CHECK: vchlgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0xf9] ++ ++ vchlg %v0, %v0, %v0 ++ vchlg %v0, %v0, %v31 ++ vchlg %v0, %v31, %v0 ++ vchlg %v31, %v0, %v0 ++ vchlg %v18, %v3, %v20 ++ vchlgs %v5, %v22, %v7 ++ ++#CHECK: vchlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf9] ++#CHECK: vchlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf9] ++#CHECK: vchlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf9] ++#CHECK: vchlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf9] ++#CHECK: vchlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf9] ++#CHECK: vchlhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0xf9] ++ ++ vchlh %v0, %v0, %v0 ++ vchlh %v0, %v0, %v31 ++ vchlh %v0, %v31, %v0 ++ vchlh %v31, %v0, %v0 ++ vchlh %v18, %v3, %v20 ++ vchlhs %v5, %v22, %v7 ++ ++#CHECK: vclgdb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc0] ++#CHECK: vclgdb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc0] ++#CHECK: vclgdb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc0] ++#CHECK: vclgdb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] ++#CHECK: vclgdb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc0] ++#CHECK: vclgdb %v31, %v0, 0, 0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc0] ++#CHECK: vclgdb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc0] ++ ++ vclgdb %v0, %v0, 0, 0 ++ vclgdb %v0, %v0, 0, 15 ++ vclgdb %v0, %v0, 4, 0 ++ vclgdb %v0, %v0, 12, 0 ++ vclgdb %v0, %v31, 0, 0 ++ vclgdb %v31, %v0, 0, 0 ++ vclgdb %v14, %v17, 4, 10 ++ ++#CHECK: vclzb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x53] ++#CHECK: vclzb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x53] ++#CHECK: vclzb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x53] ++#CHECK: vclzb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x53] ++#CHECK: vclzb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x53] ++#CHECK: vclzb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x53] ++ ++ vclzb %v0, %v0 ++ vclzb %v0, %v15 ++ vclzb %v0, %v31 ++ vclzb %v15, %v0 ++ vclzb %v31, %v0 ++ vclzb %v14, %v17 ++ ++#CHECK: vclzf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x53] ++#CHECK: vclzf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x53] ++#CHECK: vclzf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x53] ++#CHECK: vclzf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x53] ++#CHECK: vclzf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x53] ++#CHECK: vclzf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x53] ++ ++ vclzf %v0, %v0 ++ vclzf %v0, %v15 ++ vclzf %v0, %v31 ++ vclzf %v15, %v0 ++ vclzf %v31, %v0 ++ vclzf %v14, %v17 ++ ++#CHECK: vclzg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x53] ++#CHECK: vclzg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x53] ++#CHECK: vclzg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x53] ++#CHECK: vclzg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x53] ++#CHECK: vclzg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x53] ++#CHECK: vclzg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x53] ++ ++ vclzg %v0, %v0 ++ vclzg %v0, %v15 ++ vclzg %v0, %v31 ++ vclzg %v15, %v0 ++ vclzg %v31, %v0 ++ vclzg %v14, %v17 ++ ++#CHECK: vclzh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x53] ++#CHECK: vclzh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x53] ++#CHECK: vclzh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x53] ++#CHECK: vclzh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x53] ++#CHECK: vclzh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x53] ++#CHECK: vclzh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x53] ++ ++ vclzh %v0, %v0 ++ vclzh %v0, %v15 ++ vclzh %v0, %v31 ++ vclzh %v15, %v0 ++ vclzh %v31, %v0 ++ vclzh %v14, %v17 ++ ++#CHECK: vctzb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x52] ++#CHECK: vctzb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x52] ++#CHECK: vctzb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x52] ++#CHECK: vctzb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x52] ++#CHECK: vctzb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x52] ++#CHECK: vctzb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x52] ++ ++ vctzb %v0, %v0 ++ vctzb %v0, %v15 ++ vctzb %v0, %v31 ++ vctzb %v15, %v0 ++ vctzb %v31, %v0 ++ vctzb %v14, %v17 ++ ++#CHECK: vctzf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x52] ++#CHECK: vctzf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x52] ++#CHECK: vctzf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x52] ++#CHECK: vctzf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x52] ++#CHECK: vctzf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x52] ++#CHECK: vctzf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x52] ++ ++ vctzf %v0, %v0 ++ vctzf %v0, %v15 ++ vctzf %v0, %v31 ++ vctzf %v15, %v0 ++ vctzf %v31, %v0 ++ vctzf %v14, %v17 ++ 
++#CHECK: vctzg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x52] ++#CHECK: vctzg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x52] ++#CHECK: vctzg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x52] ++#CHECK: vctzg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x52] ++#CHECK: vctzg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x52] ++#CHECK: vctzg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0x52] ++ ++ vctzg %v0, %v0 ++ vctzg %v0, %v15 ++ vctzg %v0, %v31 ++ vctzg %v15, %v0 ++ vctzg %v31, %v0 ++ vctzg %v14, %v17 ++ ++#CHECK: vctzh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x52] ++#CHECK: vctzh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x52] ++#CHECK: vctzh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x52] ++#CHECK: vctzh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x52] ++#CHECK: vctzh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x52] ++#CHECK: vctzh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x52] ++ ++ vctzh %v0, %v0 ++ vctzh %v0, %v15 ++ vctzh %v0, %v31 ++ vctzh %v15, %v0 ++ vctzh %v31, %v0 ++ vctzh %v14, %v17 ++ ++#CHECK: vecb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdb] ++#CHECK: vecb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdb] ++#CHECK: vecb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdb] ++#CHECK: vecb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdb] ++#CHECK: vecb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdb] ++#CHECK: vecb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdb] ++ ++ vecb %v0, %v0 ++ vecb %v0, %v15 ++ vecb %v0, %v31 ++ vecb %v15, %v0 ++ vecb %v31, %v0 ++ vecb %v14, %v17 ++ ++#CHECK: vecf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xdb] ++#CHECK: vecf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdb] ++#CHECK: vecf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdb] ++#CHECK: vecf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdb] ++#CHECK: vecf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdb] ++#CHECK: vecf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdb] ++ ++ vecf %v0, %v0 ++ vecf %v0, %v15 ++ vecf %v0, %v31 ++ vecf %v15, %v0 ++ vecf %v31, %v0 ++ vecf %v14, %v17 ++ ++#CHECK: vecg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdb] ++#CHECK: vecg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdb] ++#CHECK: vecg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdb] ++#CHECK: vecg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdb] ++#CHECK: vecg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdb] ++#CHECK: vecg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdb] ++ ++ vecg %v0, %v0 ++ vecg %v0, %v15 ++ vecg %v0, %v31 ++ vecg %v15, %v0 ++ vecg %v31, %v0 ++ vecg %v14, %v17 ++ ++#CHECK: vech %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdb] ++#CHECK: vech %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdb] ++#CHECK: vech %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdb] ++#CHECK: vech %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdb] ++#CHECK: vech %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdb] ++#CHECK: vech %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xdb] ++ ++ vech %v0, %v0 ++ vech %v0, %v15 ++ vech %v0, %v31 ++ vech %v15, %v0 ++ vech %v31, %v0 ++ vech %v14, %v17 ++ ++#CHECK: veclb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd9] ++#CHECK: veclb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd9] ++#CHECK: veclb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd9] ++#CHECK: veclb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd9] ++#CHECK: veclb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd9] ++#CHECK: 
veclb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd9] ++ ++ veclb %v0, %v0 ++ veclb %v0, %v15 ++ veclb %v0, %v31 ++ veclb %v15, %v0 ++ veclb %v31, %v0 ++ veclb %v14, %v17 ++ ++#CHECK: veclf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd9] ++#CHECK: veclf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd9] ++#CHECK: veclf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd9] ++#CHECK: veclf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd9] ++#CHECK: veclf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd9] ++#CHECK: veclf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd9] ++ ++ veclf %v0, %v0 ++ veclf %v0, %v15 ++ veclf %v0, %v31 ++ veclf %v15, %v0 ++ veclf %v31, %v0 ++ veclf %v14, %v17 ++ ++#CHECK: veclg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xd9] ++#CHECK: veclg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xd9] ++#CHECK: veclg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xd9] ++#CHECK: veclg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xd9] ++#CHECK: veclg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xd9] ++#CHECK: veclg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xd9] ++ ++ veclg %v0, %v0 ++ veclg %v0, %v15 ++ veclg %v0, %v31 ++ veclg %v15, %v0 ++ veclg %v31, %v0 ++ veclg %v14, %v17 ++ ++#CHECK: veclh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd9] ++#CHECK: veclh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd9] ++#CHECK: veclh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd9] ++#CHECK: veclh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd9] ++#CHECK: veclh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd9] ++#CHECK: veclh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd9] ++ ++ veclh %v0, %v0 ++ veclh %v0, %v15 ++ veclh %v0, %v31 ++ veclh %v15, %v0 ++ veclh %v31, %v0 ++ veclh %v14, %v17 ++ ++#CHECK: verimb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x72] ++#CHECK: verimb %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x72] ++#CHECK: verimb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x72] ++#CHECK: verimb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x72] ++#CHECK: verimb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x72] ++#CHECK: verimb %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x72] ++ ++ verimb %v0, %v0, %v0, 0 ++ verimb %v0, %v0, %v0, 255 ++ verimb %v0, %v0, %v31, 0 ++ verimb %v0, %v31, %v0, 0 ++ verimb %v31, %v0, %v0, 0 ++ verimb %v13, %v17, %v21, 0x79 ++ ++#CHECK: verimf %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x72] ++#CHECK: verimf %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x20,0x72] ++#CHECK: verimf %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x72] ++#CHECK: verimf %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x72] ++#CHECK: verimf %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x72] ++#CHECK: verimf %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x26,0x72] ++ ++ verimf %v0, %v0, %v0, 0 ++ verimf %v0, %v0, %v0, 255 ++ verimf %v0, %v0, %v31, 0 ++ verimf %v0, %v31, %v0, 0 ++ verimf %v31, %v0, %v0, 0 ++ verimf %v13, %v17, %v21, 0x79 ++ ++#CHECK: verimg %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x72] ++#CHECK: verimg %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x30,0x72] ++#CHECK: verimg %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x72] ++#CHECK: verimg %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x72] ++#CHECK: verimg %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x72] ++#CHECK: verimg %v13, %v17, %v21, 121 # 
encoding: [0xe7,0xd1,0x50,0x79,0x36,0x72] ++ ++ verimg %v0, %v0, %v0, 0 ++ verimg %v0, %v0, %v0, 255 ++ verimg %v0, %v0, %v31, 0 ++ verimg %v0, %v31, %v0, 0 ++ verimg %v31, %v0, %v0, 0 ++ verimg %v13, %v17, %v21, 0x79 ++ ++#CHECK: verimh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x72] ++#CHECK: verimh %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x10,0x72] ++#CHECK: verimh %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x72] ++#CHECK: verimh %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x72] ++#CHECK: verimh %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x72] ++#CHECK: verimh %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x16,0x72] ++ ++ verimh %v0, %v0, %v0, 0 ++ verimh %v0, %v0, %v0, 255 ++ verimh %v0, %v0, %v31, 0 ++ verimh %v0, %v31, %v0, 0 ++ verimh %v31, %v0, %v0, 0 ++ verimh %v13, %v17, %v21, 0x79 ++ ++#CHECK: verllvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x73] ++#CHECK: verllvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x73] ++#CHECK: verllvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x73] ++#CHECK: verllvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x73] ++#CHECK: verllvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x73] ++ ++ verllvb %v0, %v0, %v0 ++ verllvb %v0, %v0, %v31 ++ verllvb %v0, %v31, %v0 ++ verllvb %v31, %v0, %v0 ++ verllvb %v18, %v3, %v20 ++ ++#CHECK: verllvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x73] ++#CHECK: verllvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x73] ++#CHECK: verllvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x73] ++#CHECK: verllvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x73] ++#CHECK: verllvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x73] ++ ++ verllvf %v0, %v0, %v0 ++ verllvf %v0, %v0, %v31 ++ verllvf %v0, %v31, %v0 ++ verllvf %v31, %v0, %v0 ++ verllvf %v18, %v3, %v20 ++ ++#CHECK: verllvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x73] ++#CHECK: verllvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x73] ++#CHECK: verllvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x73] ++#CHECK: verllvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x73] ++#CHECK: verllvg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x73] ++ ++ verllvg %v0, %v0, %v0 ++ verllvg %v0, %v0, %v31 ++ verllvg %v0, %v31, %v0 ++ verllvg %v31, %v0, %v0 ++ verllvg %v18, %v3, %v20 ++ ++#CHECK: verllvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x73] ++#CHECK: verllvh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x73] ++#CHECK: verllvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x73] ++#CHECK: verllvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x73] ++#CHECK: verllvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x73] ++ ++ verllvh %v0, %v0, %v0 ++ verllvh %v0, %v0, %v31 ++ verllvh %v0, %v31, %v0 ++ verllvh %v31, %v0, %v0 ++ verllvh %v18, %v3, %v20 ++ ++#CHECK: verllb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x33] ++#CHECK: verllb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x33] ++#CHECK: verllb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x33] ++#CHECK: verllb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x33] ++#CHECK: verllb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x33] ++#CHECK: verllb %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x33] ++ ++ verllb %v0, %v0, 0 ++ verllb %v0, %v0, 4095 ++ verllb %v0, %v0, 0(%r15) ++ verllb %v0, %v31, 0 ++ verllb %v31, %v0, 0 ++ verllb 
%v14, %v17, 1074(%r5) ++ ++#CHECK: verllf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x33] ++#CHECK: verllf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x33] ++#CHECK: verllf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x33] ++#CHECK: verllf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x33] ++#CHECK: verllf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x33] ++#CHECK: verllf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x33] ++ ++ verllf %v0, %v0, 0 ++ verllf %v0, %v0, 4095 ++ verllf %v0, %v0, 0(%r15) ++ verllf %v0, %v31, 0 ++ verllf %v31, %v0, 0 ++ verllf %v14, %v17, 1074(%r5) ++ ++#CHECK: verllg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x33] ++#CHECK: verllg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x33] ++#CHECK: verllg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x33] ++#CHECK: verllg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x33] ++#CHECK: verllg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x33] ++#CHECK: verllg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x33] ++ ++ verllg %v0, %v0, 0 ++ verllg %v0, %v0, 4095 ++ verllg %v0, %v0, 0(%r15) ++ verllg %v0, %v31, 0 ++ verllg %v31, %v0, 0 ++ verllg %v14, %v17, 1074(%r5) ++ ++#CHECK: verllh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x33] ++#CHECK: verllh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x33] ++#CHECK: verllh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x33] ++#CHECK: verllh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x33] ++#CHECK: verllh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x33] ++#CHECK: verllh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x33] ++ ++ verllh %v0, %v0, 0 ++ verllh %v0, %v0, 4095 ++ verllh %v0, %v0, 0(%r15) ++ verllh %v0, %v31, 0 ++ verllh %v31, %v0, 0 ++ verllh %v14, %v17, 1074(%r5) ++ ++#CHECK: veslvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x70] ++#CHECK: veslvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x70] ++#CHECK: veslvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x70] ++#CHECK: veslvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x70] ++#CHECK: veslvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x70] ++ ++ veslvb %v0, %v0, %v0 ++ veslvb %v0, %v0, %v31 ++ veslvb %v0, %v31, %v0 ++ veslvb %v31, %v0, %v0 ++ veslvb %v18, %v3, %v20 ++ ++#CHECK: veslvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x70] ++#CHECK: veslvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x70] ++#CHECK: veslvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x70] ++#CHECK: veslvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x70] ++#CHECK: veslvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x70] ++ ++ veslvf %v0, %v0, %v0 ++ veslvf %v0, %v0, %v31 ++ veslvf %v0, %v31, %v0 ++ veslvf %v31, %v0, %v0 ++ veslvf %v18, %v3, %v20 ++ ++#CHECK: veslvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x70] ++#CHECK: veslvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x70] ++#CHECK: veslvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x70] ++#CHECK: veslvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x70] ++#CHECK: veslvg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x70] ++ ++ veslvg %v0, %v0, %v0 ++ veslvg %v0, %v0, %v31 ++ veslvg %v0, %v31, %v0 ++ veslvg %v31, %v0, %v0 ++ veslvg %v18, %v3, %v20 ++ ++#CHECK: veslvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x70] ++#CHECK: veslvh %v0, %v0, %v31 # encoding: 
[0xe7,0x00,0xf0,0x00,0x12,0x70] ++#CHECK: veslvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x70] ++#CHECK: veslvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x70] ++#CHECK: veslvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x70] ++ ++ veslvh %v0, %v0, %v0 ++ veslvh %v0, %v0, %v31 ++ veslvh %v0, %v31, %v0 ++ veslvh %v31, %v0, %v0 ++ veslvh %v18, %v3, %v20 ++ ++#CHECK: veslb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x30] ++#CHECK: veslb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x30] ++#CHECK: veslb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x30] ++#CHECK: veslb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x30] ++#CHECK: veslb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x30] ++#CHECK: veslb %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x30] ++ ++ veslb %v0, %v0, 0 ++ veslb %v0, %v0, 4095 ++ veslb %v0, %v0, 0(%r15) ++ veslb %v0, %v31, 0 ++ veslb %v31, %v0, 0 ++ veslb %v14, %v17, 1074(%r5) ++ ++#CHECK: veslf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x30] ++#CHECK: veslf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x30] ++#CHECK: veslf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x30] ++#CHECK: veslf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x30] ++#CHECK: veslf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x30] ++#CHECK: veslf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x30] ++ ++ veslf %v0, %v0, 0 ++ veslf %v0, %v0, 4095 ++ veslf %v0, %v0, 0(%r15) ++ veslf %v0, %v31, 0 ++ veslf %v31, %v0, 0 ++ veslf %v14, %v17, 1074(%r5) ++ ++#CHECK: veslg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x30] ++#CHECK: veslg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x30] ++#CHECK: veslg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x30] ++#CHECK: veslg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x30] ++#CHECK: veslg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x30] ++#CHECK: veslg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x30] ++ ++ veslg %v0, %v0, 0 ++ veslg %v0, %v0, 4095 ++ veslg %v0, %v0, 0(%r15) ++ veslg %v0, %v31, 0 ++ veslg %v31, %v0, 0 ++ veslg %v14, %v17, 1074(%r5) ++ ++#CHECK: veslh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x30] ++#CHECK: veslh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x30] ++#CHECK: veslh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x30] ++#CHECK: veslh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x30] ++#CHECK: veslh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x30] ++#CHECK: veslh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x30] ++ ++ veslh %v0, %v0, 0 ++ veslh %v0, %v0, 4095 ++ veslh %v0, %v0, 0(%r15) ++ veslh %v0, %v31, 0 ++ veslh %v31, %v0, 0 ++ veslh %v14, %v17, 1074(%r5) ++ ++#CHECK: vesravb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7a] ++#CHECK: vesravb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7a] ++#CHECK: vesravb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7a] ++#CHECK: vesravb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7a] ++#CHECK: vesravb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7a] ++ ++ vesravb %v0, %v0, %v0 ++ vesravb %v0, %v0, %v31 ++ vesravb %v0, %v31, %v0 ++ vesravb %v31, %v0, %v0 ++ vesravb %v18, %v3, %v20 ++ ++#CHECK: vesravf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x7a] ++#CHECK: vesravf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x7a] ++#CHECK: vesravf %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x24,0x7a] ++#CHECK: vesravf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x7a] ++#CHECK: vesravf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x7a] ++ ++ vesravf %v0, %v0, %v0 ++ vesravf %v0, %v0, %v31 ++ vesravf %v0, %v31, %v0 ++ vesravf %v31, %v0, %v0 ++ vesravf %v18, %v3, %v20 ++ ++#CHECK: vesravg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x7a] ++#CHECK: vesravg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x7a] ++#CHECK: vesravg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x7a] ++#CHECK: vesravg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x7a] ++#CHECK: vesravg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x7a] ++ ++ vesravg %v0, %v0, %v0 ++ vesravg %v0, %v0, %v31 ++ vesravg %v0, %v31, %v0 ++ vesravg %v31, %v0, %v0 ++ vesravg %v18, %v3, %v20 ++ ++#CHECK: vesravh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x7a] ++#CHECK: vesravh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x7a] ++#CHECK: vesravh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x7a] ++#CHECK: vesravh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x7a] ++#CHECK: vesravh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x7a] ++ ++ vesravh %v0, %v0, %v0 ++ vesravh %v0, %v0, %v31 ++ vesravh %v0, %v31, %v0 ++ vesravh %v31, %v0, %v0 ++ vesravh %v18, %v3, %v20 ++ ++#CHECK: vesrab %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3a] ++#CHECK: vesrab %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3a] ++#CHECK: vesrab %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3a] ++#CHECK: vesrab %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3a] ++#CHECK: vesrab %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3a] ++#CHECK: vesrab %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3a] ++ ++ vesrab %v0, %v0, 0 ++ vesrab %v0, %v0, 4095 ++ vesrab %v0, %v0, 0(%r15) ++ vesrab %v0, %v31, 0 ++ vesrab %v31, %v0, 0 ++ vesrab %v14, %v17, 1074(%r5) ++ ++#CHECK: vesraf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x3a] ++#CHECK: vesraf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x3a] ++#CHECK: vesraf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x3a] ++#CHECK: vesraf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x3a] ++#CHECK: vesraf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x3a] ++#CHECK: vesraf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x3a] ++ ++ vesraf %v0, %v0, 0 ++ vesraf %v0, %v0, 4095 ++ vesraf %v0, %v0, 0(%r15) ++ vesraf %v0, %v31, 0 ++ vesraf %v31, %v0, 0 ++ vesraf %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrag %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x3a] ++#CHECK: vesrag %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x3a] ++#CHECK: vesrag %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x3a] ++#CHECK: vesrag %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x3a] ++#CHECK: vesrag %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x3a] ++#CHECK: vesrag %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x3a] ++ ++ vesrag %v0, %v0, 0 ++ vesrag %v0, %v0, 4095 ++ vesrag %v0, %v0, 0(%r15) ++ vesrag %v0, %v31, 0 ++ vesrag %v31, %v0, 0 ++ vesrag %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrah %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x3a] ++#CHECK: vesrah %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x3a] ++#CHECK: vesrah %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x3a] ++#CHECK: vesrah %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x3a] ++#CHECK: vesrah %v31, %v0, 0 # 
encoding: [0xe7,0xf0,0x00,0x00,0x18,0x3a] ++#CHECK: vesrah %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x3a] ++ ++ vesrah %v0, %v0, 0 ++ vesrah %v0, %v0, 4095 ++ vesrah %v0, %v0, 0(%r15) ++ vesrah %v0, %v31, 0 ++ vesrah %v31, %v0, 0 ++ vesrah %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrlvb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x78] ++#CHECK: vesrlvb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x78] ++#CHECK: vesrlvb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x78] ++#CHECK: vesrlvb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x78] ++#CHECK: vesrlvb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x78] ++ ++ vesrlvb %v0, %v0, %v0 ++ vesrlvb %v0, %v0, %v31 ++ vesrlvb %v0, %v31, %v0 ++ vesrlvb %v31, %v0, %v0 ++ vesrlvb %v18, %v3, %v20 ++ ++#CHECK: vesrlvf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x78] ++#CHECK: vesrlvf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x78] ++#CHECK: vesrlvf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x78] ++#CHECK: vesrlvf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x78] ++#CHECK: vesrlvf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x78] ++ ++ vesrlvf %v0, %v0, %v0 ++ vesrlvf %v0, %v0, %v31 ++ vesrlvf %v0, %v31, %v0 ++ vesrlvf %v31, %v0, %v0 ++ vesrlvf %v18, %v3, %v20 ++ ++#CHECK: vesrlvg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x78] ++#CHECK: vesrlvg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x78] ++#CHECK: vesrlvg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x78] ++#CHECK: vesrlvg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x78] ++#CHECK: vesrlvg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x78] ++ ++ vesrlvg %v0, %v0, %v0 ++ vesrlvg %v0, %v0, %v31 ++ vesrlvg %v0, %v31, %v0 ++ vesrlvg %v31, %v0, %v0 ++ vesrlvg %v18, %v3, %v20 ++ ++#CHECK: vesrlvh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x78] ++#CHECK: vesrlvh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x78] ++#CHECK: vesrlvh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x78] ++#CHECK: vesrlvh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x78] ++#CHECK: vesrlvh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x78] ++ ++ vesrlvh %v0, %v0, %v0 ++ vesrlvh %v0, %v0, %v31 ++ vesrlvh %v0, %v31, %v0 ++ vesrlvh %v31, %v0, %v0 ++ vesrlvh %v18, %v3, %v20 ++ ++#CHECK: vesrlb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x38] ++#CHECK: vesrlb %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x38] ++#CHECK: vesrlb %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x38] ++#CHECK: vesrlb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x38] ++#CHECK: vesrlb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x38] ++#CHECK: vesrlb %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x38] ++ ++ vesrlb %v0, %v0, 0 ++ vesrlb %v0, %v0, 4095 ++ vesrlb %v0, %v0, 0(%r15) ++ vesrlb %v0, %v31, 0 ++ vesrlb %v31, %v0, 0 ++ vesrlb %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrlf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x38] ++#CHECK: vesrlf %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x38] ++#CHECK: vesrlf %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x38] ++#CHECK: vesrlf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x38] ++#CHECK: vesrlf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x38] ++#CHECK: vesrlf %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x24,0x38] ++ ++ vesrlf %v0, %v0, 0 ++ vesrlf %v0, %v0, 4095 ++ vesrlf %v0, %v0, 0(%r15) ++ vesrlf %v0, %v31, 0 ++ 
vesrlf %v31, %v0, 0 ++ vesrlf %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrlg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x38] ++#CHECK: vesrlg %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x38] ++#CHECK: vesrlg %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x38] ++#CHECK: vesrlg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x38] ++#CHECK: vesrlg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x38] ++#CHECK: vesrlg %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x34,0x38] ++ ++ vesrlg %v0, %v0, 0 ++ vesrlg %v0, %v0, 4095 ++ vesrlg %v0, %v0, 0(%r15) ++ vesrlg %v0, %v31, 0 ++ vesrlg %v31, %v0, 0 ++ vesrlg %v14, %v17, 1074(%r5) ++ ++#CHECK: vesrlh %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x38] ++#CHECK: vesrlh %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x38] ++#CHECK: vesrlh %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x38] ++#CHECK: vesrlh %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x38] ++#CHECK: vesrlh %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x38] ++#CHECK: vesrlh %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x14,0x38] ++ ++ vesrlh %v0, %v0, 0 ++ vesrlh %v0, %v0, 4095 ++ vesrlh %v0, %v0, 0(%r15) ++ vesrlh %v0, %v31, 0 ++ vesrlh %v31, %v0, 0 ++ vesrlh %v14, %v17, 1074(%r5) ++ ++#CHECK: vfadb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe3] ++#CHECK: vfadb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe3] ++#CHECK: vfadb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe3] ++#CHECK: vfadb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe3] ++#CHECK: vfadb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe3] ++ ++ vfadb %v0, %v0, %v0 ++ vfadb %v0, %v0, %v31 ++ vfadb %v0, %v31, %v0 ++ vfadb %v31, %v0, %v0 ++ vfadb %v18, %v3, %v20 ++ ++#CHECK: vfaeb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82] ++#CHECK: vfaeb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x82] ++#CHECK: vfaeb %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x82] ++#CHECK: vfaeb %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x82] ++#CHECK: vfaeb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x82] ++#CHECK: vfaeb %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x82] ++#CHECK: vfaeb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x82] ++#CHECK: vfaeb %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x82] ++#CHECK: vfaeb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x82] ++#CHECK: vfaeb %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x40,0x0a,0x82] ++#CHECK: vfaeb %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82] ++#CHECK: vfaebs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x0a,0x82] ++#CHECK: vfaezb %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x0a,0x82] ++#CHECK: vfaezbs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x0a,0x82] ++#CHECK: vfaezbs %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x0a,0x82] ++ ++ vfaeb %v0, %v0, %v0 ++ vfaeb %v0, %v0, %v0, 0 ++ vfaeb %v0, %v0, %v0, 12 ++ vfaeb %v0, %v0, %v15 ++ vfaeb %v0, %v0, %v31 ++ vfaeb %v0, %v15, %v0 ++ vfaeb %v0, %v31, %v0 ++ vfaeb %v15, %v0, %v0 ++ vfaeb %v31, %v0, %v0 ++ vfaeb %v18, %v3, %v20, 4 ++ vfaeb %v18, %v3, %v20, 15 ++ vfaebs %v18, %v3, %v20, 8 ++ vfaezb %v18, %v3, %v20, 4 ++ vfaezbs %v18, %v3, %v20, 8 ++ vfaezbs %v18, %v3, %v20, 15 ++ ++#CHECK: vfaef %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82] ++#CHECK: vfaef %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x82] ++#CHECK: vfaef %v0, 
%v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x20,0x82] ++#CHECK: vfaef %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x82] ++#CHECK: vfaef %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x82] ++#CHECK: vfaef %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x82] ++#CHECK: vfaef %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x82] ++#CHECK: vfaef %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x82] ++#CHECK: vfaef %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x82] ++#CHECK: vfaef %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x40,0x2a,0x82] ++#CHECK: vfaef %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82] ++#CHECK: vfaefs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x2a,0x82] ++#CHECK: vfaezf %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x2a,0x82] ++#CHECK: vfaezfs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x2a,0x82] ++#CHECK: vfaezfs %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x2a,0x82] ++ ++ vfaef %v0, %v0, %v0 ++ vfaef %v0, %v0, %v0, 0 ++ vfaef %v0, %v0, %v0, 12 ++ vfaef %v0, %v0, %v15 ++ vfaef %v0, %v0, %v31 ++ vfaef %v0, %v15, %v0 ++ vfaef %v0, %v31, %v0 ++ vfaef %v15, %v0, %v0 ++ vfaef %v31, %v0, %v0 ++ vfaef %v18, %v3, %v20, 4 ++ vfaef %v18, %v3, %v20, 15 ++ vfaefs %v18, %v3, %v20, 8 ++ vfaezf %v18, %v3, %v20, 4 ++ vfaezfs %v18, %v3, %v20, 8 ++ vfaezfs %v18, %v3, %v20, 15 ++ ++#CHECK: vfaeh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82] ++#CHECK: vfaeh %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x82] ++#CHECK: vfaeh %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x10,0x82] ++#CHECK: vfaeh %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x82] ++#CHECK: vfaeh %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x82] ++#CHECK: vfaeh %v0, %v15, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x82] ++#CHECK: vfaeh %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x82] ++#CHECK: vfaeh %v15, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x82] ++#CHECK: vfaeh %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x82] ++#CHECK: vfaeh %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x40,0x1a,0x82] ++#CHECK: vfaeh %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x1a,0x82] ++#CHECK: vfaehs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0x90,0x1a,0x82] ++#CHECK: vfaezh %v18, %v3, %v20, 4 # encoding: [0xe7,0x23,0x40,0x60,0x1a,0x82] ++#CHECK: vfaezhs %v18, %v3, %v20, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x1a,0x82] ++#CHECK: vfaezhs %v18, %v3, %v20, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x1a,0x82] ++ ++ vfaeh %v0, %v0, %v0 ++ vfaeh %v0, %v0, %v0, 0 ++ vfaeh %v0, %v0, %v0, 12 ++ vfaeh %v0, %v0, %v15 ++ vfaeh %v0, %v0, %v31 ++ vfaeh %v0, %v15, %v0 ++ vfaeh %v0, %v31, %v0 ++ vfaeh %v15, %v0, %v0 ++ vfaeh %v31, %v0, %v0 ++ vfaeh %v18, %v3, %v20, 4 ++ vfaeh %v18, %v3, %v20, 15 ++ vfaehs %v18, %v3, %v20, 8 ++ vfaezh %v18, %v3, %v20, 4 ++ vfaezhs %v18, %v3, %v20, 8 ++ vfaezhs %v18, %v3, %v20, 15 ++ ++#CHECK: vfcedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe8] ++#CHECK: vfcedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe8] ++#CHECK: vfcedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe8] ++#CHECK: vfcedb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe8] ++#CHECK: vfcedb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe8] ++ ++ vfcedb %v0, %v0, %v0 ++ vfcedb %v0, %v0, %v31 ++ vfcedb %v0, %v31, %v0 ++ vfcedb %v31, %v0, %v0 ++ vfcedb %v18, %v3, %v20 ++ ++#CHECK: vfcedbs %v0, %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x10,0x30,0xe8] ++#CHECK: vfcedbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xe8] ++#CHECK: vfcedbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xe8] ++#CHECK: vfcedbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xe8] ++#CHECK: vfcedbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xe8] ++ ++ vfcedbs %v0, %v0, %v0 ++ vfcedbs %v0, %v0, %v31 ++ vfcedbs %v0, %v31, %v0 ++ vfcedbs %v31, %v0, %v0 ++ vfcedbs %v18, %v3, %v20 ++ ++#CHECK: vfchdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xeb] ++#CHECK: vfchdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xeb] ++#CHECK: vfchdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xeb] ++#CHECK: vfchdb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xeb] ++#CHECK: vfchdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xeb] ++ ++ vfchdb %v0, %v0, %v0 ++ vfchdb %v0, %v0, %v31 ++ vfchdb %v0, %v31, %v0 ++ vfchdb %v31, %v0, %v0 ++ vfchdb %v18, %v3, %v20 ++ ++#CHECK: vfchdbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xeb] ++#CHECK: vfchdbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xeb] ++#CHECK: vfchdbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xeb] ++#CHECK: vfchdbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xeb] ++#CHECK: vfchdbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xeb] ++ ++ vfchdbs %v0, %v0, %v0 ++ vfchdbs %v0, %v0, %v31 ++ vfchdbs %v0, %v31, %v0 ++ vfchdbs %v31, %v0, %v0 ++ vfchdbs %v18, %v3, %v20 ++ ++#CHECK: vfchedb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xea] ++#CHECK: vfchedb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xea] ++#CHECK: vfchedb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xea] ++#CHECK: vfchedb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xea] ++#CHECK: vfchedb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xea] ++ ++ vfchedb %v0, %v0, %v0 ++ vfchedb %v0, %v0, %v31 ++ vfchedb %v0, %v31, %v0 ++ vfchedb %v31, %v0, %v0 ++ vfchedb %v18, %v3, %v20 ++ ++#CHECK: vfchedbs %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xea] ++#CHECK: vfchedbs %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x10,0x32,0xea] ++#CHECK: vfchedbs %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xea] ++#CHECK: vfchedbs %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xea] ++#CHECK: vfchedbs %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x10,0x3a,0xea] ++ ++ vfchedbs %v0, %v0, %v0 ++ vfchedbs %v0, %v0, %v31 ++ vfchedbs %v0, %v31, %v0 ++ vfchedbs %v31, %v0, %v0 ++ vfchedbs %v18, %v3, %v20 ++ ++#CHECK: vfddb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe5] ++#CHECK: vfddb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe5] ++#CHECK: vfddb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe5] ++#CHECK: vfddb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe5] ++#CHECK: vfddb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe5] ++ ++ vfddb %v0, %v0, %v0 ++ vfddb %v0, %v0, %v31 ++ vfddb %v0, %v31, %v0 ++ vfddb %v31, %v0, %v0 ++ vfddb %v18, %v3, %v20 ++ ++#CHECK: vfeeb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x80] ++#CHECK: vfeeb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x80] ++#CHECK: vfeeb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x80] ++#CHECK: vfeeb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x80] ++#CHECK: vfeeb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x80] ++#CHECK: vfeeb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x80] ++#CHECK: vfeeb %v31, %v0, %v0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x08,0x80] ++#CHECK: vfeeb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x80] ++#CHECK: vfeebs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0x80] ++#CHECK: vfeezb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x0a,0x80] ++#CHECK: vfeezbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x04,0x80] ++ ++ vfeeb %v0, %v0, %v0 ++ vfeeb %v0, %v0, %v15 ++ vfeeb %v0, %v0, %v31 ++ vfeeb %v0, %v15, %v0 ++ vfeeb %v0, %v31, %v0 ++ vfeeb %v15, %v0, %v0 ++ vfeeb %v31, %v0, %v0 ++ vfeeb %v18, %v3, %v20 ++ vfeebs %v5, %v22, %v7 ++ vfeezb %v18, %v3, %v20 ++ vfeezbs %v5, %v22, %v7 ++ ++#CHECK: vfeef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x80] ++#CHECK: vfeef %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x80] ++#CHECK: vfeef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x80] ++#CHECK: vfeef %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x80] ++#CHECK: vfeef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x80] ++#CHECK: vfeef %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x80] ++#CHECK: vfeef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x80] ++#CHECK: vfeef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x80] ++#CHECK: vfeefs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x80] ++#CHECK: vfeezf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x80] ++#CHECK: vfeezfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x24,0x80] ++ ++ vfeef %v0, %v0, %v0 ++ vfeef %v0, %v0, %v15 ++ vfeef %v0, %v0, %v31 ++ vfeef %v0, %v15, %v0 ++ vfeef %v0, %v31, %v0 ++ vfeef %v15, %v0, %v0 ++ vfeef %v31, %v0, %v0 ++ vfeef %v18, %v3, %v20 ++ vfeefs %v5, %v22, %v7 ++ vfeezf %v18, %v3, %v20 ++ vfeezfs %v5, %v22, %v7 ++ ++#CHECK: vfeeh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x80] ++#CHECK: vfeeh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x80] ++#CHECK: vfeeh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x80] ++#CHECK: vfeeh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x80] ++#CHECK: vfeeh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x80] ++#CHECK: vfeeh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x80] ++#CHECK: vfeeh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x80] ++#CHECK: vfeeh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x80] ++#CHECK: vfeehs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x80] ++#CHECK: vfeezh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x80] ++#CHECK: vfeezhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x14,0x80] ++ ++ vfeeh %v0, %v0, %v0 ++ vfeeh %v0, %v0, %v15 ++ vfeeh %v0, %v0, %v31 ++ vfeeh %v0, %v15, %v0 ++ vfeeh %v0, %v31, %v0 ++ vfeeh %v15, %v0, %v0 ++ vfeeh %v31, %v0, %v0 ++ vfeeh %v18, %v3, %v20 ++ vfeehs %v5, %v22, %v7 ++ vfeezh %v18, %v3, %v20 ++ vfeezhs %v5, %v22, %v7 ++ ++#CHECK: vfeneb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x81] ++#CHECK: vfeneb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x81] ++#CHECK: vfeneb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x81] ++#CHECK: vfeneb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x81] ++#CHECK: vfeneb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x81] ++#CHECK: vfeneb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x81] ++#CHECK: vfeneb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x81] ++#CHECK: vfeneb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x81] ++#CHECK: vfenebs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x04,0x81] ++#CHECK: vfenezb %v18, %v3, %v20 # encoding:
[0xe7,0x23,0x40,0x20,0x0a,0x81] ++#CHECK: vfenezbs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x04,0x81] ++ ++ vfeneb %v0, %v0, %v0 ++ vfeneb %v0, %v0, %v15 ++ vfeneb %v0, %v0, %v31 ++ vfeneb %v0, %v15, %v0 ++ vfeneb %v0, %v31, %v0 ++ vfeneb %v15, %v0, %v0 ++ vfeneb %v31, %v0, %v0 ++ vfeneb %v18, %v3, %v20 ++ vfenebs %v5, %v22, %v7 ++ vfenezb %v18, %v3, %v20 ++ vfenezbs %v5, %v22, %v7 ++ ++#CHECK: vfenef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x81] ++#CHECK: vfenef %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x81] ++#CHECK: vfenef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x81] ++#CHECK: vfenef %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x81] ++#CHECK: vfenef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x81] ++#CHECK: vfenef %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x81] ++#CHECK: vfenef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x81] ++#CHECK: vfenef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x81] ++#CHECK: vfenefs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x81] ++#CHECK: vfenezf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x2a,0x81] ++#CHECK: vfenezfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x24,0x81] ++ ++ vfenef %v0, %v0, %v0 ++ vfenef %v0, %v0, %v15 ++ vfenef %v0, %v0, %v31 ++ vfenef %v0, %v15, %v0 ++ vfenef %v0, %v31, %v0 ++ vfenef %v15, %v0, %v0 ++ vfenef %v31, %v0, %v0 ++ vfenef %v18, %v3, %v20 ++ vfenefs %v5, %v22, %v7 ++ vfenezf %v18, %v3, %v20 ++ vfenezfs %v5, %v22, %v7 ++ ++#CHECK: vfeneh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x81] ++#CHECK: vfeneh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x81] ++#CHECK: vfeneh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x81] ++#CHECK: vfeneh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x81] ++#CHECK: vfeneh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x81] ++#CHECK: vfeneh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x81] ++#CHECK: vfeneh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x81] ++#CHECK: vfeneh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x81] ++#CHECK: vfenehs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x81] ++#CHECK: vfenezh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x20,0x1a,0x81] ++#CHECK: vfenezhs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x30,0x14,0x81] ++ ++ vfeneh %v0, %v0, %v0 ++ vfeneh %v0, %v0, %v15 ++ vfeneh %v0, %v0, %v31 ++ vfeneh %v0, %v15, %v0 ++ vfeneh %v0, %v31, %v0 ++ vfeneh %v15, %v0, %v0 ++ vfeneh %v31, %v0, %v0 ++ vfeneh %v18, %v3, %v20 ++ vfenehs %v5, %v22, %v7 ++ vfenezh %v18, %v3, %v20 ++ vfenezhs %v5, %v22, %v7 ++ ++#CHECK: vfidb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc7] ++#CHECK: vfidb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc7] ++#CHECK: vfidb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc7] ++#CHECK: vfidb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7] ++#CHECK: vfidb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc7] ++#CHECK: vfidb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc7] ++#CHECK: vfidb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc7] ++ ++ vfidb %v0, %v0, 0, 0 ++ vfidb %v0, %v0, 0, 15 ++ vfidb %v0, %v0, 4, 0 ++ vfidb %v0, %v0, 12, 0 ++ vfidb %v0, %v31, 0, 0 ++ vfidb %v31, %v0, 0, 0 ++ vfidb %v14, %v17, 4, 10 ++ ++#CHECK: vistrb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5c] ++#CHECK: vistrb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5c] ++#CHECK: vistrb %v0, %v31 # encoding:
[0xe7,0x0f,0x00,0x00,0x04,0x5c] ++#CHECK: vistrb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5c] ++#CHECK: vistrb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5c] ++#CHECK: vistrb %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x08,0x5c] ++#CHECK: vistrbs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x04,0x5c] ++ ++ vistrb %v0, %v0 ++ vistrb %v0, %v15 ++ vistrb %v0, %v31 ++ vistrb %v15, %v0 ++ vistrb %v31, %v0 ++ vistrb %v18, %v3 ++ vistrbs %v5, %v22 ++ ++#CHECK: vistrf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5c] ++#CHECK: vistrf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5c] ++#CHECK: vistrf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5c] ++#CHECK: vistrf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5c] ++#CHECK: vistrf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5c] ++#CHECK: vistrf %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x28,0x5c] ++#CHECK: vistrfs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x24,0x5c] ++ ++ vistrf %v0, %v0 ++ vistrf %v0, %v15 ++ vistrf %v0, %v31 ++ vistrf %v15, %v0 ++ vistrf %v31, %v0 ++ vistrf %v18, %v3 ++ vistrfs %v5, %v22 ++ ++#CHECK: vistrh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5c] ++#CHECK: vistrh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x5c] ++#CHECK: vistrh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5c] ++#CHECK: vistrh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5c] ++#CHECK: vistrh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5c] ++#CHECK: vistrh %v18, %v3 # encoding: [0xe7,0x23,0x00,0x00,0x18,0x5c] ++#CHECK: vistrhs %v5, %v22 # encoding: [0xe7,0x56,0x00,0x10,0x14,0x5c] ++ ++ vistrh %v0, %v0 ++ vistrh %v0, %v15 ++ vistrh %v0, %v31 ++ vistrh %v15, %v0 ++ vistrh %v31, %v0 ++ vistrh %v18, %v3 ++ vistrhs %v5, %v22 ++ ++#CHECK: vflcdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcc] ++#CHECK: vflcdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcc] ++#CHECK: vflcdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcc] ++#CHECK: vflcdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcc] ++#CHECK: vflcdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcc] ++#CHECK: vflcdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcc] ++ ++ vflcdb %v0, %v0 ++ vflcdb %v0, %v15 ++ vflcdb %v0, %v31 ++ vflcdb %v15, %v0 ++ vflcdb %v31, %v0 ++ vflcdb %v14, %v17 ++ ++#CHECK: vflndb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x10,0x30,0xcc] ++#CHECK: vflndb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x10,0x30,0xcc] ++#CHECK: vflndb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x10,0x34,0xcc] ++#CHECK: vflndb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x30,0xcc] ++#CHECK: vflndb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x10,0x38,0xcc] ++#CHECK: vflndb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x10,0x34,0xcc] ++ ++ vflndb %v0, %v0 ++ vflndb %v0, %v15 ++ vflndb %v0, %v31 ++ vflndb %v15, %v0 ++ vflndb %v31, %v0 ++ vflndb %v14, %v17 ++ ++#CHECK: vflpdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x20,0x30,0xcc] ++#CHECK: vflpdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x20,0x30,0xcc] ++#CHECK: vflpdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x20,0x34,0xcc] ++#CHECK: vflpdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x30,0xcc] ++#CHECK: vflpdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x20,0x38,0xcc] ++#CHECK: vflpdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x20,0x34,0xcc] ++ ++ vflpdb %v0, %v0 ++ vflpdb %v0, %v15 ++ vflpdb %v0, %v31 ++ vflpdb %v15, %v0 ++ vflpdb %v31, %v0 ++ vflpdb %v14, %v17 ++ ++#CHECK: vfmadb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8f] ++#CHECK: vfmadb %v0, %v0, %v0, %v31 # encoding:
[0xe7,0x00,0x03,0x00,0xf1,0x8f] ++#CHECK: vfmadb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8f] ++#CHECK: vfmadb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8f] ++#CHECK: vfmadb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8f] ++#CHECK: vfmadb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8f] ++ ++ vfmadb %v0, %v0, %v0, %v0 ++ vfmadb %v0, %v0, %v0, %v31 ++ vfmadb %v0, %v0, %v31, %v0 ++ vfmadb %v0, %v31, %v0, %v0 ++ vfmadb %v31, %v0, %v0, %v0 ++ vfmadb %v13, %v17, %v21, %v25 ++ ++#CHECK: vfmdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe7] ++#CHECK: vfmdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe7] ++#CHECK: vfmdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe7] ++#CHECK: vfmdb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe7] ++#CHECK: vfmdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe7] ++ ++ vfmdb %v0, %v0, %v0 ++ vfmdb %v0, %v0, %v31 ++ vfmdb %v0, %v31, %v0 ++ vfmdb %v31, %v0, %v0 ++ vfmdb %v18, %v3, %v20 ++ ++#CHECK: vfmsdb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0x8e] ++#CHECK: vfmsdb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0x8e] ++#CHECK: vfmsdb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0x8e] ++#CHECK: vfmsdb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0x8e] ++#CHECK: vfmsdb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0x8e] ++#CHECK: vfmsdb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0x8e] ++ ++ vfmsdb %v0, %v0, %v0, %v0 ++ vfmsdb %v0, %v0, %v0, %v31 ++ vfmsdb %v0, %v0, %v31, %v0 ++ vfmsdb %v0, %v31, %v0, %v0 ++ vfmsdb %v31, %v0, %v0, %v0 ++ vfmsdb %v13, %v17, %v21, %v25 ++ ++#CHECK: vfsdb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xe2] ++#CHECK: vfsdb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xe2] ++#CHECK: vfsdb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xe2] ++#CHECK: vfsdb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xe2] ++#CHECK: vfsdb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xe2] ++ ++ vfsdb %v0, %v0, %v0 ++ vfsdb %v0, %v0, %v31 ++ vfsdb %v0, %v31, %v0 ++ vfsdb %v31, %v0, %v0 ++ vfsdb %v18, %v3, %v20 ++ ++#CHECK: vfsqdb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xce] ++#CHECK: vfsqdb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xce] ++#CHECK: vfsqdb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xce] ++#CHECK: vfsqdb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xce] ++#CHECK: vfsqdb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xce] ++#CHECK: vfsqdb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xce] ++ ++ vfsqdb %v0, %v0 ++ vfsqdb %v0, %v15 ++ vfsqdb %v0, %v31 ++ vfsqdb %v15, %v0 ++ vfsqdb %v31, %v0 ++ vfsqdb %v14, %v17 ++ ++#CHECK: vftcidb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4a] ++#CHECK: vftcidb %v0, %v0, 4095 # encoding: [0xe7,0x00,0xff,0xf0,0x30,0x4a] ++#CHECK: vftcidb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4a] ++#CHECK: vftcidb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4a] ++#CHECK: vftcidb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4a] ++#CHECK: vftcidb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4a] ++#CHECK: vftcidb %v4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x80,0x34,0x4a] ++ ++ vftcidb %v0, %v0, 0 ++ vftcidb %v0, %v0, 4095 ++ vftcidb %v0, %v15, 0 ++ vftcidb %v0, %v31, 0 ++ vftcidb %v15, %v0, 0 ++ vftcidb %v31, %v0, 0 ++ vftcidb %v4, %v21, 0x678 ++ ++#CHECK: vgbm %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44] 
++#CHECK: vgbm %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44] ++#CHECK: vgbm %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44] ++#CHECK: vgbm %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44] ++#CHECK: vgbm %v17, 4660 # encoding: [0xe7,0x10,0x12,0x34,0x08,0x44] ++ ++ vgbm %v0, 0 ++ vgbm %v0, 0xffff ++ vgbm %v15, 0 ++ vgbm %v31, 0 ++ vgbm %v17, 0x1234 ++ ++#CHECK: vgef %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x13] ++#CHECK: vgef %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x13] ++#CHECK: vgef %v0, 0(%v0,%r1), 3 # encoding: [0xe7,0x00,0x10,0x00,0x30,0x13] ++#CHECK: vgef %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x13] ++#CHECK: vgef %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x13] ++#CHECK: vgef %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x13] ++#CHECK: vgef %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x13] ++#CHECK: vgef %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x13] ++#CHECK: vgef %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x13] ++#CHECK: vgef %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x13] ++ ++ vgef %v0, 0(%v0), 0 ++ vgef %v0, 0(%v0,%r1), 0 ++ vgef %v0, 0(%v0,%r1), 3 ++ vgef %v0, 0(%v0,%r15), 0 ++ vgef %v0, 0(%v15,%r1), 0 ++ vgef %v0, 0(%v31,%r1), 0 ++ vgef %v0, 4095(%v0, %r1), 0 ++ vgef %v15, 0(%v0,%r1), 0 ++ vgef %v31, 0(%v0,%r1), 0 ++ vgef %v10, 1000(%v19,%r7), 1 ++ ++#CHECK: vgeg %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x12] ++#CHECK: vgeg %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x12] ++#CHECK: vgeg %v0, 0(%v0,%r1), 1 # encoding: [0xe7,0x00,0x10,0x00,0x10,0x12] ++#CHECK: vgeg %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x12] ++#CHECK: vgeg %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x12] ++#CHECK: vgeg %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x12] ++#CHECK: vgeg %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x12] ++#CHECK: vgeg %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x12] ++#CHECK: vgeg %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x12] ++#CHECK: vgeg %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x12] ++ ++ vgeg %v0, 0(%v0), 0 ++ vgeg %v0, 0(%v0,%r1), 0 ++ vgeg %v0, 0(%v0,%r1), 1 ++ vgeg %v0, 0(%v0,%r15), 0 ++ vgeg %v0, 0(%v15,%r1), 0 ++ vgeg %v0, 0(%v31,%r1), 0 ++ vgeg %v0, 4095(%v0,%r1), 0 ++ vgeg %v15, 0(%v0,%r1), 0 ++ vgeg %v31, 0(%v0,%r1), 0 ++ vgeg %v10, 1000(%v19,%r7), 1 ++ ++#CHECK: vgfmab %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xbc] ++#CHECK: vgfmab %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xbc] ++#CHECK: vgfmab %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xbc] ++#CHECK: vgfmab %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xbc] ++#CHECK: vgfmab %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xbc] ++#CHECK: vgfmab %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xbc] ++ ++ vgfmab %v0, %v0, %v0, %v0 ++ vgfmab %v0, %v0, %v0, %v31 ++ vgfmab %v0, %v0, %v31, %v0 ++ vgfmab %v0, %v31, %v0, %v0 ++ vgfmab %v31, %v0, %v0, %v0 ++ vgfmab %v13, %v17, %v21, %v25 ++ ++#CHECK: vgfmaf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xbc] ++#CHECK: vgfmaf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xbc] ++#CHECK: vgfmaf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xbc] ++#CHECK: vgfmaf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xbc] ++#CHECK: vgfmaf %v31, %v0, %v0, %v0 # 
encoding: [0xe7,0xf0,0x02,0x00,0x08,0xbc] ++#CHECK: vgfmaf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xbc] ++ ++ vgfmaf %v0, %v0, %v0, %v0 ++ vgfmaf %v0, %v0, %v0, %v31 ++ vgfmaf %v0, %v0, %v31, %v0 ++ vgfmaf %v0, %v31, %v0, %v0 ++ vgfmaf %v31, %v0, %v0, %v0 ++ vgfmaf %v13, %v17, %v21, %v25 ++ ++#CHECK: vgfmag %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x03,0x00,0x00,0xbc] ++#CHECK: vgfmag %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x03,0x00,0xf1,0xbc] ++#CHECK: vgfmag %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf3,0x00,0x02,0xbc] ++#CHECK: vgfmag %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x03,0x00,0x04,0xbc] ++#CHECK: vgfmag %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x03,0x00,0x08,0xbc] ++#CHECK: vgfmag %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x00,0x97,0xbc] ++ ++ vgfmag %v0, %v0, %v0, %v0 ++ vgfmag %v0, %v0, %v0, %v31 ++ vgfmag %v0, %v0, %v31, %v0 ++ vgfmag %v0, %v31, %v0, %v0 ++ vgfmag %v31, %v0, %v0, %v0 ++ vgfmag %v13, %v17, %v21, %v25 ++ ++#CHECK: vgfmah %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xbc] ++#CHECK: vgfmah %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xbc] ++#CHECK: vgfmah %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xbc] ++#CHECK: vgfmah %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xbc] ++#CHECK: vgfmah %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xbc] ++#CHECK: vgfmah %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xbc] ++ ++ vgfmah %v0, %v0, %v0, %v0 ++ vgfmah %v0, %v0, %v0, %v31 ++ vgfmah %v0, %v0, %v31, %v0 ++ vgfmah %v0, %v31, %v0, %v0 ++ vgfmah %v31, %v0, %v0, %v0 ++ vgfmah %v13, %v17, %v21, %v25 ++ ++#CHECK: vgfmb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xb4] ++#CHECK: vgfmb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xb4] ++#CHECK: vgfmb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xb4] ++#CHECK: vgfmb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xb4] ++#CHECK: vgfmb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xb4] ++ ++ vgfmb %v0, %v0, %v0 ++ vgfmb %v0, %v0, %v31 ++ vgfmb %v0, %v31, %v0 ++ vgfmb %v31, %v0, %v0 ++ vgfmb %v18, %v3, %v20 ++ ++#CHECK: vgfmf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xb4] ++#CHECK: vgfmf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xb4] ++#CHECK: vgfmf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xb4] ++#CHECK: vgfmf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xb4] ++#CHECK: vgfmf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xb4] ++ ++ vgfmf %v0, %v0, %v0 ++ vgfmf %v0, %v0, %v31 ++ vgfmf %v0, %v31, %v0 ++ vgfmf %v31, %v0, %v0 ++ vgfmf %v18, %v3, %v20 ++ ++#CHECK: vgfmg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xb4] ++#CHECK: vgfmg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xb4] ++#CHECK: vgfmg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xb4] ++#CHECK: vgfmg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xb4] ++#CHECK: vgfmg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xb4] ++ ++ vgfmg %v0, %v0, %v0 ++ vgfmg %v0, %v0, %v31 ++ vgfmg %v0, %v31, %v0 ++ vgfmg %v31, %v0, %v0 ++ vgfmg %v18, %v3, %v20 ++ ++#CHECK: vgfmh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xb4] ++#CHECK: vgfmh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xb4] ++#CHECK: vgfmh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xb4] ++#CHECK: vgfmh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xb4] ++#CHECK: vgfmh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xb4] ++ ++ 
vgfmh %v0, %v0, %v0 ++ vgfmh %v0, %v0, %v31 ++ vgfmh %v0, %v31, %v0 ++ vgfmh %v31, %v0, %v0 ++ vgfmh %v18, %v3, %v20 ++ ++#CHECK: vgmb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x46] ++#CHECK: vgmb %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x46] ++#CHECK: vgmb %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x00,0x46] ++#CHECK: vgmb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x46] ++#CHECK: vgmb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x46] ++#CHECK: vgmb %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x08,0x46] ++ ++ vgmb %v0, 0, 0 ++ vgmb %v0, 0, 255 ++ vgmb %v0, 255, 0 ++ vgmb %v15, 0, 0 ++ vgmb %v31, 0, 0 ++ vgmb %v21, 2, 3 ++ ++#CHECK: vgmf %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x46] ++#CHECK: vgmf %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x20,0x46] ++#CHECK: vgmf %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x20,0x46] ++#CHECK: vgmf %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x46] ++#CHECK: vgmf %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x46] ++#CHECK: vgmf %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x28,0x46] ++ ++ vgmf %v0, 0, 0 ++ vgmf %v0, 0, 255 ++ vgmf %v0, 255, 0 ++ vgmf %v15, 0, 0 ++ vgmf %v31, 0, 0 ++ vgmf %v21, 2, 3 ++ ++#CHECK: vgmg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x46] ++#CHECK: vgmg %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x30,0x46] ++#CHECK: vgmg %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x30,0x46] ++#CHECK: vgmg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x46] ++#CHECK: vgmg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x46] ++#CHECK: vgmg %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x38,0x46] ++ ++ vgmg %v0, 0, 0 ++ vgmg %v0, 0, 255 ++ vgmg %v0, 255, 0 ++ vgmg %v15, 0, 0 ++ vgmg %v31, 0, 0 ++ vgmg %v21, 2, 3 ++ ++#CHECK: vgmh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x46] ++#CHECK: vgmh %v0, 0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x10,0x46] ++#CHECK: vgmh %v0, 255, 0 # encoding: [0xe7,0x00,0xff,0x00,0x10,0x46] ++#CHECK: vgmh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x46] ++#CHECK: vgmh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x46] ++#CHECK: vgmh %v21, 2, 3 # encoding: [0xe7,0x50,0x02,0x03,0x18,0x46] ++ ++ vgmh %v0, 0, 0 ++ vgmh %v0, 0, 255 ++ vgmh %v0, 255, 0 ++ vgmh %v15, 0, 0 ++ vgmh %v31, 0, 0 ++ vgmh %v21, 2, 3 ++ ++#CHECK: vl %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x06] ++#CHECK: vl %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x06] ++#CHECK: vl %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x06] ++#CHECK: vl %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x06] ++#CHECK: vl %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x06] ++#CHECK: vl %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x06] ++#CHECK: vl %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x06] ++ ++ vl %v0, 0 ++ vl %v0, 4095 ++ vl %v0, 0(%r15) ++ vl %v0, 0(%r15,%r1) ++ vl %v15, 0 ++ vl %v31, 0 ++ vl %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlbb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x07] ++#CHECK: vlbb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x07] ++#CHECK: vlbb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x07] ++#CHECK: vlbb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x07] ++#CHECK: vlbb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x07] ++#CHECK: vlbb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x07] ++#CHECK: vlbb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x07] ++#CHECK: vlbb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x07] ++ ++ vlbb %v0, 0, 0 ++ vlbb %v0, 0, 15 ++ vlbb %v0, 4095, 0 ++ 
vlbb %v0, 0(%r15), 0 ++ vlbb %v0, 0(%r15,%r1), 0 ++ vlbb %v15, 0, 0 ++ vlbb %v31, 0, 0 ++ vlbb %v18, 1383(%r3,%r4), 8 ++ ++#CHECK: vlcb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xde] ++#CHECK: vlcb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xde] ++#CHECK: vlcb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xde] ++#CHECK: vlcb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xde] ++#CHECK: vlcb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xde] ++#CHECK: vlcb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xde] ++ ++ vlcb %v0, %v0 ++ vlcb %v0, %v15 ++ vlcb %v0, %v31 ++ vlcb %v15, %v0 ++ vlcb %v31, %v0 ++ vlcb %v14, %v17 ++ ++#CHECK: vlcf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xde] ++#CHECK: vlcf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xde] ++#CHECK: vlcf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xde] ++#CHECK: vlcf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xde] ++#CHECK: vlcf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xde] ++#CHECK: vlcf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xde] ++ ++ vlcf %v0, %v0 ++ vlcf %v0, %v15 ++ vlcf %v0, %v31 ++ vlcf %v15, %v0 ++ vlcf %v31, %v0 ++ vlcf %v14, %v17 ++ ++#CHECK: vlcg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xde] ++#CHECK: vlcg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xde] ++#CHECK: vlcg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xde] ++#CHECK: vlcg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xde] ++#CHECK: vlcg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xde] ++#CHECK: vlcg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xde] ++ ++ vlcg %v0, %v0 ++ vlcg %v0, %v15 ++ vlcg %v0, %v31 ++ vlcg %v15, %v0 ++ vlcg %v31, %v0 ++ vlcg %v14, %v17 ++ ++#CHECK: vlch %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xde] ++#CHECK: vlch %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xde] ++#CHECK: vlch %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xde] ++#CHECK: vlch %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xde] ++#CHECK: vlch %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xde] ++#CHECK: vlch %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xde] ++ ++ vlch %v0, %v0 ++ vlch %v0, %v15 ++ vlch %v0, %v31 ++ vlch %v15, %v0 ++ vlch %v31, %v0 ++ vlch %v14, %v17 ++ ++#CHECK: vldeb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xc4] ++#CHECK: vldeb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xc4] ++#CHECK: vldeb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xc4] ++#CHECK: vldeb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xc4] ++#CHECK: vldeb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xc4] ++#CHECK: vldeb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xc4] ++ ++ vldeb %v0, %v0 ++ vldeb %v0, %v15 ++ vldeb %v0, %v31 ++ vldeb %v15, %v0 ++ vldeb %v31, %v0 ++ vldeb %v14, %v17 ++ ++#CHECK: vleb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x00] ++#CHECK: vleb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x00] ++#CHECK: vleb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x00] ++#CHECK: vleb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x00] ++#CHECK: vleb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x00] ++#CHECK: vleb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x00] ++#CHECK: vleb %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x00] ++#CHECK: vleb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x00] ++ ++ vleb %v0, 0, 0 ++ vleb %v0, 0, 15 ++ vleb %v0, 4095, 0 ++ vleb %v0, 0(%r15), 0 ++ vleb %v0, 0(%r15,%r1), 0 ++ vleb %v15, 0, 0 ++ vleb %v31, 0, 0 ++ vleb %v18, 1383(%r3,%r4), 8 ++ ++#CHECK: 
vledb %v0, %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xc5] ++#CHECK: vledb %v0, %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf0,0x30,0xc5] ++#CHECK: vledb %v0, %v0, 4, 0 # encoding: [0xe7,0x00,0x00,0x04,0x30,0xc5] ++#CHECK: vledb %v0, %v0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5] ++#CHECK: vledb %v0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xc5] ++#CHECK: vledb %v31, %v0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xc5] ++#CHECK: vledb %v14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xa4,0x34,0xc5] ++ ++ vledb %v0, %v0, 0, 0 ++ vledb %v0, %v0, 0, 15 ++ vledb %v0, %v0, 4, 0 ++ vledb %v0, %v0, 12, 0 ++ vledb %v0, %v31, 0, 0 ++ vledb %v31, %v0, 0, 0 ++ vledb %v14, %v17, 4, 10 ++ ++#CHECK: vlef %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x03] ++#CHECK: vlef %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x03] ++#CHECK: vlef %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x03] ++#CHECK: vlef %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x03] ++#CHECK: vlef %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x03] ++#CHECK: vlef %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x03] ++#CHECK: vlef %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x03] ++#CHECK: vlef %v18, 1383(%r3,%r4), 2 # encoding: [0xe7,0x23,0x45,0x67,0x28,0x03] ++ ++ vlef %v0, 0, 0 ++ vlef %v0, 0, 3 ++ vlef %v0, 4095, 0 ++ vlef %v0, 0(%r15), 0 ++ vlef %v0, 0(%r15,%r1), 0 ++ vlef %v15, 0, 0 ++ vlef %v31, 0, 0 ++ vlef %v18, 1383(%r3,%r4), 2 ++ ++#CHECK: vleg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x02] ++#CHECK: vleg %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x02] ++#CHECK: vleg %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x02] ++#CHECK: vleg %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x02] ++#CHECK: vleg %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x02] ++#CHECK: vleg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x02] ++#CHECK: vleg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x02] ++#CHECK: vleg %v18, 1383(%r3,%r4), 1 # encoding: [0xe7,0x23,0x45,0x67,0x18,0x02] ++ ++ vleg %v0, 0, 0 ++ vleg %v0, 0, 1 ++ vleg %v0, 4095, 0 ++ vleg %v0, 0(%r15), 0 ++ vleg %v0, 0(%r15,%r1), 0 ++ vleg %v15, 0, 0 ++ vleg %v31, 0, 0 ++ vleg %v18, 1383(%r3,%r4), 1 ++ ++#CHECK: vleh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x01] ++#CHECK: vleh %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x01] ++#CHECK: vleh %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x01] ++#CHECK: vleh %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x01] ++#CHECK: vleh %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x01] ++#CHECK: vleh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x01] ++#CHECK: vleh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x01] ++#CHECK: vleh %v18, 1383(%r3,%r4), 4 # encoding: [0xe7,0x23,0x45,0x67,0x48,0x01] ++ ++ vleh %v0, 0, 0 ++ vleh %v0, 0, 7 ++ vleh %v0, 4095, 0 ++ vleh %v0, 0(%r15), 0 ++ vleh %v0, 0(%r15,%r1), 0 ++ vleh %v15, 0, 0 ++ vleh %v31, 0, 0 ++ vleh %v18, 1383(%r3,%r4), 4 ++ ++#CHECK: vleib %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x40] ++#CHECK: vleib %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x40] ++#CHECK: vleib %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x40] ++#CHECK: vleib %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x40] ++#CHECK: vleib %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x40] ++#CHECK: vleib %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x40] ++#CHECK: vleib %v18, 13398, 11 # encoding: [0xe7,0x20,0x34,0x56,0xb8,0x40] ++ ++ vleib %v0, 0, 0 ++ vleib 
%v0, 0, 15 ++ vleib %v0, -32768, 0 ++ vleib %v0, 32767, 0 ++ vleib %v15, 0, 0 ++ vleib %v31, 0, 0 ++ vleib %v18, 0x3456, 11 ++ ++#CHECK: vleif %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x43] ++#CHECK: vleif %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x43] ++#CHECK: vleif %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x43] ++#CHECK: vleif %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x43] ++#CHECK: vleif %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x43] ++#CHECK: vleif %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x43] ++#CHECK: vleif %v18, 13398, 3 # encoding: [0xe7,0x20,0x34,0x56,0x38,0x43] ++ ++ vleif %v0, 0, 0 ++ vleif %v0, 0, 3 ++ vleif %v0, -32768, 0 ++ vleif %v0, 32767, 0 ++ vleif %v15, 0, 0 ++ vleif %v31, 0, 0 ++ vleif %v18, 0x3456, 3 ++ ++#CHECK: vleig %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x42] ++#CHECK: vleig %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x42] ++#CHECK: vleig %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x42] ++#CHECK: vleig %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x42] ++#CHECK: vleig %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x42] ++#CHECK: vleig %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x42] ++#CHECK: vleig %v18, 13398, 1 # encoding: [0xe7,0x20,0x34,0x56,0x18,0x42] ++ ++ vleig %v0, 0, 0 ++ vleig %v0, 0, 1 ++ vleig %v0, -32768, 0 ++ vleig %v0, 32767, 0 ++ vleig %v15, 0, 0 ++ vleig %v31, 0, 0 ++ vleig %v18, 0x3456, 1 ++ ++#CHECK: vleih %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x41] ++#CHECK: vleih %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x41] ++#CHECK: vleih %v0, -32768, 0 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x41] ++#CHECK: vleih %v0, 32767, 0 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x41] ++#CHECK: vleih %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x41] ++#CHECK: vleih %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x41] ++#CHECK: vleih %v18, 13398, 7 # encoding: [0xe7,0x20,0x34,0x56,0x78,0x41] ++ ++ vleih %v0, 0, 0 ++ vleih %v0, 0, 7 ++ vleih %v0, -32768, 0 ++ vleih %v0, 32767, 0 ++ vleih %v15, 0, 0 ++ vleih %v31, 0, 0 ++ vleih %v18, 0x3456, 7 ++ ++#CHECK: vlgvb %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x21] ++#CHECK: vlgvb %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x21] ++#CHECK: vlgvb %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x21] ++#CHECK: vlgvb %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x21] ++#CHECK: vlgvb %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x21] ++#CHECK: vlgvb %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x21] ++#CHECK: vlgvb %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x04,0x21] ++ ++ vlgvb %r0, %v0, 0 ++ vlgvb %r0, %v0, 4095 ++ vlgvb %r0, %v0, 0(%r15) ++ vlgvb %r0, %v15, 0 ++ vlgvb %r0, %v31, 0 ++ vlgvb %r15, %v0, 0 ++ vlgvb %r2, %v19, 1383(%r4) ++ ++#CHECK: vlgvf %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x21] ++#CHECK: vlgvf %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x21] ++#CHECK: vlgvf %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x21] ++#CHECK: vlgvf %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x21] ++#CHECK: vlgvf %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x21] ++#CHECK: vlgvf %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x21] ++#CHECK: vlgvf %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x24,0x21] ++ ++ vlgvf %r0, %v0, 0 ++ vlgvf %r0, %v0, 4095 ++ vlgvf %r0, %v0, 0(%r15) ++ vlgvf %r0, %v15, 0 ++ vlgvf %r0, %v31, 0 ++ vlgvf %r15, %v0, 0 ++ vlgvf %r2, %v19, 1383(%r4) ++ ++#CHECK: vlgvg %r0, %v0, 0 # encoding: 
[0xe7,0x00,0x00,0x00,0x30,0x21] ++#CHECK: vlgvg %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x21] ++#CHECK: vlgvg %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x21] ++#CHECK: vlgvg %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x21] ++#CHECK: vlgvg %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x21] ++#CHECK: vlgvg %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x21] ++#CHECK: vlgvg %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x34,0x21] ++ ++ vlgvg %r0, %v0, 0 ++ vlgvg %r0, %v0, 4095 ++ vlgvg %r0, %v0, 0(%r15) ++ vlgvg %r0, %v15, 0 ++ vlgvg %r0, %v31, 0 ++ vlgvg %r15, %v0, 0 ++ vlgvg %r2, %v19, 1383(%r4) ++ ++#CHECK: vlgvh %r0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x21] ++#CHECK: vlgvh %r0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x21] ++#CHECK: vlgvh %r0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x21] ++#CHECK: vlgvh %r0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x21] ++#CHECK: vlgvh %r0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x21] ++#CHECK: vlgvh %r15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x21] ++#CHECK: vlgvh %r2, %v19, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x14,0x21] ++ ++ vlgvh %r0, %v0, 0 ++ vlgvh %r0, %v0, 4095 ++ vlgvh %r0, %v0, 0(%r15) ++ vlgvh %r0, %v15, 0 ++ vlgvh %r0, %v31, 0 ++ vlgvh %r15, %v0, 0 ++ vlgvh %r2, %v19, 1383(%r4) ++ ++#CHECK: vll %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x37] ++#CHECK: vll %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x37] ++#CHECK: vll %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x37] ++#CHECK: vll %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x37] ++#CHECK: vll %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x37] ++#CHECK: vll %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x37] ++#CHECK: vll %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x37] ++ ++ vll %v0, %r0, 0 ++ vll %v0, %r0, 4095 ++ vll %v0, %r0, 0(%r15) ++ vll %v0, %r15, 0 ++ vll %v15, %r0, 0 ++ vll %v31, %r0, 0 ++ vll %v18, %r3, 1383(%r4) ++ ++#CHECK: vllezb %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x04] ++#CHECK: vllezb %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x04] ++#CHECK: vllezb %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x04] ++#CHECK: vllezb %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x04] ++#CHECK: vllezb %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x04] ++#CHECK: vllezb %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x04] ++#CHECK: vllezb %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x04] ++ ++ vllezb %v0, 0 ++ vllezb %v0, 4095 ++ vllezb %v0, 0(%r15) ++ vllezb %v0, 0(%r15,%r1) ++ vllezb %v15, 0 ++ vllezb %v31, 0 ++ vllezb %v18, 0x567(%r3,%r4) ++ ++#CHECK: vllezf %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x04] ++#CHECK: vllezf %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x04] ++#CHECK: vllezf %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x04] ++#CHECK: vllezf %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x04] ++#CHECK: vllezf %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x04] ++#CHECK: vllezf %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x04] ++#CHECK: vllezf %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x04] ++ ++ vllezf %v0, 0 ++ vllezf %v0, 4095 ++ vllezf %v0, 0(%r15) ++ vllezf %v0, 0(%r15,%r1) ++ vllezf %v15, 0 ++ vllezf %v31, 0 ++ vllezf %v18, 0x567(%r3,%r4) ++ ++#CHECK: vllezg %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x04] ++#CHECK: vllezg %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x04] ++#CHECK: vllezg %v0, 0(%r15) # encoding: 
[0xe7,0x00,0xf0,0x00,0x30,0x04] ++#CHECK: vllezg %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x04] ++#CHECK: vllezg %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x04] ++#CHECK: vllezg %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x04] ++#CHECK: vllezg %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x04] ++ ++ vllezg %v0, 0 ++ vllezg %v0, 4095 ++ vllezg %v0, 0(%r15) ++ vllezg %v0, 0(%r15,%r1) ++ vllezg %v15, 0 ++ vllezg %v31, 0 ++ vllezg %v18, 0x567(%r3,%r4) ++ ++#CHECK: vllezh %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x04] ++#CHECK: vllezh %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x04] ++#CHECK: vllezh %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x04] ++#CHECK: vllezh %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x04] ++#CHECK: vllezh %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x04] ++#CHECK: vllezh %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x04] ++#CHECK: vllezh %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x04] ++ ++ vllezh %v0, 0 ++ vllezh %v0, 4095 ++ vllezh %v0, 0(%r15) ++ vllezh %v0, 0(%r15,%r1) ++ vllezh %v15, 0 ++ vllezh %v31, 0 ++ vllezh %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlm %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x36] ++#CHECK: vlm %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x36] ++#CHECK: vlm %v0, %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x36] ++#CHECK: vlm %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x36] ++#CHECK: vlm %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x36] ++#CHECK: vlm %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x36] ++ ++ vlm %v0, %v0, 0 ++ vlm %v0, %v0, 4095 ++ vlm %v0, %v0, 0(%r15) ++ vlm %v0, %v31, 0 ++ vlm %v31, %v0, 0 ++ vlm %v14, %v17, 1074(%r5) ++ ++#CHECK: vlpb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xdf] ++#CHECK: vlpb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xdf] ++#CHECK: vlpb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xdf] ++#CHECK: vlpb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xdf] ++#CHECK: vlpb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xdf] ++#CHECK: vlpb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xdf] ++ ++ vlpb %v0, %v0 ++ vlpb %v0, %v15 ++ vlpb %v0, %v31 ++ vlpb %v15, %v0 ++ vlpb %v31, %v0 ++ vlpb %v14, %v17 ++ ++#CHECK: vlpf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xdf] ++#CHECK: vlpf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xdf] ++#CHECK: vlpf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xdf] ++#CHECK: vlpf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xdf] ++#CHECK: vlpf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xdf] ++#CHECK: vlpf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xdf] ++ ++ vlpf %v0, %v0 ++ vlpf %v0, %v15 ++ vlpf %v0, %v31 ++ vlpf %v15, %v0 ++ vlpf %v31, %v0 ++ vlpf %v14, %v17 ++ ++#CHECK: vlpg %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xdf] ++#CHECK: vlpg %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xdf] ++#CHECK: vlpg %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xdf] ++#CHECK: vlpg %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xdf] ++#CHECK: vlpg %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xdf] ++#CHECK: vlpg %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xdf] ++ ++ vlpg %v0, %v0 ++ vlpg %v0, %v15 ++ vlpg %v0, %v31 ++ vlpg %v15, %v0 ++ vlpg %v31, %v0 ++ vlpg %v14, %v17 ++ ++#CHECK: vlph %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xdf] ++#CHECK: vlph %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xdf] ++#CHECK: vlph %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xdf] ++#CHECK: vlph %v15, 
%v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xdf] ++#CHECK: vlph %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xdf] ++#CHECK: vlph %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xdf] ++ ++ vlph %v0, %v0 ++ vlph %v0, %v15 ++ vlph %v0, %v31 ++ vlph %v15, %v0 ++ vlph %v31, %v0 ++ vlph %v14, %v17 ++ ++#CHECK: vlr %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x56] ++#CHECK: vlr %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x56] ++#CHECK: vlr %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x56] ++#CHECK: vlr %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x56] ++#CHECK: vlr %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x56] ++#CHECK: vlr %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x56] ++ ++ vlr %v0, %v0 ++ vlr %v0, %v15 ++ vlr %v0, %v31 ++ vlr %v15, %v0 ++ vlr %v31, %v0 ++ vlr %v14, %v17 ++ ++#CHECK: vlrepb %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x05] ++#CHECK: vlrepb %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x05] ++#CHECK: vlrepb %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x05] ++#CHECK: vlrepb %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x05] ++#CHECK: vlrepb %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x05] ++#CHECK: vlrepb %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x05] ++#CHECK: vlrepb %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x05] ++ ++ vlrepb %v0, 0 ++ vlrepb %v0, 4095 ++ vlrepb %v0, 0(%r15) ++ vlrepb %v0, 0(%r15,%r1) ++ vlrepb %v15, 0 ++ vlrepb %v31, 0 ++ vlrepb %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlrepf %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x05] ++#CHECK: vlrepf %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x05] ++#CHECK: vlrepf %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x05] ++#CHECK: vlrepf %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x20,0x05] ++#CHECK: vlrepf %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x05] ++#CHECK: vlrepf %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x05] ++#CHECK: vlrepf %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x05] ++ ++ vlrepf %v0, 0 ++ vlrepf %v0, 4095 ++ vlrepf %v0, 0(%r15) ++ vlrepf %v0, 0(%r15,%r1) ++ vlrepf %v15, 0 ++ vlrepf %v31, 0 ++ vlrepf %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlrepg %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x05] ++#CHECK: vlrepg %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x05] ++#CHECK: vlrepg %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x05] ++#CHECK: vlrepg %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x30,0x05] ++#CHECK: vlrepg %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x05] ++#CHECK: vlrepg %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x05] ++#CHECK: vlrepg %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x05] ++ ++ vlrepg %v0, 0 ++ vlrepg %v0, 4095 ++ vlrepg %v0, 0(%r15) ++ vlrepg %v0, 0(%r15,%r1) ++ vlrepg %v15, 0 ++ vlrepg %v31, 0 ++ vlrepg %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlreph %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x05] ++#CHECK: vlreph %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x05] ++#CHECK: vlreph %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x05] ++#CHECK: vlreph %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x10,0x05] ++#CHECK: vlreph %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x05] ++#CHECK: vlreph %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x05] ++#CHECK: vlreph %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x05] ++ ++ vlreph %v0, 0 ++ vlreph %v0, 4095 ++ vlreph %v0, 0(%r15) ++ vlreph %v0, 0(%r15,%r1) ++ vlreph %v15, 0 ++ vlreph %v31, 0 ++ vlreph %v18, 0x567(%r3,%r4) ++ ++#CHECK: vlvgb %v0, %r0, 0 # encoding: 
[0xe7,0x00,0x00,0x00,0x00,0x22] ++#CHECK: vlvgb %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x22] ++#CHECK: vlvgb %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x22] ++#CHECK: vlvgb %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x22] ++#CHECK: vlvgb %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x22] ++#CHECK: vlvgb %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x22] ++#CHECK: vlvgb %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x22] ++ ++ vlvgb %v0, %r0, 0 ++ vlvgb %v0, %r0, 4095 ++ vlvgb %v0, %r0, 0(%r15) ++ vlvgb %v0, %r15, 0 ++ vlvgb %v15, %r0, 0 ++ vlvgb %v31, %r0, 0 ++ vlvgb %v18, %r3, 1383(%r4) ++ ++#CHECK: vlvgf %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x22] ++#CHECK: vlvgf %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x20,0x22] ++#CHECK: vlvgf %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x22] ++#CHECK: vlvgf %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x22] ++#CHECK: vlvgf %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x22] ++#CHECK: vlvgf %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x22] ++#CHECK: vlvgf %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x28,0x22] ++ ++ vlvgf %v0, %r0, 0 ++ vlvgf %v0, %r0, 4095 ++ vlvgf %v0, %r0, 0(%r15) ++ vlvgf %v0, %r15, 0 ++ vlvgf %v15, %r0, 0 ++ vlvgf %v31, %r0, 0 ++ vlvgf %v18, %r3, 1383(%r4) ++ ++#CHECK: vlvgg %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x22] ++#CHECK: vlvgg %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x30,0x22] ++#CHECK: vlvgg %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x22] ++#CHECK: vlvgg %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x22] ++#CHECK: vlvgg %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x22] ++#CHECK: vlvgg %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x22] ++#CHECK: vlvgg %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x38,0x22] ++ ++ vlvgg %v0, %r0, 0 ++ vlvgg %v0, %r0, 4095 ++ vlvgg %v0, %r0, 0(%r15) ++ vlvgg %v0, %r15, 0 ++ vlvgg %v15, %r0, 0 ++ vlvgg %v31, %r0, 0 ++ vlvgg %v18, %r3, 1383(%r4) ++ ++#CHECK: vlvgh %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x22] ++#CHECK: vlvgh %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x10,0x22] ++#CHECK: vlvgh %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x22] ++#CHECK: vlvgh %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x22] ++#CHECK: vlvgh %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x22] ++#CHECK: vlvgh %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x22] ++#CHECK: vlvgh %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x18,0x22] ++ ++ vlvgh %v0, %r0, 0 ++ vlvgh %v0, %r0, 4095 ++ vlvgh %v0, %r0, 0(%r15) ++ vlvgh %v0, %r15, 0 ++ vlvgh %v15, %r0, 0 ++ vlvgh %v31, %r0, 0 ++ vlvgh %v18, %r3, 1383(%r4) ++ ++#CHECK: vlvgp %v0, %r0, %r0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x62] ++#CHECK: vlvgp %v0, %r0, %r15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x62] ++#CHECK: vlvgp %v0, %r15, %r0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x62] ++#CHECK: vlvgp %v15, %r0, %r0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x62] ++#CHECK: vlvgp %v31, %r0, %r0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x62] ++#CHECK: vlvgp %v18, %r3, %r4 # encoding: [0xe7,0x23,0x40,0x00,0x08,0x62] ++ ++ vlvgp %v0, %r0, %r0 ++ vlvgp %v0, %r0, %r15 ++ vlvgp %v0, %r15, %r0 ++ vlvgp %v15, %r0, %r0 ++ vlvgp %v31, %r0, %r0 ++ vlvgp %v18, %r3, %r4 ++ ++#CHECK: vmaeb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xae] ++#CHECK: vmaeb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xae] ++#CHECK: vmaeb %v0, %v0, %v31, %v0 # 
encoding: [0xe7,0x00,0xf0,0x00,0x02,0xae] ++#CHECK: vmaeb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xae] ++#CHECK: vmaeb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xae] ++#CHECK: vmaeb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xae] ++ ++ vmaeb %v0, %v0, %v0, %v0 ++ vmaeb %v0, %v0, %v0, %v31 ++ vmaeb %v0, %v0, %v31, %v0 ++ vmaeb %v0, %v31, %v0, %v0 ++ vmaeb %v31, %v0, %v0, %v0 ++ vmaeb %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaef %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xae] ++#CHECK: vmaef %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xae] ++#CHECK: vmaef %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xae] ++#CHECK: vmaef %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xae] ++#CHECK: vmaef %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xae] ++#CHECK: vmaef %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xae] ++ ++ vmaef %v0, %v0, %v0, %v0 ++ vmaef %v0, %v0, %v0, %v31 ++ vmaef %v0, %v0, %v31, %v0 ++ vmaef %v0, %v31, %v0, %v0 ++ vmaef %v31, %v0, %v0, %v0 ++ vmaef %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaeh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xae] ++#CHECK: vmaeh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xae] ++#CHECK: vmaeh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xae] ++#CHECK: vmaeh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xae] ++#CHECK: vmaeh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xae] ++#CHECK: vmaeh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xae] ++ ++ vmaeh %v0, %v0, %v0, %v0 ++ vmaeh %v0, %v0, %v0, %v31 ++ vmaeh %v0, %v0, %v31, %v0 ++ vmaeh %v0, %v31, %v0, %v0 ++ vmaeh %v31, %v0, %v0, %v0 ++ vmaeh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmahb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xab] ++#CHECK: vmahb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xab] ++#CHECK: vmahb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xab] ++#CHECK: vmahb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xab] ++#CHECK: vmahb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xab] ++#CHECK: vmahb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xab] ++ ++ vmahb %v0, %v0, %v0, %v0 ++ vmahb %v0, %v0, %v0, %v31 ++ vmahb %v0, %v0, %v31, %v0 ++ vmahb %v0, %v31, %v0, %v0 ++ vmahb %v31, %v0, %v0, %v0 ++ vmahb %v13, %v17, %v21, %v25 ++ ++#CHECK: vmahf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xab] ++#CHECK: vmahf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xab] ++#CHECK: vmahf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xab] ++#CHECK: vmahf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xab] ++#CHECK: vmahf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xab] ++#CHECK: vmahf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xab] ++ ++ vmahf %v0, %v0, %v0, %v0 ++ vmahf %v0, %v0, %v0, %v31 ++ vmahf %v0, %v0, %v31, %v0 ++ vmahf %v0, %v31, %v0, %v0 ++ vmahf %v31, %v0, %v0, %v0 ++ vmahf %v13, %v17, %v21, %v25 ++ ++#CHECK: vmahh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xab] ++#CHECK: vmahh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xab] ++#CHECK: vmahh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xab] ++#CHECK: vmahh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xab] ++#CHECK: vmahh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xab] ++#CHECK: vmahh %v13, %v17, 
%v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xab] ++ ++ vmahh %v0, %v0, %v0, %v0 ++ vmahh %v0, %v0, %v0, %v31 ++ vmahh %v0, %v0, %v31, %v0 ++ vmahh %v0, %v31, %v0, %v0 ++ vmahh %v31, %v0, %v0, %v0 ++ vmahh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaa] ++#CHECK: vmalb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaa] ++#CHECK: vmalb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaa] ++#CHECK: vmalb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaa] ++#CHECK: vmalb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaa] ++#CHECK: vmalb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaa] ++ ++ vmalb %v0, %v0, %v0, %v0 ++ vmalb %v0, %v0, %v0, %v31 ++ vmalb %v0, %v0, %v31, %v0 ++ vmalb %v0, %v31, %v0, %v0 ++ vmalb %v31, %v0, %v0, %v0 ++ vmalb %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaleb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xac] ++#CHECK: vmaleb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xac] ++#CHECK: vmaleb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xac] ++#CHECK: vmaleb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xac] ++#CHECK: vmaleb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xac] ++#CHECK: vmaleb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xac] ++ ++ vmaleb %v0, %v0, %v0, %v0 ++ vmaleb %v0, %v0, %v0, %v31 ++ vmaleb %v0, %v0, %v31, %v0 ++ vmaleb %v0, %v31, %v0, %v0 ++ vmaleb %v31, %v0, %v0, %v0 ++ vmaleb %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalef %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xac] ++#CHECK: vmalef %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xac] ++#CHECK: vmalef %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xac] ++#CHECK: vmalef %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xac] ++#CHECK: vmalef %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xac] ++#CHECK: vmalef %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xac] ++ ++ vmalef %v0, %v0, %v0, %v0 ++ vmalef %v0, %v0, %v0, %v31 ++ vmalef %v0, %v0, %v31, %v0 ++ vmalef %v0, %v31, %v0, %v0 ++ vmalef %v31, %v0, %v0, %v0 ++ vmalef %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaleh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xac] ++#CHECK: vmaleh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xac] ++#CHECK: vmaleh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xac] ++#CHECK: vmaleh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xac] ++#CHECK: vmaleh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xac] ++#CHECK: vmaleh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xac] ++ ++ vmaleh %v0, %v0, %v0, %v0 ++ vmaleh %v0, %v0, %v0, %v31 ++ vmaleh %v0, %v0, %v31, %v0 ++ vmaleh %v0, %v31, %v0, %v0 ++ vmaleh %v31, %v0, %v0, %v0 ++ vmaleh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaa] ++#CHECK: vmalf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaa] ++#CHECK: vmalf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaa] ++#CHECK: vmalf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaa] ++#CHECK: vmalf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaa] ++#CHECK: vmalf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaa] ++ ++ vmalf %v0, %v0, %v0, %v0 ++ vmalf %v0, %v0, %v0, %v31 ++ vmalf %v0, %v0, %v31, %v0 ++ vmalf %v0, %v31, %v0, %v0 ++ vmalf %v31, %v0, %v0, 
%v0 ++ vmalf %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalhb %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa9] ++#CHECK: vmalhb %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xa9] ++#CHECK: vmalhb %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa9] ++#CHECK: vmalhb %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa9] ++#CHECK: vmalhb %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa9] ++#CHECK: vmalhb %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xa9] ++ ++ vmalhb %v0, %v0, %v0, %v0 ++ vmalhb %v0, %v0, %v0, %v31 ++ vmalhb %v0, %v0, %v31, %v0 ++ vmalhb %v0, %v31, %v0, %v0 ++ vmalhb %v31, %v0, %v0, %v0 ++ vmalhb %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalhf %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xa9] ++#CHECK: vmalhf %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xa9] ++#CHECK: vmalhf %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xa9] ++#CHECK: vmalhf %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xa9] ++#CHECK: vmalhf %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xa9] ++#CHECK: vmalhf %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xa9] ++ ++ vmalhf %v0, %v0, %v0, %v0 ++ vmalhf %v0, %v0, %v0, %v31 ++ vmalhf %v0, %v0, %v31, %v0 ++ vmalhf %v0, %v31, %v0, %v0 ++ vmalhf %v31, %v0, %v0, %v0 ++ vmalhf %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalhh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xa9] ++#CHECK: vmalhh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xa9] ++#CHECK: vmalhh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xa9] ++#CHECK: vmalhh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xa9] ++#CHECK: vmalhh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xa9] ++#CHECK: vmalhh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xa9] ++ ++ vmalhh %v0, %v0, %v0, %v0 ++ vmalhh %v0, %v0, %v0, %v31 ++ vmalhh %v0, %v0, %v31, %v0 ++ vmalhh %v0, %v31, %v0, %v0 ++ vmalhh %v31, %v0, %v0, %v0 ++ vmalhh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalhw %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaa] ++#CHECK: vmalhw %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaa] ++#CHECK: vmalhw %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaa] ++#CHECK: vmalhw %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaa] ++#CHECK: vmalhw %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaa] ++#CHECK: vmalhw %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xaa] ++ ++ vmalhw %v0, %v0, %v0, %v0 ++ vmalhw %v0, %v0, %v0, %v31 ++ vmalhw %v0, %v0, %v31, %v0 ++ vmalhw %v0, %v31, %v0, %v0 ++ vmalhw %v31, %v0, %v0, %v0 ++ vmalhw %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalob %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xad] ++#CHECK: vmalob %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xad] ++#CHECK: vmalob %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xad] ++#CHECK: vmalob %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xad] ++#CHECK: vmalob %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xad] ++#CHECK: vmalob %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xad] ++ ++ vmalob %v0, %v0, %v0, %v0 ++ vmalob %v0, %v0, %v0, %v31 ++ vmalob %v0, %v0, %v31, %v0 ++ vmalob %v0, %v31, %v0, %v0 ++ vmalob %v31, %v0, %v0, %v0 ++ vmalob %v13, %v17, %v21, %v25 ++ ++#CHECK: vmalof %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xad] ++#CHECK: vmalof %v0, %v0, %v0, %v31 # encoding: 
[0xe7,0x00,0x02,0x00,0xf1,0xad] ++#CHECK: vmalof %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xad] ++#CHECK: vmalof %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xad] ++#CHECK: vmalof %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xad] ++#CHECK: vmalof %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xad] ++ ++ vmalof %v0, %v0, %v0, %v0 ++ vmalof %v0, %v0, %v0, %v31 ++ vmalof %v0, %v0, %v31, %v0 ++ vmalof %v0, %v31, %v0, %v0 ++ vmalof %v31, %v0, %v0, %v0 ++ vmalof %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaloh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xad] ++#CHECK: vmaloh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xad] ++#CHECK: vmaloh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xad] ++#CHECK: vmaloh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xad] ++#CHECK: vmaloh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xad] ++#CHECK: vmaloh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xad] ++ ++ vmaloh %v0, %v0, %v0, %v0 ++ vmaloh %v0, %v0, %v0, %v31 ++ vmaloh %v0, %v0, %v31, %v0 ++ vmaloh %v0, %v31, %v0, %v0 ++ vmaloh %v31, %v0, %v0, %v0 ++ vmaloh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaob %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xaf] ++#CHECK: vmaob %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0xaf] ++#CHECK: vmaob %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xaf] ++#CHECK: vmaob %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xaf] ++#CHECK: vmaob %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xaf] ++#CHECK: vmaob %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0xaf] ++ ++ vmaob %v0, %v0, %v0, %v0 ++ vmaob %v0, %v0, %v0, %v31 ++ vmaob %v0, %v0, %v31, %v0 ++ vmaob %v0, %v31, %v0, %v0 ++ vmaob %v31, %v0, %v0, %v0 ++ vmaob %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaof %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0xaf] ++#CHECK: vmaof %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0xaf] ++#CHECK: vmaof %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0xaf] ++#CHECK: vmaof %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0xaf] ++#CHECK: vmaof %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0xaf] ++#CHECK: vmaof %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x52,0x00,0x97,0xaf] ++ ++ vmaof %v0, %v0, %v0, %v0 ++ vmaof %v0, %v0, %v0, %v31 ++ vmaof %v0, %v0, %v31, %v0 ++ vmaof %v0, %v31, %v0, %v0 ++ vmaof %v31, %v0, %v0, %v0 ++ vmaof %v13, %v17, %v21, %v25 ++ ++#CHECK: vmaoh %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0xaf] ++#CHECK: vmaoh %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0xaf] ++#CHECK: vmaoh %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0xaf] ++#CHECK: vmaoh %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0xaf] ++#CHECK: vmaoh %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0xaf] ++#CHECK: vmaoh %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x51,0x00,0x97,0xaf] ++ ++ vmaoh %v0, %v0, %v0, %v0 ++ vmaoh %v0, %v0, %v0, %v31 ++ vmaoh %v0, %v0, %v31, %v0 ++ vmaoh %v0, %v31, %v0, %v0 ++ vmaoh %v31, %v0, %v0, %v0 ++ vmaoh %v13, %v17, %v21, %v25 ++ ++#CHECK: vmeb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa6] ++#CHECK: vmeb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa6] ++#CHECK: vmeb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa6] ++#CHECK: vmeb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa6] ++#CHECK: vmeb %v18, %v3, %v20 # encoding: 
[0xe7,0x23,0x40,0x00,0x0a,0xa6] ++ ++ vmeb %v0, %v0, %v0 ++ vmeb %v0, %v0, %v31 ++ vmeb %v0, %v31, %v0 ++ vmeb %v31, %v0, %v0 ++ vmeb %v18, %v3, %v20 ++ ++#CHECK: vmef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa6] ++#CHECK: vmef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa6] ++#CHECK: vmef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa6] ++#CHECK: vmef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa6] ++#CHECK: vmef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa6] ++ ++ vmef %v0, %v0, %v0 ++ vmef %v0, %v0, %v31 ++ vmef %v0, %v31, %v0 ++ vmef %v31, %v0, %v0 ++ vmef %v18, %v3, %v20 ++ ++#CHECK: vmeh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa6] ++#CHECK: vmeh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa6] ++#CHECK: vmeh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa6] ++#CHECK: vmeh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa6] ++#CHECK: vmeh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa6] ++ ++ vmeh %v0, %v0, %v0 ++ vmeh %v0, %v0, %v31 ++ vmeh %v0, %v31, %v0 ++ vmeh %v31, %v0, %v0 ++ vmeh %v18, %v3, %v20 ++ ++#CHECK: vmhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa3] ++#CHECK: vmhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa3] ++#CHECK: vmhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa3] ++#CHECK: vmhb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa3] ++#CHECK: vmhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa3] ++ ++ vmhb %v0, %v0, %v0 ++ vmhb %v0, %v0, %v31 ++ vmhb %v0, %v31, %v0 ++ vmhb %v31, %v0, %v0 ++ vmhb %v18, %v3, %v20 ++ ++#CHECK: vmhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa3] ++#CHECK: vmhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa3] ++#CHECK: vmhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa3] ++#CHECK: vmhf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa3] ++#CHECK: vmhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa3] ++ ++ vmhf %v0, %v0, %v0 ++ vmhf %v0, %v0, %v31 ++ vmhf %v0, %v31, %v0 ++ vmhf %v31, %v0, %v0 ++ vmhf %v18, %v3, %v20 ++ ++#CHECK: vmhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa3] ++#CHECK: vmhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa3] ++#CHECK: vmhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa3] ++#CHECK: vmhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa3] ++#CHECK: vmhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa3] ++ ++ vmhh %v0, %v0, %v0 ++ vmhh %v0, %v0, %v31 ++ vmhh %v0, %v31, %v0 ++ vmhh %v31, %v0, %v0 ++ vmhh %v18, %v3, %v20 ++ ++#CHECK: vmlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa2] ++#CHECK: vmlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa2] ++#CHECK: vmlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa2] ++#CHECK: vmlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa2] ++#CHECK: vmlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa2] ++ ++ vmlb %v0, %v0, %v0 ++ vmlb %v0, %v0, %v31 ++ vmlb %v0, %v31, %v0 ++ vmlb %v31, %v0, %v0 ++ vmlb %v18, %v3, %v20 ++ ++#CHECK: vmleb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa4] ++#CHECK: vmleb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa4] ++#CHECK: vmleb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa4] ++#CHECK: vmleb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa4] ++#CHECK: vmleb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa4] ++ ++ vmleb %v0, %v0, %v0 ++ vmleb %v0, %v0, %v31 ++ vmleb %v0, %v31, %v0 ++ 
vmleb %v31, %v0, %v0 ++ vmleb %v18, %v3, %v20 ++ ++#CHECK: vmlef %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa4] ++#CHECK: vmlef %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa4] ++#CHECK: vmlef %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa4] ++#CHECK: vmlef %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa4] ++#CHECK: vmlef %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa4] ++ ++ vmlef %v0, %v0, %v0 ++ vmlef %v0, %v0, %v31 ++ vmlef %v0, %v31, %v0 ++ vmlef %v31, %v0, %v0 ++ vmlef %v18, %v3, %v20 ++ ++#CHECK: vmleh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa4] ++#CHECK: vmleh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa4] ++#CHECK: vmleh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa4] ++#CHECK: vmleh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa4] ++#CHECK: vmleh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa4] ++ ++ vmleh %v0, %v0, %v0 ++ vmleh %v0, %v0, %v31 ++ vmleh %v0, %v31, %v0 ++ vmleh %v31, %v0, %v0 ++ vmleh %v18, %v3, %v20 ++ ++#CHECK: vmlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa2] ++#CHECK: vmlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa2] ++#CHECK: vmlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa2] ++#CHECK: vmlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa2] ++#CHECK: vmlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa2] ++ ++ vmlf %v0, %v0, %v0 ++ vmlf %v0, %v0, %v31 ++ vmlf %v0, %v31, %v0 ++ vmlf %v31, %v0, %v0 ++ vmlf %v18, %v3, %v20 ++ ++#CHECK: vmlhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa1] ++#CHECK: vmlhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa1] ++#CHECK: vmlhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa1] ++#CHECK: vmlhb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa1] ++#CHECK: vmlhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa1] ++ ++ vmlhb %v0, %v0, %v0 ++ vmlhb %v0, %v0, %v31 ++ vmlhb %v0, %v31, %v0 ++ vmlhb %v31, %v0, %v0 ++ vmlhb %v18, %v3, %v20 ++ ++#CHECK: vmlhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa1] ++#CHECK: vmlhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa1] ++#CHECK: vmlhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa1] ++#CHECK: vmlhf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa1] ++#CHECK: vmlhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa1] ++ ++ vmlhf %v0, %v0, %v0 ++ vmlhf %v0, %v0, %v31 ++ vmlhf %v0, %v31, %v0 ++ vmlhf %v31, %v0, %v0 ++ vmlhf %v18, %v3, %v20 ++ ++#CHECK: vmlhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa1] ++#CHECK: vmlhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa1] ++#CHECK: vmlhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa1] ++#CHECK: vmlhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa1] ++#CHECK: vmlhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa1] ++ ++ vmlhh %v0, %v0, %v0 ++ vmlhh %v0, %v0, %v31 ++ vmlhh %v0, %v31, %v0 ++ vmlhh %v31, %v0, %v0 ++ vmlhh %v18, %v3, %v20 ++ ++#CHECK: vmlhw %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa2] ++#CHECK: vmlhw %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa2] ++#CHECK: vmlhw %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa2] ++#CHECK: vmlhw %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa2] ++#CHECK: vmlhw %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa2] ++ ++ vmlhw %v0, %v0, %v0 ++ vmlhw %v0, %v0, %v31 ++ vmlhw %v0, %v31, %v0 ++ vmlhw %v31, %v0, %v0 ++ vmlhw %v18, %v3, %v20 ++ 
++#CHECK: vmlob %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa5] ++#CHECK: vmlob %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa5] ++#CHECK: vmlob %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa5] ++#CHECK: vmlob %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa5] ++#CHECK: vmlob %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa5] ++ ++ vmlob %v0, %v0, %v0 ++ vmlob %v0, %v0, %v31 ++ vmlob %v0, %v31, %v0 ++ vmlob %v31, %v0, %v0 ++ vmlob %v18, %v3, %v20 ++ ++#CHECK: vmlof %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa5] ++#CHECK: vmlof %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa5] ++#CHECK: vmlof %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa5] ++#CHECK: vmlof %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa5] ++#CHECK: vmlof %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa5] ++ ++ vmlof %v0, %v0, %v0 ++ vmlof %v0, %v0, %v31 ++ vmlof %v0, %v31, %v0 ++ vmlof %v31, %v0, %v0 ++ vmlof %v18, %v3, %v20 ++ ++#CHECK: vmloh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa5] ++#CHECK: vmloh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa5] ++#CHECK: vmloh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa5] ++#CHECK: vmloh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa5] ++#CHECK: vmloh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa5] ++ ++ vmloh %v0, %v0, %v0 ++ vmloh %v0, %v0, %v31 ++ vmloh %v0, %v31, %v0 ++ vmloh %v31, %v0, %v0 ++ vmloh %v18, %v3, %v20 ++ ++#CHECK: vmnb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfe] ++#CHECK: vmnb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfe] ++#CHECK: vmnb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfe] ++#CHECK: vmnb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfe] ++#CHECK: vmnb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfe] ++ ++ vmnb %v0, %v0, %v0 ++ vmnb %v0, %v0, %v31 ++ vmnb %v0, %v31, %v0 ++ vmnb %v31, %v0, %v0 ++ vmnb %v18, %v3, %v20 ++ ++#CHECK: vmnf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfe] ++#CHECK: vmnf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfe] ++#CHECK: vmnf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfe] ++#CHECK: vmnf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfe] ++#CHECK: vmnf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfe] ++ ++ vmnf %v0, %v0, %v0 ++ vmnf %v0, %v0, %v31 ++ vmnf %v0, %v31, %v0 ++ vmnf %v31, %v0, %v0 ++ vmnf %v18, %v3, %v20 ++ ++#CHECK: vmng %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfe] ++#CHECK: vmng %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfe] ++#CHECK: vmng %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfe] ++#CHECK: vmng %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfe] ++#CHECK: vmng %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfe] ++ ++ vmng %v0, %v0, %v0 ++ vmng %v0, %v0, %v31 ++ vmng %v0, %v31, %v0 ++ vmng %v31, %v0, %v0 ++ vmng %v18, %v3, %v20 ++ ++#CHECK: vmnh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfe] ++#CHECK: vmnh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfe] ++#CHECK: vmnh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfe] ++#CHECK: vmnh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfe] ++#CHECK: vmnh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfe] ++ ++ vmnh %v0, %v0, %v0 ++ vmnh %v0, %v0, %v31 ++ vmnh %v0, %v31, %v0 ++ vmnh %v31, %v0, %v0 ++ vmnh %v18, %v3, %v20 ++ ++#CHECK: vmnlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfc] ++#CHECK: 
vmnlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfc] ++#CHECK: vmnlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfc] ++#CHECK: vmnlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfc] ++#CHECK: vmnlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfc] ++ ++ vmnlb %v0, %v0, %v0 ++ vmnlb %v0, %v0, %v31 ++ vmnlb %v0, %v31, %v0 ++ vmnlb %v31, %v0, %v0 ++ vmnlb %v18, %v3, %v20 ++ ++#CHECK: vmnlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfc] ++#CHECK: vmnlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfc] ++#CHECK: vmnlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfc] ++#CHECK: vmnlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfc] ++#CHECK: vmnlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfc] ++ ++ vmnlf %v0, %v0, %v0 ++ vmnlf %v0, %v0, %v31 ++ vmnlf %v0, %v31, %v0 ++ vmnlf %v31, %v0, %v0 ++ vmnlf %v18, %v3, %v20 ++ ++#CHECK: vmnlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfc] ++#CHECK: vmnlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfc] ++#CHECK: vmnlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfc] ++#CHECK: vmnlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfc] ++#CHECK: vmnlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfc] ++ ++ vmnlg %v0, %v0, %v0 ++ vmnlg %v0, %v0, %v31 ++ vmnlg %v0, %v31, %v0 ++ vmnlg %v31, %v0, %v0 ++ vmnlg %v18, %v3, %v20 ++ ++#CHECK: vmnlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfc] ++#CHECK: vmnlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfc] ++#CHECK: vmnlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfc] ++#CHECK: vmnlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfc] ++#CHECK: vmnlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfc] ++ ++ vmnlh %v0, %v0, %v0 ++ vmnlh %v0, %v0, %v31 ++ vmnlh %v0, %v31, %v0 ++ vmnlh %v31, %v0, %v0 ++ vmnlh %v18, %v3, %v20 ++ ++#CHECK: vmob %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xa7] ++#CHECK: vmob %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xa7] ++#CHECK: vmob %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xa7] ++#CHECK: vmob %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xa7] ++#CHECK: vmob %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xa7] ++ ++ vmob %v0, %v0, %v0 ++ vmob %v0, %v0, %v31 ++ vmob %v0, %v31, %v0 ++ vmob %v31, %v0, %v0 ++ vmob %v18, %v3, %v20 ++ ++#CHECK: vmof %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xa7] ++#CHECK: vmof %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xa7] ++#CHECK: vmof %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xa7] ++#CHECK: vmof %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xa7] ++#CHECK: vmof %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xa7] ++ ++ vmof %v0, %v0, %v0 ++ vmof %v0, %v0, %v31 ++ vmof %v0, %v31, %v0 ++ vmof %v31, %v0, %v0 ++ vmof %v18, %v3, %v20 ++ ++#CHECK: vmoh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xa7] ++#CHECK: vmoh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xa7] ++#CHECK: vmoh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xa7] ++#CHECK: vmoh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xa7] ++#CHECK: vmoh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xa7] ++ ++ vmoh %v0, %v0, %v0 ++ vmoh %v0, %v0, %v31 ++ vmoh %v0, %v31, %v0 ++ vmoh %v31, %v0, %v0 ++ vmoh %v18, %v3, %v20 ++ ++#CHECK: vmrhb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x61] ++#CHECK: vmrhb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x61] ++#CHECK: 
vmrhb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x61]
++#CHECK: vmrhb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x61]
++#CHECK: vmrhb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x61]
++#CHECK: vmrhb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x61]
++#CHECK: vmrhb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x61]
++#CHECK: vmrhb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x61]
++
++ vmrhb %v0, %v0, %v0
++ vmrhb %v0, %v0, %v15
++ vmrhb %v0, %v0, %v31
++ vmrhb %v0, %v15, %v0
++ vmrhb %v0, %v31, %v0
++ vmrhb %v15, %v0, %v0
++ vmrhb %v31, %v0, %v0
++ vmrhb %v18, %v3, %v20
++
++#CHECK: vmrhf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x61]
++#CHECK: vmrhf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x61]
++#CHECK: vmrhf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x61]
++#CHECK: vmrhf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x61]
++#CHECK: vmrhf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x61]
++#CHECK: vmrhf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x61]
++#CHECK: vmrhf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x61]
++#CHECK: vmrhf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x61]
++
++ vmrhf %v0, %v0, %v0
++ vmrhf %v0, %v0, %v15
++ vmrhf %v0, %v0, %v31
++ vmrhf %v0, %v15, %v0
++ vmrhf %v0, %v31, %v0
++ vmrhf %v15, %v0, %v0
++ vmrhf %v31, %v0, %v0
++ vmrhf %v18, %v3, %v20
++
++#CHECK: vmrhg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x61]
++#CHECK: vmrhg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x61]
++#CHECK: vmrhg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x61]
++#CHECK: vmrhg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x61]
++#CHECK: vmrhg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x61]
++#CHECK: vmrhg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x61]
++#CHECK: vmrhg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x61]
++#CHECK: vmrhg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x61]
++
++ vmrhg %v0, %v0, %v0
++ vmrhg %v0, %v0, %v15
++ vmrhg %v0, %v0, %v31
++ vmrhg %v0, %v15, %v0
++ vmrhg %v0, %v31, %v0
++ vmrhg %v15, %v0, %v0
++ vmrhg %v31, %v0, %v0
++ vmrhg %v18, %v3, %v20
++
++#CHECK: vmrhh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x61]
++#CHECK: vmrhh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x61]
++#CHECK: vmrhh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x61]
++#CHECK: vmrhh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x61]
++#CHECK: vmrhh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x61]
++#CHECK: vmrhh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x61]
++#CHECK: vmrhh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x61]
++#CHECK: vmrhh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x61]
++
++ vmrhh %v0, %v0, %v0
++ vmrhh %v0, %v0, %v15
++ vmrhh %v0, %v0, %v31
++ vmrhh %v0, %v15, %v0
++ vmrhh %v0, %v31, %v0
++ vmrhh %v15, %v0, %v0
++ vmrhh %v31, %v0, %v0
++ vmrhh %v18, %v3, %v20
++
++#CHECK: vmrlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x60]
++#CHECK: vmrlb %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x60]
++#CHECK: vmrlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x60]
++#CHECK: vmrlb %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x60]
++#CHECK: vmrlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x60]
++#CHECK: vmrlb %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x60]
++#CHECK: vmrlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x60]
++#CHECK: vmrlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x60]
++
++ vmrlb %v0, %v0, %v0
++ vmrlb %v0, %v0, %v15
++ vmrlb %v0, %v0, %v31
++ vmrlb %v0, %v15, %v0
++ vmrlb %v0, %v31, %v0
++ vmrlb %v15, %v0, %v0
++ vmrlb %v31, %v0, %v0
++ vmrlb %v18, %v3, %v20
++
++#CHECK: vmrlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x60]
++#CHECK: vmrlf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x60]
++#CHECK: vmrlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x60]
++#CHECK: vmrlf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x60]
++#CHECK: vmrlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x60]
++#CHECK: vmrlf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x60]
++#CHECK: vmrlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x60]
++#CHECK: vmrlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x60]
++
++ vmrlf %v0, %v0, %v0
++ vmrlf %v0, %v0, %v15
++ vmrlf %v0, %v0, %v31
++ vmrlf %v0, %v15, %v0
++ vmrlf %v0, %v31, %v0
++ vmrlf %v15, %v0, %v0
++ vmrlf %v31, %v0, %v0
++ vmrlf %v18, %v3, %v20
++
++#CHECK: vmrlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x60]
++#CHECK: vmrlg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x60]
++#CHECK: vmrlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x60]
++#CHECK: vmrlg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x60]
++#CHECK: vmrlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x60]
++#CHECK: vmrlg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x60]
++#CHECK: vmrlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x60]
++#CHECK: vmrlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x60]
++
++ vmrlg %v0, %v0, %v0
++ vmrlg %v0, %v0, %v15
++ vmrlg %v0, %v0, %v31
++ vmrlg %v0, %v15, %v0
++ vmrlg %v0, %v31, %v0
++ vmrlg %v15, %v0, %v0
++ vmrlg %v31, %v0, %v0
++ vmrlg %v18, %v3, %v20
++
++#CHECK: vmrlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x60]
++#CHECK: vmrlh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x60]
++#CHECK: vmrlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x60]
++#CHECK: vmrlh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x60]
++#CHECK: vmrlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x60]
++#CHECK: vmrlh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x60]
++#CHECK: vmrlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x60]
++#CHECK: vmrlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x60]
++
++ vmrlh %v0, %v0, %v0
++ vmrlh %v0, %v0, %v15
++ vmrlh %v0, %v0, %v31
++ vmrlh %v0, %v15, %v0
++ vmrlh %v0, %v31, %v0
++ vmrlh %v15, %v0, %v0
++ vmrlh %v31, %v0, %v0
++ vmrlh %v18, %v3, %v20
++
++#CHECK: vmxb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xff]
++#CHECK: vmxb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xff]
++#CHECK: vmxb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xff]
++#CHECK: vmxb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xff]
++#CHECK: vmxb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xff]
++
++ vmxb %v0, %v0, %v0
++ vmxb %v0, %v0, %v31
++ vmxb %v0, %v31, %v0
++ vmxb %v31, %v0, %v0
++ vmxb %v18, %v3, %v20
++
++#CHECK: vmxf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xff]
++#CHECK: vmxf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xff]
++#CHECK: vmxf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xff]
++#CHECK: vmxf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xff]
++#CHECK: vmxf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xff]
++
++ vmxf %v0, %v0, %v0
++ vmxf %v0, %v0, %v31
++ vmxf %v0, %v31, %v0
++ vmxf %v31, %v0, %v0
++ vmxf %v18, %v3, %v20
++
++#CHECK: vmxg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xff]
++#CHECK: vmxg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xff]
++#CHECK: vmxg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xff]
++#CHECK: vmxg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xff]
++#CHECK: vmxg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xff]
++
++ vmxg %v0, %v0, %v0
++ vmxg %v0, %v0, %v31
++ vmxg %v0, %v31, %v0
++ vmxg %v31, %v0, %v0
++ vmxg %v18, %v3, %v20
++
++#CHECK: vmxh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xff]
++#CHECK: vmxh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xff]
++#CHECK: vmxh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xff]
++#CHECK: vmxh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xff]
++#CHECK: vmxh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xff]
++
++ vmxh %v0, %v0, %v0
++ vmxh %v0, %v0, %v31
++ vmxh %v0, %v31, %v0
++ vmxh %v31, %v0, %v0
++ vmxh %v18, %v3, %v20
++
++#CHECK: vmxlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xfd]
++#CHECK: vmxlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xfd]
++#CHECK: vmxlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xfd]
++#CHECK: vmxlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xfd]
++#CHECK: vmxlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xfd]
++
++ vmxlb %v0, %v0, %v0
++ vmxlb %v0, %v0, %v31
++ vmxlb %v0, %v31, %v0
++ vmxlb %v31, %v0, %v0
++ vmxlb %v18, %v3, %v20
++
++#CHECK: vmxlf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xfd]
++#CHECK: vmxlf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xfd]
++#CHECK: vmxlf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xfd]
++#CHECK: vmxlf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xfd]
++#CHECK: vmxlf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xfd]
++
++ vmxlf %v0, %v0, %v0
++ vmxlf %v0, %v0, %v31
++ vmxlf %v0, %v31, %v0
++ vmxlf %v31, %v0, %v0
++ vmxlf %v18, %v3, %v20
++
++#CHECK: vmxlg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xfd]
++#CHECK: vmxlg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xfd]
++#CHECK: vmxlg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xfd]
++#CHECK: vmxlg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xfd]
++#CHECK: vmxlg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xfd]
++
++ vmxlg %v0, %v0, %v0
++ vmxlg %v0, %v0, %v31
++ vmxlg %v0, %v31, %v0
++ vmxlg %v31, %v0, %v0
++ vmxlg %v18, %v3, %v20
++
++#CHECK: vmxlh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xfd]
++#CHECK: vmxlh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xfd]
++#CHECK: vmxlh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xfd]
++#CHECK: vmxlh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xfd]
++#CHECK: vmxlh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xfd]
++
++ vmxlh %v0, %v0, %v0
++ vmxlh %v0, %v0, %v31
++ vmxlh %v0, %v31, %v0
++ vmxlh %v31, %v0, %v0
++ vmxlh %v18, %v3, %v20
++
++#CHECK: vn %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x68]
++#CHECK: vn %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x68]
++#CHECK: vn %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x68]
++#CHECK: vn %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x68]
++#CHECK: vn %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x68]
++
++ vn %v0, %v0, %v0
++ vn %v0, %v0, %v31
++ vn %v0, %v31, %v0
++ vn %v31, %v0, %v0
++ vn %v18, %v3, %v20
++
++#CHECK: vnc %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x69]
++#CHECK: vnc %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x69]
++#CHECK: vnc %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x69]
++#CHECK: vnc %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x69]
++#CHECK: vnc %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x69]
++
++ vnc %v0, %v0, %v0
++ vnc %v0, %v0, %v31
++ vnc %v0, %v31, %v0
++ vnc %v31, %v0, %v0
++ vnc %v18, %v3, %v20
++
++#CHECK: vno %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6b]
++#CHECK: vno %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6b]
++#CHECK: vno %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6b]
++#CHECK: vno %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6b]
++#CHECK: vno %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6b]
++
++ vno %v0, %v0, %v0
++ vno %v0, %v0, %v31
++ vno %v0, %v31, %v0
++ vno %v31, %v0, %v0
++ vno %v18, %v3, %v20
++
++#CHECK: vo %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6a]
++#CHECK: vo %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6a]
++#CHECK: vo %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6a]
++#CHECK: vo %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6a]
++#CHECK: vo %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6a]
++
++ vo %v0, %v0, %v0
++ vo %v0, %v0, %v31
++ vo %v0, %v31, %v0
++ vo %v31, %v0, %v0
++ vo %v18, %v3, %v20
++
++#CHECK: vone %v0 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x44]
++#CHECK: vone %v15 # encoding: [0xe7,0xf0,0xff,0xff,0x00,0x44]
++#CHECK: vone %v22 # encoding: [0xe7,0x60,0xff,0xff,0x08,0x44]
++#CHECK: vone %v31 # encoding: [0xe7,0xf0,0xff,0xff,0x08,0x44]
++
++ vone %v0
++ vone %v15
++ vone %v22
++ vone %v31
++
++#CHECK: vpdi %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x84]
++#CHECK: vpdi %v0, %v0, %v0, 5 # encoding: [0xe7,0x00,0x00,0x00,0x50,0x84]
++#CHECK: vpdi %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x84]
++#CHECK: vpdi %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x84]
++#CHECK: vpdi %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x84]
++#CHECK: vpdi %v13, %v17, %v21, 4 # encoding: [0xe7,0xd1,0x50,0x00,0x46,0x84]
++
++ vpdi %v0, %v0, %v0, 0
++ vpdi %v0, %v0, %v0, 5
++ vpdi %v0, %v0, %v31, 0
++ vpdi %v0, %v31, %v0, 0
++ vpdi %v31, %v0, %v0, 0
++ vpdi %v13, %v17, %v21, 4
++
++#CHECK: vperm %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8c]
++#CHECK: vperm %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8c]
++#CHECK: vperm %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8c]
++#CHECK: vperm %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8c]
++#CHECK: vperm %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8c]
++#CHECK: vperm %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8c]
++
++ vperm %v0, %v0, %v0, %v0
++ vperm %v0, %v0, %v0, %v31
++ vperm %v0, %v0, %v31, %v0
++ vperm %v0, %v31, %v0, %v0
++ vperm %v31, %v0, %v0, %v0
++ vperm %v13, %v17, %v21, %v25
++
++#CHECK: vpkf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x94]
++#CHECK: vpkf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x94]
++#CHECK: vpkf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x94]
++#CHECK: vpkf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x94]
++#CHECK: vpkf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x94]
++#CHECK: vpkf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x94]
++#CHECK: vpkf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x94]
++#CHECK: vpkf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x94]
++
++ vpkf %v0, %v0, %v0
++ vpkf %v0, %v0, %v15
++ vpkf %v0, %v0, %v31
++ vpkf %v0, %v15, %v0
++ vpkf %v0, %v31, %v0
++ vpkf %v15, %v0, %v0
++ vpkf %v31, %v0, %v0
++ vpkf %v18, %v3, %v20
++
++#CHECK: vpkg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x94]
++#CHECK: vpkg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x94]
++#CHECK: vpkg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x94]
++#CHECK: vpkg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x94]
++#CHECK: vpkg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x94]
++#CHECK: vpkg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x94]
++#CHECK: vpkg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x94]
++#CHECK: vpkg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x94]
++
++ vpkg %v0, %v0, %v0
++ vpkg %v0, %v0, %v15
++ vpkg %v0, %v0, %v31
++ vpkg %v0, %v15, %v0
++ vpkg %v0, %v31, %v0
++ vpkg %v15, %v0, %v0
++ vpkg %v31, %v0, %v0
++ vpkg %v18, %v3, %v20
++
++#CHECK: vpkh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x94]
++#CHECK: vpkh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x94]
++#CHECK: vpkh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x94]
++#CHECK: vpkh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x94]
++#CHECK: vpkh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x94]
++#CHECK: vpkh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x94]
++#CHECK: vpkh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x94]
++#CHECK: vpkh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x94]
++
++ vpkh %v0, %v0, %v0
++ vpkh %v0, %v0, %v15
++ vpkh %v0, %v0, %v31
++ vpkh %v0, %v15, %v0
++ vpkh %v0, %v31, %v0
++ vpkh %v15, %v0, %v0
++ vpkh %v31, %v0, %v0
++ vpkh %v18, %v3, %v20
++
++#CHECK: vpklsf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x95]
++#CHECK: vpklsf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x95]
++#CHECK: vpklsf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x95]
++#CHECK: vpklsf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x95]
++#CHECK: vpklsf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x95]
++#CHECK: vpklsf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x95]
++#CHECK: vpklsf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x95]
++#CHECK: vpklsf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x95]
++#CHECK: vpklsfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x95]
++
++ vpklsf %v0, %v0, %v0
++ vpklsf %v0, %v0, %v15
++ vpklsf %v0, %v0, %v31
++ vpklsf %v0, %v15, %v0
++ vpklsf %v0, %v31, %v0
++ vpklsf %v15, %v0, %v0
++ vpklsf %v31, %v0, %v0
++ vpklsf %v18, %v3, %v20
++ vpklsfs %v5, %v22, %v7
++
++#CHECK: vpklsg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x95]
++#CHECK: vpklsg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x95]
++#CHECK: vpklsg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x95]
++#CHECK: vpklsg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x95]
++#CHECK: vpklsg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x95]
++#CHECK: vpklsg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x95]
++#CHECK: vpklsg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x95]
++#CHECK: vpklsg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x95]
++#CHECK: vpklsgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0x95]
++
++ vpklsg %v0, %v0, %v0
++ vpklsg %v0, %v0, %v15
++ vpklsg %v0, %v0, %v31
++ vpklsg %v0, %v15, %v0
++ vpklsg %v0, %v31, %v0
++ vpklsg %v15, %v0, %v0
++ vpklsg %v31, %v0, %v0
++ vpklsg %v18, %v3, %v20
++ vpklsgs %v5, %v22, %v7
++
++#CHECK: vpklsh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x95]
++#CHECK: vpklsh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x95]
++#CHECK: vpklsh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x95]
++#CHECK: vpklsh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x95]
++#CHECK: vpklsh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x95]
++#CHECK: vpklsh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x95]
++#CHECK: vpklsh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x95]
++#CHECK: vpklsh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x95]
++#CHECK: vpklshs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x95]
++
++ vpklsh %v0, %v0, %v0
++ vpklsh %v0, %v0, %v15
++ vpklsh %v0, %v0, %v31
++ vpklsh %v0, %v15, %v0
++ vpklsh %v0, %v31, %v0
++ vpklsh %v15, %v0, %v0
++ vpklsh %v31, %v0, %v0
++ vpklsh %v18, %v3, %v20
++ vpklshs %v5, %v22, %v7
++
++#CHECK: vpksf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x97]
++#CHECK: vpksf %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x20,0x97]
++#CHECK: vpksf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x97]
++#CHECK: vpksf %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x97]
++#CHECK: vpksf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x97]
++#CHECK: vpksf %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x97]
++#CHECK: vpksf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x97]
++#CHECK: vpksf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x97]
++#CHECK: vpksfs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x24,0x97]
++
++ vpksf %v0, %v0, %v0
++ vpksf %v0, %v0, %v15
++ vpksf %v0, %v0, %v31
++ vpksf %v0, %v15, %v0
++ vpksf %v0, %v31, %v0
++ vpksf %v15, %v0, %v0
++ vpksf %v31, %v0, %v0
++ vpksf %v18, %v3, %v20
++ vpksfs %v5, %v22, %v7
++
++#CHECK: vpksg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x97]
++#CHECK: vpksg %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x30,0x97]
++#CHECK: vpksg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x97]
++#CHECK: vpksg %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x97]
++#CHECK: vpksg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x97]
++#CHECK: vpksg %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x97]
++#CHECK: vpksg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x97]
++#CHECK: vpksg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x97]
++#CHECK: vpksgs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x34,0x97]
++
++ vpksg %v0, %v0, %v0
++ vpksg %v0, %v0, %v15
++ vpksg %v0, %v0, %v31
++ vpksg %v0, %v15, %v0
++ vpksg %v0, %v31, %v0
++ vpksg %v15, %v0, %v0
++ vpksg %v31, %v0, %v0
++ vpksg %v18, %v3, %v20
++ vpksgs %v5, %v22, %v7
++
++#CHECK: vpksh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x97]
++#CHECK: vpksh %v0, %v0, %v15 # encoding: [0xe7,0x00,0xf0,0x00,0x10,0x97]
++#CHECK: vpksh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x97]
++#CHECK: vpksh %v0, %v15, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x97]
++#CHECK: vpksh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x97]
++#CHECK: vpksh %v15, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x97]
++#CHECK: vpksh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x97]
++#CHECK: vpksh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x97]
++#CHECK: vpkshs %v5, %v22, %v7 # encoding: [0xe7,0x56,0x70,0x10,0x14,0x97]
++
++ vpksh %v0, %v0, %v0
++ vpksh %v0, %v0, %v15
++ vpksh %v0, %v0, %v31
++ vpksh %v0, %v15, %v0
++ vpksh %v0, %v31, %v0
++ vpksh %v15, %v0, %v0
++ vpksh %v31, %v0, %v0
++ vpksh %v18, %v3, %v20
++ vpkshs %v5, %v22, %v7
++
++#CHECK: vpopct %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x50]
++#CHECK: vpopct %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x50]
++#CHECK: vpopct %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x50]
++#CHECK: vpopct %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x50]
++#CHECK: vpopct %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x50]
++#CHECK: vpopct %v14, %v17, 0 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x50]
++
++ vpopct %v0, %v0, 0
++ vpopct %v0, %v15, 0
++ vpopct %v0, %v31, 0
++ vpopct %v15, %v0, 0
++ vpopct %v31, %v0, 0
++ vpopct %v14, %v17, 0
++
++#CHECK: vrepb %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x4d]
++#CHECK: vrepb %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x00,0x4d]
++#CHECK: vrepb %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x4d]
++#CHECK: vrepb %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x4d]
++#CHECK: vrepb %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x4d]
++#CHECK: vrepb %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x4d]
++#CHECK: vrepb %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x04,0x4d]
++
++ vrepb %v0, %v0, 0
++ vrepb %v0, %v0, 65535
++ vrepb %v0, %v15, 0
++ vrepb %v0, %v31, 0
++ vrepb %v15, %v0, 0
++ vrepb %v31, %v0, 0
++ vrepb %v4, %v21, 0x6789
++
++#CHECK: vrepf %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x4d]
++#CHECK: vrepf %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x20,0x4d]
++#CHECK: vrepf %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x4d]
++#CHECK: vrepf %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x4d]
++#CHECK: vrepf %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x4d]
++#CHECK: vrepf %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x4d]
++#CHECK: vrepf %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x24,0x4d]
++
++ vrepf %v0, %v0, 0
++ vrepf %v0, %v0, 65535
++ vrepf %v0, %v15, 0
++ vrepf %v0, %v31, 0
++ vrepf %v15, %v0, 0
++ vrepf %v31, %v0, 0
++ vrepf %v4, %v21, 0x6789
++
++#CHECK: vrepg %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x4d]
++#CHECK: vrepg %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x30,0x4d]
++#CHECK: vrepg %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0x4d]
++#CHECK: vrepg %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x4d]
++#CHECK: vrepg %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x4d]
++#CHECK: vrepg %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x4d]
++#CHECK: vrepg %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x34,0x4d]
++
++ vrepg %v0, %v0, 0
++ vrepg %v0, %v0, 65535
++ vrepg %v0, %v15, 0
++ vrepg %v0, %v31, 0
++ vrepg %v15, %v0, 0
++ vrepg %v31, %v0, 0
++ vrepg %v4, %v21, 0x6789
++
++#CHECK: vreph %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x4d]
++#CHECK: vreph %v0, %v0, 65535 # encoding: [0xe7,0x00,0xff,0xff,0x10,0x4d]
++#CHECK: vreph %v0, %v15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0x4d]
++#CHECK: vreph %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x4d]
++#CHECK: vreph %v15, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x4d]
++#CHECK: vreph %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x4d]
++#CHECK: vreph %v4, %v21, 26505 # encoding: [0xe7,0x45,0x67,0x89,0x14,0x4d]
++
++ vreph %v0, %v0, 0
++ vreph %v0, %v0, 65535
++ vreph %v0, %v15, 0
++ vreph %v0, %v31, 0
++ vreph %v15, %v0, 0
++ vreph %v31, %v0, 0
++ vreph %v4, %v21, 0x6789
++
++#CHECK: vrepib %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x45]
++#CHECK: vrepib %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x00,0x45]
++#CHECK: vrepib %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x00,0x45]
++#CHECK: vrepib %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x45]
++#CHECK: vrepib %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x45]
++#CHECK: vrepib %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x08,0x45]
++
++ vrepib %v0, 0
++ vrepib %v0, -32768
++ vrepib %v0, 32767
++ vrepib %v15, 0
++ vrepib %v31, 0
++ vrepib %v18, 0x3456
++
++#CHECK: vrepif %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x45]
++#CHECK: vrepif %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x20,0x45]
++#CHECK: vrepif %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x20,0x45]
++#CHECK: vrepif %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x45]
++#CHECK: vrepif %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x45]
++#CHECK: vrepif %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x28,0x45]
++
++ vrepif %v0, 0
++ vrepif %v0, -32768
++ vrepif %v0, 32767
++ vrepif %v15, 0
++ vrepif %v31, 0
++ vrepif %v18, 0x3456
++
++#CHECK: vrepig %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x45]
++#CHECK: vrepig %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x30,0x45]
++#CHECK: vrepig %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x30,0x45]
++#CHECK: vrepig %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0x45]
++#CHECK: vrepig %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x45]
++#CHECK: vrepig %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x38,0x45]
++
++ vrepig %v0, 0
++ vrepig %v0, -32768
++ vrepig %v0, 32767
++ vrepig %v15, 0
++ vrepig %v31, 0
++ vrepig %v18, 0x3456
++
++#CHECK: vrepih %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x45]
++#CHECK: vrepih %v0, -32768 # encoding: [0xe7,0x00,0x80,0x00,0x10,0x45]
++#CHECK: vrepih %v0, 32767 # encoding: [0xe7,0x00,0x7f,0xff,0x10,0x45]
++#CHECK: vrepih %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x45]
++#CHECK: vrepih %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x45]
++#CHECK: vrepih %v18, 13398 # encoding: [0xe7,0x20,0x34,0x56,0x18,0x45]
++
++ vrepih %v0, 0
++ vrepih %v0, -32768
++ vrepih %v0, 32767
++ vrepih %v15, 0
++ vrepih %v31, 0
++ vrepih %v18, 0x3456
++
++#CHECK: vsb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf7]
++#CHECK: vsb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf7]
++#CHECK: vsb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf7]
++#CHECK: vsb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf7]
++#CHECK: vsb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf7]
++
++ vsb %v0, %v0, %v0
++ vsb %v0, %v0, %v31
++ vsb %v0, %v31, %v0
++ vsb %v31, %v0, %v0
++ vsb %v18, %v3, %v20
++
++#CHECK: vsbcbiq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbd]
++#CHECK: vsbcbiq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbd]
++#CHECK: vsbcbiq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbd]
++#CHECK: vsbcbiq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbd]
++#CHECK: vsbcbiq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbd]
++#CHECK: vsbcbiq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbd]
++
++ vsbcbiq %v0, %v0, %v0, %v0
++ vsbcbiq %v0, %v0, %v0, %v31
++ vsbcbiq %v0, %v0, %v31, %v0
++ vsbcbiq %v0, %v31, %v0, %v0
++ vsbcbiq %v31, %v0, %v0, %v0
++ vsbcbiq %v13, %v17, %v21, %v25
++
++#CHECK: vsbiq %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x04,0x00,0x00,0xbf]
++#CHECK: vsbiq %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x04,0x00,0xf1,0xbf]
++#CHECK: vsbiq %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf4,0x00,0x02,0xbf] ++#CHECK: vsbiq %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x04,0x00,0x04,0xbf] ++#CHECK: vsbiq %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x04,0x00,0x08,0xbf] ++#CHECK: vsbiq %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x54,0x00,0x97,0xbf] ++ ++ vsbiq %v0, %v0, %v0, %v0 ++ vsbiq %v0, %v0, %v0, %v31 ++ vsbiq %v0, %v0, %v31, %v0 ++ vsbiq %v0, %v31, %v0, %v0 ++ vsbiq %v31, %v0, %v0, %v0 ++ vsbiq %v13, %v17, %v21, %v25 ++ ++#CHECK: vscbib %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xf5] ++#CHECK: vscbib %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0xf5] ++#CHECK: vscbib %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xf5] ++#CHECK: vscbib %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xf5] ++#CHECK: vscbib %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0xf5] ++ ++ vscbib %v0, %v0, %v0 ++ vscbib %v0, %v0, %v31 ++ vscbib %v0, %v31, %v0 ++ vscbib %v31, %v0, %v0 ++ vscbib %v18, %v3, %v20 ++ ++#CHECK: vscbif %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf5] ++#CHECK: vscbif %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf5] ++#CHECK: vscbif %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf5] ++#CHECK: vscbif %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf5] ++#CHECK: vscbif %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf5] ++ ++ vscbif %v0, %v0, %v0 ++ vscbif %v0, %v0, %v31 ++ vscbif %v0, %v31, %v0 ++ vscbif %v31, %v0, %v0 ++ vscbif %v18, %v3, %v20 ++ ++#CHECK: vscbig %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf5] ++#CHECK: vscbig %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf5] ++#CHECK: vscbig %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf5] ++#CHECK: vscbig %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf5] ++#CHECK: vscbig %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf5] ++ ++ vscbig %v0, %v0, %v0 ++ vscbig %v0, %v0, %v31 ++ vscbig %v0, %v31, %v0 ++ vscbig %v31, %v0, %v0 ++ vscbig %v18, %v3, %v20 ++ ++#CHECK: vscbih %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf5] ++#CHECK: vscbih %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf5] ++#CHECK: vscbih %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf5] ++#CHECK: vscbih %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf5] ++#CHECK: vscbih %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf5] ++ ++ vscbih %v0, %v0, %v0 ++ vscbih %v0, %v0, %v31 ++ vscbih %v0, %v31, %v0 ++ vscbih %v31, %v0, %v0 ++ vscbih %v18, %v3, %v20 ++ ++#CHECK: vscbiq %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x40,0xf5] ++#CHECK: vscbiq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf5] ++#CHECK: vscbiq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf5] ++#CHECK: vscbiq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf5] ++#CHECK: vscbiq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf5] ++ ++ vscbiq %v0, %v0, %v0 ++ vscbiq %v0, %v0, %v31 ++ vscbiq %v0, %v31, %v0 ++ vscbiq %v31, %v0, %v0 ++ vscbiq %v18, %v3, %v20 ++ ++#CHECK: vscef %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1b] ++#CHECK: vscef %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1b] ++#CHECK: vscef %v0, 0(%v0,%r1), 3 # encoding: [0xe7,0x00,0x10,0x00,0x30,0x1b] ++#CHECK: vscef %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1b] ++#CHECK: vscef %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1b] ++#CHECK: vscef %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1b] ++#CHECK: vscef %v0, 
4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x1b] ++#CHECK: vscef %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1b] ++#CHECK: vscef %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1b] ++#CHECK: vscef %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1b] ++ ++ vscef %v0, 0(%v0), 0 ++ vscef %v0, 0(%v0,%r1), 0 ++ vscef %v0, 0(%v0,%r1), 3 ++ vscef %v0, 0(%v0,%r15), 0 ++ vscef %v0, 0(%v15,%r1), 0 ++ vscef %v0, 0(%v31,%r1), 0 ++ vscef %v0, 4095(%v0, %r1), 0 ++ vscef %v15, 0(%v0,%r1), 0 ++ vscef %v31, 0(%v0,%r1), 0 ++ vscef %v10, 1000(%v19,%r7), 1 ++ ++#CHECK: vsceg %v0, 0(%v0), 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x1a] ++#CHECK: vsceg %v0, 0(%v0,%r1), 0 # encoding: [0xe7,0x00,0x10,0x00,0x00,0x1a] ++#CHECK: vsceg %v0, 0(%v0,%r1), 1 # encoding: [0xe7,0x00,0x10,0x00,0x10,0x1a] ++#CHECK: vsceg %v0, 0(%v0,%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x1a] ++#CHECK: vsceg %v0, 0(%v15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x1a] ++#CHECK: vsceg %v0, 0(%v31,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x04,0x1a] ++#CHECK: vsceg %v0, 4095(%v0,%r1), 0 # encoding: [0xe7,0x00,0x1f,0xff,0x00,0x1a] ++#CHECK: vsceg %v15, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x00,0x1a] ++#CHECK: vsceg %v31, 0(%v0,%r1), 0 # encoding: [0xe7,0xf0,0x10,0x00,0x08,0x1a] ++#CHECK: vsceg %v10, 1000(%v19,%r7), 1 # encoding: [0xe7,0xa3,0x73,0xe8,0x14,0x1a] ++ ++ vsceg %v0, 0(%v0), 0 ++ vsceg %v0, 0(%v0,%r1), 0 ++ vsceg %v0, 0(%v0,%r1), 1 ++ vsceg %v0, 0(%v0,%r15), 0 ++ vsceg %v0, 0(%v15,%r1), 0 ++ vsceg %v0, 0(%v31,%r1), 0 ++ vsceg %v0, 4095(%v0,%r1), 0 ++ vsceg %v15, 0(%v0,%r1), 0 ++ vsceg %v31, 0(%v0,%r1), 0 ++ vsceg %v10, 1000(%v19,%r7), 1 ++ ++#CHECK: vsel %v0, %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8d] ++#CHECK: vsel %v0, %v0, %v0, %v31 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8d] ++#CHECK: vsel %v0, %v0, %v31, %v0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8d] ++#CHECK: vsel %v0, %v31, %v0, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8d] ++#CHECK: vsel %v31, %v0, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8d] ++#CHECK: vsel %v13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x50,0x00,0x97,0x8d] ++ ++ vsel %v0, %v0, %v0, %v0 ++ vsel %v0, %v0, %v0, %v31 ++ vsel %v0, %v0, %v31, %v0 ++ vsel %v0, %v31, %v0, %v0 ++ vsel %v31, %v0, %v0, %v0 ++ vsel %v13, %v17, %v21, %v25 ++ ++#CHECK: vsegb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x5f] ++#CHECK: vsegb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x5f] ++#CHECK: vsegb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x5f] ++#CHECK: vsegb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x5f] ++#CHECK: vsegb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x5f] ++#CHECK: vsegb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0x5f] ++ ++ vsegb %v0, %v0 ++ vsegb %v0, %v15 ++ vsegb %v0, %v31 ++ vsegb %v15, %v0 ++ vsegb %v31, %v0 ++ vsegb %v14, %v17 ++ ++#CHECK: vsegf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x5f] ++#CHECK: vsegf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0x5f] ++#CHECK: vsegf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x5f] ++#CHECK: vsegf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0x5f] ++#CHECK: vsegf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x5f] ++#CHECK: vsegf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0x5f] ++ ++ vsegf %v0, %v0 ++ vsegf %v0, %v15 ++ vsegf %v0, %v31 ++ vsegf %v15, %v0 ++ vsegf %v31, %v0 ++ vsegf %v14, %v17 ++ ++#CHECK: vsegh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x5f] ++#CHECK: vsegh %v0, %v15 # encoding: 
[0xe7,0x0f,0x00,0x00,0x10,0x5f] ++#CHECK: vsegh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x5f] ++#CHECK: vsegh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0x5f] ++#CHECK: vsegh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x5f] ++#CHECK: vsegh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0x5f] ++ ++ vsegh %v0, %v0 ++ vsegh %v0, %v15 ++ vsegh %v0, %v31 ++ vsegh %v15, %v0 ++ vsegh %v31, %v0 ++ vsegh %v14, %v17 ++ ++#CHECK: vsf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xf7] ++#CHECK: vsf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0xf7] ++#CHECK: vsf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xf7] ++#CHECK: vsf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xf7] ++#CHECK: vsf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0xf7] ++ ++ vsf %v0, %v0, %v0 ++ vsf %v0, %v0, %v31 ++ vsf %v0, %v31, %v0 ++ vsf %v31, %v0, %v0 ++ vsf %v18, %v3, %v20 ++ ++#CHECK: vsg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xf7] ++#CHECK: vsg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0xf7] ++#CHECK: vsg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xf7] ++#CHECK: vsg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xf7] ++#CHECK: vsg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0xf7] ++ ++ vsg %v0, %v0, %v0 ++ vsg %v0, %v0, %v31 ++ vsg %v0, %v31, %v0 ++ vsg %v31, %v0, %v0 ++ vsg %v18, %v3, %v20 ++ ++#CHECK: vsh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xf7] ++#CHECK: vsh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0xf7] ++#CHECK: vsh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xf7] ++#CHECK: vsh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xf7] ++#CHECK: vsh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0xf7] ++ ++ vsh %v0, %v0, %v0 ++ vsh %v0, %v0, %v31 ++ vsh %v0, %v31, %v0 ++ vsh %v31, %v0, %v0 ++ vsh %v18, %v3, %v20 ++ ++#CHECK: vsl %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x74] ++#CHECK: vsl %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x74] ++#CHECK: vsl %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x74] ++#CHECK: vsl %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x74] ++#CHECK: vsl %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x74] ++ ++ vsl %v0, %v0, %v0 ++ vsl %v0, %v0, %v31 ++ vsl %v0, %v31, %v0 ++ vsl %v31, %v0, %v0 ++ vsl %v18, %v3, %v20 ++ ++#CHECK: vslb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x75] ++#CHECK: vslb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x75] ++#CHECK: vslb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x75] ++#CHECK: vslb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x75] ++#CHECK: vslb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x75] ++ ++ vslb %v0, %v0, %v0 ++ vslb %v0, %v0, %v31 ++ vslb %v0, %v31, %v0 ++ vslb %v31, %v0, %v0 ++ vslb %v18, %v3, %v20 ++ ++#CHECK: vsldb %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x77] ++#CHECK: vsldb %v0, %v0, %v0, 255 # encoding: [0xe7,0x00,0x00,0xff,0x00,0x77] ++#CHECK: vsldb %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x77] ++#CHECK: vsldb %v0, %v31, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x77] ++#CHECK: vsldb %v31, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x77] ++#CHECK: vsldb %v13, %v17, %v21, 121 # encoding: [0xe7,0xd1,0x50,0x79,0x06,0x77] ++ ++ vsldb %v0, %v0, %v0, 0 ++ vsldb %v0, %v0, %v0, 255 ++ vsldb %v0, %v0, %v31, 0 ++ vsldb %v0, %v31, %v0, 0 ++ vsldb %v31, %v0, %v0, 0 ++ vsldb %v13, %v17, %v21, 0x79 ++ ++#CHECK: vsq %v0, %v0, %v0 # encoding: 
[0xe7,0x00,0x00,0x00,0x40,0xf7] ++#CHECK: vsq %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x42,0xf7] ++#CHECK: vsq %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x44,0xf7] ++#CHECK: vsq %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x48,0xf7] ++#CHECK: vsq %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x4a,0xf7] ++ ++ vsq %v0, %v0, %v0 ++ vsq %v0, %v0, %v31 ++ vsq %v0, %v31, %v0 ++ vsq %v31, %v0, %v0 ++ vsq %v18, %v3, %v20 ++ ++#CHECK: vsra %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7e] ++#CHECK: vsra %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7e] ++#CHECK: vsra %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7e] ++#CHECK: vsra %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7e] ++#CHECK: vsra %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7e] ++ ++ vsra %v0, %v0, %v0 ++ vsra %v0, %v0, %v31 ++ vsra %v0, %v31, %v0 ++ vsra %v31, %v0, %v0 ++ vsra %v18, %v3, %v20 ++ ++#CHECK: vsrab %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7f] ++#CHECK: vsrab %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7f] ++#CHECK: vsrab %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7f] ++#CHECK: vsrab %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7f] ++#CHECK: vsrab %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7f] ++ ++ vsrab %v0, %v0, %v0 ++ vsrab %v0, %v0, %v31 ++ vsrab %v0, %v31, %v0 ++ vsrab %v31, %v0, %v0 ++ vsrab %v18, %v3, %v20 ++ ++#CHECK: vsrl %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7c] ++#CHECK: vsrl %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7c] ++#CHECK: vsrl %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7c] ++#CHECK: vsrl %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7c] ++#CHECK: vsrl %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7c] ++ ++ vsrl %v0, %v0, %v0 ++ vsrl %v0, %v0, %v31 ++ vsrl %v0, %v31, %v0 ++ vsrl %v31, %v0, %v0 ++ vsrl %v18, %v3, %v20 ++ ++#CHECK: vsrlb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x7d] ++#CHECK: vsrlb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x7d] ++#CHECK: vsrlb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x7d] ++#CHECK: vsrlb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x7d] ++#CHECK: vsrlb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x7d] ++ ++ vsrlb %v0, %v0, %v0 ++ vsrlb %v0, %v0, %v31 ++ vsrlb %v0, %v31, %v0 ++ vsrlb %v31, %v0, %v0 ++ vsrlb %v18, %v3, %v20 ++ ++#CHECK: vst %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0e] ++#CHECK: vst %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0e] ++#CHECK: vst %v0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0e] ++#CHECK: vst %v0, 0(%r15,%r1) # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0e] ++#CHECK: vst %v15, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0e] ++#CHECK: vst %v31, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0e] ++#CHECK: vst %v18, 1383(%r3,%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x0e] ++ ++ vst %v0, 0 ++ vst %v0, 4095 ++ vst %v0, 0(%r15) ++ vst %v0, 0(%r15,%r1) ++ vst %v15, 0 ++ vst %v31, 0 ++ vst %v18, 0x567(%r3,%r4) ++ ++#CHECK: vsteb %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x08] ++#CHECK: vsteb %v0, 0, 15 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x08] ++#CHECK: vsteb %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x08] ++#CHECK: vsteb %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x08] ++#CHECK: vsteb %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x08] ++#CHECK: vsteb %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x08] ++#CHECK: vsteb %v31, 0, 0 # encoding: 
[0xe7,0xf0,0x00,0x00,0x08,0x08] ++#CHECK: vsteb %v18, 1383(%r3,%r4), 8 # encoding: [0xe7,0x23,0x45,0x67,0x88,0x08] ++ ++ vsteb %v0, 0, 0 ++ vsteb %v0, 0, 15 ++ vsteb %v0, 4095, 0 ++ vsteb %v0, 0(%r15), 0 ++ vsteb %v0, 0(%r15,%r1), 0 ++ vsteb %v15, 0, 0 ++ vsteb %v31, 0, 0 ++ vsteb %v18, 1383(%r3,%r4), 8 ++ ++#CHECK: vstef %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0b] ++#CHECK: vstef %v0, 0, 3 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x0b] ++#CHECK: vstef %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0b] ++#CHECK: vstef %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0b] ++#CHECK: vstef %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0b] ++#CHECK: vstef %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0b] ++#CHECK: vstef %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0b] ++#CHECK: vstef %v18, 1383(%r3,%r4), 2 # encoding: [0xe7,0x23,0x45,0x67,0x28,0x0b] ++ ++ vstef %v0, 0, 0 ++ vstef %v0, 0, 3 ++ vstef %v0, 4095, 0 ++ vstef %v0, 0(%r15), 0 ++ vstef %v0, 0(%r15,%r1), 0 ++ vstef %v15, 0, 0 ++ vstef %v31, 0, 0 ++ vstef %v18, 1383(%r3,%r4), 2 ++ ++#CHECK: vsteg %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x0a] ++#CHECK: vsteg %v0, 0, 1 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x0a] ++#CHECK: vsteg %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x0a] ++#CHECK: vsteg %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x0a] ++#CHECK: vsteg %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x0a] ++#CHECK: vsteg %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x0a] ++#CHECK: vsteg %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x0a] ++#CHECK: vsteg %v18, 1383(%r3,%r4), 1 # encoding: [0xe7,0x23,0x45,0x67,0x18,0x0a] ++ ++ vsteg %v0, 0, 0 ++ vsteg %v0, 0, 1 ++ vsteg %v0, 4095, 0 ++ vsteg %v0, 0(%r15), 0 ++ vsteg %v0, 0(%r15,%r1), 0 ++ vsteg %v15, 0, 0 ++ vsteg %v31, 0, 0 ++ vsteg %v18, 1383(%r3,%r4), 1 ++ ++#CHECK: vsteh %v0, 0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x09] ++#CHECK: vsteh %v0, 0, 7 # encoding: [0xe7,0x00,0x00,0x00,0x70,0x09] ++#CHECK: vsteh %v0, 4095, 0 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x09] ++#CHECK: vsteh %v0, 0(%r15), 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x09] ++#CHECK: vsteh %v0, 0(%r15,%r1), 0 # encoding: [0xe7,0x0f,0x10,0x00,0x00,0x09] ++#CHECK: vsteh %v15, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x09] ++#CHECK: vsteh %v31, 0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x09] ++#CHECK: vsteh %v18, 1383(%r3,%r4), 4 # encoding: [0xe7,0x23,0x45,0x67,0x48,0x09] ++ ++ vsteh %v0, 0, 0 ++ vsteh %v0, 0, 7 ++ vsteh %v0, 4095, 0 ++ vsteh %v0, 0(%r15), 0 ++ vsteh %v0, 0(%r15,%r1), 0 ++ vsteh %v15, 0, 0 ++ vsteh %v31, 0, 0 ++ vsteh %v18, 1383(%r3,%r4), 4 ++ ++#CHECK: vstl %v0, %r0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3f] ++#CHECK: vstl %v0, %r0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3f] ++#CHECK: vstl %v0, %r0, 0(%r15) # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x3f] ++#CHECK: vstl %v0, %r15, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x3f] ++#CHECK: vstl %v15, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x3f] ++#CHECK: vstl %v31, %r0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3f] ++#CHECK: vstl %v18, %r3, 1383(%r4) # encoding: [0xe7,0x23,0x45,0x67,0x08,0x3f] ++ ++ vstl %v0, %r0, 0 ++ vstl %v0, %r0, 4095 ++ vstl %v0, %r0, 0(%r15) ++ vstl %v0, %r15, 0 ++ vstl %v15, %r0, 0 ++ vstl %v31, %r0, 0 ++ vstl %v18, %r3, 1383(%r4) ++ ++#CHECK: vstm %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x3e] ++#CHECK: vstm %v0, %v0, 4095 # encoding: [0xe7,0x00,0x0f,0xff,0x00,0x3e] ++#CHECK: vstm %v0, %v0, 0(%r15) # encoding: 
[0xe7,0x00,0xf0,0x00,0x00,0x3e] ++#CHECK: vstm %v0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x3e] ++#CHECK: vstm %v31, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x3e] ++#CHECK: vstm %v14, %v17, 1074(%r5) # encoding: [0xe7,0xe1,0x54,0x32,0x04,0x3e] ++ ++ vstm %v0, %v0, 0 ++ vstm %v0, %v0, 4095 ++ vstm %v0, %v0, 0(%r15) ++ vstm %v0, %v31, 0 ++ vstm %v31, %v0, 0 ++ vstm %v14, %v17, 1074(%r5) ++ ++#CHECK: vstrcb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a] ++#CHECK: vstrcb %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x8a] ++#CHECK: vstrcb %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x00,0xc0,0x00,0x8a] ++#CHECK: vstrcb %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf0,0x8a] ++#CHECK: vstrcb %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x00,0x00,0xf1,0x8a] ++#CHECK: vstrcb %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x00,0x8a] ++#CHECK: vstrcb %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x8a] ++#CHECK: vstrcb %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0x8a] ++#CHECK: vstrcb %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x8a] ++#CHECK: vstrcb %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x8a] ++#CHECK: vstrcb %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x8a] ++#CHECK: vstrcb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x40,0x5a,0x8a] ++#CHECK: vstrcb %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a] ++#CHECK: vstrcbs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0x90,0x5a,0x8a] ++#CHECK: vstrczb %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x40,0x60,0x5a,0x8a] ++#CHECK: vstrczbs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x40,0xb0,0x5a,0x8a] ++#CHECK: vstrczbs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x40,0xf0,0x5a,0x8a] ++ ++ vstrcb %v0, %v0, %v0, %v0 ++ vstrcb %v0, %v0, %v0, %v0, 0 ++ vstrcb %v0, %v0, %v0, %v0, 12 ++ vstrcb %v0, %v0, %v0, %v15 ++ vstrcb %v0, %v0, %v0, %v31 ++ vstrcb %v0, %v0, %v15, %v0 ++ vstrcb %v0, %v0, %v31, %v0 ++ vstrcb %v0, %v15, %v0, %v0 ++ vstrcb %v0, %v31, %v0, %v0 ++ vstrcb %v15, %v0, %v0, %v0 ++ vstrcb %v31, %v0, %v0, %v0 ++ vstrcb %v18, %v3, %v20, %v5, 4 ++ vstrcb %v18, %v3, %v20, %v5, 15 ++ vstrcbs %v18, %v3, %v20, %v5, 8 ++ vstrczb %v18, %v3, %v20, %v5, 4 ++ vstrczbs %v18, %v3, %v20, %v5, 8 ++ vstrczbs %v18, %v3, %v20, %v5, 15 ++ ++#CHECK: vstrcf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a] ++#CHECK: vstrcf %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x02,0x00,0x00,0x8a] ++#CHECK: vstrcf %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x02,0xc0,0x00,0x8a] ++#CHECK: vstrcf %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf0,0x8a] ++#CHECK: vstrcf %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x02,0x00,0xf1,0x8a] ++#CHECK: vstrcf %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x00,0x8a] ++#CHECK: vstrcf %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf2,0x00,0x02,0x8a] ++#CHECK: vstrcf %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x00,0x8a] ++#CHECK: vstrcf %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x02,0x00,0x04,0x8a] ++#CHECK: vstrcf %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x00,0x8a] ++#CHECK: vstrcf %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x02,0x00,0x08,0x8a] ++#CHECK: vstrcf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x40,0x5a,0x8a] ++#CHECK: vstrcf %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a] ++#CHECK: vstrcfs %v18, %v3, %v20, %v5, 8 # encoding: 
[0xe7,0x23,0x42,0x90,0x5a,0x8a] ++#CHECK: vstrczf %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x42,0x60,0x5a,0x8a] ++#CHECK: vstrczfs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x42,0xb0,0x5a,0x8a] ++#CHECK: vstrczfs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x42,0xf0,0x5a,0x8a] ++ ++ vstrcf %v0, %v0, %v0, %v0 ++ vstrcf %v0, %v0, %v0, %v0, 0 ++ vstrcf %v0, %v0, %v0, %v0, 12 ++ vstrcf %v0, %v0, %v0, %v15 ++ vstrcf %v0, %v0, %v0, %v31 ++ vstrcf %v0, %v0, %v15, %v0 ++ vstrcf %v0, %v0, %v31, %v0 ++ vstrcf %v0, %v15, %v0, %v0 ++ vstrcf %v0, %v31, %v0, %v0 ++ vstrcf %v15, %v0, %v0, %v0 ++ vstrcf %v31, %v0, %v0, %v0 ++ vstrcf %v18, %v3, %v20, %v5, 4 ++ vstrcf %v18, %v3, %v20, %v5, 15 ++ vstrcfs %v18, %v3, %v20, %v5, 8 ++ vstrczf %v18, %v3, %v20, %v5, 4 ++ vstrczfs %v18, %v3, %v20, %v5, 8 ++ vstrczfs %v18, %v3, %v20, %v5, 15 ++ ++#CHECK: vstrch %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a] ++#CHECK: vstrch %v0, %v0, %v0, %v0, 0 # encoding: [0xe7,0x00,0x01,0x00,0x00,0x8a] ++#CHECK: vstrch %v0, %v0, %v0, %v0, 12 # encoding: [0xe7,0x00,0x01,0xc0,0x00,0x8a] ++#CHECK: vstrch %v0, %v0, %v0, %v15, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf0,0x8a] ++#CHECK: vstrch %v0, %v0, %v0, %v31, 0 # encoding: [0xe7,0x00,0x01,0x00,0xf1,0x8a] ++#CHECK: vstrch %v0, %v0, %v15, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x00,0x8a] ++#CHECK: vstrch %v0, %v0, %v31, %v0, 0 # encoding: [0xe7,0x00,0xf1,0x00,0x02,0x8a] ++#CHECK: vstrch %v0, %v15, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x00,0x8a] ++#CHECK: vstrch %v0, %v31, %v0, %v0, 0 # encoding: [0xe7,0x0f,0x01,0x00,0x04,0x8a] ++#CHECK: vstrch %v15, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x00,0x8a] ++#CHECK: vstrch %v31, %v0, %v0, %v0, 0 # encoding: [0xe7,0xf0,0x01,0x00,0x08,0x8a] ++#CHECK: vstrch %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x40,0x5a,0x8a] ++#CHECK: vstrch %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a] ++#CHECK: vstrchs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0x90,0x5a,0x8a] ++#CHECK: vstrczh %v18, %v3, %v20, %v5, 4 # encoding: [0xe7,0x23,0x41,0x60,0x5a,0x8a] ++#CHECK: vstrczhs %v18, %v3, %v20, %v5, 8 # encoding: [0xe7,0x23,0x41,0xb0,0x5a,0x8a] ++#CHECK: vstrczhs %v18, %v3, %v20, %v5, 15 # encoding: [0xe7,0x23,0x41,0xf0,0x5a,0x8a] ++ ++ vstrch %v0, %v0, %v0, %v0 ++ vstrch %v0, %v0, %v0, %v0, 0 ++ vstrch %v0, %v0, %v0, %v0, 12 ++ vstrch %v0, %v0, %v0, %v15 ++ vstrch %v0, %v0, %v0, %v31 ++ vstrch %v0, %v0, %v15, %v0 ++ vstrch %v0, %v0, %v31, %v0 ++ vstrch %v0, %v15, %v0, %v0 ++ vstrch %v0, %v31, %v0, %v0 ++ vstrch %v15, %v0, %v0, %v0 ++ vstrch %v31, %v0, %v0, %v0 ++ vstrch %v18, %v3, %v20, %v5, 4 ++ vstrch %v18, %v3, %v20, %v5, 15 ++ vstrchs %v18, %v3, %v20, %v5, 8 ++ vstrczh %v18, %v3, %v20, %v5, 4 ++ vstrczhs %v18, %v3, %v20, %v5, 8 ++ vstrczhs %v18, %v3, %v20, %v5, 15 ++ ++#CHECK: vsumgh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x65] ++#CHECK: vsumgh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x65] ++#CHECK: vsumgh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x65] ++#CHECK: vsumgh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x65] ++#CHECK: vsumgh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x65] ++ ++ vsumgh %v0, %v0, %v0 ++ vsumgh %v0, %v0, %v31 ++ vsumgh %v0, %v31, %v0 ++ vsumgh %v31, %v0, %v0 ++ vsumgh %v18, %v3, %v20 ++ ++#CHECK: vsumgf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x65] ++#CHECK: vsumgf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x65] ++#CHECK: vsumgf %v0, %v31, %v0 # encoding: 
[0xe7,0x0f,0x00,0x00,0x24,0x65] ++#CHECK: vsumgf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x65] ++#CHECK: vsumgf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x65] ++ ++ vsumgf %v0, %v0, %v0 ++ vsumgf %v0, %v0, %v31 ++ vsumgf %v0, %v31, %v0 ++ vsumgf %v31, %v0, %v0 ++ vsumgf %v18, %v3, %v20 ++ ++#CHECK: vsumqf %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0x67] ++#CHECK: vsumqf %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x22,0x67] ++#CHECK: vsumqf %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0x67] ++#CHECK: vsumqf %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0x67] ++#CHECK: vsumqf %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x2a,0x67] ++ ++ vsumqf %v0, %v0, %v0 ++ vsumqf %v0, %v0, %v31 ++ vsumqf %v0, %v31, %v0 ++ vsumqf %v31, %v0, %v0 ++ vsumqf %v18, %v3, %v20 ++ ++#CHECK: vsumqg %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0x67] ++#CHECK: vsumqg %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x32,0x67] ++#CHECK: vsumqg %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0x67] ++#CHECK: vsumqg %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0x67] ++#CHECK: vsumqg %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x3a,0x67] ++ ++ vsumqg %v0, %v0, %v0 ++ vsumqg %v0, %v0, %v31 ++ vsumqg %v0, %v31, %v0 ++ vsumqg %v31, %v0, %v0 ++ vsumqg %v18, %v3, %v20 ++ ++#CHECK: vsumb %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x64] ++#CHECK: vsumb %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x64] ++#CHECK: vsumb %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x64] ++#CHECK: vsumb %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x64] ++#CHECK: vsumb %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x64] ++ ++ vsumb %v0, %v0, %v0 ++ vsumb %v0, %v0, %v31 ++ vsumb %v0, %v31, %v0 ++ vsumb %v31, %v0, %v0 ++ vsumb %v18, %v3, %v20 ++ ++#CHECK: vsumh %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0x64] ++#CHECK: vsumh %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x12,0x64] ++#CHECK: vsumh %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0x64] ++#CHECK: vsumh %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0x64] ++#CHECK: vsumh %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x1a,0x64] ++ ++ vsumh %v0, %v0, %v0 ++ vsumh %v0, %v0, %v31 ++ vsumh %v0, %v31, %v0 ++ vsumh %v31, %v0, %v0 ++ vsumh %v18, %v3, %v20 ++ ++#CHECK: vtm %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd8] ++#CHECK: vtm %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd8] ++#CHECK: vtm %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd8] ++#CHECK: vtm %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd8] ++#CHECK: vtm %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd8] ++#CHECK: vtm %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd8] ++ ++ vtm %v0, %v0 ++ vtm %v0, %v15 ++ vtm %v0, %v31 ++ vtm %v15, %v0 ++ vtm %v31, %v0 ++ vtm %v14, %v17 ++ ++#CHECK: vuphb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd7] ++#CHECK: vuphb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd7] ++#CHECK: vuphb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd7] ++#CHECK: vuphb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd7] ++#CHECK: vuphb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd7] ++#CHECK: vuphb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd7] ++ ++ vuphb %v0, %v0 ++ vuphb %v0, %v15 ++ vuphb %v0, %v31 ++ vuphb %v15, %v0 ++ vuphb %v31, %v0 ++ vuphb %v14, %v17 ++ ++#CHECK: vuphf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd7] ++#CHECK: vuphf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd7] ++#CHECK: vuphf 
%v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd7] ++#CHECK: vuphf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd7] ++#CHECK: vuphf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd7] ++#CHECK: vuphf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd7] ++ ++ vuphf %v0, %v0 ++ vuphf %v0, %v15 ++ vuphf %v0, %v31 ++ vuphf %v15, %v0 ++ vuphf %v31, %v0 ++ vuphf %v14, %v17 ++ ++#CHECK: vuphh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd7] ++#CHECK: vuphh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd7] ++#CHECK: vuphh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd7] ++#CHECK: vuphh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd7] ++#CHECK: vuphh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd7] ++#CHECK: vuphh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd7] ++ ++ vuphh %v0, %v0 ++ vuphh %v0, %v15 ++ vuphh %v0, %v31 ++ vuphh %v15, %v0 ++ vuphh %v31, %v0 ++ vuphh %v14, %v17 ++ ++#CHECK: vuplhb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd5] ++#CHECK: vuplhb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd5] ++#CHECK: vuplhb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd5] ++#CHECK: vuplhb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd5] ++#CHECK: vuplhb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd5] ++#CHECK: vuplhb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd5] ++ ++ vuplhb %v0, %v0 ++ vuplhb %v0, %v15 ++ vuplhb %v0, %v31 ++ vuplhb %v15, %v0 ++ vuplhb %v31, %v0 ++ vuplhb %v14, %v17 ++ ++#CHECK: vuplhf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd5] ++#CHECK: vuplhf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd5] ++#CHECK: vuplhf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd5] ++#CHECK: vuplhf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd5] ++#CHECK: vuplhf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd5] ++#CHECK: vuplhf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd5] ++ ++ vuplhf %v0, %v0 ++ vuplhf %v0, %v15 ++ vuplhf %v0, %v31 ++ vuplhf %v15, %v0 ++ vuplhf %v31, %v0 ++ vuplhf %v14, %v17 ++ ++#CHECK: vuplhh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd5] ++#CHECK: vuplhh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd5] ++#CHECK: vuplhh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd5] ++#CHECK: vuplhh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd5] ++#CHECK: vuplhh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd5] ++#CHECK: vuplhh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd5] ++ ++ vuplhh %v0, %v0 ++ vuplhh %v0, %v15 ++ vuplhh %v0, %v31 ++ vuplhh %v15, %v0 ++ vuplhh %v31, %v0 ++ vuplhh %v14, %v17 ++ ++#CHECK: vuplb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd6] ++#CHECK: vuplb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd6] ++#CHECK: vuplb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd6] ++#CHECK: vuplb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd6] ++#CHECK: vuplb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd6] ++#CHECK: vuplb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd6] ++ ++ vuplb %v0, %v0 ++ vuplb %v0, %v15 ++ vuplb %v0, %v31 ++ vuplb %v15, %v0 ++ vuplb %v31, %v0 ++ vuplb %v14, %v17 ++ ++#CHECK: vuplf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd6] ++#CHECK: vuplf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd6] ++#CHECK: vuplf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd6] ++#CHECK: vuplf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd6] ++#CHECK: vuplf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd6] ++#CHECK: vuplf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd6] ++ ++ vuplf 
%v0, %v0 ++ vuplf %v0, %v15 ++ vuplf %v0, %v31 ++ vuplf %v15, %v0 ++ vuplf %v31, %v0 ++ vuplf %v14, %v17 ++ ++#CHECK: vuplhw %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd6] ++#CHECK: vuplhw %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd6] ++#CHECK: vuplhw %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd6] ++#CHECK: vuplhw %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd6] ++#CHECK: vuplhw %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd6] ++#CHECK: vuplhw %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd6] ++ ++ vuplhw %v0, %v0 ++ vuplhw %v0, %v15 ++ vuplhw %v0, %v31 ++ vuplhw %v15, %v0 ++ vuplhw %v31, %v0 ++ vuplhw %v14, %v17 ++ ++#CHECK: vupllb %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0xd4] ++#CHECK: vupllb %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x00,0xd4] ++#CHECK: vupllb %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0xd4] ++#CHECK: vupllb %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0xd4] ++#CHECK: vupllb %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0xd4] ++#CHECK: vupllb %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x04,0xd4] ++ ++ vupllb %v0, %v0 ++ vupllb %v0, %v15 ++ vupllb %v0, %v31 ++ vupllb %v15, %v0 ++ vupllb %v31, %v0 ++ vupllb %v14, %v17 ++ ++#CHECK: vupllf %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x20,0xd4] ++#CHECK: vupllf %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x20,0xd4] ++#CHECK: vupllf %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x24,0xd4] ++#CHECK: vupllf %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x20,0xd4] ++#CHECK: vupllf %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x28,0xd4] ++#CHECK: vupllf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd4] ++ ++ vupllf %v0, %v0 ++ vupllf %v0, %v15 ++ vupllf %v0, %v31 ++ vupllf %v15, %v0 ++ vupllf %v31, %v0 ++ vupllf %v14, %v17 ++ ++#CHECK: vupllh %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x10,0xd4] ++#CHECK: vupllh %v0, %v15 # encoding: [0xe7,0x0f,0x00,0x00,0x10,0xd4] ++#CHECK: vupllh %v0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x14,0xd4] ++#CHECK: vupllh %v15, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x10,0xd4] ++#CHECK: vupllh %v31, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x18,0xd4] ++#CHECK: vupllh %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x14,0xd4] ++ ++ vupllh %v0, %v0 ++ vupllh %v0, %v15 ++ vupllh %v0, %v31 ++ vupllh %v15, %v0 ++ vupllh %v31, %v0 ++ vupllh %v14, %v17 ++ ++#CHECK: vx %v0, %v0, %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x6d] ++#CHECK: vx %v0, %v0, %v31 # encoding: [0xe7,0x00,0xf0,0x00,0x02,0x6d] ++#CHECK: vx %v0, %v31, %v0 # encoding: [0xe7,0x0f,0x00,0x00,0x04,0x6d] ++#CHECK: vx %v31, %v0, %v0 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x6d] ++#CHECK: vx %v18, %v3, %v20 # encoding: [0xe7,0x23,0x40,0x00,0x0a,0x6d] ++ ++ vx %v0, %v0, %v0 ++ vx %v0, %v0, %v31 ++ vx %v0, %v31, %v0 ++ vx %v31, %v0, %v0 ++ vx %v18, %v3, %v20 ++ ++#CHECK: vzero %v0 # encoding: [0xe7,0x00,0x00,0x00,0x00,0x44] ++#CHECK: vzero %v11 # encoding: [0xe7,0xb0,0x00,0x00,0x00,0x44] ++#CHECK: vzero %v15 # encoding: [0xe7,0xf0,0x00,0x00,0x00,0x44] ++#CHECK: vzero %v31 # encoding: [0xe7,0xf0,0x00,0x00,0x08,0x44] ++ ++ vzero %v0 ++ vzero %v11 ++ vzero %v15 ++ vzero %v31 ++ ++#CHECK: wcdgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc3] ++#CHECK: wcdgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc3] ++#CHECK: wcdgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] ++#CHECK: wcdgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc3] ++#CHECK: wcdgb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc3] ++#CHECK: wcdgb %v31, %f0, 0, 0 # encoding: 
[0xe7,0xf0,0x00,0x08,0x38,0xc3] ++#CHECK: wcdgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc3] ++ ++ wcdgb %v0, %v0, 0, 0 ++ wcdgb %v0, %v0, 0, 15 ++ wcdgb %v0, %v0, 4, 0 ++ wcdgb %v0, %v0, 12, 0 ++ wcdgb %v0, %v31, 0, 0 ++ wcdgb %v31, %v0, 0, 0 ++ wcdgb %v14, %v17, 4, 10 ++ ++#CHECK: wcdlgb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc1] ++#CHECK: wcdlgb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc1] ++#CHECK: wcdlgb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] ++#CHECK: wcdlgb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc1] ++#CHECK: wcdlgb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc1] ++#CHECK: wcdlgb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc1] ++#CHECK: wcdlgb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc1] ++ ++ wcdlgb %v0, %v0, 0, 0 ++ wcdlgb %v0, %v0, 0, 15 ++ wcdlgb %v0, %v0, 4, 0 ++ wcdlgb %v0, %v0, 12, 0 ++ wcdlgb %v0, %v31, 0, 0 ++ wcdlgb %v31, %v0, 0, 0 ++ wcdlgb %v14, %v17, 4, 10 ++ ++#CHECK: wcgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc2] ++#CHECK: wcgdb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc2] ++#CHECK: wcgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] ++#CHECK: wcgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc2] ++#CHECK: wcgdb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc2] ++#CHECK: wcgdb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc2] ++#CHECK: wcgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc2] ++ ++ wcgdb %v0, %v0, 0, 0 ++ wcgdb %v0, %v0, 0, 15 ++ wcgdb %v0, %v0, 4, 0 ++ wcgdb %v0, %v0, 12, 0 ++ wcgdb %v0, %v31, 0, 0 ++ wcgdb %v31, %v0, 0, 0 ++ wcgdb %v14, %v17, 4, 10 ++ ++#CHECK: wclgdb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc0] ++#CHECK: wclgdb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc0] ++#CHECK: wclgdb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] ++#CHECK: wclgdb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc0] ++#CHECK: wclgdb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc0] ++#CHECK: wclgdb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc0] ++#CHECK: wclgdb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc0] ++ ++ wclgdb %v0, %v0, 0, 0 ++ wclgdb %v0, %v0, 0, 15 ++ wclgdb %v0, %v0, 4, 0 ++ wclgdb %v0, %v0, 12, 0 ++ wclgdb %v0, %v31, 0, 0 ++ wclgdb %v31, %v0, 0, 0 ++ wclgdb %v14, %v17, 4, 10 ++ ++#CHECK: wfadb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe3] ++#CHECK: wfadb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe3] ++#CHECK: wfadb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe3] ++#CHECK: wfadb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe3] ++#CHECK: wfadb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe3] ++ ++ wfadb %v0, %v0, %v0 ++ wfadb %v0, %v0, %v31 ++ wfadb %v0, %v31, %v0 ++ wfadb %v31, %v0, %v0 ++ wfadb %v18, %v3, %v20 ++ ++#CHECK: wfcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xcb] ++#CHECK: wfcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xcb] ++#CHECK: wfcdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xcb] ++#CHECK: wfcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xcb] ++#CHECK: wfcdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xcb] ++#CHECK: wfcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xcb] ++ ++ wfcdb %v0, %v0 ++ wfcdb %v0, %v15 ++ wfcdb %v0, %v31 ++ wfcdb %v15, %v0 ++ wfcdb %v31, %v0 ++ wfcdb %v14, %v17 ++ ++#CHECK: wfcedb %f0, %f0, %f0 # encoding: 
++#CHECK: wfcedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe8]
++#CHECK: wfcedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe8]
++#CHECK: wfcedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe8]
++#CHECK: wfcedb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe8]
++#CHECK: wfcedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe8]
++
++	wfcedb %v0, %v0, %v0
++	wfcedb %v0, %v0, %v31
++	wfcedb %v0, %v31, %v0
++	wfcedb %v31, %v0, %v0
++	wfcedb %v18, %v3, %v20
++
++#CHECK: wfcedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xe8]
++#CHECK: wfcedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xe8]
++#CHECK: wfcedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xe8]
++#CHECK: wfcedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xe8]
++#CHECK: wfcedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xe8]
++
++	wfcedbs %v0, %v0, %v0
++	wfcedbs %v0, %v0, %v31
++	wfcedbs %v0, %v31, %v0
++	wfcedbs %v31, %v0, %v0
++	wfcedbs %v18, %v3, %v20
++
++#CHECK: wfchdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xeb]
++#CHECK: wfchdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xeb]
++#CHECK: wfchdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xeb]
++#CHECK: wfchdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xeb]
++#CHECK: wfchdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xeb]
++
++	wfchdb %v0, %v0, %v0
++	wfchdb %v0, %v0, %v31
++	wfchdb %v0, %v31, %v0
++	wfchdb %v31, %v0, %v0
++	wfchdb %v18, %v3, %v20
++
++#CHECK: wfchdbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xeb]
++#CHECK: wfchdbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xeb]
++#CHECK: wfchdbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xeb]
++#CHECK: wfchdbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xeb]
++#CHECK: wfchdbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xeb]
++
++	wfchdbs %v0, %v0, %v0
++	wfchdbs %v0, %v0, %v31
++	wfchdbs %v0, %v31, %v0
++	wfchdbs %v31, %v0, %v0
++	wfchdbs %v18, %v3, %v20
++
++#CHECK: wfchedb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xea]
++#CHECK: wfchedb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xea]
++#CHECK: wfchedb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xea]
++#CHECK: wfchedb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xea]
++#CHECK: wfchedb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xea]
++
++	wfchedb %v0, %v0, %v0
++	wfchedb %v0, %v0, %v31
++	wfchedb %v0, %v31, %v0
++	wfchedb %v31, %v0, %v0
++	wfchedb %v18, %v3, %v20
++
++#CHECK: wfchedbs %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xea]
++#CHECK: wfchedbs %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x18,0x32,0xea]
++#CHECK: wfchedbs %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xea]
++#CHECK: wfchedbs %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xea]
++#CHECK: wfchedbs %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x18,0x3a,0xea]
++
++	wfchedbs %v0, %v0, %v0
++	wfchedbs %v0, %v0, %v31
++	wfchedbs %v0, %v31, %v0
++	wfchedbs %v31, %v0, %v0
++	wfchedbs %v18, %v3, %v20
++
++#CHECK: wfddb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe5]
++#CHECK: wfddb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe5]
++#CHECK: wfddb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe5]
++#CHECK: wfddb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe5]
++#CHECK: wfddb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe5]
++
++	wfddb %v0, %v0, %v0
++	wfddb %v0, %v0, %v31
++	wfddb %v0, %v31, %v0
++	wfddb %v31, %v0, %v0
++	wfddb %v18, %v3, %v20
++
++#CHECK: wfidb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc7]
++#CHECK: wfidb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc7]
++#CHECK: wfidb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
++#CHECK: wfidb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc7]
++#CHECK: wfidb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc7]
++#CHECK: wfidb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc7]
++#CHECK: wfidb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc7]
++
++	wfidb %v0, %v0, 0, 0
++	wfidb %v0, %v0, 0, 15
++	wfidb %v0, %v0, 4, 0
++	wfidb %v0, %v0, 12, 0
++	wfidb %v0, %v31, 0, 0
++	wfidb %v31, %v0, 0, 0
++	wfidb %v14, %v17, 4, 10
++
++#CHECK: wfkdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x00,0x30,0xca]
++#CHECK: wfkdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x00,0x30,0xca]
++#CHECK: wfkdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x00,0x34,0xca]
++#CHECK: wfkdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x30,0xca]
++#CHECK: wfkdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x00,0x38,0xca]
++#CHECK: wfkdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x34,0xca]
++
++	wfkdb %v0, %v0
++	wfkdb %v0, %v15
++	wfkdb %v0, %v31
++	wfkdb %v15, %v0
++	wfkdb %v31, %v0
++	wfkdb %v14, %v17
++
++#CHECK: wflcdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xcc]
++#CHECK: wflcdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xcc]
++#CHECK: wflcdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xcc]
++#CHECK: wflcdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xcc]
++#CHECK: wflcdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xcc]
++#CHECK: wflcdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xcc]
++
++	wflcdb %v0, %v0
++	wflcdb %v0, %v15
++	wflcdb %v0, %v31
++	wflcdb %v15, %v0
++	wflcdb %v31, %v0
++	wflcdb %v14, %v17
++
++#CHECK: wflndb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x18,0x30,0xcc]
++#CHECK: wflndb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x18,0x30,0xcc]
++#CHECK: wflndb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x18,0x34,0xcc]
++#CHECK: wflndb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x30,0xcc]
++#CHECK: wflndb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x18,0x38,0xcc]
++#CHECK: wflndb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x18,0x34,0xcc]
++
++	wflndb %v0, %v0
++	wflndb %v0, %v15
++	wflndb %v0, %v31
++	wflndb %v15, %v0
++	wflndb %v31, %v0
++	wflndb %v14, %v17
++
++#CHECK: wflpdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x28,0x30,0xcc]
++#CHECK: wflpdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x28,0x30,0xcc]
++#CHECK: wflpdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x28,0x34,0xcc]
++#CHECK: wflpdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x30,0xcc]
++#CHECK: wflpdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x28,0x38,0xcc]
++#CHECK: wflpdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x28,0x34,0xcc]
++
++	wflpdb %v0, %v0
++	wflpdb %v0, %v15
++	wflpdb %v0, %v31
++	wflpdb %v15, %v0
++	wflpdb %v31, %v0
++	wflpdb %v14, %v17
++
++#CHECK: wfmadb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8f]
++#CHECK: wfmadb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8f]
++#CHECK: wfmadb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8f]
++#CHECK: wfmadb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8f]
++#CHECK: wfmadb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8f]
++#CHECK: wfmadb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8f]
++
++	wfmadb %v0, %v0, %v0, %v0
++	wfmadb %v0, %v0, %v0, %v31
++	wfmadb %v0, %v0, %v31, %v0
++	wfmadb %v0, %v31, %v0, %v0
++	wfmadb %v31, %v0, %v0, %v0
++	wfmadb %v13, %v17, %v21, %v25
++
++#CHECK: wfmdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe7]
++#CHECK: wfmdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe7]
++#CHECK: wfmdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe7]
++#CHECK: wfmdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe7]
++#CHECK: wfmdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe7]
++
++	wfmdb %v0, %v0, %v0
++	wfmdb %v0, %v0, %v31
++	wfmdb %v0, %v31, %v0
++	wfmdb %v31, %v0, %v0
++	wfmdb %v18, %v3, %v20
++
++#CHECK: wfmsdb %f0, %f0, %f0, %f0 # encoding: [0xe7,0x00,0x03,0x08,0x00,0x8e]
++#CHECK: wfmsdb %f0, %f0, %f0, %v31 # encoding: [0xe7,0x00,0x03,0x08,0xf1,0x8e]
++#CHECK: wfmsdb %f0, %f0, %v31, %f0 # encoding: [0xe7,0x00,0xf3,0x08,0x02,0x8e]
++#CHECK: wfmsdb %f0, %v31, %f0, %f0 # encoding: [0xe7,0x0f,0x03,0x08,0x04,0x8e]
++#CHECK: wfmsdb %v31, %f0, %f0, %f0 # encoding: [0xe7,0xf0,0x03,0x08,0x08,0x8e]
++#CHECK: wfmsdb %f13, %v17, %v21, %v25 # encoding: [0xe7,0xd1,0x53,0x08,0x97,0x8e]
++
++	wfmsdb %v0, %v0, %v0, %v0
++	wfmsdb %v0, %v0, %v0, %v31
++	wfmsdb %v0, %v0, %v31, %v0
++	wfmsdb %v0, %v31, %v0, %v0
++	wfmsdb %v31, %v0, %v0, %v0
++	wfmsdb %v13, %v17, %v21, %v25
++
++#CHECK: wfsdb %f0, %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xe2]
++#CHECK: wfsdb %f0, %f0, %v31 # encoding: [0xe7,0x00,0xf0,0x08,0x32,0xe2]
++#CHECK: wfsdb %f0, %v31, %f0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xe2]
++#CHECK: wfsdb %v31, %f0, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xe2]
++#CHECK: wfsdb %v18, %f3, %v20 # encoding: [0xe7,0x23,0x40,0x08,0x3a,0xe2]
++
++	wfsdb %v0, %v0, %v0
++	wfsdb %v0, %v0, %v31
++	wfsdb %v0, %v31, %v0
++	wfsdb %v31, %v0, %v0
++	wfsdb %v18, %v3, %v20
++
++#CHECK: wfsqdb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xce]
++#CHECK: wfsqdb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0xce]
++#CHECK: wfsqdb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xce]
++#CHECK: wfsqdb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0xce]
++#CHECK: wfsqdb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xce]
++#CHECK: wfsqdb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x34,0xce]
++
++	wfsqdb %v0, %v0
++	wfsqdb %v0, %v15
++	wfsqdb %v0, %v31
++	wfsqdb %v15, %v0
++	wfsqdb %v31, %v0
++	wfsqdb %v14, %v17
++
++#CHECK: wftcidb %f0, %f0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0x4a]
++#CHECK: wftcidb %f0, %f0, 4095 # encoding: [0xe7,0x00,0xff,0xf8,0x30,0x4a]
++#CHECK: wftcidb %f0, %f15, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x30,0x4a]
++#CHECK: wftcidb %f0, %v31, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0x4a]
++#CHECK: wftcidb %f15, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x30,0x4a]
++#CHECK: wftcidb %v31, %f0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0x4a]
++#CHECK: wftcidb %f4, %v21, 1656 # encoding: [0xe7,0x45,0x67,0x88,0x34,0x4a]
++
++	wftcidb %v0, %v0, 0
++	wftcidb %v0, %v0, 4095
++	wftcidb %v0, %v15, 0
++	wftcidb %v0, %v31, 0
++	wftcidb %v15, %v0, 0
++	wftcidb %v31, %v0, 0
++	wftcidb %v4, %v21, 0x678
++
++#CHECK: wldeb %f0, %f0 # encoding: [0xe7,0x00,0x00,0x08,0x20,0xc4]
++#CHECK: wldeb %f0, %f15 # encoding: [0xe7,0x0f,0x00,0x08,0x20,0xc4]
++#CHECK: wldeb %f0, %v31 # encoding: [0xe7,0x0f,0x00,0x08,0x24,0xc4]
++#CHECK: wldeb %f15, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x20,0xc4]
++#CHECK: wldeb %v31, %f0 # encoding: [0xe7,0xf0,0x00,0x08,0x28,0xc4]
++#CHECK: wldeb %f14, %v17 # encoding: [0xe7,0xe1,0x00,0x08,0x24,0xc4]
++
++	wldeb %v0, %v0
++	wldeb %v0, %v15
++	wldeb %v0, %v31
++	wldeb %v15, %v0
++	wldeb %v31, %v0
++	wldeb %v14, %v17
++
++#CHECK: wledb %f0, %f0, 0, 0 # encoding: [0xe7,0x00,0x00,0x08,0x30,0xc5]
++#CHECK: wledb %f0, %f0, 0, 15 # encoding: [0xe7,0x00,0x00,0xf8,0x30,0xc5]
++#CHECK: wledb %f0, %f0, 4, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
++#CHECK: wledb %f0, %f0, 12, 0 # encoding: [0xe7,0x00,0x00,0x0c,0x30,0xc5]
++#CHECK: wledb %f0, %v31, 0, 0 # encoding: [0xe7,0x0f,0x00,0x08,0x34,0xc5]
++#CHECK: wledb %v31, %f0, 0, 0 # encoding: [0xe7,0xf0,0x00,0x08,0x38,0xc5]
++#CHECK: wledb %f14, %v17, 4, 10 # encoding: [0xe7,0xe1,0x00,0xac,0x34,0xc5]
++
++	wledb %v0, %v0, 0, 0
++	wledb %v0, %v0, 0, 15
++	wledb %v0, %v0, 4, 0
++	wledb %v0, %v0, 12, 0
++	wledb %v0, %v31, 0, 0
++	wledb %v31, %v0, 0, 0
++	wledb %v14, %v17, 4, 10
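++# In the encodings above, byte 1 carries the low four bits of each vector
++# register number and the low nibble of byte 4 is the RXB field supplying
++# the fifth bit: a register in %v16-%v31 contributes 0x08 as the first
++# operand, 0x04 as the second, 0x02 as the third, and 0x01 as the fourth
++# (field names per the z/Architecture Principles of Operation).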
+Index: llvm-36/test/MC/SystemZ/insn-good-z196.s
+===================================================================
+--- llvm-36.orig/test/MC/SystemZ/insn-good-z196.s
++++ llvm-36/test/MC/SystemZ/insn-good-z196.s
+@@ -1021,6 +1021,16 @@
+ 	ork %r15,%r0,%r0
+ 	ork %r7,%r8,%r9
+
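++# popcnt places, in each byte of the first operand, the count of one bits
++# in the corresponding byte of the second operand.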
++#CHECK: popcnt %r0, %r0 # encoding: [0xb9,0xe1,0x00,0x00]
++#CHECK: popcnt %r0, %r15 # encoding: [0xb9,0xe1,0x00,0x0f]
++#CHECK: popcnt %r15, %r0 # encoding: [0xb9,0xe1,0x00,0xf0]
++#CHECK: popcnt %r7, %r8 # encoding: [0xb9,0xe1,0x00,0x78]
++
++	popcnt %r0,%r0
++	popcnt %r0,%r15
++	popcnt %r15,%r0
++	popcnt %r7,%r8
++
+ #CHECK: risbhg %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x5d]
+ #CHECK: risbhg %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x5d]
+ #CHECK: risbhg %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x5d]
+Index: llvm-36/test/MC/SystemZ/insn-good-zEC12.s
+===================================================================
+--- /dev/null
++++ llvm-36/test/MC/SystemZ/insn-good-zEC12.s
+@@ -0,0 +1,126 @@
++# For zEC12 and above.
++# RUN: llvm-mc -triple s390x-linux-gnu -mcpu=zEC12 -show-encoding %s | FileCheck %s
++
++#CHECK: etnd %r0 # encoding: [0xb2,0xec,0x00,0x00]
++#CHECK: etnd %r15 # encoding: [0xb2,0xec,0x00,0xf0]
++#CHECK: etnd %r7 # encoding: [0xb2,0xec,0x00,0x70]
++
++	etnd %r0
++	etnd %r15
++	etnd %r7
++
++#CHECK: ntstg %r0, -524288 # encoding: [0xe3,0x00,0x00,0x00,0x80,0x25]
++#CHECK: ntstg %r0, -1 # encoding: [0xe3,0x00,0x0f,0xff,0xff,0x25]
++#CHECK: ntstg %r0, 0 # encoding: [0xe3,0x00,0x00,0x00,0x00,0x25]
++#CHECK: ntstg %r0, 1 # encoding: [0xe3,0x00,0x00,0x01,0x00,0x25]
++#CHECK: ntstg %r0, 524287 # encoding: [0xe3,0x00,0x0f,0xff,0x7f,0x25]
++#CHECK: ntstg %r0, 0(%r1) # encoding: [0xe3,0x00,0x10,0x00,0x00,0x25]
++#CHECK: ntstg %r0, 0(%r15) # encoding: [0xe3,0x00,0xf0,0x00,0x00,0x25]
++#CHECK: ntstg %r0, 524287(%r1,%r15) # encoding: [0xe3,0x01,0xff,0xff,0x7f,0x25]
++#CHECK: ntstg %r0, 524287(%r15,%r1) # encoding: [0xe3,0x0f,0x1f,0xff,0x7f,0x25]
++#CHECK: ntstg %r15, 0 # encoding: [0xe3,0xf0,0x00,0x00,0x00,0x25]
++
++	ntstg %r0, -524288
++	ntstg %r0, -1
++	ntstg %r0, 0
++	ntstg %r0, 1
++	ntstg %r0, 524287
++	ntstg %r0, 0(%r1)
++	ntstg %r0, 0(%r15)
++	ntstg %r0, 524287(%r1,%r15)
++	ntstg %r0, 524287(%r15,%r1)
++	ntstg %r15, 0
++
++#CHECK: ppa %r0, %r0, 0 # encoding: [0xb2,0xe8,0x00,0x00]
++#CHECK: ppa %r0, %r0, 15 # encoding: [0xb2,0xe8,0xf0,0x00]
++#CHECK: ppa %r0, %r15, 0 # encoding: [0xb2,0xe8,0x00,0x0f]
++#CHECK: ppa %r4, %r6, 7 # encoding: [0xb2,0xe8,0x70,0x46]
++#CHECK: ppa %r15, %r0, 0 # encoding: [0xb2,0xe8,0x00,0xf0]
++
++	ppa %r0, %r0, 0
++	ppa %r0, %r0, 15
++	ppa %r0, %r15, 0
++	ppa %r4, %r6, 7
++	ppa %r15, %r0, 0
++
++#CHECK: risbgn %r0, %r0, 0, 0, 0 # encoding: [0xec,0x00,0x00,0x00,0x00,0x59]
++#CHECK: risbgn %r0, %r0, 0, 0, 63 # encoding: [0xec,0x00,0x00,0x00,0x3f,0x59]
++#CHECK: risbgn %r0, %r0, 0, 255, 0 # encoding: [0xec,0x00,0x00,0xff,0x00,0x59]
++#CHECK: risbgn %r0, %r0, 255, 0, 0 # encoding: [0xec,0x00,0xff,0x00,0x00,0x59]
++#CHECK: risbgn %r0, %r15, 0, 0, 0 # encoding: [0xec,0x0f,0x00,0x00,0x00,0x59]
++#CHECK: risbgn %r15, %r0, 0, 0, 0 # encoding: [0xec,0xf0,0x00,0x00,0x00,0x59]
++#CHECK: risbgn %r4, %r5, 6, 7, 8 # encoding: [0xec,0x45,0x06,0x07,0x08,0x59]
++
++	risbgn %r0,%r0,0,0,0
++	risbgn %r0,%r0,0,0,63
++	risbgn %r0,%r0,0,255,0
++	risbgn %r0,%r0,255,0,0
++	risbgn %r0,%r15,0,0,0
++	risbgn %r15,%r0,0,0,0
++	risbgn %r4,%r5,6,7,8
++
++#CHECK: tabort 0 # encoding: [0xb2,0xfc,0x00,0x00]
++#CHECK: tabort 0(%r1) # encoding: [0xb2,0xfc,0x10,0x00]
++#CHECK: tabort 0(%r15) # encoding: [0xb2,0xfc,0xf0,0x00]
++#CHECK: tabort 4095 # encoding: [0xb2,0xfc,0x0f,0xff]
++#CHECK: tabort 4095(%r1) # encoding: [0xb2,0xfc,0x1f,0xff]
++#CHECK: tabort 4095(%r15) # encoding: [0xb2,0xfc,0xff,0xff]
++
++	tabort 0
++	tabort 0(%r1)
++	tabort 0(%r15)
++	tabort 4095
++	tabort 4095(%r1)
++	tabort 4095(%r15)
++
++#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
++#CHECK: tbegin 4095, 0 # encoding: [0xe5,0x60,0x0f,0xff,0x00,0x00]
++#CHECK: tbegin 0, 0 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x00]
++#CHECK: tbegin 0, 1 # encoding: [0xe5,0x60,0x00,0x00,0x00,0x01]
++#CHECK: tbegin 0, 32767 # encoding: [0xe5,0x60,0x00,0x00,0x7f,0xff]
++#CHECK: tbegin 0, 32768 # encoding: [0xe5,0x60,0x00,0x00,0x80,0x00]
++#CHECK: tbegin 0, 65535 # encoding: [0xe5,0x60,0x00,0x00,0xff,0xff]
++#CHECK: tbegin 0(%r1), 42 # encoding: [0xe5,0x60,0x10,0x00,0x00,0x2a]
++#CHECK: tbegin 0(%r15), 42 # encoding: [0xe5,0x60,0xf0,0x00,0x00,0x2a]
++#CHECK: tbegin 4095(%r1), 42 # encoding: [0xe5,0x60,0x1f,0xff,0x00,0x2a]
++#CHECK: tbegin 4095(%r15), 42 # encoding: [0xe5,0x60,0xff,0xff,0x00,0x2a]
++
++	tbegin 0, 0
++	tbegin 4095, 0
++	tbegin 0, 0
++	tbegin 0, 1
++	tbegin 0, 32767
++	tbegin 0, 32768
++	tbegin 0, 65535
++	tbegin 0(%r1), 42
++	tbegin 0(%r15), 42
++	tbegin 4095(%r1), 42
++	tbegin 4095(%r15), 42
++
++#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
++#CHECK: tbeginc 4095, 0 # encoding: [0xe5,0x61,0x0f,0xff,0x00,0x00]
++#CHECK: tbeginc 0, 0 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x00]
++#CHECK: tbeginc 0, 1 # encoding: [0xe5,0x61,0x00,0x00,0x00,0x01]
++#CHECK: tbeginc 0, 32767 # encoding: [0xe5,0x61,0x00,0x00,0x7f,0xff]
++#CHECK: tbeginc 0, 32768 # encoding: [0xe5,0x61,0x00,0x00,0x80,0x00]
++#CHECK: tbeginc 0, 65535 # encoding: [0xe5,0x61,0x00,0x00,0xff,0xff]
++#CHECK: tbeginc 0(%r1), 42 # encoding: [0xe5,0x61,0x10,0x00,0x00,0x2a]
++#CHECK: tbeginc 0(%r15), 42 # encoding: [0xe5,0x61,0xf0,0x00,0x00,0x2a]
++#CHECK: tbeginc 4095(%r1), 42 # encoding: [0xe5,0x61,0x1f,0xff,0x00,0x2a]
++#CHECK: tbeginc 4095(%r15), 42 # encoding: [0xe5,0x61,0xff,0xff,0x00,0x2a]
++
++	tbeginc 0, 0
++	tbeginc 4095, 0
++	tbeginc 0, 0
++	tbeginc 0, 1
++	tbeginc 0, 32767
++	tbeginc 0, 32768
++	tbeginc 0, 65535
++	tbeginc 0(%r1), 42
++	tbeginc 0(%r15), 42
++	tbeginc 4095(%r1), 42
++	tbeginc 4095(%r15), 42
++
++#CHECK: tend # encoding: [0xb2,0xf8,0x00,0x00]
++
++	tend
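++# The instructions above come from the zEC12 transactional-execution and
++# processor-assist facilities: tbegin starts a nonconstrained transaction,
++# tbeginc a constrained one, tend ends the current transaction, tabort
++# forces an abort, etnd extracts the current nesting depth, ntstg stores a
++# doubleword nontransactionally, and ppa requests a processor assist.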
+Index: llvm-36/test/MC/SystemZ/tokens.s
+===================================================================
+--- llvm-36.orig/test/MC/SystemZ/tokens.s
++++ llvm-36/test/MC/SystemZ/tokens.s
+@@ -13,10 +13,16 @@
+ #CHECK: foo 100(200,%r0), 300
+ #CHECK: error: invalid instruction
+ #CHECK: foo 100(200,%r1), 300
+-#CHECK: error: invalid operand
++#CHECK: error: invalid address register
+ #CHECK: foo 100(%a0), 200
+ #CHECK: error: %r0 used in an address
+ #CHECK: foo 100(%r0), 200
++#CHECK: error: %r0 used in an address
++#CHECK: foo 100(%v1,%r0), 200
++#CHECK: error: invalid instruction
++#CHECK: foo 100(%v0,%r1), 200
++#CHECK: error: invalid instruction
++#CHECK: foo 100(%v31), 200
+ #CHECK: error: invalid operand
+ #CHECK: foo 100(%r1,%a0), 200
+ #CHECK: error: %r0 used in an address
+@@ -45,6 +51,12 @@
+ #CHECK: foo %a15, 200
+ #CHECK: error: invalid register
+ #CHECK: foo %a16, 200
++#CHECK: error: invalid instruction
++#CHECK: foo %v0, 200
++#CHECK: error: invalid instruction
++#CHECK: foo %v31, 200
++#CHECK: error: invalid register
++#CHECK: foo %v32, 200
+ #CHECK: error: invalid register
+ #CHECK: foo %c, 200
+ #CHECK: error: invalid register
+@@ -60,6 +72,9 @@
+ 	foo 100(200,%r1), 300
+ 	foo 100(%a0), 200
+ 	foo 100(%r0), 200
++	foo 100(%v1,%r0), 200
++	foo 100(%v0,%r1), 200
++	foo 100(%v31), 200
+ 	foo 100(%r1,%a0), 200
+ 	foo 100(%r1,%r0), 200
+ 	foo 100(%r1,%r2, 200
+@@ -74,6 +89,9 @@
+ 	foo %a0, 200
+ 	foo %a15, 200
+ 	foo %a16, 200
++	foo %v0, 200
++	foo %v31, 200
++	foo %v32, 200
+ 	foo %c, 200
+ 	foo %, 200
+ 	foo {, 200
diff --git a/SPECS/llvm.spec b/SPECS/llvm.spec
index 5fce613..450f2b8 100644
--- a/SPECS/llvm.spec
+++ b/SPECS/llvm.spec
@@ -6,18 +6,18 @@
 # consequently we build swrast on them instead of llvmpipe.
 ExcludeArch: ppc s390 %{?rhel6:s390x}
 
-#%global svndate 20131023
-#global prerel rc3
+#global svndate 20131023
+#global prerel rc4
 
 Name: mesa-private-llvm
-Version: 3.5.0
-Release: 1%{?dist}
+Version: 3.6.2
+Release: 2%{?prerel:.%prerel}%{?dist}
 Summary: llvm engine for Mesa
 
 Group: System Environment/Libraries
 License: NCSA
 URL: http://llvm.org/
-Source0: http://llvm.org/pre-releases/3.5/llvm-3.5.0.src.tar.xz
+Source0: http://llvm.org/releases/%{version}/%{?prerel}/llvm-%{version}%{?prerel}.src.tar.xz
 #Source0: llvm-%{svndate}.tar.xz
 Source1: make-llvm-snapshot.sh
 # multilib fixes
@@ -26,7 +26,15 @@ Source3: llvm-Config-llvm-config.h
 
 # Data files should be installed with timestamps preserved
 Patch0: llvm-2.6-timestamp.patch
-Patch1: llvm-3.5.0-build-fix.patch
+
+# llvm Z13 backports (#1182150)
+Patch1: llvm-z13-backports.patch
+Patch2: llvm-3.6-large-struct-return.patch
+
+# llvm aarch64 bug fix (#1254386)
+Patch10: 0001-AArch64-Fix-invalid-use-of-references-to-BuildMI.patch
+# add model detection for skylake and broadwell
+Patch11: llvm-3.6.2-nerf-skylake.patch
 
 BuildRequires: bison
 BuildRequires: chrpath
@@ -59,7 +67,10 @@ rm -r -f tools/clang
 
 # llvm patches
 %patch0 -p1 -b .timestamp
-%patch1 -p1 -b .build
+%patch1 -p1 -b .z13
+%patch2 -p1 -b .large-struct
+%patch10 -p1 -b .aarch64-fix
+%patch11 -p1 -b .skl-fix
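+# (each %patch -b above keeps pristine copies of the files it touches under
+# the given backup suffix, e.g. foo.cpp.z13)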
 
 # fix ld search path
 sed -i 's|/lib /usr/lib $lt_ld_extra|%{_libdir} $lt_ld_extra|' \
@@ -94,14 +105,6 @@ export CXX=g++
 --disable-libffi \
 --disable-terminfo \
 --disable-timestamps \
-%ifarch armv7hl armv7l
- --with-cpu=cortex-a8 \
- --with-tune=cortex-a8 \
- --with-arch=armv7-a \
- --with-float=hard \
- --with-fpu=vfpv3-d16 \
- --with-abi=aapcs-linux \
-%endif
 %{nil}
 
 # FIXME file this
@@ -123,9 +126,6 @@ make install DESTDIR=%{buildroot}
 
 # rename the few binaries we're keeping
 mv %{buildroot}%{_bindir}/llvm-config %{buildroot}%{_bindir}/%{name}-config-%{__isa_bits}
-# silly
-rm -f %{buildroot}%{_libdir}/llvm-3.5.0.so
-
 pushd %{buildroot}%{_includedir}/mesa-private/llvm/Config
 mv config.h config-%{__isa_bits}.h
 cp -p %{SOURCE2} config.h
@@ -156,7 +156,7 @@ rm -rf %{buildroot}%{_mandir}/man1
 
 # RHEL: Strip out some headers Mesa doesn't need
 rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{Analysis,Assembly}
-rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{DebugInfo,Object,Option}
+rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/{DebugInfo,Option}
 rm -rf %{buildroot}%{_includedir}/mesa-private/llvm/TableGen
 
 # RHEL: Strip out cmake build foo
@@ -175,7 +175,7 @@ make check LIT_ARGS="-v -j4" | tee llvm-testlog-%{_arch}.txt
 %files
 %defattr(-,root,root,-)
 %doc LICENSE.TXT
-%{_libdir}/libLLVM-3.5-mesa.so
+%{_libdir}/libLLVM-3.6-mesa.so
 
 %files devel
 %defattr(-,root,root,-)
@@ -184,6 +184,30 @@ make check LIT_ARGS="-v -j4" | tee llvm-testlog-%{_arch}.txt
 %{_includedir}/mesa-private/llvm-c
 
 %changelog
+* Wed Oct 14 2015 Adam Jackson 3.6.2-2
+- Teach CPU detection about Skylake/Broadwell, treat them like Haswell
+
+* Mon Aug 24 2015 Dave Airlie 3.6.2-1
+- fix aarch64 bugs via 3.6.2 + patch
+
+* Tue Aug 18 2015 Adam Jackson 3.6.1-2
+- Fix large struct return on s390
+
+* Tue May 26 2015 Dave Airlie 3.6.1-1
+- rebase to llvm 3.6.1
+
+* Thu May 21 2015 Dave Airlie 3.6.0-3
+- backport llvm z13 support from IBM
+
+* Wed May 13 2015 Dave Airlie 3.6.0-2
+- mesa needs Object headers now.
+
+* Wed May 13 2015 Dave Airlie 3.6.0-1
+- llvm 3.6.0 final
+
+* Mon Feb 23 2015 Adam Jackson 3.6.0-0.1
+- llvm 3.6.0 rc4
+
+* Tue Sep 09 2014 Dave Airlie 3.5.0-1
+- llvm 3.5.0 final
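
For cross-reading the vector CHECK lines in the z13 test hunk above, here is a
minimal Python sketch of the 6-byte VRR-a layout those encodings imply. The
field names (RXB, M3) follow the z/Architecture Principles of Operation, not
anything in this patch, and the vrr_a helper is illustrative only, not part of
LLVM or this package:

    # Sketch of the VRR-a byte layout implied by the CHECK encodings above.
    def vrr_a(op_hi, op_lo, v1, v2, m3=0):
        rxb = ((v1 >> 4) << 3) | ((v2 >> 4) << 2)   # fifth bit of V1 and V2
        return bytes([op_hi,                         # opcode, high byte
                      ((v1 & 15) << 4) | (v2 & 15),  # low four bits of V1/V2
                      0, 0,                          # fields unused here
                      (m3 << 4) | rxb,               # element size + RXB
                      op_lo])                        # opcode, low byte

    # Reproduces "vupllf %v14, %v17 # encoding: [0xe7,0xe1,0x00,0x00,0x24,0xd4]"
    assert vrr_a(0xe7, 0xd4, 14, 17, m3=2) == bytes([0xe7,0xe1,0x00,0x00,0x24,0xd4])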