|
|
74e884 |
From a481ab548d038c1dfd52ee211b997e2dd33ff5ae Mon Sep 17 00:00:00 2001
|
|
|
74e884 |
From: Hal Finkel <hfinkel@anl.gov>
|
|
|
74e884 |
Date: Wed, 6 Sep 2017 03:08:26 +0000
|
|
|
74e884 |
Subject: [PATCH] [PowerPC] Don't use xscvdpspn on the P7
|
|
|
74e884 |
|
|
|
74e884 |
xscvdpspn was not introduced until the P8, so don't use it on the P7. Fixes a
|
|
|
74e884 |
regression introduced in r288152.
|
|
|
74e884 |
|
|
|
74e884 |
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312612 91177308-0d34-0410-b5e6-96231b3b80d8
|
|
|
74e884 |
---
|
|
|
74e884 |
lib/Target/PowerPC/PPCISelLowering.cpp | 9 ++++++---
|
|
|
74e884 |
test/CodeGen/PowerPC/fp-splat.ll | 27 +++++++++++++++++++++++++++
|
|
|
74e884 |
2 files changed, 33 insertions(+), 3 deletions(-)
|
|
|
74e884 |
create mode 100644 test/CodeGen/PowerPC/fp-splat.ll
|
|
|
74e884 |
|
|
|
74e884 |
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
|
74e884 |
index 74dedaf..6295693 100644
|
|
|
74e884 |
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
|
74e884 |
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
|
|
|
74e884 |
@@ -7463,9 +7463,11 @@ static SDValue BuildVSLDOI(SDValue LHS, SDValue RHS, unsigned Amt, EVT VT,
|
|
|
74e884 |
/// - The node is a "load-and-splat"
|
|
|
74e884 |
/// In all other cases, we will choose to keep the BUILD_VECTOR.
|
|
|
74e884 |
static bool haveEfficientBuildVectorPattern(BuildVectorSDNode *V,
|
|
|
74e884 |
- bool HasDirectMove) {
|
|
|
74e884 |
+ bool HasDirectMove,
|
|
|
74e884 |
+ bool HasP8Vector) {
|
|
|
74e884 |
EVT VecVT = V->getValueType(0);
|
|
|
74e884 |
- bool RightType = VecVT == MVT::v2f64 || VecVT == MVT::v4f32 ||
|
|
|
74e884 |
+ bool RightType = VecVT == MVT::v2f64 ||
|
|
|
74e884 |
+ (HasP8Vector && VecVT == MVT::v4f32) ||
|
|
|
74e884 |
(HasDirectMove && (VecVT == MVT::v2i64 || VecVT == MVT::v4i32));
|
|
|
74e884 |
if (!RightType)
|
|
|
74e884 |
return false;
|
|
|
74e884 |
@@ -7627,7 +7629,8 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|
|
74e884 |
// lowered to VSX instructions under certain conditions.
|
|
|
74e884 |
// Without VSX, there is no pattern more efficient than expanding the node.
|
|
|
74e884 |
if (Subtarget.hasVSX() &&
|
|
|
74e884 |
- haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove()))
|
|
|
74e884 |
+ haveEfficientBuildVectorPattern(BVN, Subtarget.hasDirectMove(),
|
|
|
74e884 |
+ Subtarget.hasP8Vector()))
|
|
|
74e884 |
return Op;
|
|
|
74e884 |
return SDValue();
|
|
|
74e884 |
}
|
|
|
74e884 |
diff --git a/test/CodeGen/PowerPC/fp-splat.ll b/test/CodeGen/PowerPC/fp-splat.ll
|
|
|
74e884 |
new file mode 100644
|
|
|
74e884 |
index 0000000..9b1ab21
|
|
|
74e884 |
--- /dev/null
|
|
|
74e884 |
+++ b/test/CodeGen/PowerPC/fp-splat.ll
|
|
|
74e884 |
@@ -0,0 +1,27 @@
|
|
|
74e884 |
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-P8 -check-prefix=CHECK
|
|
|
74e884 |
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s -check-prefix=CHECK-P7 -check-prefix=CHECK
|
|
|
74e884 |
+
|
|
|
74e884 |
+define <4 x float> @test1(float %a) {
|
|
|
74e884 |
+entry:
|
|
|
74e884 |
+; CHECK-LABEL: test1
|
|
|
74e884 |
+ %vecins = insertelement <4 x float> undef, float %a, i32 0
|
|
|
74e884 |
+ %vecins1 = insertelement <4 x float> %vecins, float %a, i32 1
|
|
|
74e884 |
+ %vecins2 = insertelement <4 x float> %vecins1, float %a, i32 2
|
|
|
74e884 |
+ %vecins3 = insertelement <4 x float> %vecins2, float %a, i32 3
|
|
|
74e884 |
+ ret <4 x float> %vecins3
|
|
|
74e884 |
+; CHECK-P8: xscvdpspn
|
|
|
74e884 |
+; CHECK-P7-NOT: xscvdpspn
|
|
|
74e884 |
+; CHECK: blr
|
|
|
74e884 |
+}
|
|
|
74e884 |
+
|
|
|
74e884 |
+define <2 x double> @test2(double %a) {
|
|
|
74e884 |
+entry:
|
|
|
74e884 |
+; CHECK-LABEL: test2
|
|
|
74e884 |
+ %vecins = insertelement <2 x double> undef, double %a, i32 0
|
|
|
74e884 |
+ %vecins1 = insertelement <2 x double> %vecins, double %a, i32 1
|
|
|
74e884 |
+ ret <2 x double> %vecins1
|
|
|
74e884 |
+; CHECK-P8: xxspltd
|
|
|
74e884 |
+; CHECK-P7: xxspltd
|
|
|
74e884 |
+; CHECK: blr
|
|
|
74e884 |
+}
|
|
|
74e884 |
+
|
|
|
74e884 |
--
|
|
|
74e884 |
1.8.3.1
|
|
|
74e884 |
|