Blame SOURCES/llvm-3.6-large-struct-return.patch

003ced
------------------------------------------------------------------------
003ced
r244889 | uweigand | 2015-08-13 15:37:06 +0200 (Thu, 13 Aug 2015) | 22 lines
003ced
003ced
[SystemZ] Support large LLVM IR struct return values
003ced
003ced
Recent mesa/llvmpipe crashes on SystemZ due to a failed assertion when
003ced
attempting to compile a routine with a return type of
003ced
  { <4 x float>, <4 x float>, <4 x float>, <4 x float> }
003ced
on a system without vector instruction support.
003ced
003ced
This is because after legalizing the vector type, we get a return value
003ced
consisting of 16 floats, which cannot all be returned in registers.
003ced
003ced
Usually, what should happen in this case is that the target's CanLowerReturn
003ced
routine rejects the return type, in which case SelectionDAG falls back to
003ced
implementing a structure return in memory via implicit reference.
003ced
003ced
However, the SystemZ target never actually implemented any CanLowerReturn
003ced
routine, and thus would accept any struct return type.
003ced
003ced
This patch fixes the crash by implementing CanLowerReturn.  As a side effect,
003ced
this also handles fp128 return values, fixing a todo that was noted in
003ced
SystemZCallingConv.td.
003ced
003ced
Index: llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
003ced
===================================================================
003ced
--- llvm-36.orig/lib/Target/SystemZ/SystemZCallingConv.td
003ced
+++ llvm-36/lib/Target/SystemZ/SystemZCallingConv.td
003ced
@@ -53,10 +53,6 @@ def RetCC_SystemZ : CallingConv<[
003ced
   CCIfSubtarget<"hasVector()",
003ced
     CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
003ced
              CCAssignToReg<[V24, V26, V28, V30, V25, V27, V29, V31]>>>
003ced
-
003ced
-  // ABI-compliant code returns long double by reference, but that conversion
003ced
-  // is left to higher-level code.  Perhaps we could add an f128 definition
003ced
-  // here for code that doesn't care about the ABI?
003ced
 ]>;
003ced
 
003ced
 //===----------------------------------------------------------------------===//
003ced
Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
003ced
===================================================================
003ced
--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.cpp
003ced
+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.cpp
003ced
@@ -1169,6 +1169,20 @@ SystemZTargetLowering::LowerCall(CallLow
003ced
   return Chain;
003ced
 }
003ced
 
003ced
+bool SystemZTargetLowering::
003ced
+CanLowerReturn(CallingConv::ID CallConv,
003ced
+               MachineFunction &MF, bool isVarArg,
003ced
+               const SmallVectorImpl<ISD::OutputArg> &Outs,
003ced
+               LLVMContext &Context) const {
003ced
+  // Detect unsupported vector return types.
003ced
+  if (Subtarget.hasVector())
003ced
+    VerifyVectorTypes(Outs);
003ced
+
003ced
+  SmallVector<CCValAssign, 16> RetLocs;
003ced
+  CCState RetCCInfo(CallConv, isVarArg, MF, RetLocs, Context);
003ced
+  return RetCCInfo.CheckReturn(Outs, RetCC_SystemZ);
003ced
+}
003ced
+
003ced
 SDValue
003ced
 SystemZTargetLowering::LowerReturn(SDValue Chain,
003ced
                                    CallingConv::ID CallConv, bool IsVarArg,
003ced
Index: llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
003ced
===================================================================
003ced
--- llvm-36.orig/lib/Target/SystemZ/SystemZISelLowering.h
003ced
+++ llvm-36/lib/Target/SystemZ/SystemZISelLowering.h
003ced
@@ -401,6 +401,10 @@ public:
003ced
   SDValue LowerCall(CallLoweringInfo &CLI,
003ced
                     SmallVectorImpl<SDValue> &InVals) const override;
003ced
 
003ced
+  bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
003ced
+                      bool isVarArg,
003ced
+                      const SmallVectorImpl<ISD::OutputArg> &Outs,
003ced
+                      LLVMContext &Context) const override;
003ced
   SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
003ced
                       const SmallVectorImpl<ISD::OutputArg> &Outs,
003ced
                       const SmallVectorImpl<SDValue> &OutVals,
003ced
Index: llvm-36/test/CodeGen/SystemZ/args-04.ll
003ced
===================================================================
003ced
--- llvm-36.orig/test/CodeGen/SystemZ/args-04.ll
003ced
+++ llvm-36/test/CodeGen/SystemZ/args-04.ll
003ced
@@ -124,3 +124,17 @@ define void @f13(fp128 *%r2, i16 %r3, i3
003ced
   store fp128 %y, fp128 *%r2
003ced
   ret void
003ced
 }
003ced
+
003ced
+; Explicit fp128 return values are likewise passed indirectly.
003ced
+define fp128 @f14(fp128 %r3) {
003ced
+; CHECK-LABEL: f14:
003ced
+; CHECK: ld %f0, 0(%r3)
003ced
+; CHECK: ld %f2, 8(%r3)
003ced
+; CHECK: axbr %f0, %f0
003ced
+; CHECK: std %f0, 0(%r2)
003ced
+; CHECK: std %f2, 8(%r2)
003ced
+; CHECK: br %r14
003ced
+  %y = fadd fp128 %r3, %r3
003ced
+  ret fp128 %y
003ced
+}
003ced
+
003ced
Index: llvm-36/test/CodeGen/SystemZ/args-07.ll
003ced
===================================================================
003ced
--- /dev/null
003ced
+++ llvm-36/test/CodeGen/SystemZ/args-07.ll
003ced
@@ -0,0 +1,60 @@
003ced
+; Test multiple return values (LLVM ABI extension)
003ced
+;
003ced
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
003ced
+
003ced
+; Up to four integer return values fit into GPRs.
003ced
+define { i64, i64, i64, i64 } @f1() {
003ced
+; CHECK-LABEL: f1:
003ced
+; CHECK: lghi %r2, 0
003ced
+; CHECK: lghi %r3, 1
003ced
+; CHECK: lghi %r4, 2
003ced
+; CHECK: lghi %r5, 3
003ced
+; CHECK: br %r14
003ced
+  ret { i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3 }
003ced
+}
003ced
+
003ced
+; More than four integer return values use sret.
003ced
+define { i64, i64, i64, i64, i64 } @f2() {
003ced
+; CHECK-LABEL: f2:
003ced
+; CHECK: mvghi 32(%r2), 4
003ced
+; CHECK: mvghi 24(%r2), 3
003ced
+; CHECK: mvghi 16(%r2), 2
003ced
+; CHECK: mvghi 8(%r2), 1
003ced
+; CHECK: mvghi 0(%r2), 0
003ced
+; CHECK: br %r14
003ced
+  ret { i64, i64, i64, i64, i64 } { i64 0, i64 1, i64 2, i64 3, i64 4 }
003ced
+}
003ced
+
003ced
+; Up to four floating-point return values fit into FPRs.
003ced
+define { double, double, double, double } @f3() {
003ced
+; CHECK-LABEL: f3:
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: ldeb %f0, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: ldeb %f2, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: ldeb %f4, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: ldeb %f6, 0([[TMP]])
003ced
+; CHECK: br %r14
003ced
+  ret { double, double, double, double }
003ced
+      { double 1.0, double 2.0, double 3.0, double 4.0 }
003ced
+}
003ced
+
003ced
+; More than four floating-point return values use sret.
003ced
+define { double, double, double, double, double } @f4() {
003ced
+; CHECK-LABEL: f4:
003ced
+; CHECK: llihh [[TMP:%r[0-5]]], 16404
003ced
+; CHECK: stg [[TMP]], 32(%r2)
003ced
+; CHECK: llihh [[TMP:%r[0-5]]], 16400
003ced
+; CHECK: stg [[TMP]], 24(%r2)
003ced
+; CHECK: llihh [[TMP:%r[0-5]]], 16392
003ced
+; CHECK: stg [[TMP]], 16(%r2)
003ced
+; CHECK: llihh [[TMP:%r[0-5]]], 16384
003ced
+; CHECK: stg [[TMP]], 8(%r2)
003ced
+; CHECK: llihh [[TMP:%r[0-5]]], 16368
003ced
+; CHECK: stg [[TMP]], 0(%r2)
003ced
+; CHECK: br %r14
003ced
+  ret { double, double, double, double, double }
003ced
+      { double 1.0, double 2.0, double 3.0, double 4.0, double 5.0 }
003ced
+}
003ced
Index: llvm-36/test/CodeGen/SystemZ/args-08.ll
003ced
===================================================================
003ced
--- /dev/null
003ced
+++ llvm-36/test/CodeGen/SystemZ/args-08.ll
003ced
@@ -0,0 +1,57 @@
003ced
+; Test calling functions with multiple return values (LLVM ABI extension)
003ced
+;
003ced
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
003ced
+
003ced
+; Up to four integer return values fit into GPRs.
003ced
+declare { i64, i64, i64, i64 } @bar1()
003ced
+
003ced
+define i64 @f1() {
003ced
+; CHECK-LABEL: f1:
003ced
+; CHECK: brasl %r14, bar1
003ced
+; CHECK: lgr %r2, %r5
003ced
+; CHECK: br %r14
003ced
+  %mret = call { i64, i64, i64, i64 } @bar1()
003ced
+  %ret = extractvalue { i64, i64, i64, i64 } %mret, 3
003ced
+  ret i64 %ret
003ced
+}
003ced
+
003ced
+; More than four integer return values use sret.
003ced
+declare { i64, i64, i64, i64, i64 } @bar2()
003ced
+
003ced
+define i64 @f2() {
003ced
+; CHECK-LABEL: f2:
003ced
+; CHECK: la %r2, 160(%r15)
003ced
+; CHECK: brasl %r14, bar2
003ced
+; CHECK: lg  %r2, 192(%r15)
003ced
+; CHECK: br %r14
003ced
+  %mret = call { i64, i64, i64, i64, i64 } @bar2()
003ced
+  %ret = extractvalue { i64, i64, i64, i64, i64 } %mret, 4
003ced
+  ret i64 %ret
003ced
+}
003ced
+
003ced
+; Up to four floating-point return values fit into FPRs.
003ced
+declare { double, double, double, double } @bar3()
003ced
+
003ced
+define double @f3() {
003ced
+; CHECK-LABEL: f3:
003ced
+; CHECK: brasl %r14, bar3
003ced
+; CHECK: ldr %f0, %f6
003ced
+; CHECK: br %r14
003ced
+  %mret = call { double, double, double, double } @bar3()
003ced
+  %ret = extractvalue { double, double, double, double } %mret, 3
003ced
+  ret double %ret
003ced
+}
003ced
+
003ced
+; More than four floating-point return values use sret.
003ced
+declare { double, double, double, double, double } @bar4()
003ced
+
003ced
+define double @f4() {
003ced
+; CHECK-LABEL: f4:
003ced
+; CHECK: la %r2, 160(%r15)
003ced
+; CHECK: brasl %r14, bar4
003ced
+; CHECK: ld  %f0, 192(%r15)
003ced
+; CHECK: br %r14
003ced
+  %mret = call { double, double, double, double, double } @bar4()
003ced
+  %ret = extractvalue { double, double, double, double, double } %mret, 4
003ced
+  ret double %ret
003ced
+}
003ced
Index: llvm-36/test/CodeGen/SystemZ/vec-args-06.ll
003ced
===================================================================
003ced
--- /dev/null
003ced
+++ llvm-36/test/CodeGen/SystemZ/vec-args-06.ll
003ced
@@ -0,0 +1,83 @@
003ced
+; Test multiple return values (LLVM ABI extension)
003ced
+;
003ced
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
003ced
+
003ced
+; Up to eight vector return values fit into VRs.
003ced
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+         <2 x double>, <2 x double>, <2 x double>, <2 x double> } @f1() {
003ced
+; CHECK-LABEL: f1:
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v24, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v26, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v28, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v30, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v25, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v27, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v29, 0([[TMP]])
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl %v31, 0([[TMP]])
003ced
+; CHECK: br %r14
003ced
+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+        <2 x double>, <2 x double>, <2 x double>, <2 x double> }
003ced
+      { <2 x double> <double 1.0, double 1.1>,
003ced
+        <2 x double> <double 2.0, double 2.1>,
003ced
+        <2 x double> <double 3.0, double 3.1>,
003ced
+        <2 x double> <double 4.0, double 4.1>,
003ced
+        <2 x double> <double 5.0, double 5.1>,
003ced
+        <2 x double> <double 6.0, double 6.1>,
003ced
+        <2 x double> <double 7.0, double 7.1>,
003ced
+        <2 x double> <double 8.0, double 8.1> }
003ced
+}
003ced
+
003ced
+; More than eight vector return values use sret.
003ced
+define { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+         <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+         <2 x double> } @f2() {
003ced
+; CHECK-LABEL: f2:
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 128(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 112(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 96(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 80(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 64(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 48(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 32(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 16(%r2)
003ced
+; CHECK: larl [[TMP:%r[0-5]]], .LCPI
003ced
+; CHECK: vl [[VTMP:%v[0-9]+]], 0([[TMP]])
003ced
+; CHECK: vst [[VTMP]], 0(%r2)
003ced
+; CHECK: br %r14
003ced
+  ret { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+        <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+        <2 x double> }
003ced
+      { <2 x double> <double 1.0, double 1.1>,
003ced
+        <2 x double> <double 2.0, double 2.1>,
003ced
+        <2 x double> <double 3.0, double 3.1>,
003ced
+        <2 x double> <double 4.0, double 4.1>,
003ced
+        <2 x double> <double 5.0, double 5.1>,
003ced
+        <2 x double> <double 6.0, double 6.1>,
003ced
+        <2 x double> <double 7.0, double 7.1>,
003ced
+        <2 x double> <double 8.0, double 8.1>,
003ced
+        <2 x double> <double 9.0, double 9.1> }
003ced
+}
003ced
Index: llvm-36/test/CodeGen/SystemZ/vec-args-07.ll
003ced
===================================================================
003ced
--- /dev/null
003ced
+++ llvm-36/test/CodeGen/SystemZ/vec-args-07.ll
003ced
@@ -0,0 +1,47 @@
003ced
+; Test calling functions with multiple return values (LLVM ABI extension)
003ced
+;
003ced
+; RUN: llc < %s -mtriple=s390x-linux-gnu -mcpu=z13 | FileCheck %s
003ced
+
003ced
+; Up to eight vector return values fit into VRs.
003ced
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+          <2 x double>, <2 x double>, <2 x double>, <2 x double> } @bar1()
003ced
+
003ced
+define <2 x double> @f1() {
003ced
+; CHECK-LABEL: f1:
003ced
+; CHECK: brasl %r14, bar1
003ced
+; CHECK: vlr %v24, %v31
003ced
+; CHECK: br %r14
003ced
+  %mret = call { <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double> } @bar1()
003ced
+  %ret = extractvalue { <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double> } %mret, 7
003ced
+  ret <2 x double> %ret
003ced
+}
003ced
+
003ced
+; More than eight vector return values use sret.
003ced
+declare { <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+          <2 x double>, <2 x double>, <2 x double>, <2 x double>,
003ced
+          <2 x double> } @bar2()
003ced
+
003ced
+define <2 x double> @f2() {
003ced
+; CHECK-LABEL: f2:
003ced
+; CHECK: la %r2, 160(%r15)
003ced
+; CHECK: brasl %r14, bar2
003ced
+; CHECK: vl  %v24, 288(%r15)
003ced
+; CHECK: br %r14
003ced
+  %mret = call { <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double>,
003ced
+                 <2 x double>, <2 x double>,
003ced
+                 <2 x double> } @bar2()
003ced
+  %ret = extractvalue { <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double>,
003ced
+                        <2 x double>, <2 x double>,
003ced
+                        <2 x double> } %mret, 8
003ced
+  ret <2 x double> %ret
003ced
+}