Blame SOURCES/0001-SystemZ-Assign-the-full-space-for-promoted-and-split.patch

3ac0c7
From c6f9d6db7b0c4677d1aae8977505fe6340a3aae2 Mon Sep 17 00:00:00 2001
3ac0c7
From: Josh Stone <cuviper@gmail.com>
3ac0c7
Date: Wed, 10 Mar 2021 15:52:27 -0800
3ac0c7
Subject: [PATCH] [SystemZ] Assign the full space for promoted and split
3ac0c7
 outgoing args. (#95)
3ac0c7
3ac0c7
When a large "irregular" (e.g. i96) integer call argument is converted to
3ac0c7
indirect, 64-bit parts are stored to the stack. The full stack space
3ac0c7
(e.g. i128) was not allocated prior to this patch, but rather just the exact
3ac0c7
space of the original type. This caused neighboring values on the stack to be
3ac0c7
overwritten.
3ac0c7
3ac0c7
Thanks to Josh Stone for reporting this.
3ac0c7
3ac0c7
Review: Ulrich Weigand
3ac0c7
Fixes https://bugs.llvm.org/show_bug.cgi?id=49322
3ac0c7
Differential Revision: https://reviews.llvm.org/D97514
3ac0c7
3ac0c7
(cherry picked from commit 52bbbf4d4459239e0f461bc302ada89e2c5d07fc)
3ac0c7
3ac0c7
Co-authored-by: Jonas Paulsson <paulsson@linux.vnet.ibm.com>
3ac0c7
---
3ac0c7
 .../Target/SystemZ/SystemZISelLowering.cpp    | 22 ++++++--
3ac0c7
 llvm/test/CodeGen/SystemZ/args-11.ll          | 54 +++++++++++++++++++
3ac0c7
 2 files changed, 72 insertions(+), 4 deletions(-)
3ac0c7
 create mode 100644 llvm/test/CodeGen/SystemZ/args-11.ll
3ac0c7
3ac0c7
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
3ac0c7
index eb1e51341ec4..faf7b3eaef3c 100644
3ac0c7
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
3ac0c7
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
3ac0c7
@@ -1543,6 +1543,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
3ac0c7
   bool IsVarArg = CLI.IsVarArg;
3ac0c7
   MachineFunction &MF = DAG.getMachineFunction();
3ac0c7
   EVT PtrVT = getPointerTy(MF.getDataLayout());
3ac0c7
+  LLVMContext &Ctx = *DAG.getContext();
3ac0c7
 
3ac0c7
   // Detect unsupported vector argument and return types.
3ac0c7
   if (Subtarget.hasVector()) {
3ac0c7
@@ -1552,7 +1553,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
3ac0c7
 
3ac0c7
   // Analyze the operands of the call, assigning locations to each operand.
3ac0c7
   SmallVector<CCValAssign, 16> ArgLocs;
3ac0c7
-  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
3ac0c7
+  SystemZCCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, Ctx);
3ac0c7
   ArgCCInfo.AnalyzeCallOperands(Outs, CC_SystemZ);
3ac0c7
 
3ac0c7
   // We don't support GuaranteedTailCallOpt, only automatically-detected
3ac0c7
@@ -1577,14 +1578,25 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
3ac0c7
 
3ac0c7
     if (VA.getLocInfo() == CCValAssign::Indirect) {
3ac0c7
       // Store the argument in a stack slot and pass its address.
3ac0c7
-      SDValue SpillSlot = DAG.CreateStackTemporary(Outs[I].ArgVT);
3ac0c7
+      unsigned ArgIndex = Outs[I].OrigArgIndex;
3ac0c7
+      EVT SlotVT;
3ac0c7
+      if (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
3ac0c7
+        // Allocate the full stack space for a promoted (and split) argument.
3ac0c7
+        Type *OrigArgType = CLI.Args[Outs[I].OrigArgIndex].Ty;
3ac0c7
+        EVT OrigArgVT = getValueType(MF.getDataLayout(), OrigArgType);
3ac0c7
+        MVT PartVT = getRegisterTypeForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
3ac0c7
+        unsigned N = getNumRegistersForCallingConv(Ctx, CLI.CallConv, OrigArgVT);
3ac0c7
+        SlotVT = EVT::getIntegerVT(Ctx, PartVT.getSizeInBits() * N);
3ac0c7
+      } else {
3ac0c7
+        SlotVT = Outs[I].ArgVT;
3ac0c7
+      }
3ac0c7
+      SDValue SpillSlot = DAG.CreateStackTemporary(SlotVT);
3ac0c7
       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
3ac0c7
       MemOpChains.push_back(
3ac0c7
           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
3ac0c7
                        MachinePointerInfo::getFixedStack(MF, FI)));
3ac0c7
       // If the original argument was split (e.g. i128), we need
3ac0c7
       // to store all parts of it here (and pass just one address).
3ac0c7
-      unsigned ArgIndex = Outs[I].OrigArgIndex;
3ac0c7
       assert (Outs[I].PartOffset == 0);
3ac0c7
       while (I + 1 != E && Outs[I + 1].OrigArgIndex == ArgIndex) {
3ac0c7
         SDValue PartValue = OutVals[I + 1];
3ac0c7
@@ -1594,6 +1606,8 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
3ac0c7
         MemOpChains.push_back(
3ac0c7
             DAG.getStore(Chain, DL, PartValue, Address,
3ac0c7
                          MachinePointerInfo::getFixedStack(MF, FI)));
3ac0c7
+        assert((PartOffset + PartValue.getValueType().getStoreSize() <=
3ac0c7
+                SlotVT.getStoreSize()) && "Not enough space for argument part!");
3ac0c7
         ++I;
3ac0c7
       }
3ac0c7
       ArgValue = SpillSlot;
3ac0c7
@@ -1687,7 +1701,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
3ac0c7
 
3ac0c7
   // Assign locations to each value returned by this call.
3ac0c7
   SmallVector<CCValAssign, 16> RetLocs;
3ac0c7
-  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, *DAG.getContext());
3ac0c7
+  CCState RetCCInfo(CallConv, IsVarArg, MF, RetLocs, Ctx);
3ac0c7
   RetCCInfo.AnalyzeCallResult(Ins, RetCC_SystemZ);
3ac0c7
 
3ac0c7
   // Copy all of the result registers out of their specified physreg.
3ac0c7
diff --git a/llvm/test/CodeGen/SystemZ/args-11.ll b/llvm/test/CodeGen/SystemZ/args-11.ll
3ac0c7
new file mode 100644
3ac0c7
index 000000000000..b355f9d6da15
3ac0c7
--- /dev/null
3ac0c7
+++ b/llvm/test/CodeGen/SystemZ/args-11.ll
3ac0c7
@@ -0,0 +1,54 @@
3ac0c7
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
3ac0c7
+; Test outgoing promoted arguments that are split (and passed by reference).
3ac0c7
+;
3ac0c7
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
3ac0c7
+
3ac0c7
+; The i96 arg is promoted to i128 and should get the full stack space.
3ac0c7
+declare void @fn1(i96)
3ac0c7
+define i32 @fn2() {
3ac0c7
+; CHECK-LABEL: fn2:
3ac0c7
+; CHECK:       # %bb.0:
3ac0c7
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
3ac0c7
+; CHECK-NEXT:    .cfi_offset %r14, -48
3ac0c7
+; CHECK-NEXT:    .cfi_offset %r15, -40
3ac0c7
+; CHECK-NEXT:    aghi %r15, -184
3ac0c7
+; CHECK-NEXT:    .cfi_def_cfa_offset 344
3ac0c7
+; CHECK-NEXT:    mvhi 180(%r15), -1
3ac0c7
+; CHECK-NEXT:    mvghi 168(%r15), 0
3ac0c7
+; CHECK-NEXT:    la %r2, 160(%r15)
3ac0c7
+; CHECK-NEXT:    mvghi 160(%r15), 0
3ac0c7
+; CHECK-NEXT:    brasl %r14, fn1@PLT
3ac0c7
+; CHECK-NEXT:    l %r2, 180(%r15)
3ac0c7
+; CHECK-NEXT:    lmg %r14, %r15, 296(%r15)
3ac0c7
+; CHECK-NEXT:    br %r14
3ac0c7
+  %1 = alloca i32
3ac0c7
+  store i32 -1, i32* %1
3ac0c7
+  call void @fn1(i96 0)
3ac0c7
+  %2 = load i32, i32* %1
3ac0c7
+  ret i32 %2
3ac0c7
+}
3ac0c7
+
3ac0c7
+declare void @fn3(i136)
3ac0c7
+define i32 @fn4() {
3ac0c7
+; CHECK-LABEL: fn4:
3ac0c7
+; CHECK:       # %bb.0:
3ac0c7
+; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
3ac0c7
+; CHECK-NEXT:    .cfi_offset %r14, -48
3ac0c7
+; CHECK-NEXT:    .cfi_offset %r15, -40
3ac0c7
+; CHECK-NEXT:    aghi %r15, -192
3ac0c7
+; CHECK-NEXT:    .cfi_def_cfa_offset 352
3ac0c7
+; CHECK-NEXT:    mvhi 188(%r15), -1
3ac0c7
+; CHECK-NEXT:    mvghi 176(%r15), 0
3ac0c7
+; CHECK-NEXT:    mvghi 168(%r15), 0
3ac0c7
+; CHECK-NEXT:    la %r2, 160(%r15)
3ac0c7
+; CHECK-NEXT:    mvghi 160(%r15), 0
3ac0c7
+; CHECK-NEXT:    brasl %r14, fn3@PLT
3ac0c7
+; CHECK-NEXT:    l %r2, 188(%r15)
3ac0c7
+; CHECK-NEXT:    lmg %r14, %r15, 304(%r15)
3ac0c7
+; CHECK-NEXT:    br %r14
3ac0c7
+  %1 = alloca i32
3ac0c7
+  store i32 -1, i32* %1
3ac0c7
+  call void @fn3(i136 0)
3ac0c7
+  %2 = load i32, i32* %1
3ac0c7
+  ret i32 %2
3ac0c7
+}
3ac0c7
-- 
3ac0c7
2.30.2
3ac0c7