Blame SOURCES/0001-InstCombine-Fix-big-endian-miscompile-of-bitcast-zex.patch

377986
From f8e146f3430de3a6cd904f3f3f7aa1bfaefee14c Mon Sep 17 00:00:00 2001
377986
From: Bjorn Pettersson <bjorn.a.pettersson@ericsson.com>
377986
Date: Thu, 28 Nov 2019 23:18:28 +0100
377986
Subject: [PATCH] [InstCombine] Fix big-endian miscompile of (bitcast
377986
 (zext/trunc (bitcast)))
377986
377986
Summary:
377986
optimizeVectorResize is rewriting patterns like:
377986
  %1 = bitcast vector %src to integer
377986
  %2 = trunc/zext %1
377986
  %dst = bitcast %2 to vector
377986
377986
Since bitcasting between integer and vector types gives
377986
different integer values depending on endianness, we need
377986
to take endianness into account. As it happens the old
377986
implementation only produced the correct result for little
377986
endian targets.
377986
377986
Fixes: https://bugs.llvm.org/show_bug.cgi?id=44178
377986
377986
Reviewers: spatel, lattner, lebedev.ri
377986
377986
Reviewed By: spatel, lebedev.ri
377986
377986
Subscribers: lebedev.ri, hiraditya, uabelho, llvm-commits
377986
377986
Tags: #llvm
377986
377986
Differential Revision: https://reviews.llvm.org/D70844
377986
377986
(cherry picked from commit a9d6b0e5444741d08ff1df7cf71d1559e7fefc1f)
377986
---
377986
 .../InstCombine/InstCombineCasts.cpp          | 79 +++++++++++++------
377986
 llvm/test/Transforms/InstCombine/cast.ll      |  6 +-
377986
 2 files changed, 60 insertions(+), 25 deletions(-)
377986
377986
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
377986
index 2c9ba203fbf3..0af3de300e77 100644
377986
--- a/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
377986
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCasts.cpp
377986
@@ -18,6 +18,7 @@
377986
 #include "llvm/IR/DIBuilder.h"
377986
 #include "llvm/IR/PatternMatch.h"
377986
 #include "llvm/Support/KnownBits.h"
377986
+#include <numeric>
377986
 using namespace llvm;
377986
 using namespace PatternMatch;
377986
 
377986
@@ -1820,12 +1821,24 @@ Instruction *InstCombiner::visitPtrToInt(PtrToIntInst &CI) {
377986
 }
377986
 
377986
 /// This input value (which is known to have vector type) is being zero extended
377986
-/// or truncated to the specified vector type.
377986
+/// or truncated to the specified vector type. Since the zext/trunc is done
377986
+/// using an integer type, we have a (bitcast(cast(bitcast))) pattern, and
377986
+/// endianness will impact which end of the vector is extended or
377986
+/// truncated.
377986
+///
377986
+/// A vector is always stored with index 0 at the lowest address, which
377986
+/// corresponds to the most significant bits for a big endian stored integer and
377986
+/// the least significant bits for little endian. A trunc/zext of an integer
377986
+/// impacts the big end of the integer. Thus, we need to add/remove elements at
377986
+/// the front of the vector for big endian targets, and the back of the vector
377986
+/// for little endian targets.
377986
+///
377986
 /// Try to replace it with a shuffle (and vector/vector bitcast) if possible.
377986
 ///
377986
 /// The source and destination vector types may have different element types.
377986
-static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
377986
-                                         InstCombiner &IC) {
377986
+static Instruction *optimizeVectorResizeWithIntegerBitCasts(Value *InVal,
377986
+                                                            VectorType *DestTy,
377986
+                                                            InstCombiner &IC) {
377986
   // We can only do this optimization if the output is a multiple of the input
377986
   // element size, or the input is a multiple of the output element size.
377986
   // Convert the input type to have the same element type as the output.
377986
@@ -1844,31 +1857,53 @@ static Instruction *optimizeVectorResize(Value *InVal, VectorType *DestTy,
377986
     InVal = IC.Builder.CreateBitCast(InVal, SrcTy);
377986
   }
377986
 
377986
+  bool IsBigEndian = IC.getDataLayout().isBigEndian();
377986
+  unsigned SrcElts = SrcTy->getNumElements();
377986
+  unsigned DestElts = DestTy->getNumElements();
377986
+
377986
+  assert(SrcElts != DestElts && "Element counts should be different.");
377986
+
377986
   // Now that the element types match, get the shuffle mask and RHS of the
377986
   // shuffle to use, which depends on whether we're increasing or decreasing the
377986
   // size of the input.
377986
-  SmallVector<uint32_t, 16> ShuffleMask;
377986
+  SmallVector<uint32_t, 16> ShuffleMaskStorage;
377986
+  ArrayRef<uint32_t> ShuffleMask;
377986
   Value *V2;
377986
 
377986
-  if (SrcTy->getNumElements() > DestTy->getNumElements()) {
377986
-    // If we're shrinking the number of elements, just shuffle in the low
377986
-    // elements from the input and use undef as the second shuffle input.
377986
-    V2 = UndefValue::get(SrcTy);
377986
-    for (unsigned i = 0, e = DestTy->getNumElements(); i != e; ++i)
377986
-      ShuffleMask.push_back(i);
377986
+  // Produce an identity shuffle mask for the src vector.
377986
+  ShuffleMaskStorage.resize(SrcElts);
377986
+  std::iota(ShuffleMaskStorage.begin(), ShuffleMaskStorage.end(), 0);
377986
 
377986
+  if (SrcElts > DestElts) {
377986
+    // If we're shrinking the number of elements (rewriting an integer
377986
+    // truncate), just shuffle in the elements corresponding to the least
377986
+    // significant bits from the input and use undef as the second shuffle
377986
+    // input.
377986
+    V2 = UndefValue::get(SrcTy);
377986
+    // Make sure the shuffle mask selects the "least significant bits" by
377986
+    // keeping elements from back of the src vector for big endian, and from the
377986
+    // front for little endian.
377986
+    ShuffleMask = ShuffleMaskStorage;
377986
+    if (IsBigEndian)
377986
+      ShuffleMask = ShuffleMask.take_back(DestElts);
377986
+    else
377986
+      ShuffleMask = ShuffleMask.take_front(DestElts);
377986
   } else {
377986
-    // If we're increasing the number of elements, shuffle in all of the
377986
-    // elements from InVal and fill the rest of the result elements with zeros
377986
-    // from a constant zero.
377986
+    // If we're increasing the number of elements (rewriting an integer zext),
377986
+    // shuffle in all of the elements from InVal. Fill the rest of the result
377986
+    // elements with zeros from a constant zero.
377986
     V2 = Constant::getNullValue(SrcTy);
377986
-    unsigned SrcElts = SrcTy->getNumElements();
377986
-    for (unsigned i = 0, e = SrcElts; i != e; ++i)
377986
-      ShuffleMask.push_back(i);
377986
-
377986
-    // The excess elements reference the first element of the zero input.
377986
-    for (unsigned i = 0, e = DestTy->getNumElements()-SrcElts; i != e; ++i)
377986
-      ShuffleMask.push_back(SrcElts);
377986
+    // Use first elt from V2 when indicating zero in the shuffle mask.
377986
+    uint32_t NullElt = SrcElts;
377986
+    // Extend with null values in the "most significant bits" by adding elements
377986
+    // in front of the src vector for big endian, and at the back for little
377986
+    // endian.
377986
+    unsigned DeltaElts = DestElts - SrcElts;
377986
+    if (IsBigEndian)
377986
+      ShuffleMaskStorage.insert(ShuffleMaskStorage.begin(), DeltaElts, NullElt);
377986
+    else
377986
+      ShuffleMaskStorage.append(DeltaElts, NullElt);
377986
+    ShuffleMask = ShuffleMaskStorage;
377986
   }
377986
 
377986
   return new ShuffleVectorInst(InVal, V2,
377986
@@ -2359,8 +2394,8 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
377986
         CastInst *SrcCast = cast<CastInst>(Src);
377986
         if (BitCastInst *BCIn = dyn_cast<BitCastInst>(SrcCast->getOperand(0)))
377986
           if (isa<VectorType>(BCIn->getOperand(0)->getType()))
377986
-            if (Instruction *I = optimizeVectorResize(BCIn->getOperand(0),
377986
-                                               cast<VectorType>(DestTy), *this))
377986
+            if (Instruction *I = optimizeVectorResizeWithIntegerBitCasts(
377986
+                    BCIn->getOperand(0), cast<VectorType>(DestTy), *this))
377986
               return I;
377986
       }
377986
 
377986
diff --git a/llvm/test/Transforms/InstCombine/cast.ll b/llvm/test/Transforms/InstCombine/cast.ll
377986
index b6d1eda0601d..3ce8de033422 100644
377986
--- a/llvm/test/Transforms/InstCombine/cast.ll
377986
+++ b/llvm/test/Transforms/InstCombine/cast.ll
377986
@@ -824,7 +824,7 @@ define i64 @test59(i8 %A, i8 %B) {
377986
 
377986
 define <3 x i32> @test60(<4 x i32> %call4) {
377986
 ; CHECK-LABEL: @test60(
377986
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
377986
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <4 x i32> [[CALL4:%.*]], <4 x i32> undef, <3 x i32> <i32 1, i32 2, i32 3>
377986
 ; CHECK-NEXT:    ret <3 x i32> [[P10]]
377986
 ;
377986
   %p11 = bitcast <4 x i32> %call4 to i128
377986
@@ -836,7 +836,7 @@ define <3 x i32> @test60(<4 x i32> %call4) {
377986
 
377986
 define <4 x i32> @test61(<3 x i32> %call4) {
377986
 ; CHECK-LABEL: @test61(
377986
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
377986
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[CALL4:%.*]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
377986
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
377986
 ;
377986
   %p11 = bitcast <3 x i32> %call4 to i96
377986
@@ -848,7 +848,7 @@ define <4 x i32> @test61(<3 x i32> %call4) {
377986
 define <4 x i32> @test62(<3 x float> %call4) {
377986
 ; CHECK-LABEL: @test62(
377986
 ; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <3 x float> [[CALL4:%.*]] to <3 x i32>
377986
-; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
377986
+; CHECK-NEXT:    [[P10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 3, i32 0, i32 1, i32 2>
377986
 ; CHECK-NEXT:    ret <4 x i32> [[P10]]
377986
 ;
377986
   %p11 = bitcast <3 x float> %call4 to i96
377986
-- 
377986
2.26.2
377986