diff --git a/.gitignore b/.gitignore index 5105db5..ddbc9dd 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ -SOURCES/llvm-3.8.1.src.tar.xz +SOURCES/cmake-3.4.3.tar.gz +SOURCES/llvm-3.9.1.src.tar.xz diff --git a/.mesa-private-llvm.metadata b/.mesa-private-llvm.metadata index 049decf..d7a792d 100644 --- a/.mesa-private-llvm.metadata +++ b/.mesa-private-llvm.metadata @@ -1 +1,2 @@ -e0c48c4c182424b99999367d688cd8ce7876827b SOURCES/llvm-3.8.1.src.tar.xz +49e4f05d46d4752e514b19ba36bf97d20a7da66a SOURCES/cmake-3.4.3.tar.gz +ce801cf456b8dacd565ce8df8288b4d90e7317ff SOURCES/llvm-3.9.1.src.tar.xz diff --git a/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch b/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch new file mode 100644 index 0000000..dc3c292 --- /dev/null +++ b/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch @@ -0,0 +1,164 @@ +From 6674146ac94c8744c807ed06e1bdb99cef87b2fe Mon Sep 17 00:00:00 2001 +From: root +Date: Fri, 14 Apr 2017 16:06:58 -0400 +Subject: [PATCH] Revert "[InstCombine] transform bitcasted bitwise logic ops + with constants (PR26702)" + +This reverts commit 76b12c4bf0bbc5c70def7b5d083a8a70547ea4e3. + +Conflicts: + lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +--- + lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 34 +++++----------------- + test/Transforms/InstCombine/bitcast-bigendian.ll | 14 +++++---- + test/Transforms/InstCombine/bitcast.ll | 14 +++++---- + 3 files changed, 23 insertions(+), 39 deletions(-) + +diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +index 1a6459b..36c2136 100644 +--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp ++++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +@@ -1201,41 +1201,21 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) { + + Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); + CastInst *Cast0 = dyn_cast(Op0); +- if (!Cast0) +- return nullptr; +- +- // This must be a cast from an integer or integer vector source type to allow +- // transformation of the logic operation to the source type. +- Type *DestTy = I.getType(); +- Type *SrcTy = Cast0->getSrcTy(); +- if (!SrcTy->isIntOrIntVectorTy()) +- return nullptr; +- +- // If one operand is a bitcast and the other is a constant, move the logic +- // operation ahead of the bitcast. That is, do the logic operation in the +- // original type. This can eliminate useless bitcasts and allow normal +- // combines that would otherwise be impeded by the bitcast. Canonicalization +- // ensures that if there is a constant operand, it will be the second operand. +- Value *BC = nullptr; +- Constant *C = nullptr; +- if ((match(Op0, m_BitCast(m_Value(BC))) && match(Op1, m_Constant(C)))) { +- Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy); +- Value *NewOp = Builder->CreateBinOp(LogicOpc, BC, NewConstant, I.getName()); +- return CastInst::CreateBitOrPointerCast(NewOp, DestTy); +- } +- + CastInst *Cast1 = dyn_cast(Op1); +- if (!Cast1) ++ if (!Cast0 || !Cast1) + return nullptr; + +- // Both operands of the logic operation are casts. The casts must be of the +- // same type for reduction. ++ // The casts must be of the same type, and this must be a cast from an integer ++ // or integer vector source type. + auto CastOpcode = Cast0->getOpcode(); +- if (CastOpcode != Cast1->getOpcode() || SrcTy != Cast1->getSrcTy()) ++ Type *SrcTy = Cast0->getSrcTy(); ++ if ((CastOpcode != Cast1->getOpcode()) || (SrcTy != Cast1->getSrcTy()) || ++ !SrcTy->isIntOrIntVectorTy()) + return nullptr; + + Value *Cast0Src = Cast0->getOperand(0); + Value *Cast1Src = Cast1->getOperand(0); ++ Type *DestTy = I.getType(); + + // fold (logic (cast A), (cast B)) -> (cast (logic A, B)) + +diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll +index f558ecc..4042dad 100644 +--- a/test/Transforms/InstCombine/bitcast-bigendian.ll ++++ b/test/Transforms/InstCombine/bitcast-bigendian.ll +@@ -90,6 +90,8 @@ define <2 x float> @test6(float %A){ + ; CHECK: ret + } + ++; FIXME: Do the logic in the original type for the following 3 tests. ++ + ; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast. + + define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) { +@@ -98,8 +100,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) { + ret <2 x i32> %t2 + + ; CHECK-LABEL: @xor_bitcast_vec_to_vec( +-; CHECK-NEXT: %t21 = xor <1 x i64> %a, +-; CHECK-NEXT: %t2 = bitcast <1 x i64> %t21 to <2 x i32> ++; CHECK-NEXT: %t1 = bitcast <1 x i64> %a to <2 x i32> ++; CHECK-NEXT: %t2 = xor <2 x i32> %t1, + ; CHECK-NEXT: ret <2 x i32> %t2 + } + +@@ -111,8 +113,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) { + ret i64 %t2 + + ; CHECK-LABEL: @and_bitcast_vec_to_int( +-; CHECK-NEXT: %t21 = and <2 x i32> %a, +-; CHECK-NEXT: %t2 = bitcast <2 x i32> %t21 to i64 ++; CHECK-NEXT: %t1 = bitcast <2 x i32> %a to i64 ++; CHECK-NEXT: %t2 = and i64 %t1, 3 + ; CHECK-NEXT: ret i64 %t2 + } + +@@ -124,8 +126,8 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { + ret <2 x i32> %t2 + + ; CHECK-LABEL: @or_bitcast_int_to_vec( +-; CHECK-NEXT: %t21 = or i64 %a, 4294967298 +-; CHECK-NEXT: %t2 = bitcast i64 %t21 to <2 x i32> ++; CHECK-NEXT: %t1 = bitcast i64 %a to <2 x i32> ++; CHECK-NEXT: %t2 = or <2 x i32> %t1, + ; CHECK-NEXT: ret <2 x i32> %t2 + } + +diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll +index 7495859..34e9206 100644 +--- a/test/Transforms/InstCombine/bitcast.ll ++++ b/test/Transforms/InstCombine/bitcast.ll +@@ -30,6 +30,8 @@ define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) { + ; CHECK-NEXT: ret <2 x i32> %t3 + } + ++; FIXME: Do the logic in the original type for the following 3 tests. ++ + ; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast. + + define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) { +@@ -38,8 +40,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) { + ret <2 x i32> %t2 + + ; CHECK-LABEL: @xor_bitcast_vec_to_vec( +-; CHECK-NEXT: %t21 = xor <1 x i64> %a, +-; CHECK-NEXT: %t2 = bitcast <1 x i64> %t21 to <2 x i32> ++; CHECK-NEXT: %t1 = bitcast <1 x i64> %a to <2 x i32> ++; CHECK-NEXT: %t2 = xor <2 x i32> %t1, + ; CHECK-NEXT: ret <2 x i32> %t2 + } + +@@ -51,8 +53,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) { + ret i64 %t2 + + ; CHECK-LABEL: @and_bitcast_vec_to_int( +-; CHECK-NEXT: %t21 = and <2 x i32> %a, +-; CHECK-NEXT: %t2 = bitcast <2 x i32> %t21 to i64 ++; CHECK-NEXT: %t1 = bitcast <2 x i32> %a to i64 ++; CHECK-NEXT: %t2 = and i64 %t1, 3 + ; CHECK-NEXT: ret i64 %t2 + } + +@@ -64,8 +66,8 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) { + ret <2 x i32> %t2 + + ; CHECK-LABEL: @or_bitcast_int_to_vec( +-; CHECK-NEXT: %t21 = or i64 %a, 8589934593 +-; CHECK-NEXT: %t2 = bitcast i64 %t21 to <2 x i32> ++; CHECK-NEXT: %t1 = bitcast i64 %a to <2 x i32> ++; CHECK-NEXT: %t2 = or <2 x i32> %t1, + ; CHECK-NEXT: ret <2 x i32> %t2 + } + +-- +1.8.3.1 + diff --git a/SOURCES/0001-Revert-Merging-r280589.patch b/SOURCES/0001-Revert-Merging-r280589.patch new file mode 100644 index 0000000..ef950f4 --- /dev/null +++ b/SOURCES/0001-Revert-Merging-r280589.patch @@ -0,0 +1,119 @@ +From 95b15b3d2f180b15267032e16c947c0f9b8a112d Mon Sep 17 00:00:00 2001 +From: Dave Airlie +Date: Wed, 1 Mar 2017 13:02:38 +1000 +Subject: [PATCH] Revert "Merging r280589:" + +This reverts commit 25e2616626caafb896517e18cd8aa724fba2b200. +--- + lib/Target/AMDGPU/SIInstructions.td | 1 - + lib/Target/AMDGPU/SIWholeQuadMode.cpp | 7 +++++ + test/CodeGen/AMDGPU/wqm.ll | 49 +++-------------------------------- + 3 files changed, 11 insertions(+), 46 deletions(-) + +diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td +index dde5f2f..18b7d5d 100644 +--- a/lib/Target/AMDGPU/SIInstructions.td ++++ b/lib/Target/AMDGPU/SIInstructions.td +@@ -2029,7 +2029,6 @@ def SI_RETURN : PseudoInstSI < + let hasSideEffects = 1; + let SALU = 1; + let hasNoSchedulingInfo = 1; +- let DisableWQM = 1; + } + + let Uses = [EXEC], Defs = [EXEC, VCC, M0], +diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp +index 1534d58..b200c15 100644 +--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp ++++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp +@@ -219,6 +219,13 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF, + markInstruction(MI, Flags, Worklist); + GlobalFlags |= Flags; + } ++ ++ if (WQMOutputs && MBB.succ_empty()) { ++ // This is a prolog shader. Make sure we go back to exact mode at the end. ++ Blocks[&MBB].OutNeeds = StateExact; ++ Worklist.push_back(&MBB); ++ GlobalFlags |= StateExact; ++ } + } + + return GlobalFlags; +diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll +index 41e4264..809a7ba 100644 +--- a/test/CodeGen/AMDGPU/wqm.ll ++++ b/test/CodeGen/AMDGPU/wqm.ll +@@ -17,18 +17,17 @@ main_body: + ;CHECK-LABEL: {{^}}test2: + ;CHECK-NEXT: ; %main_body + ;CHECK-NEXT: s_wqm_b64 exec, exec ++;CHECK: image_sample + ;CHECK-NOT: exec +-define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) { ++;CHECK: _load_dword v0, ++define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) { + main_body: + %c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) + %c.2 = bitcast <4 x float> %c.1 to <4 x i32> + %c.3 = extractelement <4 x i32> %c.2, i32 0 + %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3 + %data = load float, float addrspace(1)* %gep +- +- call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef) +- +- ret void ++ ret float %data + } + + ; ... but disabled for stores (and, in this simple case, not re-enabled). +@@ -415,46 +414,6 @@ entry: + ret void + } + +-; Must return to exact at the end of a non-void returning shader, +-; otherwise the EXEC mask exported by the epilog will be wrong. This is true +-; even if the shader has no kills, because a kill could have happened in a +-; previous shader fragment. +-; +-; CHECK-LABEL: {{^}}test_nonvoid_return: +-; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec +-; CHECK: s_wqm_b64 exec, exec +-; +-; CHECK: s_and_b64 exec, exec, [[LIVE]] +-; CHECK-NOT: exec +-define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind { +- %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) +- %tex.i = bitcast <4 x float> %tex to <4 x i32> +- %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) +- ret <4 x float> %dtex +-} +- +-; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable: +-; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec +-; CHECK: s_wqm_b64 exec, exec +-; +-; CHECK: s_and_b64 exec, exec, [[LIVE]] +-; CHECK-NOT: exec +-define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind { +-entry: +- %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) +- %tex.i = bitcast <4 x float> %tex to <4 x i32> +- %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0) +- +- %cc = icmp sgt i32 %c, 0 +- br i1 %cc, label %if, label %else +- +-if: +- store volatile <4 x float> %dtex, <4 x float>* undef +- unreachable +- +-else: +- ret <4 x float> %dtex +-} + + declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1 + declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1 +-- +2.9.3 + diff --git a/SOURCES/llvm-3.8.1-rhel-7.3.patch b/SOURCES/llvm-3.8.1-rhel-7.3.patch deleted file mode 100644 index 4caea83..0000000 --- a/SOURCES/llvm-3.8.1-rhel-7.3.patch +++ /dev/null @@ -1,344 +0,0 @@ -diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp -index c0f9e07..94bf580 100644 ---- a/lib/Support/Host.cpp -+++ b/lib/Support/Host.cpp -@@ -290,107 +290,112 @@ StringRef sys::getHostCPUName() { - } - case 6: - switch (Model) { -- case 1: // Pentium Pro processor -+ case 0x01: // Pentium Pro processor - return "pentiumpro"; - -- case 3: // Intel Pentium II OverDrive processor, Pentium II processor, -- // model 03 -- case 5: // Pentium II processor, model 05, Pentium II Xeon processor, -- // model 05, and Intel Celeron processor, model 05 -- case 6: // Celeron processor, model 06 -+ case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, -+ // model 03 -+ case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, -+ // model 05, and Intel Celeron processor, model 05 -+ case 0x06: // Celeron processor, model 06 - return "pentium2"; - -- case 7: // Pentium III processor, model 07, and Pentium III Xeon -- // processor, model 07 -- case 8: // Pentium III processor, model 08, Pentium III Xeon processor, -- // model 08, and Celeron processor, model 08 -- case 10: // Pentium III Xeon processor, model 0Ah -- case 11: // Pentium III processor, model 0Bh -+ case 0x07: // Pentium III processor, model 07, and Pentium III Xeon -+ // processor, model 07 -+ case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, -+ // model 08, and Celeron processor, model 08 -+ case 0x0a: // Pentium III Xeon processor, model 0Ah -+ case 0x0b: // Pentium III processor, model 0Bh - return "pentium3"; - -- case 9: // Intel Pentium M processor, Intel Celeron M processor model 09. -- case 13: // Intel Pentium M processor, Intel Celeron M processor, model -- // 0Dh. All processors are manufactured using the 90 nm process. -- case 21: // Intel EP80579 Integrated Processor and Intel EP80579 -- // Integrated Processor with Intel QuickAssist Technology -+ case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. -+ case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model -+ // 0Dh. All processors are manufactured using the 90 nm process. -+ case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 -+ // Integrated Processor with Intel QuickAssist Technology - return "pentium-m"; - -- case 14: // Intel Core Duo processor, Intel Core Solo processor, model -- // 0Eh. All processors are manufactured using the 65 nm process. -+ case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model -+ // 0Eh. All processors are manufactured using the 65 nm process. - return "yonah"; - -- case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile -- // processor, Intel Core 2 Quad processor, Intel Core 2 Quad -- // mobile processor, Intel Core 2 Extreme processor, Intel -- // Pentium Dual-Core processor, Intel Xeon processor, model -- // 0Fh. All processors are manufactured using the 65 nm process. -- case 22: // Intel Celeron processor model 16h. All processors are -- // manufactured using the 65 nm process -+ case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile -+ // processor, Intel Core 2 Quad processor, Intel Core 2 Quad -+ // mobile processor, Intel Core 2 Extreme processor, Intel -+ // Pentium Dual-Core processor, Intel Xeon processor, model -+ // 0Fh. All processors are manufactured using the 65 nm process. -+ case 0x16: // Intel Celeron processor model 16h. All processors are -+ // manufactured using the 65 nm process - return "core2"; - -- case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model -- // 17h. All processors are manufactured using the 45 nm process. -- // -- // 45nm: Penryn , Wolfdale, Yorkfield (XE) -- case 29: // Intel Xeon processor MP. All processors are manufactured using -- // the 45 nm process. -+ case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model -+ // 17h. All processors are manufactured using the 45 nm process. -+ // -+ // 45nm: Penryn , Wolfdale, Yorkfield (XE) -+ case 0x1d: // Intel Xeon processor MP. All processors are manufactured using -+ // the 45 nm process. - return "penryn"; - -- case 26: // Intel Core i7 processor and Intel Xeon processor. All -- // processors are manufactured using the 45 nm process. -- case 30: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. -- // As found in a Summer 2010 model iMac. -- case 46: // Nehalem EX -+ case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All -+ // processors are manufactured using the 45 nm process. -+ case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. -+ // As found in a Summer 2010 model iMac. -+ case 0x2e: // Nehalem EX - return "nehalem"; -- case 37: // Intel Core i7, laptop version. -- case 44: // Intel Core i7 processor and Intel Xeon processor. All -- // processors are manufactured using the 32 nm process. -- case 47: // Westmere EX -+ case 0x25: // Intel Core i7, laptop version. -+ case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All -+ // processors are manufactured using the 32 nm process. -+ case 0x2f: // Westmere EX - return "westmere"; - -- // SandyBridge: -- case 42: // Intel Core i7 processor. All processors are manufactured -- // using the 32 nm process. -- case 45: -+ case 0x2a: // Intel Core i7 processor. All processors are manufactured -+ // using the 32 nm process. -+ case 0x2d: - return "sandybridge"; - -- // Ivy Bridge: -- case 58: -- case 62: // Ivy Bridge EP -+ case 0x3a: -+ case 0x3e: // Ivy Bridge EP - return "ivybridge"; - - // Haswell: -- case 60: -- case 63: -- case 69: -- case 70: -+ case 0x3c: -+ case 0x3f: -+ case 0x45: -+ case 0x46: - return "haswell"; - - // Broadwell: -- case 61: -- case 71: -+ case 0x3d: -+ case 0x47: -+ case 0x4f: -+ case 0x56: - return "broadwell"; - - // Skylake: -- case 78: -- case 94: -+ case 0x4e: -+ // return "skylake-avx512"; -+ case 0x5e: - return "skylake"; - -- case 28: // Most 45 nm Intel Atom processors -- case 38: // 45 nm Atom Lincroft -- case 39: // 32 nm Atom Medfield -- case 53: // 32 nm Atom Midview -- case 54: // 32 nm Atom Midview -+ case 0x1c: // Most 45 nm Intel Atom processors -+ case 0x26: // 45 nm Atom Lincroft -+ case 0x27: // 32 nm Atom Medfield -+ case 0x35: // 32 nm Atom Midview -+ case 0x36: // 32 nm Atom Midview - return "bonnell"; - - // Atom Silvermont codes from the Intel software optimization guide. -- case 55: -- case 74: -- case 77: -- case 90: -- case 93: -+ case 0x37: -+ case 0x4a: -+ case 0x4d: -+ case 0x5a: -+ case 0x5d: -+ case 0x4c: // really airmont - return "silvermont"; - -+ case 0x57: -+ return "knl"; -+ - default: // Unknown family 6 CPU, try to guess. - if (HasAVX512) - return "knl"; -@@ -823,6 +828,7 @@ bool sys::getHostCPUFeatures(StringMap &Features) { - Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; - Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; - Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; -+ Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; - - bool HasLeafD = MaxLevel >= 0xd && - !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX); -diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td -index 8902a85..9b1bf43 100644 ---- a/lib/Target/X86/X86.td -+++ b/lib/Target/X86/X86.td -@@ -134,6 +134,9 @@ def FeatureBWI : SubtargetFeature<"avx512bw", "HasBWI", "true", - def FeatureVLX : SubtargetFeature<"avx512vl", "HasVLX", "true", - "Enable AVX-512 Vector Length eXtensions", - [FeatureAVX512]>; -+def FeatureVBMI : SubtargetFeature<"avx512vbmi", "HasVBMI", "true", -+ "Enable AVX-512 Vector Bit Manipulation Instructions", -+ [FeatureAVX512]>; - def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true", - "Enable protection keys">; - def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true", -@@ -454,6 +457,9 @@ class BroadwellProc : ProcessorModel; - def : BroadwellProc<"broadwell">; - -+def : HaswellProc<"skylake">; // RHEL mustard -+def : HaswellProc<"skx">; // RHEL mustard -+ - // FIXME: define KNL model - class KnightsLandingProc : ProcessorModel : ProcessorModel; - def : KnightsLandingProc<"knl">; - --// FIXME: define SKX model --class SkylakeProc : ProcessorModel; --def : SkylakeProc<"skylake">; --def : SkylakeProc<"skx">; // Legacy alias. -- -- - // AMD CPUs. - - def : Proc<"k6", [FeatureSlowUAMem16, FeatureMMX]>; -diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td -index 9c8339a..eed4319 100644 ---- a/lib/Target/X86/X86InstrInfo.td -+++ b/lib/Target/X86/X86InstrInfo.td -@@ -773,7 +773,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">, - def NoVLX : Predicate<"!Subtarget->hasVLX()">; - def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; - def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; --def PKU : Predicate<"!Subtarget->hasPKU()">; -+def PKU : Predicate<"Subtarget->hasPKU()">; - - def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; - def HasAES : Predicate<"Subtarget->hasAES()">; -@@ -795,6 +795,7 @@ def HasFSGSBase : Predicate<"Subtarget->hasFSGSBase()">; - def HasLZCNT : Predicate<"Subtarget->hasLZCNT()">; - def HasBMI : Predicate<"Subtarget->hasBMI()">; - def HasBMI2 : Predicate<"Subtarget->hasBMI2()">; -+def HasVBMI : Predicate<"Subtarget->hasVBMI()">; - def HasRTM : Predicate<"Subtarget->hasRTM()">; - def HasHLE : Predicate<"Subtarget->hasHLE()">; - def HasTSX : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">; -diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp -index 8ef08c9..739de59 100644 ---- a/lib/Target/X86/X86Subtarget.cpp -+++ b/lib/Target/X86/X86Subtarget.cpp -@@ -261,6 +261,7 @@ void X86Subtarget::initializeEnvironment() { - HasLZCNT = false; - HasBMI = false; - HasBMI2 = false; -+ HasVBMI = false; - HasRTM = false; - HasHLE = false; - HasERI = false; -diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h -index 13d1026..c1adb44 100644 ---- a/lib/Target/X86/X86Subtarget.h -+++ b/lib/Target/X86/X86Subtarget.h -@@ -134,6 +134,9 @@ protected: - /// Processor has BMI2 instructions. - bool HasBMI2; - -+ /// Processor has VBMI instructions. -+ bool HasVBMI; -+ - /// Processor has RTM instructions. - bool HasRTM; - -@@ -374,6 +377,7 @@ public: - bool hasLZCNT() const { return HasLZCNT; } - bool hasBMI() const { return HasBMI; } - bool hasBMI2() const { return HasBMI2; } -+ bool hasVBMI() const { return HasVBMI; } - bool hasRTM() const { return HasRTM; } - bool hasHLE() const { return HasHLE; } - bool hasADX() const { return HasADX; } -diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll -index 27cbef6..c25435b 100644 ---- a/test/CodeGen/X86/slow-unaligned-mem.ll -+++ b/test/CodeGen/X86/slow-unaligned-mem.ll -@@ -14,15 +14,14 @@ - - ; Intel chips with fast unaligned memory accesses - --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST --; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell 2>&1 | FileCheck %s --check-prefix=FAST -+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl 2>&1 | FileCheck %s --check-prefix=FAST - - ; AMD chips with slow unaligned memory accesses - diff --git a/SPECS/llvm.spec b/SPECS/llvm.spec index 6952d0f..2ed9ded 100644 --- a/SPECS/llvm.spec +++ b/SPECS/llvm.spec @@ -34,21 +34,20 @@ ExcludeArch: ppc s390 %{?rhel6:s390x} %endif Name: mesa-private-llvm -Version: 3.8.1 -Release: 1%{?dist} +Version: 3.9.1 +Release: 3%{?dist} Summary: llvm engine for Mesa Group: System Environment/Libraries License: NCSA URL: http://llvm.org Source0: http://llvm.org/releases/%{version}/llvm-%{version}.src.tar.xz +Source1: cmake-3.4.3.tar.gz Source100: llvm-config.h -# recognize s390 as SystemZ when configuring build -#Patch0: llvm-3.7.1-cmake-s390.patch - Patch1: fix-cmake-include.patch -Patch2: llvm-3.8.1-rhel-7.3.patch +Patch2: 0001-Revert-Merging-r280589.patch +Patch3: 0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch BuildRequires: cmake BuildRequires: zlib-devel @@ -73,17 +72,30 @@ support in Mesa. %prep %setup -q -n llvm-%{version}.src -#patch0 -p1 -b .s390 + +tar xf %{SOURCE1} + %patch1 -p1 -b .fixinc -%patch2 -p1 +%patch2 -p1 -b .radeonsi-fix +%patch3 -p1 -b .bigendian-fix %build +BUILD_DIR=`pwd`/cmake_build +cd cmake-3.4.3 +cmake . -DCMAKE_INSTALL_PREFIX=$BUILD_DIR +make +make install +cd - + + sed -i 's|ActiveIncludeDir = ActivePrefix + "/include|&/mesa-private|g' tools/llvm-config/llvm-config.cpp mkdir -p _build cd _build +export PATH=$BUILD_DIR/bin:$PATH +%global __cmake $BUILD_DIR/bin/cmake # force off shared libs as cmake macros turns it on. %cmake .. \ -DINCLUDE_INSTALL_DIR=%{_includedir}/mesa-private \ @@ -163,6 +175,7 @@ rm -rf %{buildroot}%{_includedir}/llvm-c/lto.h # RHEL: Strip out cmake build foo rm -rf %{buildroot}%{_datadir}/llvm/cmake +rm -rf %{buildroot}%{_libdir}/cmake/llvm %check cd _build @@ -176,7 +189,7 @@ make check-all || : %files %doc LICENSE.TXT -%{_libdir}/libLLVM-3.8*-mesa.so +%{_libdir}/libLLVM-3.9*-mesa.so %files devel %{_bindir}/%{name}-config-%{__isa_bits} @@ -184,6 +197,15 @@ make check-all || : %{_includedir}/mesa-private/llvm-c %changelog +* Wed May 03 2017 Lyude Paul - 3.9.1-3 +- Add temporary revert for #1445423 + +* Fri Mar 24 2017 Tom Stellard - 3.9.1-2 +- Add fix for radeonsi regression + +* Tue Jan 10 2017 Jeff Law - 3.9.1-1 +- Update to 3.9.1 + * Wed Jul 13 2016 Adam Jackson - 3.8.1-1 - Update to 3.8.1 - Sync some x86 getHostCPUName updates from trunk