diff --git a/.gitignore b/.gitignore
index 5105db5..ddbc9dd 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
-SOURCES/llvm-3.8.1.src.tar.xz
+SOURCES/cmake-3.4.3.tar.gz
+SOURCES/llvm-3.9.1.src.tar.xz
diff --git a/.mesa-private-llvm.metadata b/.mesa-private-llvm.metadata
index 049decf..d7a792d 100644
--- a/.mesa-private-llvm.metadata
+++ b/.mesa-private-llvm.metadata
@@ -1 +1,2 @@
-e0c48c4c182424b99999367d688cd8ce7876827b SOURCES/llvm-3.8.1.src.tar.xz
+49e4f05d46d4752e514b19ba36bf97d20a7da66a SOURCES/cmake-3.4.3.tar.gz
+ce801cf456b8dacd565ce8df8288b4d90e7317ff SOURCES/llvm-3.9.1.src.tar.xz
diff --git a/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch b/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch
new file mode 100644
index 0000000..dc3c292
--- /dev/null
+++ b/SOURCES/0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch
@@ -0,0 +1,164 @@
+From 6674146ac94c8744c807ed06e1bdb99cef87b2fe Mon Sep 17 00:00:00 2001
+From: root <root@ibm-z10-50.rhts.eng.bos.redhat.com>
+Date: Fri, 14 Apr 2017 16:06:58 -0400
+Subject: [PATCH] Revert "[InstCombine] transform bitcasted bitwise logic ops
+ with constants (PR26702)"
+
+This reverts commit 76b12c4bf0bbc5c70def7b5d083a8a70547ea4e3.
+
+Conflicts:
+	lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+---
+ lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 34 +++++-----------------
+ test/Transforms/InstCombine/bitcast-bigendian.ll   | 14 +++++----
+ test/Transforms/InstCombine/bitcast.ll             | 14 +++++----
+ 3 files changed, 23 insertions(+), 39 deletions(-)
+
+diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+index 1a6459b..36c2136 100644
+--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
++++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+@@ -1201,41 +1201,21 @@ Instruction *InstCombiner::foldCastedBitwiseLogic(BinaryOperator &I) {
+ 
+   Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+   CastInst *Cast0 = dyn_cast<CastInst>(Op0);
+-  if (!Cast0)
+-    return nullptr;
+-
+-  // This must be a cast from an integer or integer vector source type to allow
+-  // transformation of the logic operation to the source type.
+-  Type *DestTy = I.getType();
+-  Type *SrcTy = Cast0->getSrcTy();
+-  if (!SrcTy->isIntOrIntVectorTy())
+-    return nullptr;
+-
+-  // If one operand is a bitcast and the other is a constant, move the logic
+-  // operation ahead of the bitcast. That is, do the logic operation in the
+-  // original type. This can eliminate useless bitcasts and allow normal
+-  // combines that would otherwise be impeded by the bitcast. Canonicalization
+-  // ensures that if there is a constant operand, it will be the second operand.
+-  Value *BC = nullptr;
+-  Constant *C = nullptr;
+-  if ((match(Op0, m_BitCast(m_Value(BC))) && match(Op1, m_Constant(C)))) {
+-    Value *NewConstant = ConstantExpr::getBitCast(C, SrcTy);
+-    Value *NewOp = Builder->CreateBinOp(LogicOpc, BC, NewConstant, I.getName());
+-    return CastInst::CreateBitOrPointerCast(NewOp, DestTy);
+-  }
+-
+   CastInst *Cast1 = dyn_cast<CastInst>(Op1);
+-  if (!Cast1)
++  if (!Cast0 || !Cast1)
+     return nullptr;
+ 
+-  // Both operands of the logic operation are casts. The casts must be of the
+-  // same type for reduction.
++  // The casts must be of the same type, and this must be a cast from an integer
++  // or integer vector source type.
+   auto CastOpcode = Cast0->getOpcode();
+-  if (CastOpcode != Cast1->getOpcode() || SrcTy != Cast1->getSrcTy())
++  Type *SrcTy = Cast0->getSrcTy();
++  if ((CastOpcode != Cast1->getOpcode()) || (SrcTy != Cast1->getSrcTy()) ||
++      !SrcTy->isIntOrIntVectorTy())
+     return nullptr;
+ 
+   Value *Cast0Src = Cast0->getOperand(0);
+   Value *Cast1Src = Cast1->getOperand(0);
++  Type *DestTy = I.getType();
+ 
+   // fold (logic (cast A), (cast B)) -> (cast (logic A, B))
+ 
+diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
+index f558ecc..4042dad 100644
+--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
++++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
+@@ -90,6 +90,8 @@ define <2 x float> @test6(float %A){
+ ; CHECK: ret
+ }
+ 
++; FIXME: Do the logic in the original type for the following 3 tests.
++
+ ; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+ 
+ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+@@ -98,8 +100,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+   ret <2 x i32> %t2
+ 
+ ; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+-; CHECK-NEXT:  %t21 = xor <1 x i64> %a, <i64 4294967298> 
+-; CHECK-NEXT:  %t2 = bitcast <1 x i64> %t21 to <2 x i32>
++; CHECK-NEXT:  %t1 = bitcast <1 x i64> %a to <2 x i32>
++; CHECK-NEXT:  %t2 = xor <2 x i32> %t1, <i32 1, i32 2>
+ ; CHECK-NEXT:  ret <2 x i32> %t2
+ }
+ 
+@@ -111,8 +113,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+   ret i64 %t2
+ 
+ ; CHECK-LABEL: @and_bitcast_vec_to_int(
+-; CHECK-NEXT:  %t21 = and <2 x i32> %a, <i32 0, i32 3>
+-; CHECK-NEXT:  %t2 = bitcast <2 x i32> %t21 to i64
++; CHECK-NEXT:  %t1 = bitcast <2 x i32> %a to i64
++; CHECK-NEXT:  %t2 = and i64 %t1, 3
+ ; CHECK-NEXT:  ret i64 %t2
+ }
+ 
+@@ -124,8 +126,8 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+   ret <2 x i32> %t2
+ 
+ ; CHECK-LABEL: @or_bitcast_int_to_vec(
+-; CHECK-NEXT:  %t21 = or i64 %a, 4294967298
+-; CHECK-NEXT:  %t2 = bitcast i64 %t21 to <2 x i32>
++; CHECK-NEXT:  %t1 = bitcast i64 %a to <2 x i32>
++; CHECK-NEXT:  %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ; CHECK-NEXT:  ret <2 x i32> %t2
+ }
+ 
+diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
+index 7495859..34e9206 100644
+--- a/test/Transforms/InstCombine/bitcast.ll
++++ b/test/Transforms/InstCombine/bitcast.ll
+@@ -30,6 +30,8 @@ define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
+ ; CHECK-NEXT:  ret <2 x i32> %t3
+ }
+ 
++; FIXME: Do the logic in the original type for the following 3 tests.
++
+ ; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+ 
+ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+@@ -38,8 +40,8 @@ define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+   ret <2 x i32> %t2
+ 
+ ; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+-; CHECK-NEXT:  %t21 = xor <1 x i64> %a, <i64 8589934593> 
+-; CHECK-NEXT:  %t2 = bitcast <1 x i64> %t21 to <2 x i32>
++; CHECK-NEXT:  %t1 = bitcast <1 x i64> %a to <2 x i32>
++; CHECK-NEXT:  %t2 = xor <2 x i32> %t1, <i32 1, i32 2>
+ ; CHECK-NEXT:  ret <2 x i32> %t2
+ }
+ 
+@@ -51,8 +53,8 @@ define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+   ret i64 %t2
+ 
+ ; CHECK-LABEL: @and_bitcast_vec_to_int(
+-; CHECK-NEXT:  %t21 = and <2 x i32> %a, <i32 3, i32 0>
+-; CHECK-NEXT:  %t2 = bitcast <2 x i32> %t21 to i64
++; CHECK-NEXT:  %t1 = bitcast <2 x i32> %a to i64
++; CHECK-NEXT:  %t2 = and i64 %t1, 3
+ ; CHECK-NEXT:  ret i64 %t2
+ }
+ 
+@@ -64,8 +66,8 @@ define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+   ret <2 x i32> %t2
+ 
+ ; CHECK-LABEL: @or_bitcast_int_to_vec(
+-; CHECK-NEXT:  %t21 = or i64 %a, 8589934593
+-; CHECK-NEXT:  %t2 = bitcast i64 %t21 to <2 x i32>
++; CHECK-NEXT:  %t1 = bitcast i64 %a to <2 x i32>
++; CHECK-NEXT:  %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ; CHECK-NEXT:  ret <2 x i32> %t2
+ }
+ 
+-- 
+1.8.3.1
+
diff --git a/SOURCES/0001-Revert-Merging-r280589.patch b/SOURCES/0001-Revert-Merging-r280589.patch
new file mode 100644
index 0000000..ef950f4
--- /dev/null
+++ b/SOURCES/0001-Revert-Merging-r280589.patch
@@ -0,0 +1,119 @@
+From 95b15b3d2f180b15267032e16c947c0f9b8a112d Mon Sep 17 00:00:00 2001
+From: Dave Airlie <airlied@redhat.com>
+Date: Wed, 1 Mar 2017 13:02:38 +1000
+Subject: [PATCH] Revert "Merging r280589:"
+
+This reverts commit 25e2616626caafb896517e18cd8aa724fba2b200.
+---
+ lib/Target/AMDGPU/SIInstructions.td   |  1 -
+ lib/Target/AMDGPU/SIWholeQuadMode.cpp |  7 +++++
+ test/CodeGen/AMDGPU/wqm.ll            | 49 +++--------------------------------
+ 3 files changed, 11 insertions(+), 46 deletions(-)
+
+diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td
+index dde5f2f..18b7d5d 100644
+--- a/lib/Target/AMDGPU/SIInstructions.td
++++ b/lib/Target/AMDGPU/SIInstructions.td
+@@ -2029,7 +2029,6 @@ def SI_RETURN : PseudoInstSI <
+   let hasSideEffects = 1;
+   let SALU = 1;
+   let hasNoSchedulingInfo = 1;
+-  let DisableWQM = 1;
+ }
+ 
+ let Uses = [EXEC], Defs = [EXEC, VCC, M0],
+diff --git a/lib/Target/AMDGPU/SIWholeQuadMode.cpp b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+index 1534d58..b200c15 100644
+--- a/lib/Target/AMDGPU/SIWholeQuadMode.cpp
++++ b/lib/Target/AMDGPU/SIWholeQuadMode.cpp
+@@ -219,6 +219,13 @@ char SIWholeQuadMode::scanInstructions(MachineFunction &MF,
+       markInstruction(MI, Flags, Worklist);
+       GlobalFlags |= Flags;
+     }
++
++    if (WQMOutputs && MBB.succ_empty()) {
++      // This is a prolog shader. Make sure we go back to exact mode at the end.
++      Blocks[&MBB].OutNeeds = StateExact;
++      Worklist.push_back(&MBB);
++      GlobalFlags |= StateExact;
++    }
+   }
+ 
+   return GlobalFlags;
+diff --git a/test/CodeGen/AMDGPU/wqm.ll b/test/CodeGen/AMDGPU/wqm.ll
+index 41e4264..809a7ba 100644
+--- a/test/CodeGen/AMDGPU/wqm.ll
++++ b/test/CodeGen/AMDGPU/wqm.ll
+@@ -17,18 +17,17 @@ main_body:
+ ;CHECK-LABEL: {{^}}test2:
+ ;CHECK-NEXT: ; %main_body
+ ;CHECK-NEXT: s_wqm_b64 exec, exec
++;CHECK: image_sample
+ ;CHECK-NOT: exec
+-define amdgpu_ps void @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
++;CHECK: _load_dword v0,
++define amdgpu_ps float @test2(<8 x i32> inreg %rsrc, <4 x i32> inreg %sampler, float addrspace(1)* inreg %ptr, <4 x i32> %c) {
+ main_body:
+   %c.1 = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %c, <8 x i32> %rsrc, <4 x i32> %sampler, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+   %c.2 = bitcast <4 x float> %c.1 to <4 x i32>
+   %c.3 = extractelement <4 x i32> %c.2, i32 0
+   %gep = getelementptr float, float addrspace(1)* %ptr, i32 %c.3
+   %data = load float, float addrspace(1)* %gep
+-
+-  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %data, float undef, float undef, float undef)
+-
+-  ret void
++  ret float %data
+ }
+ 
+ ; ... but disabled for stores (and, in this simple case, not re-enabled).
+@@ -415,46 +414,6 @@ entry:
+   ret void
+ }
+ 
+-; Must return to exact at the end of a non-void returning shader,
+-; otherwise the EXEC mask exported by the epilog will be wrong. This is true
+-; even if the shader has no kills, because a kill could have happened in a
+-; previous shader fragment.
+-;
+-; CHECK-LABEL: {{^}}test_nonvoid_return:
+-; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+-; CHECK: s_wqm_b64 exec, exec
+-;
+-; CHECK: s_and_b64 exec, exec, [[LIVE]]
+-; CHECK-NOT: exec
+-define amdgpu_ps <4 x float> @test_nonvoid_return() nounwind {
+-  %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+-  %tex.i = bitcast <4 x float> %tex to <4 x i32>
+-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+-  ret <4 x float> %dtex
+-}
+-
+-; CHECK-LABEL: {{^}}test_nonvoid_return_unreachable:
+-; CHECK: s_mov_b64 [[LIVE:s\[[0-9]+:[0-9]+\]]], exec
+-; CHECK: s_wqm_b64 exec, exec
+-;
+-; CHECK: s_and_b64 exec, exec, [[LIVE]]
+-; CHECK-NOT: exec
+-define amdgpu_ps <4 x float> @test_nonvoid_return_unreachable(i32 inreg %c) nounwind {
+-entry:
+-  %tex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> undef, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+-  %tex.i = bitcast <4 x float> %tex to <4 x i32>
+-  %dtex = call <4 x float> @llvm.SI.image.sample.v4i32(<4 x i32> %tex.i, <8 x i32> undef, <4 x i32> undef, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
+-
+-  %cc = icmp sgt i32 %c, 0
+-  br i1 %cc, label %if, label %else
+-
+-if:
+-  store volatile <4 x float> %dtex, <4 x float>* undef
+-  unreachable
+-
+-else:
+-  ret <4 x float> %dtex
+-}
+ 
+ declare void @llvm.amdgcn.image.store.v4i32(<4 x float>, <4 x i32>, <8 x i32>, i32, i1, i1, i1, i1) #1
+ declare void @llvm.amdgcn.buffer.store.f32(float, <4 x i32>, i32, i32, i1, i1) #1
+-- 
+2.9.3
+
diff --git a/SOURCES/llvm-3.8.1-rhel-7.3.patch b/SOURCES/llvm-3.8.1-rhel-7.3.patch
deleted file mode 100644
index 4caea83..0000000
--- a/SOURCES/llvm-3.8.1-rhel-7.3.patch
+++ /dev/null
@@ -1,344 +0,0 @@
-diff --git a/lib/Support/Host.cpp b/lib/Support/Host.cpp
-index c0f9e07..94bf580 100644
---- a/lib/Support/Host.cpp
-+++ b/lib/Support/Host.cpp
-@@ -290,107 +290,112 @@ StringRef sys::getHostCPUName() {
-       }
-     case 6:
-       switch (Model) {
--      case  1: // Pentium Pro processor
-+      case 0x01: // Pentium Pro processor
-         return "pentiumpro";
- 
--      case  3: // Intel Pentium II OverDrive processor, Pentium II processor,
--               // model 03
--      case  5: // Pentium II processor, model 05, Pentium II Xeon processor,
--               // model 05, and Intel Celeron processor, model 05
--      case  6: // Celeron processor, model 06
-+      case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor,
-+                 // model 03
-+      case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor,
-+                 // model 05, and Intel Celeron processor, model 05
-+      case 0x06: // Celeron processor, model 06
-         return "pentium2";
- 
--      case  7: // Pentium III processor, model 07, and Pentium III Xeon
--               // processor, model 07
--      case  8: // Pentium III processor, model 08, Pentium III Xeon processor,
--               // model 08, and Celeron processor, model 08
--      case 10: // Pentium III Xeon processor, model 0Ah
--      case 11: // Pentium III processor, model 0Bh
-+      case 0x07: // Pentium III processor, model 07, and Pentium III Xeon
-+                 // processor, model 07
-+      case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor,
-+                 // model 08, and Celeron processor, model 08
-+      case 0x0a: // Pentium III Xeon processor, model 0Ah
-+      case 0x0b: // Pentium III processor, model 0Bh
-         return "pentium3";
- 
--      case  9: // Intel Pentium M processor, Intel Celeron M processor model 09.
--      case 13: // Intel Pentium M processor, Intel Celeron M processor, model
--               // 0Dh. All processors are manufactured using the 90 nm process.
--      case 21: // Intel EP80579 Integrated Processor and Intel EP80579
--               // Integrated Processor with Intel QuickAssist Technology
-+      case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09.
-+      case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model
-+                 // 0Dh. All processors are manufactured using the 90 nm process.
-+      case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579
-+                 // Integrated Processor with Intel QuickAssist Technology
-         return "pentium-m";
- 
--      case 14: // Intel Core Duo processor, Intel Core Solo processor, model
--               // 0Eh. All processors are manufactured using the 65 nm process.
-+      case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model
-+                 // 0Eh. All processors are manufactured using the 65 nm process.
-         return "yonah";
- 
--      case 15: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
--               // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
--               // mobile processor, Intel Core 2 Extreme processor, Intel
--               // Pentium Dual-Core processor, Intel Xeon processor, model
--               // 0Fh. All processors are manufactured using the 65 nm process.
--      case 22: // Intel Celeron processor model 16h. All processors are
--               // manufactured using the 65 nm process
-+      case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile
-+                 // processor, Intel Core 2 Quad processor, Intel Core 2 Quad
-+                 // mobile processor, Intel Core 2 Extreme processor, Intel
-+                 // Pentium Dual-Core processor, Intel Xeon processor, model
-+                 // 0Fh. All processors are manufactured using the 65 nm process.
-+      case 0x16: // Intel Celeron processor model 16h. All processors are
-+                 // manufactured using the 65 nm process
-         return "core2";
- 
--      case 23: // Intel Core 2 Extreme processor, Intel Xeon processor, model
--               // 17h. All processors are manufactured using the 45 nm process.
--               //
--               // 45nm: Penryn , Wolfdale, Yorkfield (XE)
--      case 29: // Intel Xeon processor MP. All processors are manufactured using
--               // the 45 nm process.
-+      case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model
-+                 // 17h. All processors are manufactured using the 45 nm process.
-+                 //
-+                 // 45nm: Penryn , Wolfdale, Yorkfield (XE)
-+      case 0x1d: // Intel Xeon processor MP. All processors are manufactured using
-+                 // the 45 nm process.
-         return "penryn";
- 
--      case 26: // Intel Core i7 processor and Intel Xeon processor. All
--               // processors are manufactured using the 45 nm process.
--      case 30: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
--               // As found in a Summer 2010 model iMac.
--      case 46: // Nehalem EX
-+      case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All
-+                 // processors are manufactured using the 45 nm process.
-+      case 0x1e: // Intel(R) Core(TM) i7 CPU         870  @ 2.93GHz.
-+                 // As found in a Summer 2010 model iMac.
-+      case 0x2e: // Nehalem EX
-         return "nehalem";
--      case 37: // Intel Core i7, laptop version.
--      case 44: // Intel Core i7 processor and Intel Xeon processor. All
--               // processors are manufactured using the 32 nm process.
--      case 47: // Westmere EX
-+      case 0x25: // Intel Core i7, laptop version.
-+      case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All
-+                 // processors are manufactured using the 32 nm process.
-+      case 0x2f: // Westmere EX
-         return "westmere";
- 
--      // SandyBridge:
--      case 42: // Intel Core i7 processor. All processors are manufactured
--               // using the 32 nm process.
--      case 45:
-+      case 0x2a: // Intel Core i7 processor. All processors are manufactured
-+                 // using the 32 nm process.
-+      case 0x2d:
-         return "sandybridge";
- 
--      // Ivy Bridge:
--      case 58:
--      case 62: // Ivy Bridge EP
-+      case 0x3a:
-+      case 0x3e: // Ivy Bridge EP
-         return "ivybridge";
- 
-       // Haswell:
--      case 60:
--      case 63:
--      case 69:
--      case 70:
-+      case 0x3c:
-+      case 0x3f:
-+      case 0x45:
-+      case 0x46:
-         return "haswell";
- 
-       // Broadwell:
--      case 61:
--      case 71:
-+      case 0x3d:
-+      case 0x47:
-+      case 0x4f:
-+      case 0x56:
-         return "broadwell";
- 
-       // Skylake:
--      case 78:
--      case 94:
-+      case 0x4e:
-+        // return "skylake-avx512";
-+      case 0x5e:
-         return "skylake";
- 
--      case 28: // Most 45 nm Intel Atom processors
--      case 38: // 45 nm Atom Lincroft
--      case 39: // 32 nm Atom Medfield
--      case 53: // 32 nm Atom Midview
--      case 54: // 32 nm Atom Midview
-+      case 0x1c: // Most 45 nm Intel Atom processors
-+      case 0x26: // 45 nm Atom Lincroft
-+      case 0x27: // 32 nm Atom Medfield
-+      case 0x35: // 32 nm Atom Midview
-+      case 0x36: // 32 nm Atom Midview
-         return "bonnell";
- 
-       // Atom Silvermont codes from the Intel software optimization guide.
--      case 55:
--      case 74:
--      case 77:
--      case 90:
--      case 93:
-+      case 0x37:
-+      case 0x4a:
-+      case 0x4d:
-+      case 0x5a:
-+      case 0x5d:
-+      case 0x4c: // really airmont
-         return "silvermont";
- 
-+      case 0x57:
-+        return "knl";
-+
-       default: // Unknown family 6 CPU, try to guess.
-         if (HasAVX512)
-           return "knl";
-@@ -823,6 +828,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
-   Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save;
-   Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save;
-   Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save;
-+  Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save;
- 
-   bool HasLeafD = MaxLevel >= 0xd &&
-     !GetX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
-diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td
-index 8902a85..9b1bf43 100644
---- a/lib/Target/X86/X86.td
-+++ b/lib/Target/X86/X86.td
-@@ -134,6 +134,9 @@ def FeatureBWI     : SubtargetFeature<"avx512bw", "HasBWI", "true",
- def FeatureVLX     : SubtargetFeature<"avx512vl", "HasVLX", "true",
-                       "Enable AVX-512 Vector Length eXtensions",
-                                       [FeatureAVX512]>;
-+def FeatureVBMI     : SubtargetFeature<"avx512vbmi", "HasVBMI", "true",
-+                      "Enable AVX-512 Vector Bit Manipulation Instructions",
-+                                      [FeatureAVX512]>;
- def FeaturePKU   : SubtargetFeature<"pku", "HasPKU", "true",
-                       "Enable protection keys">;
- def FeaturePCLMUL  : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
-@@ -454,6 +457,9 @@ class BroadwellProc<string Name> : ProcessorModel<Name, HaswellModel, [
- ]>;
- def : BroadwellProc<"broadwell">;
- 
-+def : HaswellProc<"skylake">; // RHEL mustard
-+def : HaswellProc<"skx">; // RHEL mustard
-+
- // FIXME: define KNL model
- class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
-   FeatureMMX,
-@@ -484,45 +490,6 @@ class KnightsLandingProc<string Name> : ProcessorModel<Name, HaswellModel, [
- ]>;
- def : KnightsLandingProc<"knl">;
- 
--// FIXME: define SKX model
--class SkylakeProc<string Name> : ProcessorModel<Name, HaswellModel, [
--  FeatureMMX,
--  FeatureAVX512,
--  FeatureFXSR,
--  FeatureCDI,
--  FeatureDQI,
--  FeatureBWI,
--  FeatureVLX,
--  FeaturePKU,
--  FeatureCMPXCHG16B,
--  FeatureSlowBTMem,
--  FeaturePOPCNT,
--  FeatureAES,
--  FeaturePCLMUL,
--  FeatureXSAVE,
--  FeatureXSAVEOPT,
--  FeatureRDRAND,
--  FeatureF16C,
--  FeatureFSGSBase,
--  FeatureMOVBE,
--  FeatureLZCNT,
--  FeatureBMI,
--  FeatureBMI2,
--  FeatureFMA,
--  FeatureRTM,
--  FeatureHLE,
--  FeatureADX,
--  FeatureRDSEED,
--  FeatureSlowIncDec,
--  FeatureMPX,
--  FeatureXSAVEC,
--  FeatureXSAVES,
--  FeatureLAHFSAHF
--]>;
--def : SkylakeProc<"skylake">;
--def : SkylakeProc<"skx">; // Legacy alias.
--
--
- // AMD CPUs.
- 
- def : Proc<"k6",              [FeatureSlowUAMem16, FeatureMMX]>;
-diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
-index 9c8339a..eed4319 100644
---- a/lib/Target/X86/X86InstrInfo.td
-+++ b/lib/Target/X86/X86InstrInfo.td
-@@ -773,7 +773,7 @@ def HasVLX       : Predicate<"Subtarget->hasVLX()">,
- def NoVLX        : Predicate<"!Subtarget->hasVLX()">;
- def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">;
- def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
--def PKU        : Predicate<"!Subtarget->hasPKU()">;
-+def PKU        : Predicate<"Subtarget->hasPKU()">;
- 
- def HasPOPCNT    : Predicate<"Subtarget->hasPOPCNT()">;
- def HasAES       : Predicate<"Subtarget->hasAES()">;
-@@ -795,6 +795,7 @@ def HasFSGSBase  : Predicate<"Subtarget->hasFSGSBase()">;
- def HasLZCNT     : Predicate<"Subtarget->hasLZCNT()">;
- def HasBMI       : Predicate<"Subtarget->hasBMI()">;
- def HasBMI2      : Predicate<"Subtarget->hasBMI2()">;
-+def HasVBMI      : Predicate<"Subtarget->hasVBMI()">;
- def HasRTM       : Predicate<"Subtarget->hasRTM()">;
- def HasHLE       : Predicate<"Subtarget->hasHLE()">;
- def HasTSX       : Predicate<"Subtarget->hasRTM() || Subtarget->hasHLE()">;
-diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
-index 8ef08c9..739de59 100644
---- a/lib/Target/X86/X86Subtarget.cpp
-+++ b/lib/Target/X86/X86Subtarget.cpp
-@@ -261,6 +261,7 @@ void X86Subtarget::initializeEnvironment() {
-   HasLZCNT = false;
-   HasBMI = false;
-   HasBMI2 = false;
-+  HasVBMI = false;
-   HasRTM = false;
-   HasHLE = false;
-   HasERI = false;
-diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
-index 13d1026..c1adb44 100644
---- a/lib/Target/X86/X86Subtarget.h
-+++ b/lib/Target/X86/X86Subtarget.h
-@@ -134,6 +134,9 @@ protected:
-   /// Processor has BMI2 instructions.
-   bool HasBMI2;
- 
-+  /// Processor has VBMI instructions.
-+  bool HasVBMI;
-+
-   /// Processor has RTM instructions.
-   bool HasRTM;
- 
-@@ -374,6 +377,7 @@ public:
-   bool hasLZCNT() const { return HasLZCNT; }
-   bool hasBMI() const { return HasBMI; }
-   bool hasBMI2() const { return HasBMI2; }
-+  bool hasVBMI() const { return HasVBMI; }
-   bool hasRTM() const { return HasRTM; }
-   bool hasHLE() const { return HasHLE; }
-   bool hasADX() const { return HasADX; }
-diff --git a/test/CodeGen/X86/slow-unaligned-mem.ll b/test/CodeGen/X86/slow-unaligned-mem.ll
-index 27cbef6..c25435b 100644
---- a/test/CodeGen/X86/slow-unaligned-mem.ll
-+++ b/test/CodeGen/X86/slow-unaligned-mem.ll
-@@ -14,15 +14,14 @@
- 
- ; Intel chips with fast unaligned memory accesses
- 
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont    2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem       2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere      2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge   2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge     2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell       2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell     2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl           2>&1 | FileCheck %s --check-prefix=FAST
--; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=skylake       2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=silvermont     2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=nehalem        2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=westmere       2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=sandybridge    2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=ivybridge      2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=haswell        2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=broadwell      2>&1 | FileCheck %s --check-prefix=FAST
-+; RUN: llc < %s -mtriple=i386-unknown-unknown -mcpu=knl            2>&1 | FileCheck %s --check-prefix=FAST
- 
- ; AMD chips with slow unaligned memory accesses
- 
diff --git a/SPECS/llvm.spec b/SPECS/llvm.spec
index 6952d0f..2ed9ded 100644
--- a/SPECS/llvm.spec
+++ b/SPECS/llvm.spec
@@ -34,21 +34,20 @@ ExcludeArch: ppc s390 %{?rhel6:s390x}
 %endif
 
 Name:		mesa-private-llvm
-Version:	3.8.1
-Release:	1%{?dist}
+Version:	3.9.1
+Release:	3%{?dist}
 Summary:	llvm engine for Mesa
 
 Group:          System Environment/Libraries
 License:	NCSA
 URL:		http://llvm.org
 Source0:	http://llvm.org/releases/%{version}/llvm-%{version}.src.tar.xz
+Source1:        cmake-3.4.3.tar.gz
 Source100:	llvm-config.h
 
-# recognize s390 as SystemZ when configuring build
-#Patch0:		llvm-3.7.1-cmake-s390.patch
-
 Patch1: fix-cmake-include.patch
-Patch2: llvm-3.8.1-rhel-7.3.patch
+Patch2: 0001-Revert-Merging-r280589.patch
+Patch3: 0001-Revert-InstCombine-transform-bitcasted-bitwise-logic.patch
 
 BuildRequires:	cmake
 BuildRequires:	zlib-devel
@@ -73,17 +72,30 @@ support in Mesa.
 
 %prep
 %setup -q -n llvm-%{version}.src
-#patch0 -p1 -b .s390
+
+tar xf %{SOURCE1}
+
 %patch1 -p1 -b .fixinc
-%patch2 -p1
+%patch2 -p1 -b .radeonsi-fix
+%patch3 -p1 -b .bigendian-fix
 
 %build
 
+BUILD_DIR=`pwd`/cmake_build
+cd cmake-3.4.3
+cmake . -DCMAKE_INSTALL_PREFIX=$BUILD_DIR
+make
+make install
+cd -
+
+
 sed -i 's|ActiveIncludeDir = ActivePrefix + "/include|&/mesa-private|g' tools/llvm-config/llvm-config.cpp
 
 mkdir -p _build
 cd _build
 
+export PATH=$BUILD_DIR/bin:$PATH
+%global __cmake $BUILD_DIR/bin/cmake
 # force off shared libs as cmake macros turns it on.
 %cmake .. \
 	-DINCLUDE_INSTALL_DIR=%{_includedir}/mesa-private \
@@ -163,6 +175,7 @@ rm -rf %{buildroot}%{_includedir}/llvm-c/lto.h
 
 # RHEL: Strip out cmake build foo
 rm -rf %{buildroot}%{_datadir}/llvm/cmake
+rm -rf %{buildroot}%{_libdir}/cmake/llvm
 
 %check
 cd _build
@@ -176,7 +189,7 @@ make check-all || :
 
 %files
 %doc LICENSE.TXT
-%{_libdir}/libLLVM-3.8*-mesa.so
+%{_libdir}/libLLVM-3.9*-mesa.so
 
 %files devel
 %{_bindir}/%{name}-config-%{__isa_bits}
@@ -184,6 +197,15 @@ make check-all || :
 %{_includedir}/mesa-private/llvm-c
 
 %changelog
+* Wed May 03 2017 Lyude Paul <lyude@redhat.com> - 3.9.1-3
+- Add temporary revert of the InstCombine bitcasted-logic transform for #1445423
+
+* Fri Mar 24 2017 Tom Stellard <tstellar@redhat.com> - 3.9.1-2
+- Add fix for radeonsi regression
+
+* Tue Jan 10 2017 Jeff Law <law@redhat.com> - 3.9.1-1
+- Update to 3.9.1
+
 * Wed Jul 13 2016 Adam Jackson <ajax@redhat.com> - 3.8.1-1
 - Update to 3.8.1
 - Sync some x86 getHostCPUName updates from trunk