diff --git a/.gitignore b/.gitignore
index f1fe27b..a4e3e90 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1 @@
-SOURCES/mesa-20.1.2.tar.xz
+SOURCES/mesa-20.3.3.tar.xz
diff --git a/.mesa.metadata b/.mesa.metadata
index 87f9f7f..3d34979 100644
--- a/.mesa.metadata
+++ b/.mesa.metadata
@@ -1 +1 @@
-b90fe9ca8c3bdad043e86cd1af93bcf83e1da3fb SOURCES/mesa-20.1.2.tar.xz
+c0e42fada2b306a6d9740376398c0d8b0a130427 SOURCES/mesa-20.3.3.tar.xz
diff --git a/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch b/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
deleted file mode 100644
index 30fc63d..0000000
--- a/SOURCES/0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
+++ /dev/null
@@ -1,45 +0,0 @@
-From fcf3f45728a22250ad15db7e230545147fc28c2e Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Mon, 29 Jun 2020 14:59:20 +1000
-Subject: [PATCH] gallivm/nir: fix big-endian 64-bit splitting/merging.
-
-The shuffles need to be swapped to do this properly on big-endian
----
- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 10 ++++++++++
- 1 file changed, 10 insertions(+)
-
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-index f14475e839d..2c4135ccc05 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-@@ -353,8 +353,13 @@ static LLVMValueRef split_64bit(struct lp_build_nir_context *bld_base,
-    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
-    int len = bld_base->base.type.length * 2;
-    for (unsigned i = 0; i < bld_base->base.type.length; i++) {
-+#if UTIL_ARCH_LITTLE_ENDIAN
-       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
-       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
-+#else
-+      shuffles[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
-+      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2));
-+#endif
-    }
- 
-    src = LLVMBuildBitCast(gallivm->builder, src, LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), len), "");
-@@ -378,8 +383,13 @@ merge_64bit(struct lp_build_nir_context *bld_base,
-    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
- 
-    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
-+#if UTIL_ARCH_LITTLE_ENDIAN
-       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
-       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
-+#else
-+      shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
-+      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
-+#endif
-    }
-    return LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
- }
--- 
-2.26.2
-
diff --git a/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch b/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
deleted file mode 100644
index 33c573f..0000000
--- a/SOURCES/0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
+++ /dev/null
@@ -1,33 +0,0 @@
-From ea7bf3941eeef8320c711a6f66b5e73077cc6e6b Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Mon, 29 Jun 2020 07:40:13 +1000
-Subject: [PATCH] gallivm/nir: fix const loading on big endian systems
-
-The code was expecting the lower 32-bits of the 64-bit to be
-what it wanted, don't be implicit, pull the value from the union.
-
-This should fix rendering on big endian systems since NIR was
-introduced.
-
-Fixes: 44a6b0107b37 ("gallivm: add nir->llvm translation (v2)")
-Reviewed-by: Timothy Arceri <tarceri@itsqueeze.com>
----
- src/gallium/auxiliary/gallivm/lp_bld_nir.c | 2 +-
- 1 file changed, 1 insertion(+), 1 deletion(-)
-
-diff --git a/src/gallium/auxiliary/gallivm/lp_bld_nir.c b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-index 9aa582a0e8a..f14475e839d 100644
---- a/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-+++ b/src/gallium/auxiliary/gallivm/lp_bld_nir.c
-@@ -865,7 +865,7 @@ static void visit_load_const(struct lp_build_nir_context *bld_base,
-    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
-    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
-    for (unsigned i = 0; i < instr->def.num_components; i++)
--      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->value[i].u64);
-+      result[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type, instr->def.bit_size == 32 ? instr->value[i].u32 : instr->value[i].u64);
-    assign_ssa_dest(bld_base, &instr->def, result);
- }
- 
--- 
-2.26.2
-
diff --git a/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch b/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
deleted file mode 100644
index 4e37ce3..0000000
--- a/SOURCES/0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
+++ /dev/null
@@ -1,81 +0,0 @@
-From 5fc0b580cecb1529659d5d3719412fb7cbffac0d Mon Sep 17 00:00:00 2001
-From: Dave Airlie <airlied@redhat.com>
-Date: Mon, 29 Jun 2020 13:26:56 +1000
-Subject: [PATCH] glsl: fix constant packing for 64-bit big endian.
-
-In a piglit run on s390 a lot of double tests fail, explicitly
-packing/shifting things rather than using memcpy seems to help
----
- src/compiler/glsl/ir_constant_expression.cpp | 15 +++++++++++++++
- src/compiler/glsl/ir_expression_operation.py | 20 ++++++++++----------
- 2 files changed, 25 insertions(+), 10 deletions(-)
-
-diff --git a/src/compiler/glsl/ir_constant_expression.cpp b/src/compiler/glsl/ir_constant_expression.cpp
-index 636196886b3..595cc821797 100644
---- a/src/compiler/glsl/ir_constant_expression.cpp
-+++ b/src/compiler/glsl/ir_constant_expression.cpp
-@@ -452,6 +452,21 @@ isub64_saturate(int64_t a, int64_t b)
-    return a - b;
- }
- 
-+static uint64_t
-+pack_2x32(uint32_t a, uint32_t b)
-+{
-+   uint64_t v = a;
-+   v |= (uint64_t)b << 32;
-+   return v;
-+}
-+
-+static void
-+unpack_2x32(uint64_t p, uint32_t *a, uint32_t *b)
-+{
-+   *a = p & 0xffffffff;
-+   *b = (p >> 32);
-+}
-+
- /**
-  * Get the constant that is ultimately referenced by an r-value, in a constant
-  * expression evaluation context.
-diff --git a/src/compiler/glsl/ir_expression_operation.py b/src/compiler/glsl/ir_expression_operation.py
-index d2c4d41024f..1c4e6b358e1 100644
---- a/src/compiler/glsl/ir_expression_operation.py
-+++ b/src/compiler/glsl/ir_expression_operation.py
-@@ -560,14 +560,14 @@ ir_expression_operation = [
-    operation("saturate", 1, printable_name="sat", source_types=(float_type,), c_expression="CLAMP({src0}, 0.0f, 1.0f)"),
- 
-    # Double packing, part of ARB_gpu_shader_fp64.
--   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="memcpy(&data.d[0], &op[0]->value.u[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.d[0], sizeof(double))", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("pack_double_2x32", 1, printable_name="packDouble2x32", source_types=(uint_type,), dest_type=double_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("unpack_double_2x32", 1, printable_name="unpackDouble2x32", source_types=(double_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
- 
-    # Sampler/Image packing, part of ARB_bindless_texture.
--   operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("pack_sampler_2x32", 1, printable_name="packSampler2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("pack_image_2x32", 1, printable_name="packImage2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("unpack_sampler_2x32", 1, printable_name="unpackSampler2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("unpack_image_2x32", 1, printable_name="unpackImage2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
- 
-    operation("frexp_sig", 1),
-    operation("frexp_exp", 1),
-@@ -592,10 +592,10 @@ ir_expression_operation = [
-    operation("ssbo_unsized_array_length", 1),
- 
-    # 64-bit integer packing ops.
--   operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="memcpy(&data.i64[0], &op[0]->value.i[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="memcpy(&data.u64[0], &op[0]->value.u[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="memcpy(&data.i[0], &op[0]->value.i64[0], sizeof(int64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
--   operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="memcpy(&data.u[0], &op[0]->value.u64[0], sizeof(uint64_t))", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("pack_int_2x32", 1, printable_name="packInt2x32", source_types=(int_type,), dest_type=int64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("pack_uint_2x32", 1, printable_name="packUint2x32", source_types=(uint_type,), dest_type=uint64_type, c_expression="data.u64[0] = pack_2x32(op[0]->value.u[0], op[0]->value.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("unpack_int_2x32", 1, printable_name="unpackInt2x32", source_types=(int64_type,), dest_type=int_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
-+   operation("unpack_uint_2x32", 1, printable_name="unpackUint2x32", source_types=(uint64_type,), dest_type=uint_type, c_expression="unpack_2x32(op[0]->value.u64[0], &data.u[0], &data.u[1])", flags=frozenset((horizontal_operation, non_assign_operation))),
- 
-    operation("add", 2, printable_name="+", source_types=numeric_types, c_expression="{src0} + {src1}", flags=vector_scalar_operation),
-    operation("sub", 2, printable_name="-", source_types=numeric_types, c_expression="{src0} - {src1}", flags=vector_scalar_operation),
--- 
-2.26.2
-
diff --git a/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch b/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
deleted file mode 100644
index 0daf825..0000000
--- a/SOURCES/0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
+++ /dev/null
@@ -1,34 +0,0 @@
-From d3ec950f0d8492b980a91844ffd744d7e7824277 Mon Sep 17 00:00:00 2001
-From: Ben Skeggs <bskeggs@redhat.com>
-Date: Sat, 6 Jun 2020 16:58:00 +1000
-Subject: [PATCH] nir: use bitfield_insert instead of bfi in
- nir_lower_double_ops
-
-NVIDIA hardware doesn't have an equivilant to bfi, but we do already have
-a lowering for bitfield_insert->bfi.
-
-Signed-off-by: Ben Skeggs <bskeggs@redhat.com>
-Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
-Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5373>
----
- src/compiler/nir/nir_lower_double_ops.c | 4 +++-
- 1 file changed, 3 insertions(+), 1 deletion(-)
-
-diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
-index f9c93a910a5..73226fd62ef 100644
---- a/src/compiler/nir/nir_lower_double_ops.c
-+++ b/src/compiler/nir/nir_lower_double_ops.c
-@@ -49,7 +49,9 @@ set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp)
-    /* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent
-     * to 1023
-     */
--   nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi);
-+   nir_ssa_def *new_hi = nir_bitfield_insert(b, hi, exp,
-+                                             nir_imm_int(b, 20),
-+                                             nir_imm_int(b, 11));
-    /* recombine */
-    return nir_pack_64_2x32_split(b, lo, new_hi);
- }
--- 
-2.26.2
-
diff --git a/SOURCES/Makefile b/SOURCES/Makefile
index c431c49..eea9f33 100644
--- a/SOURCES/Makefile
+++ b/SOURCES/Makefile
@@ -1,4 +1,4 @@
-VERSION ?= 20.1.2
+VERSION ?= 20.3.3
 SANITIZE ?= 1
 
 DIRNAME = mesa-${VERSION}
diff --git a/SOURCES/anv-remove-warning.patch b/SOURCES/anv-remove-warning.patch
new file mode 100644
index 0000000..130a050
--- /dev/null
+++ b/SOURCES/anv-remove-warning.patch
@@ -0,0 +1,13 @@
+diff -up mesa-20.3.3/src/intel/vulkan/anv_perf.c.dma mesa-20.3.3/src/intel/vulkan/anv_perf.c
+--- mesa-20.3.3/src/intel/vulkan/anv_perf.c.dma	2021-02-16 12:56:09.881084752 +1000
++++ mesa-20.3.3/src/intel/vulkan/anv_perf.c	2021-02-16 12:56:14.626213956 +1000
+@@ -47,9 +47,6 @@ anv_get_perf(const struct gen_device_inf
+    gen_perf_init_metrics(perf, devinfo, fd, false /* pipeline statistics */);
+ 
+    if (!perf->n_queries) {
+-      if (perf->platform_supported)
+-         mesa_logw("Performance support disabled, "
+-                   "consider sysctl dev.i915.perf_stream_paranoid=0\n");
+       goto err;
+    }
+ 
diff --git a/SOURCES/lavapipe-disable-env-var.patch b/SOURCES/lavapipe-disable-env-var.patch
new file mode 100644
index 0000000..9b59577
--- /dev/null
+++ b/SOURCES/lavapipe-disable-env-var.patch
@@ -0,0 +1,13 @@
+diff -up mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c.dma mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c
+--- mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c.dma	2020-11-19 15:11:42.483134826 +1000
++++ mesa-20.3.0-rc1/src/gallium/frontends/lavapipe/lvp_device.c	2020-11-19 15:13:08.556425782 +1000
+@@ -118,6 +118,9 @@ VkResult lvp_CreateInstance(
+       client_version = VK_API_VERSION_1_0;
+    }
+ 
++   if (!getenv("RH_SW_VULKAN"))
++      return VK_ERROR_INITIALIZATION_FAILED;
++
+    instance = vk_zalloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
+                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
+    if (!instance)
diff --git a/SOURCES/mesa-20.3.3-stable-fixes.patch b/SOURCES/mesa-20.3.3-stable-fixes.patch
new file mode 100644
index 0000000..231e20b
--- /dev/null
+++ b/SOURCES/mesa-20.3.3-stable-fixes.patch
@@ -0,0 +1,930 @@
+diff --git a/src/amd/vulkan/radv_query.c b/src/amd/vulkan/radv_query.c
+index d49bc0f0564..90512d4f276 100644
+--- a/src/amd/vulkan/radv_query.c
++++ b/src/amd/vulkan/radv_query.c
+@@ -1679,13 +1679,14 @@ static void emit_begin_query(struct radv_cmd_buffer *cmd_buffer,
+ 
+ 			va += 8 * idx;
+ 
+-			si_cs_emit_write_event_eop(cs,
+-						   cmd_buffer->device->physical_device->rad_info.chip_class,
+-						   radv_cmd_buffer_uses_mec(cmd_buffer),
+-						   V_028A90_PS_DONE, 0,
+-						   EOP_DST_SEL_TC_L2,
+-						   EOP_DATA_SEL_GDS,
+-						   va, EOP_DATA_GDS(0, 1), 0);
++			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
++			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
++					COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
++					COPY_DATA_WR_CONFIRM);
++			radeon_emit(cs, 0);
++			radeon_emit(cs, 0);
++			radeon_emit(cs, va);
++			radeon_emit(cs, va >> 32);
+ 
+ 			/* Record that the command buffer needs GDS. */
+ 			cmd_buffer->gds_needed = true;
+@@ -1769,13 +1770,14 @@ static void emit_end_query(struct radv_cmd_buffer *cmd_buffer,
+ 
+ 			va += 8 * idx;
+ 
+-			si_cs_emit_write_event_eop(cs,
+-						   cmd_buffer->device->physical_device->rad_info.chip_class,
+-						   radv_cmd_buffer_uses_mec(cmd_buffer),
+-						   V_028A90_PS_DONE, 0,
+-						   EOP_DST_SEL_TC_L2,
+-						   EOP_DATA_SEL_GDS,
+-						   va, EOP_DATA_GDS(0, 1), 0);
++			radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
++			radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_GDS) |
++					COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
++					COPY_DATA_WR_CONFIRM);
++			radeon_emit(cs, 0);
++			radeon_emit(cs, 0);
++			radeon_emit(cs, va);
++			radeon_emit(cs, va >> 32);
+ 
+ 			cmd_buffer->state.active_pipeline_gds_queries--;
+ 		}
+diff --git a/src/amd/vulkan/radv_shader.h b/src/amd/vulkan/radv_shader.h
+index 9d9491d4361..2eb3ba4e64e 100644
+--- a/src/amd/vulkan/radv_shader.h
++++ b/src/amd/vulkan/radv_shader.h
+@@ -573,9 +573,11 @@ get_tcs_num_patches(unsigned tcs_num_input_vertices,
+ 	if (chip_class >= GFX7 && family != CHIP_STONEY)
+ 		hardware_lds_size = 65536;
+ 
+-	num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
++	if (input_patch_size + output_patch_size)
++		num_patches = MIN2(num_patches, hardware_lds_size / (input_patch_size + output_patch_size));
+ 	/* Make sure the output data fits in the offchip buffer */
+-	num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
++	if (output_patch_size)
++		num_patches = MIN2(num_patches, (tess_offchip_block_dw_size * 4) / output_patch_size);
+ 	/* Not necessary for correctness, but improves performance. The
+ 	 * specific value is taken from the proprietary driver.
+ 	 */
+diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
+index 1eef6aac70c..a6a663d97a6 100644
+--- a/src/gallium/auxiliary/cso_cache/cso_context.c
++++ b/src/gallium/auxiliary/cso_cache/cso_context.c
+@@ -402,10 +402,13 @@ void cso_destroy_context( struct cso_context *ctx )
+                                                 PIPE_SHADER_CAP_MAX_SHADER_BUFFERS);
+             int maxcb = scr->get_shader_param(scr, sh,
+                                               PIPE_SHADER_CAP_MAX_CONST_BUFFERS);
++            int maximg = scr->get_shader_param(scr, sh,
++                                              PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
+             assert(maxsam <= PIPE_MAX_SAMPLERS);
+             assert(maxview <= PIPE_MAX_SHADER_SAMPLER_VIEWS);
+             assert(maxssbo <= PIPE_MAX_SHADER_BUFFERS);
+             assert(maxcb <= PIPE_MAX_CONSTANT_BUFFERS);
++            assert(maximg <= PIPE_MAX_SHADER_IMAGES);
+             if (maxsam > 0) {
+                ctx->pipe->bind_sampler_states(ctx->pipe, sh, 0, maxsam, zeros);
+             }
+@@ -415,6 +418,9 @@ void cso_destroy_context( struct cso_context *ctx )
+             if (maxssbo > 0) {
+                ctx->pipe->set_shader_buffers(ctx->pipe, sh, 0, maxssbo, ssbos, 0);
+             }
++            if (maximg > 0) {
++               ctx->pipe->set_shader_images(ctx->pipe, sh, 0, maximg, NULL);
++            }
+             for (int i = 0; i < maxcb; i++) {
+                ctx->pipe->set_constant_buffer(ctx->pipe, sh, i, NULL);
+             }
+diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
+index 8157e921850..971fc80b5ac 100644
+--- a/src/gallium/drivers/iris/iris_program.c
++++ b/src/gallium/drivers/iris/iris_program.c
+@@ -2109,8 +2109,8 @@ iris_get_scratch_space(struct iris_context *ice,
+     * in the base configuration.
+     */
+    unsigned subslice_total = screen->subslice_total;
+-   if (devinfo->gen >= 12)
+-      subslice_total = devinfo->num_subslices[0];
++   if (devinfo->gen == 12)
++      subslice_total = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
+    else if (devinfo->gen == 11)
+       subslice_total = 8;
+    else if (devinfo->gen < 11)
+diff --git a/src/gallium/drivers/iris/iris_resolve.c b/src/gallium/drivers/iris/iris_resolve.c
+index 276ad62b1dd..045f43ed8c0 100644
+--- a/src/gallium/drivers/iris/iris_resolve.c
++++ b/src/gallium/drivers/iris/iris_resolve.c
+@@ -793,7 +793,9 @@ iris_resource_set_aux_state(struct iris_context *ice,
+       if (res->aux.state[level][start_layer + a] != aux_state) {
+          res->aux.state[level][start_layer + a] = aux_state;
+          /* XXX: Need to track which bindings to make dirty */
+-         ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER;
++         ice->state.dirty |= IRIS_DIRTY_RENDER_BUFFER |
++                             IRIS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
++                             IRIS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
+          ice->state.stage_dirty |= IRIS_ALL_STAGE_DIRTY_BINDINGS;
+       }
+    }
+diff --git a/src/gallium/drivers/iris/iris_resource.c b/src/gallium/drivers/iris/iris_resource.c
+index 8747ef4aa8a..3b34e32cd21 100644
+--- a/src/gallium/drivers/iris/iris_resource.c
++++ b/src/gallium/drivers/iris/iris_resource.c
+@@ -1125,6 +1125,20 @@ iris_flush_resource(struct pipe_context *ctx, struct pipe_resource *resource)
+                                 0, INTEL_REMAINING_LAYERS,
+                                 mod ? mod->aux_usage : ISL_AUX_USAGE_NONE,
+                                 mod ? mod->supports_clear_color : false);
++
++   if (!res->mod_info && res->aux.usage != ISL_AUX_USAGE_NONE) {
++      /* flush_resource may be used to prepare an image for sharing external
++       * to the driver (e.g. via eglCreateImage). To account for this, make
++       * sure to get rid of any compression that a consumer wouldn't know how
++       * to handle.
++       */
++      for (int i = 0; i < IRIS_BATCH_COUNT; i++) {
++         if (iris_batch_references(&ice->batches[i], res->bo))
++            iris_batch_flush(&ice->batches[i]);
++      }
++
++      iris_resource_disable_aux(res);
++   }
+ }
+ 
+ static void
+diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
+index 59a63f7bbab..b9ddb863a16 100644
+--- a/src/gallium/drivers/iris/iris_state.c
++++ b/src/gallium/drivers/iris/iris_state.c
+@@ -1666,6 +1666,8 @@ struct iris_rasterizer_state {
+    bool multisample;
+    bool force_persample_interp;
+    bool conservative_rasterization;
++   bool fill_mode_point;
++   bool fill_mode_line;
+    bool fill_mode_point_or_line;
+    enum pipe_sprite_coord_mode sprite_coord_mode; /* PIPE_SPRITE_* */
+    uint16_t sprite_coord_enable;
+@@ -1729,11 +1731,15 @@ iris_create_rasterizer_state(struct pipe_context *ctx,
+    cso->conservative_rasterization =
+       state->conservative_raster_mode == PIPE_CONSERVATIVE_RASTER_POST_SNAP;
+ 
+-   cso->fill_mode_point_or_line =
+-      state->fill_front == PIPE_POLYGON_MODE_LINE ||
++   cso->fill_mode_point =
+       state->fill_front == PIPE_POLYGON_MODE_POINT ||
+-      state->fill_back == PIPE_POLYGON_MODE_LINE ||
+       state->fill_back == PIPE_POLYGON_MODE_POINT;
++   cso->fill_mode_line =
++      state->fill_front == PIPE_POLYGON_MODE_LINE ||
++      state->fill_back == PIPE_POLYGON_MODE_LINE;
++   cso->fill_mode_point_or_line =
++      cso->fill_mode_point ||
++      cso->fill_mode_line;
+ 
+    if (state->clip_plane_enable != 0)
+       cso->num_clip_plane_consts = util_logbase2(state->clip_plane_enable) + 1;
+@@ -4059,6 +4065,28 @@ iris_emit_sbe_swiz(struct iris_batch *batch,
+    }
+ }
+ 
++static bool
++iris_is_drawing_points(const struct iris_context *ice)
++{
++   const struct iris_rasterizer_state *cso_rast = ice->state.cso_rast;
++
++   if (cso_rast->fill_mode_point) {
++      return true;
++   }
++
++   if (ice->shaders.prog[MESA_SHADER_GEOMETRY]) {
++      const struct brw_gs_prog_data *gs_prog_data =
++         (void *) ice->shaders.prog[MESA_SHADER_GEOMETRY]->prog_data;
++      return gs_prog_data->output_topology == _3DPRIM_POINTLIST;
++   } else if (ice->shaders.prog[MESA_SHADER_TESS_EVAL]) {
++      const struct brw_tes_prog_data *tes_data =
++         (void *) ice->shaders.prog[MESA_SHADER_TESS_EVAL]->prog_data;
++      return tes_data->output_topology == BRW_TESS_OUTPUT_TOPOLOGY_POINT;
++   } else {
++      return ice->state.prim_mode == PIPE_PRIM_POINTS;
++   }
++}
++
+ static unsigned
+ iris_calculate_point_sprite_overrides(const struct brw_wm_prog_data *prog_data,
+                                       const struct iris_rasterizer_state *cso)
+@@ -4093,7 +4121,8 @@ iris_emit_sbe(struct iris_batch *batch, const struct iris_context *ice)
+                                       &urb_read_offset, &urb_read_length);
+ 
+    unsigned sprite_coord_overrides =
+-      iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast);
++      iris_is_drawing_points(ice) ?
++      iris_calculate_point_sprite_overrides(wm_prog_data, cso_rast) : 0;
+ 
+    iris_emit_cmd(batch, GENX(3DSTATE_SBE), sbe) {
+       sbe.AttributeSwizzleEnable = true;
+diff --git a/src/gallium/drivers/radeonsi/si_descriptors.c b/src/gallium/drivers/radeonsi/si_descriptors.c
+index 8f688fa3650..ef35f86b05f 100644
+--- a/src/gallium/drivers/radeonsi/si_descriptors.c
++++ b/src/gallium/drivers/radeonsi/si_descriptors.c
+@@ -1482,11 +1482,12 @@ void si_update_needs_color_decompress_masks(struct si_context *sctx)
+ /* Reset descriptors of buffer resources after \p buf has been invalidated.
+  * If buf == NULL, reset all descriptors.
+  */
+-static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
++static bool si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_resources *buffers,
+                                       unsigned descriptors_idx, uint64_t slot_mask,
+                                       struct pipe_resource *buf, enum radeon_bo_priority priority)
+ {
+    struct si_descriptors *descs = &sctx->descriptors[descriptors_idx];
++   bool noop = true;
+    uint64_t mask = buffers->enabled_mask & slot_mask;
+ 
+    while (mask) {
+@@ -1501,8 +1502,10 @@ static void si_reset_buffer_resources(struct si_context *sctx, struct si_buffer_
+             sctx, si_resource(buffer),
+             buffers->writable_mask & (1llu << i) ? RADEON_USAGE_READWRITE : RADEON_USAGE_READ,
+             priority, true);
++         noop = false;
+       }
+    }
++   return !noop;
+ }
+ 
+ /* Update all buffer bindings where the buffer is bound, including
+@@ -1577,11 +1580,15 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
+    }
+ 
+    if (!buffer || buffer->bind_history & PIPE_BIND_SHADER_BUFFER) {
+-      for (shader = 0; shader < SI_NUM_SHADERS; shader++)
+-         si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
+-                                   si_const_and_shader_buffer_descriptors_idx(shader),
+-                                   u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
+-                                   sctx->const_and_shader_buffers[shader].priority);
++      for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
++         if (si_reset_buffer_resources(sctx, &sctx->const_and_shader_buffers[shader],
++                                       si_const_and_shader_buffer_descriptors_idx(shader),
++                                       u_bit_consecutive64(0, SI_NUM_SHADER_BUFFERS), buf,
++                                       sctx->const_and_shader_buffers[shader].priority) &&
++             shader == PIPE_SHADER_COMPUTE) {
++            sctx->compute_shaderbuf_sgprs_dirty = true;
++         }
++      }
+    }
+ 
+    if (!buffer || buffer->bind_history & PIPE_BIND_SAMPLER_VIEW) {
+@@ -1633,6 +1640,9 @@ void si_rebind_buffer(struct si_context *sctx, struct pipe_resource *buf)
+                radeon_add_to_gfx_buffer_list_check_mem(sctx, si_resource(buffer),
+                                                        RADEON_USAGE_READWRITE,
+                                                        RADEON_PRIO_SAMPLER_BUFFER, true);
++
++               if (shader == PIPE_SHADER_COMPUTE)
++                  sctx->compute_image_sgprs_dirty = true;
+             }
+          }
+       }
+diff --git a/src/gallium/frontends/dri/dri_helpers.c b/src/gallium/frontends/dri/dri_helpers.c
+index 01a1fb3d96c..5e87df35a55 100644
+--- a/src/gallium/frontends/dri/dri_helpers.c
++++ b/src/gallium/frontends/dri/dri_helpers.c
+@@ -258,7 +258,9 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context,
+ 				     int renderbuffer, void *loaderPrivate,
+                                      unsigned *error)
+ {
+-   struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx;
++   struct st_context *st_ctx = (struct st_context *)dri_context(context)->st;
++   struct gl_context *ctx = st_ctx->ctx;
++   struct pipe_context *p_ctx = st_ctx->pipe;
+    struct gl_renderbuffer *rb;
+    struct pipe_resource *tex;
+    __DRIimage *img;
+@@ -299,6 +301,13 @@ dri2_create_image_from_renderbuffer2(__DRIcontext *context,
+ 
+    pipe_resource_reference(&img->texture, tex);
+ 
++   /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that
++    * it's in a shareable state. Do this now while we still have the access to
++    * the context.
++    */
++   if (dri2_get_mapping_by_format(img->dri_format))
++      p_ctx->flush_resource(p_ctx, tex);
++
+    *error = __DRI_IMAGE_ERROR_SUCCESS;
+    return img;
+ }
+@@ -326,7 +335,9 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture,
+                          void *loaderPrivate)
+ {
+    __DRIimage *img;
+-   struct gl_context *ctx = ((struct st_context *)dri_context(context)->st)->ctx;
++   struct st_context *st_ctx = (struct st_context *)dri_context(context)->st;
++   struct gl_context *ctx = st_ctx->ctx;
++   struct pipe_context *p_ctx = st_ctx->pipe;
+    struct gl_texture_object *obj;
+    struct pipe_resource *tex;
+    GLuint face = 0;
+@@ -376,6 +387,13 @@ dri2_create_from_texture(__DRIcontext *context, int target, unsigned texture,
+ 
+    pipe_resource_reference(&img->texture, tex);
+ 
++   /* If the resource supports EGL_MESA_image_dma_buf_export, make sure that
++    * it's in a shareable state. Do this now while we still have the access to
++    * the context.
++    */
++   if (dri2_get_mapping_by_format(img->dri_format))
++      p_ctx->flush_resource(p_ctx, tex);
++
+    *error = __DRI_IMAGE_ERROR_SUCCESS;
+    return img;
+ }
+@@ -547,6 +565,9 @@ dri2_get_mapping_by_fourcc(int fourcc)
+ const struct dri2_format_mapping *
+ dri2_get_mapping_by_format(int format)
+ {
++   if (format == __DRI_IMAGE_FORMAT_NONE)
++      return NULL;
++
+    for (unsigned i = 0; i < ARRAY_SIZE(dri2_format_table); i++) {
+       if (dri2_format_table[i].dri_format == format)
+          return &dri2_format_table[i];
+diff --git a/src/gallium/frontends/lavapipe/lvp_device.c b/src/gallium/frontends/lavapipe/lvp_device.c
+index 45734f95880..187aecde1f8 100644
+--- a/src/gallium/frontends/lavapipe/lvp_device.c
++++ b/src/gallium/frontends/lavapipe/lvp_device.c
+@@ -52,8 +52,6 @@ lvp_physical_device_init(struct lvp_physical_device *device,
+    if (!device->pscreen)
+       return vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
+ 
+-   fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n");
+-
+    device->max_images = device->pscreen->get_shader_param(device->pscreen, PIPE_SHADER_FRAGMENT, PIPE_SHADER_CAP_MAX_SHADER_IMAGES);
+    lvp_physical_device_get_supported_extensions(device, &device->supported_extensions);
+    result = lvp_init_wsi(device);
+@@ -575,6 +573,19 @@ void lvp_GetPhysicalDeviceProperties2(
+    }
+ }
+ 
++static void lvp_get_physical_device_queue_family_properties(
++   VkQueueFamilyProperties*                    pQueueFamilyProperties)
++{
++   *pQueueFamilyProperties = (VkQueueFamilyProperties) {
++      .queueFlags = VK_QUEUE_GRAPHICS_BIT |
++      VK_QUEUE_COMPUTE_BIT |
++      VK_QUEUE_TRANSFER_BIT,
++      .queueCount = 1,
++      .timestampValidBits = 64,
++      .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
++   };
++}
++
+ void lvp_GetPhysicalDeviceQueueFamilyProperties(
+    VkPhysicalDevice                            physicalDevice,
+    uint32_t*                                   pCount,
+@@ -586,15 +597,21 @@ void lvp_GetPhysicalDeviceQueueFamilyProperties(
+    }
+ 
+    assert(*pCount >= 1);
++   lvp_get_physical_device_queue_family_properties(pQueueFamilyProperties);
++}
+ 
+-   *pQueueFamilyProperties = (VkQueueFamilyProperties) {
+-      .queueFlags = VK_QUEUE_GRAPHICS_BIT |
+-      VK_QUEUE_COMPUTE_BIT |
+-      VK_QUEUE_TRANSFER_BIT,
+-      .queueCount = 1,
+-      .timestampValidBits = 64,
+-      .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
+-   };
++void lvp_GetPhysicalDeviceQueueFamilyProperties2(
++   VkPhysicalDevice                            physicalDevice,
++   uint32_t*                                   pCount,
++   VkQueueFamilyProperties2                   *pQueueFamilyProperties)
++{
++   if (pQueueFamilyProperties == NULL) {
++      *pCount = 1;
++      return;
++   }
++
++   assert(*pCount >= 1);
++   lvp_get_physical_device_queue_family_properties(&pQueueFamilyProperties->queueFamilyProperties);
+ }
+ 
+ void lvp_GetPhysicalDeviceMemoryProperties(
+@@ -617,6 +634,14 @@ void lvp_GetPhysicalDeviceMemoryProperties(
+    };
+ }
+ 
++void lvp_GetPhysicalDeviceMemoryProperties2(
++   VkPhysicalDevice                            physicalDevice,
++   VkPhysicalDeviceMemoryProperties2          *pMemoryProperties)
++{
++   lvp_GetPhysicalDeviceMemoryProperties(physicalDevice,
++                                         &pMemoryProperties->memoryProperties);
++}
++
+ PFN_vkVoidFunction lvp_GetInstanceProcAddr(
+    VkInstance                                  _instance,
+    const char*                                 pName)
+@@ -822,6 +847,8 @@ VkResult lvp_CreateDevice(
+    const VkAllocationCallbacks*                pAllocator,
+    VkDevice*                                   pDevice)
+ {
++   fprintf(stderr, "WARNING: lavapipe is not a conformant vulkan implementation, testing use only.\n");
++
+    LVP_FROM_HANDLE(lvp_physical_device, physical_device, physicalDevice);
+    struct lvp_device *device;
+ 
+diff --git a/src/glx/g_glxglvnddispatchfuncs.c b/src/glx/g_glxglvnddispatchfuncs.c
+index 0f02ed2d321..e0ea27c0b18 100644
+--- a/src/glx/g_glxglvnddispatchfuncs.c
++++ b/src/glx/g_glxglvnddispatchfuncs.c
+@@ -87,6 +87,7 @@ const char * const __glXDispatchTableStrings[DI_LAST_INDEX] = {
+     __ATTRIB(SelectEventSGIX),
+     // glXSwapBuffers implemented by libglvnd
+     __ATTRIB(SwapBuffersMscOML),
++    __ATTRIB(SwapIntervalEXT),
+     __ATTRIB(SwapIntervalMESA),
+     __ATTRIB(SwapIntervalSGI),
+     // glXUseXFont implemented by libglvnd
+@@ -893,6 +894,24 @@ static int dispatch_SwapIntervalMESA(unsigned int interval)
+ 
+ 
+ 
++static void dispatch_SwapIntervalEXT(Display *dpy, GLXDrawable drawable, int interval)
++{
++    PFNGLXSWAPINTERVALEXTPROC pSwapIntervalEXT;
++    __GLXvendorInfo *dd;
++
++    dd = GetDispatchFromDrawable(dpy, drawable);
++    if (dd == NULL)
++        return;
++
++    __FETCH_FUNCTION_PTR(SwapIntervalEXT);
++    if (pSwapIntervalEXT == NULL)
++        return;
++
++    pSwapIntervalEXT(dpy, drawable, interval);
++}
++
++
++
+ static Bool dispatch_WaitForMscOML(Display *dpy, GLXDrawable drawable,
+                                       int64_t target_msc, int64_t divisor,
+                                       int64_t remainder, int64_t *ust,
+@@ -974,6 +993,7 @@ const void * const __glXDispatchFunctions[DI_LAST_INDEX + 1] = {
+     __ATTRIB(ReleaseTexImageEXT),
+     __ATTRIB(SelectEventSGIX),
+     __ATTRIB(SwapBuffersMscOML),
++    __ATTRIB(SwapIntervalEXT),
+     __ATTRIB(SwapIntervalMESA),
+     __ATTRIB(SwapIntervalSGI),
+     __ATTRIB(WaitForMscOML),
+diff --git a/src/glx/g_glxglvnddispatchindices.h b/src/glx/g_glxglvnddispatchindices.h
+index 3ba50a74abb..b65d078098f 100644
+--- a/src/glx/g_glxglvnddispatchindices.h
++++ b/src/glx/g_glxglvnddispatchindices.h
+@@ -79,6 +79,7 @@ typedef enum __GLXdispatchIndex {
+     DI_SelectEventSGIX,
+     // SwapBuffers implemented by libglvnd
+     DI_SwapBuffersMscOML,
++    DI_SwapIntervalEXT,
+     DI_SwapIntervalMESA,
+     DI_SwapIntervalSGI,
+     // UseXFont implemented by libglvnd
+diff --git a/src/intel/common/gen_mi_builder.h b/src/intel/common/gen_mi_builder.h
+index ddd8459ef07..47fb98e99f7 100644
+--- a/src/intel/common/gen_mi_builder.h
++++ b/src/intel/common/gen_mi_builder.h
+@@ -932,6 +932,13 @@ gen_mi_store_address(struct gen_mi_builder *b,
+ static inline void
+ gen_mi_self_mod_barrier(struct gen_mi_builder *b)
+ {
++   /* First make sure all the memory writes from previous modifying commands
++    * have landed. We want to do this before going through the CS cache,
++    * otherwise we could be fetching memory that hasn't been written to yet.
++    */
++   gen_mi_builder_emit(b, GENX(PIPE_CONTROL), pc) {
++      pc.CommandStreamerStallEnable = true;
++   }
+    /* Documentation says Gen11+ should be able to invalidate the command cache
+     * but experiment show it doesn't work properly, so for now just get over
+     * the CS prefetch.
+diff --git a/src/intel/compiler/brw_fs_copy_propagation.cpp b/src/intel/compiler/brw_fs_copy_propagation.cpp
+index 917c3abfe9e..6896987055f 100644
+--- a/src/intel/compiler/brw_fs_copy_propagation.cpp
++++ b/src/intel/compiler/brw_fs_copy_propagation.cpp
+@@ -437,6 +437,7 @@ instruction_requires_packed_data(fs_inst *inst)
+    case FS_OPCODE_DDX_COARSE:
+    case FS_OPCODE_DDY_FINE:
+    case FS_OPCODE_DDY_COARSE:
++   case SHADER_OPCODE_QUAD_SWIZZLE:
+       return true;
+    default:
+       return false;
+diff --git a/src/intel/compiler/brw_ir_fs.h b/src/intel/compiler/brw_ir_fs.h
+index 6ba3a6ca97e..3a4acc1834a 100644
+--- a/src/intel/compiler/brw_ir_fs.h
++++ b/src/intel/compiler/brw_ir_fs.h
+@@ -451,13 +451,15 @@ regs_written(const fs_inst *inst)
+  * Return the number of dataflow registers read by the instruction (either
+  * fully or partially) counted from 'floor(reg_offset(inst->src[i]) /
+  * register_size)'.  The somewhat arbitrary register size unit is 4B for the
+- * UNIFORM and IMM files and 32B for all other files.
++ * UNIFORM files and 32B for all other files.
+  */
+ inline unsigned
+ regs_read(const fs_inst *inst, unsigned i)
+ {
+-   const unsigned reg_size =
+-      inst->src[i].file == UNIFORM || inst->src[i].file == IMM ? 4 : REG_SIZE;
++   if (inst->src[i].file == IMM)
++      return 1;
++
++   const unsigned reg_size = inst->src[i].file == UNIFORM ? 4 : REG_SIZE;
+    return DIV_ROUND_UP(reg_offset(inst->src[i]) % reg_size +
+                        inst->size_read(i) -
+                        MIN2(inst->size_read(i), reg_padding(inst->src[i])),
+diff --git a/src/intel/vulkan/anv_allocator.c b/src/intel/vulkan/anv_allocator.c
+index 9007cd00e85..48811912e95 100644
+--- a/src/intel/vulkan/anv_allocator.c
++++ b/src/intel/vulkan/anv_allocator.c
+@@ -1447,8 +1447,8 @@ anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
+     * For, Gen11+, scratch space allocation is based on the number of threads
+     * in the base configuration.
+     */
+-   if (devinfo->gen >= 12)
+-      subslices = devinfo->num_subslices[0];
++   if (devinfo->gen == 12)
++      subslices = (devinfo->is_dg1 || devinfo->gt == 2 ? 6 : 2);
+    else if (devinfo->gen == 11)
+       subslices = 8;
+    else if (devinfo->gen >= 9)
+diff --git a/src/intel/vulkan/anv_image.c b/src/intel/vulkan/anv_image.c
+index 0290431f145..80307cd612f 100644
+--- a/src/intel/vulkan/anv_image.c
++++ b/src/intel/vulkan/anv_image.c
+@@ -684,6 +684,25 @@ choose_drm_format_mod(const struct anv_physical_device *device,
+       return NULL;
+ }
+ 
++static VkImageUsageFlags
++anv_image_create_usage(const VkImageCreateInfo *pCreateInfo,
++                       VkImageUsageFlags usage)
++{
++   /* Add TRANSFER_SRC usage for multisample attachment images. This is
++    * because we might internally use the TRANSFER_SRC layout on them for
++    * blorp operations associated with resolving those into other attachments
++    * at the end of a subpass.
++    *
++    * Without this additional usage, we compute an incorrect AUX state in
++    * anv_layout_to_aux_state().
++    */
++   if (pCreateInfo->samples > VK_SAMPLE_COUNT_1_BIT &&
++       (usage & (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
++                 VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)))
++      usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
++   return usage;
++}
++
+ VkResult
+ anv_image_create(VkDevice _device,
+                  const struct anv_image_create_info *create_info,
+@@ -732,7 +751,7 @@ anv_image_create(VkDevice _device,
+    image->levels = pCreateInfo->mipLevels;
+    image->array_size = pCreateInfo->arrayLayers;
+    image->samples = pCreateInfo->samples;
+-   image->usage = pCreateInfo->usage;
++   image->usage = anv_image_create_usage(pCreateInfo, pCreateInfo->usage);
+    image->create_flags = pCreateInfo->flags;
+    image->tiling = pCreateInfo->tiling;
+    image->disjoint = pCreateInfo->flags & VK_IMAGE_CREATE_DISJOINT_BIT;
+@@ -745,8 +764,11 @@ anv_image_create(VkDevice _device,
+       const VkImageStencilUsageCreateInfoEXT *stencil_usage_info =
+          vk_find_struct_const(pCreateInfo->pNext,
+                               IMAGE_STENCIL_USAGE_CREATE_INFO_EXT);
+-      if (stencil_usage_info)
+-         image->stencil_usage = stencil_usage_info->stencilUsage;
++      if (stencil_usage_info) {
++         image->stencil_usage =
++            anv_image_create_usage(pCreateInfo,
++                                   stencil_usage_info->stencilUsage);
++      }
+    }
+ 
+    /* In case of external format, We don't know format yet,
+diff --git a/src/intel/vulkan/anv_pass.c b/src/intel/vulkan/anv_pass.c
+index af23b87969d..1818f6c587b 100644
+--- a/src/intel/vulkan/anv_pass.c
++++ b/src/intel/vulkan/anv_pass.c
+@@ -23,6 +23,7 @@
+ 
+ #include "anv_private.h"
+ 
++#include "vk_format_info.h"
+ #include "vk_util.h"
+ 
+ static void
+@@ -406,6 +407,70 @@ num_subpass_attachments2(const VkSubpassDescription2KHR *desc)
+           (ds_resolve && ds_resolve->pDepthStencilResolveAttachment);
+ }
+ 
++static bool
++vk_image_layout_depth_only(VkImageLayout layout)
++{
++   switch (layout) {
++   case VK_IMAGE_LAYOUT_DEPTH_READ_ONLY_OPTIMAL:
++   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL:
++      return true;
++
++   default:
++      return false;
++   }
++}
++
++/* From the Vulkan Specification 1.2.166 - VkAttachmentReference2:
++ *
++ *   "If layout only specifies the layout of the depth aspect of the
++ *    attachment, the layout of the stencil aspect is specified by the
++ *    stencilLayout member of a VkAttachmentReferenceStencilLayout structure
++ *    included in the pNext chain. Otherwise, layout describes the layout for
++ *    all relevant image aspects."
++ */
++static VkImageLayout
++stencil_ref_layout(const VkAttachmentReference2KHR *att_ref)
++{
++   if (!vk_image_layout_depth_only(att_ref->layout))
++      return att_ref->layout;
++
++   const VkAttachmentReferenceStencilLayoutKHR *stencil_ref =
++      vk_find_struct_const(att_ref->pNext,
++                           ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
++   if (!stencil_ref)
++      return VK_IMAGE_LAYOUT_UNDEFINED;
++   return stencil_ref->stencilLayout;
++}
++
++/* From the Vulkan Specification 1.2.166 - VkAttachmentDescription2:
++ *
++ *   "If format is a depth/stencil format, and initialLayout only specifies
++ *    the initial layout of the depth aspect of the attachment, the initial
++ *    layout of the stencil aspect is specified by the stencilInitialLayout
++ *    member of a VkAttachmentDescriptionStencilLayout structure included in
++ *    the pNext chain. Otherwise, initialLayout describes the initial layout
++ *    for all relevant image aspects."
++ */
++static VkImageLayout
++stencil_desc_layout(const VkAttachmentDescription2KHR *att_desc, bool final)
++{
++   if (!vk_format_has_stencil(att_desc->format))
++      return VK_IMAGE_LAYOUT_UNDEFINED;
++
++   const VkImageLayout main_layout =
++      final ? att_desc->finalLayout : att_desc->initialLayout;
++   if (!vk_image_layout_depth_only(main_layout))
++      return main_layout;
++
++   const VkAttachmentDescriptionStencilLayoutKHR *stencil_desc =
++      vk_find_struct_const(att_desc->pNext,
++                           ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
++   assert(stencil_desc);
++   return final ?
++      stencil_desc->stencilFinalLayout :
++      stencil_desc->stencilInitialLayout;
++}
++
+ VkResult anv_CreateRenderPass2(
+     VkDevice                                    _device,
+     const VkRenderPassCreateInfo2KHR*           pCreateInfo,
+@@ -450,10 +515,6 @@ VkResult anv_CreateRenderPass2(
+    pass->subpass_flushes = subpass_flushes;
+ 
+    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+-      const VkAttachmentDescriptionStencilLayoutKHR *stencil_layout =
+-         vk_find_struct_const(pCreateInfo->pAttachments[i].pNext,
+-                              ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT_KHR);
+-
+       pass->attachments[i] = (struct anv_render_pass_attachment) {
+          .format                 = pCreateInfo->pAttachments[i].format,
+          .samples                = pCreateInfo->pAttachments[i].samples,
+@@ -463,12 +524,10 @@ VkResult anv_CreateRenderPass2(
+          .initial_layout         = pCreateInfo->pAttachments[i].initialLayout,
+          .final_layout           = pCreateInfo->pAttachments[i].finalLayout,
+ 
+-         .stencil_initial_layout = (stencil_layout ?
+-                                    stencil_layout->stencilInitialLayout :
+-                                    pCreateInfo->pAttachments[i].initialLayout),
+-         .stencil_final_layout   = (stencil_layout ?
+-                                    stencil_layout->stencilFinalLayout :
+-                                    pCreateInfo->pAttachments[i].finalLayout),
++         .stencil_initial_layout = stencil_desc_layout(&pCreateInfo->pAttachments[i],
++                                                       false),
++         .stencil_final_layout   = stencil_desc_layout(&pCreateInfo->pAttachments[i],
++                                                       true),
+       };
+    }
+ 
+@@ -487,17 +546,11 @@ VkResult anv_CreateRenderPass2(
+          subpass_attachments += desc->inputAttachmentCount;
+ 
+          for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
+-            const VkAttachmentReferenceStencilLayoutKHR *stencil_layout =
+-               vk_find_struct_const(desc->pInputAttachments[j].pNext,
+-                                    ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+-
+             subpass->input_attachments[j] = (struct anv_subpass_attachment) {
+                .usage =          VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT,
+                .attachment =     desc->pInputAttachments[j].attachment,
+                .layout =         desc->pInputAttachments[j].layout,
+-               .stencil_layout = (stencil_layout ?
+-                                  stencil_layout->stencilLayout :
+-                                  desc->pInputAttachments[j].layout),
++               .stencil_layout = stencil_ref_layout(&desc->pInputAttachments[j]),
+             };
+          }
+       }
+@@ -531,17 +584,11 @@ VkResult anv_CreateRenderPass2(
+       if (desc->pDepthStencilAttachment) {
+          subpass->depth_stencil_attachment = subpass_attachments++;
+ 
+-         const VkAttachmentReferenceStencilLayoutKHR *stencil_attachment =
+-            vk_find_struct_const(desc->pDepthStencilAttachment->pNext,
+-                                 ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+-
+          *subpass->depth_stencil_attachment = (struct anv_subpass_attachment) {
+             .usage =          VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
+             .attachment =     desc->pDepthStencilAttachment->attachment,
+             .layout =         desc->pDepthStencilAttachment->layout,
+-            .stencil_layout = stencil_attachment ?
+-                              stencil_attachment->stencilLayout :
+-                              desc->pDepthStencilAttachment->layout,
++            .stencil_layout = stencil_ref_layout(desc->pDepthStencilAttachment),
+          };
+       }
+ 
+@@ -552,17 +599,11 @@ VkResult anv_CreateRenderPass2(
+       if (ds_resolve && ds_resolve->pDepthStencilResolveAttachment) {
+          subpass->ds_resolve_attachment = subpass_attachments++;
+ 
+-         const VkAttachmentReferenceStencilLayoutKHR *stencil_resolve_attachment =
+-            vk_find_struct_const(ds_resolve->pDepthStencilResolveAttachment->pNext,
+-                                 ATTACHMENT_REFERENCE_STENCIL_LAYOUT_KHR);
+-
+          *subpass->ds_resolve_attachment = (struct anv_subpass_attachment) {
+             .usage =          VK_IMAGE_USAGE_TRANSFER_DST_BIT,
+             .attachment =     ds_resolve->pDepthStencilResolveAttachment->attachment,
+             .layout =         ds_resolve->pDepthStencilResolveAttachment->layout,
+-            .stencil_layout = stencil_resolve_attachment ?
+-                              stencil_resolve_attachment->stencilLayout :
+-                              ds_resolve->pDepthStencilResolveAttachment->layout,
++            .stencil_layout = stencil_ref_layout(ds_resolve->pDepthStencilResolveAttachment),
+          };
+          subpass->depth_resolve_mode = ds_resolve->depthResolveMode;
+          subpass->stencil_resolve_mode = ds_resolve->stencilResolveMode;
+diff --git a/src/intel/vulkan/genX_cmd_buffer.c b/src/intel/vulkan/genX_cmd_buffer.c
+index a9c49e0f592..e3eb376fa5a 100644
+--- a/src/intel/vulkan/genX_cmd_buffer.c
++++ b/src/intel/vulkan/genX_cmd_buffer.c
+@@ -462,8 +462,10 @@ anv_image_init_aux_tt(struct anv_cmd_buffer *cmd_buffer,
+ {
+    uint32_t plane = anv_image_aspect_to_plane(image->aspects, aspect);
+ 
++   const struct anv_surface *surface = &image->planes[plane].surface;
+    uint64_t base_address =
+-      anv_address_physical(image->planes[plane].address);
++      anv_address_physical(anv_address_add(image->planes[plane].address,
++                                           surface->offset));
+ 
+    const struct isl_surf *isl_surf = &image->planes[plane].surface.isl;
+    uint64_t format_bits = gen_aux_map_format_bits_for_isl_surf(isl_surf);
+@@ -1231,6 +1233,17 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
+             uint32_t level_layer_count =
+                MIN2(layer_count, aux_layers - base_layer);
+ 
++            /* If will_full_fast_clear is set, the caller promises to
++             * fast-clear the largest portion of the specified range as it can.
++             * For color images, that means only the first LOD and array slice.
++             */
++            if (level == 0 && base_layer == 0 && will_full_fast_clear) {
++               base_layer++;
++               level_layer_count--;
++               if (level_layer_count == 0)
++                  continue;
++            }
++
+             anv_image_ccs_op(cmd_buffer, image,
+                              image->planes[plane].surface.isl.format,
+                              ISL_SWIZZLE_IDENTITY,
+@@ -1250,6 +1263,12 @@ transition_color_buffer(struct anv_cmd_buffer *cmd_buffer,
+                           "define an MCS buffer.");
+          }
+ 
++         /* If will_full_fast_clear is set, the caller promises to fast-clear
++          * the largest portion of the specified range as it can.
++          */
++         if (will_full_fast_clear)
++            return;
++
+          assert(base_level == 0 && level_count == 1);
+          anv_image_mcs_op(cmd_buffer, image,
+                           image->planes[plane].surface.isl.format,
+diff --git a/src/intel/vulkan/genX_pipeline.c b/src/intel/vulkan/genX_pipeline.c
+index 205e8677f19..33f071019b7 100644
+--- a/src/intel/vulkan/genX_pipeline.c
++++ b/src/intel/vulkan/genX_pipeline.c
+@@ -1180,7 +1180,22 @@ emit_cb_state(struct anv_graphics_pipeline *pipeline,
+ #endif
+          .LogicOpEnable = info->logicOpEnable,
+          .LogicOpFunction = vk_to_gen_logic_op[info->logicOp],
+-         .ColorBufferBlendEnable = a->blendEnable,
++         /* Vulkan specification 1.2.168, VkLogicOp:
++          *
++          *   "Logical operations are controlled by the logicOpEnable and
++          *    logicOp members of VkPipelineColorBlendStateCreateInfo. If
++          *    logicOpEnable is VK_TRUE, then a logical operation selected by
++          *    logicOp is applied between each color attachment and the
++          *    fragment’s corresponding output value, and blending of all
++          *    attachments is treated as if it were disabled."
++          *
++          * From the Broadwell PRM Volume 2d: Command Reference: Structures:
++          * BLEND_STATE_ENTRY:
++          *
++          *   "Enabling LogicOp and Color Buffer Blending at the same time is
++          *    UNDEFINED"
++          */
++         .ColorBufferBlendEnable = !info->logicOpEnable && a->blendEnable,
+          .ColorClampRange = COLORCLAMP_RTFORMAT,
+          .PreBlendColorClampEnable = true,
+          .PostBlendColorClampEnable = true,
+diff --git a/src/intel/vulkan/vk_format_info.h b/src/intel/vulkan/vk_format_info.h
+index 006e1f4a6ad..4e72c244742 100644
+--- a/src/intel/vulkan/vk_format_info.h
++++ b/src/intel/vulkan/vk_format_info.h
+@@ -164,4 +164,11 @@ vk_format_has_depth(VkFormat format)
+    return aspects & VK_IMAGE_ASPECT_DEPTH_BIT;
+ }
+ 
++static inline bool
++vk_format_has_stencil(VkFormat format)
++{
++   const VkImageAspectFlags aspects = vk_format_aspects(format);
++   return aspects & VK_IMAGE_ASPECT_STENCIL_BIT;
++}
++
+ #endif /* VK_FORMAT_INFO_H */
+diff --git a/src/mesa/state_tracker/st_pbo.c b/src/mesa/state_tracker/st_pbo.c
+index 65a1ce8862a..b03921c1be6 100644
+--- a/src/mesa/state_tracker/st_pbo.c
++++ b/src/mesa/state_tracker/st_pbo.c
+@@ -431,16 +431,21 @@ create_fs(struct st_context *st, bool download,
+    nir_ssa_def *coord = nir_load_var(&b, fragcoord);
+ 
+    nir_ssa_def *layer = NULL;
+-   if (st->pbo.layers && need_layer && (!download || target == PIPE_TEXTURE_1D_ARRAY ||
+-                                                     target == PIPE_TEXTURE_2D_ARRAY ||
+-                                                     target == PIPE_TEXTURE_3D ||
+-                                                     target == PIPE_TEXTURE_CUBE ||
+-                                                     target == PIPE_TEXTURE_CUBE_ARRAY)) {
+-      nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in,
+-                                              glsl_int_type(), "gl_Layer");
+-      var->data.location = VARYING_SLOT_LAYER;
+-      var->data.interpolation = INTERP_MODE_FLAT;
+-      layer = nir_load_var(&b, var);
++   if (st->pbo.layers && (!download || target == PIPE_TEXTURE_1D_ARRAY ||
++                                       target == PIPE_TEXTURE_2D_ARRAY ||
++                                       target == PIPE_TEXTURE_3D ||
++                                       target == PIPE_TEXTURE_CUBE ||
++                                       target == PIPE_TEXTURE_CUBE_ARRAY)) {
++      if (need_layer) {
++         nir_variable *var = nir_variable_create(b.shader, nir_var_shader_in,
++                                                glsl_int_type(), "gl_Layer");
++         var->data.location = VARYING_SLOT_LAYER;
++         var->data.interpolation = INTERP_MODE_FLAT;
++         layer = nir_load_var(&b, var);
++      }
++      else {
++         layer = zero;
++      }
+    }
+ 
+    /* offset_pos = param.xy + f2i(coord.xy) */
+diff --git a/src/util/format/u_format.csv b/src/util/format/u_format.csv
+index 8acfb869bdb..237c4c95475 100644
+--- a/src/util/format/u_format.csv
++++ b/src/util/format/u_format.csv
+@@ -500,7 +500,7 @@ PIPE_FORMAT_R4G4B4A4_UINT           , plain, 1, 1, 1, up4 , up4 , up4 , up4 , xy
+ PIPE_FORMAT_B4G4R4A4_UINT           , plain, 1, 1, 1, up4 , up4 , up4 , up4 , zyxw, rgb, up4 , up4 , up4 , up4 , yzwx
+ PIPE_FORMAT_A4R4G4B4_UINT           , plain, 1, 1, 1, up4 , up4 , up4 , up4 , yzwx, rgb, up4 , up4 , up4 , up4 , zyxw
+ PIPE_FORMAT_A4B4G4R4_UINT           , plain, 1, 1, 1, up4 , up4 , up4 , up4 , wzyx, rgb, up4 , up4 , up4 , up4 , xyzw
+-PIPE_FORMAT_A1R5G5B5_UINT           , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , zyxw
++PIPE_FORMAT_A1R5G5B5_UINT           , plain, 1, 1, 1, up1 , up5 , up5 , up5 , yzwx, rgb, up5 , up5 , up5 , up1 , zyxw
+ PIPE_FORMAT_A1B5G5R5_UINT           , plain, 1, 1, 1, up1 , up5 , up5 , up5 , wzyx, rgb, up5 , up5 , up5 , up1 , xyzw
+ PIPE_FORMAT_R5G5B5A1_UINT           , plain, 1, 1, 1, up5 , up5 , up5 , up1 , xyzw, rgb, up5 , up5 , up5 , up1 , wzyx
+ PIPE_FORMAT_B5G5R5A1_UINT           , plain, 1, 1, 1, up5 , up5 , up5 , up1 , zyxw, rgb, up1 , up5 , up5 , up5 , yzwx
+diff --git a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json
+index 1d5fffd0135..361ae9fe74e 100644
+--- a/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json
++++ b/src/vulkan/device-select-layer/VkLayer_MESA_device_select.json
+@@ -4,7 +4,7 @@
+     "name": "VK_LAYER_MESA_device_select",
+     "type": "GLOBAL",
+     "library_path": "libVkLayer_MESA_device_select.so",
+-    "api_version": "1.1.73",
++    "api_version": "1.2.73",
+     "implementation_version": "1",
+     "description": "Linux device selection layer",
+     "functions": {
diff --git a/SOURCES/nouveau-tu1xx-support.patch b/SOURCES/nouveau-tu1xx-support.patch
deleted file mode 100644
index 1134f43..0000000
--- a/SOURCES/nouveau-tu1xx-support.patch
+++ /dev/null
@@ -1,9921 +0,0 @@
-diff --git a/src/gallium/drivers/nouveau/Makefile.sources b/src/gallium/drivers/nouveau/Makefile.sources
-index 6c360992a53..9de8168fbd9 100644
---- a/src/gallium/drivers/nouveau/Makefile.sources
-+++ b/src/gallium/drivers/nouveau/Makefile.sources
-@@ -151,6 +151,14 @@ NVC0_CODEGEN_SOURCES := \
- 	codegen/nv50_ir_target_nvc0.h
- 
- NVC0_C_SOURCES := \
-+	nvc0/cla0c0qmd.h \
-+	nvc0/clc0c0qmd.h \
-+	nvc0/clc3c0qmd.h \
-+	nvc0/drf.h \
-+	nvc0/qmd.h \
-+	nvc0/qmda0c0.c \
-+	nvc0/qmdc0c0.c \
-+	nvc0/qmdc3c0.c \
- 	nvc0/gm107_texture.xml.h \
- 	nvc0/nvc0_3d.xml.h \
- 	nvc0/nvc0_compute.c \
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
-index 42ee969c66b..d58c0d206ec 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
-@@ -67,8 +67,10 @@ enum operation
-    OP_AND,
-    OP_OR,
-    OP_XOR,
-+   OP_LOP3_LUT,
-    OP_SHL,
-    OP_SHR,
-+   OP_SHF,
-    OP_MAX,
-    OP_MIN,
-    OP_SAT, // CLAMP(f32, 0.0, 1.0)
-@@ -116,6 +118,7 @@ enum operation
-    OP_PINTERP,
-    OP_EMIT,    // emit vertex
-    OP_RESTART, // restart primitive
-+   OP_FINAL, // finish emitting primitives
-    OP_TEX,
-    OP_TXB, // texture bias
-    OP_TXL, // texure lod
-@@ -151,7 +154,10 @@ enum operation
-    OP_INSBF,  // insert first src1[8:15] bits of src0 into src2 at src1[0:7]
-    OP_EXTBF,  // place bits [K,K+N) of src0 into dst, src1 = 0xNNKK
-    OP_BFIND,  // find highest/lowest set bit
-+   OP_BREV,   // bitfield reverse
-+   OP_BMSK,   // bitfield mask
-    OP_PERMT,  // dst = bytes from src2,src0 selected by src1 (nvc0's src order)
-+   OP_SGXT,
-    OP_ATOM,
-    OP_BAR,    // execution barrier, sources = { id, thread count, predicate }
-    OP_VADD,   // byte/word vector operations
-@@ -167,6 +173,7 @@ enum operation
-    OP_SHFL, // warp shuffle
-    OP_VOTE,
-    OP_BUFQ, // buffer query
-+   OP_WARPSYNC,
-    OP_LAST
- };
- 
-@@ -254,11 +261,29 @@ enum operation
- #define NV50_IR_SUBOP_VOTE_ALL 0
- #define NV50_IR_SUBOP_VOTE_ANY 1
- #define NV50_IR_SUBOP_VOTE_UNI 2
-+#define NV50_IR_SUBOP_LOP3_LUT_SRC0 0xf0
-+#define NV50_IR_SUBOP_LOP3_LUT_SRC1 0xcc
-+#define NV50_IR_SUBOP_LOP3_LUT_SRC2 0xaa
-+#define NV50_IR_SUBOP_LOP3_LUT(exp) ({         \
-+      uint8_t a = NV50_IR_SUBOP_LOP3_LUT_SRC0; \
-+      uint8_t b = NV50_IR_SUBOP_LOP3_LUT_SRC1; \
-+      uint8_t c = NV50_IR_SUBOP_LOP3_LUT_SRC2; \
-+      (uint8_t)(exp);                          \
-+})
-+#define NV50_IR_SUBOP_BMSK_C (0 << 0)
-+#define NV50_IR_SUBOP_BMSK_W (1 << 0)
- 
- #define NV50_IR_SUBOP_MINMAX_LOW  1
- #define NV50_IR_SUBOP_MINMAX_MED  2
- #define NV50_IR_SUBOP_MINMAX_HIGH 3
- 
-+#define NV50_IR_SUBOP_SHF_L  (0 << 0)
-+#define NV50_IR_SUBOP_SHF_R  (1 << 0)
-+#define NV50_IR_SUBOP_SHF_LO (0 << 1)
-+#define NV50_IR_SUBOP_SHF_HI (1 << 1)
-+#define NV50_IR_SUBOP_SHF_C  (0 << 2)
-+#define NV50_IR_SUBOP_SHF_W  (1 << 2)
-+
- // xmad(src0, src1, 0) << 16 + src2
- #define NV50_IR_SUBOP_XMAD_PSL (1 << 0)
- // (xmad(src0, src1, src2) & 0xffff) | (src1 << 16)
-@@ -900,7 +925,7 @@ public:
- 
-    uint16_t subOp; // quadop, 1 for mul-high, etc.
- 
--   unsigned encSize    : 4; // encoding size in bytes
-+   unsigned encSize    : 5; // encoding size in bytes
-    unsigned saturate   : 1; // to [0.0f, 1.0f]
-    unsigned join       : 1; // converge control flow (use OP_JOIN until end)
-    unsigned fixed      : 1; // prevent dead code elimination
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
-index 5dc0e24c5dc..63ea7f5e7e8 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_driver.h
-@@ -29,6 +29,8 @@
- #include "tgsi/tgsi_parse.h"
- #include "tgsi/tgsi_scan.h"
- 
-+struct nir_shader_compiler_options;
-+
- /*
-  * This struct constitutes linkage information in TGSI terminology.
-  *
-@@ -70,10 +72,12 @@ struct nv50_ir_prog_symbol
-    uint32_t offset;
- };
- 
-+#define NVISA_GF100_CHIPSET    0xc0
- #define NVISA_GK104_CHIPSET    0xe0
- #define NVISA_GK20A_CHIPSET    0xea
- #define NVISA_GM107_CHIPSET    0x110
- #define NVISA_GM200_CHIPSET    0x120
-+#define NVISA_GV100_CHIPSET    0x140
- 
- struct nv50_ir_prog_info
- {
-@@ -200,6 +204,9 @@ struct nv50_ir_prog_info
- extern "C" {
- #endif
- 
-+const struct nir_shader_compiler_options *
-+nv50_ir_nir_shader_compiler_options(int chipset);
-+
- extern int nv50_ir_generate_code(struct nv50_ir_prog_info *);
- 
- extern void nv50_ir_relocate_code(void *relocData, uint32_t *code,
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
-index e244bd0d610..dd8e1ab86c4 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gm107.cpp
-@@ -23,6 +23,7 @@
-  */
- 
- #include "codegen/nv50_ir_target_gm107.h"
-+#include "codegen/nv50_ir_sched_gm107.h"
- 
- //#define GM107_DEBUG_SCHED_DATA
- 
-@@ -170,6 +171,7 @@ private:
-    void emitBFI();
-    void emitBFE();
-    void emitFLO();
-+   void emitPRMT();
- 
-    void emitLDSTs(int, DataType);
-    void emitLDSTc(int);
-@@ -2371,6 +2373,33 @@ CodeEmitterGM107::emitFLO()
-    emitGPR  (0x00, insn->def(0));
- }
- 
-+void
-+CodeEmitterGM107::emitPRMT()
-+{
-+   switch (insn->src(1).getFile()) {
-+   case FILE_GPR:
-+      emitInsn(0x5bc00000);
-+      emitGPR (0x14, insn->src(1));
-+      break;
-+   case FILE_MEMORY_CONST:
-+      emitInsn(0x4bc00000);
-+      emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
-+      break;
-+   case FILE_IMMEDIATE:
-+      emitInsn(0x36c00000);
-+      emitIMMD(0x14, 19, insn->src(1));
-+      break;
-+   default:
-+      assert(!"bad src1 file");
-+      break;
-+   }
-+
-+   emitField(0x30, 3, insn->subOp);
-+   emitGPR  (0x27, insn->src(2));
-+   emitGPR  (0x08, insn->src(0));
-+   emitGPR  (0x00, insn->def(0));
-+}
-+
- /*******************************************************************************
-  * memory
-  ******************************************************************************/
-@@ -3537,6 +3566,9 @@ CodeEmitterGM107::emitInstruction(Instruction *i)
-    case OP_BFIND:
-       emitFLO();
-       break;
-+   case OP_PERMT:
-+      emitPRMT();
-+      break;
-    case OP_SLCT:
-       if (isFloatType(insn->dType))
-          emitFCMP();
-@@ -3742,156 +3774,6 @@ CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
-  * sched data calculator
-  ******************************************************************************/
- 
--class SchedDataCalculatorGM107 : public Pass
--{
--public:
--   SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
--
--private:
--   struct RegScores
--   {
--      struct ScoreData {
--         int r[256];
--         int p[8];
--         int c;
--      } rd, wr;
--      int base;
--
--      void rebase(const int base)
--      {
--         const int delta = this->base - base;
--         if (!delta)
--            return;
--         this->base = 0;
--
--         for (int i = 0; i < 256; ++i) {
--            rd.r[i] += delta;
--            wr.r[i] += delta;
--         }
--         for (int i = 0; i < 8; ++i) {
--            rd.p[i] += delta;
--            wr.p[i] += delta;
--         }
--         rd.c += delta;
--         wr.c += delta;
--      }
--      void wipe()
--      {
--         memset(&rd, 0, sizeof(rd));
--         memset(&wr, 0, sizeof(wr));
--      }
--      int getLatest(const ScoreData& d) const
--      {
--         int max = 0;
--         for (int i = 0; i < 256; ++i)
--            if (d.r[i] > max)
--               max = d.r[i];
--         for (int i = 0; i < 8; ++i)
--            if (d.p[i] > max)
--               max = d.p[i];
--         if (d.c > max)
--            max = d.c;
--         return max;
--      }
--      inline int getLatestRd() const
--      {
--         return getLatest(rd);
--      }
--      inline int getLatestWr() const
--      {
--         return getLatest(wr);
--      }
--      inline int getLatest() const
--      {
--         return MAX2(getLatestRd(), getLatestWr());
--      }
--      void setMax(const RegScores *that)
--      {
--         for (int i = 0; i < 256; ++i) {
--            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
--            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
--         }
--         for (int i = 0; i < 8; ++i) {
--            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
--            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
--         }
--         rd.c = MAX2(rd.c, that->rd.c);
--         wr.c = MAX2(wr.c, that->wr.c);
--      }
--      void print(int cycle)
--      {
--         for (int i = 0; i < 256; ++i) {
--            if (rd.r[i] > cycle)
--               INFO("rd $r%i @ %i\n", i, rd.r[i]);
--            if (wr.r[i] > cycle)
--               INFO("wr $r%i @ %i\n", i, wr.r[i]);
--         }
--         for (int i = 0; i < 8; ++i) {
--            if (rd.p[i] > cycle)
--               INFO("rd $p%i @ %i\n", i, rd.p[i]);
--            if (wr.p[i] > cycle)
--               INFO("wr $p%i @ %i\n", i, wr.p[i]);
--         }
--         if (rd.c > cycle)
--            INFO("rd $c @ %i\n", rd.c);
--         if (wr.c > cycle)
--            INFO("wr $c @ %i\n", wr.c);
--      }
--   };
--
--   RegScores *score; // for current BB
--   std::vector<RegScores> scoreBoards;
--
--   const TargetGM107 *targ;
--   bool visit(Function *);
--   bool visit(BasicBlock *);
--
--   void commitInsn(const Instruction *, int);
--   int calcDelay(const Instruction *, int) const;
--   void setDelay(Instruction *, int, const Instruction *);
--   void recordWr(const Value *, int, int);
--   void checkRd(const Value *, int, int&) const;
--
--   inline void emitYield(Instruction *);
--   inline void emitStall(Instruction *, uint8_t);
--   inline void emitReuse(Instruction *, uint8_t);
--   inline void emitWrDepBar(Instruction *, uint8_t);
--   inline void emitRdDepBar(Instruction *, uint8_t);
--   inline void emitWtDepBar(Instruction *, uint8_t);
--
--   inline int getStall(const Instruction *) const;
--   inline int getWrDepBar(const Instruction *) const;
--   inline int getRdDepBar(const Instruction *) const;
--   inline int getWtDepBar(const Instruction *) const;
--
--   void setReuseFlag(Instruction *);
--
--   inline void printSchedInfo(int, const Instruction *) const;
--
--   struct LiveBarUse {
--      LiveBarUse(Instruction *insn, Instruction *usei)
--         : insn(insn), usei(usei) { }
--      Instruction *insn;
--      Instruction *usei;
--   };
--
--   struct LiveBarDef {
--      LiveBarDef(Instruction *insn, Instruction *defi)
--         : insn(insn), defi(defi) { }
--      Instruction *insn;
--      Instruction *defi;
--   };
--
--   bool insertBarriers(BasicBlock *);
--
--   bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
--   Instruction *findFirstUse(const Instruction *) const;
--   Instruction *findFirstDef(const Instruction *) const;
--
--   bool needRdDepBar(const Instruction *) const;
--   bool needWrDepBar(const Instruction *) const;
--};
--
- inline void
- SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
- {
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
-new file mode 100644
-index 00000000000..0fbd47ccf88
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.cpp
-@@ -0,0 +1,2011 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "codegen/nv50_ir_emit_gv100.h"
-+#include "codegen/nv50_ir_sched_gm107.h"
-+
-+namespace nv50_ir {
-+
-+/*******************************************************************************
-+ * instruction format helpers
-+ ******************************************************************************/
-+
-+#define FA_NODEF (1 << 0)
-+#define FA_RRR   (1 << 1)
-+#define FA_RRI   (1 << 2)
-+#define FA_RRC   (1 << 3)
-+#define FA_RIR   (1 << 4)
-+#define FA_RCR   (1 << 5)
-+
-+#define FA_SRC_MASK 0x0ff
-+#define FA_SRC_NEG  0x100
-+#define FA_SRC_ABS  0x200
-+
-+#define EMPTY -1
-+#define __(a) (a) // no source modifiers
-+#define _A(a) ((a) | FA_SRC_ABS)
-+#define N_(a) ((a) | FA_SRC_NEG)
-+#define NA(a) ((a) | FA_SRC_NEG | FA_SRC_ABS)
-+
-+void
-+CodeEmitterGV100::emitFormA_I32(int src)
-+{
-+   emitIMMD(32, 32, insn->src(src));
-+   if (insn->src(src).mod.abs())
-+      code[1] &= 0x7fffffff;
-+   if (insn->src(src).mod.neg())
-+      code[1] ^= 0x80000000;
-+}
-+
-+void
-+CodeEmitterGV100::emitFormA_RRC(uint16_t op, int src1, int src2)
-+{
-+   emitInsn(op);
-+   if (src1 >= 0) {
-+      emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
-+      emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
-+      emitGPR (64, insn->src(src1 & FA_SRC_MASK));
-+   }
-+   if (src2 >= 0) {
-+      emitNEG (63, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
-+      emitABS (62, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
-+      emitCBUF(54, -1, 38, 0, 2, insn->src(src2 & FA_SRC_MASK));
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitFormA_RRI(uint16_t op, int src1, int src2)
-+{
-+   emitInsn(op);
-+   if (src1 >= 0) {
-+      emitNEG (75, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
-+      emitABS (74, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
-+      emitGPR (64, insn->src(src1 & FA_SRC_MASK));
-+   }
-+   if (src2 >= 0)
-+      emitFormA_I32(src2 & FA_SRC_MASK);
-+}
-+
-+void
-+CodeEmitterGV100::emitFormA_RRR(uint16_t op, int src1, int src2)
-+{
-+   emitInsn(op);
-+   if (src2 >= 0) {
-+      emitNEG (75, (src2 & FA_SRC_MASK), (src2 & FA_SRC_NEG));
-+      emitABS (74, (src2 & FA_SRC_MASK), (src2 & FA_SRC_ABS));
-+      emitGPR (64, insn->src(src2 & FA_SRC_MASK));
-+   }
-+
-+   if (src1 >= 0) {
-+      emitNEG (63, (src1 & FA_SRC_MASK), (src1 & FA_SRC_NEG));
-+      emitABS (62, (src1 & FA_SRC_MASK), (src1 & FA_SRC_ABS));
-+      emitGPR (32, insn->src(src1 & FA_SRC_MASK));
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitFormA(uint16_t op, uint8_t forms,
-+                            int src0, int src1, int src2)
-+{
-+   switch ((src1 < 0) ? FILE_GPR : insn->src(src1 & FA_SRC_MASK).getFile()) {
-+   case FILE_GPR:
-+      switch ((src2 < 0) ? FILE_GPR : insn->src(src2 & FA_SRC_MASK).getFile()) {
-+      case FILE_GPR:
-+         assert(forms & FA_RRR);
-+         emitFormA_RRR((1 << 9) | op, src1, src2);
-+         break;
-+      case FILE_IMMEDIATE:
-+         assert(forms & FA_RRI);
-+         emitFormA_RRI((2 << 9) | op, src1, src2);
-+         break;
-+      case FILE_MEMORY_CONST:
-+         assert(forms & FA_RRC);
-+         emitFormA_RRC((3 << 9) | op, src1, src2);
-+         break;
-+      default:
-+         assert(!"bad src2 file");
-+         break;
-+      }
-+      break;
-+   case FILE_IMMEDIATE:
-+      assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
-+      assert(forms & FA_RIR);
-+      emitFormA_RRI((4 << 9) | op, src2, src1);
-+      break;
-+   case FILE_MEMORY_CONST:
-+      assert((src2 < 0) || insn->src(src2 & FA_SRC_MASK).getFile() == FILE_GPR);
-+      assert(forms & FA_RCR);
-+      emitFormA_RRC((5 << 9) | op, src2, src1);
-+      break;
-+   default:
-+      assert(!"bad src1 file");
-+      break;
-+   }
-+
-+   if (src0 >= 0) {
-+      assert(insn->src(src0 & FA_SRC_MASK).getFile() == FILE_GPR);
-+      emitABS(73, (src0 & FA_SRC_MASK), (src0 & FA_SRC_ABS));
-+      emitNEG(72, (src0 & FA_SRC_MASK), (src0 & FA_SRC_NEG));
-+      emitGPR(24, insn->src(src0 & FA_SRC_MASK));
-+   }
-+
-+   if (!(forms & FA_NODEF))
-+      emitGPR(16, insn->def(0));
-+}
-+
-+/*******************************************************************************
-+ * control
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitBRA()
-+{
-+   const FlowInstruction *insn = this->insn->asFlow();
-+   int64_t target = ((int64_t)insn->target.bb->binPos - (codeSize + 0x10)) / 4;
-+
-+   assert(!insn->indirect && !insn->absolute);
-+
-+   emitInsn (0x947);
-+   emitField(34, 48, target);
-+   emitPRED (87);
-+   emitField(86, 2, 0); // ./.INC/.DEC
-+}
-+
-+void
-+CodeEmitterGV100::emitEXIT()
-+{
-+   emitInsn (0x94d);
-+   emitNOT  (90);
-+   emitPRED (87);
-+   emitField(85, 1, 0); // .NO_ATEXIT
-+   emitField(84, 2, 0); // ./.KEEPREFCOUNT/.PREEMPTED/.INVALID3
-+}
-+
-+void
-+CodeEmitterGV100::emitKILL()
-+{
-+   emitInsn(0x95b);
-+   emitPRED(87);
-+}
-+
-+void
-+CodeEmitterGV100::emitNOP()
-+{
-+   emitInsn(0x918);
-+}
-+
-+void
-+CodeEmitterGV100::emitWARPSYNC()
-+{
-+   emitFormA(0x148, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+   emitNOT  (90);
-+   emitPRED (87);
-+}
-+
-+/*******************************************************************************
-+ * movement / conversion
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitCS2R()
-+{
-+   emitInsn(0x805);
-+   emitSYS (72, insn->src(0));
-+   emitGPR (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitF2F()
-+{
-+   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
-+      emitFormA(0x104, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   else
-+      emitFormA(0x110, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
-+   emitFMZ  (80, 1);
-+   emitRND  (78);
-+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
-+   emitField(60, 2, insn->subOp); // ./.H1/.INVALID2/.INVALID3
-+}
-+
-+void
-+CodeEmitterGV100::emitF2I()
-+{
-+   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
-+      emitFormA(0x105, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   else
-+      emitFormA(0x111, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
-+   emitFMZ  (80, 1);
-+   emitRND  (78);
-+   emitField(77, 1, 0); // .NTZ
-+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
-+   emitField(72, 1, isSignedType(insn->dType));
-+}
-+
-+void
-+CodeEmitterGV100::emitFRND()
-+{
-+   int subop = 0;
-+
-+   switch (insn->op) {
-+   case OP_CVT:
-+      switch (insn->rnd) {
-+      case ROUND_NI: subop = 0; break;
-+      case ROUND_MI: subop = 1; break;
-+      case ROUND_PI: subop = 2; break;
-+      case ROUND_ZI: subop = 3; break;
-+      default:
-+         assert(!"invalid FRND mode");
-+         break;
-+      }
-+      break;
-+   case OP_FLOOR: subop = 1; break;
-+   case OP_CEIL : subop = 2; break;
-+   case OP_TRUNC: subop = 3; break;
-+   default:
-+      assert(!"invalid FRND opcode");
-+      break;
-+   }
-+
-+   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
-+      emitFormA(0x107, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   else
-+      emitFormA(0x113, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
-+   emitFMZ  (80, 1);
-+   emitField(78, 2, subop);
-+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
-+}
-+
-+void
-+CodeEmitterGV100::emitI2F()
-+{
-+   if (typeSizeof(insn->sType) != 8 && typeSizeof(insn->dType) != 8)
-+      emitFormA(0x106, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+   else
-+      emitFormA(0x112, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+   emitField(84, 2, util_logbase2(typeSizeof(insn->sType)));
-+   emitRND  (78);
-+   emitField(75, 2, util_logbase2(typeSizeof(insn->dType)));
-+   emitField(74, 1, isSignedType(insn->sType));
-+   if (typeSizeof(insn->sType) == 2)
-+      emitField(60, 2, insn->subOp >> 1);
-+   else
-+      emitField(60, 2, insn->subOp); // ./.B1/.B2/.B3
-+}
-+
-+void
-+CodeEmitterGV100::emitMOV()
-+{
-+   switch (insn->def(0).getFile()) {
-+   case FILE_GPR:
-+      switch (insn->src(0).getFile()) {
-+      case FILE_GPR:
-+      case FILE_MEMORY_CONST:
-+      case FILE_IMMEDIATE:
-+         emitFormA(0x002, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+         emitField(72, 4, insn->lanes);
-+         break;
-+      case FILE_PREDICATE:
-+         emitInsn (0x807);
-+         emitGPR  (16, insn->def(0));
-+         emitGPR  (24);
-+         emitField(32, 32, 0xffffffff);
-+         emitField(90,  1, 1);
-+         emitPRED (87, insn->src(0));
-+         break;
-+      default:
-+         assert(!"bad src file");
-+         break;
-+      }
-+      break;
-+   case FILE_PREDICATE:
-+      emitInsn (0x20c);
-+      emitPRED (87);
-+      emitPRED (84);
-+      emitNOT  (71);
-+      emitPRED (68);
-+      emitPRED (81, insn->def(0));
-+      emitCond3(76, CC_NE);
-+      emitGPR  (24, insn->src(0));
-+      emitGPR  (32);
-+      break;
-+   default:
-+      assert(!"bad dst file");
-+      break;
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitPRMT()
-+{
-+   emitFormA(0x016, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
-+   emitField(72, 3, insn->subOp);
-+}
-+
-+void
-+CodeEmitterGV100::emitS2R()
-+{
-+   emitInsn(0x919);
-+   emitSYS (72, insn->src(0));
-+   emitGPR (16, insn->def(0));
-+}
-+
-+static void
-+selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
-+{
-+   int loc = entry->loc;
-+   if (data.force_persample_interp)
-+      code[loc + 2] |= 1 << 26;
-+   else
-+      code[loc + 2] &= ~(1 << 26);
-+}
-+
-+void
-+CodeEmitterGV100::emitSEL()
-+{
-+   emitFormA(0x007, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
-+   emitNOT  (90, insn->src(2));
-+   emitPRED (87, insn->src(2));
-+   if (insn->subOp == 1)
-+      addInterp(0, 0, selpFlip);
-+}
-+
-+void
-+CodeEmitterGV100::emitSHFL()
-+{
-+   switch (insn->src(1).getFile()) {
-+   case FILE_GPR:
-+      switch (insn->src(2).getFile()) {
-+      case FILE_GPR:
-+         emitInsn(0x389);
-+         emitGPR (64, insn->src(2));
-+         break;
-+      case FILE_IMMEDIATE:
-+         emitInsn(0x589);
-+         emitIMMD(40, 13, insn->src(2));
-+         break;
-+      default:
-+         assert(!"bad src2 file");
-+         break;
-+      }
-+      emitGPR(32, insn->src(1));
-+      break;
-+   case FILE_IMMEDIATE:
-+      switch (insn->src(2).getFile()) {
-+      case FILE_GPR:
-+         emitInsn(0x989);
-+         emitGPR (64, insn->src(2));
-+         break;
-+      case FILE_IMMEDIATE:
-+         emitInsn(0xf89);
-+         emitIMMD(40, 13, insn->src(2));
-+         break;
-+      default:
-+         assert(!"bad src2 file");
-+         break;
-+      }
-+      emitIMMD(53, 5, insn->src(1));
-+      break;
-+   default:
-+      assert(!"bad src1 file");
-+      break;
-+   }
-+
-+   if (insn->defExists(1))
-+      emitPRED(81, insn->def(1));
-+   else
-+      emitPRED(81);
-+
-+   emitField(58, 2, insn->subOp);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+/*******************************************************************************
-+ * fp32
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitFADD()
-+{
-+   if (insn->src(1).getFile() == FILE_GPR)
-+      emitFormA(0x021, FA_RRR         , NA(0), NA(1), EMPTY);
-+   else
-+      emitFormA(0x021, FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
-+   emitFMZ  (80, 1);
-+   emitRND  (78);
-+   emitSAT  (77);
-+}
-+
-+void
-+CodeEmitterGV100::emitFFMA()
-+{
-+   emitFormA(0x023, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
-+   emitField(80, 1, insn->ftz);
-+   emitRND  (78);
-+   emitSAT  (77);
-+   emitField(76, 1, insn->dnz);
-+}
-+
-+void
-+CodeEmitterGV100::emitFMNMX()
-+{
-+   emitFormA(0x009, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
-+   emitField(90, 1, insn->op == OP_MAX);
-+   emitPRED (87);
-+   emitFMZ  (80, 1);
-+}
-+
-+void
-+CodeEmitterGV100::emitFMUL()
-+{
-+   emitFormA(0x020, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
-+   emitField(80, 1, insn->ftz);
-+   emitPDIV (84);
-+   emitRND  (78);
-+   emitSAT  (77);
-+   emitField(76, 1, insn->dnz);
-+}
-+
-+void
-+CodeEmitterGV100::emitFSET_BF()
-+{
-+   const CmpInstruction *insn = this->insn->asCmp();
-+
-+   emitFormA(0x00a, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
-+   emitFMZ  (80, 1);
-+   emitCond4(76, insn->setCond);
-+
-+   if (insn->op != OP_SET) {
-+      switch (insn->op) {
-+      case OP_SET_AND: emitField(74, 2, 0); break;
-+      case OP_SET_OR : emitField(74, 2, 1); break;
-+      case OP_SET_XOR: emitField(74, 2, 2); break;
-+      default:
-+         assert(!"invalid set op");
-+         break;
-+      }
-+      emitNOT (90, insn->src(2));
-+      emitPRED(87, insn->src(2));
-+   } else {
-+      emitPRED(87);
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitFSETP()
-+{
-+   const CmpInstruction *insn = this->insn->asCmp();
-+
-+   emitFormA(0x00b, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
-+   emitFMZ  (80, 1);
-+   emitCond4(76, insn->setCond);
-+
-+   if (insn->op != OP_SET) {
-+      switch (insn->op) {
-+      case OP_SET_AND: emitField(74, 2, 0); break;
-+      case OP_SET_OR : emitField(74, 2, 1); break;
-+      case OP_SET_XOR: emitField(74, 2, 2); break;
-+      default:
-+         assert(!"invalid set op");
-+         break;
-+      }
-+      emitNOT (90, insn->src(2));
-+      emitPRED(87, insn->src(2));
-+   } else {
-+      emitPRED(87);
-+   }
-+
-+   if (insn->defExists(1))
-+      emitPRED(84, insn->def(1));
-+   else
-+      emitPRED(84);
-+   emitPRED(81, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitFSWZADD()
-+{
-+   uint8_t subOp = 0;
-+
-+   // NP/PN swapped vs SM60
-+   for (int i = 0; i < 4; i++) {
-+      uint8_t p = ((insn->subOp >> (i * 2)) & 3);
-+      if (p == 1 || p == 2)
-+         p ^= 3;
-+      subOp |= p << (i * 2);
-+   }
-+
-+   emitInsn (0x822);
-+   emitFMZ  (80, 1);
-+   emitRND  (78);
-+   emitField(77, 1, insn->lanes); /* abused for .ndv */
-+   emitGPR  (64, insn->src(1));
-+   emitField(32, 8, subOp);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitMUFU()
-+{
-+   int mufu = 0;
-+
-+   switch (insn->op) {
-+   case OP_COS : mufu = 0; break;
-+   case OP_SIN : mufu = 1; break;
-+   case OP_EX2 : mufu = 2; break;
-+   case OP_LG2 : mufu = 3; break;
-+   case OP_RCP : mufu = 4 + 2 * insn->subOp; break;
-+   case OP_RSQ : mufu = 5 + 2 * insn->subOp; break;
-+   case OP_SQRT: mufu = 8; break;
-+   default:
-+      assert(!"invalid mufu");
-+      break;
-+   }
-+
-+   emitFormA(0x108, FA_RRR | FA_RIR | FA_RCR, EMPTY, NA(0), EMPTY);
-+   emitField(74, 4, mufu);
-+}
-+
-+/*******************************************************************************
-+ * fp64
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitDADD()
-+{
-+   emitFormA(0x029, FA_RRR | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
-+   emitRND(78);
-+}
-+
-+void
-+CodeEmitterGV100::emitDFMA()
-+{
-+   emitFormA(0x02b, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, NA(0), NA(1), NA(2));
-+   emitRND(78);
-+}
-+
-+void
-+CodeEmitterGV100::emitDMUL()
-+{
-+   emitFormA(0x028, FA_RRR | FA_RIR | FA_RCR, NA(0), NA(1), EMPTY);
-+   emitRND(78);
-+}
-+
-+void
-+CodeEmitterGV100::emitDSETP()
-+{
-+   const CmpInstruction *insn = this->insn->asCmp();
-+
-+   if (insn->src(1).getFile() == FILE_GPR)
-+      emitFormA(0x02a, FA_NODEF | FA_RRR         , NA(0), NA(1), EMPTY);
-+   else
-+      emitFormA(0x02a, FA_NODEF | FA_RRI | FA_RRC, NA(0), EMPTY, NA(1));
-+
-+   if (insn->op != OP_SET) {
-+      switch (insn->op) {
-+      case OP_SET_AND: emitField(74, 2, 0); break;
-+      case OP_SET_OR : emitField(74, 2, 1); break;
-+      case OP_SET_XOR: emitField(74, 2, 2); break;
-+      default:
-+         assert(!"invalid set op");
-+         break;
-+      }
-+      emitNOT (90, insn->src(2));
-+      emitPRED(87, insn->src(2));
-+   } else {
-+      emitPRED(87);
-+   }
-+
-+   if (insn->defExists(1))
-+      emitPRED(84, insn->def(1));
-+   else
-+      emitPRED(84);
-+   emitPRED (81, insn->def(0));
-+   emitCond4(76, insn->setCond);
-+}
-+
-+/*******************************************************************************
-+ * integer
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitBMSK()
-+{
-+   emitFormA(0x01b, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
-+   emitField(75, 1, insn->subOp); // .C/.W
-+}
-+
-+void
-+CodeEmitterGV100::emitBREV()
-+{
-+   emitFormA(0x101, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+}
-+
-+void
-+CodeEmitterGV100::emitFLO()
-+{
-+   emitFormA(0x100, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+   emitPRED (81);
-+   emitField(74, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
-+   emitField(73, 1, isSignedType(insn->dType));
-+   emitNOT  (63, insn->src(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitIABS()
-+{
-+   emitFormA(0x013, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+}
-+
-+void
-+CodeEmitterGV100::emitIADD3()
-+{
-+//   emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), N_(2));
-+   emitFormA(0x010, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(1), EMPTY);
-+   emitGPR  (64); //XXX: fix when switching back to N_(2)
-+   emitPRED (84, NULL); // .CC1
-+   emitPRED (81, insn->flagsDef >= 0 ? insn->getDef(insn->flagsDef) : NULL);
-+   if (insn->flagsSrc >= 0) {
-+      emitField(74, 1, 1); // .X
-+      emitPRED (87, insn->getSrc(insn->flagsSrc));
-+      emitField(77, 4, 0xf); // .X1
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitIMAD()
-+{
-+   emitFormA(0x024, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
-+   emitField(73, 1, isSignedType(insn->sType));
-+}
-+
-+void
-+CodeEmitterGV100::emitIMAD_WIDE()
-+{
-+   emitFormA(0x025, FA_RRR |          FA_RRC | FA_RIR | FA_RCR, __(0), __(1), N_(2));
-+   emitPRED (81);
-+   emitField(73, 1, isSignedType(insn->sType));
-+}
-+
-+void
-+CodeEmitterGV100::emitISETP()
-+{
-+   const CmpInstruction *insn = this->insn->asCmp();
-+
-+   emitFormA(0x00c, FA_NODEF | FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
-+
-+   if (insn->op != OP_SET) {
-+      switch (insn->op) {
-+      case OP_SET_AND: emitField(74, 2, 0); break;
-+      case OP_SET_OR : emitField(74, 2, 1); break;
-+      case OP_SET_XOR: emitField(74, 2, 2); break;
-+      default:
-+         assert(!"invalid set op");
-+         break;
-+      }
-+      emitNOT (90, insn->src(2));
-+      emitPRED(87, insn->src(2));
-+   } else {
-+      emitPRED(87);
-+   }
-+
-+   //XXX: CC->pred
-+   if (insn->flagsSrc >= 0) {
-+      assert(0);
-+      emitField(68, 4, 6);
-+   } else {
-+      emitNOT (71);
-+      if (!insn->subOp)
-+         emitPRED(68);
-+   }
-+
-+   if (insn->defExists(1))
-+      emitPRED(84, insn->def(1));
-+   else
-+      emitPRED(84);
-+   emitPRED (81, insn->def(0));
-+   emitCond3(76, insn->setCond);
-+   emitField(73, 1, isSignedType(insn->sType));
-+
-+   if (insn->subOp) { // .EX
-+      assert(0);
-+      emitField(72, 1, 1);
-+      emitPRED (68, insn->srcExists(3) ? insn->src(3) : insn->src(2));
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitLEA()
-+{
-+   assert(insn->src(1).get()->asImm());
-+
-+   emitFormA(0x011, FA_RRR | FA_RIR | FA_RCR, N_(0), N_(2), EMPTY);
-+   emitPRED (81);
-+   emitIMMD (75, 5, insn->src(1));
-+   emitGPR  (64);
-+}
-+
-+void
-+CodeEmitterGV100::emitLOP3_LUT()
-+{
-+   emitFormA(0x012, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), __(2));
-+   emitField(90, 1, 1);
-+   emitPRED (87);
-+   emitPRED (81);
-+   emitField(80, 1, 0); // .PAND
-+   emitField(72, 8, insn->subOp);
-+}
-+
-+void
-+CodeEmitterGV100::emitPOPC()
-+{
-+   emitFormA(0x109, FA_RRR | FA_RIR | FA_RCR, EMPTY, __(0), EMPTY);
-+   emitNOT  (63, insn->src(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitSGXT()
-+{
-+   emitFormA(0x01a, FA_RRR | FA_RIR | FA_RCR, __(0), __(1), EMPTY);
-+   emitField(75, 1, 0); // .W
-+   emitField(73, 1, 1); // /.U32
-+}
-+
-+void
-+CodeEmitterGV100::emitSHF()
-+{
-+   emitFormA(0x019, FA_RRR | FA_RRI | FA_RRC | FA_RIR | FA_RCR, __(0), __(1), __(2));
-+   emitField(80, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_HI));
-+   emitField(76, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_R));
-+   emitField(75, 1, !!(insn->subOp & NV50_IR_SUBOP_SHF_W));
-+
-+   switch (insn->sType) {
-+   case TYPE_S64: emitField(73, 2, 0); break;
-+   case TYPE_U64: emitField(73, 2, 1); break;
-+   case TYPE_S32: emitField(73, 2, 2); break;
-+   case TYPE_U32:
-+   default:
-+      emitField(73, 2, 3);
-+      break;
-+   }
-+}
-+
-+/*******************************************************************************
-+ * load/stores
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitALD()
-+{
-+   emitInsn (0x321);
-+   emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
-+   emitGPR  (32, insn->src(0).getIndirect(1));
-+   emitO    (79);
-+   emitP    (76);
-+   emitADDR (24, 40, 10, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitAST()
-+{
-+   emitInsn (0x322);
-+   emitField(74, 2, (typeSizeof(insn->dType) / 4) - 1);
-+   emitGPR  (64, insn->src(0).getIndirect(1));
-+   emitP    (76);
-+   emitADDR (24, 40, 10, 0, insn->src(0));
-+   emitGPR  (32, insn->src(1));
-+}
-+
-+void
-+CodeEmitterGV100::emitATOM()
-+{
-+   unsigned subOp, dType;
-+
-+   if (insn->subOp != NV50_IR_SUBOP_ATOM_CAS) {
-+      emitInsn(0x38a);
-+
-+      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
-+         subOp = 8;
-+      else
-+         subOp = insn->subOp;
-+      emitField(87, 4, subOp);
-+
-+      switch (insn->dType) {
-+      case TYPE_U32 : dType = 0; break;
-+      case TYPE_S32 : dType = 1; break;
-+      case TYPE_U64 : dType = 2; break;
-+      case TYPE_F32 : dType = 3; break;
-+      case TYPE_B128: dType = 4; break;
-+      case TYPE_S64 : dType = 5; break;
-+      default:
-+         assert(!"unexpected dType");
-+         dType = 0;
-+         break;
-+      }
-+      emitField(73, 3, dType);
-+   } else {
-+      emitInsn(0x38b);
-+
-+      switch (insn->dType) {
-+      case TYPE_U32: dType = 0; break;
-+      case TYPE_U64: dType = 2; break;
-+      default:
-+         assert(!"unexpected dType");
-+         dType = 0;
-+         break;
-+      }
-+      emitField(73, 3, dType);
-+   }
-+
-+   emitPRED (81);
-+   emitField(79, 2, 1);
-+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
-+   emitGPR  (32, insn->src(1));
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitATOMS()
-+{
-+   unsigned dType, subOp;
-+
-+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
-+      switch (insn->dType) {
-+      case TYPE_U32: dType = 0; break;
-+      case TYPE_S32: dType = 1; break;
-+      case TYPE_U64: dType = 2; break;
-+      default: assert(!"unexpected dType"); dType = 0; break;
-+      }
-+
-+      emitInsn (0x38d);
-+      emitField(87, 1, 0); // ATOMS.CAS/ATOMS.CAST
-+      emitField(73, 2, dType);
-+      emitGPR  (64, insn->src(2));
-+   } else {
-+      emitInsn(0x38c);
-+
-+      if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
-+         subOp = 8;
-+      else
-+         subOp = insn->subOp;
-+      emitField(87, 4, subOp);
-+
-+      switch (insn->dType) {
-+      case TYPE_U32: dType = 0; break;
-+      case TYPE_S32: dType = 1; break;
-+      case TYPE_U64: dType = 2; break;
-+      default: assert(!"unexpected dType"); dType = 0; break;
-+      }
-+
-+      emitField(73, 2, dType);
-+   }
-+
-+   emitGPR  (32, insn->src(1));
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitIPA()
-+{
-+   emitInsn (0x326);
-+   emitPRED (81, insn->defExists(1) ? insn->def(1) : NULL);
-+
-+   switch (insn->getInterpMode()) {
-+   case NV50_IR_INTERP_LINEAR     :
-+   case NV50_IR_INTERP_PERSPECTIVE: emitField(78, 2, 0); break;
-+   case NV50_IR_INTERP_FLAT       : emitField(78, 2, 1); break;
-+   case NV50_IR_INTERP_SC         : emitField(78, 2, 2); break;
-+   default:
-+      assert(!"invalid ipa mode");
-+      break;
-+   }
-+
-+   if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET) {
-+      switch (insn->getSampleMode()) {
-+      case NV50_IR_INTERP_DEFAULT : emitField(76, 2, 0); break;
-+      case NV50_IR_INTERP_CENTROID: emitField(76, 2, 1); break;
-+      default:
-+         break;
-+      }
-+      emitGPR  (32);
-+   } else {
-+      emitField(76, 2, 2);
-+      emitGPR  (32, insn->src(1));
-+   }
-+
-+   assert(!insn->src(0).isIndirect(0));
-+   emitADDR (-1, 64, 8, 2, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitISBERD()
-+{
-+   emitInsn(0x923);
-+   emitGPR (24, insn->src(0));
-+   emitGPR (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitLDSTc(int pos)
-+{
-+   int mode = 0;
-+
-+   switch (insn->cache) {
-+   case CACHE_CA: mode = 0; break;
-+   case CACHE_CG: mode = 1; break;
-+   case CACHE_CS: mode = 2; break;
-+   case CACHE_CV: mode = 3; break;
-+   default:
-+      assert(!"invalid caching mode");
-+      break;
-+   }
-+
-+   emitField(pos, 2, mode);
-+}
-+
-+void
-+CodeEmitterGV100::emitLDSTs(int pos, DataType type)
-+{
-+   int data = 0;
-+
-+   switch (typeSizeof(type)) {
-+   case  1: data = isSignedType(type) ? 1 : 0; break;
-+   case  2: data = isSignedType(type) ? 3 : 2; break;
-+   case  4: data = 4; break;
-+   case  8: data = 5; break;
-+   case 16: data = 6; break;
-+   default:
-+      assert(!"bad type");
-+      break;
-+   }
-+
-+   emitField(pos, 3, data);
-+}
-+
-+void
-+CodeEmitterGV100::emitLD()
-+{
-+   emitInsn (0x980);
-+   emitField(79, 2, 2); // .CONSTANT/./.STRONG/.MMIO
-+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
-+   emitLDSTs(73, insn->dType);
-+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
-+   emitADDR (24, 32, 32, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitLDC()
-+{
-+   emitFormA(0x182, FA_RCR, EMPTY, __(0), EMPTY);
-+   emitField(78, 2, insn->subOp);
-+   emitLDSTs(73, insn->dType);
-+   emitGPR  (24, insn->src(0).getIndirect(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitLDL()
-+{
-+   emitInsn (0x983);
-+   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
-+   emitLDSTs(73, insn->dType);
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitLDS()
-+{
-+   emitInsn (0x984);
-+   emitLDSTs(73, insn->dType);
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitOUT()
-+{
-+   const int cut  = insn->op == OP_RESTART || insn->subOp;
-+   const int emit = insn->op == OP_EMIT;
-+
-+   if (insn->op != OP_FINAL)
-+      emitFormA(0x124, FA_RRR | FA_RIR, __(0), __(1), EMPTY);
-+   else
-+      emitFormA(0x124, FA_RRR | FA_RIR, __(0), EMPTY, EMPTY);
-+   emitField(78, 2, (cut << 1) | emit);
-+}
-+
-+void
-+CodeEmitterGV100::emitRED()
-+{
-+   unsigned dType;
-+
-+   switch (insn->dType) {
-+   case TYPE_U32: dType = 0; break;
-+   case TYPE_S32: dType = 1; break;
-+   case TYPE_U64: dType = 2; break;
-+   case TYPE_F32: dType = 3; break;
-+   case TYPE_B128: dType = 4; break;
-+   case TYPE_S64: dType = 5; break;
-+   default: assert(!"unexpected dType"); dType = 0; break;
-+   }
-+
-+   emitInsn (0x98e);
-+   emitField(87, 3, insn->subOp);
-+   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
-+   emitField(79, 2, 2); // .INVALID0/./.STRONG/.INVALID3
-+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
-+   emitField(73, 3, dType);
-+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
-+   emitGPR  (32, insn->src(1));
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitST()
-+{
-+   emitInsn (0x385);
-+   emitField(79, 2, 2); // .INVALID0/./.STRONG/.MMIO
-+   emitField(77, 2, 2); // .CTA/.SM/.GPU/.SYS
-+   emitLDSTs(73, insn->dType);
-+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
-+   emitGPR  (64, insn->src(1));
-+   emitADDR (24, 32, 32, 0, insn->src(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitSTL()
-+{
-+   emitInsn (0x387);
-+   emitField(84, 3, 1); // .EF/./.EL/.LU/.EU/.NA/.INVALID6/.INVALID7
-+   emitLDSTs(73, insn->dType);
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (32, insn->src(1));
-+}
-+
-+void
-+CodeEmitterGV100::emitSTS()
-+{
-+   emitInsn (0x388);
-+   emitLDSTs(73, insn->dType);
-+   emitADDR (24, 40, 24, 0, insn->src(0));
-+   emitGPR  (32, insn->src(1));
-+}
-+
-+/*******************************************************************************
-+ * texture
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitTEXs(int pos)
-+{
-+   int src1 = insn->predSrc == 1 ? 2 : 1;
-+   if (insn->srcExists(src1))
-+      emitGPR(pos, insn->src(src1));
-+   else
-+      emitGPR(pos);
-+}
-+
-+void
-+CodeEmitterGV100::emitTEX()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+   int lodm = 0;
-+
-+   if (!insn->tex.levelZero) {
-+      switch (insn->op) {
-+      case OP_TEX: lodm = 0; break;
-+      case OP_TXB: lodm = 2; break;
-+      case OP_TXL: lodm = 3; break;
-+      default:
-+         assert(!"invalid tex op");
-+         break;
-+      }
-+   } else {
-+      lodm = 1;
-+   }
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb60);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x361);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly); // .NODEP
-+   emitField(87, 3, lodm);
-+   emitField(84, 3, 1); // 0=.EF, 1=, 2=.EL, 3=.LU, 4=.EU, 5=.NA
-+   emitField(78, 1, insn->tex.target.isShadow()); // .DC
-+   emitField(77, 1, insn->tex.derivAll); // .NDV
-+   emitField(76, 1, insn->tex.useOffsets == 1); // .AOFFI
-+   emitPRED (81);
-+   emitGPR  (64, insn->def(1));
-+   emitGPR  (16, insn->def(0));
-+   emitGPR  (24, insn->src(0));
-+   emitTEXs (32);
-+   emitField(63, 1, insn->tex.target.isArray());
-+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
-+                    insn->tex.target.getDim() - 1);
-+   emitField(72, 4, insn->tex.mask);
-+}
-+
-+void
-+CodeEmitterGV100::emitTLD()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb66);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x367);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly);
-+   emitField(87, 3, insn->tex.levelZero ? 1 /* .LZ */ : 3 /* .LL */);
-+   emitPRED (81);
-+   emitField(78, 1, insn->tex.target.isMS());
-+   emitField(76, 1, insn->tex.useOffsets == 1);
-+   emitField(72, 4, insn->tex.mask);
-+   emitGPR  (64, insn->def(1));
-+   emitField(63, 1, insn->tex.target.isArray());
-+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
-+                    insn->tex.target.getDim() - 1);
-+   emitTEXs (32);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitTLD4()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb63);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x364);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly);
-+   emitField(87, 2, insn->tex.gatherComp);
-+   emitField(84, 1, 1); // !.EF
-+   emitPRED (81);
-+   emitField(78, 1, insn->tex.target.isShadow());
-+   emitField(77, 2, insn->tex.useOffsets == 4);
-+   emitField(76, 2, insn->tex.useOffsets == 1);
-+   emitField(72, 4, insn->tex.mask);
-+   emitGPR  (64, insn->def(1));
-+   emitField(63, 1, insn->tex.target.isArray());
-+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
-+                    insn->tex.target.getDim() - 1);
-+   emitTEXs (32);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitTMML()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb69);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x36a);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly);
-+   emitField(77, 1, insn->tex.derivAll);
-+   emitField(72, 4, insn->tex.mask);
-+   emitGPR  (64, insn->def(1));
-+   emitField(63, 1, insn->tex.target.isArray());
-+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
-+                    insn->tex.target.getDim() - 1);
-+   emitTEXs (32);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitTXD()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb6c);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x36d);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly);
-+   emitPRED (81);
-+   emitField(76, 1, insn->tex.useOffsets == 1);
-+   emitField(72, 4, insn->tex.mask);
-+   emitGPR  (64, insn->def(1));
-+   emitField(63, 1, insn->tex.target.isArray());
-+   emitField(61, 2, insn->tex.target.isCube() ? 3 :
-+                    insn->tex.target.getDim() - 1);
-+   emitTEXs (32);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitTXQ()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+   int type = 0;
-+
-+   switch (insn->tex.query) {
-+   case TXQ_DIMS           : type = 0x00; break;
-+   case TXQ_TYPE           : type = 0x01; break;
-+   case TXQ_SAMPLE_POSITION: type = 0x02; break;
-+   default:
-+      assert(!"invalid txq query");
-+      break;
-+   }
-+
-+   if (insn->tex.rIndirectSrc < 0) {
-+      emitInsn (0xb6f);
-+      emitField(54, 5, prog->driver->io.auxCBSlot);
-+      emitField(40, 14, insn->tex.r);
-+   } else {
-+      emitInsn (0x370);
-+      emitField(59, 1, 1); // .B
-+   }
-+   emitField(90, 1, insn->tex.liveOnly);
-+   emitField(72, 4, insn->tex.mask);
-+   emitGPR  (64, insn->def(1));
-+   emitField(62, 2, type);
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+/*******************************************************************************
-+ * surface
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitSUHandle(const int s)
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
-+
-+   if (insn->src(s).getFile() == FILE_GPR) {
-+      emitGPR(64, insn->src(s));
-+   } else {
-+      assert(0);
-+      //XXX: not done
-+      ImmediateValue *imm = insn->getSrc(s)->asImm();
-+      assert(imm);
-+      emitField(0x33, 1, 1);
-+      emitField(0x24, 13, imm->reg.data.u32);
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitSUTarget()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+   int target = 0;
-+
-+   assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
-+
-+   if (insn->tex.target == TEX_TARGET_BUFFER) {
-+      target = 1;
-+   } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
-+      target = 2;
-+   } else if (insn->tex.target == TEX_TARGET_2D ||
-+              insn->tex.target == TEX_TARGET_RECT) {
-+      target = 3;
-+   } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
-+              insn->tex.target == TEX_TARGET_CUBE ||
-+              insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
-+      target = 4;
-+   } else if (insn->tex.target == TEX_TARGET_3D) {
-+      target = 5;
-+   } else {
-+      assert(insn->tex.target == TEX_TARGET_1D);
-+   }
-+   emitField(61, 3, target);
-+}
-+
-+void
-+CodeEmitterGV100::emitSUATOM()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+   uint8_t type = 0, subOp;
-+
-+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
-+      emitInsn(0x396);   // SUATOM.D.CAS
-+   else
-+      emitInsn(0x394);   // SUATOM.D
-+
-+   emitSUTarget();
-+
-+   // destination type
-+   switch (insn->dType) {
-+   case TYPE_S32: type = 1; break;
-+   case TYPE_U64: type = 2; break;
-+   case TYPE_F32: type = 3; break;
-+   case TYPE_S64: type = 5; break;
-+   default:
-+      assert(insn->dType == TYPE_U32);
-+      break;
-+   }
-+
-+   // atomic operation
-+   if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
-+      subOp = 0;
-+   } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
-+      subOp = 8;
-+   } else {
-+      subOp = insn->subOp;
-+   }
-+
-+   emitField(87, 4, subOp);
-+   emitPRED (81);
-+   emitField(79, 2, 1);
-+   emitField(73, 3, type);
-+   emitField(72, 1, 0); // .BA
-+   emitGPR  (32, insn->src(1));
-+   emitGPR  (24, insn->src(0));
-+   emitGPR  (16, insn->def(0));
-+
-+   emitSUHandle(2);
-+}
-+
-+void
-+CodeEmitterGV100::emitSULD()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+   int type = 0;
-+
-+   if (insn->op == OP_SULDB) {
-+      emitInsn(0x99a);
-+      emitSUTarget();
-+
-+      switch (insn->dType) {
-+      case TYPE_U8:   type = 0; break;
-+      case TYPE_S8:   type = 1; break;
-+      case TYPE_U16:  type = 2; break;
-+      case TYPE_S16:  type = 3; break;
-+      case TYPE_U32:  type = 4; break;
-+      case TYPE_U64:  type = 5; break;
-+      case TYPE_B128: type = 6; break;
-+      default:
-+         assert(0);
-+         break;
-+      }
-+   //   emitLDSTc(0x18);
-+      emitField(73, 3, type);
-+   } else {
-+      emitInsn(0x998);
-+      emitSUTarget();
-+      emitField(72, 4, 0xf); // rgba
-+   }
-+
-+   emitPRED (81);
-+   emitField(79, 2, 1);
-+
-+   emitGPR  (16, insn->def(0));
-+   emitGPR  (24, insn->src(0));
-+
-+   emitSUHandle(1);
-+}
-+
-+void
-+CodeEmitterGV100::emitSUST()
-+{
-+   const TexInstruction *insn = this->insn->asTex();
-+
-+   emitInsn(0x99c); // SUST.P
-+#if 0
-+   if (insn->op == OP_SUSTB)
-+      emitField(0x34, 1, 1);
-+#endif
-+   emitSUTarget();
-+
-+
-+#if 0
-+   emitLDSTc(0x18);
-+#endif
-+
-+   emitField(79, 2, 1);
-+   emitField(72, 4, 0xf); // rgba
-+   emitGPR(32, insn->src(1));
-+   emitGPR(24, insn->src(0));
-+   emitSUHandle(2);
-+}
-+
-+/*******************************************************************************
-+ * misc
-+ ******************************************************************************/
-+
-+void
-+CodeEmitterGV100::emitAL2P()
-+{
-+   emitInsn (0x920);
-+   emitO    (79);
-+   emitField(74, 2, (insn->getDef(0)->reg.size / 4) - 1);
-+   emitField(40, 11, insn->src(0).get()->reg.data.offset);
-+   emitGPR  (24, insn->src(0).getIndirect(0));
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitBAR()
-+{
-+   uint8_t subop, redop = 0x00;
-+
-+   // 80
-+   //    01: DEFER_BLOCKING
-+   // 78:77
-+   //    00: SYNC
-+   //    01: ARV
-+   //    02: RED
-+   //    03: SCAN
-+   // 75:74
-+   //    00: RED.POPC
-+   //    01: RED.AND
-+   //    02: RED.OR
-+
-+   switch (insn->subOp) {
-+   case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; redop = 0x00; break;
-+   case NV50_IR_SUBOP_BAR_RED_AND : subop = 0x02; redop = 0x01; break;
-+   case NV50_IR_SUBOP_BAR_RED_OR  : subop = 0x02; redop = 0x02; break;
-+   case NV50_IR_SUBOP_BAR_ARRIVE  : subop = 0x01; break;
-+   default:
-+      subop = 0x00;
-+      assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
-+      break;
-+   }
-+
-+   if (insn->src(0).getFile() == FILE_GPR) {
-+      emitInsn ((1 << 9) | 0x11d);
-+      emitGPR  (32, insn->src(0)); //XXX: nvdisasm shows src0==src1
-+   } else {
-+      ImmediateValue *imm = insn->getSrc(0)->asImm();
-+      assert(imm);
-+      if (insn->src(1).getFile() == FILE_GPR) {
-+         emitInsn ((4 << 9) | 0x11d);
-+         emitGPR  (32, insn->src(1));
-+      } else {
-+         emitInsn ((5 << 9) | 0x11d);
-+      }
-+      emitField(54, 4, imm->reg.data.u32);
-+   }
-+
-+   emitField(77, 2, subop);
-+   emitField(74, 2, redop);
-+
-+   if (insn->srcExists(2) && (insn->predSrc != 2)) {
-+      emitField(90, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
-+      emitPRED (87, insn->src(2));
-+   } else {
-+      emitField(87, 3, 7);
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitCCTL()
-+{
-+   if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL)
-+      emitInsn(0x98f);
-+   else
-+      emitInsn(0x990);
-+   emitField(87, 4, insn->subOp);
-+   emitField(72, 1, insn->src(0).getIndirect(0)->getSize() == 8);
-+   emitADDR (24, 32, 32, 0, insn->src(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitMEMBAR()
-+{
-+   emitInsn (0x992);
-+   switch (NV50_IR_SUBOP_MEMBAR_SCOPE(insn->subOp)) {
-+   case NV50_IR_SUBOP_MEMBAR_CTA: emitField(76, 3, 0); break;
-+   case NV50_IR_SUBOP_MEMBAR_GL : emitField(76, 3, 2); break;
-+   case NV50_IR_SUBOP_MEMBAR_SYS: emitField(76, 3, 3); break;
-+   default:
-+      assert(!"invalid scope");
-+      break;
-+   }
-+}
-+
-+void
-+CodeEmitterGV100::emitPIXLD()
-+{
-+   emitInsn (0x925);
-+   switch (insn->subOp) {
-+   case NV50_IR_SUBOP_PIXLD_COVMASK : emitField(78, 3, 1); break; // .COVMASK
-+   case NV50_IR_SUBOP_PIXLD_SAMPLEID: emitField(78, 3, 3); break; // .MY_INDEX
-+   default:
-+      assert(0);
-+      break;
-+   }
-+   emitPRED (71);
-+   emitGPR  (16, insn->def(0));
-+}
-+
-+void
-+CodeEmitterGV100::emitPLOP3_LUT()
-+{
-+   uint8_t op[2] = {};
-+
-+   switch (insn->op) {
-+   case OP_AND: op[0] = 0xf0 & 0xcc; break;
-+   case OP_OR : op[0] = 0xf0 | 0xcc; break;
-+   case OP_XOR: op[0] = 0xf0 ^ 0xcc; break;
-+   default:
-+      assert(!"invalid PLOP3");
-+      break;
-+   }
-+
-+   emitInsn(0x81c);
-+   emitNOT (90, insn->src(0));
-+   emitPRED(87, insn->src(0));
-+   emitPRED(84); // def(1)
-+   emitPRED(81, insn->def(0));
-+   emitNOT (80, insn->src(1));
-+   emitPRED(77, insn->src(1));
-+   emitField(72, 5, op[0] >> 3);
-+   emitNOT (71); // src(2)
-+   emitPRED(68); // src(2)
-+   emitField(64, 3, op[0] & 7);
-+   emitField(16, 8, op[1]);
-+}
-+
-+void
-+CodeEmitterGV100::emitVOTE()
-+{
-+   const ImmediateValue *imm;
-+   uint32_t u32;
-+
-+   int r = -1, p = -1;
-+   for (int i = 0; insn->defExists(i); i++) {
-+      if (insn->def(i).getFile() == FILE_GPR)
-+         r = i;
-+      else if (insn->def(i).getFile() == FILE_PREDICATE)
-+         p = i;
-+   }
-+
-+   emitInsn (0x806);
-+   emitField(72, 2, insn->subOp);
-+   if (r >= 0)
-+      emitGPR  (16, insn->def(r));
-+   else
-+      emitGPR  (16);
-+   if (p >= 0)
-+      emitPRED (81, insn->def(p));
-+   else
-+      emitPRED (81);
-+
-+   switch (insn->src(0).getFile()) {
-+   case FILE_PREDICATE:
-+      emitField(90, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
-+      emitPRED (87, insn->src(0));
-+      break;
-+   case FILE_IMMEDIATE:
-+      imm = insn->getSrc(0)->asImm();
-+      assert(imm);
-+      u32 = imm->reg.data.u32;
-+      assert(u32 == 0 || u32 == 1);
-+      emitField(90, 1, u32 == 0);
-+      emitPRED (87);
-+      break;
-+   default:
-+      assert(!"Unhandled src");
-+      break;
-+   }
-+}
-+
-+bool
-+CodeEmitterGV100::emitInstruction(Instruction *i)
-+{
-+   insn = i;
-+
-+   switch (insn->op) {
-+   case OP_ABS:
-+      assert(!isFloatType(insn->dType));
-+      emitIABS();
-+      break;
-+   case OP_ADD:
-+      if (isFloatType(insn->dType)) {
-+         if (insn->dType == TYPE_F32)
-+            emitFADD();
-+         else
-+            emitDADD();
-+      } else {
-+         emitIADD3();
-+      }
-+      break;
-+   case OP_AFETCH:
-+      emitAL2P();
-+      break;
-+   case OP_AND:
-+   case OP_OR:
-+   case OP_XOR:
-+      if (insn->def(0).getFile() == FILE_PREDICATE) {
-+         emitPLOP3_LUT();
-+      } else {
-+         assert(!"invalid logop");
-+         emitNOP();
-+      }
-+      break;
-+   case OP_ATOM:
-+      if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
-+         emitATOMS();
-+      else
-+         if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
-+            emitRED();
-+         else
-+            emitATOM();
-+      break;
-+   case OP_BAR:
-+      emitBAR();
-+      break;
-+   case OP_BFIND:
-+      emitFLO();
-+      break;
-+   case OP_BMSK:
-+      emitBMSK();
-+      break;
-+   case OP_BREV:
-+      emitBREV();
-+      break;
-+   case OP_BRA:
-+   case OP_JOIN: //XXX
-+      emitBRA();
-+      break;
-+   case OP_CCTL:
-+      emitCCTL();
-+      break;
-+   case OP_CEIL:
-+   case OP_CVT:
-+   case OP_FLOOR:
-+   case OP_TRUNC:
-+      if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
-+                                 insn->src(0).getFile() == FILE_PREDICATE)) {
-+         emitMOV();
-+      } else if (isFloatType(insn->dType)) {
-+         if (isFloatType(insn->sType)) {
-+            if (insn->sType == insn->dType)
-+               emitFRND();
-+            else
-+               emitF2F();
-+         } else {
-+            emitI2F();
-+         }
-+      } else {
-+         if (isFloatType(insn->sType)) {
-+            emitF2I();
-+         } else {
-+            assert(!"I2I");
-+            emitNOP();
-+         }
-+      }
-+      break;
-+   case OP_COS:
-+   case OP_EX2:
-+   case OP_LG2:
-+   case OP_RCP:
-+   case OP_RSQ:
-+   case OP_SIN:
-+   case OP_SQRT:
-+      emitMUFU();
-+      break;
-+   case OP_DISCARD:
-+      emitKILL();
-+      break;
-+   case OP_EMIT:
-+   case OP_FINAL:
-+   case OP_RESTART:
-+      emitOUT();
-+      break;
-+   case OP_EXIT:
-+      emitEXIT();
-+      break;
-+   case OP_EXPORT:
-+      emitAST();
-+      break;
-+   case OP_FMA:
-+   case OP_MAD:
-+      if (isFloatType(insn->dType)) {
-+         if (insn->dType == TYPE_F32)
-+            emitFFMA();
-+         else
-+            emitDFMA();
-+      } else {
-+         if (typeSizeof(insn->dType) != 8)
-+            emitIMAD();
-+         else
-+            emitIMAD_WIDE();
-+      }
-+      break;
-+   case OP_JOINAT: //XXX
-+      emitNOP();
-+      break;
-+   case OP_LINTERP:
-+      emitIPA();
-+      break;
-+   case OP_LOAD:
-+      switch (insn->src(0).getFile()) {
-+      case FILE_MEMORY_CONST : emitLDC(); break;
-+      case FILE_MEMORY_LOCAL : emitLDL(); break;
-+      case FILE_MEMORY_SHARED: emitLDS(); break;
-+      case FILE_MEMORY_GLOBAL: emitLD(); break;
-+      default:
-+         assert(!"invalid load");
-+         emitNOP();
-+         break;
-+      }
-+      break;
-+   case OP_LOP3_LUT:
-+      emitLOP3_LUT();
-+      break;
-+   case OP_MAX:
-+   case OP_MIN:
-+      if (isFloatType(insn->dType)) {
-+         if (insn->dType == TYPE_F32) {
-+            emitFMNMX();
-+         } else {
-+            assert(!"invalid FMNMX");
-+            emitNOP();
-+         }
-+      } else {
-+         assert(!"invalid MNMX");
-+         emitNOP();
-+      }
-+      break;
-+   case OP_MEMBAR:
-+      emitMEMBAR();
-+      break;
-+   case OP_MOV:
-+      emitMOV();
-+      break;
-+   case OP_MUL:
-+      if (isFloatType(insn->dType)) {
-+         if (insn->dType == TYPE_F32)
-+            emitFMUL();
-+         else
-+            emitDMUL();
-+      } else {
-+         assert(!"invalid IMUL");
-+         emitNOP();
-+      }
-+      break;
-+   case OP_PERMT:
-+      emitPRMT();
-+      break;
-+   case OP_PFETCH:
-+      emitISBERD();
-+      break;
-+   case OP_PIXLD:
-+      emitPIXLD();
-+      break;
-+   case OP_POPCNT:
-+      emitPOPC();
-+      break;
-+   case OP_QUADOP:
-+      emitFSWZADD();
-+      break;
-+   case OP_RDSV:
-+      if (targ->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
-+         emitCS2R();
-+      else
-+         emitS2R();
-+      break;
-+   case OP_SELP:
-+      emitSEL();
-+      break;
-+   case OP_SET:
-+   case OP_SET_AND:
-+   case OP_SET_OR:
-+   case OP_SET_XOR:
-+      if (insn->def(0).getFile() != FILE_PREDICATE) {
-+         if (isFloatType(insn->dType)) {
-+            if (insn->dType == TYPE_F32) {
-+               emitFSET_BF();
-+            } else {
-+               assert(!"invalid FSET");
-+               emitNOP();
-+            }
-+         } else {
-+            assert(!"invalid SET");
-+            emitNOP();
-+         }
-+      } else {
-+         if (isFloatType(insn->sType))
-+            if (insn->sType == TYPE_F64)
-+               emitDSETP();
-+            else
-+               emitFSETP();
-+         else
-+            emitISETP();
-+      }
-+      break;
-+   case OP_SGXT:
-+      emitSGXT();
-+      break;
-+   case OP_SHF:
-+      emitSHF();
-+      break;
-+   case OP_SHFL:
-+      emitSHFL();
-+      break;
-+   case OP_SHLADD:
-+      emitLEA();
-+      break;
-+   case OP_STORE:
-+      switch (insn->src(0).getFile()) {
-+      case FILE_MEMORY_LOCAL : emitSTL(); break;
-+      case FILE_MEMORY_SHARED: emitSTS(); break;
-+      case FILE_MEMORY_GLOBAL: emitST(); break;
-+      default:
-+         assert(!"invalid store");
-+         emitNOP();
-+         break;
-+      }
-+      break;
-+   case OP_SULDB:
-+   case OP_SULDP:
-+      emitSULD();
-+      break;
-+   case OP_SUREDB:
-+   case OP_SUREDP:
-+      emitSUATOM();
-+      break;
-+   case OP_SUSTB:
-+   case OP_SUSTP:
-+      emitSUST();
-+      break;
-+   case OP_TEX:
-+   case OP_TXB:
-+   case OP_TXL:
-+      emitTEX();
-+      break;
-+   case OP_TXD:
-+      emitTXD();
-+      break;
-+   case OP_TXF:
-+      emitTLD();
-+      break;
-+   case OP_TXG:
-+      emitTLD4();
-+      break;
-+   case OP_TXLQ:
-+      emitTMML();
-+      break;
-+   case OP_TXQ:
-+      emitTXQ();
-+      break;
-+   case OP_VFETCH:
-+      emitALD();
-+      break;
-+   case OP_VOTE:
-+      emitVOTE();
-+      break;
-+   case OP_WARPSYNC:
-+      emitWARPSYNC();
-+      break;
-+   default:
-+      assert(!"invalid opcode");
-+      emitNOP();
-+      break;
-+   }
-+
-+   code[3] &= 0x000001ff;
-+   code[3] |= insn->sched << 9;
-+   code += 4;
-+   codeSize += 16;
-+   return true;
-+}
-+
-+void
-+CodeEmitterGV100::prepareEmission(BasicBlock *bb)
-+{
-+   Function *func = bb->getFunction();
-+   Instruction *i;
-+   int j;
-+
-+   for (j = func->bbCount - 1; j >= 0 && !func->bbArray[j]->binSize; --j);
-+
-+   for (; j >= 0; --j) {
-+      BasicBlock *in = func->bbArray[j];
-+      Instruction *exit = in->getExit();
-+
-+      if (exit && exit->op == OP_BRA && exit->asFlow()->target.bb == bb) {
-+         in->binSize -= 16;
-+         func->binSize -= 16;
-+
-+         for (++j; j < func->bbCount; ++j)
-+            func->bbArray[j]->binPos -= 16;
-+
-+         in->remove(exit);
-+      }
-+      bb->binPos = in->binPos + in->binSize;
-+      if (in->binSize) // no more no-op branches to bb
-+         break;
-+   }
-+   func->bbArray[func->bbCount++] = bb;
-+
-+   if (!bb->getExit())
-+      return;
-+
-+   for (i = bb->getEntry(); i; i = i->next) {
-+      i->encSize = getMinEncodingSize(i);
-+      bb->binSize += i->encSize;
-+   }
-+
-+   assert(!bb->getEntry() || (bb->getExit() && bb->getExit()->encSize == 16));
-+
-+   func->binSize += bb->binSize;
-+}
-+
-+void
-+CodeEmitterGV100::prepareEmission(Function *func)
-+{
-+   SchedDataCalculatorGM107 sched(targ);
-+   CodeEmitter::prepareEmission(func);
-+   sched.run(func, true, true);
-+}
-+
-+void
-+CodeEmitterGV100::prepareEmission(Program *prog)
-+{
-+   for (ArrayList::Iterator fi = prog->allFuncs.iterator();
-+        !fi.end(); fi.next()) {
-+      Function *func = reinterpret_cast<Function *>(fi.get());
-+      func->binPos = prog->binSize;
-+      prepareEmission(func);
-+      prog->binSize += func->binSize;
-+   }
-+
-+   this->prog = prog;
-+}
-+
-+CodeEmitterGV100::CodeEmitterGV100(TargetGV100 *target)
-+   : CodeEmitter(target), targ(target)
-+{
-+   code = NULL;
-+   codeSize = codeSizeLimit = 0;
-+   relocInfo = NULL;
-+}
-+};
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
-new file mode 100644
-index 00000000000..e97bf6580a1
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_gv100.h
-@@ -0,0 +1,403 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#ifndef __NV50_IR_EMIT_GV100_H__
-+#define __NV50_IR_EMIT_GV100_H__
-+#include "codegen/nv50_ir_target_gv100.h"
-+
-+namespace nv50_ir {
-+
-+class CodeEmitterGV100 : public CodeEmitter {
-+public:
-+   CodeEmitterGV100(TargetGV100 *target);
-+
-+   virtual bool emitInstruction(Instruction *);
-+   virtual uint32_t getMinEncodingSize(const Instruction *) const { return 16; }
-+
-+private:
-+   const Program *prog;
-+   const TargetGV100 *targ;
-+   const Instruction *insn;
-+
-+   virtual void prepareEmission(Program *);
-+   virtual void prepareEmission(Function *);
-+   virtual void prepareEmission(BasicBlock *);
-+
-+   inline void emitInsn(uint32_t op) {
-+      code[0] = op;
-+      code[1] = 0;
-+      code[2] = 0;
-+      code[3] = 0;
-+      if (insn->predSrc >= 0) {
-+         emitField(12, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
-+         emitField(15, 1, insn->cc == CC_NOT_P);
-+      } else {
-+         emitField(12, 3, 7);
-+      }
-+   };
-+
-+   inline void emitField(int b, int s, uint64_t v) {
-+      if (b >= 0) {
-+         uint64_t m = ~0ULL >> (64 - s);
-+         uint64_t d = v & m;
-+         assert(!(v & ~m) || (v & ~m) == ~m);
-+         if (b < 64 && b + s > 64) {
-+            *(uint64_t *)&code[0] |= d << b;
-+            *(uint64_t *)&code[2] |= d >> (64 - b);
-+         } else {
-+            *(uint64_t *)&code[(b/64*2)] |= d << (b & 0x3f);
-+         }
-+      }
-+   };
-+
-+   inline void emitABS(int pos, int src, bool supported)
-+   {
-+      if (insn->src(src).mod.abs()) {
-+         assert(supported);
-+         emitField(pos, 1, 1);
-+      }
-+   }
-+
-+   inline void emitABS(int pos, int src)
-+   {
-+      emitABS(pos, src, true);
-+   }
-+
-+   inline void emitNEG(int pos, int src, bool supported) {
-+      if (insn->src(src).mod.neg()) {
-+         assert(supported);
-+         emitField(pos, 1, 1);
-+      }
-+   }
-+
-+   inline void emitNEG(int pos, int src) {
-+      emitNEG(pos, src, true);
-+   }
-+
-+   inline void emitNOT(int pos) {
-+      emitField(pos, 1, 0);
-+   };
-+
-+   inline void emitNOT(int pos, const ValueRef &ref) {
-+      emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
-+   }
-+
-+   inline void emitSAT(int pos) {
-+      emitField(pos, 1, insn->saturate);
-+   }
-+
-+   inline void emitRND(int rmp, RoundMode rnd, int rip) {
-+      int rm = 0, ri = 0;
-+      switch (rnd) {
-+      case ROUND_NI: ri = 1;
-+      case ROUND_N : rm = 0; break;
-+      case ROUND_MI: ri = 1;
-+      case ROUND_M : rm = 1; break;
-+      case ROUND_PI: ri = 1;
-+      case ROUND_P : rm = 2; break;
-+      case ROUND_ZI: ri = 1;
-+      case ROUND_Z : rm = 3; break;
-+      default:
-+         assert(!"invalid round mode");
-+         break;
-+      }
-+      emitField(rip, 1, ri);
-+      emitField(rmp, 2, rm);
-+   }
-+
-+   inline void emitRND(int pos) {
-+      emitRND(pos, insn->rnd, -1);
-+   }
-+
-+   inline void emitFMZ(int pos, int len) {
-+      emitField(pos, len, insn->dnz << 1 | insn->ftz);
-+   }
-+
-+   inline void emitPDIV(int pos) {
-+      emitField(pos, 3, insn->postFactor + 4);
-+   }
-+
-+   inline void emitO(int pos) {
-+      emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
-+   }
-+
-+   inline void emitP(int pos) {
-+      emitField(pos, 1, insn->perPatch);
-+   }
-+
-+   inline void emitCond3(int pos, CondCode code) {
-+      int data = 0;
-+
-+      switch (code) {
-+      case CC_FL : data = 0x00; break;
-+      case CC_LTU:
-+      case CC_LT : data = 0x01; break;
-+      case CC_EQU:
-+      case CC_EQ : data = 0x02; break;
-+      case CC_LEU:
-+      case CC_LE : data = 0x03; break;
-+      case CC_GTU:
-+      case CC_GT : data = 0x04; break;
-+      case CC_NEU:
-+      case CC_NE : data = 0x05; break;
-+      case CC_GEU:
-+      case CC_GE : data = 0x06; break;
-+      case CC_TR : data = 0x07; break;
-+      default:
-+         assert(!"invalid cond3");
-+         break;
-+      }
-+
-+      emitField(pos, 3, data);
-+   }
-+
-+   inline void emitCond4(int pos, CondCode code) {
-+      int data = 0;
-+
-+      switch (code) {
-+      case CC_FL: data = 0x00; break;
-+      case CC_LT: data = 0x01; break;
-+      case CC_EQ: data = 0x02; break;
-+      case CC_LE: data = 0x03; break;
-+      case CC_GT: data = 0x04; break;
-+      case CC_NE: data = 0x05; break;
-+      case CC_GE: data = 0x06; break;
-+   //   case CC_NUM: data = 0x07; break;
-+   //   case CC_NAN: data = 0x08; break;
-+      case CC_LTU: data = 0x09; break;
-+      case CC_EQU: data = 0x0a; break;
-+      case CC_LEU: data = 0x0b; break;
-+      case CC_GTU: data = 0x0c; break;
-+      case CC_NEU: data = 0x0d; break;
-+      case CC_GEU: data = 0x0e; break;
-+      case CC_TR:  data = 0x0f; break;
-+      default:
-+         assert(!"invalid cond4");
-+         break;
-+      }
-+
-+      emitField(pos, 4, data);
-+   }
-+
-+   inline void emitSYS(int pos, const Value *val) {
-+      int id = val ? val->reg.data.id : -1;
-+
-+      switch (id) {
-+      case SV_LANEID         : id = 0x00; break;
-+      case SV_VERTEX_COUNT   : id = 0x10; break;
-+      case SV_INVOCATION_ID  : id = 0x11; break;
-+      case SV_THREAD_KILL    : id = 0x13; break;
-+      case SV_INVOCATION_INFO: id = 0x1d; break;
-+      case SV_COMBINED_TID   : id = 0x20; break;
-+      case SV_TID            : id = 0x21 + val->reg.data.sv.index; break;
-+      case SV_CTAID          : id = 0x25 + val->reg.data.sv.index; break;
-+      case SV_LANEMASK_EQ    : id = 0x38; break;
-+      case SV_LANEMASK_LT    : id = 0x39; break;
-+      case SV_LANEMASK_LE    : id = 0x3a; break;
-+      case SV_LANEMASK_GT    : id = 0x3b; break;
-+      case SV_LANEMASK_GE    : id = 0x3c; break;
-+      case SV_CLOCK          : id = 0x50 + val->reg.data.sv.index; break;
-+      default:
-+         assert(!"invalid system value");
-+         id = 0;
-+         break;
-+      }
-+
-+      emitField(pos, 8, id);
-+   }
-+
-+   inline void emitSYS(int pos, const ValueRef &ref) {
-+      emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitGPR(int pos, const Value *val, int off) {
-+      emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
-+                val->reg.data.id + off: 255);
-+   }
-+
-+   inline void emitGPR(int pos, const Value *v) {
-+      emitGPR(pos, v, 0);
-+   }
-+
-+   inline void emitGPR(int pos) {
-+      emitGPR(pos, (const Value *)NULL);
-+   }
-+
-+   inline void emitGPR(int pos, const ValueRef &ref) {
-+      emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitGPR(int pos, const ValueRef *ref) {
-+      emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitGPR(int pos, const ValueDef &def) {
-+      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitGPR(int pos, const ValueDef &def, int off) {
-+      emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL, off);
-+   }
-+
-+   inline void emitPRED(int pos, const Value *val) {
-+      emitField(pos, 3, val ? val->reg.data.id : 7);
-+   };
-+
-+   inline void emitPRED(int pos) {
-+      emitPRED(pos, (const Value *)NULL);
-+   }
-+
-+   inline void emitPRED(int pos, const ValueRef &ref) {
-+      emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitPRED(int pos, const ValueDef &def) {
-+      emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
-+   }
-+
-+   inline void emitCBUF(int buf, int gpr, int off, int len, int align,
-+                        const ValueRef &ref) {
-+      const Value *v = ref.get();
-+      const Symbol *s = v->asSym();
-+
-+      assert(!(s->reg.data.offset & ((1 << align) - 1)));
-+
-+      emitField(buf,  5, v->reg.fileIndex);
-+      if (gpr >= 0)
-+         emitGPR(gpr, ref.getIndirect(0));
-+      emitField(off, 16, s->reg.data.offset);
-+   }
-+
-+   inline void emitIMMD(int pos, int len, const ValueRef &ref) {
-+      const ImmediateValue *imm = ref.get()->asImm();
-+      uint32_t val = imm->reg.data.u32;
-+
-+      if (insn->sType == TYPE_F64) {
-+         assert(!(imm->reg.data.u64 & 0x00000000ffffffffULL));
-+         val = imm->reg.data.u64 >> 32;
-+      }
-+
-+      emitField(pos, len, val);
-+   }
-+
-+   inline void emitADDR(int gpr, int off, int len, int shr,
-+                        const ValueRef &ref) {
-+      const Value *v = ref.get();
-+      assert(!(v->reg.data.offset & ((1 << shr) - 1)));
-+      if (gpr >= 0)
-+         emitGPR(gpr, ref.getIndirect(0));
-+      emitField(off, len, v->reg.data.offset >> shr);
-+   }
-+
-+   inline void emitFormA(uint16_t op, uint8_t forms, int src0, int src1, int src2);
-+   inline void emitFormA_RRR(uint16_t op, int src1, int src2);
-+   inline void emitFormA_RRI(uint16_t op, int src1, int src2);
-+   inline void emitFormA_RRC(uint16_t op, int src1, int src2);
-+   inline void emitFormA_I32(int src);
-+
-+   void emitBRA();
-+   void emitEXIT();
-+   void emitKILL();
-+   void emitNOP();
-+   void emitWARPSYNC();
-+
-+   void emitCS2R();
-+   void emitF2F();
-+   void emitF2I();
-+   void emitFRND();
-+   void emitI2F();
-+   void emitMOV();
-+   void emitPRMT();
-+   void emitS2R();
-+   void emitSEL();
-+   void emitSHFL();
-+
-+   void emitFADD();
-+   void emitFFMA();
-+   void emitFMNMX();
-+   void emitFMUL();
-+   void emitFSET_BF();
-+   void emitFSETP();
-+   void emitFSWZADD();
-+   void emitMUFU();
-+
-+   void emitDADD();
-+   void emitDFMA();
-+   void emitDMUL();
-+   void emitDSETP();
-+
-+   void emitBMSK();
-+   void emitBREV();
-+   void emitFLO();
-+   void emitIABS();
-+   void emitIADD3();
-+   void emitIMAD();
-+   void emitIMAD_WIDE();
-+   void emitISETP();
-+   void emitLEA();
-+   void emitLOP3_LUT();
-+   void emitPOPC();
-+   void emitSGXT();
-+   void emitSHF();
-+
-+   void emitALD();
-+   void emitAST();
-+   void emitATOM();
-+   void emitATOMS();
-+   void emitIPA();
-+   void emitISBERD();
-+   void emitLDSTc(int);
-+   void emitLDSTs(int, DataType);
-+   void emitLD();
-+   void emitLDC();
-+   void emitLDL();
-+   void emitLDS();
-+   void emitOUT();
-+   void emitRED();
-+   void emitST();
-+   void emitSTL();
-+   void emitSTS();
-+
-+   void emitTEXs(int);
-+   void emitTEX();
-+   void emitTLD();
-+   void emitTLD4();
-+   void emitTMML();
-+   void emitTXD();
-+   void emitTXQ();
-+
-+   void emitSUHandle(const int);
-+   void emitSUTarget();
-+   void emitSUATOM();
-+   void emitSULD();
-+   void emitSUST();
-+
-+   void emitAL2P();
-+   void emitBAR();
-+   void emitCCTL();
-+   void emitMEMBAR();
-+   void emitPIXLD();
-+   void emitPLOP3_LUT();
-+   void emitVOTE();
-+};
-+
-+};
-+#endif
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
-index bd78b76f384..69f9cfad0d6 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_nir.cpp
-@@ -571,6 +571,10 @@ Converter::getSubOp(nir_op op)
-    case nir_op_imul_high:
-    case nir_op_umul_high:
-       return NV50_IR_SUBOP_MUL_HIGH;
-+   case nir_op_ishl:
-+   case nir_op_ishr:
-+   case nir_op_ushr:
-+      return NV50_IR_SUBOP_SHIFT_WRAP;
-    default:
-       return 0;
-    }
-@@ -1067,7 +1071,11 @@ bool Converter::assignSlots() {
-          case TGSI_SEMANTIC_COLOR:
-             if (!var->data.fb_fetch_output)
-                info->prop.fp.numColourResults++;
--            info->prop.fp.separateFragData = true;
-+
-+            if (var->data.location == FRAG_RESULT_COLOR &&
-+                nir->info.outputs_written & BITFIELD64_BIT(var->data.location))
-+               info->prop.fp.separateFragData = true;
-+
-             // sometimes we get FRAG_RESULT_DATAX with data.index 0
-             // sometimes we get FRAG_RESULT_DATA0 with data.index X
-             index = index == 0 ? var->data.index : index;
-@@ -1617,6 +1625,7 @@ Converter::visit(nir_intrinsic_instr *insn)
- {
-    nir_intrinsic_op op = insn->intrinsic;
-    const nir_intrinsic_info &opInfo = nir_intrinsic_infos[op];
-+   unsigned dest_components = nir_intrinsic_dest_components(insn);
- 
-    switch (op) {
-    case nir_intrinsic_load_uniform: {
-@@ -1624,7 +1633,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       const DataType dType = getDType(insn);
-       Value *indirect;
-       uint32_t coffset = getIndirect(insn, 0, 0, indirect);
--      for (uint8_t i = 0; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0; i < dest_components; ++i) {
-          loadFrom(FILE_MEMORY_CONST, 0, dType, newDefs[i], 16 * coffset, i, indirect);
-       }
-       break;
-@@ -1635,7 +1644,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       DataType dType = getSType(insn->src[0], false, false);
-       uint32_t idx = getIndirect(insn, op == nir_intrinsic_store_output ? 1 : 2, 0, indirect);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
-          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
-             continue;
- 
-@@ -1688,7 +1697,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_LAYER, 0)));
-          srcs.push_back(mkOp1v(OP_RDSV, TYPE_U32, getSSA(), mkSysVal(SV_SAMPLE_INDEX, 0)));
- 
--         for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+         for (uint8_t i = 0u; i < dest_components; ++i) {
-             defs.push_back(newDefs[i]);
-             mask |= 1 << i;
-          }
-@@ -1723,7 +1732,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-          }
-       }
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < dest_components; ++i) {
-          uint32_t address = getSlotAddress(insn, idx, i);
-          Symbol *sym = mkSymbol(input ? FILE_SHADER_INPUT : FILE_SHADER_OUTPUT, 0, dType, address);
-          if (prog->getType() == Program::TYPE_FRAGMENT) {
-@@ -1858,7 +1867,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       SVSemantic sv = convert(op);
-       LValues &newDefs = convert(&insn->dest);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < nir_intrinsic_dest_components(insn); ++i) {
-          Value *def;
-          if (typeSizeof(dType) == 8)
-             def = getSSA();
-@@ -1910,12 +1919,12 @@ Converter::visit(nir_intrinsic_instr *insn)
- 
-       if (op == nir_intrinsic_read_first_invocation) {
-          mkOp1(OP_VOTE, TYPE_U32, tmp, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
--         mkOp2(OP_EXTBF, TYPE_U32, tmp, tmp, mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+         mkOp1(OP_BREV, TYPE_U32, tmp, tmp);
-          mkOp1(OP_BFIND, TYPE_U32, tmp, tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
-       } else
-          tmp = getSrc(&insn->src[1], 0);
- 
--      for (uint8_t i = 0; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0; i < dest_components; ++i) {
-          mkOp3(OP_SHFL, dType, newDefs[i], getSrc(&insn->src[0], i), tmp, mkImm(0x1f))
-             ->subOp = NV50_IR_SUBOP_SHFL_IDX;
-       }
-@@ -1931,7 +1940,7 @@ Converter::visit(nir_intrinsic_instr *insn)
- 
-       Value *vtxBase = mkOp2v(OP_PFETCH, TYPE_U32, getSSA(4, FILE_ADDRESS),
-                               mkImm(baseVertex), indirectVertex);
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < dest_components; ++i) {
-          uint32_t address = getSlotAddress(insn, idx, i);
-          loadFrom(FILE_SHADER_INPUT, 0, dType, newDefs[i], address, 0,
-                   indirectOffset, vtxBase, info->in[idx].patch);
-@@ -1954,7 +1963,7 @@ Converter::visit(nir_intrinsic_instr *insn)
- 
-       vtxBase = mkOp2v(OP_ADD, TYPE_U32, getSSA(4, FILE_ADDRESS), outBase, vtxBase);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < dest_components; ++i) {
-          uint32_t address = getSlotAddress(insn, idx, i);
-          loadFrom(FILE_SHADER_OUTPUT, 0, dType, newDefs[i], address, 0,
-                   indirectOffset, vtxBase, info->in[idx].patch);
-@@ -1978,7 +1987,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       uint32_t index = getIndirect(&insn->src[0], 0, indirectIndex) + 1;
-       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < dest_components; ++i) {
-          loadFrom(FILE_MEMORY_CONST, index, dType, newDefs[i], offset, i,
-                   indirectOffset, indirectIndex);
-       }
-@@ -2001,7 +2010,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       uint32_t buffer = getIndirect(&insn->src[1], 0, indirectBuffer);
-       uint32_t offset = getIndirect(&insn->src[2], 0, indirectOffset);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
-          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
-             continue;
-          Symbol *sym = mkSymbol(FILE_MEMORY_BUFFER, buffer, sType,
-@@ -2020,7 +2029,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       uint32_t buffer = getIndirect(&insn->src[0], 0, indirectBuffer);
-       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i)
-+      for (uint8_t i = 0u; i < dest_components; ++i)
-          loadFrom(FILE_MEMORY_BUFFER, buffer, dType, newDefs[i], offset, i,
-                   indirectOffset, indirectBuffer);
- 
-@@ -2314,7 +2323,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       Value *indirectOffset;
-       uint32_t offset = getIndirect(&insn->src[1], 0, indirectOffset);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i) {
-+      for (uint8_t i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
-          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
-             continue;
-          Symbol *sym = mkSymbol(FILE_MEMORY_SHARED, 0, sType, offset + i * typeSizeof(sType));
-@@ -2328,7 +2337,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       Value *indirectOffset;
-       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
- 
--      for (uint8_t i = 0u; i < insn->num_components; ++i)
-+      for (uint8_t i = 0u; i < dest_components; ++i)
-          loadFrom(FILE_MEMORY_SHARED, 0, dType, newDefs[i], offset, i, indirectOffset);
- 
-       break;
-@@ -2367,7 +2376,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-       Value *indirectOffset;
-       uint32_t offset = getIndirect(&insn->src[0], 0, indirectOffset);
- 
--      for (auto i = 0u; i < insn->num_components; ++i)
-+      for (auto i = 0u; i < dest_components; ++i)
-          loadFrom(FILE_MEMORY_GLOBAL, 0, dType, newDefs[i], offset, i, indirectOffset);
- 
-       info->io.globalAccess |= 0x1;
-@@ -2376,7 +2385,7 @@ Converter::visit(nir_intrinsic_instr *insn)
-    case nir_intrinsic_store_global: {
-       DataType sType = getSType(insn->src[0], false, false);
- 
--      for (auto i = 0u; i < insn->num_components; ++i) {
-+      for (auto i = 0u; i < nir_intrinsic_src_components(insn, 0); ++i) {
-          if (!((1u << i) & nir_intrinsic_write_mask(insn)))
-             continue;
-          if (typeSizeof(sType) == 8) {
-@@ -2774,7 +2783,7 @@ Converter::visit(nir_alu_instr *insn)
-    case nir_op_bfm: {
-       DEFAULT_CHECKS;
-       LValues &newDefs = convert(&insn->dest);
--      mkOp3(OP_INSBF, dType, newDefs[0], getSrc(&insn->src[0]), loadImm(NULL, 0x808), getSrc(&insn->src[1]));
-+      mkOp2(OP_BMSK, dType, newDefs[0], getSrc(&insn->src[1]), getSrc(&insn->src[0]))->subOp = NV50_IR_SUBOP_BMSK_W;
-       break;
-    }
-    case nir_op_bitfield_insert: {
-@@ -2794,17 +2803,69 @@ Converter::visit(nir_alu_instr *insn)
-    case nir_op_bitfield_reverse: {
-       DEFAULT_CHECKS;
-       LValues &newDefs = convert(&insn->dest);
--      mkOp2(OP_EXTBF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+      mkOp1(OP_BREV, TYPE_U32, newDefs[0], getSrc(&insn->src[0]));
-       break;
-    }
-    case nir_op_find_lsb: {
-       DEFAULT_CHECKS;
-       LValues &newDefs = convert(&insn->dest);
-       Value *tmp = getSSA();
--      mkOp2(OP_EXTBF, TYPE_U32, tmp, getSrc(&insn->src[0]), mkImm(0x2000))->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+      mkOp1(OP_BREV, TYPE_U32, tmp, getSrc(&insn->src[0]));
-       mkOp1(OP_BFIND, TYPE_U32, newDefs[0], tmp)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
-       break;
-    }
-+   case nir_op_extract_u8: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      Value *prmt = getSSA();
-+      mkOp2(OP_OR, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x4440));
-+      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
-+      break;
-+   }
-+   case nir_op_extract_i8: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      Value *prmt = getSSA();
-+      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x1111), loadImm(NULL, 0x8880));
-+      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
-+      break;
-+   }
-+   case nir_op_extract_u16: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      Value *prmt = getSSA();
-+      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x22), loadImm(NULL, 0x4410));
-+      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
-+      break;
-+   }
-+   case nir_op_extract_i16: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      Value *prmt = getSSA();
-+      mkOp3(OP_MAD, TYPE_U32, prmt, getSrc(&insn->src[1]), loadImm(NULL, 0x2222), loadImm(NULL, 0x9910));
-+      mkOp3(OP_PERMT, TYPE_U32, newDefs[0], getSrc(&insn->src[0]), prmt, loadImm(NULL, 0));
-+      break;
-+   }
-+   case nir_op_urol: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
-+            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
-+         ->subOp = NV50_IR_SUBOP_SHF_L |
-+                   NV50_IR_SUBOP_SHF_W |
-+                   NV50_IR_SUBOP_SHF_HI;
-+      break;
-+   }
-+   case nir_op_uror: {
-+      DEFAULT_CHECKS;
-+      LValues &newDefs = convert(&insn->dest);
-+      mkOp3(OP_SHF, TYPE_U32, newDefs[0], getSrc(&insn->src[0]),
-+            getSrc(&insn->src[1]), getSrc(&insn->src[0]))
-+         ->subOp = NV50_IR_SUBOP_SHF_R |
-+                   NV50_IR_SUBOP_SHF_W |
-+                   NV50_IR_SUBOP_SHF_LO;
-+      break;
-+   }
-    // boolean conversions
-    case nir_op_b2f32: {
-       DEFAULT_CHECKS;
-@@ -3224,6 +3285,11 @@ Converter::run()
-    NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);
-    NIR_PASS_V(nir, nir_lower_phis_to_scalar);
- 
-+   /*TODO: improve this lowering/optimisation loop so that we can use
-+    *      nir_opt_idiv_const effectively before this.
-+    */
-+   NIR_PASS(progress, nir, nir_lower_idiv, nir_lower_idiv_precise);
-+
-    do {
-       progress = false;
-       NIR_PASS(progress, nir, nir_copy_prop);
-@@ -3285,3 +3351,125 @@ Program::makeFromNIR(struct nv50_ir_prog_info *info)
- }
- 
- } // namespace nv50_ir
-+
-+static nir_shader_compiler_options
-+nvir_nir_shader_compiler_options(int chipset)
-+{
-+   return {
-+      .lower_fdiv = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_ffma = false,
-+      .fuse_ffma = false, /* nir doesn't track mad vs fma */
-+      .lower_flrp16 = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_flrp32 = true,
-+      .lower_flrp64 = true,
-+      .lower_fpow = false, // TODO: nir's lowering is broken, or we could use it
-+      .lower_fsat = false,
-+      .lower_fsqrt = false, // TODO: only before gm200
-+      .lower_sincos = false,
-+      .lower_fmod = true,
-+      .lower_bitfield_extract = false,
-+      .lower_bitfield_extract_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_bitfield_insert = false,
-+      .lower_bitfield_insert_to_shifts = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_bitfield_insert_to_bitfield_select = false,
-+      .lower_bitfield_reverse = false,
-+      .lower_bit_count = false,
-+      .lower_ifind_msb = false,
-+      .lower_find_lsb = false,
-+      .lower_uadd_carry = true, // TODO
-+      .lower_usub_borrow = true, // TODO
-+      .lower_mul_high = false,
-+      .lower_negate = false,
-+      .lower_sub = true,
-+      .lower_scmp = true, // TODO: not implemented yet
-+      .lower_vector_cmp = false,
-+      .lower_idiv = true,
-+      .lower_bitops = false,
-+      .lower_isign = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_fsign = (chipset >= NVISA_GV100_CHIPSET),
-+      .lower_fdph = false,
-+      .lower_fdot = false,
-+      .fdot_replicates = false, // TODO
-+      .lower_ffloor = false, // TODO
-+      .lower_ffract = true,
-+      .lower_fceil = false, // TODO
-+      .lower_ftrunc = false,
-+      .lower_ldexp = true,
-+      .lower_pack_half_2x16 = true,
-+      .lower_pack_unorm_2x16 = true,
-+      .lower_pack_snorm_2x16 = true,
-+      .lower_pack_unorm_4x8 = true,
-+      .lower_pack_snorm_4x8 = true,
-+      .lower_unpack_half_2x16 = true,
-+      .lower_unpack_unorm_2x16 = true,
-+      .lower_unpack_snorm_2x16 = true,
-+      .lower_unpack_unorm_4x8 = true,
-+      .lower_unpack_snorm_4x8 = true,
-+      .lower_pack_split = false,
-+      .lower_extract_byte = (chipset < NVISA_GM107_CHIPSET),
-+      .lower_extract_word = (chipset < NVISA_GM107_CHIPSET),
-+      .lower_all_io_to_temps = false,
-+      .lower_all_io_to_elements = false,
-+      .vertex_id_zero_based = false,
-+      .lower_base_vertex = false,
-+      .lower_helper_invocation = false,
-+      .optimize_sample_mask_in = false,
-+      .lower_cs_local_index_from_id = true,
-+      .lower_cs_local_id_from_index = false,
-+      .lower_device_index_to_zero = false, // TODO
-+      .lower_wpos_pntc = false, // TODO
-+      .lower_hadd = true, // TODO
-+      .lower_add_sat = true, // TODO
-+      .vectorize_io = false,
-+      .lower_to_scalar = true,
-+      .unify_interfaces = false,
-+      .use_interpolated_input_intrinsics = true,
-+      .lower_mul_2x32_64 = true, // TODO
-+      .lower_rotate = (chipset < NVISA_GV100_CHIPSET),
-+      .has_imul24 = false,
-+      .intel_vec4 = false,
-+      .max_unroll_iterations = 32,
-+      .lower_int64_options = (nir_lower_int64_options) (
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_isign64 : 0) |
-+            nir_lower_divmod64 |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_high64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_mov64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_icmp64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_iabs64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ineg64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_logic64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_minmax64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_shift64 : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_imul_2x32_64 : 0) |
-+            ((chipset >= NVISA_GM107_CHIPSET) ? nir_lower_extract64 : 0) |
-+            nir_lower_ufind_msb64
-+      ),
-+      .lower_doubles_options = (nir_lower_doubles_options) (
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drcp : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsqrt : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_drsq : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dfract : 0) |
-+            nir_lower_dmod |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_dsub : 0) |
-+            ((chipset >= NVISA_GV100_CHIPSET) ? nir_lower_ddiv : 0)
-+      )
-+   };
-+}
-+
-+static const nir_shader_compiler_options gf100_nir_shader_compiler_options =
-+nvir_nir_shader_compiler_options(NVISA_GF100_CHIPSET);
-+static const nir_shader_compiler_options gm107_nir_shader_compiler_options =
-+nvir_nir_shader_compiler_options(NVISA_GM107_CHIPSET);
-+static const nir_shader_compiler_options gv100_nir_shader_compiler_options =
-+nvir_nir_shader_compiler_options(NVISA_GV100_CHIPSET);
-+
-+const nir_shader_compiler_options *
-+nv50_ir_nir_shader_compiler_options(int chipset)
-+{
-+   if (chipset >= NVISA_GV100_CHIPSET)
-+      return &gv100_nir_shader_compiler_options;
-+   if (chipset >= NVISA_GM107_CHIPSET)
-+      return &gm107_nir_shader_compiler_options;
-+   return &gf100_nir_shader_compiler_options;
-+}
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
-index 60f3d582a0b..3fd76f64de0 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
-@@ -3401,8 +3401,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
-       // ReadInvocationARB(src, findLSB(ballot(true)))
-       val0 = getScratch();
-       mkOp1(OP_VOTE, TYPE_U32, val0, mkImm(1))->subOp = NV50_IR_SUBOP_VOTE_ANY;
--      mkOp2(OP_EXTBF, TYPE_U32, val0, val0, mkImm(0x2000))
--         ->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+      mkOp1(OP_BREV, TYPE_U32, val0, val0);
-       mkOp1(OP_BFIND, TYPE_U32, val0, val0)->subOp = NV50_IR_SUBOP_BFIND_SAMT;
-       src1 = val0;
-       /* fallthrough */
-@@ -3820,8 +3819,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
-       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
-          src0 = fetchSrc(0, c);
-          val0 = getScratch();
--         geni = mkOp2(OP_EXTBF, TYPE_U32, val0, src0, mkImm(0x2000));
--         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+         mkOp1(OP_BREV, TYPE_U32, val0, src0);
-          geni = mkOp1(OP_BFIND, TYPE_U32, dst0[c], val0);
-          geni->subOp = NV50_IR_SUBOP_BFIND_SAMT;
-       }
-@@ -3836,8 +3834,7 @@ Converter::handleInstruction(const struct tgsi_full_instruction *insn)
-    case TGSI_OPCODE_BREV:
-       FOR_EACH_DST_ENABLED_CHANNEL(0, c, tgsi) {
-          src0 = fetchSrc(0, c);
--         geni = mkOp2(OP_EXTBF, TYPE_U32, dst0[c], src0, mkImm(0x2000));
--         geni->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+         mkOp1(OP_BREV, TYPE_U32, dst0[c], src0);
-       }
-       break;
-    case TGSI_OPCODE_POPC:
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
-index 49a5f3b01f2..9fad1dcfe89 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
-@@ -239,9 +239,8 @@ GM107LoweringPass::handlePFETCH(Instruction *i)
-    Value *tmp1 = bld.getScratch();
-    Value *tmp2 = bld.getScratch();
-    bld.mkOp1(OP_RDSV, TYPE_U32, tmp0, bld.mkSysVal(SV_INVOCATION_INFO, 0));
--   bld.mkOp2(OP_SHR , TYPE_U32, tmp1, tmp0, bld.mkImm(16));
--   bld.mkOp2(OP_AND , TYPE_U32, tmp0, tmp0, bld.mkImm(0xff));
--   bld.mkOp2(OP_AND , TYPE_U32, tmp1, tmp1, bld.mkImm(0xff));
-+   bld.mkOp3(OP_PERMT, TYPE_U32, tmp1, tmp0, bld.mkImm(0x4442), bld.mkImm(0));
-+   bld.mkOp3(OP_PERMT, TYPE_U32, tmp0, tmp0, bld.mkImm(0x4440), bld.mkImm(0));
-    if (i->getSrc(1))
-       bld.mkOp2(OP_ADD , TYPE_U32, tmp2, i->getSrc(0), i->getSrc(1));
-    else
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
-index 71e5ea6417a..dfa1d035dac 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.h
-@@ -21,6 +21,7 @@ class GM107LegalizeSSA : public NVC0LegalizeSSA
- private:
-    virtual bool visit(Instruction *);
- 
-+protected:
-    void handlePFETCH(Instruction *);
-    void handleLOAD(Instruction *);
- };
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
-new file mode 100644
-index 00000000000..4b6df0db588
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.cpp
-@@ -0,0 +1,477 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "codegen/nv50_ir.h"
-+#include "codegen/nv50_ir_build_util.h"
-+
-+#include "codegen/nv50_ir_target_nvc0.h"
-+#include "codegen/nv50_ir_lowering_gv100.h"
-+
-+#include <limits>
-+
-+namespace nv50_ir {
-+
-+bool
-+GV100LegalizeSSA::handleCMP(Instruction *i)
-+{
-+   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-+
-+   bld.mkCmp(OP_SET, reverseCondCode(i->asCmp()->setCond), TYPE_U8, pred,
-+             i->sType, bld.mkImm(0), i->getSrc(2));
-+   bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
-+   return true;
-+}
-+
-+// NIR deals with most of these for us, but codegen generates more in pointer
-+// calculations from other lowering passes.
-+bool
-+GV100LegalizeSSA::handleIADD64(Instruction *i)
-+{
-+   Value *carry = bld.getSSA(1, FILE_PREDICATE);
-+   Value *def[2] = { bld.getSSA(), bld.getSSA() };
-+   Value *src[2][2];
-+
-+   for (int s = 0; s < 2; s++) {
-+      if (i->getSrc(s)->reg.size == 8) {
-+         bld.mkSplit(src[s], 4, i->getSrc(s));
-+      } else {
-+         src[s][0] = i->getSrc(s);
-+         src[s][1] = bld.mkImm(0);
-+      }
-+   }
-+
-+   bld.mkOp2(OP_ADD, TYPE_U32, def[0], src[0][0], src[1][0])->
-+      setFlagsDef(1, carry);
-+   bld.mkOp2(OP_ADD, TYPE_U32, def[1], src[0][1], src[1][1])->
-+      setFlagsSrc(2, carry);
-+   bld.mkOp2(OP_MERGE, i->dType, i->getDef(0), def[0], def[1]);
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleIMAD_HIGH(Instruction *i)
-+{
-+   Value *def = bld.getSSA(8), *defs[2];
-+   Value *src2;
-+
-+   if (i->srcExists(2) &&
-+       (!i->getSrc(2)->asImm() || i->getSrc(2)->asImm()->reg.data.u32)) {
-+      Value *src2s[2] = { bld.getSSA(), bld.getSSA() };
-+      bld.mkMov(src2s[0], bld.mkImm(0));
-+      bld.mkMov(src2s[1], i->getSrc(2));
-+      src2 = bld.mkOp2(OP_MERGE, TYPE_U64, bld.getSSA(8), src2s[0], src2s[1])->getDef(0);
-+   } else {
-+      src2 = bld.mkImm(0);
-+   }
-+
-+   bld.mkOp3(OP_MAD, isSignedType(i->sType) ? TYPE_S64 : TYPE_U64, def,
-+             i->getSrc(0), i->getSrc(1), src2);
-+
-+   bld.mkSplit(defs, 4, def);
-+   i->def(0).replace(defs[1], false);
-+   return true;
-+}
-+
-+// XXX: We should be able to do this in GV100LoweringPass, but codegen messes
-+//      up somehow and swaps the condcode without swapping the sources.
-+//      - tests/spec/glsl-1.50/execution/geometry/primitive-id-in.shader_test
-+bool
-+GV100LegalizeSSA::handleIMNMX(Instruction *i)
-+{
-+   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-+
-+   bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, i->dType, pred,
-+             i->sType, i->getSrc(0), i->getSrc(1));
-+   bld.mkOp3(OP_SELP, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1), pred);
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleIMUL(Instruction *i)
-+{
-+   if (i->subOp == NV50_IR_SUBOP_MUL_HIGH)
-+      return handleIMAD_HIGH(i);
-+
-+   bld.mkOp3(OP_MAD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
-+             bld.mkImm(0));
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleLOP2(Instruction *i)
-+{
-+   uint8_t src0 = NV50_IR_SUBOP_LOP3_LUT_SRC0;
-+   uint8_t src1 = NV50_IR_SUBOP_LOP3_LUT_SRC1;
-+   uint8_t subOp;
-+
-+   if (i->src(0).mod & Modifier(NV50_IR_MOD_NOT))
-+      src0 = ~src0;
-+   if (i->src(1).mod & Modifier(NV50_IR_MOD_NOT))
-+      src1 = ~src1;
-+
-+   switch (i->op) {
-+   case OP_AND: subOp = src0 & src1; break;
-+   case OP_OR : subOp = src0 | src1; break;
-+   case OP_XOR: subOp = src0 ^ src1; break;
-+   default:
-+      assert(!"invalid LOP2 opcode");
-+      break;
-+   }
-+
-+   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), i->getSrc(0), i->getSrc(1),
-+             bld.mkImm(0))->subOp = subOp;
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleNOT(Instruction *i)
-+{
-+   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), bld.mkImm(0), i->getSrc(0),
-+             bld.mkImm(0))->subOp = (uint8_t)~NV50_IR_SUBOP_LOP3_LUT_SRC1;
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handlePREEX2(Instruction *i)
-+{
-+   i->def(0).replace(i->src(0), false);
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleQUADON(Instruction *i)
-+{
-+   handleSHFL(i); // Inserts OP_WARPSYNC
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleQUADPOP(Instruction *i)
-+{
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleSET(Instruction *i)
-+{
-+   Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
-+   Value *pred = bld.getSSA(1, FILE_PREDICATE), *met;
-+   Instruction *xsetp;
-+
-+   if (isFloatType(i->dType)) {
-+      if (i->sType == TYPE_F32)
-+         return false; // HW has FSET.BF
-+      met = bld.mkImm(0x3f800000);
-+   } else {
-+      met = bld.mkImm(0xffffffff);
-+   }
-+
-+   xsetp = bld.mkCmp(i->op, i->asCmp()->setCond, TYPE_U8, pred, i->sType,
-+                     i->getSrc(0), i->getSrc(1));
-+   xsetp->src(0).mod = i->src(0).mod;
-+   xsetp->src(1).mod = i->src(1).mod;
-+   xsetp->setSrc(2, src2);
-+
-+   i = bld.mkOp3(OP_SELP, TYPE_U32, i->getDef(0), bld.mkImm(0), met, pred);
-+   i->src(2).mod = Modifier(NV50_IR_MOD_NOT);
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleSHFL(Instruction *i)
-+{
-+   Instruction *sync = new_Instruction(func, OP_WARPSYNC, TYPE_NONE);
-+   sync->fixed = 1;
-+   sync->setSrc(0, bld.mkImm(0xffffffff));
-+   i->bb->insertBefore(i, sync);
-+   return false;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleSHL(Instruction *i)
-+{
-+   if (i->src(0).getFile() != FILE_GPR) {
-+      bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
-+                i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_L |
-+                                       NV50_IR_SUBOP_SHF_HI;
-+   } else {
-+      bld.mkOp3(OP_SHF, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1),
-+                bld.mkImm(0))->subOp = NV50_IR_SUBOP_SHF_L;
-+   }
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleSHR(Instruction *i)
-+{
-+   bld.mkOp3(OP_SHF, i->dType, i->getDef(0), bld.mkImm(0), i->getSrc(1),
-+             i->getSrc(0))->subOp = NV50_IR_SUBOP_SHF_R | NV50_IR_SUBOP_SHF_HI;
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::handleSUB(Instruction *i)
-+{
-+   Instruction *xadd =
-+      bld.mkOp2(OP_ADD, i->dType, i->getDef(0), i->getSrc(0), i->getSrc(1));
-+   xadd->src(0).mod = i->src(0).mod;
-+   xadd->src(1).mod = i->src(1).mod ^ Modifier(NV50_IR_MOD_NEG);
-+   return true;
-+}
-+
-+bool
-+GV100LegalizeSSA::visit(Instruction *i)
-+{
-+   bool lowered = false;
-+
-+   bld.setPosition(i, false);
-+
-+   switch (i->op) {
-+   case OP_AND:
-+   case OP_OR:
-+   case OP_XOR:
-+      if (i->def(0).getFile() != FILE_PREDICATE)
-+         lowered = handleLOP2(i);
-+      break;
-+   case OP_NOT:
-+      lowered = handleNOT(i);
-+      break;
-+   case OP_SHL:
-+      lowered = handleSHL(i);
-+      break;
-+   case OP_SHR:
-+      lowered = handleSHR(i);
-+      break;
-+   case OP_SET:
-+   case OP_SET_AND:
-+   case OP_SET_OR:
-+   case OP_SET_XOR:
-+      if (i->def(0).getFile() != FILE_PREDICATE)
-+         lowered = handleSET(i);
-+      break;
-+   case OP_SLCT:
-+      lowered = handleCMP(i);
-+      break;
-+   case OP_PREEX2:
-+      lowered = handlePREEX2(i);
-+      break;
-+   case OP_MUL:
-+      if (!isFloatType(i->dType))
-+         lowered = handleIMUL(i);
-+      break;
-+   case OP_MAD:
-+      if (!isFloatType(i->dType) && i->subOp == NV50_IR_SUBOP_MUL_HIGH)
-+         lowered = handleIMAD_HIGH(i);
-+      break;
-+   case OP_SHFL:
-+      lowered = handleSHFL(i);
-+      break;
-+   case OP_QUADON:
-+      lowered = handleQUADON(i);
-+      break;
-+   case OP_QUADPOP:
-+      lowered = handleQUADPOP(i);
-+      break;
-+   case OP_SUB:
-+      lowered = handleSUB(i);
-+      break;
-+   case OP_MAX:
-+   case OP_MIN:
-+      if (!isFloatType(i->dType))
-+         lowered = handleIMNMX(i);
-+      break;
-+   case OP_ADD:
-+      if (!isFloatType(i->dType) && typeSizeof(i->dType) == 8)
-+         lowered = handleIADD64(i);
-+      break;
-+   case OP_PFETCH:
-+      handlePFETCH(i);
-+      break;
-+   case OP_LOAD:
-+      handleLOAD(i);
-+      break;
-+   default:
-+      break;
-+   }
-+
-+   if (lowered)
-+      delete_Instruction(prog, i);
-+
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handleDMNMX(Instruction *i)
-+{
-+   Value *pred = bld.getSSA(1, FILE_PREDICATE);
-+   Value *src0[2], *src1[2], *dest[2];
-+
-+   bld.mkCmp(OP_SET, (i->op == OP_MIN) ? CC_LT : CC_GT, TYPE_U32, pred,
-+             i->sType, i->getSrc(0), i->getSrc(1));
-+   bld.mkSplit(src0, 4, i->getSrc(0));
-+   bld.mkSplit(src1, 4, i->getSrc(1));
-+   bld.mkSplit(dest, 4, i->getDef(0));
-+   bld.mkOp3(OP_SELP, TYPE_U32, dest[0], src0[0], src1[0], pred);
-+   bld.mkOp3(OP_SELP, TYPE_U32, dest[1], src0[1], src1[1], pred);
-+   bld.mkOp2(OP_MERGE, TYPE_U64, i->getDef(0), dest[0], dest[1]);
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handleEXTBF(Instruction *i)
-+{
-+   Value *bit = bld.getScratch();
-+   Value *cnt = bld.getScratch();
-+   Value *mask = bld.getScratch();
-+   Value *zero = bld.mkImm(0);
-+
-+   bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
-+   bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
-+   bld.mkOp2(OP_BMSK, TYPE_U32, mask, bit, cnt);
-+   bld.mkOp2(OP_AND, TYPE_U32, mask, i->getSrc(0), mask);
-+   bld.mkOp2(OP_SHR, TYPE_U32, i->getDef(0), mask, bit);
-+   if (isSignedType(i->dType))
-+      bld.mkOp2(OP_SGXT, TYPE_S32, i->getDef(0), i->getDef(0), cnt);
-+
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handleFLOW(Instruction *i)
-+{
-+   i->op = OP_BRA;
-+   return false;
-+}
-+
-+bool
-+GV100LoweringPass::handleI2I(Instruction *i)
-+{
-+   bld.mkCvt(OP_CVT, TYPE_F32, i->getDef(0), i->sType, i->getSrc(0))->
-+      subOp = i->subOp;
-+   bld.mkCvt(OP_CVT, i->dType, i->getDef(0), TYPE_F32, i->getDef(0));
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handleINSBF(Instruction *i)
-+{
-+   Value *bit = bld.getScratch();
-+   Value *cnt = bld.getScratch();
-+   Value *mask = bld.getScratch();
-+   Value *src0 = bld.getScratch();
-+   Value *zero = bld.mkImm(0);
-+
-+   bld.mkOp3(OP_PERMT, TYPE_U32, bit, i->getSrc(1), bld.mkImm(0x4440), zero);
-+   bld.mkOp3(OP_PERMT, TYPE_U32, cnt, i->getSrc(1), bld.mkImm(0x4441), zero);
-+   bld.mkOp2(OP_BMSK, TYPE_U32, mask, zero, cnt);
-+
-+   bld.mkOp2(OP_AND, TYPE_U32, src0, i->getSrc(0), mask);
-+   bld.mkOp2(OP_SHL, TYPE_U32, src0, src0, bit);
-+
-+   bld.mkOp2(OP_SHL, TYPE_U32, mask, mask, bit);
-+   bld.mkOp3(OP_LOP3_LUT, TYPE_U32, i->getDef(0), src0, i->getSrc(2), mask)->
-+      subOp = NV50_IR_SUBOP_LOP3_LUT(a | (b & ~c));
-+
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handlePINTERP(Instruction *i)
-+{
-+   Value *src2 = i->srcExists(2) ? i->getSrc(2) : NULL;
-+   Instruction *ipa, *mul;
-+
-+   ipa = bld.mkOp2(OP_LINTERP, TYPE_F32, i->getDef(0), i->getSrc(0), src2);
-+   ipa->ipa = i->ipa;
-+   mul = bld.mkOp2(OP_MUL, TYPE_F32, i->getDef(0), i->getDef(0), i->getSrc(1));
-+
-+   if (i->getInterpMode() == NV50_IR_INTERP_SC) {
-+      ipa->setDef(1, bld.getSSA(1, FILE_PREDICATE));
-+      mul->setPredicate(CC_NOT_P, ipa->getDef(1));
-+   }
-+
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handlePREFLOW(Instruction *i)
-+{
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::handlePRESIN(Instruction *i)
-+{
-+   const float f = 1.0 / (2.0 * 3.14159265);
-+   bld.mkOp2(OP_MUL, i->dType, i->getDef(0), i->getSrc(0), bld.mkImm(f));
-+   return true;
-+}
-+
-+bool
-+GV100LoweringPass::visit(Instruction *i)
-+{
-+   bool lowered = false;
-+
-+   bld.setPosition(i, false);
-+
-+   switch (i->op) {
-+   case OP_BREAK:
-+   case OP_CONT:
-+      lowered = handleFLOW(i);
-+      break;
-+   case OP_PREBREAK:
-+   case OP_PRECONT:
-+      lowered = handlePREFLOW(i);
-+      break;
-+   case OP_CVT:
-+      if (i->src(0).getFile() != FILE_PREDICATE &&
-+          i->def(0).getFile() != FILE_PREDICATE &&
-+          !isFloatType(i->dType) && !isFloatType(i->sType))
-+         lowered = handleI2I(i);
-+      break;
-+   case OP_EXTBF:
-+      lowered = handleEXTBF(i);
-+      break;
-+   case OP_INSBF:
-+      lowered = handleINSBF(i);
-+      break;
-+   case OP_MAX:
-+   case OP_MIN:
-+      if (i->dType == TYPE_F64)
-+         lowered = handleDMNMX(i);
-+      break;
-+   case OP_PINTERP:
-+      lowered = handlePINTERP(i);
-+      break;
-+   case OP_PRESIN:
-+      lowered = handlePRESIN(i);
-+      break;
-+   default:
-+      break;
-+   }
-+
-+   if (lowered)
-+      delete_Instruction(prog, i);
-+
-+   return true;
-+}
-+
-+} // namespace nv50_ir
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
-new file mode 100644
-index 00000000000..92fdb938244
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gv100.h
-@@ -0,0 +1,79 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#ifndef __NV50_IR_LOWERING_GV100_H__
-+#define __NV50_IR_LOWERING_GV100_H__
-+#include "codegen/nv50_ir_lowering_gm107.h"
-+
-+namespace nv50_ir {
-+
-+class GV100LoweringPass : public Pass
-+{
-+public:
-+   GV100LoweringPass(Program *p) {
-+      bld.setProgram(p);
-+   }
-+
-+private:
-+   BuildUtil bld;
-+
-+   virtual bool visit(Instruction *);
-+
-+   bool handleDMNMX(Instruction *);
-+   bool handleEXTBF(Instruction *);
-+   bool handleFLOW(Instruction *);
-+   bool handleI2I(Instruction *);
-+   bool handleINSBF(Instruction *);
-+   bool handlePINTERP(Instruction *);
-+   bool handlePREFLOW(Instruction *);
-+   bool handlePRESIN(Instruction *);
-+};
-+
-+class GV100LegalizeSSA : public GM107LegalizeSSA
-+{
-+public:
-+   GV100LegalizeSSA(Program *p) {
-+      bld.setProgram(p);
-+   }
-+
-+private:
-+   virtual bool visit(Function *) { return true; }
-+   virtual bool visit(BasicBlock *) { return true; }
-+   virtual bool visit(Instruction *);
-+
-+   bool handleCMP(Instruction *);
-+   bool handleIADD64(Instruction *);
-+   bool handleIMAD_HIGH(Instruction *);
-+   bool handleIMNMX(Instruction *);
-+   bool handleIMUL(Instruction *);
-+   bool handleLOP2(Instruction *);
-+   bool handleNOT(Instruction *);
-+   bool handlePREEX2(Instruction *);
-+   bool handleQUADON(Instruction *);
-+   bool handleQUADPOP(Instruction *);
-+   bool handleSET(Instruction *);
-+   bool handleSHFL(Instruction *);
-+   bool handleSHL(Instruction *);
-+   bool handleSHR(Instruction *);
-+   bool handleSUB(Instruction *);
-+};
-+}
-+#endif
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
-index a60881000fe..f100445e9d0 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
-@@ -310,6 +310,14 @@ NVC0LegalizeSSA::handleSET(CmpInstruction *cmp)
-    cmp->sType = hTy;
- }
- 
-+void
-+NVC0LegalizeSSA::handleBREV(Instruction *i)
-+{
-+   i->op = OP_EXTBF;
-+   i->subOp = NV50_IR_SUBOP_EXTBF_REV;
-+   i->setSrc(1, bld.mkImm(0x2000));
-+}
-+
- bool
- NVC0LegalizeSSA::visit(Function *fn)
- {
-@@ -354,6 +362,9 @@ NVC0LegalizeSSA::visit(BasicBlock *bb)
-          if (typeSizeof(i->sType) == 8 && i->sType != TYPE_F64)
-             handleSET(i->asCmp());
-          break;
-+      case OP_BREV:
-+         handleBREV(i);
-+         break;
-       default:
-          break;
-       }
-@@ -856,11 +867,11 @@ NVC0LegalizePostRA::visit(BasicBlock *bb)
-                next = hi;
-          }
- 
--         if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
--            replaceCvt(i);
--
-          if (i->op != OP_MOV && i->op != OP_PFETCH)
-             replaceZero(i);
-+
-+         if (i->op == OP_SAT || i->op == OP_NEG || i->op == OP_ABS)
-+            replaceCvt(i);
-       }
-    }
-    if (!bb->getEntry())
-@@ -887,6 +898,8 @@ NVC0LoweringPass::visit(Function *fn)
-       gpEmitAddress = bld.loadImm(NULL, 0)->asLValue();
-       if (fn->cfgExit) {
-          bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false);
-+         if (prog->getTarget()->getChipset() >= NVISA_GV100_CHIPSET)
-+            bld.mkOp1(OP_FINAL, TYPE_NONE, NULL, gpEmitAddress)->fixed = 1;
-          bld.mkMovToReg(0, gpEmitAddress);
-       }
-    }
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
-index b4c405a9ea5..a4925013ee4 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
-@@ -68,6 +68,7 @@ private:
-    void handleSET(CmpInstruction *);
-    void handleTEXLOD(TexInstruction *);
-    void handleShift(Instruction *);
-+   void handleBREV(Instruction *);
- 
- protected:
-    BuildUtil bld;
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
-index 2f46b0e886a..3a4ec3ca561 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
-@@ -558,6 +558,19 @@ ConstantFolding::expr(Instruction *i,
-    memset(&res.data, 0, sizeof(res.data));
- 
-    switch (i->op) {
-+   case OP_SGXT: {
-+      int bits = b->data.u32;
-+      if (bits) {
-+         uint32_t data = a->data.u32 & (0xffffffff >> (32 - bits));
-+         if (bits < 32 && (data & (1 << (bits - 1))))
-+            data = data - (1 << bits);
-+         res.data.u32 = data;
-+      }
-+      break;
-+   }
-+   case OP_BMSK:
-+      res.data.u32 = ((1 << b->data.u32) - 1) << a->data.u32;
-+      break;
-    case OP_MAD:
-    case OP_FMA:
-    case OP_MUL:
-@@ -780,6 +793,23 @@ ConstantFolding::expr(Instruction *i,
-    memset(&res.data, 0, sizeof(res.data));
- 
-    switch (i->op) {
-+   case OP_LOP3_LUT:
-+      for (int n = 0; n < 32; n++) {
-+         uint8_t lut = ((a->data.u32 >> n) & 1) << 2 |
-+                       ((b->data.u32 >> n) & 1) << 1 |
-+                       ((c->data.u32 >> n) & 1);
-+         res.data.u32 |= !!(i->subOp & (1 << lut)) << n;
-+      }
-+      break;
-+   case OP_PERMT:
-+      if (!i->subOp) {
-+         uint64_t input = (uint64_t)c->data.u32 << 32 | a->data.u32;
-+         uint16_t permt = b->data.u32;
-+         for (int n = 0 ; n < 4; n++, permt >>= 4)
-+            res.data.u32 |= ((input >> ((permt & 0xf) * 8)) & 0xff) << n * 8;
-+      } else
-+         return;
-+      break;
-    case OP_INSBF: {
-       int offset = b->data.u32 & 0xff;
-       int width = (b->data.u32 >> 8) & 0xff;
-@@ -1526,6 +1556,12 @@ ConstantFolding::opnd(Instruction *i, ImmediateValue &imm0, int s)
-       i->subOp = 0;
-       break;
-    }
-+   case OP_BREV: {
-+      uint32_t res = util_bitreverse(imm0.reg.data.u32);
-+      i->setSrc(0, new_ImmediateValue(i->bb->getProgram(), res));
-+      i->op = OP_MOV;
-+      break;
-+   }
-    case OP_POPCNT: {
-       // Only deal with 1-arg POPCNT here
-       if (i->srcExists(1))
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
-index 5dcbf3c3e0c..ce0d2507dc1 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
-@@ -93,8 +93,10 @@ const char *operationStr[OP_LAST + 1] =
-    "and",
-    "or",
-    "xor",
-+   "lop3 lut",
-    "shl",
-    "shr",
-+   "shf",
-    "max",
-    "min",
-    "sat",
-@@ -142,6 +144,7 @@ const char *operationStr[OP_LAST + 1] =
-    "pinterp",
-    "emit",
-    "restart",
-+   "final",
-    "tex",
-    "texbias",
-    "texlod",
-@@ -177,7 +180,10 @@ const char *operationStr[OP_LAST + 1] =
-    "insbf",
-    "extbf",
-    "bfind",
-+   "brev",
-+   "bmsk",
-    "permt",
-+   "sgxt",
-    "atom",
-    "bar",
-    "vadd",
-@@ -193,6 +199,7 @@ const char *operationStr[OP_LAST + 1] =
-    "shfl",
-    "vote",
-    "bufq",
-+   "warpsync",
-    "(invalid)"
- };
- 
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
-index 6df2664da22..4e5b21d9176 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
-@@ -988,6 +988,8 @@ GCRA::coalesce(ArrayList& insns)
-    case 0x110:
-    case 0x120:
-    case 0x130:
-+   case 0x140:
-+   case 0x160:
-       ret = doCoalesce(insns, JOIN_MASK_UNION);
-       break;
-    default:
-@@ -2297,13 +2299,25 @@ RegAlloc::InsertConstraintsPass::texConstraintGM107(TexInstruction *tex)
-    if (isTextureOp(tex->op))
-       textureMask(tex);
- 
--   if (isScalarTexGM107(tex)) {
--      handleScalarTexGM107(tex);
--      return;
--   }
-+   if (targ->getChipset() < NVISA_GV100_CHIPSET) {
-+      if (isScalarTexGM107(tex)) {
-+         handleScalarTexGM107(tex);
-+         return;
-+      }
- 
--   assert(!tex->tex.scalar);
--   condenseDefs(tex);
-+      assert(!tex->tex.scalar);
-+      condenseDefs(tex);
-+   } else {
-+      if (isTextureOp(tex->op)) {
-+         int defCount = tex->defCount(0xff);
-+         if (defCount > 3)
-+            condenseDefs(tex, 2, 3);
-+         if (defCount > 1)
-+            condenseDefs(tex, 0, 1);
-+      } else {
-+         condenseDefs(tex);
-+      }
-+   }
- 
-    if (isSurfaceOp(tex->op)) {
-       int s = tex->tex.target.getDim() +
-@@ -2485,6 +2499,8 @@ RegAlloc::InsertConstraintsPass::visit(BasicBlock *bb)
-          case 0x110:
-          case 0x120:
-          case 0x130:
-+         case 0x140:
-+         case 0x160:
-             texConstraintGM107(tex);
-             break;
-          default:
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
-new file mode 100644
-index 00000000000..54443ae2770
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_sched_gm107.h
-@@ -0,0 +1,156 @@
-+#ifndef __NV50_IR_SCHED_GM107_H__
-+#define __NV50_IR_SCHED_GM107_H__
-+namespace nv50_ir {
-+
-+class SchedDataCalculatorGM107 : public Pass
-+{
-+public:
-+   SchedDataCalculatorGM107(const TargetGM107 *targ) : targ(targ) {}
-+
-+private:
-+   struct RegScores
-+   {
-+      struct ScoreData {
-+         int r[256];
-+         int p[8];
-+         int c;
-+      } rd, wr;
-+      int base;
-+
-+      void rebase(const int base)
-+      {
-+         const int delta = this->base - base;
-+         if (!delta)
-+            return;
-+         this->base = 0;
-+
-+         for (int i = 0; i < 256; ++i) {
-+            rd.r[i] += delta;
-+            wr.r[i] += delta;
-+         }
-+         for (int i = 0; i < 8; ++i) {
-+            rd.p[i] += delta;
-+            wr.p[i] += delta;
-+         }
-+         rd.c += delta;
-+         wr.c += delta;
-+      }
-+      void wipe()
-+      {
-+         memset(&rd, 0, sizeof(rd));
-+         memset(&wr, 0, sizeof(wr));
-+      }
-+      int getLatest(const ScoreData& d) const
-+      {
-+         int max = 0;
-+         for (int i = 0; i < 256; ++i)
-+            if (d.r[i] > max)
-+               max = d.r[i];
-+         for (int i = 0; i < 8; ++i)
-+            if (d.p[i] > max)
-+               max = d.p[i];
-+         if (d.c > max)
-+            max = d.c;
-+         return max;
-+      }
-+      inline int getLatestRd() const
-+      {
-+         return getLatest(rd);
-+      }
-+      inline int getLatestWr() const
-+      {
-+         return getLatest(wr);
-+      }
-+      inline int getLatest() const
-+      {
-+         return MAX2(getLatestRd(), getLatestWr());
-+      }
-+      void setMax(const RegScores *that)
-+      {
-+         for (int i = 0; i < 256; ++i) {
-+            rd.r[i] = MAX2(rd.r[i], that->rd.r[i]);
-+            wr.r[i] = MAX2(wr.r[i], that->wr.r[i]);
-+         }
-+         for (int i = 0; i < 8; ++i) {
-+            rd.p[i] = MAX2(rd.p[i], that->rd.p[i]);
-+            wr.p[i] = MAX2(wr.p[i], that->wr.p[i]);
-+         }
-+         rd.c = MAX2(rd.c, that->rd.c);
-+         wr.c = MAX2(wr.c, that->wr.c);
-+      }
-+      void print(int cycle)
-+      {
-+         for (int i = 0; i < 256; ++i) {
-+            if (rd.r[i] > cycle)
-+               INFO("rd $r%i @ %i\n", i, rd.r[i]);
-+            if (wr.r[i] > cycle)
-+               INFO("wr $r%i @ %i\n", i, wr.r[i]);
-+         }
-+         for (int i = 0; i < 8; ++i) {
-+            if (rd.p[i] > cycle)
-+               INFO("rd $p%i @ %i\n", i, rd.p[i]);
-+            if (wr.p[i] > cycle)
-+               INFO("wr $p%i @ %i\n", i, wr.p[i]);
-+         }
-+         if (rd.c > cycle)
-+            INFO("rd $c @ %i\n", rd.c);
-+         if (wr.c > cycle)
-+            INFO("wr $c @ %i\n", wr.c);
-+      }
-+   };
-+
-+   RegScores *score; // for current BB
-+   std::vector<RegScores> scoreBoards;
-+
-+   const TargetGM107 *targ;
-+   bool visit(Function *);
-+   bool visit(BasicBlock *);
-+
-+   void commitInsn(const Instruction *, int);
-+   int calcDelay(const Instruction *, int) const;
-+   void setDelay(Instruction *, int, const Instruction *);
-+   void recordWr(const Value *, int, int);
-+   void checkRd(const Value *, int, int&) const;
-+
-+   inline void emitYield(Instruction *);
-+   inline void emitStall(Instruction *, uint8_t);
-+   inline void emitReuse(Instruction *, uint8_t);
-+   inline void emitWrDepBar(Instruction *, uint8_t);
-+   inline void emitRdDepBar(Instruction *, uint8_t);
-+   inline void emitWtDepBar(Instruction *, uint8_t);
-+
-+   inline int getStall(const Instruction *) const;
-+   inline int getWrDepBar(const Instruction *) const;
-+   inline int getRdDepBar(const Instruction *) const;
-+   inline int getWtDepBar(const Instruction *) const;
-+
-+   void setReuseFlag(Instruction *);
-+
-+   inline void printSchedInfo(int, const Instruction *) const;
-+
-+   struct LiveBarUse {
-+      LiveBarUse(Instruction *insn, Instruction *usei)
-+         : insn(insn), usei(usei) { }
-+      Instruction *insn;
-+      Instruction *usei;
-+   };
-+
-+   struct LiveBarDef {
-+      LiveBarDef(Instruction *insn, Instruction *defi)
-+         : insn(insn), defi(defi) { }
-+      Instruction *insn;
-+      Instruction *defi;
-+   };
-+
-+   bool insertBarriers(BasicBlock *);
-+
-+   bool doesInsnWriteTo(const Instruction *insn, const Value *val) const;
-+   Instruction *findFirstUse(const Instruction *) const;
-+   Instruction *findFirstDef(const Instruction *) const;
-+
-+   bool needRdDepBar(const Instruction *) const;
-+   bool needWrDepBar(const Instruction *) const;
-+};
-+
-+}; // namespace nv50_ir
-+#endif
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
-index 5c6d0570ae2..765375a47df 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.cpp
-@@ -33,7 +33,7 @@ const uint8_t Target::operationSrcNr[] =
-    2, 2, 2, 2, 2, 3, 3, 3, // ADD, SUB, MUL, DIV, MOD, MAD, FMA, SAD
-    3, 3,                   // SHLADD, XMAD
-    1, 1, 1,                // ABS, NEG, NOT
--   2, 2, 2, 2, 2,          // AND, OR, XOR, SHL, SHR
-+   2, 2, 2, 3, 2, 2, 3,    // AND, OR, XOR, LOP3_LUT, SHL, SHR, SHF
-    2, 2, 1,                // MAX, MIN, SAT
-    1, 1, 1, 1,             // CEIL, FLOOR, TRUNC, CVT
-    3, 3, 3, 2, 3, 3,       // SET_AND,OR,XOR, SET, SELP, SLCT
-@@ -43,7 +43,7 @@ const uint8_t Target::operationSrcNr[] =
-    0, 0, 0,                // PRERET,CONT,BREAK
-    0, 0, 0, 0, 0, 0,       // BRKPT, JOINAT, JOIN, DISCARD, EXIT, MEMBAR
-    1, 1, 1, 2, 1, 2,       // VFETCH, PFETCH, AFETCH, EXPORT, LINTERP, PINTERP
--   1, 1,                   // EMIT, RESTART
-+   1, 1, 1,                // EMIT, RESTART, FINAL
-    1, 1, 1,                // TEX, TXB, TXL,
-    1, 1, 1, 1, 1, 1, 2,    // TXF, TXQ, TXD, TXG, TXLQ, TEXCSAA, TEXPREP
-    1, 1, 2, 2, 2, 2, 2,    // SULDB, SULDP, SUSTB, SUSTP, SUREDB, SUREDP, SULEA
-@@ -51,13 +51,15 @@ const uint8_t Target::operationSrcNr[] =
-    0,                      // TEXBAR
-    1, 1,                   // DFDX, DFDY
-    1, 2, 1, 2, 0, 0,       // RDSV, WRSV, PIXLD, QUADOP, QUADON, QUADPOP
--   2, 3, 2, 1, 3,          // POPCNT, INSBF, EXTBF, BFIND, PERMT
-+   2, 3, 2, 1, 1, 2, 3,    // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK, PERMT
-+   2,                      // SGXT
-    2, 2,                   // ATOM, BAR
-    2, 2, 2, 2, 3, 2,       // VADD, VAVG, VMIN, VMAX, VSAD, VSET,
-    2, 2, 2, 1,             // VSHR, VSHL, VSEL, CCTL
-    3,                      // SHFL
-    1,                      // VOTE
-    1,                      // BUFQ
-+   1,                      // WARPSYNC
-    0
- };
- 
-@@ -75,10 +77,10 @@ const OpClass Target::operationClass[] =
-    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
-    OPCLASS_ARITH, OPCLASS_ARITH,
-    OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH, OPCLASS_ARITH,
--   // ABS, NEG; NOT, AND, OR, XOR; SHL, SHR
-+   // ABS, NEG; NOT, AND, OR, XOR, LOP3_LUT; SHL, SHR, SHF
-    OPCLASS_CONVERT, OPCLASS_CONVERT,
--   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
--   OPCLASS_SHIFT, OPCLASS_SHIFT,
-+   OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC, OPCLASS_LOGIC,
-+   OPCLASS_SHIFT, OPCLASS_SHIFT, OPCLASS_SHIFT,
-    // MAX, MIN
-    OPCLASS_COMPARE, OPCLASS_COMPARE,
-    // SAT, CEIL, FLOOR, TRUNC; CVT
-@@ -103,8 +105,8 @@ const OpClass Target::operationClass[] =
-    OPCLASS_LOAD, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_STORE,
-    // LINTERP, PINTERP
-    OPCLASS_SFU, OPCLASS_SFU,
--   // EMIT, RESTART
--   OPCLASS_CONTROL, OPCLASS_CONTROL,
-+   // EMIT, RESTART, FINAL
-+   OPCLASS_CONTROL, OPCLASS_CONTROL, OPCLASS_CONTROL,
-    // TEX, TXB, TXL, TXF; TXQ, TXD, TXG, TXLQ; TEXCSAA, TEXPREP
-    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
-    OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE, OPCLASS_TEXTURE,
-@@ -119,9 +121,9 @@ const OpClass Target::operationClass[] =
-    // DFDX, DFDY, RDSV, WRSV; PIXLD, QUADOP, QUADON, QUADPOP
-    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_OTHER,
-    OPCLASS_OTHER, OPCLASS_OTHER, OPCLASS_CONTROL, OPCLASS_CONTROL,
--   // POPCNT, INSBF, EXTBF, BFIND; PERMT
-+   // POPCNT, INSBF, EXTBF, BFIND, BREV, BMSK; PERMT, SGXT
-+   OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
-    OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD, OPCLASS_BITFIELD,
--   OPCLASS_BITFIELD,
-    // ATOM, BAR
-    OPCLASS_ATOMIC, OPCLASS_CONTROL,
-    // VADD, VAVG, VMIN, VMAX
-@@ -136,10 +138,13 @@ const OpClass Target::operationClass[] =
-    OPCLASS_OTHER,
-    // BUFQ
-    OPCLASS_OTHER,
-+   // WARPSYNC
-+   OPCLASS_OTHER,
-    OPCLASS_PSEUDO // LAST
- };
- 
- 
-+extern Target *getTargetGV100(unsigned int chipset);
- extern Target *getTargetGM107(unsigned int chipset);
- extern Target *getTargetNVC0(unsigned int chipset);
- extern Target *getTargetNV50(unsigned int chipset);
-@@ -149,6 +154,9 @@ Target *Target::create(unsigned int chipset)
-    STATIC_ASSERT(ARRAY_SIZE(operationSrcNr) == OP_LAST + 1);
-    STATIC_ASSERT(ARRAY_SIZE(operationClass) == OP_LAST + 1);
-    switch (chipset & ~0xf) {
-+   case 0x160:
-+   case 0x140:
-+      return getTargetGV100(chipset);
-    case 0x110:
-    case 0x120:
-    case 0x130:
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
-index afeca14d7d1..0f7db116577 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target.h
-@@ -200,7 +200,7 @@ public:
-       uint8_t dstMods;
-       uint16_t srcFiles[3];
-       uint16_t dstFiles;
--      unsigned int minEncSize  : 4;
-+      unsigned int minEncSize  : 5;
-       unsigned int vector      : 1;
-       unsigned int predicate   : 1;
-       unsigned int commutative : 1;
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
-new file mode 100644
-index 00000000000..fd969e1ece5
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.cpp
-@@ -0,0 +1,594 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "codegen/nv50_ir_target_gv100.h"
-+#include "codegen/nv50_ir_lowering_gv100.h"
-+#include "codegen/nv50_ir_emit_gv100.h"
-+
-+namespace nv50_ir {
-+
-+void
-+TargetGV100::initOpInfo()
-+{
-+   unsigned int i, j;
-+
-+   static const operation commutative[] =
-+   {
-+      OP_ADD, OP_MUL, OP_MAD, OP_FMA, OP_MAX, OP_MIN,
-+      OP_SET_AND, OP_SET_OR, OP_SET_XOR, OP_SET, OP_SELP, OP_SLCT
-+   };
-+
-+   static const operation noDest[] =
-+   {
-+      OP_EXIT
-+   };
-+
-+   static const operation noPred[] =
-+   {
-+   };
-+
-+   for (i = 0; i < DATA_FILE_COUNT; ++i)
-+      nativeFileMap[i] = (DataFile)i;
-+   nativeFileMap[FILE_ADDRESS] = FILE_GPR;
-+   nativeFileMap[FILE_FLAGS] = FILE_PREDICATE;
-+
-+   for (i = 0; i < OP_LAST; ++i) {
-+      opInfo[i].variants = NULL;
-+      opInfo[i].op = (operation)i;
-+      opInfo[i].srcTypes = 1 << (int)TYPE_F32;
-+      opInfo[i].dstTypes = 1 << (int)TYPE_F32;
-+      opInfo[i].immdBits = 0;
-+      opInfo[i].srcNr = operationSrcNr[i];
-+
-+      for (j = 0; j < opInfo[i].srcNr; ++j) {
-+         opInfo[i].srcMods[j] = 0;
-+         opInfo[i].srcFiles[j] = 1 << (int)FILE_GPR;
-+      }
-+      opInfo[i].dstMods = 0;
-+      opInfo[i].dstFiles = 1 << (int)FILE_GPR;
-+
-+      opInfo[i].hasDest = 1;
-+      opInfo[i].vector = (i >= OP_TEX && i <= OP_TEXCSAA);
-+      opInfo[i].commutative = false; /* set below */
-+      opInfo[i].pseudo = (i < OP_MOV);
-+      opInfo[i].predicate = !opInfo[i].pseudo;
-+      opInfo[i].flow = (i >= OP_BRA && i <= OP_JOIN);
-+      opInfo[i].minEncSize = 16;
-+   }
-+   for (i = 0; i < ARRAY_SIZE(commutative); ++i)
-+      opInfo[commutative[i]].commutative = true;
-+   for (i = 0; i < ARRAY_SIZE(noDest); ++i)
-+      opInfo[noDest[i]].hasDest = 0;
-+   for (i = 0; i < ARRAY_SIZE(noPred); ++i)
-+      opInfo[noPred[i]].predicate = 0;
-+}
-+
-+struct opInfo {
-+   struct {
-+      uint8_t files;
-+      uint8_t mods;
-+   } src[3];
-+};
-+
-+#define SRC_NONE 0
-+#define SRC_R    (1 << FILE_GPR)
-+#define SRC_I    (1 << FILE_MEMORY_CONST)
-+#define SRC_C    (1 << FILE_IMMEDIATE)
-+#define SRC_RC   (SRC_R |         SRC_C)
-+#define SRC_RI   (SRC_R | SRC_I        )
-+#define SRC_RIC  (SRC_R | SRC_I | SRC_C)
-+
-+#define MOD_NONE 0
-+#define MOD_NEG  NV50_IR_MOD_NEG
-+#define MOD_ABS  NV50_IR_MOD_ABS
-+#define MOD_NOT  NV50_IR_MOD_NOT
-+#define MOD_NA   (MOD_NEG | MOD_ABS)
-+
-+#define OPINFO(O,SA,MA,SB,MB,SC,MC)                                            \
-+static struct opInfo                                                           \
-+opInfo_##O = {                                                                 \
-+   .src = { { SRC_##SA, MOD_##MA },                                            \
-+            { SRC_##SB, MOD_##MB },                                            \
-+            { SRC_##SC, MOD_##MC }},                                           \
-+};
-+
-+
-+/* Handled by GV100LegalizeSSA. */
-+OPINFO(FABS     , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(FCMP     , R   , NONE, RIC , NONE, RIC , NONE); //XXX: use FSEL for mods
-+OPINFO(FNEG     , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(FSET     , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(ICMP     , R   , NONE, RIC , NONE, RIC , NONE);
-+OPINFO(IMUL     , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(INEG     , RIC , NEG , NONE, NONE, NONE, NONE);
-+OPINFO(ISET     , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(LOP2     , R   , NOT , RIC , NOT , NONE, NONE);
-+OPINFO(NOT      , RIC , NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SAT      , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(SHL      , RIC , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(SHR      , RIC , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(SUB      , R   , NONE, RIC , NEG , NONE, NONE);
-+OPINFO(IMNMX    , R   , NONE, RIC , NONE, NONE, NONE);
-+
-+/* Handled by CodeEmitterGV100. */
-+OPINFO(AL2P     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(ALD      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(AST      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(ATOM     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(ATOMS    , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(BAR      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(BRA      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(BMSK     , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(BREV     , RIC , NONE, NONE, NONE, NONE, NONE);
-+OPINFO(CCTL     , NONE, NONE, NONE, NONE, NONE, NONE);
-+//OPINFO(CS2R     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(DADD     , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(DFMA     , R   , NA  , RIC , NA  , RIC , NA  );
-+OPINFO(DMUL     , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(DSETP    , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(EXIT     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(F2F      , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(F2I      , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(FADD     , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(FFMA     , R   , NA  , RIC , NA  , RIC , NA  );
-+OPINFO(FLO      , RIC , NOT , NONE, NONE, NONE, NONE);
-+OPINFO(FMNMX    , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(FMUL     , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(FRND     , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(FSET_BF  , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(FSETP    , R   , NA  , RIC , NA  , NONE, NONE);
-+OPINFO(FSWZADD  , R   , NONE, R   , NONE, NONE, NONE);
-+OPINFO(I2F      , RIC , NONE, NONE, NONE, NONE, NONE);
-+OPINFO(IABS     , RIC , NONE, NONE, NONE, NONE, NONE);
-+OPINFO(IADD3    , R   , NEG , RIC , NEG , R   , NEG );
-+OPINFO(IMAD     , R   , NONE, RIC , NONE, RIC , NEG );
-+OPINFO(IMAD_WIDE, R   , NONE, RIC , NONE, RC  , NEG );
-+OPINFO(IPA      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(ISBERD   , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(ISETP    , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(KILL     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(LD       , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(LDC      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(LDL      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(LDS      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(LEA      , R   , NEG , I   , NONE, RIC , NEG );
-+OPINFO(LOP3_LUT , R   , NONE, RIC , NONE, R   , NONE);
-+OPINFO(MEMBAR   , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(MOV      , RIC , NONE, NONE, NONE, NONE, NONE);
-+OPINFO(MUFU     , RIC , NA  , NONE, NONE, NONE, NONE);
-+OPINFO(NOP      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(OUT      , R   , NONE, RI  , NONE, NONE, NONE);
-+OPINFO(PIXLD    , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(PLOP3_LUT, NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(POPC     , RIC , NOT , NONE, NONE, NONE, NONE);
-+OPINFO(PRMT     , R   , NONE, RIC , NONE, RIC , NONE);
-+OPINFO(RED      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SGXT     , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(S2R      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SEL      , R   , NONE, RIC , NONE, NONE, NONE);
-+OPINFO(SHF      , R   , NONE, RIC , NONE, RIC , NONE);
-+OPINFO(SHFL     , R   , NONE, R   , NONE, R   , NONE);
-+OPINFO(ST       , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(STL      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(STS      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SUATOM   , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SULD     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(SUST     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TEX      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TLD      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TLD4     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TMML     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TXD      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(TXQ      , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(VOTE     , NONE, NONE, NONE, NONE, NONE, NONE);
-+OPINFO(WARPSYNC , R   , NONE, NONE, NONE, NONE, NONE);
-+
-+static const struct opInfo *
-+getOpInfo(const Instruction *i)
-+{
-+   switch (i->op) {
-+   case OP_ABS:
-+      if (isFloatType(i->dType))
-+         return &opInfo_FABS;
-+      return &opInfo_IABS;
-+   case OP_ADD:
-+      if (isFloatType(i->dType)) {
-+         if (i->dType == TYPE_F32)
-+            return &opInfo_FADD;
-+         else
-+            return &opInfo_DADD;
-+      } else {
-+         return &opInfo_IADD3;
-+      }
-+      break;
-+   case OP_AFETCH: return &opInfo_AL2P;
-+   case OP_AND:
-+   case OP_OR:
-+   case OP_XOR:
-+      if (i->def(0).getFile() == FILE_PREDICATE)
-+         return &opInfo_PLOP3_LUT;
-+      return &opInfo_LOP2;
-+   case OP_ATOM:
-+      if (i->src(0).getFile() == FILE_MEMORY_SHARED)
-+         return &opInfo_ATOMS;
-+      else
-+         if (!i->defExists(0) && i->subOp < NV50_IR_SUBOP_ATOM_CAS)
-+            return &opInfo_RED;
-+         else
-+            return &opInfo_ATOM;
-+      break;
-+   case OP_BAR: return &opInfo_BAR;
-+   case OP_BFIND: return &opInfo_FLO;
-+   case OP_BMSK: return &opInfo_BMSK;
-+   case OP_BREV: return &opInfo_BREV;
-+   case OP_BRA:
-+   case OP_JOIN: return &opInfo_BRA; //XXX
-+   case OP_CCTL: return &opInfo_CCTL;
-+   case OP_CEIL:
-+   case OP_CVT:
-+   case OP_FLOOR:
-+   case OP_TRUNC:
-+      if (i->op == OP_CVT && (i->def(0).getFile() == FILE_PREDICATE ||
-+                                 i->src(0).getFile() == FILE_PREDICATE)) {
-+         return &opInfo_MOV;
-+      } else if (isFloatType(i->dType)) {
-+         if (isFloatType(i->sType)) {
-+            if (i->sType == i->dType)
-+               return &opInfo_FRND;
-+            else
-+               return &opInfo_F2F;
-+         } else {
-+            return &opInfo_I2F;
-+         }
-+      } else {
-+         if (isFloatType(i->sType))
-+            return &opInfo_F2I;
-+      }
-+      break;
-+   case OP_COS:
-+   case OP_EX2:
-+   case OP_LG2:
-+   case OP_RCP:
-+   case OP_RSQ:
-+   case OP_SIN:
-+   case OP_SQRT: return &opInfo_MUFU;
-+   case OP_DISCARD: return &opInfo_KILL;
-+   case OP_EMIT:
-+   case OP_FINAL:
-+   case OP_RESTART: return &opInfo_OUT;
-+   case OP_EXIT: return &opInfo_EXIT;
-+   case OP_EXPORT: return &opInfo_AST;
-+   case OP_FMA:
-+   case OP_MAD:
-+      if (isFloatType(i->dType)) {
-+         if (i->dType == TYPE_F32)
-+            return &opInfo_FFMA;
-+         else
-+            return &opInfo_DFMA;
-+      } else {
-+         if (typeSizeof(i->dType) != 8)
-+            return &opInfo_IMAD;
-+         else
-+            return &opInfo_IMAD_WIDE;
-+      }
-+      break;
-+   case OP_JOINAT: return &opInfo_NOP; //XXX
-+   case OP_LINTERP: return &opInfo_IPA;
-+   case OP_LOAD:
-+      switch (i->src(0).getFile()) {
-+      case FILE_MEMORY_CONST : return &opInfo_LDC;
-+      case FILE_MEMORY_LOCAL : return &opInfo_LDL;
-+      case FILE_MEMORY_SHARED: return &opInfo_LDS;
-+      case FILE_MEMORY_GLOBAL: return &opInfo_LD;
-+      default:
-+         break;
-+      }
-+      break;
-+   case OP_LOP3_LUT: return &opInfo_LOP3_LUT;
-+   case OP_MAX:
-+   case OP_MIN:
-+      if (isFloatType(i->dType)) {
-+         if (i->dType == TYPE_F32)
-+            return &opInfo_FMNMX;
-+      } else {
-+         return &opInfo_IMNMX;
-+      }
-+      break;
-+   case OP_MEMBAR: return &opInfo_MEMBAR;
-+   case OP_MOV: return &opInfo_MOV;
-+   case OP_MUL:
-+      if (isFloatType(i->dType)) {
-+         if (i->dType == TYPE_F32)
-+            return &opInfo_FMUL;
-+         else
-+            return &opInfo_DMUL;
-+      }
-+      return &opInfo_IMUL;
-+   case OP_NEG:
-+      if (isFloatType(i->dType))
-+         return &opInfo_FNEG;
-+      return &opInfo_INEG;
-+   case OP_NOT: return &opInfo_NOT;
-+   case OP_PERMT: return &opInfo_PRMT;
-+   case OP_PFETCH: return &opInfo_ISBERD;
-+   case OP_PIXLD: return &opInfo_PIXLD;
-+   case OP_POPCNT: return &opInfo_POPC;
-+   case OP_QUADOP: return &opInfo_FSWZADD;
-+   case OP_RDSV:
-+#if 0
-+      if (targ->isCS2RSV(i->getSrc(0)->reg.data.sv.sv))
-+         return &opInfo_CS2R;
-+#endif
-+      return &opInfo_S2R;
-+   case OP_SAT: return &opInfo_SAT;
-+   case OP_SELP: return &opInfo_SEL;
-+   case OP_SET:
-+   case OP_SET_AND:
-+   case OP_SET_OR:
-+   case OP_SET_XOR:
-+      if (i->def(0).getFile() != FILE_PREDICATE) {
-+         if (isFloatType(i->dType)) {
-+            if (i->dType == TYPE_F32)
-+               return &opInfo_FSET_BF;
-+         } else {
-+            if (isFloatType(i->sType))
-+                  return &opInfo_FSET;
-+            return &opInfo_ISET;
-+         }
-+      } else {
-+         if (isFloatType(i->sType))
-+            if (i->sType == TYPE_F64)
-+               return &opInfo_DSETP;
-+            else
-+               return &opInfo_FSETP;
-+         else
-+            return &opInfo_ISETP;
-+      }
-+      break;
-+   case OP_SGXT: return &opInfo_SGXT;
-+   case OP_SHF: return &opInfo_SHF;
-+   case OP_SHFL: return &opInfo_SHFL;
-+   case OP_SHL: return &opInfo_SHL;
-+   case OP_SHLADD: return &opInfo_LEA;
-+   case OP_SHR: return &opInfo_SHR;
-+   case OP_SLCT:
-+      if (isFloatType(i->sType))
-+         return &opInfo_FCMP;
-+      return &opInfo_ICMP;
-+   case OP_STORE:
-+      switch (i->src(0).getFile()) {
-+      case FILE_MEMORY_LOCAL : return &opInfo_STL;
-+      case FILE_MEMORY_SHARED: return &opInfo_STS;
-+      case FILE_MEMORY_GLOBAL: return &opInfo_ST;
-+      default:
-+         break;
-+      }
-+      break;
-+   case OP_SUB: return &opInfo_SUB;
-+   case OP_SULDB:
-+   case OP_SULDP: return &opInfo_SULD;
-+   case OP_SUREDB:
-+   case OP_SUREDP: return &opInfo_SUATOM;
-+   case OP_SUSTB:
-+   case OP_SUSTP: return &opInfo_SUST;
-+   case OP_TEX:
-+   case OP_TXB:
-+   case OP_TXL: return &opInfo_TEX;
-+   case OP_TXD: return &opInfo_TXD;
-+   case OP_TXF: return &opInfo_TLD;
-+   case OP_TXG: return &opInfo_TLD4;
-+   case OP_TXLQ: return &opInfo_TMML;
-+   case OP_TXQ: return &opInfo_TXQ;
-+   case OP_VFETCH: return &opInfo_ALD;
-+   case OP_VOTE: return &opInfo_VOTE;
-+   case OP_WARPSYNC: return &opInfo_WARPSYNC;
-+   default:
-+      break;
-+   }
-+   return NULL;
-+}
-+
-+bool
-+TargetGV100::isSatSupported(const Instruction *i) const
-+{
-+   switch (i->dType) {
-+   case TYPE_F32:
-+      switch (i->op) {
-+      case OP_ADD:
-+      case OP_FMA:
-+      case OP_MAD:
-+      case OP_MUL: return true;
-+      default:
-+         break;
-+      }
-+      break;
-+   default:
-+      break;
-+   }
-+   return false;
-+}
-+
-+bool
-+TargetGV100::isModSupported(const Instruction *i, int s, Modifier mod) const
-+{
-+   const struct opInfo *info = nv50_ir::getOpInfo(i);
-+   uint8_t mods = 0;
-+   if (info && s < (int)ARRAY_SIZE(info->src))
-+      mods = info->src[s].mods;
-+   return (mod & Modifier(mods)) == mod;
-+}
-+
-+bool
-+TargetGV100::isOpSupported(operation op, DataType ty) const
-+{
-+   if (op == OP_MAD || op == OP_FMA)
-+      return true;
-+   if (ty == TYPE_F32) {
-+      if (op == OP_MAX)
-+         return true;
-+   }
-+   if (op == OP_RSQ)
-+      return true;
-+   if (op == OP_SET ||
-+       op == OP_SET_AND ||
-+       op == OP_SET_OR ||
-+       op == OP_SET_XOR)
-+      return true;
-+   if (op == OP_SHLADD)
-+      return true;
-+   return false;
-+}
-+
-+bool
-+TargetGV100::isBarrierRequired(const Instruction *i) const
-+{
-+   switch (i->op) {
-+   case OP_BREV:
-+      return true;
-+   default:
-+      break;
-+   }
-+
-+   return TargetGM107::isBarrierRequired(i);
-+}
-+
-+bool
-+TargetGV100::insnCanLoad(const Instruction *i, int s,
-+                         const Instruction *ld) const
-+{
-+   const struct opInfo *info = nv50_ir::getOpInfo(i);
-+   uint16_t files = 0;
-+
-+   if (ld->src(0).getFile() == FILE_IMMEDIATE && ld->getSrc(0)->reg.data.u64 == 0)
-+      return (!i->isPseudo() &&
-+              !i->asTex() &&
-+              i->op != OP_EXPORT && i->op != OP_STORE);
-+
-+   if (ld->src(0).isIndirect(0))
-+      return false;
-+
-+   if (info && s < (int)ARRAY_SIZE(info->src)) {
-+      files = info->src[s].files;
-+      if ((s == 1 && i->srcExists(2) && i->src(2).getFile() != FILE_GPR) ||
-+          (s == 2 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR)) {
-+         files &= ~(1 << FILE_MEMORY_CONST);
-+         files &= ~(1 << FILE_IMMEDIATE);
-+      } else
-+      if ((i->op == OP_SHL || i->op == OP_SHR) &&
-+          ((s == 0 && i->srcExists(1) && i->src(1).getFile() != FILE_GPR) ||
-+           (s == 1 && i->srcExists(0) && i->src(0).getFile() != FILE_GPR))) {
-+         files &= ~(1 << FILE_MEMORY_CONST);
-+         files &= ~(1 << FILE_IMMEDIATE);
-+      }
-+   }
-+
-+   if (ld->src(0).getFile() == FILE_IMMEDIATE) {
-+      if (i->sType == TYPE_F64) {
-+         if (ld->getSrc(0)->asImm()->reg.data.u64 & 0x00000000ffffffff)
-+            return false;
-+      }
-+   }
-+
-+   return (files & (1 << ld->src(0).getFile()));
-+}
-+
-+void
-+TargetGV100::getBuiltinCode(const uint32_t **code, uint32_t *size) const
-+{
-+   //XXX: find out why gv100 (tu1xx is fine) hangs without this
-+   static uint32_t builtin[] = {
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+      0x0000794d, 0x00000000, 0x03800000, 0x03ffde00,
-+   };
-+   *code = builtin;
-+   *size = sizeof(builtin);
-+}
-+
-+uint32_t
-+TargetGV100::getBuiltinOffset(int builtin) const
-+{
-+   return 0;
-+}
-+
-+bool
-+TargetGV100::runLegalizePass(Program *prog, CGStage stage) const
-+{
-+   if (stage == CG_STAGE_PRE_SSA) {
-+      GM107LoweringPass pass1(prog);
-+      GV100LoweringPass pass2(prog);
-+      pass1.run(prog, false, true);
-+      pass2.run(prog, false, true);
-+      return true;
-+   } else
-+   if (stage == CG_STAGE_SSA) {
-+      GV100LegalizeSSA pass(prog);
-+      return pass.run(prog, false, true);
-+   } else
-+   if (stage == CG_STAGE_POST_RA) {
-+      NVC0LegalizePostRA pass(prog);
-+      return pass.run(prog, false, true);
-+   }
-+   return false;
-+}
-+
-+CodeEmitter *
-+TargetGV100::getCodeEmitter(Program::Type type)
-+{
-+   return new CodeEmitterGV100(this);
-+}
-+
-+TargetGV100::TargetGV100(unsigned int chipset)
-+   : TargetGM107(chipset)
-+{
-+   initOpInfo();
-+};
-+
-+Target *getTargetGV100(unsigned int chipset)
-+{
-+   return new TargetGV100(chipset);
-+}
-+
-+};
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
-new file mode 100644
-index 00000000000..897e6a22d30
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_gv100.h
-@@ -0,0 +1,52 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#ifndef __NV50_IR_TARGET_GV100_H__
-+#define __NV50_IR_TARGET_GV100_H__
-+#include "codegen/nv50_ir_target_gm107.h"
-+
-+namespace nv50_ir {
-+
-+class TargetGV100 : public TargetGM107 {
-+public:
-+   TargetGV100(unsigned int chipset);
-+
-+   virtual CodeEmitter *getCodeEmitter(Program::Type);
-+
-+   virtual bool runLegalizePass(Program *, CGStage stage) const;
-+
-+   virtual void getBuiltinCode(const uint32_t **code, uint32_t *size) const;
-+   virtual uint32_t getBuiltinOffset(int builtin) const;
-+
-+   virtual bool insnCanLoad(const Instruction *, int, const Instruction *) const;
-+   virtual bool isOpSupported(operation, DataType) const;
-+   virtual bool isModSupported(const Instruction *, int s, Modifier) const;
-+   virtual bool isSatSupported(const Instruction *) const;
-+
-+   virtual bool isBarrierRequired(const Instruction *) const;
-+
-+private:
-+   void initOpInfo();
-+   void initProps(const struct opProperties *, int);
-+};
-+
-+};
-+#endif
-diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
-index 60134b445db..ed5b343ccba 100644
---- a/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
-+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_target_nvc0.cpp
-@@ -30,7 +30,7 @@ Target *getTargetNVC0(unsigned int chipset)
- }
- 
- TargetNVC0::TargetNVC0(unsigned int card) :
--   Target(card < 0x110, false, card >= 0xe4)
-+   Target(card < 0x110, false, card >= 0xe4 && card < 0x140)
- {
-    chipset = card;
-    initOpInfo();
-diff --git a/src/gallium/drivers/nouveau/meson.build b/src/gallium/drivers/nouveau/meson.build
-index 7a1d18a6394..68cfebdf20c 100644
---- a/src/gallium/drivers/nouveau/meson.build
-+++ b/src/gallium/drivers/nouveau/meson.build
-@@ -150,17 +150,31 @@ files_libnouveau = files(
-   'codegen/nv50_ir_util.cpp',
-   'codegen/nv50_ir_util.h',
-   'codegen/unordered_set.h',
-+  'codegen/nv50_ir_emit_gv100.cpp',
-+  'codegen/nv50_ir_emit_gv100.h',
-   'codegen/nv50_ir_emit_gk110.cpp',
-   'codegen/nv50_ir_emit_gm107.cpp',
-   'codegen/nv50_ir_emit_nvc0.cpp',
-+  'codegen/nv50_ir_lowering_gv100.cpp',
-+  'codegen/nv50_ir_lowering_gv100.h',
-   'codegen/nv50_ir_lowering_gm107.cpp',
-   'codegen/nv50_ir_lowering_gm107.h',
-   'codegen/nv50_ir_lowering_nvc0.cpp',
-   'codegen/nv50_ir_lowering_nvc0.h',
-+  'codegen/nv50_ir_target_gv100.cpp',
-+  'codegen/nv50_ir_target_gv100.h',
-   'codegen/nv50_ir_target_gm107.cpp',
-   'codegen/nv50_ir_target_gm107.h',
-   'codegen/nv50_ir_target_nvc0.cpp',
-   'codegen/nv50_ir_target_nvc0.h',
-+  'nvc0/cla0c0qmd.h',
-+  'nvc0/clc0c0qmd.h',
-+  'nvc0/clc3c0qmd.h',
-+  'nvc0/drf.h',
-+  'nvc0/qmd.h',
-+  'nvc0/qmda0c0.c',
-+  'nvc0/qmdc0c0.c',
-+  'nvc0/qmdc3c0.c',
-   'nvc0/gm107_texture.xml.h',
-   'nvc0/nvc0_3d.xml.h',
-   'nvc0/nvc0_compute.c',
-diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
-index de9cce3812a..8606ba43c1a 100644
---- a/src/gallium/drivers/nouveau/nouveau_screen.c
-+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
-@@ -188,7 +188,11 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
-    if (nv_dbg)
-       nouveau_mesa_debug = atoi(nv_dbg);
- 
--   screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
-+   if (dev->chipset < 0x140)
-+      screen->prefer_nir = debug_get_bool_option("NV50_PROG_USE_NIR", false);
-+   else
-+      screen->prefer_nir = true;
-+
-    screen->force_enable_cl = debug_get_bool_option("NOUVEAU_ENABLE_CL", false);
-    if (screen->force_enable_cl)
-       glsl_type_singleton_init_or_ref();
-diff --git a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
-index 899d73d7398..31e7cf82233 100644
---- a/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
-+++ b/src/gallium/drivers/nouveau/nv50/nv50_2d.xml.h
-@@ -218,9 +218,7 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define NV50_2D_PATTERN_SELECT_BITMAP_1X64			0x00000002
- #define NV50_2D_PATTERN_SELECT_COLOR				0x00000003
- 
--#define NVC0_2D_UNK02B8(i0)				       (0x000002b8 + 0x4*(i0))
--#define NVC0_2D_UNK02B8__ESIZE					0x00000004
--#define NVC0_2D_UNK02B8__LEN					0x00000009
-+#define NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE  0x000002b8
- 
- #define NVC0_2D_UNK2DC						0x000002dc
- 
-diff --git a/src/gallium/drivers/nouveau/nv_object.xml.h b/src/gallium/drivers/nouveau/nv_object.xml.h
-index 664bfae9f64..fac195d4846 100644
---- a/src/gallium/drivers/nouveau/nv_object.xml.h
-+++ b/src/gallium/drivers/nouveau/nv_object.xml.h
-@@ -195,6 +195,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define GM200_3D_CLASS						0x0000b197
- #define GP100_3D_CLASS						0x0000c097
- #define GP102_3D_CLASS						0x0000c197
-+#define GV100_3D_CLASS						0x0000c397
-+#define TU102_3D_CLASS						0x0000c597
- #define NV50_2D_CLASS						0x0000502d
- #define NVC0_2D_CLASS						0x0000902d
- #define NV50_COMPUTE_CLASS					0x000050c0
-@@ -207,6 +209,8 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define GM200_COMPUTE_CLASS					0x0000b1c0
- #define GP100_COMPUTE_CLASS					0x0000c0c0
- #define GP104_COMPUTE_CLASS					0x0000c1c0
-+#define GV100_COMPUTE_CLASS					0x0000c3c0
-+#define TU102_COMPUTE_CLASS					0x0000c5c0
- #define NV84_CRYPT_CLASS					0x000074c1
- #define BLOB_NVC0_PCOPY1_CLASS					0x000090b8
- #define BLOB_NVC0_PCOPY0_CLASS					0x000090b5
-diff --git a/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
-new file mode 100644
-index 00000000000..c0829f1cdc2
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/cla0c0qmd.h
-@@ -0,0 +1,660 @@
-+/*******************************************************************************
-+    Copyright (c) 2016 NVIDIA Corporation
-+
-+    Permission is hereby granted, free of charge, to any person obtaining a copy
-+    of this software and associated documentation files (the "Software"), to
-+    deal in the Software without restriction, including without limitation the
-+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+    sell copies of the Software, and to permit persons to whom the Software is
-+    furnished to do so, subject to the following conditions:
-+
-+        The above copyright notice and this permission notice shall be
-+        included in all copies or substantial portions of the Software.
-+
-+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+    DEALINGS IN THE SOFTWARE.
-+
-+*******************************************************************************/
-+
-+/* AUTO GENERATED FILE -- DO NOT EDIT */
-+
-+#ifndef __CLA0C0QMD_H__
-+#define __CLA0C0QMD_H__
-+
-+/*
-+** Queue Meta Data, Version 00_06
-+ */
-+
-+// The below C preprocessor definitions describe "multi-word" structures, where
-+// fields may have bit numbers beyond 32.  For example, MW(127:96) means
-+// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
-+// syntax is to distinguish from similar "X:Y" single-word definitions: the
-+// macros historically used for single-word definitions would fail with
-+// multi-word definitions.
-+//
-+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
-+// interface layer of nvidia.ko for an example of how to manipulate
-+// these MW(X:Y) definitions.
-+
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_A                         MW(30:0)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_B                         MW(31:31)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_C                         MW(62:32)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_D                         MW(63:63)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_E                         MW(94:64)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_F                         MW(95:95)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_G                         MW(126:96)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_H                         MW(127:127)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_A_A                          MW(159:128)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_I                         MW(191:160)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_J                         MW(196:192)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_A                            MW(199:197)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K                         MW(200:200)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_K_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L                         MW(201:201)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_L_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVA0C0_QMDV00_06_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_B                            MW(207:204)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_M                         MW(222:208)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N                         MW(223:223)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_N_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_O                         MW(248:224)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_C                            MW(249:249)
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
-+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV00_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV00_06_PROGRAM_OFFSET                            MW(287:256)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_P                         MW(319:288)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Q                         MW(327:320)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_D                            MW(335:328)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_R                         MW(351:336)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_S                         MW(357:352)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_E                            MW(365:358)
-+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVA0C0_QMDV00_06_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T                         MW(370:370)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_T_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U                         MW(371:371)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_U_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_THROTTLED                                 MW(372:372)
-+#define NVA0C0_QMDV00_06_THROTTLED_FALSE                           0x00000000
-+#define NVA0C0_QMDV00_06_THROTTLED_TRUE                            0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_A                         MW(376:376)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_E2_B                         MW(377:377)
-+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVA0C0_QMDV00_06_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
-+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
-+#define NVA0C0_QMDV00_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
-+#define NVA0C0_QMDV00_06_SAMPLER_INDEX                             MW(382:382)
-+#define NVA0C0_QMDV00_06_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVA0C0_QMDV00_06_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_E3_A                         MW(383:383)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH                          MW(447:432)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
-+#define NVA0C0_QMDV00_06_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_V                         MW(535:512)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_F                            MW(542:536)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W                         MW(543:543)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_FALSE                   0x00000000
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_W_TRUE                    0x00000001
-+#define NVA0C0_QMDV00_06_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_G                            MW(575:562)
-+#define NVA0C0_QMDV00_06_QMD_VERSION                               MW(579:576)
-+#define NVA0C0_QMDV00_06_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_H                            MW(591:584)
-+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVA0C0_QMDV00_06_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_I                            MW(668:648)
-+#define NVA0C0_QMDV00_06_L1_CONFIGURATION                          MW(671:669)
-+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
-+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
-+#define NVA0C0_QMDV00_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_X                         MW(703:672)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_V1_Y                         MW(735:704)
-+#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVA0C0_QMDV00_06_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_J                            MW(783:776)
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_K                            MW(791:791)
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVA0C0_QMDV00_06_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_L                            MW(879:872)
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_M                            MW(887:887)
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV00_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV00_06_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVA0C0_QMDV00_06_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
-+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
-+#define NVA0C0_QMDV00_06_QMD_RESERVED_N                            MW(1466:1464)
-+#define NVA0C0_QMDV00_06_BARRIER_COUNT                             MW(1471:1467)
-+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
-+#define NVA0C0_QMDV00_06_REGISTER_COUNT                            MW(1503:1496)
-+#define NVA0C0_QMDV00_06_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
-+#define NVA0C0_QMDV00_06_SASS_VERSION                              MW(1535:1528)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_A                               MW(1567:1536)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_B                               MW(1599:1568)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_C                               MW(1631:1600)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_D                               MW(1663:1632)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_E                               MW(1695:1664)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_F                               MW(1727:1696)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_G                               MW(1759:1728)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_H                               MW(1791:1760)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_I                               MW(1823:1792)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_J                               MW(1855:1824)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_K                               MW(1887:1856)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_L                               MW(1919:1888)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_M                               MW(1951:1920)
-+#define NVA0C0_QMDV00_06_QMD_SPARE_N                               MW(1983:1952)
-+#define NVA0C0_QMDV00_06_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVA0C0_QMDV00_06_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+/*
-+** Queue Meta Data, Version 01_06
-+ */
-+
-+#define NVA0C0_QMDV01_06_OUTER_PUT                                 MW(30:0)
-+#define NVA0C0_QMDV01_06_OUTER_OVERFLOW                            MW(31:31)
-+#define NVA0C0_QMDV01_06_OUTER_GET                                 MW(62:32)
-+#define NVA0C0_QMDV01_06_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVA0C0_QMDV01_06_INNER_GET                                 MW(94:64)
-+#define NVA0C0_QMDV01_06_INNER_OVERFLOW                            MW(95:95)
-+#define NVA0C0_QMDV01_06_INNER_PUT                                 MW(126:96)
-+#define NVA0C0_QMDV01_06_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_A_A                          MW(159:128)
-+#define NVA0C0_QMDV01_06_SCHEDULER_NEXT_QMD_POINTER                MW(191:160)
-+#define NVA0C0_QMDV01_06_QMD_GROUP_ID                              MW(197:192)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_A                            MW(199:198)
-+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE             MW(200:200)
-+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_FALSE       0x00000000
-+#define NVA0C0_QMDV01_06_SCHEDULE_ON_PUT_UPDATE_ENABLE_TRUE        0x00000001
-+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
-+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVA0C0_QMDV01_06_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVA0C0_QMDV01_06_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
-+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVA0C0_QMDV01_06_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_B                            MW(207:205)
-+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_ADDR                    MW(222:208)
-+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID                   MW(223:223)
-+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_FALSE             0x00000000
-+#define NVA0C0_QMDV01_06_SKED_PRIVATE_LIST_VALID_TRUE              0x00000001
-+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_SIZE                       MW(248:224)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_C                            MW(249:249)
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
-+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV01_06_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV01_06_PROGRAM_OFFSET                            MW(287:256)
-+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_D                            MW(335:328)
-+#define NVA0C0_QMDV01_06_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVA0C0_QMDV01_06_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVA0C0_QMDV01_06_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_06_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_06_THROTTLED                                 MW(372:372)
-+#define NVA0C0_QMDV01_06_THROTTLED_FALSE                           0x00000000
-+#define NVA0C0_QMDV01_06_THROTTLED_TRUE                            0x00000001
-+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR                         MW(376:376)
-+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
-+#define NVA0C0_QMDV01_06_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
-+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
-+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
-+#define NVA0C0_QMDV01_06_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
-+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVA0C0_QMDV01_06_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
-+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
-+#define NVA0C0_QMDV01_06_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
-+#define NVA0C0_QMDV01_06_SAMPLER_INDEX                             MW(382:382)
-+#define NVA0C0_QMDV01_06_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVA0C0_QMDV01_06_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION                   MW(383:383)
-+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
-+#define NVA0C0_QMDV01_06_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
-+#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH                          MW(447:432)
-+#define NVA0C0_QMDV01_06_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
-+#define NVA0C0_QMDV01_06_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
-+#define NVA0C0_QMDV01_06_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
-+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA                              MW(535:512)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_F                            MW(542:536)
-+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE                       MW(543:543)
-+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_FALSE                 0x00000000
-+#define NVA0C0_QMDV01_06_LAUNCH_QUOTA_ENABLE_TRUE                  0x00000001
-+#define NVA0C0_QMDV01_06_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_G                            MW(575:562)
-+#define NVA0C0_QMDV01_06_QMD_VERSION                               MW(579:576)
-+#define NVA0C0_QMDV01_06_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_H                            MW(591:584)
-+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVA0C0_QMDV01_06_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_I                            MW(668:648)
-+#define NVA0C0_QMDV01_06_L1_CONFIGURATION                          MW(671:669)
-+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
-+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
-+#define NVA0C0_QMDV01_06_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
-+#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVA0C0_QMDV01_06_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVA0C0_QMDV01_06_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_J                            MW(783:776)
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_K                            MW(791:791)
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVA0C0_QMDV01_06_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_L                            MW(879:872)
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_M                            MW(887:887)
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV01_06_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV01_06_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVA0C0_QMDV01_06_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
-+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_N                            MW(1466:1464)
-+#define NVA0C0_QMDV01_06_BARRIER_COUNT                             MW(1471:1467)
-+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
-+#define NVA0C0_QMDV01_06_REGISTER_COUNT                            MW(1503:1496)
-+#define NVA0C0_QMDV01_06_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
-+#define NVA0C0_QMDV01_06_SASS_VERSION                              MW(1535:1528)
-+#define NVA0C0_QMDV01_06_HW_ONLY_INNER_GET                         MW(1566:1536)
-+#define NVA0C0_QMDV01_06_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
-+#define NVA0C0_QMDV01_06_HW_ONLY_INNER_PUT                         MW(1598:1568)
-+#define NVA0C0_QMDV01_06_HW_ONLY_SCHEDULE_ON_PUT_UPDATE_ENABLE     MW(1599:1599)
-+#define NVA0C0_QMDV01_06_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(1606:1600)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_Q                            MW(1609:1607)
-+#define NVA0C0_QMDV01_06_COALESCE_WAITING_PERIOD                   MW(1617:1610)
-+#define NVA0C0_QMDV01_06_QMD_RESERVED_R                            MW(1631:1618)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_D                               MW(1663:1632)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_E                               MW(1695:1664)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_F                               MW(1727:1696)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_G                               MW(1759:1728)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_H                               MW(1791:1760)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_I                               MW(1823:1792)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_J                               MW(1855:1824)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_K                               MW(1887:1856)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_L                               MW(1919:1888)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_M                               MW(1951:1920)
-+#define NVA0C0_QMDV01_06_QMD_SPARE_N                               MW(1983:1952)
-+#define NVA0C0_QMDV01_06_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVA0C0_QMDV01_06_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+/*
-+** Queue Meta Data, Version 01_07
-+ */
-+
-+#define NVA0C0_QMDV01_07_OUTER_PUT                                 MW(30:0)
-+#define NVA0C0_QMDV01_07_OUTER_OVERFLOW                            MW(31:31)
-+#define NVA0C0_QMDV01_07_OUTER_GET                                 MW(62:32)
-+#define NVA0C0_QMDV01_07_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVA0C0_QMDV01_07_INNER_GET                                 MW(94:64)
-+#define NVA0C0_QMDV01_07_INNER_OVERFLOW                            MW(95:95)
-+#define NVA0C0_QMDV01_07_INNER_PUT                                 MW(126:96)
-+#define NVA0C0_QMDV01_07_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_A_A                          MW(159:128)
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_POINTER                     MW(191:160)
-+#define NVA0C0_QMDV01_07_QMD_GROUP_ID                              MW(197:192)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_A                            MW(200:198)
-+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
-+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVA0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVA0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
-+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVA0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE                        MW(206:206)
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID                   0x00000001
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
-+#define NVA0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_B                            MW(223:208)
-+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE                       MW(248:224)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_C                            MW(249:249)
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
-+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVA0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVA0C0_QMDV01_07_PROGRAM_OFFSET                            MW(287:256)
-+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_D                            MW(335:328)
-+#define NVA0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVA0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVA0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVA0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVA0C0_QMDV01_07_THROTTLED                                 MW(372:372)
-+#define NVA0C0_QMDV01_07_THROTTLED_FALSE                           0x00000000
-+#define NVA0C0_QMDV01_07_THROTTLED_TRUE                            0x00000001
-+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR                         MW(376:376)
-+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
-+#define NVA0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
-+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
-+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
-+#define NVA0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
-+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVA0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
-+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
-+#define NVA0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
-+#define NVA0C0_QMDV01_07_SAMPLER_INDEX                             MW(382:382)
-+#define NVA0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVA0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION                   MW(383:383)
-+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
-+#define NVA0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
-+#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH                          MW(447:432)
-+#define NVA0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
-+#define NVA0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
-+#define NVA0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
-+#define NVA0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
-+#define NVA0C0_QMDV01_07_COALESCE_WAITING_PERIOD                   MW(529:522)
-+#define NVA0C0_QMDV01_07_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_G                            MW(575:562)
-+#define NVA0C0_QMDV01_07_QMD_VERSION                               MW(579:576)
-+#define NVA0C0_QMDV01_07_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_H                            MW(591:584)
-+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVA0C0_QMDV01_07_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_I                            MW(668:648)
-+#define NVA0C0_QMDV01_07_L1_CONFIGURATION                          MW(671:669)
-+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
-+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
-+#define NVA0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
-+#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVA0C0_QMDV01_07_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVA0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_J                            MW(783:776)
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_K                            MW(791:791)
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVA0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_L                            MW(879:872)
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_M                            MW(887:887)
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVA0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVA0C0_QMDV01_07_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVA0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
-+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_N                            MW(1466:1464)
-+#define NVA0C0_QMDV01_07_BARRIER_COUNT                             MW(1471:1467)
-+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
-+#define NVA0C0_QMDV01_07_REGISTER_COUNT                            MW(1503:1496)
-+#define NVA0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
-+#define NVA0C0_QMDV01_07_SASS_VERSION                              MW(1535:1528)
-+#define NVA0C0_QMDV01_07_HW_ONLY_INNER_GET                         MW(1566:1536)
-+#define NVA0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
-+#define NVA0C0_QMDV01_07_HW_ONLY_INNER_PUT                         MW(1598:1568)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_P                            MW(1599:1599)
-+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
-+#define NVA0C0_QMDV01_07_QMD_RESERVED_Q                            MW(1630:1630)
-+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
-+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
-+#define NVA0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
-+#define NVA0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_E                               MW(1695:1664)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_F                               MW(1727:1696)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_G                               MW(1759:1728)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_H                               MW(1791:1760)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_I                               MW(1823:1792)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_J                               MW(1855:1824)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_K                               MW(1887:1856)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_L                               MW(1919:1888)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_M                               MW(1951:1920)
-+#define NVA0C0_QMDV01_07_QMD_SPARE_N                               MW(1983:1952)
-+#define NVA0C0_QMDV01_07_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVA0C0_QMDV01_07_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+
-+#endif // #ifndef __CLA0C0QMD_H__
-diff --git a/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
-new file mode 100644
-index 00000000000..040bdcd9dcb
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/clc0c0qmd.h
-@@ -0,0 +1,665 @@
-+/*******************************************************************************
-+    Copyright (c) 2016 NVIDIA Corporation
-+
-+    Permission is hereby granted, free of charge, to any person obtaining a copy
-+    of this software and associated documentation files (the "Software"), to
-+    deal in the Software without restriction, including without limitation the
-+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+    sell copies of the Software, and to permit persons to whom the Software is
-+    furnished to do so, subject to the following conditions:
-+
-+        The above copyright notice and this permission notice shall be
-+        included in all copies or substantial portions of the Software.
-+
-+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+    DEALINGS IN THE SOFTWARE.
-+
-+*******************************************************************************/
-+
-+/* AUTO GENERATED FILE -- DO NOT EDIT */
-+
-+#ifndef __CLC0C0QMD_H__
-+#define __CLC0C0QMD_H__
-+
-+/*
-+** Queue Meta Data, Version 01_07
-+ */
-+
-+// The below C preprocessor definitions describe "multi-word" structures, where
-+// fields may have bit numbers beyond 32.  For example, MW(127:96) means
-+// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
-+// syntax is to distinguish from similar "X:Y" single-word definitions: the
-+// macros historically used for single-word definitions would fail with
-+// multi-word definitions.
-+//
-+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
-+// interface layer of nvidia.ko for an example of how to manipulate
-+// these MW(X:Y) definitions.
-+
-+#define NVC0C0_QMDV01_07_OUTER_PUT                                 MW(30:0)
-+#define NVC0C0_QMDV01_07_OUTER_OVERFLOW                            MW(31:31)
-+#define NVC0C0_QMDV01_07_OUTER_GET                                 MW(62:32)
-+#define NVC0C0_QMDV01_07_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVC0C0_QMDV01_07_INNER_GET                                 MW(94:64)
-+#define NVC0C0_QMDV01_07_INNER_OVERFLOW                            MW(95:95)
-+#define NVC0C0_QMDV01_07_INNER_PUT                                 MW(126:96)
-+#define NVC0C0_QMDV01_07_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_A_A                          MW(159:128)
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_POINTER                     MW(191:160)
-+#define NVC0C0_QMDV01_07_QMD_GROUP_ID                              MW(197:192)
-+#define NVC0C0_QMDV01_07_SM_GLOBAL_CACHING_ENABLE                  MW(198:198)
-+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION               MW(199:199)
-+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
-+#define NVC0C0_QMDV01_07_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
-+#define NVC0C0_QMDV01_07_IS_QUEUE                                  MW(200:200)
-+#define NVC0C0_QMDV01_07_IS_QUEUE_FALSE                            0x00000000
-+#define NVC0C0_QMDV01_07_IS_QUEUE_TRUE                             0x00000001
-+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
-+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVC0C0_QMDV01_07_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVC0C0_QMDV01_07_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
-+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVC0C0_QMDV01_07_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE                        MW(206:206)
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_TYPE_GRID                   0x00000001
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
-+#define NVC0C0_QMDV01_07_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_B                            MW(223:208)
-+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_SIZE                       MW(248:224)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_C                            MW(249:249)
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
-+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV01_07_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV01_07_PROGRAM_OFFSET                            MW(287:256)
-+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_D                            MW(335:328)
-+#define NVC0C0_QMDV01_07_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVC0C0_QMDV01_07_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVC0C0_QMDV01_07_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV01_07_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV01_07_THROTTLED                                 MW(372:372)
-+#define NVC0C0_QMDV01_07_THROTTLED_FALSE                           0x00000000
-+#define NVC0C0_QMDV01_07_THROTTLED_TRUE                            0x00000001
-+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR                         MW(376:376)
-+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_LEGACY                  0x00000000
-+#define NVC0C0_QMDV01_07_FP32_NAN_BEHAVIOR_FP64_COMPATIBLE         0x00000001
-+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR                     MW(377:377)
-+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_ZERO           0x00000000
-+#define NVC0C0_QMDV01_07_FP32_F2I_NAN_BEHAVIOR_PASS_INDEFINITE     0x00000001
-+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVC0C0_QMDV01_07_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING                MW(379:379)
-+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_FOUR_BYTES_PER_BANK 0x00000000
-+#define NVC0C0_QMDV01_07_SHARED_MEMORY_BANK_MAPPING_EIGHT_BYTES_PER_BANK 0x00000001
-+#define NVC0C0_QMDV01_07_SAMPLER_INDEX                             MW(382:382)
-+#define NVC0C0_QMDV01_07_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVC0C0_QMDV01_07_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION                   MW(383:383)
-+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_KEEP_DENORMS      0x00000000
-+#define NVC0C0_QMDV01_07_FP32_NARROW_INSTRUCTION_FLUSH_DENORMS     0x00000001
-+#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH                          MW(447:432)
-+#define NVC0C0_QMDV01_07_CTA_RASTER_WIDTH_RESUME                   MW(479:448)
-+#define NVC0C0_QMDV01_07_CTA_RASTER_HEIGHT_RESUME                  MW(495:480)
-+#define NVC0C0_QMDV01_07_CTA_RASTER_DEPTH_RESUME                   MW(511:496)
-+#define NVC0C0_QMDV01_07_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
-+#define NVC0C0_QMDV01_07_COALESCE_WAITING_PERIOD                   MW(529:522)
-+#define NVC0C0_QMDV01_07_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_G                            MW(575:562)
-+#define NVC0C0_QMDV01_07_QMD_VERSION                               MW(579:576)
-+#define NVC0C0_QMDV01_07_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_H                            MW(591:584)
-+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVC0C0_QMDV01_07_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_I                            MW(668:648)
-+#define NVC0C0_QMDV01_07_L1_CONFIGURATION                          MW(671:669)
-+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB 0x00000001
-+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB 0x00000002
-+#define NVC0C0_QMDV01_07_L1_CONFIGURATION_DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB 0x00000003
-+#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVC0C0_QMDV01_07_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVC0C0_QMDV01_07_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_J                            MW(783:776)
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_K                            MW(791:791)
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVC0C0_QMDV01_07_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_L                            MW(879:872)
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_M                            MW(887:887)
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV01_07_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV01_07_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((959+(i)*64):(928+(i)*64))
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((967+(i)*64):(960+(i)*64))
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((973+(i)*64):(968+(i)*64))
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE(i)             MW((974+(i)*64):(974+(i)*64))
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVC0C0_QMDV01_07_CONSTANT_BUFFER_SIZE(i)                   MW((991+(i)*64):(975+(i)*64))
-+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(1463:1440)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_N                            MW(1466:1464)
-+#define NVC0C0_QMDV01_07_BARRIER_COUNT                             MW(1471:1467)
-+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(1495:1472)
-+#define NVC0C0_QMDV01_07_REGISTER_COUNT                            MW(1503:1496)
-+#define NVC0C0_QMDV01_07_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1527:1504)
-+#define NVC0C0_QMDV01_07_SASS_VERSION                              MW(1535:1528)
-+#define NVC0C0_QMDV01_07_HW_ONLY_INNER_GET                         MW(1566:1536)
-+#define NVC0C0_QMDV01_07_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
-+#define NVC0C0_QMDV01_07_HW_ONLY_INNER_PUT                         MW(1598:1568)
-+#define NVC0C0_QMDV01_07_HW_ONLY_SCG_TYPE                          MW(1599:1599)
-+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
-+#define NVC0C0_QMDV01_07_QMD_RESERVED_Q                            MW(1630:1630)
-+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
-+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
-+#define NVC0C0_QMDV01_07_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
-+#define NVC0C0_QMDV01_07_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_E                               MW(1695:1664)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_F                               MW(1727:1696)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_G                               MW(1759:1728)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_H                               MW(1791:1760)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_I                               MW(1823:1792)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_J                               MW(1855:1824)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_K                               MW(1887:1856)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_L                               MW(1919:1888)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_M                               MW(1951:1920)
-+#define NVC0C0_QMDV01_07_QMD_SPARE_N                               MW(1983:1952)
-+#define NVC0C0_QMDV01_07_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVC0C0_QMDV01_07_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+/*
-+** Queue Meta Data, Version 02_00
-+ */
-+
-+#define NVC0C0_QMDV02_00_OUTER_PUT                                 MW(30:0)
-+#define NVC0C0_QMDV02_00_OUTER_OVERFLOW                            MW(31:31)
-+#define NVC0C0_QMDV02_00_OUTER_GET                                 MW(62:32)
-+#define NVC0C0_QMDV02_00_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVC0C0_QMDV02_00_INNER_GET                                 MW(94:64)
-+#define NVC0C0_QMDV02_00_INNER_OVERFLOW                            MW(95:95)
-+#define NVC0C0_QMDV02_00_INNER_PUT                                 MW(126:96)
-+#define NVC0C0_QMDV02_00_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_A_A                          MW(159:128)
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_POINTER                     MW(191:160)
-+#define NVC0C0_QMDV02_00_QMD_GROUP_ID                              MW(197:192)
-+#define NVC0C0_QMDV02_00_SM_GLOBAL_CACHING_ENABLE                  MW(198:198)
-+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION               MW(199:199)
-+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
-+#define NVC0C0_QMDV02_00_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
-+#define NVC0C0_QMDV02_00_IS_QUEUE                                  MW(200:200)
-+#define NVC0C0_QMDV02_00_IS_QUEUE_FALSE                            0x00000000
-+#define NVC0C0_QMDV02_00_IS_QUEUE_TRUE                             0x00000001
-+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(201:201)
-+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVC0C0_QMDV02_00_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0                 MW(202:202)
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1                 MW(203:203)
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVC0C0_QMDV02_00_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS                   MW(204:204)
-+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVC0C0_QMDV02_00_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(205:205)
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE                        MW(206:206)
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_TYPE_GRID                   0x00000001
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY                  MW(207:207)
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
-+#define NVC0C0_QMDV02_00_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_B                            MW(223:208)
-+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_SIZE                       MW(248:224)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_C                            MW(249:249)
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE           MW(250:250)
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(251:251)
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE             MW(252:252)
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE              MW(253:253)
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE              MW(254:254)
-+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE          MW(255:255)
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV02_00_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV02_00_PROGRAM_OFFSET                            MW(287:256)
-+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_D                            MW(335:328)
-+#define NVC0C0_QMDV02_00_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVC0C0_QMDV02_00_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVC0C0_QMDV02_00_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_00_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_00_THROTTLED                                 MW(372:372)
-+#define NVC0C0_QMDV02_00_THROTTLED_FALSE                           0x00000000
-+#define NVC0C0_QMDV02_00_THROTTLED_TRUE                            0x00000001
-+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVC0C0_QMDV02_00_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVC0C0_QMDV02_00_SAMPLER_INDEX                             MW(382:382)
-+#define NVC0C0_QMDV02_00_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVC0C0_QMDV02_00_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED13A                           MW(447:432)
-+#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH                          MW(463:448)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED14A                           MW(479:464)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED15A                           MW(511:480)
-+#define NVC0C0_QMDV02_00_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
-+#define NVC0C0_QMDV02_00_COALESCE_WAITING_PERIOD                   MW(529:522)
-+#define NVC0C0_QMDV02_00_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_G                            MW(575:562)
-+#define NVC0C0_QMDV02_00_QMD_VERSION                               MW(579:576)
-+#define NVC0C0_QMDV02_00_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_H                            MW(591:584)
-+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVC0C0_QMDV02_00_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_I                            MW(671:648)
-+#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVC0C0_QMDV02_00_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVC0C0_QMDV02_00_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_J                            MW(783:776)
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_K                            MW(791:791)
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVC0C0_QMDV02_00_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_L                            MW(879:872)
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_M                            MW(887:887)
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV02_00_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV02_00_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_N                            MW(954:952)
-+#define NVC0C0_QMDV02_00_BARRIER_COUNT                             MW(959:955)
-+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
-+#define NVC0C0_QMDV02_00_REGISTER_COUNT                            MW(991:984)
-+#define NVC0C0_QMDV02_00_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
-+#define NVC0C0_QMDV02_00_SASS_VERSION                              MW(1023:1016)
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVC0C0_QMDV02_00_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
-+#define NVC0C0_QMDV02_00_HW_ONLY_INNER_GET                         MW(1566:1536)
-+#define NVC0C0_QMDV02_00_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1567:1567)
-+#define NVC0C0_QMDV02_00_HW_ONLY_INNER_PUT                         MW(1598:1568)
-+#define NVC0C0_QMDV02_00_HW_ONLY_SCG_TYPE                          MW(1599:1599)
-+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1629:1600)
-+#define NVC0C0_QMDV02_00_QMD_RESERVED_Q                            MW(1630:1630)
-+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1631:1631)
-+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
-+#define NVC0C0_QMDV02_00_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
-+#define NVC0C0_QMDV02_00_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1663:1632)
-+#define NVC0C0_QMDV02_00_CTA_RASTER_WIDTH_RESUME                   MW(1695:1664)
-+#define NVC0C0_QMDV02_00_CTA_RASTER_HEIGHT_RESUME                  MW(1711:1696)
-+#define NVC0C0_QMDV02_00_CTA_RASTER_DEPTH_RESUME                   MW(1727:1712)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_G                               MW(1759:1728)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_H                               MW(1791:1760)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_I                               MW(1823:1792)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_J                               MW(1855:1824)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_K                               MW(1887:1856)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_L                               MW(1919:1888)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_M                               MW(1951:1920)
-+#define NVC0C0_QMDV02_00_QMD_SPARE_N                               MW(1983:1952)
-+#define NVC0C0_QMDV02_00_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVC0C0_QMDV02_00_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+/*
-+** Queue Meta Data, Version 02_01
-+ */
-+
-+#define NVC0C0_QMDV02_01_OUTER_PUT                                 MW(30:0)
-+#define NVC0C0_QMDV02_01_OUTER_OVERFLOW                            MW(31:31)
-+#define NVC0C0_QMDV02_01_OUTER_GET                                 MW(62:32)
-+#define NVC0C0_QMDV02_01_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVC0C0_QMDV02_01_INNER_GET                                 MW(94:64)
-+#define NVC0C0_QMDV02_01_INNER_OVERFLOW                            MW(95:95)
-+#define NVC0C0_QMDV02_01_INNER_PUT                                 MW(126:96)
-+#define NVC0C0_QMDV02_01_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVC0C0_QMDV02_01_QMD_GROUP_ID                              MW(133:128)
-+#define NVC0C0_QMDV02_01_SM_GLOBAL_CACHING_ENABLE                  MW(134:134)
-+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION               MW(135:135)
-+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
-+#define NVC0C0_QMDV02_01_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
-+#define NVC0C0_QMDV02_01_IS_QUEUE                                  MW(136:136)
-+#define NVC0C0_QMDV02_01_IS_QUEUE_FALSE                            0x00000000
-+#define NVC0C0_QMDV02_01_IS_QUEUE_TRUE                             0x00000001
-+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(137:137)
-+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVC0C0_QMDV02_01_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0                 MW(138:138)
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1                 MW(139:139)
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVC0C0_QMDV02_01_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS                   MW(140:140)
-+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVC0C0_QMDV02_01_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(141:141)
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE                        MW(142:142)
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_TYPE_GRID                   0x00000001
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY                  MW(143:143)
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_B                            MW(159:144)
-+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_SIZE                       MW(184:160)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_C                            MW(185:185)
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE           MW(186:186)
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(187:187)
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE             MW(188:188)
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE              MW(189:189)
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE              MW(190:190)
-+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE          MW(191:191)
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVC0C0_QMDV02_01_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH_RESUME                   MW(223:192)
-+#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT_RESUME                  MW(239:224)
-+#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH_RESUME                   MW(255:240)
-+#define NVC0C0_QMDV02_01_PROGRAM_OFFSET                            MW(287:256)
-+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_D                            MW(335:328)
-+#define NVC0C0_QMDV02_01_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVC0C0_QMDV02_01_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVC0C0_QMDV02_01_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVC0C0_QMDV02_01_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVC0C0_QMDV02_01_THROTTLED                                 MW(372:372)
-+#define NVC0C0_QMDV02_01_THROTTLED_FALSE                           0x00000000
-+#define NVC0C0_QMDV02_01_THROTTLED_TRUE                            0x00000001
-+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVC0C0_QMDV02_01_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVC0C0_QMDV02_01_SAMPLER_INDEX                             MW(382:382)
-+#define NVC0C0_QMDV02_01_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVC0C0_QMDV02_01_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVC0C0_QMDV02_01_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVC0C0_QMDV02_01_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED13A                           MW(447:432)
-+#define NVC0C0_QMDV02_01_CTA_RASTER_DEPTH                          MW(463:448)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED14A                           MW(479:464)
-+#define NVC0C0_QMDV02_01_DEPENDENT_QMD_POINTER                     MW(511:480)
-+#define NVC0C0_QMDV02_01_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
-+#define NVC0C0_QMDV02_01_COALESCE_WAITING_PERIOD                   MW(529:522)
-+#define NVC0C0_QMDV02_01_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_G                            MW(575:562)
-+#define NVC0C0_QMDV02_01_QMD_VERSION                               MW(579:576)
-+#define NVC0C0_QMDV02_01_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_H                            MW(591:584)
-+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVC0C0_QMDV02_01_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_I                            MW(671:648)
-+#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVC0C0_QMDV02_01_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVC0C0_QMDV02_01_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_J                            MW(783:776)
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_K                            MW(791:791)
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVC0C0_QMDV02_01_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_L                            MW(879:872)
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_M                            MW(887:887)
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC0C0_QMDV02_01_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC0C0_QMDV02_01_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_N                            MW(954:952)
-+#define NVC0C0_QMDV02_01_BARRIER_COUNT                             MW(959:955)
-+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
-+#define NVC0C0_QMDV02_01_REGISTER_COUNT                            MW(991:984)
-+#define NVC0C0_QMDV02_01_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
-+#define NVC0C0_QMDV02_01_SASS_VERSION                              MW(1023:1016)
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVC0C0_QMDV02_01_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_R                            MW(1567:1536)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_S                            MW(1599:1568)
-+#define NVC0C0_QMDV02_01_HW_ONLY_INNER_GET                         MW(1630:1600)
-+#define NVC0C0_QMDV02_01_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1631:1631)
-+#define NVC0C0_QMDV02_01_HW_ONLY_INNER_PUT                         MW(1662:1632)
-+#define NVC0C0_QMDV02_01_HW_ONLY_SCG_TYPE                          MW(1663:1663)
-+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1693:1664)
-+#define NVC0C0_QMDV02_01_QMD_RESERVED_Q                            MW(1694:1694)
-+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1695:1695)
-+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
-+#define NVC0C0_QMDV02_01_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
-+#define NVC0C0_QMDV02_01_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1727:1696)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_G                               MW(1759:1728)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_H                               MW(1791:1760)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_I                               MW(1823:1792)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_J                               MW(1855:1824)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_K                               MW(1887:1856)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_L                               MW(1919:1888)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_M                               MW(1951:1920)
-+#define NVC0C0_QMDV02_01_QMD_SPARE_N                               MW(1983:1952)
-+#define NVC0C0_QMDV02_01_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVC0C0_QMDV02_01_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+
-+#endif // #ifndef __CLC0C0QMD_H__
-diff --git a/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
-new file mode 100644
-index 00000000000..588cc639d32
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/clc3c0qmd.h
-@@ -0,0 +1,245 @@
-+/*******************************************************************************
-+    Copyright (c) 2001-2010 NVIDIA Corporation
-+
-+    Permission is hereby granted, free of charge, to any person obtaining a copy
-+    of this software and associated documentation files (the "Software"), to
-+    deal in the Software without restriction, including without limitation the
-+    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-+    sell copies of the Software, and to permit persons to whom the Software is
-+    furnished to do so, subject to the following conditions:
-+
-+    The above copyright notice and this permission notice shall be
-+    included in all copies or substantial portions of the Software.
-+
-+    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
-+    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-+    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-+    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
-+    DEALINGS IN THE SOFTWARE.
-+
-+*******************************************************************************/
-+
-+/* AUTO GENERATED FILE -- DO NOT EDIT */
-+
-+#ifndef __CLC3C0QMD_H__
-+#define __CLC3C0QMD_H__
-+
-+/*
-+** Queue Meta Data, Version 02_02
-+ */
-+
-+// The below C preprocessor definitions describe "multi-word" structures, where
-+// fields may have bit numbers beyond 32.  For example, MW(127:96) means
-+// the field is in bits 0-31 of word number 3 of the structure.  The "MW(X:Y)"
-+// syntax is to distinguish from similar "X:Y" single-word definitions: the
-+// macros historically used for single-word definitions would fail with
-+// multi-word definitions.
-+//
-+// See nvmisc.h:DRF_VAL_MW() in the source code of the kernel
-+// interface layer of nvidia.ko for an example of how to manipulate
-+// these MW(X:Y) definitions.
-+
-+#define NVC3C0_QMDV02_02_OUTER_PUT                                 MW(30:0)
-+#define NVC3C0_QMDV02_02_OUTER_OVERFLOW                            MW(31:31)
-+#define NVC3C0_QMDV02_02_OUTER_GET                                 MW(62:32)
-+#define NVC3C0_QMDV02_02_OUTER_STICKY_OVERFLOW                     MW(63:63)
-+#define NVC3C0_QMDV02_02_INNER_GET                                 MW(94:64)
-+#define NVC3C0_QMDV02_02_INNER_OVERFLOW                            MW(95:95)
-+#define NVC3C0_QMDV02_02_INNER_PUT                                 MW(126:96)
-+#define NVC3C0_QMDV02_02_INNER_STICKY_OVERFLOW                     MW(127:127)
-+#define NVC3C0_QMDV02_02_QMD_GROUP_ID                              MW(133:128)
-+#define NVC3C0_QMDV02_02_SM_GLOBAL_CACHING_ENABLE                  MW(134:134)
-+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION               MW(135:135)
-+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_FALSE         0x00000000
-+#define NVC3C0_QMDV02_02_RUN_CTA_IN_ONE_SM_PARTITION_TRUE          0x00000001
-+#define NVC3C0_QMDV02_02_IS_QUEUE                                  MW(136:136)
-+#define NVC3C0_QMDV02_02_IS_QUEUE_FALSE                            0x00000000
-+#define NVC3C0_QMDV02_02_IS_QUEUE_TRUE                             0x00000001
-+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST      MW(137:137)
-+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_FALSE 0x00000000
-+#define NVC3C0_QMDV02_02_ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST_TRUE 0x00000001
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0                 MW(138:138)
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_FALSE           0x00000000
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE0_TRUE            0x00000001
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1                 MW(139:139)
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_FALSE           0x00000000
-+#define NVC3C0_QMDV02_02_SEMAPHORE_RELEASE_ENABLE1_TRUE            0x00000001
-+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS                   MW(140:140)
-+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_FALSE             0x00000000
-+#define NVC3C0_QMDV02_02_REQUIRE_SCHEDULING_PCAS_TRUE              0x00000001
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE             MW(141:141)
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_FALSE       0x00000000
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_SCHEDULE_ENABLE_TRUE        0x00000001
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE                        MW(142:142)
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_QUEUE                  0x00000000
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_TYPE_GRID                   0x00000001
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY                  MW(143:143)
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_FALSE            0x00000000
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_FIELD_COPY_TRUE             0x00000001
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_B                            MW(159:144)
-+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_SIZE                       MW(184:160)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_C                            MW(185:185)
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE           MW(186:186)
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_FALSE     0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_HEADER_CACHE_TRUE      0x00000001
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE          MW(187:187)
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_FALSE    0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_SAMPLER_CACHE_TRUE     0x00000001
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE             MW(188:188)
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_FALSE       0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_TEXTURE_DATA_CACHE_TRUE        0x00000001
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE              MW(189:189)
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_FALSE        0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_DATA_CACHE_TRUE         0x00000001
-+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE              MW(190:190)
-+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_FALSE        0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_INSTRUCTION_CACHE_TRUE         0x00000001
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE          MW(191:191)
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_FALSE    0x00000000
-+#define NVC3C0_QMDV02_02_INVALIDATE_SHADER_CONSTANT_CACHE_TRUE     0x00000001
-+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH_RESUME                   MW(223:192)
-+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT_RESUME                  MW(239:224)
-+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH_RESUME                   MW(255:240)
-+#define NVC3C0_QMDV02_02_PROGRAM_OFFSET                            MW(287:256)
-+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_LOWER                 MW(319:288)
-+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ADDR_UPPER                 MW(327:320)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_D                            MW(335:328)
-+#define NVC3C0_QMDV02_02_CIRCULAR_QUEUE_ENTRY_SIZE                 MW(351:336)
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_ID                    MW(357:352)
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DELTA_MINUS_ONE       MW(365:358)
-+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE                       MW(366:366)
-+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_NONE               0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE_MEMBAR_TYPE_FE_SYSMEMBAR          0x00000001
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE           MW(367:367)
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_FALSE     0x00000000
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_INCR_ENABLE_TRUE      0x00000001
-+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE                           MW(369:368)
-+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_NONE                   0x00000000
-+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_SYSMEMBAR              0x00000001
-+#define NVC3C0_QMDV02_02_CWD_MEMBAR_TYPE_L1_MEMBAR                 0x00000003
-+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS                     MW(370:370)
-+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_FALSE               0x00000000
-+#define NVC3C0_QMDV02_02_SEQUENTIALLY_RUN_CTAS_TRUE                0x00000001
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE           MW(371:371)
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_FALSE     0x00000000
-+#define NVC3C0_QMDV02_02_CWD_REFERENCE_COUNT_DECR_ENABLE_TRUE      0x00000001
-+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT                    MW(378:378)
-+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT__32                0x00000000
-+#define NVC3C0_QMDV02_02_API_VISIBLE_CALL_LIMIT_NO_CHECK           0x00000001
-+#define NVC3C0_QMDV02_02_SAMPLER_INDEX                             MW(382:382)
-+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_INDEPENDENTLY               0x00000000
-+#define NVC3C0_QMDV02_02_SAMPLER_INDEX_VIA_HEADER_INDEX            0x00000001
-+#define NVC3C0_QMDV02_02_CTA_RASTER_WIDTH                          MW(415:384)
-+#define NVC3C0_QMDV02_02_CTA_RASTER_HEIGHT                         MW(431:416)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED13A                           MW(447:432)
-+#define NVC3C0_QMDV02_02_CTA_RASTER_DEPTH                          MW(463:448)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED14A                           MW(479:464)
-+#define NVC3C0_QMDV02_02_DEPENDENT_QMD_POINTER                     MW(511:480)
-+#define NVC3C0_QMDV02_02_QUEUE_ENTRIES_PER_CTA_MINUS_ONE           MW(518:512)
-+#define NVC3C0_QMDV02_02_COALESCE_WAITING_PERIOD                   MW(529:522)
-+#define NVC3C0_QMDV02_02_SHARED_MEMORY_SIZE                        MW(561:544)
-+#define NVC3C0_QMDV02_02_MIN_SM_CONFIG_SHARED_MEM_SIZE             MW(568:562)
-+#define NVC3C0_QMDV02_02_MAX_SM_CONFIG_SHARED_MEM_SIZE             MW(575:569)
-+#define NVC3C0_QMDV02_02_QMD_VERSION                               MW(579:576)
-+#define NVC3C0_QMDV02_02_QMD_MAJOR_VERSION                         MW(583:580)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_H                            MW(591:584)
-+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION0                     MW(607:592)
-+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION1                     MW(623:608)
-+#define NVC3C0_QMDV02_02_CTA_THREAD_DIMENSION2                     MW(639:624)
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID(i)                  MW((640+(i)*1):(640+(i)*1))
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_FALSE               0x00000000
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_VALID_TRUE                0x00000001
-+#define NVC3C0_QMDV02_02_REGISTER_COUNT_V                          MW(656:648)
-+#define NVC3C0_QMDV02_02_TARGET_SM_CONFIG_SHARED_MEM_SIZE          MW(663:657)
-+#define NVC3C0_QMDV02_02_FREE_CTA_SLOTS_EMPTY_SM                   MW(671:664)
-+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_LOWER                     MW(703:672)
-+#define NVC3C0_QMDV02_02_SM_DISABLE_MASK_UPPER                     MW(735:704)
-+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_LOWER                    MW(767:736)
-+#define NVC3C0_QMDV02_02_RELEASE0_ADDRESS_UPPER                    MW(775:768)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_J                            MW(783:776)
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP                     MW(790:788)
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_K                            MW(791:791)
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT                 MW(793:792)
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE                 MW(794:794)
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE0_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE                   MW(799:799)
-+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE0_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE0_PAYLOAD                          MW(831:800)
-+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_LOWER                    MW(863:832)
-+#define NVC3C0_QMDV02_02_RELEASE1_ADDRESS_UPPER                    MW(871:864)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_L                            MW(879:872)
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP                     MW(886:884)
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_ADD             0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MIN             0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_MAX             0x00000002
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_INC             0x00000003
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_DEC             0x00000004
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_AND             0x00000005
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_OR              0x00000006
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_OP_RED_XOR             0x00000007
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_M                            MW(887:887)
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT                 MW(889:888)
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_UNSIGNED_32     0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_FORMAT_SIGNED_32       0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE                 MW(890:890)
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_FALSE           0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE1_REDUCTION_ENABLE_TRUE            0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE                   MW(895:895)
-+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_FOUR_WORDS        0x00000000
-+#define NVC3C0_QMDV02_02_RELEASE1_STRUCTURE_SIZE_ONE_WORD          0x00000001
-+#define NVC3C0_QMDV02_02_RELEASE1_PAYLOAD                          MW(927:896)
-+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_LOW_SIZE              MW(951:928)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_N                            MW(954:952)
-+#define NVC3C0_QMDV02_02_BARRIER_COUNT                             MW(959:955)
-+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_HIGH_SIZE             MW(983:960)
-+#define NVC3C0_QMDV02_02_REGISTER_COUNT                            MW(991:984)
-+#define NVC3C0_QMDV02_02_SHADER_LOCAL_MEMORY_CRS_SIZE              MW(1015:992)
-+#define NVC3C0_QMDV02_02_SASS_VERSION                              MW(1023:1016)
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_LOWER(i)             MW((1055+(i)*64):(1024+(i)*64))
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_ADDR_UPPER(i)             MW((1072+(i)*64):(1056+(i)*64))
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_RESERVED_ADDR(i)          MW((1073+(i)*64):(1073+(i)*64))
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE(i)             MW((1074+(i)*64):(1074+(i)*64))
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_FALSE          0x00000000
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_INVALIDATE_TRUE           0x00000001
-+#define NVC3C0_QMDV02_02_CONSTANT_BUFFER_SIZE_SHIFTED4(i)          MW((1087+(i)*64):(1075+(i)*64))
-+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_LOWER                     MW(1567:1536)
-+#define NVC3C0_QMDV02_02_PROGRAM_ADDRESS_UPPER                     MW(1584:1568)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_S                            MW(1599:1585)
-+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_GET                         MW(1630:1600)
-+#define NVC3C0_QMDV02_02_HW_ONLY_REQUIRE_SCHEDULING_PCAS           MW(1631:1631)
-+#define NVC3C0_QMDV02_02_HW_ONLY_INNER_PUT                         MW(1662:1632)
-+#define NVC3C0_QMDV02_02_HW_ONLY_SCG_TYPE                          MW(1663:1663)
-+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX              MW(1693:1664)
-+#define NVC3C0_QMDV02_02_QMD_RESERVED_Q                            MW(1694:1694)
-+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID        MW(1695:1695)
-+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_FALSE  0x00000000
-+#define NVC3C0_QMDV02_02_HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID_TRUE   0x00000001
-+#define NVC3C0_QMDV02_02_HW_ONLY_SKED_NEXT_QMD_POINTER             MW(1727:1696)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_G                               MW(1759:1728)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_H                               MW(1791:1760)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_I                               MW(1823:1792)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_J                               MW(1855:1824)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_K                               MW(1887:1856)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_L                               MW(1919:1888)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_M                               MW(1951:1920)
-+#define NVC3C0_QMDV02_02_QMD_SPARE_N                               MW(1983:1952)
-+#define NVC3C0_QMDV02_02_DEBUG_ID_UPPER                            MW(2015:1984)
-+#define NVC3C0_QMDV02_02_DEBUG_ID_LOWER                            MW(2047:2016)
-+
-+
-+
-+#endif // #ifndef __CLC3C0QMD_H__
-diff --git a/src/gallium/drivers/nouveau/nvc0/drf.h b/src/gallium/drivers/nouveau/nvc0/drf.h
-new file mode 100644
-index 00000000000..bf95c8c3185
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/drf.h
-@@ -0,0 +1,119 @@
-+/*
-+ * Copyright 2019 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#ifndef __NVHW_DRF_H__
-+#define __NVHW_DRF_H__
-+
-+/* Helpers common to all DRF accessors. */
-+#define DRF_LO(drf)    (0 ? drf)
-+#define DRF_HI(drf)    (1 ? drf)
-+#define DRF_BITS(drf)  (DRF_HI(drf) - DRF_LO(drf) + 1)
-+#define DRF_MASK(drf)  (~0ULL >> (64 - DRF_BITS(drf)))
-+#define DRF_SMASK(drf) (DRF_MASK(drf) << DRF_LO(drf))
-+
-+/* Helpers for DRF-MW accessors. */
-+#define DRF_MX_MW(drf)      drf
-+#define DRF_MX(drf)         DRF_MX_##drf
-+#define DRF_MW(drf)         DRF_MX(drf)
-+#define DRF_MW_SPANS(o,drf) (DRF_LW_IDX((o),drf) != DRF_HW_IDX((o),drf))
-+#define DRF_MW_SIZE(o)      (sizeof((o)[0]) * 8)
-+
-+#define DRF_LW_IDX(o,drf)   (DRF_LO(DRF_MW(drf)) / DRF_MW_SIZE(o))
-+#define DRF_LW_LO(o,drf)    (DRF_LO(DRF_MW(drf)) % DRF_MW_SIZE(o))
-+#define DRF_LW_HI(o,drf)    (DRF_MW_SPANS((o),drf) ? (DRF_MW_SIZE(o) - 1) : DRF_HW_HI((o),drf))
-+#define DRF_LW_BITS(o,drf)  (DRF_LW_HI((o),drf) - DRF_LW_LO((o),drf) + 1)
-+#define DRF_LW_MASK(o,drf)  (~0ULL >> (64 - DRF_LW_BITS((o),drf)))
-+#define DRF_LW_SMASK(o,drf) (DRF_LW_MASK((o),drf) << DRF_LW_LO((o),drf))
-+#define DRF_LW_GET(o,drf)   (((o)[DRF_LW_IDX((o),drf)] >> DRF_LW_LO((o),drf)) & DRF_LW_MASK((o),drf))
-+#define DRF_LW_VAL(o,drf,v) (((v) & DRF_LW_MASK((o),drf)) << DRF_LW_LO((o),drf))
-+#define DRF_LW_CLR(o,drf)   ((o)[DRF_LW_IDX((o),drf)] & ~DRF_LW_SMASK((o),drf))
-+#define DRF_LW_SET(o,drf,v) (DRF_LW_CLR((o),drf) | DRF_LW_VAL((o),drf,(v)))
-+
-+#define DRF_HW_IDX(o,drf)   (DRF_HI(DRF_MW(drf)) / DRF_MW_SIZE(o))
-+#define DRF_HW_LO(o,drf)    0
-+#define DRF_HW_HI(o,drf)    (DRF_HI(DRF_MW(drf)) % DRF_MW_SIZE(o))
-+#define DRF_HW_BITS(o,drf)  (DRF_HW_HI((o),drf) - DRF_HW_LO((o),drf) + 1)
-+#define DRF_HW_MASK(o,drf)  (~0ULL >> (64 - DRF_HW_BITS((o),drf)))
-+#define DRF_HW_SMASK(o,drf) (DRF_HW_MASK((o),drf) << DRF_HW_LO((o),drf))
-+#define DRF_HW_GET(o,drf)   ((o)[DRF_HW_IDX(o,drf)] & DRF_HW_SMASK((o),drf))
-+#define DRF_HW_VAL(o,drf,v) (((long long)(v) >> DRF_LW_BITS((o),drf)) & DRF_HW_SMASK((o),drf))
-+#define DRF_HW_CLR(o,drf)   ((o)[DRF_HW_IDX((o),drf)] & ~DRF_HW_SMASK((o),drf))
-+#define DRF_HW_SET(o,drf,v) (DRF_HW_CLR((o),drf) | DRF_HW_VAL((o),drf,(v)))
-+
-+/* DRF accessors. */
-+#define NVVAL_X(drf,v) (((v) & DRF_MASK(drf)) << DRF_LO(drf))
-+#define NVVAL_N(X,d,r,f,  v) NVVAL_X(d##_##r##_##f, (v))
-+#define NVVAL_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), (v))
-+#define NVVAL_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
-+#define NVVAL(A...) NVVAL_(X, ##A, NVVAL_I, NVVAL_N)(X, ##A)
-+
-+#define NVDEF_N(X,d,r,f,  v) NVVAL_X(d##_##r##_##f, d##_##r##_##f##_##v)
-+#define NVDEF_I(X,d,r,f,i,v) NVVAL_X(d##_##r##_##f(i), d##_##r##_##f##_##v)
-+#define NVDEF_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
-+#define NVDEF(A...) NVDEF_(X, ##A, NVDEF_I, NVDEF_N)(X, ##A)
-+
-+#define NVVAL_GET_X(o,drf) (((o) >> DRF_LO(drf)) & DRF_MASK(drf))
-+#define NVVAL_GET_N(X,o,d,r,f  ) NVVAL_GET_X(o, d##_##r##_##f)
-+#define NVVAL_GET_I(X,o,d,r,f,i) NVVAL_GET_X(o, d##_##r##_##f(i))
-+#define NVVAL_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
-+#define NVVAL_GET(A...) NVVAL_GET_(X, ##A, NVVAL_GET_I, NVVAL_GET_N)(X, ##A)
-+
-+#define NVVAL_SET_X(o,drf,v) (((o) & ~DRF_SMASK(drf)) | NVVAL_X(drf, (v)))
-+#define NVVAL_SET_N(X,o,d,r,f,  v) NVVAL_SET_X(o, d##_##r##_##f, (v))
-+#define NVVAL_SET_I(X,o,d,r,f,i,v) NVVAL_SET_X(o, d##_##r##_##f(i), (v))
-+#define NVVAL_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
-+#define NVVAL_SET(A...) NVVAL_SET_(X, ##A, NVVAL_SET_I, NVVAL_SET_N)(X, ##A)
-+
-+#define NVDEF_SET_N(X,o,d,r,f,  v)                                             \
-+	NVVAL_SET_X(o, d##_##r##_##f,    d##_##r##_##f##_##v)
-+#define NVDEF_SET_I(X,o,d,r,f,i,v)                                             \
-+	NVVAL_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
-+#define NVDEF_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
-+#define NVDEF_SET(A...) NVDEF_SET_(X, ##A, NVDEF_SET_I, NVDEF_SET_N)(X, ##A)
-+
-+/* DRF-MW accessors. */
-+#define NVVAL_MW_GET_X(o,drf)                                                  \
-+	((DRF_MW_SPANS((o),drf) ?                                              \
-+	  (DRF_HW_GET((o),drf) << DRF_LW_BITS((o),drf)) : 0) | DRF_LW_GET((o),drf))
-+#define NVVAL_MW_GET_N(X,o,d,r,f  ) NVVAL_MW_GET_X((o), d##_##r##_##f)
-+#define NVVAL_MW_GET_I(X,o,d,r,f,i) NVVAL_MW_GET_X((o), d##_##r##_##f(i))
-+#define NVVAL_MW_GET_(X,_1,_2,_3,_4,_5,IMPL,...) IMPL
-+#define NVVAL_MW_GET(A...) NVVAL_MW_GET_(X, ##A, NVVAL_MW_GET_I, NVVAL_MW_GET_N)(X, ##A)
-+
-+#define NVVAL_MW_SET_X(o,drf,v) do {                                           \
-+	(o)[DRF_LW_IDX((o),drf)] = DRF_LW_SET((o),drf,(v));                    \
-+	if (DRF_MW_SPANS((o),drf))                                             \
-+		(o)[DRF_HW_IDX((o),drf)] = DRF_HW_SET((o),drf,(v));            \
-+} while(0)
-+#define NVVAL_MW_SET_N(X,o,d,r,f,  v) NVVAL_MW_SET_X((o), d##_##r##_##f, (v))
-+#define NVVAL_MW_SET_I(X,o,d,r,f,i,v) NVVAL_MW_SET_X((o), d##_##r##_##f(i), (v))
-+#define NVVAL_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
-+#define NVVAL_MW_SET(A...)                                                     \
-+	NVVAL_MW_SET_(X, ##A, NVVAL_MW_SET_I, NVVAL_MW_SET_N)(X, ##A)
-+
-+#define NVDEF_MW_SET_N(X,o,d,r,f,  v)                                          \
-+	NVVAL_MW_SET_X(o, d##_##r##_##f,    d##_##r##_##f##_##v)
-+#define NVDEF_MW_SET_I(X,o,d,r,f,i,v)                                          \
-+	NVVAL_MW_SET_X(o, d##_##r##_##f(i), d##_##r##_##f##_##v)
-+#define NVDEF_MW_SET_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
-+#define NVDEF_MW_SET(A...)                                                     \
-+	NVDEF_MW_SET_(X, ##A, NVDEF_MW_SET_I, NVDEF_MW_SET_N)(X, ##A)
-+#endif
-diff --git a/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
-new file mode 100644
-index 00000000000..390741cbd04
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/mme/comc597.mme.h
-@@ -0,0 +1,904 @@
-+#define NV_MME_PRED_MODE_UUUU                0
-+#define NV_MME_PRED_MODE_TTTT                1
-+#define NV_MME_PRED_MODE_FFFF                2
-+#define NV_MME_PRED_MODE_TTUU                3
-+#define NV_MME_PRED_MODE_FFUU                4
-+#define NV_MME_PRED_MODE_TFUU                5
-+#define NV_MME_PRED_MODE_TUUU                6
-+#define NV_MME_PRED_MODE_FUUU                7
-+#define NV_MME_PRED_MODE_UUTT                8
-+#define NV_MME_PRED_MODE_UUTF                9
-+#define NV_MME_PRED_MODE_UUTU                10
-+#define NV_MME_PRED_MODE_UUFT                11
-+#define NV_MME_PRED_MODE_UUFF                12
-+#define NV_MME_PRED_MODE_UUFU                13
-+#define NV_MME_PRED_MODE_UUUT                14
-+#define NV_MME_PRED_MODE_UUUF                15
-+
-+#define NV_MME_REG_R0                       0
-+#define NV_MME_REG_R1                       1
-+#define NV_MME_REG_R2                       2
-+#define NV_MME_REG_R3                       3
-+#define NV_MME_REG_R4                       4
-+#define NV_MME_REG_R5                       5
-+#define NV_MME_REG_R6                       6
-+#define NV_MME_REG_R7                       7
-+#define NV_MME_REG_R8                       8
-+#define NV_MME_REG_R9                       9
-+#define NV_MME_REG_R10                      10
-+#define NV_MME_REG_R11                      11
-+#define NV_MME_REG_R12                      12
-+#define NV_MME_REG_R13                      13
-+#define NV_MME_REG_R14                      14
-+#define NV_MME_REG_R15                      15
-+#define NV_MME_REG_R16                      16
-+#define NV_MME_REG_R17                      17
-+#define NV_MME_REG_R18                      18
-+#define NV_MME_REG_R19                      19
-+#define NV_MME_REG_R20                      20
-+#define NV_MME_REG_R21                      21
-+#define NV_MME_REG_R22                      22
-+#define NV_MME_REG_R23                      23
-+#define NV_MME_REG_ZERO                     24
-+#define NV_MME_REG_IMMED                    25
-+#define NV_MME_REG_IMMEDPAIR                26
-+#define NV_MME_REG_IMMED32                  27
-+#define NV_MME_REG_LOAD0                    28
-+#define NV_MME_REG_LOAD1                    29
-+
-+#define NV_MME_ALU_ADD                    0
-+#define NV_MME_ALU_ADDC                   1
-+#define NV_MME_ALU_SUB                    2
-+#define NV_MME_ALU_SUBB                   3
-+#define NV_MME_ALU_MUL                    4
-+#define NV_MME_ALU_MULH                   5
-+#define NV_MME_ALU_MULU                   6
-+#define NV_MME_ALU_EXTENDED               7
-+#define NV_MME_ALU_CLZ                    8
-+#define NV_MME_ALU_SLL                    9
-+#define NV_MME_ALU_SRL                    10
-+#define NV_MME_ALU_SRA                    11
-+#define NV_MME_ALU_AND                    12
-+#define NV_MME_ALU_NAND                   13
-+#define NV_MME_ALU_OR                     14
-+#define NV_MME_ALU_XOR                    15
-+#define NV_MME_ALU_MERGE                  16
-+#define NV_MME_ALU_SLT                    17
-+#define NV_MME_ALU_SLTU                   18
-+#define NV_MME_ALU_SLE                    19
-+#define NV_MME_ALU_SLEU                   20
-+#define NV_MME_ALU_SEQ                    21
-+#define NV_MME_ALU_STATE                  22
-+#define NV_MME_ALU_LOOP                   23
-+#define NV_MME_ALU_JAL                    24
-+#define NV_MME_ALU_BLT                    25
-+#define NV_MME_ALU_BLTU                   26
-+#define NV_MME_ALU_BLE                    27
-+#define NV_MME_ALU_BLEU                   28
-+#define NV_MME_ALU_BEQ                    29
-+#define NV_MME_ALU_DREAD                  30
-+#define NV_MME_ALU_DWRITE                 31
-+
-+#define NV_MME_OUT_NONE                 0
-+#define NV_MME_OUT_ALU0                 1
-+#define NV_MME_OUT_ALU1                 2
-+#define NV_MME_OUT_LOAD0                3
-+#define NV_MME_OUT_LOAD1                4
-+#define NV_MME_OUT_IMMED0               5
-+#define NV_MME_OUT_IMMED1               6
-+#define NV_MME_OUT_RESERVED             7
-+#define NV_MME_OUT_IMMEDHIGH0           8
-+#define NV_MME_OUT_IMMEDHIGH1           9
-+#define NV_MME_OUT_IMMED32_0            10
-+
-+#define MME_BITS(en,pm,pr,o0,d0,a0,b0,i0,o1,d1,a1,b1,i1,m0,e0,m1,e1)           \
-+   ((e1) << (92 - 64) | (m1) << (89 - 64) |                                    \
-+    (e0) << (85 - 64) | (m0) << (82 - 64) |                                    \
-+    (i1) << (66 - 64) | (b1) >> (64 - 61)),                                    \
-+   (((b1) & 7)  << (61 - 32) | (a1) << (56 - 32) |                             \
-+    (d1) << (51 - 32) | (o1) << (46 - 32) |                                    \
-+    (i0) >> (32 - 30)),                                                        \
-+   (((i0) & 3) << 30 | (b0) << 25 | (a0) << 20 | (d0) << 15 | (o0) << 10 |     \
-+    (pr) << 5 | (pm) << 1 | (en))
-+
-+#define MME_INSN(en,o0,d0,a0,b0,i0,m0,e0,o1,d1,a1,b1,i1,m1,e1)                 \
-+   MME_BITS((en), NV_MME_PRED_MODE_UUUU, NV_MME_REG_ZERO,                      \
-+            NV_MME_ALU_##o0, NV_MME_REG_##d0,                               \
-+            NV_MME_REG_##a0, NV_MME_REG_##b0, (i0),                            \
-+            NV_MME_ALU_##o1, NV_MME_REG_##d1,                               \
-+            NV_MME_REG_##a1, NV_MME_REG_##b1, (i1),                            \
-+            NV_MME_OUT_##m0, NV_MME_OUT_##e0,                                  \
-+            NV_MME_OUT_##m1, NV_MME_OUT_##e1)
-+
-+uint32_t mmec597_per_instance_bf[] = {
-+// r1 = load();      // count
-+// r3 = load();      // mask
-+// mthd(0x1880, 1);  // VERTEX_ARRAY_PER_INSTANCE[0]
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x1880/4, IMMED0,   NONE,
-+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+// while (HW_LOOP_COUNT < r1) {
-+//    send(r3 & 1);
-+//    r3 >>= 1;
-+// }
-+   MME_INSN(0,  LOOP, ZERO,    R1,  ZERO,            0x0003,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   AND, ZERO,    R3, IMMED,                 1,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   SRL,   R3,    R3, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_vertex_array_select[] = {
-+// r1 = load();            // array
-+// r2 = load();            // limit hi
-+// r3 = load();            // limit lo
-+// r4 = load();            // start hi
-+// r5 = load();            // start lo
-+// r6 = (r1 & 0x1f) << 2;
-+// r7 = (r1 & 0x1f) << 1;
-+// mthd(0x1c04 + r6, 1);   // VERTEX_ARRAY_START_HIGH[]
-+// send(r4);
-+// send(r5);
-+// mthd(0x0600 + r7, 1);   // VERTEX_ARRAY_LIMIT_HIGH[]
-+// send(r2);
-+// send(r3);
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+               MERGE,   R6,  ZERO,    R1,  (2<<10)|(5<<5)|0,   NONE,   NONE),
-+   MME_INSN(0, MERGE,   R7,  ZERO,    R1,  (1<<10)|(5<<5)|0,   ALU1,   NONE,
-+                 ADD, ZERO,    R6, IMMED,  (1<<12)|0x1c04/4,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(1,   ADD, ZERO,    R7, IMMED,  (1<<12)|0x0600/4,   ALU0,   ALU1,
-+                 ADD, ZERO,    R2,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_blend_enables[] = {
-+// r1 = load();         // enable mask
-+// mthd(0x1360, 1);     // NVC0_3D_BLEND_ENABLE[]
-+// send((r1 >> 0) & 1);
-+// send((r1 >> 1) & 1);
-+// send((r1 >> 2) & 1);
-+// send((r1 >> 3) & 1);
-+// send((r1 >> 4) & 1);
-+// send((r1 >> 5) & 1);
-+// send((r1 >> 6) & 1);
-+// send((r1 >> 7) & 1);
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0, IMMED1,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x1360/4,   NONE,   NONE),
-+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|0,   NONE,   ALU0,
-+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|1,   NONE,   ALU1),
-+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|2,   NONE,   ALU0,
-+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|3,   NONE,   ALU1),
-+   MME_INSN(1, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|4,   NONE,   ALU0,
-+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|5,   NONE,   ALU1),
-+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|6,   NONE,   ALU0,
-+               MERGE, ZERO,  ZERO,    R1,  (0<<10)|(1<<5)|7,   NONE,   ALU1),
-+};
-+
-+uint32_t mmec597_poly_mode_front[] = {
-+// r1 = load();
-+// mthd(0x0dac,0);      // POLYGON_MODE_FRONT
-+// send(r1);
-+// r2 = read(0x0db0);   // POLYGON_MODE_BACK
-+// r3 = read(0x20c0);   // SP_SELECT[3]
-+// r7 = r1 | r2;
-+// r4 = read(0x2100);   // SP_SELECT[4]
-+// r6 = 0x60;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0dac/4, IMMED0,   ALU0,
-+               STATE,   R2, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
-+                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
-+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0x200;
-+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// r7 = r3 | r4;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0;
-+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x02ec, 0);
-+// send(r6);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_poly_mode_back[] = {
-+// r1 = load();
-+// mthd(0x0db0,0);      // POLYGON_MODE_BACK
-+// send(r1);
-+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
-+// r3 = read(0x20c0);   // SP_SELECT[3]
-+// r7 = r1 | r2;
-+// r4 = read(0x2100);   // SP_SELECT[4]
-+// r6 = 0x60;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x0db0/4, IMMED0,   ALU0,
-+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
-+                  OR,   R7,    R1,    R2,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
-+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0x200;
-+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// r7 = r3 | r4;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,    OR,   R7,    R3,    R4,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0;
-+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x02ec, 0);
-+// send(r6);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_gp_select[] = {
-+// r1 = load();
-+// mthd(0x2100,0);      // SP_SELECT[4]
-+// send(r1);
-+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
-+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
-+// r7 = r2 | r3;
-+// r4 = read(0x20c0);   // SP_SELECT[3]
-+// r6 = 0x60;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x2100/4, IMMED0,   ALU0,
-+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
-+                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x20c0/4,   NONE,   NONE,
-+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0x200;
-+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// r7 = r1 | r4;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0;
-+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x02ec, 0);
-+// send(r6);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_tep_select[] = {
-+// r1 = load();
-+// mthd(0x20c0,0);      // SP_SELECT[3]
-+// send(r1);
-+// r2 = read(0x0dac);   // POLYGON_MODE_FRONT
-+// r3 = read(0x0db0);   // POLYGON_MODE_BACK
-+// r7 = r2 | r3;
-+// r4 = read(0x2100);   // SP_SELECT[4]
-+// r6 = 0x60;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (0<<12)|0x20c0/4, IMMED0,   ALU0,
-+               STATE,   R2, IMMED,  ZERO,          0x0dac/4,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R3, IMMED,  ZERO,          0x0db0/4,   NONE,   NONE,
-+                  OR,   R7,    R2,    R3,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R4, IMMED,  ZERO,          0x2100/4,   NONE,   NONE,
-+                 ADD,   R6, IMMED,  ZERO,              0x60,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0x200;
-+   MME_INSN(0,   ADD,   R6, IMMED,  ZERO,             0x200,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// r7 = r1 | r4;
-+// r7 = r7 & 1;
-+// if (r7 != 0)
-+   MME_INSN(0,    OR,   R7,    R1,    R4,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   AND,   R7,    R7, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R7,  ZERO,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = 0;
-+   MME_INSN(0,   ADD,   R6,  ZERO,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x02ec, 0);
-+// send(r6);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x02ec/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_draw_arrays_indirect[] = {
-+// r1 = load();         // mode
-+// r5 = read(0x1438);   // VB_INSTANCE_BASE
-+// r6 = load();         // start_drawid
-+// r7 = load();         // numparams
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                0,   NONE,   NONE,
-+                 ADD,   R6, LOAD1,  ZERO,                0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                0,   NONE,   NONE,
-+               STATE,   R5, IMMED,  ZERO,         0x1438/4,   NONE,   NONE),
-+// while (HW_LOOP_COUNT < r7) {
-+//    r2 = load();      // count
-+//    r3 = load();      // instance_count
-+//    mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
-+//    send(load());     // start
-+//    r4 = load();      // start_instance
-+//    if (r3) {
-+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000c,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
-+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//       mthd(0x238c, 1);     // CB_POS
-+//       send(256 + 160);
-+//       send(0);             // base_vertex
-+//       send(r4);            // start_instance
-+//       send(r6);            // draw id
-+//       mthd(0x1438, 0);     // VB_INSTANCE_BASE
-+//       send(r4);
-+//       r1 = r1 & ~(1<<26);  // clear INSTANCE_NEXT
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
-+                 ADD, ZERO,  ZERO,  ZERO,         256 + 160,   NONE,   ALU0),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
-+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
-+//       do {
-+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
-+//          send(r1);         // mode
-+//          mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
-+//          send(r2);         // count
-+//          mthd(0x1614, 0);  // VERTEX_END_GL
-+//          send(0);
-+//          r1 |= (1<<26);    // set INSTANCE_NEXT
-+//       } while(--r3);
-+//    }
-+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
-+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
-+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
-+                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r6 = r6 + 1;
-+// };
-+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x1438, 0);  // restore VB_INSTANCE_BASE
-+// send(r5);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+};
-+
-+uint32_t mmec597_draw_elts_indirect[] = {
-+// r1 = load();         // mode
-+// r8 = read(0x1434);   // VB_ELEMENT_BASE
-+// r9 = read(0x1438);   // VB_INSTANCE_BASE
-+// r6 = load();         // start_drawid
-+// r7 = load();         // numparams
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+               STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
-+                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// while (HW_LOOP_COUNT < r7) {
-+//    r3 = load();      // count
-+//    r2 = load();      // instance_count
-+//    mthd(0x17dc, 0);  // INDEX_BATCH_FIRST
-+//    send(load());     // start
-+//    r4 = load();      // index_bias
-+//    mthd(0x238c, 1);  // CB_POS
-+//    send(256 + 160);
-+//    send(r4);         // index_bias
-+//    r5 = load();      // start_instance
-+//    if (r2) {
-+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x000d,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,          0x17dc/4, IMMED0,   NONE,
-+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
-+                 ADD, ZERO,    R4,  ZERO,         256 + 160,   NONE,   ALU1),
-+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
-+                 ADD,   R5, LOAD0,  ZERO,                 0,   NONE,   NONE),
-+//       send(r5);         // start_instance
-+//       send(r6);         // draw_id
-+//       mthd(0x1434, 1);  // VB_ELEMENT_BASE
-+//       send(r4);         // index_bias
-+//       send(r5);         // start_instance
-+//       mthd(0x1118, 0);  // VERTEX_ID_BASE
-+//       send(r4);         // index_bias
-+//       r1 &= ~(1 << 26); // clear INSTANCE_NEXT
-+   MME_INSN(0,   ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R5,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1118/4, IMMED0,   ALU0,
-+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
-+//       do {
-+//          mthd(0x1618, 0);  // VERTEX_BEGIN_GL
-+//          send(r1);         // mode
-+//          mthd(0x17e0, 0);  // INDEX_BATCH_COUNT
-+//          send(r3);         // count
-+//          mthd(0x1614, 0);  // VERTEX_END_GL
-+//          send(0);
-+//          r1 |= (1 << 26);  // set INSTANCE_NEXT
-+//       } while (--r2);
-+//    }
-+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
-+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
-+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
-+                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//   r6 = r6 + 1;
-+// };
-+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x1434, 1);
-+// send(r8);         // restore VB_ELEMENT_BASE
-+// send(r9);         // restore VB_INSTANCE_BASE
-+// mthd(0x1118, 0);
-+// send(r8);         // restore VERTEX_ID_BASE
-+   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,          0x1118/4, IMMED0,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_draw_arrays_indirect_count[] = {
-+// r1 = load();         // mode
-+// r6 = load();         // start_drawid
-+// r7 = load();         // numparams
-+// r5 = load();         // totaldraws
-+// r8 = read(0x1438);   // VB_INSTANCE_BASE
-+// r5 = r5 - r6;        // remaining draws
-+// if (r5 > r7)
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD,   R6, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
-+                 SUB,   R5,    R5,    R6,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r5 = r7;
-+   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// if (r5 >= 0) {
-+   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000e,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    while (HW_LOOP_COUNT < r5) {
-+//       r2 = load();      // count
-+//       r3 = load();      // instance_count
-+//       mthd(0x0d74, 0);  // VERTEX_BUFFER_FIRST
-+//       send(load());     // start
-+//       r4 = load();      // start_instance
-+//       if (r3) {
-+   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000c,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,          0x0d74/4, IMMED0,   NONE,
-+                 ADD,   R3, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//          mthd(0x238c, 1);  // CB_POS
-+//          send(256 + 160);
-+//          send(0);          // base_vertex
-+//          send(r4);         // start_instance
-+//          send(r6);         // draw_id
-+//          mthd(0x1438, 0);  // VB_INSTANCE_BASE
-+//          send(r4);
-+//          r1 &= ~(1 << 26); // clear INSTANCE_NEXT
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x238c/4, IMMED0, IMMED1,
-+                 ADD, ZERO,  ZERO,  ZERO,           256+160,   NONE,   ALU0),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,          0x1438/4, IMMED0,   ALU0,
-+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
-+//          do {
-+//             mthd(0x1618, 0);  // VERTEX_BEGIN_GL
-+//             send(r1);         // mode
-+//             mthd(0x0d78, 0);  // VERTEX_BUFFER_COUNT
-+//             send(r2);
-+//             mthd(0x1614, 0);  // VERTEX_END_GL
-+//             send(0);
-+//             r1 |= (1 << 26);  // set INSTANCE_NEXT
-+//          } while (--r3);
-+//       }
-+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R2,  ZERO,          0x0d78/4, IMMED1,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
-+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
-+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
-+                 SUB,   R3,    R3, IMMED,                 1,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//       r6 = r6 + 1;   // draw_id++
-+//    }
-+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r7 = r7 - r5;  // unneeded params
-+// }
-+   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// while (HW_LOOP_COUNT < r7) {
-+//    load();
-+//    load();
-+//    load();
-+//    load();
-+// }
-+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0003,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+// exit mthd(0x1438, 0);   // VB_INSTANCE_BASE
-+// send(r8);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,          0x1438/4, IMMED0,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_draw_elts_indirect_count[] = {
-+// r8 = read(0x1434);
-+// r1 = load();
-+// r9 = read(0x1438);
-+// r6 = load();
-+// r7 = load();
-+// r5 = load();
-+// r5 = r5 - r6;
-+// if (r5 > r7)
-+   MME_INSN(0, STATE,   R8, IMMED,  ZERO,          0x1434/4,   NONE,   NONE,
-+                 ADD,   R1, LOAD0,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0, STATE,   R9, IMMED,  ZERO,          0x1438/4,   NONE,   NONE,
-+                 ADD,   R6, LOAD0,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R7, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD,   R5, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   SUB,   R5,    R5,    R6,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BLE, ZERO,    R5,    R7,    (2<<14)|0x0002,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r5 = r7;
-+   MME_INSN(0,   ADD,   R5,    R7,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// if (r5 >= 0) {
-+   MME_INSN(0,   BLT, ZERO,    R5,  ZERO,    (2<<14)|0x000f,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    while (HW_LOOP_COUNT < r5) {
-+//       r3 = load();
-+//       r2 = load();
-+//       mthd(0x17dc, 0);
-+//       send(load());
-+//       r4 = load();
-+//       mthd(0x238c, 1);
-+//       send(256 + 160);
-+//       send(r4);
-+//       r10 = load();
-+//       if (r2) {
-+   MME_INSN(0,  LOOP, ZERO,    R5,  ZERO,            0x000d,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,  (0<<12)|0x17dc/4, IMMED0,   NONE,
-+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,  (1<<12)|0x238c/4,   NONE,   ALU0,
-+                 ADD,   R4, LOAD1,  ZERO,         256 + 160, IMMED0, IMMED1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD,  R10, LOAD0,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (2<<14)|0x0008,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//          send(r10);
-+//          send(r6);
-+//          mthd(0x1434, 1);
-+//          send(r4);
-+//          send(r10);
-+//          mthd(0x1118, 0);
-+//          send(r4);
-+//          r1 &= ~(1 << 26);
-+   MME_INSN(0,   ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU0,
-+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
-+                 ADD, ZERO,   R10,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R4,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
-+               MERGE,   R1,    R1,  ZERO, (26<<10)|(1<<5)|0,   NONE,   NONE),
-+//          do {
-+//             mthd(0x1618, 0);
-+//             send(r1);
-+//             mthd(0x17e0, 0);
-+//             send(r3);
-+//             mthd(0x1614, 0);
-+//             send(0);
-+//             r1 |= (1 << 26);
-+//          } while (--r2);
-+//       }
-+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,          0x1618/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R3,  ZERO,          0x17e0/4, IMMED1,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,          0x1614/4, IMMED0,   ALU0,
-+                 ADD,   R4, IMMED,  ZERO,                 1,   NONE,   NONE),
-+   MME_INSN(0, MERGE,   R1,    R1,    R4, (26<<10)|(1<<5)|0,   NONE,   NONE,
-+                 SUB,   R2,    R2, IMMED,                 1,   NONE,   NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R2,  ZERO,    (1<<14)|0x3ffd,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//       r6 = r6 + 1;
-+//    }
-+   MME_INSN(0,   ADD,   R6,    R6, IMMED,                 1,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+//    r7 = r7 - r5; // unneeded params
-+// }
-+   MME_INSN(0,   SUB,   R7,    R7,    R5,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// while (HW_LOOP_COUNT < r7) {
-+//    r2 = load();
-+//    r2 = load();
-+//    r2 = load();
-+//    r2 = load();
-+//    r2 = load();
-+// }
-+   MME_INSN(0,  LOOP, ZERO,    R7,  ZERO,            0x0004,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO, LOAD1,  ZERO,                 0,   NONE,   NONE),
-+   MME_INSN(0,   ADD, ZERO, LOAD0,  ZERO,                 0,   NONE,   NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+// mthd(0x1434, 1);
-+// send(r8);
-+// send(r9);
-+// exit mthd(0x1118, 0);
-+// send(r8);
-+   MME_INSN(1,   ADD, ZERO,    R8,  ZERO,  (1<<12)|0x1434/4, IMMED0,   ALU0,
-+                 ADD, ZERO,    R9,  ZERO,                 0,   NONE,   ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R8,  ZERO,  (0<<12)|0x1118/4, IMMED0,   ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,   NONE),
-+};
-+
-+uint32_t mmec597_query_buffer_write[] = {
-+// r1 = load();   // clamp value
-+// r2 = load();   // end value (lo)
-+// r3 = load();   // end value (hi)
-+// r4 = load();   // start value (lo)
-+// r5 = load();   // start value (hi)
-+// r8 = load();   // desired sequence
-+// r9 = load();   // actual sequence
-+// r7 = load();   // query address (hi)
-+// r6 = load();   // query address (lo)
-+// if (r9 >= r8) {
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD,   R2, LOAD1,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R5, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD,   R8, LOAD1,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R9, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD,   R7, LOAD1,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R6, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   BLT, ZERO,    R9,    R8,    (2<<14)|0x000e,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+//    [r3,r2] = [r3,r2] - [r5,r4];
-+//    if (r1) {
-+   MME_INSN(0,   SUB,   R2,    R2,    R4,                 0,   NONE,      NONE,
-+                SUBB,   R3,    R3,    R5,                 0,   NONE,      NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (2<<14)|0x0004,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+//       if (r3 != 0 || r1 < r2)
-+//          r2 = r1;
-+//    }
-+   MME_INSN(0,   BEQ, ZERO,    R3,  ZERO,    (1<<14)|0x0002,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,  BLTU, ZERO,    R1,    R2,    (1<<14)|0x0002,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R2,    R1,  ZERO,                 0,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+//    mthd(0x1b00, 1);
-+//    send(r7);
-+//    send(r6);
-+//    send(r2)
-+//    send(0x10000000);
-+//    if (!r1) {
-+   MME_INSN(0,   ADD, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4, IMMED0,      ALU0,
-+                 ADD, ZERO,    R6,  ZERO,                 0,   NONE,      ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+   MME_INSN(0,   BEQ, ZERO,    R1,  ZERO,    (1<<14)|0x0004,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+//       [r7,r6] = [r7,r6] + 4;
-+//       mthd(0x1b00, 1);
-+//       send(r7);
-+//       send(r6);
-+//       send(r3);
-+//       send(0x10000000);
-+//    }
-+   MME_INSN(0,   ADD, ZERO,    R6, IMMED,                 4, IMMED1,      ALU1,
-+                ADDC, ZERO,    R7,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
-+   MME_INSN(0,   ADD, ZERO,    R3,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+//    mthd(0x0110, 0);
-+//    send(0);
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x0110/4, IMMED0,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+// }
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+};
-+
-+uint32_t mmec597_conservative_raster_state[] = {
-+// r1 = load();
-+// mthd(0x3400, 1);
-+// send(0);
-+// send(((r1 >> 8) & 7) << 23);
-+// send(0x03800000);
-+// mthd(0x2310, 1);
-+// send(0x00418800);
-+// r2 = r1 & 0xf;
-+// r3 = 16;
-+// r2 = r2 | (((r1 >> 4) & 0xf) << 8);
-+// mthd(0x0a1c, 8);
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,  (1<<12)|0x3400/4, IMMED0,    IMMED1,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0, MERGE, ZERO,  ZERO,    R1, (23<<10)|(3<<5)|8,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0380,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,  (1<<12)|0x2310/4, IMMED0,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x0041,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x8800,   NONE,      NONE),
-+   MME_INSN(0,   AND,   R2,    R1, IMMED,               0xf,   NONE,      NONE,
-+                 ADD,   R3,  ZERO, IMMED,                16,   NONE,      NONE),
-+   MME_INSN(0, MERGE,   R2,    R2,    R1,  (8<<10)|(4<<5)|4, IMMED1,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,  (8<<12)|0x0a1c/4,   NONE,      NONE),
-+// while (HW_LOOP_COUNT < r3)
-+//    send(r2);
-+   MME_INSN(0,  LOOP, ZERO,    R3,  ZERO,            0x0002,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+// mthd(0x1148, 0);
-+// send(1);
-+   MME_INSN(1,   ADD, ZERO,  ZERO,  ZERO,  (0<<12)|0x1148/4, IMMED0,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,                 1,   NONE,    IMMED1,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+};
-+
-+uint32_t mmec597_compute_counter[] = {
-+// r0 = load();
-+// r1 = 1;
-+// r2 = 0;
-+// while (HW_LOOP_COUNT < r2) {
-+   MME_INSN(0,   ADD,   R0, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD,   R1, IMMED,  ZERO,                 1,   NONE,      NONE),
-+   MME_INSN(0,  LOOP, ZERO,    R0,  ZERO,            0x0003,   NONE,      NONE,
-+                 ADD,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+//    r3 = load();
-+//    [r1,r0] *= r3;
-+// }
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,  MULU,   R1,    R1,    R3,                 0,   NONE,      NONE,
-+                MULH,   R2,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+// r3 = read(0x3410);
-+// r4 = read(0x3414);
-+// [r4,r3] += [r2,r1];
-+// mthd(0x3410, 1);
-+// send(r3);
-+// send(r4);
-+   MME_INSN(0, STATE, ZERO,  ZERO,  ZERO,          0x3410/4,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(1, STATE, ZERO,  ZERO,  ZERO,          0x3414/4,   NONE,      NONE,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R3,    R3,    R1,  (1<<12)|0x3410/4, IMMED0,      ALU0,
-+                ADDC,   R4,    R4,    R2,                 0,   NONE,      ALU1),
-+};
-+
-+uint32_t mmec597_compute_counter_to_query[] = {
-+// r1 = load();
-+// r3 = read(0x3410);
-+// r2 = load();
-+// r4 = read(0x3414);
-+// [r2,r1] = [r2,r1] + [r4,r3];
-+// mthd(0x1b00, 1);
-+// r3 = load();
-+// send(r3);
-+// r4 = load();
-+// send(r4);
-+// send(r1);
-+// send(0x10000000);
-+   MME_INSN(0,   ADD,   R1, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+               STATE,   R3, IMMED,  ZERO,          0x3410/4,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R2, LOAD0,  ZERO,                 0,   NONE,      NONE,
-+               STATE,   R4, IMMED,  ZERO,          0x3414/4,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R1,    R1,    R3,  (1<<12)|0x1b00/4, IMMED0,      NONE,
-+                ADDC,   R2,    R2,    R4,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD,   R3, LOAD0,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD,   R4, LOAD1,  ZERO,                 0,   NONE,      ALU1),
-+   MME_INSN(0,   ADD, ZERO,    R1,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+// [r3,r4] = [r3,r4] + 4;
-+// mthd(0x1b00, 1);
-+// send(r3);
-+// send(r4);
-+// send(r2);
-+// send(0x10000000);
-+   MME_INSN(0,   ADD, ZERO,    R4, IMMED,                 4, IMMED1,      ALU1,
-+                ADDC, ZERO,    R3,  ZERO,  (1<<12)|0x1b00/4,   NONE,      ALU0),
-+   MME_INSN(1,   ADD, ZERO,    R2,  ZERO,                 0,   NONE,      ALU0,
-+                 ADD, ZERO,  ZERO,  ZERO,                 0,   NONE,      NONE),
-+   MME_INSN(0,   ADD, ZERO,  ZERO,  ZERO,            0x1000,   NONE, IMMED32_0,
-+                 ADD, ZERO,  ZERO,  ZERO,            0x0000,   NONE,      NONE),
-+};
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
-index 221bab3105b..539bdc75022 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
-@@ -157,6 +157,12 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define NVC0_3D_UNK0220__ESIZE					0x00000004
- #define NVC0_3D_UNK0220__LEN					0x00000028
- 
-+#define TU102_3D_INDEX_ARRAY_LIMIT_HIGH				0x00000238
-+
-+#define TU102_3D_INDEX_ARRAY_LIMIT_LOW				0x0000023c
-+
-+#define TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE		0x000002b8
-+
- #define NVC0_3D_UNK02C0					0x000002c0
- 
- #define NVC0_3D_UNK02C4					0x000002c4
-@@ -278,6 +284,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define NVC0_3D_UNK0400__ESIZE					0x00000004
- #define NVC0_3D_UNK0400__LEN					0x000000c0
- 
-+#define TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i0)		       (0x00000600 + 0x8*(i0))
-+#define TU102_3D_VERTEX_ARRAY_LIMIT_LOW(i0)		       (0x00000604 + 0x8*(i0))
-+
- #define NVC0_3D_TFB_STREAM(i0)				       (0x00000700 + 0x10*(i0))
- #define NVC0_3D_TFB_STREAM__ESIZE				0x00000010
- #define NVC0_3D_TFB_STREAM__LEN				0x00000004
-@@ -1787,6 +1796,9 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- #define NVC0_3D_SP_UNK14__ESIZE				0x00000004
- #define NVC0_3D_SP_UNK14__LEN					0x00000004
- 
-+#define GV100_3D_SP_ADDRESS_HIGH(i0)			       (0x00002014 + 0x40*(i0))
-+#define GV100_3D_SP_ADDRESS_LOW(i0)			       (0x00002018 + 0x40*(i0))
-+
- #define NVC0_3D_TEX_LIMITS(i0)				       (0x00002200 + 0x10*(i0))
- #define NVC0_3D_TEX_LIMITS__ESIZE				0x00000010
- #define NVC0_3D_TEX_LIMITS__LEN				0x00000005
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
-index c897e4e8b97..69131fa22d3 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
-@@ -37,6 +37,55 @@ nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz, bool is_3d)
-    return nv50_tex_choose_tile_dims_helper(nx, ny, nz, is_3d);
- }
- 
-+static uint32_t
-+tu102_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
-+{
-+   uint32_t kind;
-+
-+   if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
-+      return 0;
-+   if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
-+      return 0;
-+
-+   switch (mt->base.base.format) {
-+   case PIPE_FORMAT_Z16_UNORM:
-+      if (compressed)
-+         kind = 0x0b; // NV_MMU_PTE_KIND_Z16_COMPRESSIBLE_DISABLE_PLC
-+      else
-+         kind = 0x01; // NV_MMU_PTE_KIND_Z16
-+      break;
-+   case PIPE_FORMAT_X8Z24_UNORM:
-+   case PIPE_FORMAT_S8X24_UINT:
-+   case PIPE_FORMAT_S8_UINT_Z24_UNORM:
-+      if (compressed)
-+         kind = 0x0e; // NV_MMU_PTE_KIND_Z24S8_COMPRESSIBLE_DISABLE_PLC
-+      else
-+         kind = 0x05; // NV_MMU_PTE_KIND_Z24S8
-+      break;
-+   case PIPE_FORMAT_X24S8_UINT:
-+   case PIPE_FORMAT_Z24X8_UNORM:
-+   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
-+      if (compressed)
-+         kind = 0x0c; // NV_MMU_PTE_KIND_S8Z24_COMPRESSIBLE_DISABLE_PLC
-+      else
-+         kind = 0x03; // NV_MMU_PTE_KIND_S8Z24
-+      break;
-+   case PIPE_FORMAT_X32_S8X24_UINT:
-+   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
-+      if (compressed)
-+         kind = 0x0d; // NV_MMU_PTE_KIND_ZF32_X24S8_COMPRESSIBLE_DISABLE_PLC
-+      else
-+         kind = 0x04; // NV_MMU_PTE_KIND_ZF32_X24S8
-+      break;
-+   case PIPE_FORMAT_Z32_FLOAT:
-+   default:
-+      kind = 0x06;
-+      break;
-+   }
-+
-+   return kind;
-+}
-+
- static uint32_t
- nvc0_mt_choose_storage_type(struct nv50_miptree *mt, bool compressed)
- {
-@@ -357,7 +406,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
-    if (pt->bind & PIPE_BIND_LINEAR)
-       pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
- 
--   bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
-+   if (dev->chipset < 0x160)
-+      bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
-+   else
-+      bo_config.nvc0.memtype = tu102_mt_choose_storage_type(mt, compressed);
- 
-    if (!nvc0_miptree_init_ms_mode(mt)) {
-       FREE(mt);
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
-index 32aa82d168c..d2b2de47c8d 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
-@@ -645,7 +645,10 @@ nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset,
-    prog->code_size = info->bin.codeSize;
-    prog->relocs = info->bin.relocData;
-    prog->fixups = info->bin.fixupData;
--   prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
-+   if (info->target >= NVISA_GV100_CHIPSET)
-+      prog->num_gprs = MIN2(info->bin.maxGPR + 5, 256); //XXX: why?
-+   else
-+      prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
-    prog->cp.smem_size = info->bin.smemSize;
-    prog->num_barriers = info->numBarriers;
- 
-@@ -734,7 +737,14 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
-    struct nvc0_screen *screen = nvc0->screen;
-    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
-    int ret;
--   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
-+   uint32_t size = prog->code_size;
-+
-+   if (!is_cp) {
-+      if (screen->eng3d->oclass < TU102_3D_CLASS)
-+         size += GF100_SHADER_HEADER_SIZE;
-+      else
-+         size += TU102_SHADER_HEADER_SIZE;
-+   }
- 
-    /* On Fermi, SP_START_ID must be aligned to 0x40.
-     * On Kepler, the first instruction must be aligned to 0x80 because
-@@ -750,7 +760,8 @@ nvc0_program_alloc_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
-    prog->code_base = prog->mem->start;
- 
-    if (!is_cp) {
--      if (screen->base.class_3d >= NVE4_3D_CLASS) {
-+      if (screen->base.class_3d >= NVE4_3D_CLASS &&
-+          screen->base.class_3d < TU102_3D_CLASS) {
-          switch (prog->mem->start & 0xff) {
-          case 0x40: prog->code_base += 0x70; break;
-          case 0x80: prog->code_base += 0x30; break;
-@@ -777,7 +788,16 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
- {
-    struct nvc0_screen *screen = nvc0->screen;
-    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
--   uint32_t code_pos = prog->code_base + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
-+   uint32_t code_pos = prog->code_base;
-+   uint32_t size_sph = 0;
-+
-+   if (!is_cp) {
-+      if (screen->eng3d->oclass < TU102_3D_CLASS)
-+         size_sph = GF100_SHADER_HEADER_SIZE;
-+      else
-+         size_sph = TU102_SHADER_HEADER_SIZE;
-+   }
-+   code_pos += size_sph;
- 
-    if (prog->relocs)
-       nv50_ir_relocate_code(prog->relocs, prog->code, code_pos,
-@@ -803,8 +823,7 @@ nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
- 
-    if (!is_cp)
-       nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
--                           NV_VRAM_DOMAIN(&screen->base),
--                           NVC0_SHADER_HEADER_SIZE, prog->hdr);
-+                           NV_VRAM_DOMAIN(&screen->base), size_sph, prog->hdr);
- 
-    nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
-                         NV_VRAM_DOMAIN(&screen->base), prog->code_size,
-@@ -817,7 +836,14 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
-    struct nvc0_screen *screen = nvc0->screen;
-    const bool is_cp = prog->type == PIPE_SHADER_COMPUTE;
-    int ret;
--   uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
-+   uint32_t size = prog->code_size;
-+
-+   if (!is_cp) {
-+      if (screen->eng3d->oclass < TU102_3D_CLASS)
-+         size += GF100_SHADER_HEADER_SIZE;
-+      else
-+         size += TU102_SHADER_HEADER_SIZE;
-+   }
- 
-    ret = nvc0_program_alloc_code(nvc0, prog);
-    if (ret) {
-@@ -874,8 +900,7 @@ nvc0_program_upload(struct nvc0_context *nvc0, struct nvc0_program *prog)
-             BEGIN_NVC0(nvc0->base.pushbuf, NVC0_CP(FLUSH), 1);
-             PUSH_DATA (nvc0->base.pushbuf, NVC0_COMPUTE_FLUSH_CODE);
-          } else {
--            BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(SP_START_ID(i)), 1);
--            PUSH_DATA (nvc0->base.pushbuf, progs[i]->code_base);
-+            nvc0_program_sp_start_id(nvc0, i, progs[i]);
-          }
-       }
-    }
-@@ -953,7 +978,7 @@ nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
-    unsigned base = 0;
-    unsigned i;
-    if (prog->type != PIPE_SHADER_COMPUTE)
--      base = NVC0_SHADER_HEADER_SIZE;
-+      base = GF100_SHADER_HEADER_SIZE;
-    for (i = 0; i < prog->cp.num_syms; ++i)
-       if (syms[i].label == label)
-          return prog->code_base + base + syms[i].offset;
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
-index 5684207aa54..2c465b342e9 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
-@@ -15,7 +15,9 @@ struct nvc0_transform_feedback_state {
- };
- 
- 
--#define NVC0_SHADER_HEADER_SIZE (20 * 4)
-+#define GF100_SHADER_HEADER_SIZE (20 * 4)
-+#define TU102_SHADER_HEADER_SIZE (32 * 4)
-+#define NVC0_MAX_SHADER_HEADER_SIZE TU102_SHADER_HEADER_SIZE
- 
- struct nvc0_program {
-    struct pipe_shader_state pipe;
-@@ -30,7 +32,7 @@ struct nvc0_program {
-    unsigned code_size;
-    unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
- 
--   uint32_t hdr[20];
-+   uint32_t hdr[NVC0_MAX_SHADER_HEADER_SIZE/4];
-    uint32_t flags[2];
- 
-    struct {
-@@ -72,4 +74,6 @@ struct nvc0_program {
-    struct nouveau_heap *mem;
- };
- 
-+void
-+nvc0_program_sp_start_id(struct nvc0_context *, int, struct nvc0_program *);
- #endif
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
-index 7abbf762af2..07d74ddd50c 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
-@@ -27,15 +27,17 @@
- #include "util/format/u_format_s3tc.h"
- #include "util/u_screen.h"
- #include "pipe/p_screen.h"
--#include "compiler/nir/nir.h"
- 
- #include "nouveau_vp3_video.h"
- 
-+#include "codegen/nv50_ir_driver.h"
-+
- #include "nvc0/nvc0_context.h"
- #include "nvc0/nvc0_screen.h"
- 
- #include "nvc0/mme/com9097.mme.h"
- #include "nvc0/mme/com90c0.mme.h"
-+#include "nvc0/mme/comc597.mme.h"
- 
- #include "nv50/g80_texture.xml.h"
- 
-@@ -443,8 +445,8 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
-    case PIPE_SHADER_CAP_PREFERRED_IR:
-       return screen->prefer_nir ? PIPE_SHADER_IR_NIR : PIPE_SHADER_IR_TGSI;
-    case PIPE_SHADER_CAP_SUPPORTED_IRS: {
--      uint32_t irs = 1 << PIPE_SHADER_IR_TGSI |
--                     1 << PIPE_SHADER_IR_NIR;
-+      uint32_t irs = 1 << PIPE_SHADER_IR_NIR |
-+         ((class_3d >= GV100_3D_CLASS) ? 0 : 1 << PIPE_SHADER_IR_TGSI);
-       if (screen->force_enable_cl)
-          irs |= 1 << PIPE_SHADER_IR_NIR_SERIALIZED;
-       return irs;
-@@ -467,6 +469,14 @@ nvc0_screen_get_shader_param(struct pipe_screen *pscreen,
-    case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
-       return shader != PIPE_SHADER_FRAGMENT;
-    case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
-+      /* HW doesn't support indirect addressing of fragment program inputs
-+       * on Volta.  The binary driver generates a function to handle every
-+       * possible indirection, and indirectly calls the function to handle
-+       * this instead.
-+       */
-+      if (class_3d >= GV100_3D_CLASS)
-+         return shader != PIPE_SHADER_FRAGMENT;
-+      return 1;
-    case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
-    case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
-       return 1;
-@@ -717,6 +727,26 @@ nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
-    return pos + size;
- }
- 
-+static int
-+tu102_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
-+                     unsigned size, const uint32_t *data)
-+{
-+   struct nouveau_pushbuf *push = screen->base.pushbuf;
-+
-+   size /= 4;
-+
-+   assert((pos + size) <= 0x800);
-+
-+   BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
-+   PUSH_DATA (push, (m - 0x3800) / 8);
-+   PUSH_DATA (push, pos);
-+   BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
-+   PUSH_DATA (push, pos);
-+   PUSH_DATAp(push, data, size);
-+
-+   return pos + (size / 3);
-+}
-+
- static void
- nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
- {
-@@ -728,8 +758,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
-    BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
-    PUSH_DATA (push, 0xff);
-    PUSH_DATA (push, 0xff);
--   BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
--   PUSH_DATA (push, 0x3f);
-+   if (obj_class < GV100_3D_CLASS) {
-+      BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
-+      PUSH_DATA (push, 0x3f);
-+   }
- 
-    BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
-    PUSH_DATA (push, (3 << 16) | 3);
-@@ -761,8 +793,10 @@ nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
-    BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
-    PUSH_DATA (push, 3);
- 
--   BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
--   PUSH_DATA (push, 0x3fffff);
-+   if (obj_class < GV100_3D_CLASS) {
-+      BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
-+      PUSH_DATA (push, 0x3fffff);
-+   }
-    BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
-    PUSH_DATA (push, 1);
-    BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
-@@ -822,6 +856,8 @@ nvc0_screen_init_compute(struct nvc0_screen *screen)
-    case 0x110:
-    case 0x120:
-    case 0x130:
-+   case 0x140:
-+   case 0x160:
-       return nve4_screen_compute_setup(screen, screen->base.pushbuf);
-    default:
-       return -1;
-@@ -893,13 +929,15 @@ nvc0_screen_resize_text_area(struct nvc0_screen *screen, uint64_t size)
-    nouveau_heap_init(&screen->text_heap, 0, size - 0x100);
- 
-    /* update the code segment setup */
--   BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
--   PUSH_DATAh(push, screen->text->offset);
--   PUSH_DATA (push, screen->text->offset);
--   if (screen->compute) {
--      BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
-+   if (screen->eng3d->oclass < GV100_3D_CLASS) {
-+      BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
-       PUSH_DATAh(push, screen->text->offset);
-       PUSH_DATA (push, screen->text->offset);
-+      if (screen->compute) {
-+         BEGIN_NVC0(push, NVC0_CP(CODE_ADDRESS_HIGH), 2);
-+         PUSH_DATAh(push, screen->text->offset);
-+         PUSH_DATA (push, screen->text->offset);
-+      }
-    }
- 
-    return 0;
-@@ -939,74 +977,14 @@ nvc0_screen_bind_cb_3d(struct nvc0_screen *screen, bool *can_serialize,
-    IMMED_NVC0(push, NVC0_3D(CB_BIND(stage)), (index << 4) | (size >= 0));
- }
- 
--static const nir_shader_compiler_options nir_options = {
--   .lower_fdiv = false,
--   .lower_ffma = false,
--   .fuse_ffma = false, /* nir doesn't track mad vs fma */
--   .lower_flrp32 = true,
--   .lower_flrp64 = true,
--   .lower_fpow = false,
--   .lower_fsat = false,
--   .lower_fsqrt = false, // TODO: only before gm200
--   .lower_fmod = true,
--   .lower_bitfield_extract = false,
--   .lower_bitfield_extract_to_shifts = false,
--   .lower_bitfield_insert = false,
--   .lower_bitfield_insert_to_shifts = false,
--   .lower_bitfield_reverse = false,
--   .lower_bit_count = false,
--   .lower_ifind_msb = false,
--   .lower_find_lsb = false,
--   .lower_uadd_carry = true, // TODO
--   .lower_usub_borrow = true, // TODO
--   .lower_mul_high = false,
--   .lower_negate = false,
--   .lower_sub = true,
--   .lower_scmp = true, // TODO: not implemented yet
--   .lower_idiv = true,
--   .lower_isign = false, // TODO
--   .fdot_replicates = false, // TODO
--   .lower_ffloor = false, // TODO
--   .lower_ffract = true,
--   .lower_fceil = false, // TODO
--   .lower_ldexp = true,
--   .lower_pack_half_2x16 = true,
--   .lower_pack_unorm_2x16 = true,
--   .lower_pack_snorm_2x16 = true,
--   .lower_pack_unorm_4x8 = true,
--   .lower_pack_snorm_4x8 = true,
--   .lower_unpack_half_2x16 = true,
--   .lower_unpack_unorm_2x16 = true,
--   .lower_unpack_snorm_2x16 = true,
--   .lower_unpack_unorm_4x8 = true,
--   .lower_unpack_snorm_4x8 = true,
--   .lower_extract_byte = true,
--   .lower_extract_word = true,
--   .lower_all_io_to_temps = false,
--   .vertex_id_zero_based = false,
--   .lower_base_vertex = false,
--   .lower_helper_invocation = false,
--   .lower_cs_local_index_from_id = true,
--   .lower_cs_local_id_from_index = false,
--   .lower_device_index_to_zero = false, // TODO
--   .lower_wpos_pntc = false, // TODO
--   .lower_hadd = true, // TODO
--   .lower_add_sat = true, // TODO
--   .use_interpolated_input_intrinsics = true,
--   .lower_mul_2x32_64 = true, // TODO
--   .max_unroll_iterations = 32,
--   .lower_int64_options = nir_lower_ufind_msb64|nir_lower_divmod64, // TODO
--   .lower_doubles_options = nir_lower_dmod, // TODO
--   .lower_to_scalar = true,
--};
--
- static const void *
- nvc0_screen_get_compiler_options(struct pipe_screen *pscreen,
-                                  enum pipe_shader_ir ir,
-                                  enum pipe_shader_type shader)
- {
-+   struct nvc0_screen *screen = nvc0_screen(pscreen);
-    if (ir == PIPE_SHADER_IR_NIR)
--      return &nir_options;
-+      return nv50_ir_nir_shader_compiler_options(screen->base.device->chipset);
-    return NULL;
- }
- 
-@@ -1038,6 +1016,8 @@ nvc0_screen_create(struct nouveau_device *dev)
-    case 0x110:
-    case 0x120:
-    case 0x130:
-+   case 0x140:
-+   case 0x160:
-       break;
-    default:
-       return NULL;
-@@ -1104,16 +1084,19 @@ nvc0_screen_create(struct nouveau_device *dev)
-    screen->base.fence.emit = nvc0_screen_fence_emit;
-    screen->base.fence.update = nvc0_screen_fence_update;
- 
-+   if (dev->chipset < 0x140) {
-+      ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
-+                               NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
-+      if (ret)
-+         FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
- 
--   ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
--                            NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
--   if (ret)
--      FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
--
--   BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
--   PUSH_DATA (push, screen->nvsw->handle);
-+      BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
-+      PUSH_DATA (push, screen->nvsw->handle);
-+   }
- 
-    switch (dev->chipset & ~0xf) {
-+   case 0x160:
-+   case 0x140:
-    case 0x130:
-    case 0x120:
-    case 0x110:
-@@ -1167,6 +1150,12 @@ nvc0_screen_create(struct nouveau_device *dev)
-    PUSH_DATA (push, screen->fence.bo->offset + 16);
- 
-    switch (dev->chipset & ~0xf) {
-+   case 0x160:
-+      obj_class = TU102_3D_CLASS;
-+      break;
-+   case 0x140:
-+      obj_class = GV100_3D_CLASS;
-+      break;
-    case 0x130:
-       switch (dev->chipset) {
-       case 0x130:
-@@ -1414,25 +1403,47 @@ nvc0_screen_create(struct nouveau_device *dev)
-       PUSH_DATA (push, 16384 << 16);
-    }
- 
-+   if (screen->eng3d->oclass < TU102_3D_CLASS) {
- #define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
- 
--   i = 0;
--   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
--   MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
--   MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
--   MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
--   MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
--   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
--   MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
--   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
--   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
--   MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
--   MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
--   MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
--   MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
--   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
--   MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
--   MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
-+      i = 0;
-+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mme9097_per_instance_bf);
-+      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mme9097_blend_enables);
-+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mme9097_vertex_array_select);
-+      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mme9097_tep_select);
-+      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mme9097_gp_select);
-+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mme9097_poly_mode_front);
-+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mme9097_poly_mode_back);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mme9097_draw_arrays_indirect);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mme9097_draw_elts_indirect);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mme9097_draw_arrays_indirect_count);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mme9097_draw_elts_indirect_count);
-+      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mme9097_query_buffer_write);
-+      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mme9097_conservative_raster_state);
-+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mme9097_compute_counter);
-+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mme9097_compute_counter_to_query);
-+      MK_MACRO(NVC0_CP_MACRO_LAUNCH_GRID_INDIRECT, mme90c0_launch_grid_indirect);
-+   } else {
-+#undef MK_MACRO
-+#define MK_MACRO(m, n) i = tu102_graph_set_macro(screen, m, i, sizeof(n), n);
-+
-+      i = 0;
-+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, mmec597_per_instance_bf);
-+      MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, mmec597_blend_enables);
-+      MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, mmec597_vertex_array_select);
-+      MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, mmec597_tep_select);
-+      MK_MACRO(NVC0_3D_MACRO_GP_SELECT, mmec597_gp_select);
-+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, mmec597_poly_mode_front);
-+      MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, mmec597_poly_mode_back);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT, mmec597_draw_arrays_indirect);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT, mmec597_draw_elts_indirect);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ARRAYS_INDIRECT_COUNT, mmec597_draw_arrays_indirect_count);
-+      MK_MACRO(NVC0_3D_MACRO_DRAW_ELEMENTS_INDIRECT_COUNT, mmec597_draw_elts_indirect_count);
-+      MK_MACRO(NVC0_3D_MACRO_QUERY_BUFFER_WRITE, mmec597_query_buffer_write);
-+      MK_MACRO(NVC0_3D_MACRO_CONSERVATIVE_RASTER_STATE, mmec597_conservative_raster_state);
-+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER, mmec597_compute_counter);
-+      MK_MACRO(NVC0_3D_MACRO_COMPUTE_COUNTER_TO_QUERY, mmec597_compute_counter_to_query);
-+   }
- 
-    BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
-    PUSH_DATA (push, 1);
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
-index b7e0c8a930f..490026b2c00 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
-@@ -64,6 +64,22 @@ nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
-    return true; /* stream output info only */
- }
- 
-+void
-+nvc0_program_sp_start_id(struct nvc0_context *nvc0, int stage,
-+                         struct nvc0_program *prog)
-+{
-+   struct nouveau_pushbuf *push = nvc0->base.pushbuf;
-+
-+   if (nvc0->screen->eng3d->oclass < GV100_3D_CLASS) {
-+      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(stage)), 1);
-+      PUSH_DATA (push, prog->code_base);
-+   } else {
-+      BEGIN_NVC0(push, SUBC_3D(GV100_3D_SP_ADDRESS_HIGH(stage)), 2);
-+      PUSH_DATAh(push, nvc0->screen->text->offset + prog->code_base);
-+      PUSH_DATA (push, nvc0->screen->text->offset + prog->code_base);
-+   }
-+}
-+
- void
- nvc0_vertprog_validate(struct nvc0_context *nvc0)
- {
-@@ -74,9 +90,9 @@ nvc0_vertprog_validate(struct nvc0_context *nvc0)
-          return;
-    nvc0_program_update_context_state(nvc0, vp, 0);
- 
--   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
-+   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 1);
-    PUSH_DATA (push, 0x11);
--   PUSH_DATA (push, vp->code_base);
-+   nvc0_program_sp_start_id(nvc0, 1, vp);
-    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
-    PUSH_DATA (push, vp->num_gprs);
- 
-@@ -152,9 +168,9 @@ nvc0_fragprog_validate(struct nvc0_context *nvc0)
-                  fp->fp.post_depth_coverage);
-    }
- 
--   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
-+   BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 1);
-    PUSH_DATA (push, 0x51);
--   PUSH_DATA (push, fp->code_base);
-+   nvc0_program_sp_start_id(nvc0, 5, fp);
-    BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
-    PUSH_DATA (push, fp->num_gprs);
- 
-@@ -176,9 +192,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
-          BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
-          PUSH_DATA (push, tp->tp.tess_mode);
-       }
--      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
-+      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
-       PUSH_DATA (push, 0x21);
--      PUSH_DATA (push, tp->code_base);
-+      nvc0_program_sp_start_id(nvc0, 2, tp);
-       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
-       PUSH_DATA (push, tp->num_gprs);
-    } else {
-@@ -186,9 +202,9 @@ nvc0_tctlprog_validate(struct nvc0_context *nvc0)
-       /* not a whole lot we can do to handle this failure */
-       if (!nvc0_program_validate(nvc0, tp))
-          assert(!"unable to validate empty tcp");
--      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
-+      BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
-       PUSH_DATA (push, 0x20);
--      PUSH_DATA (push, tp->code_base);
-+      nvc0_program_sp_start_id(nvc0, 2, tp);
-    }
-    nvc0_program_update_context_state(nvc0, tp, 1);
- }
-@@ -206,8 +222,7 @@ nvc0_tevlprog_validate(struct nvc0_context *nvc0)
-       }
-       BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
-       PUSH_DATA (push, 0x31);
--      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
--      PUSH_DATA (push, tp->code_base);
-+      nvc0_program_sp_start_id(nvc0, 3, tp);
-       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
-       PUSH_DATA (push, tp->num_gprs);
-    } else {
-@@ -227,8 +242,7 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
-    if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
-       BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
-       PUSH_DATA (push, 0x41);
--      BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
--      PUSH_DATA (push, gp->code_base);
-+      nvc0_program_sp_start_id(nvc0, 4, gp);
-       BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
-       PUSH_DATA (push, gp->num_gprs);
-    } else {
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
-index 538effdb531..731b0b5dbf8 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
-@@ -29,6 +29,8 @@
- #include "util/format/u_format.h"
- #include "util/u_surface.h"
- 
-+#include "tgsi/tgsi_ureg.h"
-+
- #include "os/os_thread.h"
- 
- #include "nvc0/nvc0_context.h"
-@@ -138,6 +140,11 @@ nvc0_2d_texture_set(struct nouveau_pushbuf *push, bool dst,
-       PUSH_DATA (push, bo->offset + offset);
-    }
- 
-+   if (dst) {
-+      IMMED_NVC0(push, SUBC_2D(NVC0_2D_SET_DST_COLOR_RENDER_TO_ZETA_SURFACE),
-+                 util_format_is_depth_or_stencil(pformat));
-+   }
-+
- #if 0
-    if (dst) {
-       BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
-@@ -772,7 +779,7 @@ gm200_evaluate_depth_buffer(struct pipe_context *pipe)
- struct nvc0_blitter
- {
-    struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
--   struct nvc0_program vp;
-+   struct nvc0_program *vp;
- 
-    struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
- 
-@@ -785,6 +792,7 @@ struct nvc0_blitctx
- {
-    struct nvc0_context *nvc0;
-    struct nvc0_program *fp;
-+   struct nvc0_program *vp;
-    uint8_t mode;
-    uint16_t color_mask;
-    uint8_t filter;
-@@ -809,78 +817,27 @@ struct nvc0_blitctx
-    struct nvc0_rasterizer_stateobj rast;
- };
- 
--static void
--nvc0_blitter_make_vp(struct nvc0_blitter *blit)
-+static void *
-+nvc0_blitter_make_vp(struct pipe_context *pipe)
- {
--   static const uint32_t code_nvc0[] =
--   {
--      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
--      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
--      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
--      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
--      0x00001de7, 0x80000000, /* exit */
--   };
--   static const uint32_t code_nve4[] =
--   {
--      0x00000007, 0x20000000, /* sched */
--      0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
--      0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
--      0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
--      0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
--      0x00001de7, 0x80000000, /* exit */
--   };
--   static const uint32_t code_gk110[] =
--   {
--      0x00000000, 0x08000000, /* sched */
--      0x401ffc12, 0x7ec7fc00, /* ld b64 $r4d a[0x80] 0x0 0x0 */
--      0x481ffc02, 0x7ecbfc00, /* ld b96 $r0t a[0x90] 0x0 0x0 */
--      0x381ffc12, 0x7f07fc00, /* st b64 a[0x70] $r4d 0x0 0x0 */
--      0x401ffc02, 0x7f0bfc00, /* st b96 a[0x80] $r0t 0x0 0x0 */
--      0x001c003c, 0x18000000, /* exit */
--   };
--   static const uint32_t code_gm107[] =
--   {
--      0xe4200701, 0x001d0400, /* sched (st 0x1 wr 0x0) (st 0x1 wr 0x1) (st 0x1 wr 0x2) */
--      0x0807ff00, 0xefd87f80, /* ld b32 $r0 a[0x80] 0x0 */
--      0x0847ff01, 0xefd87f80, /* ld b32 $r1 a[0x84] 0x0 */
--      0x0907ff02, 0xefd87f80, /* ld b32 $r2 a[0x90] 0x0 */
--      0xf0200761, 0x003f8400, /* sched (st 0x1 wr 0x3) (st 0x1 wr 0x4) (st 0x1 wt 0x1) */
--      0x0947ff03, 0xefd87f80, /* ld b32 $r3 a[0x94] 0x0 */
--      0x0987ff04, 0xefd87f80, /* ld b32 $r4 a[0x98] 0x0 */
--      0x0707ff00, 0xeff07f80, /* st b32 a[0x70] $r0 0x0 */
--      0xfc2017e1, 0x011f8404, /* sched (st 0x1 wt 0x2) (st 0x1 wt 0x4) (st 0x1 wt 0x8) */
--      0x0747ff01, 0xeff07f80, /* st b32 a[0x74] $r1 0x0 */
--      0x0807ff02, 0xeff07f80, /* st b32 a[0x80] $r2 0x0 */
--      0x0847ff03, 0xeff07f80, /* st b32 a[0x84] $r3 0x0 */
--      0xfde087e1, 0x001f8000, /* sched (st 0x1 wt 0x10) (st 0xf) (st 0x0) */
--      0x0887ff04, 0xeff07f80, /* st b32 a[0x88] $r4 0x0 */
--      0x0007000f, 0xe3000000, /* exit */
--   };
--
--   blit->vp.type = PIPE_SHADER_VERTEX;
--   blit->vp.translated = true;
--   if (blit->screen->base.class_3d >= GM107_3D_CLASS) {
--      blit->vp.code = (uint32_t *)code_gm107; /* const_cast */
--      blit->vp.code_size = sizeof(code_gm107);
--   } else
--   if (blit->screen->base.class_3d >= NVF0_3D_CLASS) {
--      blit->vp.code = (uint32_t *)code_gk110; /* const_cast */
--      blit->vp.code_size = sizeof(code_gk110);
--   } else
--   if (blit->screen->base.class_3d >= NVE4_3D_CLASS) {
--      blit->vp.code = (uint32_t *)code_nve4; /* const_cast */
--      blit->vp.code_size = sizeof(code_nve4);
--   } else {
--      blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
--      blit->vp.code_size = sizeof(code_nvc0);
--   }
--   blit->vp.num_gprs = 6;
--   blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
-+   struct ureg_program *ureg;
-+   struct ureg_src ipos, itex;
-+   struct ureg_dst opos, otex;
-+
-+   ureg = ureg_create(PIPE_SHADER_VERTEX);
-+   if (!ureg)
-+      return NULL;
-+
-+   opos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0);
-+   ipos = ureg_DECL_vs_input(ureg, 0);
-+   otex = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 0);
-+   itex = ureg_DECL_vs_input(ureg, 1);
-+
-+   ureg_MOV(ureg, ureg_writemask(opos, TGSI_WRITEMASK_XY ), ipos);
-+   ureg_MOV(ureg, ureg_writemask(otex, TGSI_WRITEMASK_XYZ), itex);
-+   ureg_END(ureg);
- 
--   blit->vp.hdr[0]  = 0x00020461; /* vertprog magic */
--   blit->vp.hdr[4]  = 0x000ff000; /* no outputs read */
--   blit->vp.hdr[6]  = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
--   blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
-+   return ureg_create_shader_and_destroy(ureg, pipe);
- }
- 
- static void
-@@ -910,6 +867,20 @@ nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
-       G80_TSC_1_MIP_FILTER_NONE;
- }
- 
-+static void
-+nvc0_blit_select_vp(struct nvc0_blitctx *ctx)
-+{
-+   struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
-+
-+   if (!blitter->vp) {
-+      mtx_lock(&blitter->mutex);
-+      if (!blitter->vp)
-+         blitter->vp = nvc0_blitter_make_vp(&ctx->nvc0->base.pipe);
-+      mtx_unlock(&blitter->mutex);
-+   }
-+   ctx->vp = blitter->vp;
-+}
-+
- static void
- nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
- {
-@@ -1082,7 +1053,7 @@ nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx,
- 
-    nvc0->rast = &ctx->rast;
- 
--   nvc0->vertprog = &blitter->vp;
-+   nvc0->vertprog = ctx->vp;
-    nvc0->tctlprog = NULL;
-    nvc0->tevlprog = NULL;
-    nvc0->gmtyprog = NULL;
-@@ -1221,6 +1192,7 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
-    blit->filter = nv50_blit_get_filter(info);
-    blit->render_condition_enable = info->render_condition_enable;
- 
-+   nvc0_blit_select_vp(blit);
-    nvc0_blit_select_fp(blit, info);
-    nvc0_blitctx_pre_blit(blit, info);
- 
-@@ -1266,6 +1238,11 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
-       }
-    }
- 
-+   if (screen->eng3d->oclass >= TU102_3D_CLASS) {
-+      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE),
-+                 util_format_is_depth_or_stencil(info->dst.format));
-+   }
-+
-    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
-    IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
-               NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
-@@ -1326,7 +1303,10 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
-    PUSH_DATAh(push, vtxbuf);
-    PUSH_DATA (push, vtxbuf);
-    PUSH_DATA (push, 0);
--   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-+   if (screen->eng3d->oclass < TU102_3D_CLASS)
-+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-+   else
-+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-    PUSH_DATAh(push, vtxbuf + length - 1);
-    PUSH_DATA (push, vtxbuf + length - 1);
- 
-@@ -1403,6 +1383,8 @@ nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
- 
-    /* restore viewport transform */
-    IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
-+   if (screen->eng3d->oclass >= TU102_3D_CLASS)
-+      IMMED_NVC0(push, SUBC_3D(TU102_3D_SET_COLOR_RENDER_TO_ZETA_SURFACE), 0);
- }
- 
- static void
-@@ -1697,7 +1679,6 @@ nvc0_blitter_create(struct nvc0_screen *screen)
- 
-    (void) mtx_init(&screen->blitter->mutex, mtx_plain);
- 
--   nvc0_blitter_make_vp(screen->blitter);
-    nvc0_blitter_make_sampler(screen->blitter);
- 
-    return true;
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
-index 92bd7eb5b8e..8287d8431b1 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
-@@ -360,7 +360,11 @@ nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
-          PUSH_DATAh(push, res->address + offset);
-          PUSH_DATA (push, res->address + offset);
-       }
--      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
-+
-+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
-+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
-+      else
-+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
-       PUSH_DATAh(push, res->address + limit);
-       PUSH_DATA (push, res->address + limit);
- 
-@@ -406,7 +410,11 @@ nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
-       PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
-       PUSH_DATAh(push, buf->address + offset);
-       PUSH_DATA (push, buf->address + offset);
--      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
-+
-+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
-+         BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
-+      else
-+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
-       PUSH_DATAh(push, buf->address + limit);
-       PUSH_DATA (push, buf->address + limit);
- 
-@@ -961,12 +969,23 @@ nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
-       assert(nouveau_resource_mapped_by_gpu(&buf->base));
- 
-       PUSH_SPACE(push, 6);
--      BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
--      PUSH_DATAh(push, buf->address);
--      PUSH_DATA (push, buf->address);
--      PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
--      PUSH_DATA (push, buf->address + buf->base.width0 - 1);
--      PUSH_DATA (push, info->index_size >> 1);
-+      if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS) {
-+         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
-+         PUSH_DATAh(push, buf->address);
-+         PUSH_DATA (push, buf->address);
-+         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
-+         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
-+         PUSH_DATA (push, info->index_size >> 1);
-+      } else {
-+         BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 2);
-+         PUSH_DATAh(push, buf->address);
-+         PUSH_DATA (push, buf->address);
-+         BEGIN_NVC0(push, SUBC_3D(TU102_3D_INDEX_ARRAY_LIMIT_HIGH), 2);
-+         PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
-+         PUSH_DATA (push, buf->address + buf->base.width0 - 1);
-+         BEGIN_NVC0(push, NVC0_3D(INDEX_FORMAT), 1);
-+         PUSH_DATA (push, info->index_size >> 1);
-+      }
- 
-       BCTX_REFN(nvc0->bufctx_3d, 3D_IDX, buf, RD);
-    }
-diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
-index 8aa7088dfec..d49a5dfd2cf 100644
---- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
-@@ -228,7 +228,11 @@ nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
-    BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
-    PUSH_DATAh(push, va);
-    PUSH_DATA (push, va);
--   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-+
-+   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
-+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-+   else
-+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
-    PUSH_DATAh(push, va + size - 1);
-    PUSH_DATA (push, va + size - 1);
- 
-@@ -771,7 +775,11 @@ nvc0_push_upload_vertex_ids(struct push_context *ctx,
-    PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
-    PUSH_DATAh(push, va);
-    PUSH_DATA (push, va);
--   BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
-+
-+   if (nvc0->screen->eng3d->oclass < TU102_3D_CLASS)
-+      BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
-+   else
-+      BEGIN_NVC0(push, SUBC_3D(TU102_3D_VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
-    PUSH_DATAh(push, va + info->count * index_size - 1);
-    PUSH_DATA (push, va + info->count * index_size - 1);
- 
-diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
-index 146eeb35f85..d4687b652ba 100644
---- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
-+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
-@@ -27,11 +27,18 @@
- 
- #include "codegen/nv50_ir_driver.h"
- 
--#ifndef NDEBUG
--static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
--static void gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *);
--#endif
--
-+#include "drf.h"
-+#include "qmd.h"
-+#include "cla0c0qmd.h"
-+#include "clc0c0qmd.h"
-+#include "clc3c0qmd.h"
-+
-+#define NVA0C0_QMDV00_06_VAL_SET(p,a...) NVVAL_MW_SET((p), NVA0C0, QMDV00_06, ##a)
-+#define NVA0C0_QMDV00_06_DEF_SET(p,a...) NVDEF_MW_SET((p), NVA0C0, QMDV00_06, ##a)
-+#define NVC0C0_QMDV02_01_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC0C0, QMDV02_01, ##a)
-+#define NVC0C0_QMDV02_01_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC0C0, QMDV02_01, ##a)
-+#define NVC3C0_QMDV02_02_VAL_SET(p,a...) NVVAL_MW_SET((p), NVC3C0, QMDV02_02, ##a)
-+#define NVC3C0_QMDV02_02_DEF_SET(p,a...) NVDEF_MW_SET((p), NVC3C0, QMDV02_02, ##a)
- 
- int
- nve4_screen_compute_setup(struct nvc0_screen *screen,
-@@ -45,6 +52,12 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
-    uint64_t address;
- 
-    switch (dev->chipset & ~0xf) {
-+   case 0x160:
-+      obj_class = TU102_COMPUTE_CLASS;
-+      break;
-+   case 0x140:
-+      obj_class = GV100_COMPUTE_CLASS;
-+      break;
-    case 0x100:
-    case 0xf0:
-       obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
-@@ -88,24 +101,35 @@ nve4_screen_compute_setup(struct nvc0_screen *screen,
-    PUSH_DATAh(push, screen->tls->size / screen->mp_count);
-    PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
-    PUSH_DATA (push, 0xff);
--   BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
--   PUSH_DATAh(push, screen->tls->size / screen->mp_count);
--   PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
--   PUSH_DATA (push, 0xff);
-+   if (obj_class < GV100_COMPUTE_CLASS) {
-+      BEGIN_NVC0(push, NVE4_CP(MP_TEMP_SIZE_HIGH(1)), 3);
-+      PUSH_DATAh(push, screen->tls->size / screen->mp_count);
-+      PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
-+      PUSH_DATA (push, 0xff);
-+   }
- 
-    /* Unified address space ? Who needs that ? Certainly not OpenCL.
-     *
-     * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
-     *  accessible. We cannot prevent that at the moment, so expect failure.
-     */
--   BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
--   PUSH_DATA (push, 0xff << 24);
--   BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
--   PUSH_DATA (push, 0xfe << 24);
--
--   BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
--   PUSH_DATAh(push, screen->text->offset);
--   PUSH_DATA (push, screen->text->offset);
-+   if (obj_class < GV100_COMPUTE_CLASS) {
-+      BEGIN_NVC0(push, NVE4_CP(LOCAL_BASE), 1);
-+      PUSH_DATA (push, 0xff << 24);
-+      BEGIN_NVC0(push, NVE4_CP(SHARED_BASE), 1);
-+      PUSH_DATA (push, 0xfe << 24);
-+
-+      BEGIN_NVC0(push, NVE4_CP(CODE_ADDRESS_HIGH), 2);
-+      PUSH_DATAh(push, screen->text->offset);
-+      PUSH_DATA (push, screen->text->offset);
-+   } else {
-+      BEGIN_NVC0(push, SUBC_CP(0x2a0), 2);
-+      PUSH_DATAh(push, 0xfeULL << 24);
-+      PUSH_DATA (push, 0xfeULL << 24);
-+      BEGIN_NVC0(push, SUBC_CP(0x7b0), 2);
-+      PUSH_DATAh(push, 0xffULL << 24);
-+      PUSH_DATA (push, 0xffULL << 24);
-+   }
- 
-    BEGIN_NVC0(push, SUBC_CP(0x0310), 1);
-    PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
-@@ -542,14 +566,35 @@ nve4_compute_upload_input(struct nvc0_context *nvc0,
-    PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
- }
- 
--static inline uint8_t
--nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
-+static inline void
-+gp100_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index,
-+                            struct nouveau_bo *bo, uint32_t base, uint32_t size)
-+{
-+   uint64_t address = bo->offset + base;
-+
-+   assert(index < 8);
-+   assert(!(base & 0xff));
-+
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, index,
-+                                 DIV_ROUND_UP(size, 16));
-+   NVC0C0_QMDV02_01_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
-+}
-+
-+static inline void
-+nve4_cp_launch_desc_set_cb(uint32_t *qmd, unsigned index, struct nouveau_bo *bo,
-+                           uint32_t base, uint32_t size)
- {
--   if (shared_size > (32 << 10))
--      return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
--   if (shared_size > (16 << 10))
--      return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
--   return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
-+   uint64_t address = bo->offset + base;
-+
-+   assert(index < 8);
-+   assert(!(base & 0xff));
-+
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_LOWER, index, address);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_ADDR_UPPER, index, address >> 32);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CONSTANT_BUFFER_SIZE, index, size);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, CONSTANT_BUFFER_VALID, index, TRUE);
- }
- 
- static void
-@@ -577,92 +622,182 @@ nve4_compute_setup_buf_cb(struct nvc0_context *nvc0, bool gp100, void *desc)
- }
- 
- static void
--nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
--                               struct nve4_cp_launch_desc *desc,
-+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
-                                const struct pipe_grid_info *info)
- {
-    const struct nvc0_screen *screen = nvc0->screen;
-    const struct nvc0_program *cp = nvc0->compprog;
- 
--   nve4_cp_launch_desc_init_default(desc);
--
--   desc->entry = nvc0_program_symbol_offset(cp, info->pc);
--
--   desc->griddim_x = info->grid[0];
--   desc->griddim_y = info->grid[1];
--   desc->griddim_z = info->grid[2];
--   desc->blockdim_x = info->block[0];
--   desc->blockdim_y = info->block[1];
--   desc->blockdim_z = info->block[2];
--
--   desc->shared_size = align(cp->cp.smem_size, 0x100);
--   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
--   desc->local_size_n = 0;
--   desc->cstack_size = 0x800;
--   desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, TRUE);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, TRUE);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_TEXTURE_DATA_CACHE, TRUE);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_DATA_CACHE, TRUE);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, TRUE);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
-+   NVA0C0_QMDV00_06_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, SASS_VERSION, 0x30);
-+
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, PROGRAM_OFFSET,
-+                                 nvc0_program_symbol_offset(cp, info->pc));
-+
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
-+
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, SHARED_MEMORY_SIZE,
-+                                 align(cp->cp.smem_size, 0x100));
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
-+                                 (cp->hdr[1] & 0xfffff0) +
-+                                 align(cp->cp.lmem_size, 0x10));
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
-+
-+   if (cp->cp.smem_size > (32 << 10))
-+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
-+                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
-+   else
-+   if (cp->cp.smem_size > (16 << 10))
-+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
-+                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB);
-+   else
-+      NVA0C0_QMDV00_06_DEF_SET(qmd, L1_CONFIGURATION,
-+                                    DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB);
- 
--   desc->gpr_alloc = cp->num_gprs;
--   desc->bar_alloc = cp->num_barriers;
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
-+   NVA0C0_QMDV00_06_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
- 
-    // Only bind user uniforms and the driver constant buffer through the
-    // launch descriptor because UBOs are sticked to the driver cb to avoid the
-    // limitation of 8 CBs.
-    if (nvc0->constbuf[5][0].user || cp->parm_size) {
--      nve4_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
-+      nve4_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
-                                  NVC0_CB_USR_INFO(5), 1 << 16);
- 
-       // Later logic will attempt to bind a real buffer at position 0. That
-       // should not happen if we've bound a user buffer.
-       assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
-    }
--   nve4_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
-+   nve4_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
-                               NVC0_CB_AUX_INFO(5), 1 << 11);
- 
--   nve4_compute_setup_buf_cb(nvc0, false, desc);
-+   nve4_compute_setup_buf_cb(nvc0, false, qmd);
- }
- 
- static void
--gp100_compute_setup_launch_desc(struct nvc0_context *nvc0,
--                                struct gp100_cp_launch_desc *desc,
-+gp100_compute_setup_launch_desc(struct nvc0_context *nvc0, uint32_t *qmd,
-                                 const struct pipe_grid_info *info)
- {
-    const struct nvc0_screen *screen = nvc0->screen;
-    const struct nvc0_program *cp = nvc0->compprog;
- 
--   gp100_cp_launch_desc_init_default(desc);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
-+   NVC0C0_QMDV02_01_DEF_SET(qmd, RELEASE_MEMBAR_TYPE, FE_SYSMEMBAR);
-+   NVC0C0_QMDV02_01_DEF_SET(qmd, CWD_MEMBAR_TYPE, L1_SYSMEMBAR);
-+   NVC0C0_QMDV02_01_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
-+
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, PROGRAM_OFFSET,
-+                                 nvc0_program_symbol_offset(cp, info->pc));
-+
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
-+
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, SHARED_MEMORY_SIZE,
-+                                 align(cp->cp.smem_size, 0x100));
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE,
-+                                 (cp->hdr[1] & 0xfffff0) +
-+                                 align(cp->cp.lmem_size, 0x10));
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, 0);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, 0x800);
- 
--   desc->entry = nvc0_program_symbol_offset(cp, info->pc);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, REGISTER_COUNT, cp->num_gprs);
-+   NVC0C0_QMDV02_01_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
- 
--   desc->griddim_x = info->grid[0];
--   desc->griddim_y = info->grid[1];
--   desc->griddim_z = info->grid[2];
--   desc->blockdim_x = info->block[0];
--   desc->blockdim_y = info->block[1];
--   desc->blockdim_z = info->block[2];
-+   // Only bind user uniforms and the driver constant buffer through the
-+   // launch descriptor because UBOs are sticked to the driver cb to avoid the
-+   // limitation of 8 CBs.
-+   if (nvc0->constbuf[5][0].user || cp->parm_size) {
-+      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
-+                                  NVC0_CB_USR_INFO(5), 1 << 16);
- 
--   desc->shared_size = align(cp->cp.smem_size, 0x100);
--   desc->local_size_p = (cp->hdr[1] & 0xfffff0) + align(cp->cp.lmem_size, 0x10);
--   desc->local_size_n = 0;
--   desc->cstack_size = 0x800;
-+      // Later logic will attempt to bind a real buffer at position 0. That
-+      // should not happen if we've bound a user buffer.
-+      assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
-+   }
-+   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
-+                               NVC0_CB_AUX_INFO(5), 1 << 11);
-+
-+   nve4_compute_setup_buf_cb(nvc0, true, qmd);
-+}
-+
-+static int
-+gv100_sm_config_smem_size(u32 size)
-+{
-+   if      (size > 64 * 1024) size = 96 * 1024;
-+   else if (size > 32 * 1024) size = 64 * 1024;
-+   else if (size > 16 * 1024) size = 32 * 1024;
-+   else if (size >  8 * 1024) size = 16 * 1024;
-+   else                       size =  8 * 1024;
-+   return (size / 4096) + 1;
-+}
- 
--   desc->gpr_alloc = cp->num_gprs;
--   desc->bar_alloc = cp->num_barriers;
-+static void
-+gv100_compute_setup_launch_desc(struct nvc0_context *nvc0, u32 *qmd,
-+                                const struct pipe_grid_info *info)
-+{
-+   struct nvc0_program *cp = nvc0->compprog;
-+   struct nvc0_screen *screen = nvc0->screen;
-+   uint64_t entry =
-+      screen->text->offset + nvc0_program_symbol_offset(cp, info->pc);
-+
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, SM_GLOBAL_CACHING_ENABLE, 1);
-+   NVC3C0_QMDV02_02_DEF_SET(qmd, API_VISIBLE_CALL_LIMIT, NO_CHECK);
-+   NVC3C0_QMDV02_02_DEF_SET(qmd, SAMPLER_INDEX, VIA_HEADER_INDEX);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, SHARED_MEMORY_SIZE,
-+                                  align(cp->cp.smem_size, 0x100));
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE,
-+                                  gv100_sm_config_smem_size(8 * 1024));
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE,
-+                                  gv100_sm_config_smem_size(96 * 1024));
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_VERSION, 2);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, QMD_MAJOR_VERSION, 2);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE,
-+                                  gv100_sm_config_smem_size(cp->cp.smem_size));
-+
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_WIDTH, info->grid[0]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_HEIGHT, info->grid[1]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_RASTER_DEPTH, info->grid[2]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION0, info->block[0]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION1, info->block[1]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, CTA_THREAD_DIMENSION2, info->block[2]);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, REGISTER_COUNT_V, cp->num_gprs);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, BARRIER_COUNT, cp->num_barriers);
- 
-    // Only bind user uniforms and the driver constant buffer through the
-    // launch descriptor because UBOs are sticked to the driver cb to avoid the
-    // limitation of 8 CBs.
-    if (nvc0->constbuf[5][0].user || cp->parm_size) {
--      gp100_cp_launch_desc_set_cb(desc, 0, screen->uniform_bo,
-+      gp100_cp_launch_desc_set_cb(qmd, 0, screen->uniform_bo,
-                                   NVC0_CB_USR_INFO(5), 1 << 16);
- 
-       // Later logic will attempt to bind a real buffer at position 0. That
-       // should not happen if we've bound a user buffer.
-       assert(nvc0->constbuf[5][0].user || !nvc0->constbuf[5][0].u.buf);
-    }
--   gp100_cp_launch_desc_set_cb(desc, 7, screen->uniform_bo,
-+   gp100_cp_launch_desc_set_cb(qmd, 7, screen->uniform_bo,
-                                NVC0_CB_AUX_INFO(5), 1 << 11);
- 
--   nve4_compute_setup_buf_cb(nvc0, true, desc);
-+   nve4_compute_setup_buf_cb(nvc0, true, qmd);
-+
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_LOWER, entry & 0xffffffff);
-+   NVC3C0_QMDV02_02_VAL_SET(qmd, PROGRAM_ADDRESS_UPPER, entry >> 32);
- }
- 
- static inline void *
-@@ -677,6 +812,7 @@ nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
-       ptr += adj;
-       *pgpuaddr += adj;
-    }
-+   memset(ptr, 0x00, 256);
-    return ptr;
- }
- 
-@@ -734,6 +870,9 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
-    if (ret)
-       goto out;
- 
-+   if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
-+      gv100_compute_setup_launch_desc(nvc0, desc, info);
-+   else
-    if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
-       gp100_compute_setup_launch_desc(nvc0, desc, info);
-    else
-@@ -743,10 +882,14 @@ nve4_launch_grid(struct pipe_context *pipe, const struct pipe_grid_info *info)
- 
- #ifndef NDEBUG
-    if (debug_get_num_option("NV50_PROG_DEBUG", 0)) {
-+      debug_printf("Queue Meta Data:\n");
-+      if (nvc0->screen->compute->oclass >= GV100_COMPUTE_CLASS)
-+         NVC3C0QmdDump_V02_02(desc);
-+      else
-       if (nvc0->screen->compute->oclass >= GP100_COMPUTE_CLASS)
--         gp100_compute_dump_launch_desc(desc);
-+         NVC0C0QmdDump_V02_01(desc);
-       else
--         nve4_compute_dump_launch_desc(desc);
-+         NVA0C0QmdDump_V00_06(desc);
-    }
- #endif
- 
-@@ -877,115 +1020,6 @@ nve4_compute_validate_textures(struct nvc0_context *nvc0)
-    nvc0->dirty_3d |= NVC0_NEW_3D_TEXTURES;
- }
- 
--
--#ifndef NDEBUG
--static const char *nve4_cache_split_name(unsigned value)
--{
--   switch (value) {
--   case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
--   case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
--   case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
--   default:
--      return "(invalid)";
--   }
--}
--
--static void
--nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
--{
--   const uint32_t *data = (const uint32_t *)desc;
--   unsigned i;
--   bool zero = false;
--
--   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
--
--   for (i = 0; i < sizeof(*desc); i += 4) {
--      if (data[i / 4]) {
--         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
--         zero = false;
--      } else
--      if (!zero) {
--         debug_printf("...\n");
--         zero = true;
--      }
--   }
--
--   debug_printf("entry = 0x%x\n", desc->entry);
--   debug_printf("grid dimensions = %ux%ux%u\n",
--                desc->griddim_x, desc->griddim_y, desc->griddim_z);
--   debug_printf("block dimensions = %ux%ux%u\n",
--                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
--   debug_printf("s[] size: 0x%x\n", desc->shared_size);
--   debug_printf("l[] size: -0x%x / +0x%x\n",
--                desc->local_size_n, desc->local_size_p);
--   debug_printf("stack size: 0x%x\n", desc->cstack_size);
--   debug_printf("barrier count: %u\n", desc->bar_alloc);
--   debug_printf("$r count: %u\n", desc->gpr_alloc);
--   debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
--   debug_printf("linked tsc: %d\n", desc->linked_tsc);
--
--   for (i = 0; i < 8; ++i) {
--      uint64_t address;
--      uint32_t size = desc->cb[i].size;
--      bool valid = !!(desc->cb_mask & (1 << i));
--
--      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
--
--      if (!valid && !address && !size)
--         continue;
--      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
--                   i, address, size, valid ? "" : "  (invalid)");
--   }
--}
--
--static void
--gp100_compute_dump_launch_desc(const struct gp100_cp_launch_desc *desc)
--{
--   const uint32_t *data = (const uint32_t *)desc;
--   unsigned i;
--   bool zero = false;
--
--   debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
--
--   for (i = 0; i < sizeof(*desc); i += 4) {
--      if (data[i / 4]) {
--         debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
--         zero = false;
--      } else
--      if (!zero) {
--         debug_printf("...\n");
--         zero = true;
--      }
--   }
--
--   debug_printf("entry = 0x%x\n", desc->entry);
--   debug_printf("grid dimensions = %ux%ux%u\n",
--                desc->griddim_x, desc->griddim_y, desc->griddim_z);
--   debug_printf("block dimensions = %ux%ux%u\n",
--                desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
--   debug_printf("s[] size: 0x%x\n", desc->shared_size);
--   debug_printf("l[] size: -0x%x / +0x%x\n",
--                desc->local_size_n, desc->local_size_p);
--   debug_printf("stack size: 0x%x\n", desc->cstack_size);
--   debug_printf("barrier count: %u\n", desc->bar_alloc);
--   debug_printf("$r count: %u\n", desc->gpr_alloc);
--   debug_printf("linked tsc: %d\n", desc->linked_tsc);
--
--   for (i = 0; i < 8; ++i) {
--      uint64_t address;
--      uint32_t size = desc->cb[i].size_sh4 << 4;
--      bool valid = !!(desc->cb_mask & (1 << i));
--
--      address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
--
--      if (!valid && !address && !size)
--         continue;
--      debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
--                   i, address, size, valid ? "" : "  (invalid)");
--   }
--}
--#endif
--
- #ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
- static void
- nve4_compute_trap_info(struct nvc0_context *nvc0)
-diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
-index 7ff6935cc3d..d2599f7a71d 100644
---- a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
-+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
-@@ -4,142 +4,6 @@
- 
- #include "nvc0/nve4_compute.xml.h"
- 
--struct nve4_cp_launch_desc
--{
--   u32 unk0[8];
--   u32 entry;
--   u32 unk9[2];
--   u32 unk11_0      : 30;
--   u32 linked_tsc   : 1;
--   u32 unk11_31     : 1;
--   u32 griddim_x    : 31;
--   u32 unk12        : 1;
--   u16 griddim_y;
--   u16 griddim_z;
--   u32 unk14[3];
--   u16 shared_size; /* must be aligned to 0x100 */
--   u16 unk17;
--   u16 unk18;
--   u16 blockdim_x;
--   u16 blockdim_y;
--   u16 blockdim_z;
--   u32 cb_mask      : 8;
--   u32 unk20_8      : 21;
--   u32 cache_split  : 2;
--   u32 unk20_31     : 1;
--   u32 unk21[8];
--   struct {
--      u32 address_l;
--      u32 address_h : 8;
--      u32 reserved  : 7;
--      u32 size      : 17;
--   } cb[8];
--   u32 local_size_p : 20;
--   u32 unk45_20     : 7;
--   u32 bar_alloc    : 5;
--   u32 local_size_n : 20;
--   u32 unk46_20     : 4;
--   u32 gpr_alloc    : 8;
--   u32 cstack_size  : 20;
--   u32 unk47_20     : 12;
--   u32 unk48[16];
--};
--
--struct gp100_cp_launch_desc
--{
--   u32 unk0[8];
--   u32 entry;
--   u32 unk9[2];
--   u32 unk11_0      : 30;
--   u32 linked_tsc   : 1;
--   u32 unk11_31     : 1;
--   u32 griddim_x    : 31;
--   u32 unk12        : 1;
--   u16 griddim_y;
--   u16 unk13;
--   u16 griddim_z;
--   u16 unk14;
--   u32 unk15[2];
--   u32 shared_size  : 18;
--   u32 unk17        : 14;
--   u16 unk18;
--   u16 blockdim_x;
--   u16 blockdim_y;
--   u16 blockdim_z;
--   u32 cb_mask      : 8;
--   u32 unk20        : 24;
--   u32 unk21[8];
--   u32 local_size_p : 24;
--   u32 unk29        : 3;
--   u32 bar_alloc    : 5;
--   u32 local_size_n : 24;
--   u32 gpr_alloc    : 8;
--   u32 cstack_size  : 24;
--   u32 unk31        : 8;
--   struct {
--      u32 address_l;
--      u32 address_h : 17;
--      u32 reserved  : 2;
--      u32 size_sh4  : 13;
--   } cb[8];
--   u32 unk48[16];
--};
--
--static inline void
--nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
--{
--   memset(desc, 0, sizeof(*desc));
--
--   desc->unk0[7]  = 0xbc000000;
--   desc->unk11_0  = 0x04014000;
--   desc->unk47_20 = 0x300;
--}
--
--static inline void
--nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
--                           unsigned index,
--                           struct nouveau_bo *bo,
--                           uint32_t base, uint32_t size)
--{
--   uint64_t address = bo->offset + base;
--
--   assert(index < 8);
--   assert(!(base & 0xff));
--
--   desc->cb[index].address_l = address;
--   desc->cb[index].address_h = address >> 32;
--   desc->cb[index].size = size;
--
--   desc->cb_mask |= 1 << index;
--}
--
--static inline void
--gp100_cp_launch_desc_init_default(struct gp100_cp_launch_desc *desc)
--{
--   memset(desc, 0, sizeof(*desc));
--
--   desc->unk0[4]  = 0x40;
--   desc->unk11_0  = 0x04014000;
--}
--
--static inline void
--gp100_cp_launch_desc_set_cb(struct gp100_cp_launch_desc *desc,
--                            unsigned index,
--                            struct nouveau_bo *bo,
--                            uint32_t base, uint32_t size)
--{
--   uint64_t address = bo->offset + base;
--
--   assert(index < 8);
--   assert(!(base & 0xff));
--
--   desc->cb[index].address_l = address;
--   desc->cb[index].address_h = address >> 32;
--   desc->cb[index].size_sh4 = DIV_ROUND_UP(size, 16);
--
--   desc->cb_mask |= 1 << index;
--}
--
- struct nve4_mp_trap_info {
-    u32 lock;
-    u32 pc;
-diff --git a/src/gallium/drivers/nouveau/nvc0/qmd.h b/src/gallium/drivers/nouveau/nvc0/qmd.h
-new file mode 100644
-index 00000000000..86c290fe836
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/qmd.h
-@@ -0,0 +1,68 @@
-+#ifndef __NVHW_QMD_H__
-+#define __NVHW_QMD_H__
-+#include <stdio.h>
-+#include <stdint.h>
-+#include "util/u_debug.h"
-+#include "drf.h"
-+
-+#define NVQMD_ENUM_1(X,drf,v0)                                                 \
-+   [drf##_##v0] = #v0
-+#define NVQMD_ENUM_2(X,drf,v0,v1)                                              \
-+   [drf##_##v0] = #v0,                                                         \
-+   [drf##_##v1] = #v1
-+#define NVQMD_ENUM_3(X,drf,v0,v1,v2)                                           \
-+   [drf##_##v0] = #v0,                                                         \
-+   [drf##_##v1] = #v1,                                                         \
-+   [drf##_##v2] = #v2
-+#define NVQMD_ENUM_8(X,drf,v0,v1,v2,v3,v4,v5,v6,v7)                            \
-+   [drf##_##v0] = #v0,                                                         \
-+   [drf##_##v1] = #v1,                                                         \
-+   [drf##_##v2] = #v2,                                                         \
-+   [drf##_##v3] = #v3,                                                         \
-+   [drf##_##v4] = #v4,                                                         \
-+   [drf##_##v5] = #v5,                                                         \
-+   [drf##_##v6] = #v6,                                                         \
-+   [drf##_##v7] = #v7
-+
-+#define NVQMD_ENUM_(X,_1,_2,_3,_4,_5,_6,_7,_8,_9,IMPL,...) IMPL
-+#define NVQMD_ENUM(A...) NVQMD_ENUM_(X, ##A, NVQMD_ENUM_8, NVQMD_ENUM_7,       \
-+                                             NVQMD_ENUM_6, NVQMD_ENUM_5,       \
-+                                             NVQMD_ENUM_4, NVQMD_ENUM_3,       \
-+                                             NVQMD_ENUM_2, NVQMD_ENUM_1)(X, ##A)
-+
-+#define NVQMD_VAL_N(X,d,r,p,f,o) do {                                          \
-+   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f);                          \
-+   debug_printf("   %-36s: "o"\n", #f, val);                                   \
-+} while(0)
-+#define NVQMD_VAL_I(X,d,r,p,f,i,o) do {                                        \
-+   uint32_t val = NVVAL_MW_GET_X((p), d##_##r##_##f(i));                       \
-+   char name[80];                                                              \
-+   snprintf(name, sizeof(name), "%s(%d)", #f, i);                              \
-+   debug_printf("   %-36s: "o"\n", name, val);                                 \
-+} while(0)
-+#define NVQMD_VAL_(X,_1,_2,_3,_4,_5,_6,IMPL,...) IMPL
-+#define NVQMD_VAL(A...) NVQMD_VAL_(X, ##A, NVQMD_VAL_I, NVQMD_VAL_N)(X, ##A)
-+
-+#define NVQMD_DEF(d,r,p,f,e...) do {                                           \
-+   static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) };                \
-+   uint32_t val = NVVAL_MW_GET((p), d, r, f);                                  \
-+   if (val < ARRAY_SIZE(ev) && ev[val])                                        \
-+      debug_printf("   %-36s: %s\n", #f, ev[val]);                             \
-+   else                                                                        \
-+      debug_printf("   %-36s: UNKNOWN 0x%x\n", #f, val);                       \
-+} while(0)
-+#define NVQMD_IDX(d,r,p,f,i,e...) do {                                         \
-+   static const char *ev[] = { NVQMD_ENUM(d##_##r##_##f,##e) };                \
-+   char name[80];                                                              \
-+   snprintf(name, sizeof(name), "%s(%d)", #f, i);                              \
-+   uint32_t val = NVVAL_MW_GET((p), d, r, f, i);                               \
-+   if (val < ARRAY_SIZE(ev) && ev[val])                                        \
-+      debug_printf("   %-36s: %s\n", name, ev[val]);                           \
-+   else                                                                        \
-+      debug_printf("   %-36s: UNKNOWN 0x%x\n", name, val);                     \
-+} while(0)
-+
-+void NVA0C0QmdDump_V00_06(uint32_t *);
-+void NVC0C0QmdDump_V02_01(uint32_t *);
-+void NVC3C0QmdDump_V02_02(uint32_t *);
-+#endif
-diff --git a/src/gallium/drivers/nouveau/nvc0/qmda0c0.c b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
-new file mode 100644
-index 00000000000..7103a893af5
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/qmda0c0.c
-@@ -0,0 +1,166 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "qmd.h"
-+#include "cla0c0qmd.h"
-+
-+#define NVA0C0_QMDV00_06_VAL(a...) NVQMD_VAL(NVA0C0, QMDV00_06, ##a)
-+#define NVA0C0_QMDV00_06_DEF(a...) NVQMD_DEF(NVA0C0, QMDV00_06, ##a)
-+#define NVA0C0_QMDV00_06_IDX(a...) NVQMD_IDX(NVA0C0, QMDV00_06, ##a)
-+
-+void
-+NVA0C0QmdDump_V00_06(uint32_t *qmd)
-+{
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_A, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_B, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_C, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_D, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_E, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_F, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_G, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_H, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A_A, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_I, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_J, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_A, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_K, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_L, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_B, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_M, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_N, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_O, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_C, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, PROGRAM_OFFSET, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_P, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Q, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_D, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_R, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_S, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
-+   NVA0C0_QMDV00_06_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_T, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_U, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, THROTTLED, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_A, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E2_B, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
-+   NVA0C0_QMDV00_06_DEF(qmd, SHARED_MEMORY_BANK_MAPPING, FOUR_BYTES_PER_BANK,
-+                                                         EIGHT_BYTES_PER_BANK);
-+   NVA0C0_QMDV00_06_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_E3_A, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_V, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_F, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, QMD_RESERVED_V1_W, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_G, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_VERSION, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_H, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
-+   for (int i = 0; i < 8; i++)
-+      NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_I, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, L1_CONFIGURATION,
-+                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_16KB,
-+                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_32KB,
-+                             DIRECTLY_ADDRESSABLE_MEMORY_SIZE_48KB);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_X, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_V1_Y, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_J, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_K, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_L, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_M, "0x%x");
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVA0C0_QMDV00_06_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVA0C0_QMDV00_06_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
-+   for (int i = 0; i < 8; i++) {
-+      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
-+      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
-+      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
-+      NVA0C0_QMDV00_06_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
-+      NVA0C0_QMDV00_06_VAL(qmd, CONSTANT_BUFFER_SIZE, i, "0x%x");
-+   }
-+   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_RESERVED_N, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, BARRIER_COUNT, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, REGISTER_COUNT, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, SASS_VERSION, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_A, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_B, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_C, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_D, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_E, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_F, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_G, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_H, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_I, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_J, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_K, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_L, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_M, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, QMD_SPARE_N, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
-+   NVA0C0_QMDV00_06_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
-+}
-diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
-new file mode 100644
-index 00000000000..945439ee0c8
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/qmdc0c0.c
-@@ -0,0 +1,165 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "qmd.h"
-+#include "clc0c0qmd.h"
-+
-+#define NVC0C0_QMDV02_01_VAL(a...) NVQMD_VAL(NVC0C0, QMDV02_01, ##a)
-+#define NVC0C0_QMDV02_01_DEF(a...) NVQMD_DEF(NVC0C0, QMDV02_01, ##a)
-+#define NVC0C0_QMDV02_01_IDX(a...) NVQMD_IDX(NVC0C0, QMDV02_01, ##a)
-+
-+void
-+NVC0C0QmdDump_V02_01(uint32_t *qmd)
-+{
-+   NVC0C0_QMDV02_01_VAL(qmd, OUTER_PUT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, OUTER_OVERFLOW, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, OUTER_GET, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, INNER_GET, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, INNER_OVERFLOW, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, INNER_PUT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_GROUP_ID, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, IS_QUEUE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
-+   NVC0C0_QMDV02_01_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_B, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_C, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, PROGRAM_OFFSET, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_D, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
-+   NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
-+   NVC0C0_QMDV02_01_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, THROTTLED, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
-+   NVC0C0_QMDV02_01_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED13A, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED14A, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_G, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_VERSION, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_H, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
-+   for (int i = 0; i < 8; i++)
-+      NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_I, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_J, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_K, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_L, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_M, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVC0C0_QMDV02_01_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_N, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, BARRIER_COUNT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, REGISTER_COUNT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, SASS_VERSION, "0x%x");
-+   for (int i = 0; i < 8; i++) {
-+      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
-+      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
-+      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
-+      NVC0C0_QMDV02_01_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
-+      NVC0C0_QMDV02_01_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
-+   }
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_R, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_S, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_RESERVED_Q, "0x%x");
-+   NVC0C0_QMDV02_01_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
-+   NVC0C0_QMDV02_01_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_G, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_H, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_I, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_J, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_K, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_L, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_M, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, QMD_SPARE_N, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
-+   NVC0C0_QMDV02_01_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
-+}
-diff --git a/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
-new file mode 100644
-index 00000000000..c9bd8966114
---- /dev/null
-+++ b/src/gallium/drivers/nouveau/nvc0/qmdc3c0.c
-@@ -0,0 +1,168 @@
-+/*
-+ * Copyright 2020 Red Hat Inc.
-+ *
-+ * Permission is hereby granted, free of charge, to any person obtaining a
-+ * copy of this software and associated documentation files (the "Software"),
-+ * to deal in the Software without restriction, including without limitation
-+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
-+ * and/or sell copies of the Software, and to permit persons to whom the
-+ * Software is furnished to do so, subject to the following conditions:
-+ *
-+ * The above copyright notice and this permission notice shall be included in
-+ * all copies or substantial portions of the Software.
-+ *
-+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
-+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
-+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
-+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-+ * OTHER DEALINGS IN THE SOFTWARE.
-+ */
-+#include "qmd.h"
-+#include "clc3c0qmd.h"
-+
-+#define NVC3C0_QMDV02_02_VAL(a...) NVQMD_VAL(NVC3C0, QMDV02_02, ##a)
-+#define NVC3C0_QMDV02_02_DEF(a...) NVQMD_DEF(NVC3C0, QMDV02_02, ##a)
-+#define NVC3C0_QMDV02_02_IDX(a...) NVQMD_IDX(NVC3C0, QMDV02_02, ##a)
-+
-+void
-+NVC3C0QmdDump_V02_02(uint32_t *qmd)
-+{
-+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_PUT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_OVERFLOW, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_GET, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, OUTER_STICKY_OVERFLOW, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, INNER_GET, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, INNER_OVERFLOW, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, INNER_PUT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, INNER_STICKY_OVERFLOW, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_GROUP_ID, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SM_GLOBAL_CACHING_ENABLE, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RUN_CTA_IN_ONE_SM_PARTITION, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, IS_QUEUE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, ADD_TO_HEAD_OF_QMD_GROUP_LINKED_LIST, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE0, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, SEMAPHORE_RELEASE_ENABLE1, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, REQUIRE_SCHEDULING_PCAS, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_SCHEDULE_ENABLE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_TYPE, QUEUE, GRID);
-+   NVC3C0_QMDV02_02_DEF(qmd, DEPENDENT_QMD_FIELD_COPY, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_B, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_C, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_HEADER_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_SAMPLER_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_TEXTURE_DATA_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_DATA_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_INSTRUCTION_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, INVALIDATE_SHADER_CONSTANT_CACHE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH_RESUME, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT_RESUME, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH_RESUME, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_OFFSET, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_LOWER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ADDR_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_D, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CIRCULAR_QUEUE_ENTRY_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_ID, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CWD_REFERENCE_COUNT_DELTA_MINUS_ONE, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE_MEMBAR_TYPE, FE_NONE, FE_SYSMEMBAR);
-+   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_INCR_ENABLE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, CWD_MEMBAR_TYPE, L1_NONE, L1_SYSMEMBAR, L1_MEMBAR);
-+   NVC3C0_QMDV02_02_DEF(qmd, SEQUENTIALLY_RUN_CTAS, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, CWD_REFERENCE_COUNT_DECR_ENABLE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, API_VISIBLE_CALL_LIMIT, _32, NO_CHECK);
-+   NVC3C0_QMDV02_02_DEF(qmd, SAMPLER_INDEX, INDEPENDENTLY, VIA_HEADER_INDEX);
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_WIDTH, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_HEIGHT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED13A, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_RASTER_DEPTH, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED14A, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, DEPENDENT_QMD_POINTER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QUEUE_ENTRIES_PER_CTA_MINUS_ONE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, COALESCE_WAITING_PERIOD, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SHARED_MEMORY_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, MIN_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, MAX_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_VERSION, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_MAJOR_VERSION, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_H, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION0, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION1, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, CTA_THREAD_DIMENSION2, "0x%x");
-+   for (int i = 0; i < 8; i++)
-+      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_VALID, i, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT_V, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, TARGET_SM_CONFIG_SHARED_MEM_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, FREE_CTA_SLOTS_EMPTY_SM, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_LOWER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SM_DISABLE_MASK_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_LOWER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_ADDRESS_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_J, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_K, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE0_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE0_PAYLOAD, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_LOWER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_ADDRESS_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_L, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_OP, RED_ADD,
-+                                                    RED_MIN,
-+                                                    RED_MAX,
-+                                                    RED_INC,
-+                                                    RED_DEC,
-+                                                    RED_AND,
-+                                                    RED_OR,
-+                                                    RED_XOR);
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_M, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_FORMAT, UNSIGNED_32, SIGNED_32);
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_REDUCTION_ENABLE, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_DEF(qmd, RELEASE1_STRUCTURE_SIZE, FOUR_WORDS, ONE_WORD);
-+   NVC3C0_QMDV02_02_VAL(qmd, RELEASE1_PAYLOAD, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_LOW_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_N, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, BARRIER_COUNT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_HIGH_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, REGISTER_COUNT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SHADER_LOCAL_MEMORY_CRS_SIZE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, SASS_VERSION, "0x%x");
-+   for (int i = 0; i < 8; i++) {
-+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_LOWER, i, "0x%x");
-+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_ADDR_UPPER, i, "0x%x");
-+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_RESERVED_ADDR, i, "0x%x");
-+      NVC3C0_QMDV02_02_IDX(qmd, CONSTANT_BUFFER_INVALIDATE, i, FALSE, TRUE);
-+      NVC3C0_QMDV02_02_VAL(qmd, CONSTANT_BUFFER_SIZE_SHIFTED4, i, "0x%x");
-+   }
-+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_LOWER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, PROGRAM_ADDRESS_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_S, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_GET, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_REQUIRE_SCHEDULING_PCAS, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_INNER_PUT, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SCG_TYPE, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_RESERVED_Q, "0x%x");
-+   NVC3C0_QMDV02_02_DEF(qmd, HW_ONLY_SPAN_LIST_HEAD_INDEX_VALID, FALSE, TRUE);
-+   NVC3C0_QMDV02_02_VAL(qmd, HW_ONLY_SKED_NEXT_QMD_POINTER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_G, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_H, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_I, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_J, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_K, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_L, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_M, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, QMD_SPARE_N, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_UPPER, "0x%x");
-+   NVC3C0_QMDV02_02_VAL(qmd, DEBUG_ID_LOWER, "0x%x");
-+}
-diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
-index 5c43518afcb..d123c8a1c17 100644
---- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
-+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
-@@ -104,6 +104,8 @@ nouveau_drm_screen_create(int fd)
- 	case 0x110:
- 	case 0x120:
- 	case 0x130:
-+	case 0x140:
-+	case 0x160:
- 		init = nvc0_screen_create;
- 		break;
- 	default:
diff --git a/SPECS/mesa.spec b/SPECS/mesa.spec
index 4e6c79d..694bfef 100644
--- a/SPECS/mesa.spec
+++ b/SPECS/mesa.spec
@@ -9,16 +9,16 @@
 %endif
 
 %ifarch %{ix86} x86_64
-%define platform_drivers ,i965
+%define platform_drivers i965
 %define with_vmware 1
 %define with_xa     1
 %define with_iris   1
 %endif
 
 %ifarch %{ix86} x86_64
-%define with_vulkan 1
+%define with_vulkan_hw 1
 %else
-%define with_vulkan 0
+%define with_vulkan_hw 0
 %endif
 
 %ifarch %{arm} aarch64
@@ -31,18 +31,20 @@
 
 %global dri_drivers %{?platform_drivers}
 
-%if 0%{?with_vulkan}
-%define vulkan_drivers intel,amd
+%if 0%{?with_vulkan_hw}
+%define vulkan_drivers swrast,intel,amd
+%else
+%define vulkan_drivers swrast
 %endif
 
 %global sanitize 0
 
-#global rctag rc4
+#global rctag rc2
 
 Name:           mesa
 Summary:        Mesa graphics libraries
-Version:        20.1.2
-Release:        3%{?rctag:.%{rctag}}%{?dist}
+Version:        20.3.3
+Release:        1%{?rctag:.%{rctag}}%{?dist}
 
 License:        MIT
 URL:            http://www.mesa3d.org
@@ -56,14 +58,9 @@ Source3:        Makefile
 # Fedora opts to ignore the optional part of clause 2 and treat that code as 2 clause BSD.
 Source4:        Mesa-MLAA-License-Clarification-Email.txt
 
-# fix llvmpipe big-endian (#1847064)
-Patch1: 0001-gallivm-nir-fix-const-loading-on-big-endian-systems.patch
-Patch2: 0001-glsl-fix-constant-packing-for-64-bit-big-endian.patch
-Patch3: 0001-gallivm-nir-fix-big-endian-64-bit-splitting-merging.patch
-
-# Add support for TU11x nvidia
-Patch10: 0001-nir-use-bitfield_insert-instead-of-bfi-in-nir_lower_.patch
-Patch11: nouveau-tu1xx-support.patch
+Patch0:	lavapipe-disable-env-var.patch
+Patch1: mesa-20.3.3-stable-fixes.patch
+Patch2: anv-remove-warning.patch
 
 BuildRequires:  gcc
 BuildRequires:  gcc-c++
@@ -72,7 +69,7 @@ BuildRequires:  meson >= 0.45
 %if %{with_hardware}
 BuildRequires:  kernel-headers
 %endif
-BuildRequires:  libdrm-devel >= 2.4.42
+BuildRequires:  libdrm-devel >= 2.4.103
 BuildRequires:  libXxf86vm-devel
 BuildRequires:  expat-devel
 BuildRequires:  xorg-x11-proto-devel
@@ -171,6 +168,7 @@ Provides:       libEGL-devel%{?_isa}
 %package dri-drivers
 Summary:        Mesa-based DRI drivers
 Requires:       %{name}-filesystem%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release}
+Requires:	libdrm >= 2.4.103
 
 %description dri-drivers
 %{summary}.
@@ -287,7 +285,6 @@ Requires:       %{name}-libd3d%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release
 %{summary}.
 %endif
 
-%if 0%{?with_vulkan}
 %package vulkan-drivers
 Summary:        Mesa Vulkan drivers
 Requires:       vulkan%{_isa}
@@ -295,6 +292,7 @@ Requires:       vulkan%{_isa}
 %description vulkan-drivers
 The drivers with support for the Vulkan API.
 
+%if 0%{?with_vulkan_hw}
 %package vulkan-devel
 Summary:        Mesa Vulkan development files
 Requires:       %{name}-vulkan-drivers%{?_isa} = %{?epoch:%{epoch}:}%{version}-%{release}
@@ -328,7 +326,7 @@ pathfix.py -i %{__python3} -pn bin/*.py src/egl/generate/*.py \
 export ASFLAGS="--generate-missing-build-notes=yes"
 %meson -Dcpp_std=gnu++14 \
   -Db_ndebug=true \
-  -Dplatforms=x11,wayland,drm,surfaceless \
+  -Dplatforms=x11,wayland \
   -Ddri3=true \
   -Ddri-drivers=%{?dri_drivers} \
 %if 0%{?with_hardware}
@@ -532,8 +530,8 @@ done
 %endif
 %endif
 
-%if 0%{?with_vulkan}
 %files vulkan-drivers
+%if 0%{?with_vulkan_hw}
 %{_libdir}/libvulkan_intel.so
 %{_libdir}/libvulkan_radeon.so
 %ifarch x86_64
@@ -543,14 +541,41 @@ done
 %{_datadir}/vulkan/icd.d/intel_icd.i686.json
 %{_datadir}/vulkan/icd.d/radeon_icd.i686.json
 %endif
+%endif
+%{_libdir}/libvulkan_lvp.so
+%{_datadir}/vulkan/icd.d/lvp_icd.*.json
 %{_libdir}/libVkLayer_MESA_device_select.so
 %{_datadir}/vulkan/implicit_layer.d/VkLayer_MESA_device_select.json
 
+%if 0%{?with_vulkan_hw}
 %files vulkan-devel
 %{_includedir}/vulkan/
 %endif
 
 %changelog
+* Tue Feb 16 2021 Dave Airlie <airlied@redhat.com> - 20.3.3-1
+- Update to 20.3.3 + upstream fixes for qemu regression
+
+* Mon Jan 11 2021 Dave Airlie <airlied@redhat.com> - 20.3.2-1
+- Update to 20.3.2 for upstream fixes
+
+* Mon Dec 21 2020 Dave Airlie <airlied@redhat.com> - 20.3.1-1
+- Update to 20.3.1 for radeon fix
+
+* Mon Dec 07 2020 Dave Airlie <airlied@redhat.com> - 20.3.0-2
+- Fix regression with radeon si/cik cards
+
+* Fri Dec 04 2020 Dave Airlie <airlied@redhat.com> - 20.3.0-1
+- Update to 20.3.0 release
+
+* Thu Nov 19 2020 Dave Airlie <airlied@redhat.com> - 20.3.0-0.1.rc2
+- Update to 20.3.0-rc2
+- Enable lavapipe behind an env var so it can be used for testing
+
+* Wed Aug 05 2020 Dave Airlie <airlied@redhat.com> - 20.1.4-1
+- Update to 20.1.4
+- Update nouveau tu1xx support patch (Karol)
+
 * Mon Jun 29 2020 Dave Airlie <airlied@redhat.com> - 20.1.2-3
 - a fix on top of the big-endian fix (#1847064)